diff options
author | Dan McDonald <danmcd@mnx.io> | 2022-08-14 10:56:57 -0400 |
---|---|---|
committer | Dan McDonald <danmcd@mnx.io> | 2022-08-14 10:56:57 -0400 |
commit | efad77c95d2ca5a22a626a8d732e9e206919c0fb (patch) | |
tree | 96717dd35d86c11f9a2cb67e56b0f5118acd0b84 | |
parent | 8edaf2d79d344fa9c1acb35f317dedd5005871a7 (diff) | |
parent | ee6ee36a8ff1701c4e61e6f118446b145220478c (diff) | |
download | illumos-joyent-efad77c95d2ca5a22a626a8d732e9e206919c0fb.tar.gz |
[illumos-gate merge]
commit ee6ee36a8ff1701c4e61e6f118446b145220478c
14838 Rename erratum 147 handling
14839 Untangle erratum 147 from lockstat
14840 Modernize lockstat probes
14865 mutex_tryenter:adaptive-acquire probe never fires
commit 64439ec0071c576648f76b4466ad6ee7a580ed33
14579 expose virtio 9P transport device
commit ec8422d0a51b3bf0b6550dd15f125990a3f73f4c
7346 beadm list shows duplicates when zone has datasets from multiple pools
commit 46dc144bc2859392d4c62f3e72d661e7b3c22a8e
14900 ddi_fm_capable(9F) man page does not match source
Conflicts:
manifest
usr/src/man/man9f/ddi_fm_init.9f
usr/src/uts/intel/os/driver_aliases
usr/src/uts/intel/os/name_to_major
36 files changed, 1866 insertions, 400 deletions
@@ -809,6 +809,7 @@ f kernel/drv/amd64/usbskel 0755 root sys f kernel/drv/amd64/usbsksp 0755 root sys f kernel/drv/amd64/usbsprl 0755 root sys f kernel/drv/amd64/vgatext 0755 root sys +f kernel/drv/amd64/vio9p 0755 root sys f kernel/drv/amd64/vioblk 0755 root sys f kernel/drv/amd64/vioif 0755 root sys f kernel/drv/amd64/vioscsi 0755 root sys @@ -4899,6 +4900,7 @@ f usr/include/sys/vfs_opreg.h 0644 root bin f usr/include/sys/vfstab.h 0644 root bin f usr/include/sys/vgareg.h 0644 root bin f usr/include/sys/videodev2.h 0644 root bin +f usr/include/sys/vio9p.h 0644 root bin f usr/include/sys/visual_io.h 0644 root bin f usr/include/sys/vlan.h 0644 root bin f usr/include/sys/vm.h 0644 root bin @@ -18988,6 +18990,7 @@ f usr/share/man/man4d/usbftdi.4d 0444 root bin f usr/share/man/man4d/usbsacm.4d 0444 root bin f usr/share/man/man4d/usbsksp.4d 0444 root bin f usr/share/man/man4d/usbsprl.4d 0444 root bin +f usr/share/man/man4d/vio9p.4d 0444 root bin f usr/share/man/man4d/vioblk.4d 0444 root bin f usr/share/man/man4d/vioif.4d 0444 root bin f usr/share/man/man4d/vioscsi.4d 0444 root bin diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c index 936560912f..e8b56c7fce 100644 --- a/usr/src/cmd/devfsadm/misc_link.c +++ b/usr/src/cmd/devfsadm/misc_link.c @@ -23,6 +23,7 @@ * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2019 Joyent, Inc. 
* Copyright 2022 Garrett D'Amore <garrett@damore.org> + * Copyright 2022 Oxide Computer Company */ #include <regex.h> @@ -60,6 +61,7 @@ static int cpuid(di_minor_t minor, di_node_t node); static int glvc(di_minor_t minor, di_node_t node); static int ses_callback(di_minor_t minor, di_node_t node); static int kmdrv_create(di_minor_t minor, di_node_t node); +static int vio9p_create(di_minor_t minor, di_node_t node); static devfsadm_create_t misc_cbt[] = { { "pseudo", "ddi_pseudo", "(^sad$)", @@ -215,7 +217,10 @@ static devfsadm_create_t misc_cbt[] = { }, { "pseudo", "ddi_pseudo", "overlay", TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name - } + }, + { "9p", "ddi_pseudo", "vio9p", + TYPE_EXACT | DRV_EXACT, ILEVEL_0, vio9p_create, + }, }; DEVFSADM_CREATE_INIT_V0(misc_cbt); @@ -257,7 +262,10 @@ static devfsadm_remove_t misc_remove_cbt[] = { }, { "pseudo", "^sctp|sctp6$", RM_PRE | RM_ALWAYS, ILEVEL_0, devfsadm_rm_link - } + }, + { "9p", "^9p/[0-9]+$", + RM_PRE | RM_HOT | RM_ALWAYS, ILEVEL_0, devfsadm_rm_all + }, }; /* Rules for gpio devices */ @@ -643,6 +651,26 @@ av_create(di_minor_t minor, di_node_t node) } /* + * Create device nodes for Virtio 9P channels: + * /dev/9p/[0-9]+ + */ +static int +vio9p_create(di_minor_t minor, di_node_t node) +{ + char *minor_name = di_minor_name(minor); + char path[PATH_MAX + 1]; + + if (minor_name == NULL || strcmp(minor_name, "9p") != 0) { + return (DEVFSADM_CONTINUE); + } + + (void) snprintf(path, sizeof (path), "9p/%d", di_instance(node)); + (void) devfsadm_mklink(path, node, minor, 0); + + return (DEVFSADM_CONTINUE); +} + +/* * Creates /dev/lom and /dev/tsalarm:ctl for tsalarm node */ static int diff --git a/usr/src/lib/libbe/common/be_activate.c b/usr/src/lib/libbe/common/be_activate.c index 4ec8f055e2..581ddc41c2 100644 --- a/usr/src/lib/libbe/common/be_activate.c +++ b/usr/src/lib/libbe/common/be_activate.c @@ -26,7 +26,7 @@ /* * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 
* Copyright 2016 Toomas Soome <tsoome@me.com> - * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2022 OmniOS Community Edition (OmniOSce) Association. */ #include <assert.h> @@ -266,7 +266,12 @@ _be_activate(char *be_name, be_nextboot_state_t nextboot) return (ret); } - be_make_root_ds(cb.obe_zpool, cb.obe_name, root_ds, sizeof (root_ds)); + if ((ret = be_make_root_ds(cb.obe_zpool, cb.obe_name, root_ds, + sizeof (root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, cb.obe_zpool, cb.obe_name); + return (ret); + } cb.obe_root_ds = strdup(root_ds); if (getzoneid() == GLOBAL_ZONEID) { @@ -573,8 +578,13 @@ set_canmount(be_node_list_t *be_nodes, char *value) while (list != NULL) { be_dataset_list_t *datasets = list->be_node_datasets; - be_make_root_ds(list->be_rpool, list->be_node_name, ds_path, - sizeof (ds_path)); + if ((err = be_make_root_ds(list->be_rpool, list->be_node_name, + ds_path, sizeof (ds_path))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container " + "dataset for %s/%s\n"), __func__, + list->be_rpool, list->be_node_name); + return (err); + } if ((zhp = zfs_open(g_zfs, ds_path, ZFS_TYPE_DATASET)) == NULL) { @@ -605,9 +615,14 @@ set_canmount(be_node_list_t *be_nodes, char *value) ZFS_CLOSE(zhp); while (datasets != NULL) { - be_make_root_ds(list->be_rpool, + if ((err = be_make_root_ds(list->be_rpool, datasets->be_dataset_name, ds_path, - sizeof (ds_path)); + sizeof (ds_path))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE " + "container dataset for %s/%s\n"), __func__, + list->be_rpool, datasets->be_dataset_name); + return (err); + } if ((zhp = zfs_open(g_zfs, ds_path, ZFS_TYPE_DATASET)) == NULL) { diff --git a/usr/src/lib/libbe/common/be_create.c b/usr/src/lib/libbe/common/be_create.c index 4158ddb677..62f5bfe112 100644 --- a/usr/src/lib/libbe/common/be_create.c +++ b/usr/src/lib/libbe/common/be_create.c @@ -24,7 +24,7 @@ * 
Copyright 2013 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014, 2015 by Delphix. All rights reserved. * Copyright (c) 2016 Martin Matuska. All rights reserved. - * Copyright 2021 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2022 OmniOS Community Edition (OmniOSce) Association. */ /* @@ -206,8 +206,12 @@ be_init(nvlist_t *be_attrs) } /* Generate string for BE's root dataset */ - be_make_root_ds(bt.nbe_zpool, bt.nbe_name, nbe_root_ds, - sizeof (nbe_root_ds)); + if ((ret = be_make_root_ds(bt.nbe_zpool, bt.nbe_name, nbe_root_ds, + sizeof (nbe_root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, bt.nbe_zpool, bt.nbe_name); + return (ret); + } /* * Create property list for new BE root dataset. If some @@ -446,8 +450,12 @@ be_destroy(nvlist_t *be_attrs) } /* Generate string for obe_name's root dataset */ - be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds, - sizeof (obe_root_ds)); + if ((ret = be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds, + sizeof (obe_root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, bt.obe_zpool, bt.obe_name); + return (ret); + } bt.obe_root_ds = obe_root_ds; if (getzoneid() != GLOBAL_ZONEID) { @@ -780,10 +788,28 @@ be_copy(nvlist_t *be_attrs) } /* + * If an auto named BE is desired, it must be in the same + * pool as the original BE. + */ + if (bt.nbe_name == NULL && bt.nbe_zpool != NULL) { + be_print_err(gettext("be_copy: cannot specify pool " + "name when creating an auto named BE\n")); + ret = BE_ERR_INVAL; + goto done; + } + + /* + * If the zpool name to create new BE in is not provided, + * create the new BE in the original BE's pool. + */ + if (bt.nbe_zpool == NULL) + bt.nbe_zpool = bt.obe_zpool; + + /* * If new BE name provided, validate the BE name and then verify * that new BE name doesn't already exist in some pool. 
*/ - if (bt.nbe_name) { + if (bt.nbe_name != NULL) { /* Validate original BE name */ if (!be_valid_be_name(bt.nbe_name)) { be_print_err(gettext("be_copy: " @@ -808,8 +834,13 @@ be_copy(nvlist_t *be_attrs) goto done; } } else { - be_make_root_ds(bt.nbe_zpool, bt.nbe_name, nbe_root_ds, - sizeof (nbe_root_ds)); + if ((ret = be_make_root_ds(bt.nbe_zpool, bt.nbe_name, + nbe_root_ds, sizeof (nbe_root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE " + "container dataset for %s/%s\n"), __func__, + bt.nbe_zpool, bt.nbe_name); + goto done; + } if (zfs_dataset_exists(g_zfs, nbe_root_ds, ZFS_TYPE_FILESYSTEM)) { be_print_err(gettext("be_copy: BE (%s) already " @@ -820,17 +851,6 @@ be_copy(nvlist_t *be_attrs) } } else { /* - * If an auto named BE is desired, it must be in the same - * pool is the original BE. - */ - if (bt.nbe_zpool != NULL) { - be_print_err(gettext("be_copy: cannot specify pool " - "name when creating an auto named BE\n")); - ret = BE_ERR_INVAL; - goto done; - } - - /* * Generate auto named BE */ if ((bt.nbe_name = be_auto_be_name(bt.obe_name)) @@ -844,19 +864,19 @@ be_copy(nvlist_t *be_attrs) autoname = B_TRUE; } - /* - * If zpool name to create new BE in is not provided, - * create new BE in original BE's pool. 
- */ - if (bt.nbe_zpool == NULL) { - bt.nbe_zpool = bt.obe_zpool; - } - /* Get root dataset names for obe_name and nbe_name */ - be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds, - sizeof (obe_root_ds)); - be_make_root_ds(bt.nbe_zpool, bt.nbe_name, nbe_root_ds, - sizeof (nbe_root_ds)); + if ((ret = be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds, + sizeof (obe_root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, bt.obe_zpool, bt.obe_name); + goto done; + } + if ((ret = be_make_root_ds(bt.nbe_zpool, bt.nbe_name, nbe_root_ds, + sizeof (nbe_root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, bt.nbe_zpool, bt.nbe_name); + goto done; + } bt.obe_root_ds = obe_root_ds; bt.nbe_root_ds = nbe_root_ds; @@ -966,8 +986,15 @@ be_copy(nvlist_t *be_attrs) * Regenerate string for new BE's * root dataset name */ - be_make_root_ds(bt.nbe_zpool, bt.nbe_name, - nbe_root_ds, sizeof (nbe_root_ds)); + if ((ret = be_make_root_ds(bt.nbe_zpool, + bt.nbe_name, nbe_root_ds, + sizeof (nbe_root_ds))) != BE_SUCCESS) { + be_print_err(gettext( + "%s: failed to get BE container " + "dataset for %s/%s\n"), __func__, + bt.nbe_zpool, bt.nbe_name); + goto done; + } bt.nbe_root_ds = nbe_root_ds; /* @@ -1122,10 +1149,18 @@ be_copy(nvlist_t *be_attrs) * Update new BE's vfstab. 
*/ - be_make_root_container_ds(bt.obe_zpool, obe_root_container, - sizeof (obe_root_container)); - be_make_root_container_ds(bt.nbe_zpool, nbe_root_container, - sizeof (nbe_root_container)); + if ((ret = be_make_root_container_ds(bt.obe_zpool, obe_root_container, + sizeof (obe_root_container))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s\n"), __func__, bt.obe_zpool); + goto done; + } + if ((ret = be_make_root_container_ds(bt.nbe_zpool, nbe_root_container, + sizeof (nbe_root_container))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s\n"), __func__, bt.nbe_zpool); + goto done; + } if ((ret = be_update_vfstab(bt.nbe_name, obe_root_container, nbe_root_container, &fld, new_mp)) != BE_SUCCESS) { @@ -1258,11 +1293,15 @@ be_find_zpool_callback(zpool_handle_t *zlp, void *data) be_transaction_data_t *bt = data; const char *zpool = zpool_get_name(zlp); char be_root_ds[MAXPATHLEN]; + int ret = 0; /* * Generate string for the BE's root dataset */ - be_make_root_ds(zpool, bt->obe_name, be_root_ds, sizeof (be_root_ds)); + if (be_make_root_ds(zpool, bt->obe_name, be_root_ds, + sizeof (be_root_ds)) != BE_SUCCESS) { + goto out; + } /* * Check if dataset exists @@ -1270,12 +1309,12 @@ be_find_zpool_callback(zpool_handle_t *zlp, void *data) if (zfs_dataset_exists(g_zfs, be_root_ds, ZFS_TYPE_FILESYSTEM)) { /* BE's root dataset exists in zpool */ bt->obe_zpool = strdup(zpool); - zpool_close(zlp); - return (1); + ret = 1; } +out: zpool_close(zlp); - return (0); + return (ret); } /* @@ -1297,23 +1336,27 @@ be_exists_callback(zpool_handle_t *zlp, void *data) const char *zpool = zpool_get_name(zlp); char *be_name = data; char be_root_ds[MAXPATHLEN]; + int ret = 0; /* * Generate string for the BE's root dataset */ - be_make_root_ds(zpool, be_name, be_root_ds, sizeof (be_root_ds)); + if (be_make_root_ds(zpool, be_name, be_root_ds, + sizeof (be_root_ds)) != BE_SUCCESS) { + goto out; + } /* * Check if 
dataset exists */ if (zfs_dataset_exists(g_zfs, be_root_ds, ZFS_TYPE_FILESYSTEM)) { /* BE's root dataset exists in zpool */ - zpool_close(zlp); - return (1); + ret = 1; } +out: zpool_close(zlp); - return (0); + return (ret); } /* @@ -1779,8 +1822,12 @@ be_destroy_zone_roots(char *zonepath_ds, be_destroy_data_t *dd) int ret = BE_SUCCESS; /* Generate string for the root container dataset for this zone. */ - be_make_container_ds(zonepath_ds, zone_container_ds, - sizeof (zone_container_ds)); + if ((ret = be_make_container_ds(zonepath_ds, zone_container_ds, + sizeof (zone_container_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s\n"), __func__, zonepath_ds); + return (ret); + } /* Get handle to this zone's root container dataset. */ if ((zhp = zfs_open(g_zfs, zone_container_ds, ZFS_TYPE_FILESYSTEM)) @@ -2049,8 +2096,12 @@ be_copy_zones(char *obe_name, char *obe_root_ds, char *nbe_root_ds) goto done; } - be_make_container_ds(zonepath_ds, zone_container_ds, - sizeof (zone_container_ds)); + if ((ret = be_make_container_ds(zonepath_ds, zone_container_ds, + sizeof (zone_container_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container " + "dataset for %s\n"), __func__, zonepath_ds); + goto done; + } if ((z_zhp = zfs_open(g_zfs, zoneroot_ds, ZFS_TYPE_FILESYSTEM)) == NULL) { @@ -2901,8 +2952,12 @@ be_create_container_ds(char *zpool) char be_container_ds[MAXPATHLEN]; /* Generate string for BE container dataset for this pool */ - be_make_container_ds(zpool, be_container_ds, - sizeof (be_container_ds)); + if (be_make_container_ds(zpool, be_container_ds, + sizeof (be_container_ds)) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s\n"), __func__, zpool); + return (B_FALSE); + } if (!zfs_dataset_exists(g_zfs, be_container_ds, ZFS_TYPE_FILESYSTEM)) { diff --git a/usr/src/lib/libbe/common/be_list.c b/usr/src/lib/libbe/common/be_list.c index 3e0833ea83..85b62eff38 100644 --- 
a/usr/src/lib/libbe/common/be_list.c +++ b/usr/src/lib/libbe/common/be_list.c @@ -29,7 +29,7 @@ * Copyright 2015 Toomas Soome <tsoome@me.com> * Copyright 2015 Gary Mills * Copyright (c) 2016 Martin Matuska. All rights reserved. - * Copyright 2018 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2022 OmniOS Community Edition (OmniOSce) Association. */ #include <assert.h> @@ -445,8 +445,12 @@ be_get_list_callback(zpool_handle_t *zlp, void *data) /* * Generate string for the BE container dataset */ - be_make_container_ds(rpool, be_container_ds, - sizeof (be_container_ds)); + if (be_make_container_ds(rpool, be_container_ds, + sizeof (be_container_ds)) != BE_SUCCESS) { + /* Move on to the next pool */ + zpool_close(zlp); + return (0); + } /* * If a BE name was specified we use it's root dataset in place of @@ -454,12 +458,17 @@ be_get_list_callback(zpool_handle_t *zlp, void *data) * the information for the specified BE. */ if (cb->be_name != NULL) { + int rv; + if (!be_valid_be_name(cb->be_name)) return (BE_ERR_INVAL); /* * Generate string for the BE root dataset */ - be_make_root_ds(rpool, cb->be_name, be_ds, sizeof (be_ds)); + if ((rv = be_make_root_ds(rpool, cb->be_name, be_ds, + sizeof (be_ds))) != BE_SUCCESS) { + return (rv); + } open_ds = be_ds; } else { open_ds = be_container_ds; diff --git a/usr/src/lib/libbe/common/be_mount.c b/usr/src/lib/libbe/common/be_mount.c index 98c861fca4..672424d1f2 100644 --- a/usr/src/lib/libbe/common/be_mount.c +++ b/usr/src/lib/libbe/common/be_mount.c @@ -24,6 +24,7 @@ * Copyright 2013 Nexenta Systems, Inc. All rights reserved. * Copyright 2015 EveryCity Ltd. * Copyright (c) 2015 by Delphix. All rights reserved. + * Copyright 2022 OmniOS Community Edition (OmniOSce) Association. 
*/ /* @@ -277,8 +278,12 @@ _be_mount(char *be_name, char **altroot, int flags) } /* Generate string for obe_name's root dataset */ - be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds, - sizeof (obe_root_ds)); + if ((ret = be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds, + sizeof (obe_root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, bt.obe_zpool, bt.obe_name); + return (ret); + } bt.obe_root_ds = obe_root_ds; /* Get handle to BE's root dataset */ @@ -448,8 +453,12 @@ _be_unmount(char *be_name, int flags) } /* Generate string for obe_name's root dataset */ - be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds, - sizeof (obe_root_ds)); + if ((ret = be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds, + sizeof (obe_root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, bt.obe_zpool, bt.obe_name); + return (ret); + } bt.obe_root_ds = obe_root_ds; /* Get handle to BE's root dataset */ @@ -1565,15 +1574,13 @@ iter_shared_fs_callback(zfs_handle_t *zhp, void *data) pool = strtok(tmp_name, "/"); if (pool) { - /* Get the name of this pool's container dataset */ - be_make_container_ds(pool, container_ds, - sizeof (container_ds)); - /* * If what we're processing is this pool's BE container * dataset, skip it. */ - if (strcmp(name, container_ds) == 0) { + if (be_make_container_ds(pool, container_ds, + sizeof (container_ds)) == BE_SUCCESS && + strcmp(name, container_ds) == 0) { ZFS_CLOSE(zhp); return (0); } diff --git a/usr/src/lib/libbe/common/be_rename.c b/usr/src/lib/libbe/common/be_rename.c index f0cd781b22..87dadefaff 100644 --- a/usr/src/lib/libbe/common/be_rename.c +++ b/usr/src/lib/libbe/common/be_rename.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. 
- * Copyright 2021 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2022 OmniOS Community Edition (OmniOSce) Association. */ #include <assert.h> @@ -150,9 +150,19 @@ be_rename(nvlist_t *be_attrs) /* New BE will reside in the same zpool as orig BE */ bt.nbe_zpool = bt.obe_zpool; - be_make_root_ds(bt.obe_zpool, bt.obe_name, root_ds, sizeof (root_ds)); + if ((ret = be_make_root_ds(bt.obe_zpool, bt.obe_name, root_ds, + sizeof (root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, bt.obe_zpool, bt.obe_name); + goto done; + }; bt.obe_root_ds = strdup(root_ds); - be_make_root_ds(bt.nbe_zpool, bt.nbe_name, root_ds, sizeof (root_ds)); + if ((ret = be_make_root_ds(bt.nbe_zpool, bt.nbe_name, root_ds, + sizeof (root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, bt.nbe_zpool, bt.nbe_name); + goto done; + } bt.nbe_root_ds = strdup(root_ds); /* @@ -248,8 +258,12 @@ be_rename(nvlist_t *be_attrs) * Since the new and old BEs reside in the same pool (see above), * the same variable can be used for the container for both. */ - be_make_root_container_ds(bt.obe_zpool, be_root_container, - sizeof (be_root_container)); + if ((ret = be_make_root_container_ds(bt.obe_zpool, be_root_container, + sizeof (be_root_container))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s\n"), __func__, bt.obe_zpool); + goto done; + } if ((ret = be_update_vfstab(bt.nbe_name, be_root_container, be_root_container, &fld, mp)) != BE_SUCCESS) { diff --git a/usr/src/lib/libbe/common/be_snapshot.c b/usr/src/lib/libbe/common/be_snapshot.c index a4e2c79e5a..92fd2ae96d 100644 --- a/usr/src/lib/libbe/common/be_snapshot.c +++ b/usr/src/lib/libbe/common/be_snapshot.c @@ -25,6 +25,7 @@ /* * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright 2022 OmniOS Community Edition (OmniOSce) Association. 
*/ /* @@ -88,7 +89,7 @@ be_create_snapshot(nvlist_t *be_attrs) char *snap_name = NULL; char *policy = NULL; boolean_t autoname = B_FALSE; - int ret = BE_SUCCESS; + int ret = BE_SUCCESS; /* Initialize libzfs handle */ if (!be_zfs_init()) @@ -184,7 +185,7 @@ be_destroy_snapshot(nvlist_t *be_attrs) { char *be_name = NULL; char *snap_name = NULL; - int ret = BE_SUCCESS; + int ret = BE_SUCCESS; /* Initialize libzfs handle */ if (!be_zfs_init()) @@ -319,8 +320,12 @@ be_rollback(nvlist_t *be_attrs) } /* Generate string for BE's root dataset */ - be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds, - sizeof (obe_root_ds)); + if ((ret = be_make_root_ds(bt.obe_zpool, bt.obe_name, obe_root_ds, + sizeof (obe_root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, bt.obe_zpool, bt.obe_name); + return (ret); + } bt.obe_root_ds = obe_root_ds; if (getzoneid() != GLOBAL_ZONEID) { @@ -434,8 +439,12 @@ _be_create_snapshot(char *be_name, char **snap_name, char *policy) return (zfs_err_to_be_err(g_zfs)); } - be_make_root_ds(bt.obe_zpool, bt.obe_name, root_ds, - sizeof (root_ds)); + if ((ret = be_make_root_ds(bt.obe_zpool, bt.obe_name, root_ds, + sizeof (root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, bt.obe_zpool, bt.obe_name); + return (ret); + } bt.obe_root_ds = root_ds; if (getzoneid() != GLOBAL_ZONEID) { @@ -662,8 +671,12 @@ _be_destroy_snapshot(char *be_name, char *snap_name) return (zfs_err_to_be_err(g_zfs)); } - be_make_root_ds(bt.obe_zpool, bt.obe_name, root_ds, - sizeof (root_ds)); + if ((ret = be_make_root_ds(bt.obe_zpool, bt.obe_name, root_ds, + sizeof (root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, bt.obe_zpool, bt.obe_name); + return (ret); + } bt.obe_root_ds = root_ds; zhp = zfs_open(g_zfs, bt.obe_root_ds, ZFS_TYPE_DATASET); diff --git 
a/usr/src/lib/libbe/common/be_utils.c b/usr/src/lib/libbe/common/be_utils.c index 17d43ee195..8b95f7bc91 100644 --- a/usr/src/lib/libbe/common/be_utils.c +++ b/usr/src/lib/libbe/common/be_utils.c @@ -24,7 +24,7 @@ * Copyright 2013 Nexenta Systems, Inc. All rights reserved. * Copyright 2016 Toomas Soome <tsoome@me.com> * Copyright (c) 2015 by Delphix. All rights reserved. - * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2022 OmniOS Community Edition (OmniOSce) Association. * Copyright (c) 2018, Joyent, Inc. */ @@ -439,17 +439,19 @@ be_get_defaults(struct be_defaults *defaults) * be_root_ds - pointer to buffer to return BE root dataset in. * be_root_ds_size - size of be_root_ds * Returns: - * None + * BE_SUCCESS - Success + * be_errno_t - Failure * Scope: * Semi-private (library wide use only) */ -void +int be_make_root_ds(const char *zpool, const char *be_name, char *be_root_ds, int be_root_ds_size) { struct be_defaults be_defaults; be_get_defaults(&be_defaults); - char *root_ds = NULL; + + assert(zpool != NULL); if (getzoneid() == GLOBAL_ZONEID) { if (be_defaults.be_deflt_rpool_container) { @@ -461,18 +463,30 @@ be_make_root_ds(const char *zpool, const char *be_name, char *be_root_ds, } } else { /* - * In non-global zone we can use path from mounted root dataset - * to generate BE's root dataset string. + * In a non-global zone we can use the path from the mounted + * root dataset to generate the BE's root dataset string. */ - if ((root_ds = be_get_ds_from_dir("/")) != NULL) { - (void) snprintf(be_root_ds, be_root_ds_size, "%s/%s", - dirname(root_ds), be_name); - } else { + char *root_ds = be_get_ds_from_dir("/"); + + if (root_ds == NULL) { be_print_err(gettext("be_make_root_ds: zone root " "dataset is not mounted\n")); - return; + return (BE_ERR_NOTMOUNTED); } + if (strncmp(root_ds, zpool, strlen(zpool)) != 0 || + root_ds[strlen(zpool)] != '/') { + /* + * This pool is not the one that contains the zone + * root. 
+ */ + return (BE_ERR_ACCESS); + } + + (void) snprintf(be_root_ds, be_root_ds_size, "%s/%s", + dirname(root_ds), be_name); } + + return (BE_SUCCESS); } /* @@ -484,17 +498,17 @@ be_make_root_ds(const char *zpool, const char *be_name, char *be_root_ds, * dataset in. * container_ds_size - size of container_ds * Returns: - * None + * BE_SUCCESS - Success + * be_errno_t - Failure * Scope: * Semi-private (library wide use only) */ -void -be_make_container_ds(const char *zpool, char *container_ds, +int +be_make_container_ds(const char *zpool, char *container_ds, int container_ds_size) { struct be_defaults be_defaults; be_get_defaults(&be_defaults); - char *root_ds = NULL; if (getzoneid() == GLOBAL_ZONEID) { if (be_defaults.be_deflt_rpool_container) { @@ -505,15 +519,26 @@ be_make_container_ds(const char *zpool, char *container_ds, "%s/%s", zpool, BE_CONTAINER_DS_NAME); } } else { - if ((root_ds = be_get_ds_from_dir("/")) != NULL) { - (void) strlcpy(container_ds, dirname(root_ds), - container_ds_size); - } else { + char *root_ds = be_get_ds_from_dir("/"); + + if (root_ds == NULL) { be_print_err(gettext("be_make_container_ds: zone root " "dataset is not mounted\n")); - return; + return (BE_ERR_NOTMOUNTED); + } + if (strncmp(root_ds, zpool, strlen(zpool)) != 0 || + root_ds[strlen(zpool)] != '/') { + /* + * This pool is not the one that contains the zone + * root. 
+ */ + return (BE_ERR_ACCESS); } + (void) strlcpy(container_ds, dirname(root_ds), + container_ds_size); } + + return (BE_SUCCESS); } /* @@ -525,17 +550,22 @@ be_make_container_ds(const char *zpool, char *container_ds, * container_ds - pointer to buffer in which to return result * container_ds_size - size of container_ds * Returns: - * None + * BE_SUCCESS - Success + * be_errno_t - Failure * Scope: * Semi-private (library wide use only) */ -void +int be_make_root_container_ds(const char *zpool, char *container_ds, int container_ds_size) { char *root; + int ret; - be_make_container_ds(zpool, container_ds, container_ds_size); + if ((ret = be_make_container_ds(zpool, container_ds, + container_ds_size)) != BE_SUCCESS) { + return (ret); + } /* If the container DS ends with /ROOT, remove it. */ @@ -543,6 +573,8 @@ be_make_root_container_ds(const char *zpool, char *container_ds, strcmp(root + 1, BE_CONTAINER_DS_NAME) == 0) { *root = '\0'; } + + return (BE_SUCCESS); } /* @@ -714,7 +746,12 @@ be_append_menu(char *be_name, char *be_root_pool, char *boot_pool, "%s%s", pool_mntpnt, BE_SPARC_MENU); } - be_make_root_ds(be_root_pool, be_name, be_root_ds, sizeof (be_root_ds)); + if ((ret = be_make_root_ds(be_root_pool, be_name, be_root_ds, + sizeof (be_root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, be_root_pool, be_name); + goto cleanup; + } /* * Iterate through menu first to make sure the BE doesn't already @@ -969,7 +1006,12 @@ be_remove_menu(char *be_name, char *be_root_pool, char *boot_pool) boot_pool = be_root_pool; /* Get name of BE's root dataset */ - be_make_root_ds(be_root_pool, be_name, be_root_ds, sizeof (be_root_ds)); + if ((ret = be_make_root_ds(be_root_pool, be_name, be_root_ds, + sizeof (be_root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, be_root_pool, be_name); + return (ret); + } /* Get handle to pool dataset */ if ((zhp 
= zfs_open(g_zfs, be_root_pool, ZFS_TYPE_DATASET)) == NULL) { @@ -1579,7 +1621,12 @@ be_change_grub_default(char *be_name, char *be_root_pool) } /* Generate string for BE's root dataset */ - be_make_root_ds(be_root_pool, be_name, be_root_ds, sizeof (be_root_ds)); + if ((ret = be_make_root_ds(be_root_pool, be_name, be_root_ds, + sizeof (be_root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, be_root_pool, be_name); + return (ret); + } /* Get handle to pool dataset */ if ((zhp = zfs_open(g_zfs, be_root_pool, ZFS_TYPE_DATASET)) == NULL) { @@ -1848,10 +1895,18 @@ be_update_menu(char *be_orig_name, char *be_new_name, char *be_root_pool, "%s%s", pool_mntpnt, BE_SPARC_MENU); } - be_make_root_ds(be_root_pool, be_orig_name, be_root_ds, - sizeof (be_root_ds)); - be_make_root_ds(be_root_pool, be_new_name, be_new_root_ds, - sizeof (be_new_root_ds)); + if ((ret = be_make_root_ds(be_root_pool, be_orig_name, be_root_ds, + sizeof (be_root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, be_root_pool, be_orig_name); + goto cleanup; + } + if ((ret = be_make_root_ds(be_root_pool, be_new_name, be_new_root_ds, + sizeof (be_new_root_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s/%s\n"), __func__, be_root_pool, be_new_name); + goto cleanup; + } if ((ret = be_open_menu(be_root_pool, menu_file, &menu_fp, "r", B_TRUE)) != BE_SUCCESS) { @@ -2714,24 +2769,14 @@ be_zpool_find_current_be_callback(zpool_handle_t *zlp, void *data) zfs_handle_t *zhp = NULL; const char *zpool = zpool_get_name(zlp); char be_container_ds[MAXPATHLEN]; - char *zpath = NULL; /* * Generate string for BE container dataset */ - if (getzoneid() != GLOBAL_ZONEID) { - if ((zpath = be_get_ds_from_dir("/")) != NULL) { - (void) strlcpy(be_container_ds, dirname(zpath), - sizeof (be_container_ds)); - } else { - be_print_err(gettext( - 
"be_zpool_find_current_be_callback: " - "zone root dataset is not mounted\n")); - return (0); - } - } else { - be_make_container_ds(zpool, be_container_ds, - sizeof (be_container_ds)); + if (be_make_container_ds(zpool, be_container_ds, + sizeof (be_container_ds)) != BE_SUCCESS) { + zpool_close(zlp); + return (0); } /* @@ -2865,7 +2910,10 @@ be_check_be_roots_callback(zpool_handle_t *zlp, void *data) char be_container_ds[MAXPATHLEN]; /* Generate string for this pool's BE root container dataset */ - be_make_container_ds(zpool, be_container_ds, sizeof (be_container_ds)); + if (be_make_container_ds(zpool, be_container_ds, + sizeof (be_container_ds)) != BE_SUCCESS) { + return (0); + } /* * If dataset lives under the BE root container dataset @@ -3420,6 +3468,7 @@ update_dataset(char *dataset, int dataset_len, char *be_name, { char *ds = NULL; char *sub_ds = NULL; + int ret; /* Tear off the BE container dataset */ if ((ds = be_make_name_from_ds(dataset, old_rc_loc)) == NULL) { @@ -3430,7 +3479,10 @@ update_dataset(char *dataset, int dataset_len, char *be_name, sub_ds = strchr(ds, '/'); /* Generate the BE root dataset name */ - be_make_root_ds(new_rc_loc, be_name, dataset, dataset_len); + if ((ret = be_make_root_ds(new_rc_loc, be_name, dataset, + dataset_len)) != BE_SUCCESS) { + return (ret); + } /* If a subordinate dataset name was found, append it */ if (sub_ds != NULL) diff --git a/usr/src/lib/libbe/common/be_zones.c b/usr/src/lib/libbe/common/be_zones.c index e257b62125..665db1b1d0 100644 --- a/usr/src/lib/libbe/common/be_zones.c +++ b/usr/src/lib/libbe/common/be_zones.c @@ -25,6 +25,7 @@ /* * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright 2022 OmniOS Community Edition (OmniOSce) Association. */ /* @@ -135,9 +136,13 @@ be_find_active_zone_root(zfs_handle_t *be_zhp, char *zonepath_ds, } } - /* Generate string for the root container dataset for this zone. 
*/ - be_make_container_ds(zonepath_ds, zone_container_ds, - sizeof (zone_container_ds)); + /* Generate string for the root container dataset for this zone. */ + if ((ret = be_make_container_ds(zonepath_ds, zone_container_ds, + sizeof (zone_container_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s\n"), __func__, zonepath_ds); + return (ret); + } /* Get handle to this zone's root container dataset */ if ((zhp = zfs_open(g_zfs, zone_container_ds, ZFS_TYPE_FILESYSTEM)) @@ -205,8 +210,12 @@ be_find_mounted_zone_root(char *zone_altroot, char *zonepath_ds, int zret = 0; /* Generate string for the root container dataset for this zone. */ - be_make_container_ds(zonepath_ds, zone_container_ds, - sizeof (zone_container_ds)); + if ((ret = be_make_container_ds(zonepath_ds, zone_container_ds, + sizeof (zone_container_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s\n"), __func__, zonepath_ds); + return (ret); + } /* Get handle to this zone's root container dataset. */ if ((zhp = zfs_open(g_zfs, zone_container_ds, ZFS_TYPE_FILESYSTEM)) @@ -293,8 +302,12 @@ be_zone_supported(char *zonepath_ds) * Make sure the zonepath has a zone root container dataset * underneath it. */ - be_make_container_ds(zonepath_ds, zone_container_ds, - sizeof (zone_container_ds)); + if ((ret = be_make_container_ds(zonepath_ds, zone_container_ds, + sizeof (zone_container_ds))) != BE_SUCCESS) { + be_print_err(gettext("%s: failed to get BE container dataset " + "for %s\n"), __func__, zonepath_ds); + return (B_FALSE); + } if (!zfs_dataset_exists(g_zfs, zone_container_ds, ZFS_TYPE_FILESYSTEM)) { diff --git a/usr/src/lib/libbe/common/libbe_priv.h b/usr/src/lib/libbe/common/libbe_priv.h index ace201577f..dd73e33bb1 100644 --- a/usr/src/lib/libbe/common/libbe_priv.h +++ b/usr/src/lib/libbe/common/libbe_priv.h @@ -24,7 +24,7 @@ * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 
* Copyright 2016 Toomas Soome <tsoome@me.com> * Copyright (c) 2015 by Delphix. All rights reserved. - * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2022 OmniOS Community Edition (OmniOSce) Association. */ #ifndef _LIBBE_PRIV_H @@ -179,9 +179,9 @@ int _be_destroy_snapshot(char *, char *); /* be_utils.c */ boolean_t be_zfs_init(void); void be_zfs_fini(void); -void be_make_root_ds(const char *, const char *, char *, int); -void be_make_container_ds(const char *, char *, int); -void be_make_root_container_ds(const char *, char *, int); +int be_make_root_ds(const char *, const char *, char *, int); +int be_make_container_ds(const char *, char *, int); +int be_make_root_container_ds(const char *, char *, int); char *be_make_name_from_ds(const char *, char *); int be_append_menu(char *, char *, char *, char *, char *); int be_remove_menu(char *, char *, char *); diff --git a/usr/src/man/man4d/Makefile b/usr/src/man/man4d/Makefile index 8462b451d6..88a0b92076 100644 --- a/usr/src/man/man4d/Makefile +++ b/usr/src/man/man4d/Makefile @@ -16,8 +16,8 @@ # Copyright 2016 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org> # Copyright 2018 Nexenta Systems, Inc. # Copyright 2020 Peter Tribble -# Copyright 2021 Oxide Computer Company # Copyright 2022 RackTop Systems, Inc. +# Copyright 2022 Oxide Computer Company # include $(SRC)/Makefile.master @@ -241,6 +241,7 @@ i386_MANFILES= ahci.4d \ ural.4d \ urtw.4d \ usmn.4d \ + vio9p.4d \ vioblk.4d \ vioif.4d \ vioscsi.4d \ diff --git a/usr/src/man/man4d/vio9p.4d b/usr/src/man/man4d/vio9p.4d new file mode 100644 index 0000000000..80952da799 --- /dev/null +++ b/usr/src/man/man4d/vio9p.4d @@ -0,0 +1,141 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. 
+.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright 2022 Oxide Computer Company +.\" +.Dd August 1, 2022 +.Dt VIO9P 4D +.Os +.Sh NAME +.Nm vio9p +.Nd Virtio 9P Transport Driver +.Sh SYNOPSIS +.Pa /dev/9p/* +.Sh DESCRIPTION +The +.Nm +driver provides access to 9P transport devices commonly used by hypervisors +and emulators to expose a shared file system. +.Pp +The +.Nm +driver is not a +.Sy Committed +interface, and may change at any time. +.Sh APPLICATION PROGRAMMING INTERFACE +Each device corresponds to a specific 9P channel, providing exclusive access to +one consumer at a time. +The device may be opened with an +.Xr open 2 +call, which must include at least the +.Dv O_EXCL +and +.Dv O_RDWR +flags. +The +.Dv O_NONBLOCK +or +.Dv O_NDELAY +flags may be used if non-blocking reads and writes are required. +.Pp +Once open, +.Xr read 2 +and +.Xr write 2 +calls may be made against the resulting file descriptor. +Writes represent a 9P request message sent to the hypervisor, and reads +represent responses to those requests. +.Pp +Unlike with other transports like TCP, the channel is not explicitly reset when +the device is opened or closed. +After a call to +.Xr open 2 , +the application should use a +.Sy version +message to open a new session. +This will explicitly discard any previous session, clunking any active fids in +the process and negotiating an appropriate protocol version with the +hypervisor. +It is likely also appropriate to do this as part of closing the device, to +allow the hypervisor to free any session tracking resources. +.Pp +Writes must be well-formed 9P messages, conforming to whichever 9P protocol +specification is used by the hypervisor. +In particular, each message must include a minimum of seven bytes, representing +the message +.Em size[4] , +.Em type[1] , +and +.Em tag[2] . 
+In most or all available protocol specifications, these fields are unsigned +integers in little-endian order. +The driver limits request and response size to 8192 bytes, and will fail larger +writes with +.Er EMSGSIZE . +Applications should, in their initial +.Sy version +message, +negotiate an +.Em msize[4] +value less than or equal to 8192 bytes. +.Pp +Reads are interruptible and will block waiting for a response to a request sent +in a previous write. +If insufficient buffer space is provided to the read call to receive the +message, the call will fail with +.Er EOVERFLOW +and the message will remain available for a subsequent read. +Messages are provided as-is to the application, including the +.Em size[4] , +.Em type[1] , +and +.Em tag[2] . +.Pp +Depending on the 9P server provided by the hypervisor, requests that are issued +concurrently may result in responses that arrive out of order. +Applications should develop a strategy for allocating unique +.Em tag[2] +values, so that request and response messages can be correlated. +.Sh IOCTLS +The driver provides an ioctl, +.Dv VIO9P_IOC_MOUNT_TAG , +to expose the +.Em Mount Tag +string if one was provided by the hypervisor. +The ioctl is defined in +.In sys/vio9p.h . +The argument must be a +.Vt "char *" , +pointing to a buffer of +.Dv VIO9P_MOUNT_TAG_SIZE +bytes. +On success, the buffer will contain the mount tag string as read from the +hypervisor, followed by a null-terminating zero byte added by the driver to +ensure the result can always be treated as a C string. +While the hypervisor is expected to provide a human-readable C string, +applications should take care to verify that the contents are valid for display +or other purposes. +Note that even if successfully read, the string may be empty. +.Sh FILES +.Bl -tag -width Pa +.It Pa /dev/9p/* +Character device for access to a 9P channel. 
+.It Pa /kernel/drv/amd64/vio9p +Device driver (x86) +.El +.Sh INTERFACE STABILITY +.Sy Uncommitted +.Sh SEE ALSO +.Xr close 2 , +.Xr ioctl 2 , +.Xr open 2 , +.Xr read 2 , +.Xr write 2 diff --git a/usr/src/pkg/manifests/driver-storage-vio9p.p5m b/usr/src/pkg/manifests/driver-storage-vio9p.p5m new file mode 100644 index 0000000000..f4ce42c1a6 --- /dev/null +++ b/usr/src/pkg/manifests/driver-storage-vio9p.p5m @@ -0,0 +1,40 @@ +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright 2022 Oxide Computer Company +# + +# +# The default for payload-bearing actions in this package is to appear in the +# global zone only. See the include file for greater detail, as well as +# information about overriding the defaults. 
+# +<include global_zone_only_component> +set name=pkg.fmri value=pkg:/driver/storage/vio9p@$(PKGVERS) +set name=pkg.summary value="Virtio 9P transport driver" +set name=pkg.description value="Virtio 9P transport driver" +set name=info.classification value=org.opensolaris.category.2008:Drivers/Storage +set name=variant.arch value=$(ARCH) +dir path=kernel group=sys +dir path=kernel/drv group=sys +dir path=kernel/drv/$(ARCH64) group=sys +file path=kernel/drv/$(ARCH64)/vio9p group=sys +file path=usr/include/sys/vio9p.h +dir path=usr/share/man +dir path=usr/share/man/man4d +file path=usr/share/man/man4d/vio9p.4d +driver name=vio9p perms="* 0600 root root" alias=pci1af4,9 +license lic_CDDL license=lic_CDDL diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index dc5503379f..26e20efb9c 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -2128,6 +2128,9 @@ VIOIF_OBJS = vioif.o # Virtio SCSI driver VIOSCSI_OBJS = vioscsi.o +# Virtio 9P transport driver +VIO9P_OBJS = vio9p.o + # # kiconv modules # diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules index c8a6ce9d0c..8ef8c5eb9d 100644 --- a/usr/src/uts/common/Makefile.rules +++ b/usr/src/uts/common/Makefile.rules @@ -1557,6 +1557,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/vioscsi/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/vio9p/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + $(OBJS_DIR)/%.o: $(COMMONBASE)/idspace/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) diff --git a/usr/src/uts/common/dtrace/lockstat.c b/usr/src/uts/common/dtrace/lockstat.c index 69c8b72544..08f819d453 100644 --- a/usr/src/uts/common/dtrace/lockstat.c +++ b/usr/src/uts/common/dtrace/lockstat.c @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
+ * Copyright 2022 Oxide Computer Company */ @@ -93,7 +94,7 @@ lockstat_enable(void *arg, dtrace_id_t id, void *parg) lockstat_probemap[probe->lsp_probe] = id; membar_producer(); - lockstat_hot_patch(); + lockstat_hotpatch_probe(probe->lsp_probe); membar_producer(); /* @@ -115,7 +116,7 @@ lockstat_disable(void *arg, dtrace_id_t id, void *parg) ASSERT(lockstat_probemap[probe->lsp_probe]); lockstat_probemap[probe->lsp_probe] = 0; - lockstat_hot_patch(); + lockstat_hotpatch_probe(probe->lsp_probe); membar_producer(); /* diff --git a/usr/src/uts/common/io/vio9p/vio9p.c b/usr/src/uts/common/io/vio9p/vio9p.c new file mode 100644 index 0000000000..5302043365 --- /dev/null +++ b/usr/src/uts/common/io/vio9p/vio9p.c @@ -0,0 +1,839 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +/* + * VIRTIO 9P DRIVER + * + * This driver provides support for Virtio 9P devices. Each driver instance + * attaches to a single underlying 9P channel. A 9P file system will use LDI + * to open this device. 
+ */ + +#include <sys/modctl.h> +#include <sys/types.h> +#include <sys/file.h> +#include <sys/errno.h> +#include <sys/param.h> +#include <sys/stropts.h> +#include <sys/stream.h> +#include <sys/strsubr.h> +#include <sys/kmem.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/conf.h> +#include <sys/devops.h> +#include <sys/ksynch.h> +#include <sys/stat.h> +#include <sys/modctl.h> +#include <sys/debug.h> +#include <sys/pci.h> +#include <sys/containerof.h> +#include <sys/ctype.h> +#include <sys/stdbool.h> +#include <sys/sysmacros.h> +#include <sys/list.h> + +#include "virtio.h" +#include "vio9p_impl.h" + +static void *vio9p_state; + +uint_t vio9p_int_handler(caddr_t, caddr_t); +static uint_t vio9p_poll(vio9p_t *); +static int vio9p_quiesce(dev_info_t *); +static int vio9p_attach(dev_info_t *, ddi_attach_cmd_t); +static int vio9p_teardown(vio9p_t *, vio9p_teardown_style_t); +static int vio9p_detach(dev_info_t *, ddi_detach_cmd_t); +static int vio9p_open(dev_t *, int, int, cred_t *); +static int vio9p_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); +static int vio9p_close(dev_t, int, int, cred_t *); +static int vio9p_read(dev_t, uio_t *, cred_t *); +static int vio9p_write(dev_t, uio_t *, cred_t *); +static vio9p_req_t *vio9p_req_alloc_impl(vio9p_t *, int); +static void vio9p_req_free_impl(vio9p_t *, vio9p_req_t *); + +static struct cb_ops vio9p_cb_ops = { + .cb_rev = CB_REV, + .cb_flag = D_NEW | D_MP, + + .cb_open = vio9p_open, + .cb_close = vio9p_close, + .cb_read = vio9p_read, + .cb_write = vio9p_write, + .cb_ioctl = vio9p_ioctl, + + .cb_strategy = nodev, + .cb_print = nodev, + .cb_dump = nodev, + .cb_devmap = nodev, + .cb_mmap = nodev, + .cb_segmap = nodev, + .cb_chpoll = nochpoll, + .cb_prop_op = ddi_prop_op, + .cb_str = NULL, + .cb_aread = nodev, + .cb_awrite = nodev, +}; + +static struct dev_ops vio9p_dev_ops = { + .devo_rev = DEVO_REV, + .devo_refcnt = 0, + + .devo_attach = vio9p_attach, + .devo_detach = vio9p_detach, + .devo_quiesce = 
vio9p_quiesce, + + .devo_cb_ops = &vio9p_cb_ops, + + .devo_getinfo = ddi_no_info, + .devo_identify = nulldev, + .devo_probe = nulldev, + .devo_reset = nodev, + .devo_bus_ops = NULL, + .devo_power = NULL, +}; + +static struct modldrv vio9p_modldrv = { + .drv_modops = &mod_driverops, + .drv_linkinfo = "VIRTIO 9P driver", + .drv_dev_ops = &vio9p_dev_ops +}; + +static struct modlinkage vio9p_modlinkage = { + .ml_rev = MODREV_1, + .ml_linkage = { &vio9p_modldrv, NULL } +}; + +/* + * DMA attribute template for header and status blocks. + */ +static const ddi_dma_attr_t vio9p_dma_attr = { + .dma_attr_version = DMA_ATTR_V0, + .dma_attr_addr_lo = 0x0000000000000000, + .dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF, + .dma_attr_count_max = 0x00000000FFFFFFFF, + .dma_attr_align = 1, + .dma_attr_burstsizes = 1, + .dma_attr_minxfer = 1, + .dma_attr_maxxfer = 0x00000000FFFFFFFF, + .dma_attr_seg = 0x00000000FFFFFFFF, + .dma_attr_sgllen = VIRTIO_9P_MAX_SGL, + .dma_attr_granular = 1, + .dma_attr_flags = 0 +}; + +uint_t +vio9p_int_handler(caddr_t arg0, caddr_t arg1) +{ + vio9p_t *vin = (vio9p_t *)arg0; + + mutex_enter(&vin->vin_mutex); + uint_t count = vio9p_poll(vin); + mutex_exit(&vin->vin_mutex); + + return (count > 0 ? 
DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED); +} + +static void +vio9p_req_freelist_put(vio9p_t *vin, vio9p_req_t *vnr) +{ + VERIFY(!list_link_active(&vnr->vnr_link_complete)); + VERIFY(!list_link_active(&vnr->vnr_link_free)); + + vin->vin_generation = 0; + list_insert_head(&vin->vin_req_freelist, vnr); + + if (vin->vin_open) { + /* + * Wake any callers waiting in vio9p_req_alloc() for an entry: + */ + cv_broadcast(&vin->vin_cv); + } +} + +static void +vio9p_req_free(vio9p_t *vin, vio9p_req_t *vnr) +{ + VERIFY(MUTEX_HELD(&vin->vin_mutex)); + + if (list_link_active(&vnr->vnr_link_complete)) { + list_remove(&vin->vin_completes, vnr); + } + + vio9p_req_freelist_put(vin, vnr); +} + +static void +vio9p_req_free_impl(vio9p_t *vin, vio9p_req_t *vnr) +{ + if (vnr->vnr_chain != NULL) { + virtio_chain_free(vnr->vnr_chain); + vnr->vnr_chain = NULL; + } + if (vnr->vnr_dma_in != NULL) { + virtio_dma_free(vnr->vnr_dma_in); + vnr->vnr_dma_in = NULL; + } + if (vnr->vnr_dma_out != NULL) { + virtio_dma_free(vnr->vnr_dma_out); + vnr->vnr_dma_out = NULL; + } + + VERIFY(!list_link_active(&vnr->vnr_link_complete)); + VERIFY(!list_link_active(&vnr->vnr_link_free)); + + list_remove(&vin->vin_reqs, vnr); + VERIFY3U(vin->vin_nreqs, >, 0); + vin->vin_nreqs--; + + kmem_free(vnr, sizeof (*vnr)); +} + +/* + * Allocate a request for a transaction. If one is not available and this is + * for a blocking request, wait for one to become available. 
+ */ +static vio9p_req_t * +vio9p_req_alloc(vio9p_t *vin, bool wait) +{ + vio9p_req_t *vnr; + + VERIFY(MUTEX_HELD(&vin->vin_mutex)); + +again: + /* + * Try the free list first: + */ + if ((vnr = list_remove_head(&vin->vin_req_freelist)) != NULL) { + return (vnr); + } + + /* + * Failing that, try to allocate more memory if we are under our + * request cap: + */ + if ((vnr = vio9p_req_alloc_impl(vin, KM_NOSLEEP_LAZY)) != NULL) { + return (vnr); + } + + /* + * If this is a blocking request, wait for an entry to become available + * on the free list: + */ + if (wait) { + if (cv_wait_sig(&vin->vin_cv, &vin->vin_mutex) == 0) { + return (NULL); + } + + goto again; + } + + return (NULL); +} + +static vio9p_req_t * +vio9p_req_alloc_impl(vio9p_t *vin, int kmflag) +{ + dev_info_t *dip = vin->vin_dip; + vio9p_req_t *vnr; + + if (vin->vin_nreqs >= VIRTIO_9P_MAX_REQS) { + /* + * We have reached the limit of requests that we are willing to + * allocate for the whole device. + */ + return (NULL); + } + + /* + * Note that the request object has various list link fields which are + * initialised to zero here and which we check at various points later. 
+ */ + if ((vnr = kmem_zalloc(sizeof (*vnr), kmflag)) == NULL) { + return (NULL); + } + list_insert_tail(&vin->vin_reqs, vnr); + vin->vin_nreqs++; + + if ((vnr->vnr_chain = virtio_chain_alloc(vin->vin_vq, kmflag)) == + NULL) { + dev_err(vin->vin_dip, CE_WARN, "!chain alloc failure"); + goto fail; + } + virtio_chain_data_set(vnr->vnr_chain, vnr); + + /* + * Allocate outbound request buffer: + */ + if ((vnr->vnr_dma_out = virtio_dma_alloc(vin->vin_virtio, + VIRTIO_9P_REQ_SIZE, &vio9p_dma_attr, + DDI_DMA_CONSISTENT | DDI_DMA_WRITE, kmflag)) == NULL) { + dev_err(dip, CE_WARN, "!DMA out alloc failure"); + goto fail; + } + VERIFY3U(virtio_dma_ncookies(vnr->vnr_dma_out), <=, VIRTIO_9P_MAX_SGL); + + for (uint_t n = 0; n < virtio_dma_ncookies(vnr->vnr_dma_out); n++) { + if (virtio_chain_append(vnr->vnr_chain, + virtio_dma_cookie_pa(vnr->vnr_dma_out, n), + virtio_dma_cookie_size(vnr->vnr_dma_out, n), + VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) { + dev_err(dip, CE_WARN, "!chain append out failure"); + goto fail; + } + } + + /* + * Allocate inbound request buffer: + */ + if ((vnr->vnr_dma_in = virtio_dma_alloc(vin->vin_virtio, + VIRTIO_9P_REQ_SIZE, &vio9p_dma_attr, + DDI_DMA_CONSISTENT | DDI_DMA_READ, kmflag)) == NULL) { + dev_err(dip, CE_WARN, "!DMA in alloc failure"); + goto fail; + } + VERIFY3U(virtio_dma_ncookies(vnr->vnr_dma_in), <=, VIRTIO_9P_MAX_SGL); + + for (uint_t n = 0; n < virtio_dma_ncookies(vnr->vnr_dma_in); n++) { + if (virtio_chain_append(vnr->vnr_chain, + virtio_dma_cookie_pa(vnr->vnr_dma_in, n), + virtio_dma_cookie_size(vnr->vnr_dma_in, n), + VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) { + dev_err(dip, CE_WARN, "!chain append in failure"); + goto fail; + } + } + + return (vnr); + +fail: + vio9p_req_free_impl(vin, vnr); + return (NULL); +} + +static uint_t +vio9p_poll(vio9p_t *vin) +{ + virtio_chain_t *vic; + uint_t count = 0; + bool wakeup = false; + + VERIFY(MUTEX_HELD(&vin->vin_mutex)); + + while ((vic = virtio_queue_poll(vin->vin_vq)) != NULL) { + vio9p_req_t 
*vnr = virtio_chain_data(vic); + + count++; + + virtio_dma_sync(vnr->vnr_dma_in, DDI_DMA_SYNC_FORCPU); + + if (!vin->vin_open || + vnr->vnr_generation != vin->vin_generation) { + /* + * Either the device is not open, or the device has + * been closed and opened again since this request was + * submitted. Just free the memory and drive on. + */ + vio9p_req_free(vin, vnr); + continue; + } + + list_insert_tail(&vin->vin_completes, vnr); + wakeup = true; + } + + if (wakeup) { + cv_broadcast(&vin->vin_cv); + } + + return (count); +} + +static int +vio9p_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + int instance = ddi_get_instance(dip); + virtio_t *vio; + vio9p_req_t *vnr; + + if (cmd != DDI_ATTACH) { + return (DDI_FAILURE); + } + + if (ddi_soft_state_zalloc(vio9p_state, instance) != DDI_SUCCESS) { + return (DDI_FAILURE); + } + + if ((vio = virtio_init(dip, VIRTIO_9P_WANTED_FEATURES, B_TRUE)) == + NULL) { + ddi_soft_state_free(vio9p_state, instance); + dev_err(dip, CE_WARN, "failed to start Virtio init"); + return (DDI_FAILURE); + } + + vio9p_t *vin = ddi_get_soft_state(vio9p_state, instance); + vin->vin_dip = dip; + vin->vin_virtio = vio; + ddi_set_driver_private(dip, vin); + list_create(&vin->vin_reqs, sizeof (vio9p_req_t), + offsetof(vio9p_req_t, vnr_link)); + list_create(&vin->vin_completes, sizeof (vio9p_req_t), + offsetof(vio9p_req_t, vnr_link_complete)); + list_create(&vin->vin_req_freelist, sizeof (vio9p_req_t), + offsetof(vio9p_req_t, vnr_link_free)); + + if (virtio_feature_present(vio, VIRTIO_9P_F_MOUNT_TAG)) { + uint16_t len = virtio_dev_get16(vio, VIRTIO_9P_CONFIG_TAG_SZ); + if (len > VIRTIO_9P_TAGLEN) { + len = VIRTIO_9P_TAGLEN; + } + + /* + * This array is one byte longer than VIRTIO_9P_TAGLEN, and is + * thus always NUL-terminated by the use of + * ddi_soft_state_zalloc() above. 
+ */ + for (uint16_t n = 0; n < len; n++) { + vin->vin_tag[n] = virtio_dev_get8(vio, + VIRTIO_9P_CONFIG_TAG + n); + } + } + + /* + * When allocating the request queue, we include enough slots for a + * full set of cookies (based on our DMA attributes) in both the in and + * the out direction. + */ + if ((vin->vin_vq = virtio_queue_alloc(vio, VIRTIO_9P_VIRTQ_REQUESTS, + "requests", vio9p_int_handler, vin, B_FALSE, + 2 * VIRTIO_9P_MAX_SGL)) == NULL) { + return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_PRE_MUTEX)); + } + + if (virtio_init_complete(vio, VIRTIO_ANY_INTR_TYPE) != DDI_SUCCESS) { + dev_err(dip, CE_WARN, "failed to complete Virtio init"); + return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_PRE_MUTEX)); + } + + cv_init(&vin->vin_cv, NULL, CV_DRIVER, NULL); + mutex_init(&vin->vin_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio)); + + /* + * Make sure the free list contains at least one request at attach time + * so that the device is always somewhat useable: + */ + if ((vnr = vio9p_req_alloc_impl(vin, KM_SLEEP)) == NULL) { + dev_err(dip, CE_WARN, "failed to allocate first request"); + return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_ATTACH)); + } + vio9p_req_freelist_put(vin, vnr); + + if (virtio_interrupts_enable(vio) != DDI_SUCCESS) { + return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_ATTACH)); + } + + /* + * Hang out a minor node so that we can be opened. + */ + int minor = ddi_get_instance(dip); + if (ddi_create_minor_node(dip, "9p", S_IFCHR, minor, DDI_PSEUDO, + 0) != DDI_SUCCESS) { + dev_err(dip, CE_WARN, "could not create minor node"); + return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_ATTACH)); + } + + ddi_report_dev(dip); + + return (DDI_SUCCESS); +} + +static int +vio9p_teardown(vio9p_t *vin, vio9p_teardown_style_t style) +{ + dev_info_t *dip = vin->vin_dip; + + if (style != VIRTIO_9P_TEARDOWN_PRE_MUTEX) { + /* + * Make sure we do not hold the mutex across interrupt disable. 
+ */ + VERIFY(MUTEX_NOT_HELD(&vin->vin_mutex)); + } + + ddi_remove_minor_node(dip, NULL); + + if (vin->vin_virtio != NULL) { + /* + * Disable interrupts so that we can be sure our handler does + * not run again while we free things. + */ + virtio_interrupts_disable(vin->vin_virtio); + } + + /* + * Empty the free list: + */ + for (;;) { + vio9p_req_t *vnr = list_remove_head(&vin->vin_req_freelist); + if (vnr == NULL) { + break; + } + vio9p_req_free_impl(vin, vnr); + } + VERIFY(list_is_empty(&vin->vin_req_freelist)); + list_destroy(&vin->vin_req_freelist); + + /* + * Any active requests should have been freed in vio9p_detach(), so + * there should be no other requests left at this point. + */ + VERIFY0(vin->vin_nreqs); + VERIFY(list_is_empty(&vin->vin_reqs)); + list_destroy(&vin->vin_reqs); + + VERIFY(list_is_empty(&vin->vin_completes)); + list_destroy(&vin->vin_completes); + + /* + * Tear down the Virtio framework. + */ + if (vin->vin_virtio != NULL) { + boolean_t failed = (style != VIRTIO_9P_TEARDOWN_DETACH); + virtio_fini(vin->vin_virtio, failed); + } + + if (style != VIRTIO_9P_TEARDOWN_PRE_MUTEX) { + mutex_destroy(&vin->vin_mutex); + cv_destroy(&vin->vin_cv); + } + + ddi_set_driver_private(dip, NULL); + ddi_soft_state_free(vio9p_state, ddi_get_instance(dip)); + + return (style == VIRTIO_9P_TEARDOWN_DETACH ? DDI_SUCCESS : DDI_FAILURE); +} + +static int +vio9p_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + vio9p_t *vin = ddi_get_driver_private(dip); + + if (cmd != DDI_DETACH) { + return (DDI_FAILURE); + } + + mutex_enter(&vin->vin_mutex); + + /* + * Detach will only be called once we are no longer held open. + */ + VERIFY(!vin->vin_open); + + /* + * If a request was submitted to the hypervisor but never completed, it + * may still be active even though the device has been closed. 
+ */ + bool shutdown = false; + for (vio9p_req_t *vnr = list_head(&vin->vin_reqs); + vnr != NULL; vnr = list_next(&vin->vin_reqs, vnr)) { + if (!list_link_active(&vnr->vnr_link_free)) { + /* + * There is at least one active request. We need to + * reset the device to claw back the DMA memory. + */ + shutdown = true; + break; + } + } + + if (shutdown) { + virtio_chain_t *vic; + + virtio_shutdown(vin->vin_virtio); + while ((vic = virtio_queue_evacuate(vin->vin_vq)) != NULL) { + vio9p_req_t *vnr = virtio_chain_data(vic); + + virtio_dma_sync(vnr->vnr_dma_in, DDI_DMA_SYNC_FORCPU); + + vio9p_req_free_impl(vin, vnr); + } + } + + mutex_exit(&vin->vin_mutex); + + return (vio9p_teardown(vin, VIRTIO_9P_TEARDOWN_DETACH)); +} + +static int +vio9p_quiesce(dev_info_t *dip) +{ + vio9p_t *vin; + + if ((vin = ddi_get_driver_private(dip)) == NULL) { + return (DDI_FAILURE); + } + + return (virtio_quiesce(vin->vin_virtio)); +} + +static int +vio9p_open(dev_t *dev, int flag, int otyp, cred_t *cred) +{ + if (otyp != OTYP_CHR) { + return (EINVAL); + } + + /* + * This device represents a request-response communication channel + * between the host and the hypervisor; as such we insist that it be + * opened exclusively, and for both read and write access. 
+ */ + if (!(flag & FEXCL) || !(flag & FREAD) || !(flag & FWRITE)) { + return (EINVAL); + } + + vio9p_t *vin = ddi_get_soft_state(vio9p_state, getminor(*dev)); + if (vin == NULL) { + return (ENXIO); + } + + mutex_enter(&vin->vin_mutex); + if (vin->vin_open) { + mutex_exit(&vin->vin_mutex); + return (EBUSY); + } + vin->vin_open = true; + + vin->vin_generation++; + if (vin->vin_generation == 0) { + vin->vin_generation++; + } + + mutex_exit(&vin->vin_mutex); + return (0); +} + +static int +vio9p_close(dev_t dev, int flag, int otyp, cred_t *cred) +{ + if (otyp != OTYP_CHR) { + return (EINVAL); + } + + vio9p_t *vin = ddi_get_soft_state(vio9p_state, getminor(dev)); + if (vin == NULL) { + return (ENXIO); + } + + mutex_enter(&vin->vin_mutex); + if (!vin->vin_open) { + mutex_exit(&vin->vin_mutex); + return (EIO); + } + + /* + * Free all completed requests that have not yet been read: + */ + vio9p_req_t *vnr; + while ((vnr = list_remove_head(&vin->vin_completes)) != NULL) { + vio9p_req_free(vin, vnr); + } + + vin->vin_open = false; + mutex_exit(&vin->vin_mutex); + return (0); +} + +static int +vio9p_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, + int *rvalp) +{ + vio9p_t *vin = ddi_get_soft_state(vio9p_state, getminor(dev)); + if (vin == NULL) { + return (ENXIO); + } + + switch (cmd) { + case VIO9P_IOC_MOUNT_TAG: + if (ddi_copyout(vin->vin_tag, (void *)arg, + sizeof (vin->vin_tag), mode) != 0) { + return (EFAULT); + } + return (0); + + default: + return (ENOTTY); + } +} + +static int +vio9p_read(dev_t dev, struct uio *uio, cred_t *cred) +{ + bool blocking = (uio->uio_fmode & (FNDELAY | FNONBLOCK)) == 0; + vio9p_req_t *vnr; + vio9p_t *vin; + + if ((vin = ddi_get_soft_state(vio9p_state, getminor(dev))) == NULL) { + return (ENXIO); + } + + mutex_enter(&vin->vin_mutex); +again: + if ((vnr = list_remove_head(&vin->vin_completes)) == NULL) { + if (!blocking) { + mutex_exit(&vin->vin_mutex); + return (EAGAIN); + } + + /* + * There is nothing to read right now. 
Wait for something: + */ + if (cv_wait_sig(&vin->vin_cv, &vin->vin_mutex) == 0) { + mutex_exit(&vin->vin_mutex); + return (EINTR); + } + goto again; + } + + /* + * Determine the size of the response message using the initial size[4] + * field of the response. The various specifying documents that exist + * suggest this is an unsigned integer in little-endian order. + */ + uint32_t msz; + bcopy(virtio_dma_va(vnr->vnr_dma_in, 0), &msz, sizeof (msz)); + msz = LE_32(msz); + if (msz > virtio_dma_size(vnr->vnr_dma_in)) { + msz = virtio_dma_size(vnr->vnr_dma_in); + } + + if (msz > uio->uio_resid) { + /* + * Tell the consumer they are going to need a bigger + * buffer. + */ + list_insert_head(&vin->vin_completes, vnr); + mutex_exit(&vin->vin_mutex); + return (EOVERFLOW); + } + + mutex_exit(&vin->vin_mutex); + int e = uiomove(virtio_dma_va(vnr->vnr_dma_in, 0), msz, UIO_READ, uio); + mutex_enter(&vin->vin_mutex); + + if (e == 0) { + vio9p_req_free(vin, vnr); + } else { + /* + * Put the response back in the list for another try, so that + * we do not drop any messages: + */ + list_insert_head(&vin->vin_completes, vnr); + } + + mutex_exit(&vin->vin_mutex); + return (e); +} + +static int +vio9p_write(dev_t dev, struct uio *uio, cred_t *cred) +{ + bool blocking = (uio->uio_fmode & (FNDELAY | FNONBLOCK)) == 0; + + size_t wsz = uio->uio_resid; + if (wsz < 7) { + /* + * Requests should be well-formed 9P messages. They must + * contain at least 7 bytes: size[4] + type[1] + tag[2]. + */ + return (EINVAL); + } else if (wsz > VIRTIO_9P_REQ_SIZE) { + return (EMSGSIZE); + } + + vio9p_t *vin = ddi_get_soft_state(vio9p_state, getminor(dev)); + if (vin == NULL) { + return (ENXIO); + } + + mutex_enter(&vin->vin_mutex); + vio9p_req_t *vnr = vio9p_req_alloc(vin, blocking); + if (vnr == NULL) { + mutex_exit(&vin->vin_mutex); + return (blocking ? 
ENOMEM : EAGAIN); + } + vnr->vnr_generation = vin->vin_generation; + VERIFY3U(wsz, <=, virtio_dma_size(vnr->vnr_dma_out)); + + mutex_exit(&vin->vin_mutex); + int e = uiomove(virtio_dma_va(vnr->vnr_dma_out, 0), wsz, UIO_WRITE, + uio); + mutex_enter(&vin->vin_mutex); + + if (e == 0) { + virtio_dma_sync(vnr->vnr_dma_out, DDI_DMA_SYNC_FORDEV); + virtio_chain_submit(vnr->vnr_chain, B_TRUE); + } else { + vio9p_req_free(vin, vnr); + } + + mutex_exit(&vin->vin_mutex); + return (e); +} + +int +_init(void) +{ + int r; + + if ((r = ddi_soft_state_init(&vio9p_state, sizeof (vio9p_t), 0)) != 0) { + return (r); + } + + if ((r = mod_install(&vio9p_modlinkage)) != 0) { + ddi_soft_state_fini(&vio9p_state); + } + + return (r); +} + +int +_fini(void) +{ + int r; + + if ((r = mod_remove(&vio9p_modlinkage)) != 0) { + return (r); + } + + ddi_soft_state_fini(&vio9p_state); + + return (r); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&vio9p_modlinkage, modinfop)); +} diff --git a/usr/src/uts/common/io/vio9p/vio9p_impl.h b/usr/src/uts/common/io/vio9p/vio9p_impl.h new file mode 100644 index 0000000000..f8718c1ed2 --- /dev/null +++ b/usr/src/uts/common/io/vio9p/vio9p_impl.h @@ -0,0 +1,126 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +/* + * VIRTIO 9P DRIVER + */ + +#ifndef _VIO9P_IMPL_H +#define _VIO9P_IMPL_H + +#include "virtio.h" +#include <sys/vio9p.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * VIRTIO 9P CONFIGURATION REGISTERS + * + * These are offsets into the device-specific configuration space available + * through the virtio_dev_*() family of functions. + */ +#define VIRTIO_9P_CONFIG_TAG_SZ 0x00 /* 16 R */ +#define VIRTIO_9P_CONFIG_TAG 0x02 /* SZ R */ + +/* + * VIRTIO 9P VIRTQUEUES + * + * Virtio 9P devices have just one queue which is used to make 9P requests. + * Each submitted chain should include appropriately sized inbound and outbound + * descriptors for the request and response messages. The maximum size is + * negotiated via the "msize" member of the 9P TVERSION request and RVERSION + * response. Some hypervisors may require the first 7 bytes (size, type, tag) + * to be contiguous in the first descriptor. + */ +#define VIRTIO_9P_VIRTQ_REQUESTS 0 + +/* + * VIRTIO 9P FEATURE BITS + */ +#define VIRTIO_9P_F_MOUNT_TAG (1ULL << 0) + +/* + * These features are supported by the driver and we will request them from the + * device. + */ +#define VIRTIO_9P_WANTED_FEATURES (VIRTIO_9P_F_MOUNT_TAG) + +/* + * DRIVER PARAMETERS + */ +#define VIRTIO_9P_MAX_REQS 16 +#define VIRTIO_9P_REQ_SIZE 8192 + +/* + * It is not clear that there is a well-defined number of cookies for this + * interface; QEMU may support as many as there are direct descriptors in the + * ring, and bhyve may support something like 128. We'll use a conservative + * number that's large enough to ensure we'll be able to allocate without + * requiring contiguous pages. 
+ */ +#define VIRTIO_9P_MAX_SGL 8 + +/* + * TYPE DEFINITIONS + */ + +typedef enum vio9p_teardown_style { + VIRTIO_9P_TEARDOWN_PRE_MUTEX, + VIRTIO_9P_TEARDOWN_ATTACH, + VIRTIO_9P_TEARDOWN_DETACH, +} vio9p_teardown_style_t; + +typedef struct vio9p_req { + virtio_dma_t *vnr_dma_in; + virtio_dma_t *vnr_dma_out; + virtio_chain_t *vnr_chain; + list_node_t vnr_link; + list_node_t vnr_link_complete; + list_node_t vnr_link_free; + uint64_t vnr_generation; +} vio9p_req_t; + +typedef struct vio9p { + dev_info_t *vin_dip; + virtio_t *vin_virtio; + virtio_queue_t *vin_vq; + + kmutex_t vin_mutex; + kcondvar_t vin_cv; + + /* + * When the device is opened, select a generation number. This will be + * used to discard completed responses that arrive after the device was + * closed and reopened. + */ + uint64_t vin_generation; + bool vin_open; + + uint_t vin_nreqs; + list_t vin_reqs; + list_t vin_completes; + + list_t vin_req_freelist; + + char vin_tag[VIO9P_MOUNT_TAG_SIZE]; +} vio9p_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _VIO9P_IMPL_H */ diff --git a/usr/src/uts/common/io/vioblk/vioblk.c b/usr/src/uts/common/io/vioblk/vioblk.c index f6649bdd12..1c00d67184 100644 --- a/usr/src/uts/common/io/vioblk/vioblk.c +++ b/usr/src/uts/common/io/vioblk/vioblk.c @@ -981,7 +981,7 @@ vioblk_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) virtio_register_cfgchange_handler(vio, vioblk_cfgchange, vib); - if (virtio_init_complete(vio, 0) != DDI_SUCCESS) { + if (virtio_init_complete(vio, VIRTIO_ANY_INTR_TYPE) != DDI_SUCCESS) { dev_err(dip, CE_WARN, "failed to complete Virtio init"); goto fail; } diff --git a/usr/src/uts/common/io/vioif/vioif.c b/usr/src/uts/common/io/vioif/vioif.c index ae1e2d4ee2..74f1d46a69 100644 --- a/usr/src/uts/common/io/vioif/vioif.c +++ b/usr/src/uts/common/io/vioif/vioif.c @@ -1916,7 +1916,7 @@ vioif_select_interrupt_types(void) * The system may not have valid SMBIOS data, so ignore a * failure here. 
*/ - return (0); + return (VIRTIO_ANY_INTR_TYPE); } if (strcmp(info.smbi_manufacturer, "Google") == 0 && @@ -1931,7 +1931,7 @@ vioif_select_interrupt_types(void) return (DDI_INTR_TYPE_FIXED); } - return (0); + return (VIRTIO_ANY_INTR_TYPE); } static int diff --git a/usr/src/uts/common/io/vioscsi/vioscsi.c b/usr/src/uts/common/io/vioscsi/vioscsi.c index 0c83b33489..6d3d1e374a 100644 --- a/usr/src/uts/common/io/vioscsi/vioscsi.c +++ b/usr/src/uts/common/io/vioscsi/vioscsi.c @@ -1263,7 +1263,7 @@ vioscsi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) return (DDI_FAILURE); } - if (virtio_init_complete(vio, 0) != DDI_SUCCESS) { + if (virtio_init_complete(vio, VIRTIO_ANY_INTR_TYPE) != DDI_SUCCESS) { dev_err(dip, CE_WARN, "virtio_init_complete failed"); vioscsi_teardown(sc, B_TRUE); return (DDI_FAILURE); diff --git a/usr/src/uts/common/io/virtio/virtio.h b/usr/src/uts/common/io/virtio/virtio.h index 48e15b28f2..820bc3b811 100644 --- a/usr/src/uts/common/io/virtio/virtio.h +++ b/usr/src/uts/common/io/virtio/virtio.h @@ -350,6 +350,12 @@ uint_t virtio_dma_ncookies(virtio_dma_t *); uint64_t virtio_dma_cookie_pa(virtio_dma_t *, uint_t); size_t virtio_dma_cookie_size(virtio_dma_t *, uint_t); +/* + * virtio_init_complete() accepts a mask of allowed interrupt types using the + * DDI_INTR_TYPE_* family of constants. If no specific interrupt type is + * required, pass VIRTIO_ANY_INTR_TYPE instead: + */ +#define VIRTIO_ANY_INTR_TYPE 0 #ifdef __cplusplus } diff --git a/usr/src/uts/common/io/virtio/virtio_dma.c b/usr/src/uts/common/io/virtio/virtio_dma.c index 81972b5402..b2cbbb2acf 100644 --- a/usr/src/uts/common/io/virtio/virtio_dma.c +++ b/usr/src/uts/common/io/virtio/virtio_dma.c @@ -11,6 +11,7 @@ /* * Copyright 2019 Joyent, Inc. 
+ * Copyright 2022 Oxide Computer Company */ /* @@ -40,7 +41,21 @@ #include "virtio.h" #include "virtio_impl.h" +typedef int (dma_wait_t)(caddr_t); +static dma_wait_t * +virtio_dma_wait_from_kmflags(int kmflags) +{ + switch (kmflags) { + case KM_SLEEP: + return (DDI_DMA_SLEEP); + case KM_NOSLEEP: + case KM_NOSLEEP_LAZY: + return (DDI_DMA_DONTWAIT); + default: + panic("unexpected kmflags value 0x%x", kmflags); + } +} void virtio_dma_sync(virtio_dma_t *vidma, int flag) @@ -90,10 +105,7 @@ virtio_dma_init_handle(virtio_t *vio, virtio_dma_t *vidma, { int r; dev_info_t *dip = vio->vio_dip; - - VERIFY(kmflags == KM_SLEEP || kmflags == KM_NOSLEEP); - int (*dma_wait)(caddr_t) = (kmflags == KM_SLEEP) ? DDI_DMA_SLEEP : - DDI_DMA_DONTWAIT; + int (*dma_wait)(caddr_t) = virtio_dma_wait_from_kmflags(kmflags); vidma->vidma_virtio = vio; @@ -124,10 +136,7 @@ virtio_dma_init(virtio_t *vio, virtio_dma_t *vidma, size_t sz, int r; dev_info_t *dip = vio->vio_dip; caddr_t va = NULL; - - VERIFY(kmflags == KM_SLEEP || kmflags == KM_NOSLEEP); - int (*dma_wait)(caddr_t) = (kmflags == KM_SLEEP) ? DDI_DMA_SLEEP : - DDI_DMA_DONTWAIT; + int (*dma_wait)(caddr_t) = virtio_dma_wait_from_kmflags(kmflags); if (virtio_dma_init_handle(vio, vidma, attr, kmflags) != DDI_SUCCESS) { @@ -168,10 +177,7 @@ virtio_dma_bind(virtio_dma_t *vidma, void *va, size_t sz, int dmaflags, int r; dev_info_t *dip = vidma->vidma_virtio->vio_dip; ddi_dma_cookie_t dmac; - - VERIFY(kmflags == KM_SLEEP || kmflags == KM_NOSLEEP); - int (*dma_wait)(caddr_t) = (kmflags == KM_SLEEP) ? 
DDI_DMA_SLEEP : - DDI_DMA_DONTWAIT; + int (*dma_wait)(caddr_t) = virtio_dma_wait_from_kmflags(kmflags); VERIFY(vidma->vidma_level & VIRTIO_DMALEVEL_HANDLE_ALLOC); VERIFY(!(vidma->vidma_level & VIRTIO_DMALEVEL_HANDLE_BOUND)); diff --git a/usr/src/uts/common/io/virtio/virtio_main.c b/usr/src/uts/common/io/virtio/virtio_main.c index 28dce6dc92..ec8bcd9f22 100644 --- a/usr/src/uts/common/io/virtio/virtio_main.c +++ b/usr/src/uts/common/io/virtio/virtio_main.c @@ -1440,7 +1440,7 @@ virtio_interrupts_setup(virtio_t *vio, int allow_types) return (DDI_FAILURE); } - if (allow_types != 0) { + if (allow_types != VIRTIO_ANY_INTR_TYPE) { /* * Restrict the possible interrupt types at the request of the * driver. diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index c761028491..3c22bb04ce 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -660,6 +660,7 @@ CHKHDRS= \ vfstab.h \ vgareg.h \ videodev2.h \ + vio9p.h \ visual_io.h \ vlan.h \ vm.h \ diff --git a/usr/src/uts/common/sys/lockstat.h b/usr/src/uts/common/sys/lockstat.h index 308b96326c..f2a10dcc6b 100644 --- a/usr/src/uts/common/sys/lockstat.h +++ b/usr/src/uts/common/sys/lockstat.h @@ -26,8 +26,6 @@ #ifndef _SYS_LOCKSTAT_H #define _SYS_LOCKSTAT_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/dtrace.h> #ifdef __cplusplus @@ -123,7 +121,7 @@ extern void (*lockstat_probe)(dtrace_id_t, uintptr_t, uintptr_t, extern int lockstat_active_threads(void); extern int lockstat_depth(void); -extern void lockstat_hot_patch(void); +extern void lockstat_hotpatch_probe(int); /* * Macros to record lockstat probes. 
@@ -175,10 +173,10 @@ extern void lockstat_hot_patch(void); if (t_spin) { \ t_spin = gethrtime_waitfree() - t_spin; \ t_spin = CLAMP32(t_spin); \ - } \ + } \ (*lockstat_probe)(id, (uintptr_t)(lp), t_spin, \ 0, 0, 0); \ - } \ + } \ curthread->t_lockstat--; \ } diff --git a/usr/src/uts/common/sys/vio9p.h b/usr/src/uts/common/sys/vio9p.h new file mode 100644 index 0000000000..359862e797 --- /dev/null +++ b/usr/src/uts/common/sys/vio9p.h @@ -0,0 +1,49 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2022 Oxide Computer Company + */ + +#ifndef _SYS_VIO9P_H +#define _SYS_VIO9P_H + +/* + * VIRTIO 9P DRIVER + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * If the hypervisor supports mount tags through the VIRTIO_9P_F_MOUNT_TAG + * feature, they will have at most this many bytes: + */ +#define VIRTIO_9P_TAGLEN 32 + +/* + * ioctl(2) support for 9P channel devices: + */ +#define VIO9P_IOC_BASE (('9' << 16) | ('P' << 8)) +#define VIO9P_IOC_MOUNT_TAG (VIO9P_IOC_BASE | 0x01) + +/* + * Buffer size for the VIO9P_IOC_MOUNT_TAG ioctl, which includes one byte + * beyond the maximum tag length for NUL termination: + */ +#define VIO9P_MOUNT_TAG_SIZE (VIRTIO_9P_TAGLEN + 1) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VIO9P_H */ diff --git a/usr/src/uts/i86pc/Makefile.workarounds b/usr/src/uts/i86pc/Makefile.workarounds index 2300e74393..21a6ff9155 100644 --- a/usr/src/uts/i86pc/Makefile.workarounds +++ b/usr/src/uts/i86pc/Makefile.workarounds @@ -101,9 +101,10 @@ WORKAROUND_DEFS += -DOPTERON_ERRATUM_131 WORKAROUND_DEFS += -DOPTERON_WORKAROUND_6336786 # -# Mutex primitives don't work 
as expected +# Potential Violation of Read Ordering Rules Between Semaphore Operations and +# Unlocked Read-Modify-Write Instructions # -WORKAROUND_DEFS += -DOPTERON_WORKAROUND_6323525 +WORKAROUND_DEFS += -DOPTERON_ERRATUM_147 # # Some Registered DIMMs incompatible with address parity feature diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c index 0b42cad19d..35476bb9ed 100644 --- a/usr/src/uts/i86pc/os/cpuid.c +++ b/usr/src/uts/i86pc/os/cpuid.c @@ -6281,7 +6281,7 @@ cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum) return (!(regs.cp_edx & 0x100)); } return (0); - case 6323525: + case 147: /* * This erratum (K8 #147) is not present on family 10 and newer. */ diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c index e90dc6466a..5310c79db9 100644 --- a/usr/src/uts/i86pc/os/mp_startup.c +++ b/usr/src/uts/i86pc/os/mp_startup.c @@ -30,6 +30,7 @@ * Copyright 2020 Joyent, Inc. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. * Copyright 2018 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2022 Oxide Computer Company */ #include <sys/types.h> @@ -669,8 +670,8 @@ int opteron_workaround_6336786; /* non-zero -> WA relevant and applied */ int opteron_workaround_6336786_UP = 0; /* Not needed for UP */ #endif -#if defined(OPTERON_WORKAROUND_6323525) -int opteron_workaround_6323525; /* if non-zero -> at least one cpu has it */ +#if defined(OPTERON_ERRATUM_147) +int opteron_erratum_147; /* if non-zero -> at least one cpu has it */ #endif #if defined(OPTERON_ERRATUM_298) @@ -1166,8 +1167,8 @@ workaround_errata(struct cpu *cpu) * 'Revision Guide for AMD Athlon 64 and AMD Opteron Processors' * document 25759. */ - if (cpuid_opteron_erratum(cpu, 6323525) > 0) { -#if defined(OPTERON_WORKAROUND_6323525) + if (cpuid_opteron_erratum(cpu, 147) > 0) { +#if defined(OPTERON_ERRATUM_147) /* * This problem only occurs with 2 or more cores. If bit in * MSR_AMD_BU_CFG set, then not applicable. 
The workaround @@ -1178,8 +1179,8 @@ workaround_errata(struct cpu *cpu) * It is too early in boot to call the patch routine so * set erratum variable to be done in startup_end(). */ - if (opteron_workaround_6323525) { - opteron_workaround_6323525++; + if (opteron_erratum_147) { + opteron_erratum_147++; #if defined(__xpv) } else if (is_x86_feature(x86_featureset, X86FSET_SSE2)) { if (DOMAIN_IS_INITDOMAIN(xen_info)) { @@ -1188,7 +1189,7 @@ workaround_errata(struct cpu *cpu) * operations are supported? */ if (xpv_nr_phys_cpus() > 1) - opteron_workaround_6323525++; + opteron_erratum_147++; } else { /* * We have no way to tell how many physical @@ -1196,18 +1197,18 @@ workaround_errata(struct cpu *cpu) * has the problem, so enable the workaround * unconditionally (at some performance cost). */ - opteron_workaround_6323525++; + opteron_erratum_147++; } #else /* __xpv */ } else if (is_x86_feature(x86_featureset, X86FSET_SSE2) && ((opteron_get_nnodes() * cpuid_get_ncpu_per_chip(cpu)) > 1)) { if ((xrdmsr(MSR_AMD_BU_CFG) & (UINT64_C(1) << 33)) == 0) - opteron_workaround_6323525++; + opteron_erratum_147++; #endif /* __xpv */ } #else - workaround_warning(cpu, 6323525); + workaround_warning(cpu, 147); missing++; #endif } @@ -1306,9 +1307,9 @@ workaround_errata_end() if (opteron_workaround_6336786) workaround_applied(6336786); #endif -#if defined(OPTERON_WORKAROUND_6323525) - if (opteron_workaround_6323525) - workaround_applied(6323525); +#if defined(OPTERON_ERRATUM_147) + if (opteron_erratum_147) + workaround_applied(147); #endif #if defined(OPTERON_ERRATUM_298) if (opteron_erratum_298) { diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c index 416b3fb520..a8d3a35908 100644 --- a/usr/src/uts/i86pc/os/startup.c +++ b/usr/src/uts/i86pc/os/startup.c @@ -25,7 +25,7 @@ * Copyright 2017 Nexenta Systems, Inc. * Copyright 2020 Joyent, Inc. * Copyright (c) 2015 by Delphix. All rights reserved. 
- * Copyright 2020 Oxide Computer Company + * Copyright 2022 Oxide Computer Company * Copyright (c) 2020 Carlos Neira <cneirabustos@gmail.com> */ /* @@ -2125,9 +2125,9 @@ startup_end(void) */ cpu_event_init(); -#if defined(OPTERON_WORKAROUND_6323525) - if (opteron_workaround_6323525) - patch_workaround_6323525(); +#if defined(OPTERON_ERRATUM_147) + if (opteron_erratum_147) + patch_erratum_147(); #endif /* * If needed, load TOD module now so that ddi_get_time(9F) etc. work diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel index b635e100e1..cb668e8a4b 100644 --- a/usr/src/uts/intel/Makefile.intel +++ b/usr/src/uts/intel/Makefile.intel @@ -418,6 +418,9 @@ DRV_KMODS += vioblk DRV_KMODS += vioif DRV_KMODS += vioscsi +# Virtio 9P transport driver +DRV_KMODS += vio9p + # # DTrace and DTrace Providers # diff --git a/usr/src/uts/intel/ml/lock_prim.s b/usr/src/uts/intel/ml/lock_prim.s index 4267561bf7..ce2e093343 100644 --- a/usr/src/uts/intel/ml/lock_prim.s +++ b/usr/src/uts/intel/ml/lock_prim.s @@ -25,6 +25,7 @@ /* * Copyright 2019 Joyent, Inc. + * Copyright 2022 Oxide Computer Company */ #include "assym.h" @@ -36,6 +37,38 @@ #include <sys/rwlock_impl.h> #include <sys/lockstat.h> + +#if defined(OPTERON_ERRATUM_147) + +/* + * Leave space for an lfence to be inserted if required by a CPU which suffers + * from this erratum. Pad (with nops) the location for the lfence so that it + * is adequately aligned for atomic hotpatching. + */ +#define ERRATUM147_PATCH_POINT(name) \ + .align 4, NOP_INSTR; \ +./**/name/**/_147_patch_point: \ + nop; \ + nop; \ + nop; \ + nop; + +#else /* defined(OPTERON_ERRATUM_147) */ + +/* Empty macro so ifdefs are not required for all of the erratum sites. */ +#define ERRATUM147_PATCH_POINT(name) + +#endif /* defined(OPTERON_ERRATUM_147) */ + +/* + * Patch point for lockstat probes. When the associated probe is disabled, it + * will 'ret' from the function. 
It is hotpatched to allow execution to fall + * through when the probe is enabled. + */ +#define LOCKSTAT_RET(name) \ +./**/name/**/_lockstat_patch_point: \ + ret; + /* * lock_try(lp), ulock_try(lp) * - returns non-zero on success. @@ -51,8 +84,8 @@ movzbq %dl, %rax xchgb %dl, (%rdi) xorb %dl, %al -.lock_try_lockstat_patch_point: - ret + LOCKSTAT_RET(lock_try) + testb %al, %al jnz 0f ret @@ -79,7 +112,7 @@ movq %rdi, %r12 /* preserve lock ptr for debugging */ leaq .ulock_panic_msg(%rip), %rdi - pushq %rbp /* align stack properly */ + pushq %rbp movq %rsp, %rbp xorl %eax, %eax /* clear for varargs */ call panic @@ -107,8 +140,8 @@ ulock_pass: ENTRY(lock_clear) movb $0, (%rdi) -.lock_clear_lockstat_patch_point: - ret + LOCKSTAT_RET(lock_clear) + movq %rdi, %rsi /* rsi = lock addr */ movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */ movl $LS_LOCK_CLEAR_RELEASE, %edi /* edi = event */ @@ -122,7 +155,7 @@ ulock_pass: jb ulock_clr /* uaddr < kernelbase, proceed */ leaq .ulock_clear_msg(%rip), %rdi - pushq %rbp /* align stack properly */ + pushq %rbp movq %rsp, %rbp xorl %eax, %eax /* clear for varargs */ call panic @@ -163,12 +196,13 @@ ulock_clr: movq 16(%rsp), %rdx /* rdx = old pil addr */ movw %ax, (%rdx) /* store old pil */ leave -.lock_set_spl_lockstat_patch_point: - ret + LOCKSTAT_RET(lock_set_spl) + movq %rdi, %rsi /* rsi = lock addr */ movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */ movl $LS_LOCK_SET_SPL_ACQUIRE, %edi jmp lockstat_wrapper + .lss_miss: movl 8(%rsp), %esi /* new_pil */ movq 16(%rsp), %rdx /* old_pil_addr */ @@ -197,8 +231,8 @@ ulock_clr: xchgb %dl, (%rdi) /* try to set lock */ testb %dl, %dl /* did we get it? 
*/ jnz lock_set_spin /* no, go to C for the hard case */ -.lock_set_lockstat_patch_point: - ret + LOCKSTAT_RET(lock_set) + movq %rdi, %rsi /* rsi = lock addr */ movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */ movl $LS_LOCK_SET_ACQUIRE, %edi @@ -210,40 +244,21 @@ ulock_clr: */ ENTRY(lock_clear_splx) - movb $0, (%rdi) /* clear lock */ -.lock_clear_splx_lockstat_patch_point: - jmp 0f -0: - movl %esi, %edi /* arg for splx */ - jmp splx /* let splx do its thing */ -.lock_clear_splx_lockstat: - pushq %rbp /* align stack properly */ + pushq %rbp movq %rsp, %rbp - subq $16, %rsp /* space to save args across splx */ - movq %rdi, 8(%rsp) /* save lock ptr across splx call */ - movl %esi, %edi /* arg for splx */ - call splx /* lower the priority */ - movq 8(%rsp), %rsi /* rsi = lock ptr */ - leave /* unwind stack */ + pushq %rdi /* save lp across call for lockstat */ + movb $0, (%rdi) /* clear lock */ + movl %esi, %edi /* arg for splx */ + call splx /* let splx do its thing */ + popq %rsi /* retrieve lp for lockstat */ + leave + LOCKSTAT_RET(lock_clear_splx) + movq %gs:CPU_THREAD, %rdx /* rdx = thread addr */ movl $LS_LOCK_CLEAR_SPLX_RELEASE, %edi jmp lockstat_wrapper SET_SIZE(lock_clear_splx) -#if defined(__GNUC_AS__) -#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL \ - (.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2) - -#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT \ - (.lock_clear_splx_lockstat_patch_point + 1) -#else -#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL \ - [.lock_clear_splx_lockstat - .lock_clear_splx_lockstat_patch_point - 2] - -#define LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT \ - [.lock_clear_splx_lockstat_patch_point + 1] -#endif - /* * mutex_enter() and mutex_exit(). * @@ -261,11 +276,6 @@ ulock_clr: * If we interrupt a thread in mutex_exit() that has not yet cleared * the lock, cmnint() resets its PC back to the beginning of * mutex_exit() so it will check again for waiters when it resumes. 
- * - * The lockstat code below is activated when the lockstat driver - * calls lockstat_hot_patch() to hot-patch the kernel mutex code. - * Note that we don't need to test lockstat_event_mask here -- we won't - * patch this code in unless we're gathering ADAPTIVE_HOLD lockstats. */ ENTRY_NP(mutex_enter) @@ -274,29 +284,27 @@ ulock_clr: lock cmpxchgq %rdx, (%rdi) jnz mutex_vector_enter -.mutex_enter_lockstat_patch_point: -#if defined(OPTERON_WORKAROUND_6323525) -.mutex_enter_6323525_patch_point: - ret /* nop space for lfence */ - nop - nop -.mutex_enter_lockstat_6323525_patch_point: /* new patch point if lfence */ - nop -#else /* OPTERON_WORKAROUND_6323525 */ - ret -#endif /* OPTERON_WORKAROUND_6323525 */ + + ERRATUM147_PATCH_POINT(mutex_enter) + + LOCKSTAT_RET(mutex_enter) + movq %rdi, %rsi movl $LS_MUTEX_ENTER_ACQUIRE, %edi + jmp lockstat_wrapper + SET_SIZE(mutex_enter) + + /* * expects %rdx=thread, %rsi=lock, %edi=lockstat event */ - ALTENTRY(lockstat_wrapper) + ENTRY_NP(lockstat_wrapper) incb T_LOCKSTAT(%rdx) /* curthread->t_lockstat++ */ leaq lockstat_probemap(%rip), %rax movl (%rax, %rdi, DTRACE_IDSIZE), %eax testl %eax, %eax /* check for non-zero probe */ jz 1f - pushq %rbp /* align stack properly */ + pushq %rbp movq %rsp, %rbp movl %eax, %edi movq lockstat_probe, %rax @@ -308,7 +316,6 @@ ulock_clr: movl $1, %eax /* return success if tryenter */ ret SET_SIZE(lockstat_wrapper) - SET_SIZE(mutex_enter) /* * expects %rcx=thread, %rdx=arg, %rsi=lock, %edi=lockstat event @@ -319,7 +326,7 @@ ulock_clr: movl (%rax, %rdi, DTRACE_IDSIZE), %eax testl %eax, %eax /* check for non-zero probe */ jz 1f - pushq %rbp /* align stack properly */ + pushq %rbp movq %rsp, %rbp movl %eax, %edi movq lockstat_probe, %rax @@ -340,20 +347,13 @@ ulock_clr: cmpxchgq %rdx, (%rdi) jnz mutex_vector_tryenter not %eax /* return success (nonzero) */ -#if defined(OPTERON_WORKAROUND_6323525) -.mutex_tryenter_lockstat_patch_point: -.mutex_tryenter_6323525_patch_point: - ret /* nop space for 
lfence */ - nop - nop -.mutex_tryenter_lockstat_6323525_patch_point: /* new patch point if lfence */ - nop -#else /* OPTERON_WORKAROUND_6323525 */ -.mutex_tryenter_lockstat_patch_point: - ret -#endif /* OPTERON_WORKAROUND_6323525 */ + + ERRATUM147_PATCH_POINT(mutex_tryenter) + + LOCKSTAT_RET(mutex_tryenter) + movq %rdi, %rsi - movl $LS_MUTEX_ENTER_ACQUIRE, %edi + movl $LS_MUTEX_TRYENTER_ACQUIRE, %edi jmp lockstat_wrapper SET_SIZE(mutex_tryenter) @@ -364,15 +364,10 @@ ulock_clr: cmpxchgq %rdx, (%rdi) jnz 0f not %eax /* return success (nonzero) */ -#if defined(OPTERON_WORKAROUND_6323525) -.mutex_atryenter_6323525_patch_point: - ret /* nop space for lfence */ - nop - nop - nop -#else /* OPTERON_WORKAROUND_6323525 */ + + ERRATUM147_PATCH_POINT(mutex_atryenter) + ret -#endif /* OPTERON_WORKAROUND_6323525 */ 0: xorl %eax, %eax /* return failure */ ret @@ -415,8 +410,8 @@ mutex_exit_critical_start: /* If interrupted, restart here */ jne mutex_vector_exit /* wrong type or wrong owner */ movq $0, (%rdi) /* clear owner AND lock */ .mutex_exit_critical_end: -.mutex_exit_lockstat_patch_point: - ret + LOCKSTAT_RET(mutex_exit) + movq %rdi, %rsi movl $LS_MUTEX_EXIT_RELEASE, %edi jmp lockstat_wrapper @@ -448,13 +443,14 @@ mutex_exit_critical_size: lock cmpxchgq %rdx, (%rdi) /* try to grab read lock */ jnz rw_enter_sleep -.rw_read_enter_lockstat_patch_point: - ret + LOCKSTAT_RET(rw_read_enter) + movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */ movq %rdi, %rsi /* rsi = lock ptr */ movl $LS_RW_ENTER_ACQUIRE, %edi movl $RW_READER, %edx jmp lockstat_wrapper_arg + .rw_write_enter: movq %gs:CPU_THREAD, %rdx orq $RW_WRITE_LOCKED, %rdx /* rdx = write-locked value */ @@ -463,18 +459,9 @@ mutex_exit_critical_size: cmpxchgq %rdx, (%rdi) /* try to grab write lock */ jnz rw_enter_sleep -#if defined(OPTERON_WORKAROUND_6323525) -.rw_write_enter_lockstat_patch_point: -.rw_write_enter_6323525_patch_point: - ret - nop - nop -.rw_write_enter_lockstat_6323525_patch_point: - nop -#else /* 
OPTERON_WORKAROUND_6323525 */ -.rw_write_enter_lockstat_patch_point: - ret -#endif /* OPTERON_WORKAROUND_6323525 */ + ERRATUM147_PATCH_POINT(rw_write_enter) + + LOCKSTAT_RET(rw_write_enter) movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */ movq %rdi, %rsi /* rsi = lock ptr */ @@ -492,13 +479,14 @@ mutex_exit_critical_size: lock cmpxchgq %rdx, (%rdi) /* try to drop read lock */ jnz rw_exit_wakeup -.rw_read_exit_lockstat_patch_point: - ret + LOCKSTAT_RET(rw_read_exit) + movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */ movq %rdi, %rsi /* rsi = lock ptr */ movl $LS_RW_EXIT_RELEASE, %edi movl $RW_READER, %edx jmp lockstat_wrapper_arg + .rw_not_single_reader: testl $RW_WRITE_LOCKED, %eax /* write-locked or write-wanted? */ jnz .rw_write_exit @@ -513,8 +501,8 @@ mutex_exit_critical_size: lock cmpxchgq %rdx, (%rdi) /* try to drop read lock */ jnz rw_exit_wakeup -.rw_write_exit_lockstat_patch_point: - ret + LOCKSTAT_RET(rw_write_exit) + movq %gs:CPU_THREAD, %rcx /* rcx = thread ptr */ movq %rdi, %rsi /* rsi - lock ptr */ movl $LS_RW_EXIT_RELEASE, %edi @@ -522,149 +510,131 @@ mutex_exit_critical_size: jmp lockstat_wrapper_arg SET_SIZE(rw_exit) -#if defined(OPTERON_WORKAROUND_6323525) +#if defined(OPTERON_ERRATUM_147) /* - * If it is necessary to patch the lock enter routines with the lfence - * workaround, workaround_6323525_patched is set to a non-zero value so that - * the lockstat_hat_patch routine can patch to the new location of the 'ret' - * instruction. + * Track if erratum 147 workaround has been hotpatched into place. 
*/ - DGDEF3(workaround_6323525_patched, 4, 4) + DGDEF3(erratum_147_patched, 4, 4) .long 0 -#define HOT_MUTEX_PATCH(srcaddr, dstaddr, size) \ - movq $size, %rbx; \ - movq $dstaddr, %r13; \ - addq %rbx, %r13; \ - movq $srcaddr, %r12; \ - addq %rbx, %r12; \ -0: \ - decq %r13; \ - decq %r12; \ - movzbl (%r12), %esi; \ - movq $1, %rdx; \ - movq %r13, %rdi; \ - call hot_patch_kernel_text; \ - decq %rbx; \ - testq %rbx, %rbx; \ - jg 0b; +#define HOT_MUTEX_PATCH(iaddr, insn_reg) \ + movq $iaddr, %rdi; \ + movl %insn_reg, %esi; \ + movl $4, %edx; \ + call hot_patch_kernel_text; + /* - * patch_workaround_6323525: provide workaround for 6323525 + * void + * patch_erratum_147(void) + * + * Patch lock operations to work around erratum 147. * * The workaround is to place a fencing instruction (lfence) between the * mutex operation and the subsequent read-modify-write instruction. - * - * This routine hot patches the lfence instruction on top of the space - * reserved by nops in the lock enter routines. */ - ENTRY_NP(patch_workaround_6323525) + + ENTRY_NP(patch_erratum_147) pushq %rbp movq %rsp, %rbp pushq %r12 - pushq %r13 - pushq %rbx /* - * lockstat_hot_patch() to use the alternate lockstat workaround - * 6323525 patch points (points past the lfence instruction to the - * new ret) when workaround_6323525_patched is set. + * Patch `nop; nop; nop; nop` sequence to `lfence; nop`. Since those + * patch points have been aligned to a 4-byte boundary, we can be + * confident that hot_patch_kernel_text() will be able to proceed + * safely and successfully. */ - movl $1, workaround_6323525_patched + movl $0x90e8ae0f, %r12d + HOT_MUTEX_PATCH(.mutex_enter_147_patch_point, r12d) + HOT_MUTEX_PATCH(.mutex_tryenter_147_patch_point, r12d) + HOT_MUTEX_PATCH(.mutex_atryenter_147_patch_point, r12d) + HOT_MUTEX_PATCH(.rw_write_enter_147_patch_point, r12d) - /* - * patch ret/nop/nop/nop to lfence/ret at the end of the lock enter - * routines. 
The 4 bytes are patched in reverse order so that the - * the existing ret is overwritten last. This provides lock enter - * sanity during the intermediate patching stages. - */ - HOT_MUTEX_PATCH(_lfence_insn, .mutex_enter_6323525_patch_point, 4) - HOT_MUTEX_PATCH(_lfence_insn, .mutex_tryenter_6323525_patch_point, 4) - HOT_MUTEX_PATCH(_lfence_insn, .mutex_atryenter_6323525_patch_point, 4) - HOT_MUTEX_PATCH(_lfence_insn, .rw_write_enter_6323525_patch_point, 4) + /* Record that erratum 147 points have been hotpatched */ + movl $1, erratum_147_patched - popq %rbx - popq %r13 popq %r12 movq %rbp, %rsp popq %rbp ret -_lfence_insn: - lfence - ret - SET_SIZE(patch_workaround_6323525) + SET_SIZE(patch_erratum_147) + +#endif /* OPTERON_ERRATUM_147 */ + /* + * void + * lockstat_hotpatch_site(caddr_t instr_addr, int do_enable) + */ + ENTRY(lockstat_hotpatch_site) + pushq %rbp + movq %rsp, %rbp + pushq %rdi + pushq %rsi -#endif /* OPTERON_WORKAROUND_6323525 */ + testl %esi, %esi + jz .do_disable + /* enable the probe (replace ret with nop) */ + movl $NOP_INSTR, %esi + movl $1, %edx + call hot_patch_kernel_text + leave + ret -#define HOT_PATCH(addr, event, active_instr, normal_instr, len) \ - movq $normal_instr, %rsi; \ - movq $active_instr, %rdi; \ - leaq lockstat_probemap(%rip), %rax; \ - movl _MUL(event, DTRACE_IDSIZE)(%rax), %eax; \ - testl %eax, %eax; \ - jz 9f; \ - movq %rdi, %rsi; \ -9: \ - movq $len, %rdx; \ - movq $addr, %rdi; \ +.do_disable: + /* disable the probe (replace nop with ret) */ + movl $RET_INSTR, %esi + movl $1, %edx call hot_patch_kernel_text + leave + ret + SET_SIZE(lockstat_hotpatch_site) + +#define HOT_PATCH_MATCH(name, probe, reg) \ + cmpl $probe, %reg; \ + jne 1f; \ + leaq lockstat_probemap(%rip), %rax; \ + movl _MUL(probe, DTRACE_IDSIZE)(%rax), %esi; \ + movq $./**/name/**/_lockstat_patch_point, %rdi; \ + call lockstat_hotpatch_site; \ + 1: + +/* + * void + * lockstat_hotpatch_probe(int ls_probe) + * + * Given a lockstat probe identifier, hotpatch 
any associated lockstat + * primitive routine(s) so they fall through into the lockstat_probe() call (if + * the probe is enabled) or return normally (when the probe is disabled). + */ - ENTRY(lockstat_hot_patch) - pushq %rbp /* align stack properly */ + ENTRY(lockstat_hotpatch_probe) + pushq %rbp movq %rsp, %rbp + pushq %r12 + movl %edi, %r12d -#if defined(OPTERON_WORKAROUND_6323525) - cmpl $0, workaround_6323525_patched - je 1f - HOT_PATCH(.mutex_enter_lockstat_6323525_patch_point, - LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.mutex_tryenter_lockstat_6323525_patch_point, - LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.rw_write_enter_lockstat_6323525_patch_point, - LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - jmp 2f -1: - HOT_PATCH(.mutex_enter_lockstat_patch_point, - LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.mutex_tryenter_lockstat_patch_point, - LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.rw_write_enter_lockstat_patch_point, - LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) -2: -#else /* OPTERON_WORKAROUND_6323525 */ - HOT_PATCH(.mutex_enter_lockstat_patch_point, - LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.mutex_tryenter_lockstat_patch_point, - LS_MUTEX_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.rw_write_enter_lockstat_patch_point, - LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) -#endif /* !OPTERON_WORKAROUND_6323525 */ - HOT_PATCH(.mutex_exit_lockstat_patch_point, - LS_MUTEX_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.rw_read_enter_lockstat_patch_point, - LS_RW_ENTER_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.rw_write_exit_lockstat_patch_point, - LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.rw_read_exit_lockstat_patch_point, - LS_RW_EXIT_RELEASE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.lock_set_lockstat_patch_point, - LS_LOCK_SET_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.lock_try_lockstat_patch_point, - LS_LOCK_TRY_ACQUIRE, 
NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.lock_clear_lockstat_patch_point, - LS_LOCK_CLEAR_RELEASE, NOP_INSTR, RET_INSTR, 1) - HOT_PATCH(.lock_set_spl_lockstat_patch_point, - LS_LOCK_SET_SPL_ACQUIRE, NOP_INSTR, RET_INSTR, 1) - - HOT_PATCH(LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_POINT, - LS_LOCK_CLEAR_SPLX_RELEASE, - LOCK_CLEAR_SPLX_LOCKSTAT_PATCH_VAL, 0, 1); - leave /* unwind stack */ + HOT_PATCH_MATCH(mutex_enter, LS_MUTEX_ENTER_ACQUIRE, r12d) + HOT_PATCH_MATCH(mutex_tryenter, LS_MUTEX_TRYENTER_ACQUIRE, r12d) + HOT_PATCH_MATCH(mutex_exit, LS_MUTEX_EXIT_RELEASE, r12d) + + HOT_PATCH_MATCH(rw_write_enter, LS_RW_ENTER_ACQUIRE, r12d) + HOT_PATCH_MATCH(rw_read_enter, LS_RW_ENTER_ACQUIRE, r12d) + HOT_PATCH_MATCH(rw_write_exit, LS_RW_EXIT_RELEASE, r12d) + HOT_PATCH_MATCH(rw_read_exit, LS_RW_EXIT_RELEASE, r12d) + + HOT_PATCH_MATCH(lock_set, LS_LOCK_SET_ACQUIRE, r12d) + HOT_PATCH_MATCH(lock_try, LS_LOCK_TRY_ACQUIRE, r12d) + HOT_PATCH_MATCH(lock_clear, LS_LOCK_CLEAR_RELEASE, r12d) + HOT_PATCH_MATCH(lock_set_spl, LS_LOCK_SET_SPL_ACQUIRE, r12d) + HOT_PATCH_MATCH(lock_clear_splx, LS_LOCK_CLEAR_SPLX_RELEASE, r12d) + + popq %r12 + leave ret - SET_SIZE(lockstat_hot_patch) + SET_SIZE(lockstat_hotpatch_probe) ENTRY(membar_enter) ALTENTRY(membar_exit) diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h index ab62bd6deb..c16d430c2e 100644 --- a/usr/src/uts/intel/sys/x86_archext.h +++ b/usr/src/uts/intel/sys/x86_archext.h @@ -1366,9 +1366,9 @@ extern int opteron_erratum_100; extern int opteron_erratum_121; #endif -#if defined(OPTERON_WORKAROUND_6323525) -extern int opteron_workaround_6323525; -extern void patch_workaround_6323525(void); +#if defined(OPTERON_ERRATUM_147) +extern int opteron_erratum_147; +extern void patch_erratum_147(void); #endif #if !defined(__xpv) diff --git a/usr/src/uts/intel/vio9p/Makefile b/usr/src/uts/intel/vio9p/Makefile new file mode 100644 index 0000000000..0774962e52 --- /dev/null +++ b/usr/src/uts/intel/vio9p/Makefile @@ -0,0 +1,67 @@ 
+# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2022 Oxide Computer Company +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = vio9p +OBJECTS = $(VIO9P_OBJS:%=$(OBJS_DIR)/%) +ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/intel/Makefile.intel + +# +# Define targets +# +ALL_TARGET = $(BINARY) +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Overrides +# +INC_PATH += -I$(UTSBASE)/common/io/virtio + +# +# Driver depends on virtio +# +LDFLAGS += -N misc/virtio + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/intel/Makefile.targ |