diff options
Diffstat (limited to 'usr/src/uts')
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/vdev.h | 4 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/vdev_impl.h | 36 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/sys/zfs_bootenv.h | 52 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev.c | 2 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/vdev_label.c | 222 | ||||
-rw-r--r-- | usr/src/uts/common/fs/zfs/zfs_ioctl.c | 62 | ||||
-rw-r--r-- | usr/src/uts/common/sys/fs/zfs.h | 22 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/io/vpmtmr.c | 41 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/vmm.c | 16 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c | 43 |
10 files changed, 453 insertions, 47 deletions
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev.h b/usr/src/uts/common/fs/zfs/sys/vdev.h index b8c2ee5c9e..b839ed2359 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2017 by Delphix. All rights reserved. + * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2019, Datto Inc. All rights reserved. */ @@ -180,6 +180,8 @@ extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **); extern void vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv); extern void vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset, uint64_t size, zio_done_func_t *done, void *private, int flags); +extern int vdev_label_read_bootenv(vdev_t *, nvlist_t *); +extern int vdev_label_write_bootenv(vdev_t *, nvlist_t *); typedef enum { VDEV_LABEL_CREATE, /* create/add a new device */ diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h index fc0f90c8c9..4e42247345 100644 --- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h +++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h @@ -20,8 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. - * Copyright (c) 2011, 2019 by Delphix. All rights reserved. + * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. * Copyright 2019 Joyent, Inc. * Copyright 2020 Joshua M. Clulow <josh@sysmgr.org> @@ -412,7 +411,7 @@ struct vdev { #define VDEV_RAIDZ_MAXPARITY 3 #define VDEV_PAD_SIZE (8 << 10) -/* 2 padding areas (vl_pad1 and vl_pad2) to skip */ +/* 2 padding areas (vl_pad1 and vl_be) to skip */ #define VDEV_SKIP_SIZE VDEV_PAD_SIZE * 2 #define VDEV_PHYS_SIZE (112 << 10) #define VDEV_UBERBLOCK_RING (128 << 10) @@ -439,9 +438,38 @@ typedef struct vdev_phys { zio_eck_t vp_zbt; } vdev_phys_t; +typedef enum vbe_vers { + /* + * The bootenv file is stored as ascii text in the envblock. + * It is used by the GRUB bootloader used on Linux to store the + * contents of the grubenv file. The file is stored as raw ASCII, + * and is protected by an embedded checksum. By default, GRUB will + * check if the boot filesystem supports storing the environment data + * in a special location, and if so, will invoke filesystem specific + * logic to retrieve it. This can be overriden by a variable, should + * the user so desire. + */ + VB_RAW = 0, + + /* + * The bootenv file is converted to an nvlist and then packed into the + * envblock. + */ + VB_NVLIST = 1 +} vbe_vers_t; + +typedef struct vdev_boot_envblock { + uint64_t vbe_version; + char vbe_bootenv[VDEV_PAD_SIZE - sizeof (uint64_t) - + sizeof (zio_eck_t)]; + zio_eck_t vbe_zbt; +} vdev_boot_envblock_t; + +CTASSERT(sizeof (vdev_boot_envblock_t) == VDEV_PAD_SIZE); + typedef struct vdev_label { char vl_pad1[VDEV_PAD_SIZE]; /* 8K */ - char vl_pad2[VDEV_PAD_SIZE]; /* 8K */ + vdev_boot_envblock_t vl_be; /* 8K */ vdev_phys_t vl_vdev_phys; /* 112K */ char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */ } vdev_label_t; /* 256K total */ diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_bootenv.h b/usr/src/uts/common/fs/zfs/sys/zfs_bootenv.h new file mode 100644 index 0000000000..703a1c8fa6 --- /dev/null +++ b/usr/src/uts/common/fs/zfs/sys/zfs_bootenv.h @@ -0,0 +1,52 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020 Toomas Soome <tsoome@me.com> + */ + +#ifndef _ZFS_BOOTENV_H +#define _ZFS_BOOTENV_H + +/* + * Define macros for label bootenv nvlist pair keys. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define BOOTENV_VERSION "version" + +#define BE_ILLUMOS_VENDOR "illumos" +#define BE_FREEBSD_VENDOR "freebsd" +#define BE_GRUB_VENDOR "grub" + +#define BOOTENV_OS BE_ILLUMOS_VENDOR + +#define GRUB_ENVMAP BE_GRUB_VENDOR ":" "envmap" + +#define FREEBSD_BOOTONCE BE_FREEBSD_VENDOR ":" "bootonce" +#define FREEBSD_BOOTONCE_USED BE_FREEBSD_VENDOR ":" "bootonce-used" +#define ILLUMOS_BOOTONCE BE_ILLUMOS_VENDOR ":" "bootonce" +#define ILLUMOS_BOOTONCE_USED BE_ILLUMOS_VENDOR ":" "bootonce-used" +#define FREEBSD_NVSTORE BE_FREEBSD_VENDOR ":" "nvstore" +#define ILLUMOS_NVSTORE BE_ILLUMOS_VENDOR ":" "nvstore" + +#define OS_BOOTONCE BOOTENV_OS ":" "bootonce" +#define OS_BOOTONCE_USED BOOTENV_OS ":" "bootonce-used" +#define OS_NVSTORE BOOTENV_OS ":" "nvstore" + +#ifdef __cplusplus +} +#endif + +#endif /* _ZFS_BOOTENV_H */ diff --git a/usr/src/uts/common/fs/zfs/vdev.c b/usr/src/uts/common/fs/zfs/vdev.c index 37c93f60cc..99ad0cd27d 100644 --- a/usr/src/uts/common/fs/zfs/vdev.c +++ b/usr/src/uts/common/fs/zfs/vdev.c @@ -1469,7 +1469,7 @@ vdev_probe(vdev_t *vd, zio_t *zio) for (int l = 1; l < VDEV_LABELS; l++) { zio_nowait(zio_read_phys(pio, vd, vdev_label_offset(vd->vdev_psize, l, - offsetof(vdev_label_t, vl_pad2)), VDEV_PAD_SIZE, + offsetof(vdev_label_t, vl_be)), VDEV_PAD_SIZE, abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE), ZIO_CHECKSUM_OFF, vdev_probe_done, vps, ZIO_PRIORITY_SYNC_READ, vps->vps_flags, B_TRUE)); diff --git a/usr/src/uts/common/fs/zfs/vdev_label.c b/usr/src/uts/common/fs/zfs/vdev_label.c index 6235b06f17..b683c3694b 100644 --- a/usr/src/uts/common/fs/zfs/vdev_label.c +++ b/usr/src/uts/common/fs/zfs/vdev_label.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2018 by Delphix. All rights reserved. + * Copyright (c) 2012, 2020 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. * Copyright 2020 Joyent, Inc. */ @@ -150,6 +150,8 @@ #include <sys/dsl_scan.h> #include <sys/abd.h> #include <sys/fs/zfs.h> +#include <sys/byteorder.h> +#include <sys/zfs_bootenv.h> /* * Basic routines to read and write from a vdev label. @@ -940,7 +942,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) nvlist_t *label; vdev_phys_t *vp; abd_t *vp_abd; - abd_t *pad2; + abd_t *bootenv; uberblock_t *ub; abd_t *ub_abd; zio_t *zio; @@ -1101,8 +1103,8 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) ub->ub_txg = 0; /* Initialize the 2nd padding area. */ - pad2 = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE); - abd_zero(pad2, VDEV_PAD_SIZE); + bootenv = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE); + abd_zero(bootenv, VDEV_PAD_SIZE); /* * Write everything in parallel. @@ -1121,8 +1123,8 @@ retry: * Zero out the 2nd padding area where it might have * left over data from previous filesystem format. */ - vdev_label_write(zio, vd, l, pad2, - offsetof(vdev_label_t, vl_pad2), + vdev_label_write(zio, vd, l, bootenv, + offsetof(vdev_label_t, vl_be), VDEV_PAD_SIZE, NULL, NULL, flags); vdev_label_write(zio, vd, l, ub_abd, @@ -1138,7 +1140,7 @@ retry: } nvlist_free(label); - abd_free(pad2); + abd_free(bootenv); abd_free(ub_abd); abd_free(vp_abd); @@ -1162,6 +1164,212 @@ retry: } /* + * Done callback for vdev_label_read_bootenv_impl. If this is the first + * callback to finish, store our abd in the callback pointer. Otherwise, we + * just free our abd and return. + */ +static void +vdev_label_read_bootenv_done(zio_t *zio) +{ + zio_t *rio = zio->io_private; + abd_t **cbp = rio->io_private; + + ASSERT3U(zio->io_size, ==, VDEV_PAD_SIZE); + + if (zio->io_error == 0) { + mutex_enter(&rio->io_lock); + if (*cbp == NULL) { + /* Will free this buffer in vdev_label_read_bootenv. */ + *cbp = zio->io_abd; + } else { + abd_free(zio->io_abd); + } + mutex_exit(&rio->io_lock); + } else { + abd_free(zio->io_abd); + } +} + +static void +vdev_label_read_bootenv_impl(zio_t *zio, vdev_t *vd, int flags) +{ + for (int c = 0; c < vd->vdev_children; c++) + vdev_label_read_bootenv_impl(zio, vd->vdev_child[c], flags); + + /* + * We just use the first label that has a correct checksum; the + * bootloader should have rewritten them all to be the same on boot, + * and any changes we made since boot have been the same across all + * labels. + */ + if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) { + for (int l = 0; l < VDEV_LABELS; l++) { + vdev_label_read(zio, vd, l, + abd_alloc_linear(VDEV_PAD_SIZE, B_FALSE), + offsetof(vdev_label_t, vl_be), VDEV_PAD_SIZE, + vdev_label_read_bootenv_done, zio, flags); + } + } +} + +int +vdev_label_read_bootenv(vdev_t *rvd, nvlist_t *bootenv) +{ + nvlist_t *config; + spa_t *spa = rvd->vdev_spa; + abd_t *abd = NULL; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD; + + ASSERT(bootenv); + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); + + zio_t *zio = zio_root(spa, NULL, &abd, flags); + vdev_label_read_bootenv_impl(zio, rvd, flags); + int err = zio_wait(zio); + + if (abd != NULL) { + char *buf; + vdev_boot_envblock_t *vbe = abd_to_buf(abd); + + vbe->vbe_version = ntohll(vbe->vbe_version); + switch (vbe->vbe_version) { + case VB_RAW: + /* + * if we have textual data in vbe_bootenv, create nvlist + * with key "envmap". + */ + fnvlist_add_uint64(bootenv, BOOTENV_VERSION, VB_RAW); + vbe->vbe_bootenv[sizeof (vbe->vbe_bootenv) - 1] = '\0'; + fnvlist_add_string(bootenv, GRUB_ENVMAP, + vbe->vbe_bootenv); + break; + + case VB_NVLIST: + err = nvlist_unpack(vbe->vbe_bootenv, + sizeof (vbe->vbe_bootenv), &config, 0); + if (err == 0) { + fnvlist_merge(bootenv, config); + nvlist_free(config); + break; + } + /* FALLTHROUGH */ + default: + /* Check for FreeBSD zfs bootonce command string */ + buf = abd_to_buf(abd); + if (*buf == '\0') { + fnvlist_add_uint64(bootenv, BOOTENV_VERSION, + VB_NVLIST); + break; + } + fnvlist_add_string(bootenv, FREEBSD_BOOTONCE, buf); + } + + /* + * abd was allocated in vdev_label_read_bootenv_impl() + */ + abd_free(abd); + /* + * If we managed to read any successfully, + * return success. + */ + return (0); + } + return (err); +} + +int +vdev_label_write_bootenv(vdev_t *vd, nvlist_t *env) +{ + zio_t *zio; + spa_t *spa = vd->vdev_spa; + vdev_boot_envblock_t *bootenv; + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; + int error; + size_t nvsize; + char *nvbuf; + + error = nvlist_size(env, &nvsize, NV_ENCODE_XDR); + if (error != 0) + return (SET_ERROR(error)); + + if (nvsize >= sizeof (bootenv->vbe_bootenv)) { + return (SET_ERROR(E2BIG)); + } + + ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); + + error = ENXIO; + for (int c = 0; c < vd->vdev_children; c++) { + int child_err; + + child_err = vdev_label_write_bootenv(vd->vdev_child[c], env); + /* + * As long as any of the disks managed to write all of their + * labels successfully, return success. + */ + if (child_err == 0) + error = child_err; + } + + if (!vd->vdev_ops->vdev_op_leaf || vdev_is_dead(vd) || + !vdev_writeable(vd)) { + return (error); + } + ASSERT3U(sizeof (*bootenv), ==, VDEV_PAD_SIZE); + abd_t *abd = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE); + abd_zero(abd, VDEV_PAD_SIZE); + + bootenv = abd_borrow_buf_copy(abd, VDEV_PAD_SIZE); + nvbuf = bootenv->vbe_bootenv; + nvsize = sizeof (bootenv->vbe_bootenv); + + bootenv->vbe_version = fnvlist_lookup_uint64(env, BOOTENV_VERSION); + switch (bootenv->vbe_version) { + case VB_RAW: + if (nvlist_lookup_string(env, GRUB_ENVMAP, &nvbuf) == 0) { + (void) strlcpy(bootenv->vbe_bootenv, nvbuf, nvsize); + } + error = 0; + break; + + case VB_NVLIST: + error = nvlist_pack(env, &nvbuf, &nvsize, NV_ENCODE_XDR, + KM_SLEEP); + break; + + default: + error = EINVAL; + break; + } + + if (error == 0) { + bootenv->vbe_version = htonll(bootenv->vbe_version); + abd_return_buf_copy(abd, bootenv, VDEV_PAD_SIZE); + } else { + abd_free(abd); + return (SET_ERROR(error)); + } + +retry: + zio = zio_root(spa, NULL, NULL, flags); + for (int l = 0; l < VDEV_LABELS; l++) { + vdev_label_write(zio, vd, l, abd, + offsetof(vdev_label_t, vl_be), + VDEV_PAD_SIZE, NULL, NULL, flags); + } + + error = zio_wait(zio); + if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) { + flags |= ZIO_FLAG_TRYHARD; + goto retry; + } + + abd_free(abd); + return (error); +} + +/* * ========================================================================== * uberblock load/sync * ========================================================================== diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c index 76c7170d38..77d9b48982 100644 --- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c +++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c @@ -3608,6 +3608,58 @@ zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) } /* + * This ioctl is used to set the bootenv configuration on the current + * pool. This configuration is stored in the second padding area of the label, + * and it is used by the bootloader(s) to store bootloader and/or system + * specific data. + * The data is stored as nvlist data stream, and is protected by + * an embedded checksum. + * The version can have two possible values: + * VB_RAW: nvlist should have key GRUB_ENVMAP, value DATA_TYPE_STRING. + * VB_NVLIST: nvlist with arbitrary <key, value> pairs. + */ +static const zfs_ioc_key_t zfs_keys_set_bootenv[] = { + {"version", DATA_TYPE_UINT64, 0}, + {"<keys>", DATA_TYPE_ANY, ZK_OPTIONAL | ZK_WILDCARDLIST}, +}; + +static int +zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, + nvlist_t *outnvl __unused) +{ + int error; + spa_t *spa; + + if ((error = spa_open(name, &spa, FTAG)) != 0) + return (error); + spa_vdev_state_enter(spa, SCL_ALL); + error = vdev_label_write_bootenv(spa->spa_root_vdev, innvl); + (void) spa_vdev_state_exit(spa, NULL, 0); + spa_close(spa, FTAG); + return (error); +} + +static const zfs_ioc_key_t zfs_keys_get_bootenv[] = { + /* no nvl keys */ +}; + +static int +zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl __unused, + nvlist_t *outnvl) +{ + spa_t *spa; + int error; + + if ((error = spa_open(name, &spa, FTAG)) != 0) + return (error); + spa_vdev_state_enter(spa, SCL_ALL); + error = vdev_label_read_bootenv(spa->spa_root_vdev, outnvl); + (void) spa_vdev_state_exit(spa, NULL, 0); + spa_close(spa, FTAG); + return (error); +} + +/* * The dp_config_rwlock must not be held when calling this, because the * unmount may need to write out data. * @@ -6813,6 +6865,16 @@ zfs_ioctl_init(void) POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_pool_trim, ARRAY_SIZE(zfs_keys_pool_trim)); + zfs_ioctl_register("set_bootenv", ZFS_IOC_SET_BOOTENV, + zfs_ioc_set_bootenv, zfs_secpolicy_config, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE, + zfs_keys_set_bootenv, ARRAY_SIZE(zfs_keys_set_bootenv)); + + zfs_ioctl_register("get_bootenv", ZFS_IOC_GET_BOOTENV, + zfs_ioc_get_bootenv, zfs_secpolicy_none, POOL_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE, + zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv)); + /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy("pool_freeze", ZFS_IOC_POOL_FREEZE, diff --git a/usr/src/uts/common/sys/fs/zfs.h b/usr/src/uts/common/sys/fs/zfs.h index 819905a8d9..73050319b9 100644 --- a/usr/src/uts/common/sys/fs/zfs.h +++ b/usr/src/uts/common/sys/fs/zfs.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2020 Joyent, Inc. @@ -1173,7 +1173,14 @@ typedef struct ddt_histogram { * /dev/zfs ioctl numbers. */ typedef enum zfs_ioc { + /* + * Core features - 81/128 numbers reserved. + */ +#ifdef __FreeBSD__ + ZFS_IOC_FIRST = 0, +#else ZFS_IOC_FIRST = ('Z' << 8), +#endif ZFS_IOC = ZFS_IOC_FIRST, ZFS_IOC_POOL_CREATE = ZFS_IOC_FIRST, /* 0x5a00 */ ZFS_IOC_POOL_DESTROY, /* 0x5a01 */ @@ -1257,6 +1264,19 @@ typedef enum zfs_ioc { ZFS_IOC_POOL_TRIM, /* 0x5a50 */ ZFS_IOC_REDACT, /* 0x5a51 */ ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */ + + /* + * Per-platform (Optional) - 8/128 numbers reserved. + */ + ZFS_IOC_PLATFORM = ZFS_IOC_FIRST + 0x80, + ZFS_IOC_EVENTS_NEXT, /* 0x81 (Linux) */ + ZFS_IOC_EVENTS_CLEAR, /* 0x82 (Linux) */ + ZFS_IOC_EVENTS_SEEK, /* 0x83 (Linux) */ + ZFS_IOC_NEXTBOOT, /* 0x84 (FreeBSD) */ + ZFS_IOC_JAIL, /* 0x85 (FreeBSD) */ + ZFS_IOC_UNJAIL, /* 0x86 (FreeBSD) */ + ZFS_IOC_SET_BOOTENV, /* 0x87 */ + ZFS_IOC_GET_BOOTENV, /* 0x88 */ ZFS_IOC_LAST } zfs_ioc_t; diff --git a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c index e49f583772..61a50b418c 100644 --- a/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c +++ b/usr/src/uts/i86pc/io/vmm/io/vpmtmr.c @@ -78,6 +78,7 @@ vpmtmr_init(struct vm *vm) struct bintime bt; vpmtmr = malloc(sizeof(struct vpmtmr), M_VPMTMR, M_WAITOK | M_ZERO); + vpmtmr->vm = vm; vpmtmr->baseuptime = sbinuptime(); vpmtmr->baseval = 0; @@ -87,9 +88,35 @@ vpmtmr_init(struct vm *vm) return (vpmtmr); } +static int +vpmtmr_detach_ioport(struct vpmtmr *vpmtmr) +{ + if (vpmtmr->io_cookie != NULL) { + ioport_handler_t old_func; + void *old_arg; + int err; + + err = vm_ioport_detach(vpmtmr->vm, &vpmtmr->io_cookie, + &old_func, &old_arg); + if (err != 0) { + return (err); + } + + ASSERT3P(old_func, ==, vpmtmr_handler); + ASSERT3P(old_arg, ==, vpmtmr); + ASSERT3P(vpmtmr->io_cookie, ==, NULL); + vpmtmr->io_port = 0; + } + return (0); +} + void vpmtmr_cleanup(struct vpmtmr *vpmtmr) { + int err; + + err = vpmtmr_detach_ioport(vpmtmr); + VERIFY3P(err, ==, 0); free(vpmtmr, M_VPMTMR); } @@ -101,23 +128,13 @@ vpmtmr_set_location(struct vm *vm, uint16_t ioport) int err; if (vpmtmr->io_cookie != NULL) { - ioport_handler_t old_func; - void *old_arg; - if (vpmtmr->io_port == ioport) { /* already attached in the right place */ return (0); } - err = vm_ioport_detach(vm, &vpmtmr->io_cookie, &old_func, - &old_arg); - if (err != 0) { - return (err); - } - - ASSERT3P(old_func, ==, vpmtmr_handler); - ASSERT3P(old_arg, ==, vpmtmr); - vpmtmr->io_port = 0; + err = vpmtmr_detach_ioport(vpmtmr); + VERIFY3P(err, ==, 0); } err = vm_ioport_attach(vm, ioport, vpmtmr_handler, vpmtmr, &vpmtmr->io_cookie); diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c index 23a4fecf7b..1821a96fd7 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm.c @@ -501,9 +501,7 @@ vm_init(struct vm *vm, bool create) if (create) vm->vrtc = vrtc_init(vm); - if (create) { - vm_inout_init(vm, &vm->ioports); - } + vm_inout_init(vm, &vm->ioports); CPU_ZERO(&vm->active_cpus); CPU_ZERO(&vm->debug_cpus); @@ -606,15 +604,19 @@ vm_cleanup(struct vm *vm, bool destroy) if (vm->iommu != NULL) iommu_destroy_domain(vm->iommu); - if (destroy) { - vm_inout_cleanup(vm, &vm->ioports); - } + /* + * Devices which attach their own ioport hooks should be cleaned up + * first so they can tear down those registrations. + */ + vpmtmr_cleanup(vm->vpmtmr); + + vm_inout_cleanup(vm, &vm->ioports); if (destroy) vrtc_cleanup(vm->vrtc); else vrtc_reset(vm->vrtc); - vpmtmr_cleanup(vm->vpmtmr); + vatpit_cleanup(vm->vatpit); vhpet_cleanup(vm->vhpet); vatpic_cleanup(vm->vatpic); diff --git a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c index 0ac5e21fd0..31f6ea75b5 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c +++ b/usr/src/uts/i86pc/io/vmm/vmm_instruction_emul.c @@ -993,11 +993,19 @@ vie_emulate_movs(struct vie *vie, struct vm *vm, int vcpuid, uint64_t gpa) */ error = vie_mmio_read(vie, vm, vcpuid, gpa, &val, opsize); - if (error) - goto done; - vm_copyout(vm, vcpuid, &val, copyinfo, opsize); - vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); + if (error == 0) { + vm_copyout(vm, vcpuid, &val, copyinfo, opsize); + } + /* + * Regardless of whether the MMIO read was successful or + * not, the copy resources must be cleaned up. + */ + vm_copy_teardown(vm, vcpuid, copyinfo, + nitems(copyinfo)); + if (error != 0) { + goto done; + } } else { /* * Case (4): read from and write to mmio. @@ -2022,7 +2030,8 @@ vie_emulate_mmio(struct vie *vie, struct vm *vm, int vcpuid) } static int -vie_emulate_inout_port(struct vie *vie, struct vm *vm, int vcpuid) +vie_emulate_inout_port(struct vie *vie, struct vm *vm, int vcpuid, + uint32_t *eax) { uint32_t mask, val; bool in; @@ -2032,18 +2041,19 @@ vie_emulate_inout_port(struct vie *vie, struct vm *vm, int vcpuid) in = (vie->inout.flags & INOUT_IN) != 0; if (!in) { - val = vie->inout.eax & mask; + val = *eax & mask; } if (vie->inout_req_state != VR_DONE) { err = vm_ioport_access(vm, vcpuid, in, vie->inout.port, vie->inout.bytes, &val); + val &= mask; } else { /* * This port access was handled in userspace and the result was * injected in to be handled now. */ - val = vie->inout_req_val; + val = vie->inout_req_val & mask; vie->inout_req_state = VR_NONE; err = 0; } @@ -2057,11 +2067,7 @@ vie_emulate_inout_port(struct vie *vie, struct vm *vm, int vcpuid) } if (in) { - val &= mask; - val |= (vie->inout.eax & ~mask); - err = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX, val); - KASSERT(err == 0, ("emulate_ioport: error %d setting guest " - "rax register", err)); + *eax = (*eax & ~mask) | val; } return (0); } @@ -2156,7 +2162,7 @@ vie_emulate_inout_str(struct vie *vie, struct vm *vm, int vcpuid) vm_copyin(vm, vcpuid, copyinfo, &vie->inout.eax, bytes); } - err = vie_emulate_inout_port(vie, vm, vcpuid); + err = vie_emulate_inout_port(vie, vm, vcpuid, &vie->inout.eax); if (err == 0 && in) { vm_copyout(vm, vcpuid, &vie->inout.eax, copyinfo, bytes); @@ -2217,7 +2223,16 @@ vie_emulate_inout(struct vie *vie, struct vm *vm, int vcpuid) return (EINVAL); } - err = vie_emulate_inout_port(vie, vm, vcpuid); + err = vie_emulate_inout_port(vie, vm, vcpuid, &vie->inout.eax); + if (err == 0 && (vie->inout.flags & INOUT_IN) != 0) { + /* + * With the inX access now a success, the result needs + * to be stored in the guest %rax. + */ + err = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX, + vie->inout.eax); + VERIFY0(err); + } } else { vie->status &= ~VIES_REPEAT; err = vie_emulate_inout_str(vie, vm, vcpuid); |