diff options
author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2020-09-02 12:17:35 +0000 |
---|---|---|
committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2020-09-02 12:17:35 +0000 |
commit | e0f664ec13fc70811953d2a807296a60b253b5a2 (patch) | |
tree | c034770384c9194b006ae6adb1fd7fa3132c3e77 | |
parent | 2eee57b86e034221364b0331aaa53c7f4636a043 (diff) | |
parent | 8515d723262b57176aeeda8734edbe79fe1e7a5a (diff) | |
download | illumos-joyent-e0f664ec13fc70811953d2a807296a60b253b5a2.tar.gz |
[illumos-gate merge]
commit 1c8449e95a93a750df972545379490366b392934
13084 tl_open should handle id_space exhaustion
commit d11e14a72ad0bfccf84405261d5d93e6eaafe6a7
13047 SMB server is too strict about security descriptors
commit bdc3270f393f51a419684e0fd3d7112e9b269773
13045 Idmap's KDC lookup override doesn't work
commit 526073d8a2a73617d29e806e575a271bc992905b
13026 SMB and NFS use the global zone's IDMAP when they shouldn't
commit be4662198f3d57350960479f8997432537cfd6a5
13022 Want method to globally disable HMA
commit c94be9439c4f0773ef60e2cec21d548359cfea20
12780 GRUB is broken after 11479
commit ab26215b1a80ead55969e925a597044ad4185a34
12637 ses_facility.c topo methods are not properly terminated
commit 007ca33219ffdc49281657f5f8a9ee1bbfc367ab
13063 clarify VMCS interfaces in bhyve
13064 bhyve should not waste extra page for VMCS
commit 638bc9f013400030354ab6566ae2a5726f7580fa
12752 getipsecprotobyname(3NSL) synopsis is incomplete
commit 7611ab7b771a068fbab94a90143de2afd495769f
13034 dnode_sync is careless with range tree
34 files changed, 837 insertions, 1156 deletions
@@ -19920,6 +19920,7 @@ d usr/share/man/man9f 0755 root bin f usr/share/man/man9f/ASSERT.9f 0444 root bin s usr/share/man/man9f/AVL_NEXT.9f=avl.9f s usr/share/man/man9f/AVL_PREV.9f=avl.9f +s usr/share/man/man9f/CRED.9f=credentials.9f f usr/share/man/man9f/Intro.9f 0444 root bin f usr/share/man/man9f/OTHERQ.9f 0444 root bin f usr/share/man/man9f/RD.9f 0444 root bin @@ -20102,6 +20103,10 @@ f usr/share/man/man9f/copyb.9f 0444 root bin f usr/share/man/man9f/copyin.9f 0444 root bin f usr/share/man/man9f/copymsg.9f 0444 root bin f usr/share/man/man9f/copyout.9f 0444 root bin +f usr/share/man/man9f/credentials.9f 0444 root bin +s usr/share/man/man9f/crdup.9f=credentials.9f +s usr/share/man/man9f/crfree.9f=credentials.9f +s usr/share/man/man9f/crget.9f=credentials.9f s usr/share/man/man9f/crgetgid.9f=ddi_cred.9f s usr/share/man/man9f/crgetgroups.9f=ddi_cred.9f s usr/share/man/man9f/crgetngroups.9f=ddi_cred.9f @@ -20111,6 +20116,7 @@ s usr/share/man/man9f/crgetsgid.9f=ddi_cred.9f s usr/share/man/man9f/crgetsuid.9f=ddi_cred.9f s usr/share/man/man9f/crgetuid.9f=ddi_cred.9f s usr/share/man/man9f/crgetzoneid.9f=ddi_cred.9f +s usr/share/man/man9f/crhold.9f=credentials.9f f usr/share/man/man9f/csx_AccessConfigurationRegister.9f 0444 root bin f usr/share/man/man9f/csx_CS_DDI_Info.9f 0444 root bin f usr/share/man/man9f/csx_ConvertSize.9f 0444 root bin @@ -20603,6 +20609,7 @@ s usr/share/man/man9f/inl.9f=inb.9f f usr/share/man/man9f/insq.9f 0444 root bin s usr/share/man/man9f/intro.9f=Intro.9f s usr/share/man/man9f/inw.9f=inb.9f +s usr/share/man/man9f/kcred.9f=credentials.9f f usr/share/man/man9f/kiconv.9f 0444 root bin f usr/share/man/man9f/kiconv_close.9f 0444 root bin f usr/share/man/man9f/kiconv_open.9f 0444 root bin @@ -21210,6 +21217,7 @@ s usr/share/man/man9f/vsprintf.9f=sprintf.9f s usr/share/man/man9f/vzcmn_err.9f=cmn_err.9f s usr/share/man/man9f/wr.9f=WR.9f s usr/share/man/man9f/zcmn_err.9f=cmn_err.9f +s usr/share/man/man9f/zone_kcred.9f=credentials.9f d usr/share/man/man9p 0755 root bin s usr/share/man/man9p/Nblock.9p=size.9p s usr/share/man/man9p/blksize.9p=size.9p diff --git a/usr/src/cmd/bhyve/bhyverun.c b/usr/src/cmd/bhyve/bhyverun.c index bb3e0721c8..c4ebef1b31 100644 --- a/usr/src/cmd/bhyve/bhyverun.c +++ b/usr/src/cmd/bhyve/bhyverun.c @@ -741,7 +741,12 @@ vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) return (VMEXIT_CONTINUE); } +#ifdef __FreeBSD__ #define DEBUG_EPT_MISCONFIG +#else +/* EPT misconfig debugging not possible now that raw VMCS access is gone */ +#endif + #ifdef DEBUG_EPT_MISCONFIG #define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 diff --git a/usr/src/cmd/bhyvectl/bhyvectl.c b/usr/src/cmd/bhyvectl/bhyvectl.c index 22c72cf5df..e0041ede30 100644 --- a/usr/src/cmd/bhyvectl/bhyvectl.c +++ b/usr/src/cmd/bhyvectl/bhyvectl.c @@ -400,6 +400,7 @@ dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu) #define MSR_AMD7TH_START 0xC0010000 #define MSR_AMD7TH_END 0xC0011FFF +#ifdef __FreeBSD__ static const char * msr_name(uint32_t msr) { @@ -563,6 +564,21 @@ vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val) return (vm_set_register(ctx, vcpu, VMCS_IDENT(field), val)); } +#else /* __FreeBSD__ */ +/* VMCS does not allow arbitrary reads/writes */ +static int +vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val) +{ + *ret_val = 0; + return (0); +} + +static int +vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val) +{ + return (EINVAL); +} +#endif /* __FreeBSD__ */ static int vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, @@ -2182,8 +2198,15 @@ main(int argc, char *argv[]) &addr); } +#ifdef __FreeBSD__ if (error == 0) error = dump_msr_bitmap(vcpu, addr, cpu_intel); +#else + /* + * Skip dumping the MSR bitmap since raw access to the VMCS is + * currently not possible. + */ +#endif /* __FreeBSD__ */ } if (!error && (get_vpid_asid || get_all)) { diff --git a/usr/src/cmd/idmap/idmapd/krb5_lookup.c b/usr/src/cmd/idmap/idmapd/krb5_lookup.c index a45fc5d8f3..bdd0d56759 100644 --- a/usr/src/cmd/idmap/idmapd/krb5_lookup.c +++ b/usr/src/cmd/idmap/idmapd/krb5_lookup.c @@ -10,7 +10,7 @@ */ /* - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + * Copyright 2020 Nexenta by DDN, Inc. All rights reserved. */ #include <stdio.h> @@ -27,6 +27,8 @@ #include <synch.h> #include <thread.h> +#include <ads/dsgetdc.h> + #include "idmapd.h" #include "libadutils.h" #include "locate_plugin.h" @@ -102,6 +104,12 @@ _krb5_override_service_locator( goto out; } + if ((ds->flags & DS_KDC_FLAG) == 0) { + idmapdlog(LOG_WARNING, "Domain Controller is not a KDC: " + "Kerberos auth may be slow"); + goto out; + } + switch (family) { case AF_UNSPEC: break; /* OK */ diff --git a/usr/src/cmd/idmap/idmapd/mapfile-intf b/usr/src/cmd/idmap/idmapd/mapfile-intf index 1ab5c033d7..d2f8c7cb04 100644 --- a/usr/src/cmd/idmap/idmapd/mapfile-intf +++ b/usr/src/cmd/idmap/idmapd/mapfile-intf @@ -21,6 +21,7 @@ # # Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright 2019 Nexenta by DDN, Inc. All rights reserved. # # @@ -43,4 +44,5 @@ $mapfile_version 2 SYMBOL_SCOPE { global: app_krb5_user_uid; + _krb5_override_service_locator; }; diff --git a/usr/src/cmd/smbsrv/smbd/smbd_krb5lookup.c b/usr/src/cmd/smbsrv/smbd/smbd_krb5lookup.c index af98b15b1b..83bc6b21d2 100644 --- a/usr/src/cmd/smbsrv/smbd/smbd_krb5lookup.c +++ b/usr/src/cmd/smbsrv/smbd/smbd_krb5lookup.c @@ -10,7 +10,7 @@ */ /* - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + * Copyright 2020 Nexenta by DDN, Inc. All rights reserved. */ #include <stdio.h> @@ -26,6 +26,7 @@ #include <sys/note.h> #include <smbsrv/libsmbns.h> +#include <ads/dsgetdc.h> #include "smbd.h" #include "locate_plugin.h" @@ -107,6 +108,13 @@ _krb5_override_service_locator( dxi.d_dci.dc_addr.a_family == 0) return (KRB5_REALM_CANT_RESOLVE); + if ((dxi.d_dci.dc_flags & DS_KDC_FLAG) == 0) { + smbd_report("_krb5_override_service_locator: " + "Domain Controller is not a KDC: " + "Kerberos auth may be slow"); + return (rc); + } + switch (family) { case AF_UNSPEC: break; /* OK */ diff --git a/usr/src/grub/grub-0.97/stage2/zfs-include/dmu_objset.h b/usr/src/grub/grub-0.97/stage2/zfs-include/dmu_objset.h index 8d1cf8ea33..5e3bd528e8 100644 --- a/usr/src/grub/grub-0.97/stage2/zfs-include/dmu_objset.h +++ b/usr/src/grub/grub-0.97/stage2/zfs-include/dmu_objset.h @@ -20,19 +20,32 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright 2020 RackTop Systems, Inc. + */ #ifndef _SYS_DMU_OBJSET_H #define _SYS_DMU_OBJSET_H +#define OBJSET_PHYS_SIZE_V1 1024 +#define OBJSET_PHYS_SIZE_V2 2048 +#define OBJSET_PHYS_SIZE_V3 4096 + typedef struct objset_phys { dnode_phys_t os_meta_dnode; zil_header_t os_zil_header; uint64_t os_type; uint64_t os_flags; - char os_pad[2048 - sizeof (dnode_phys_t)*3 - - sizeof (zil_header_t) - sizeof (uint64_t)*2]; + uint8_t os_portable_mac[ZIO_OBJSET_MAC_LEN]; + uint8_t os_local_mac[ZIO_OBJSET_MAC_LEN]; + char os_pad0[OBJSET_PHYS_SIZE_V2 - sizeof (dnode_phys_t)*3 - + sizeof (zil_header_t) - sizeof (uint64_t)*2 - + 2*ZIO_OBJSET_MAC_LEN]; dnode_phys_t os_userused_dnode; dnode_phys_t os_groupused_dnode; + dnode_phys_t os_projectused_dnode; + char os_pad1[OBJSET_PHYS_SIZE_V3 - OBJSET_PHYS_SIZE_V2 - + sizeof (dnode_phys_t)]; } objset_phys_t; #endif /* _SYS_DMU_OBJSET_H */ diff --git a/usr/src/grub/grub-0.97/stage2/zfs-include/zio.h b/usr/src/grub/grub-0.97/stage2/zfs-include/zio.h index 434a2f2ef7..87a40d5a37 100644 --- a/usr/src/grub/grub-0.97/stage2/zfs-include/zio.h +++ b/usr/src/grub/grub-0.97/stage2/zfs-include/zio.h @@ -22,6 +22,7 @@ */ /* * Copyright 2013 by Saso Kiselkov. All rights reserved. + * Copyright 2020 RackTop Systems, Inc. */ #ifndef _ZIO_H @@ -74,6 +75,9 @@ enum zio_checksum { ZIO_CHECKSUM_FUNCTIONS }; +/* macros defining encryption lengths */ +#define ZIO_OBJSET_MAC_LEN 32 + enum zio_compress { ZIO_COMPRESS_INHERIT = 0, ZIO_COMPRESS_ON, diff --git a/usr/src/lib/fm/topo/modules/common/ses/ses_facility.c b/usr/src/lib/fm/topo/modules/common/ses/ses_facility.c index 2c4a1314e3..faef123480 100644 --- a/usr/src/lib/fm/topo/modules/common/ses/ses_facility.c +++ b/usr/src/lib/fm/topo/modules/common/ses/ses_facility.c @@ -113,7 +113,8 @@ typedef struct ses_sensor_desc { static const topo_method_t ses_indicator_methods[] = { { "ses_indicator_mode", TOPO_PROP_METH_DESC, TOPO_METH_SES_MODE_VERSION, TOPO_STABILITY_INTERNAL, - ses_indicator_mode } + ses_indicator_mode }, + { NULL } }; static const topo_method_t ses_sensor_methods[] = { @@ -126,6 +127,7 @@ static const topo_method_t ses_sensor_methods[] = { { "ses_psu_state", TOPO_PROP_METH_DESC, TOPO_METH_SES_PSU_VERSION, TOPO_STABILITY_INTERNAL, ses_psu_state }, + { NULL } }; /* diff --git a/usr/src/lib/gss_mechs/mech_krb5/krb5/krb/bld_princ.c b/usr/src/lib/gss_mechs/mech_krb5/krb5/krb/bld_princ.c index 505bde065e..a456aa04c8 100644 --- a/usr/src/lib/gss_mechs/mech_krb5/krb5/krb/bld_princ.c +++ b/usr/src/lib/gss_mechs/mech_krb5/krb5/krb/bld_princ.c @@ -4,6 +4,8 @@ * Copyright 1991 by the Massachusetts Institute of Technology. * All Rights Reserved. * + * Copyright 2020 Nexenta by DDN, Inc. All rights reserved. + * * Export of this software from the United States of America may * require a specific license from the United States Government. * It is the responsibility of any person or organization contemplating @@ -48,13 +50,14 @@ krb5_build_principal_va(krb5_context context, krb5_principal princ, unsigned int if (data == 0) return ENOMEM; krb5_princ_set_realm_length(context, princ, rlen); - tmpdata = malloc(rlen); + tmpdata = malloc(rlen + 1); if (!tmpdata) { free (data); return ENOMEM; } krb5_princ_set_realm_data(context, princ, tmpdata); memcpy(tmpdata, realm, rlen); + tmpdata[rlen] = '\0'; /* process rest of components */ diff --git a/usr/src/lib/smbsrv/libsmb/common/libsmb.h b/usr/src/lib/smbsrv/libsmb/common/libsmb.h index 4f08abcfca..362c15c294 100644 --- a/usr/src/lib/smbsrv/libsmb/common/libsmb.h +++ b/usr/src/lib/smbsrv/libsmb/common/libsmb.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2019 Nexenta Systems, Inc. All rights reserved. + * Copyright 2019 Nexenta by DDN, Inc. All rights reserved. * Copyright 2020 RackTop Systems, Inc. */ @@ -641,6 +641,7 @@ typedef struct smb_trusted_domains { typedef struct smb_dcinfo { char dc_name[MAXHOSTNAMELEN]; smb_inaddr_t dc_addr; + uint32_t dc_flags; } smb_dcinfo_t; /* diff --git a/usr/src/lib/smbsrv/libsmbns/common/libsmbns.h b/usr/src/lib/smbsrv/libsmbns/common/libsmbns.h index 11396695d2..fc8bd69957 100644 --- a/usr/src/lib/smbsrv/libsmbns/common/libsmbns.h +++ b/usr/src/lib/smbsrv/libsmbns/common/libsmbns.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + * Copyright 2019 Nexenta by DDN, Inc. All rights reserved. */ #ifndef _LIBSMBNS_H @@ -50,6 +50,7 @@ typedef struct smb_ads_host_info { int port; /* ldap port */ int priority; /* DNS SRV record priority */ int weight; /* DNS SRV record weight */ + uint32_t flags; /* DC flags */ smb_inaddr_t ipaddr; /* network byte order */ } smb_ads_host_info_t; diff --git a/usr/src/lib/smbsrv/libsmbns/common/smbns_ads.c b/usr/src/lib/smbsrv/libsmbns/common/smbns_ads.c index 5f797a38aa..44ae747bbf 100644 --- a/usr/src/lib/smbsrv/libsmbns/common/smbns_ads.c +++ b/usr/src/lib/smbsrv/libsmbns/common/smbns_ads.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + * Copyright 2019 Nexenta by DDN, Inc. All rights reserved. */ #include <sys/param.h> @@ -476,6 +476,8 @@ again: goto out; } + host->flags = dci->Flags; + (void) mutex_lock(&smb_ads_cached_host_mtx); if (!smb_ads_cached_host_info) smb_ads_cached_host_info = smb_ads_dup_host_info(host); @@ -1974,6 +1976,7 @@ smb_ads_lookup_msdcs(char *fqdn, smb_dcinfo_t *dci) (void) strlcpy(dci->dc_name, hinfo->name, sizeof (dci->dc_name)); dci->dc_addr = hinfo->ipaddr; + dci->dc_flags = hinfo->flags; free(hinfo); return (NT_STATUS_SUCCESS); diff --git a/usr/src/man/man3nsl/getipsecprotobyname.3nsl b/usr/src/man/man3nsl/getipsecprotobyname.3nsl index 41a8a09310..fe636c4fd6 100644 --- a/usr/src/man/man3nsl/getipsecprotobyname.3nsl +++ b/usr/src/man/man3nsl/getipsecprotobyname.3nsl @@ -1,30 +1,27 @@ '\" te .\" Copyright (C) 2003, Sun Microsystems, Inc. All Rights Reserved +.\" Copyright (C) 2020, Sergio Aguayo. All Rights Reserved .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH GETIPSECPROTOBYNAME 3NSL "Aug 13, 2003" +.TH GETIPSECPROTOBYNAME 3NSL "Aug 27, 2020" .SH NAME getipsecprotobyname, getipsecprotobynum \- query IPsec protocols entries .SH SYNOPSIS -.LP .nf \fBcc \fR\fB-flag \&.\|.\|.\fR \fIfile \fR\&.\|.\|.\fB-l\fRnsl [\fB -library \&.\|.\|. \fR] #include <netdb.h> - -\fBint\fR \fBgetipsecprotobyname\fR(\fBconst char *\fR\fIproto_name\fR +\fBint\fR \fBgetipsecprotobyname\fR(\fBconst char *\fR\fIproto_name\fR); .fi .LP .nf -\fBchar *\fR\fBgetipsecprotobynum\fR(\fBint\fR \fIproto_num\fRp +\fBchar *\fR\fBgetipsecprotobynum\fR(\fBint\fR \fIproto_num\fR); .fi .SH DESCRIPTION -.sp -.LP Use the \fBgetipsecprotobyname()\fR and \fBgetipsecprotobynum()\fR functions to obtain the IPsec algorithm mappings that are defined by \fBipsecalgs\fR(1M). You can also use the \fBgetipsecprotobyname()\fR and \fBgetipsecprotobynum()\fR @@ -66,7 +63,6 @@ provide authentication. .RE .SH PARAMETERS -.sp .ne 2 .na \fB\fIproto_name\fR\fR @@ -81,12 +77,10 @@ A pointer to the name of an IPsec protocol. \fB\fIproto_num\fR\fR .ad .RS 14n -A pointer to a protocol number. conditions. +A pointer to a protocol number. .RE .SH RETURN VALUES -.sp -.LP The \fBgetipsecprotobyname()\fR function returns a protocol number upon success, or \fB-1\fR if the protocol specified does not exist. .sp @@ -94,12 +88,8 @@ success, or \fB-1\fR if the protocol specified does not exist. The \fBgetipsecprotobynum()\fR function returns a protocol name upon success, or the \fINULL\fR value if the protocol number specified does not exist. .SH ATTRIBUTES -.sp -.LP See \fBattributes\fR(5) for descriptions of the following attributes: .sp - -.sp .TS box; c | c @@ -111,8 +101,6 @@ Interface Stability Evolving .TE .SH SEE ALSO -.sp -.LP \fBipsecalgs\fR(1M), \fBgetipsecalgbyname\fR(3NSL), \fBgetipsecalgbyname\fR(3NSL), \fBattributes\fR(5) .sp diff --git a/usr/src/man/man9f/Makefile b/usr/src/man/man9f/Makefile index 82b64823b1..a61f028374 100644 --- a/usr/src/man/man9f/Makefile +++ b/usr/src/man/man9f/Makefile @@ -13,7 +13,7 @@ # Copyright 2017, Richard Lowe # Copyright 2014 Garrett D'Amore <garrett@damore> # Copyright 2019 Joyent, Inc. -# Copyright 2016 Nexenta Systems, Inc. +# Copyright 2020 Nexenta by DDN, Inc. All rights reserved. # Copyright 2016 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org> # @@ -70,6 +70,7 @@ MANFILES= ASSERT.9f \ copyin.9f \ copymsg.9f \ copyout.9f \ + credentials.9f \ csx_AccessConfigurationRegister.9f \ csx_CS_DDI_Info.9f \ csx_ConvertSize.9f \ @@ -556,6 +557,7 @@ MANFILES= ASSERT.9f \ MANLINKS= AVL_NEXT.9f \ AVL_PREV.9f \ + CRED.9f \ SIZEOF_PTR.9f \ SIZEOF_STRUCT.9f \ STRUCT_BUF.9f \ @@ -690,6 +692,9 @@ MANLINKS= AVL_NEXT.9f \ avl_remove.9f \ avl_swap.9f \ bcanputnext.9f \ + crdup.9f \ + crfree.9f \ + crget.9f \ crgetgid.9f \ crgetgroups.9f \ crgetngroups.9f \ @@ -699,6 +704,7 @@ MANLINKS= AVL_NEXT.9f \ crgetsuid.9f \ crgetuid.9f \ crgetzoneid.9f \ + crhold.9f \ csx_Get16.9f \ csx_Get32.9f \ csx_Get64.9f \ @@ -971,6 +977,7 @@ MANLINKS= AVL_NEXT.9f \ inl.9f \ intro.9f \ inw.9f \ + kcred.9f \ kmem_cache_alloc.9f \ kmem_cache_destroy.9f \ kmem_cache_free.9f \ @@ -1312,7 +1319,8 @@ MANLINKS= AVL_NEXT.9f \ vsprintf.9f \ vzcmn_err.9f \ wr.9f \ - zcmn_err.9f + zcmn_err.9f \ + zone_kcred.9f assert.9f := LINKSRC = ASSERT.9f @@ -1485,6 +1493,14 @@ cv_timedwait_sig.9f := LINKSRC = condvar.9f cv_wait.9f := LINKSRC = condvar.9f cv_wait_sig.9f := LINKSRC = condvar.9f +CRED.9f := LINKSRC = credentials.9f +crdup.9f := LINKSRC = credentials.9f +crfree.9f := LINKSRC = credentials.9f +crget.9f := LINKSRC = credentials.9f +crhold.9f := LINKSRC = credentials.9f +kcred.9f := LINKSRC = credentials.9f +zone_kcred.9f := LINKSRC = credentials.9f + csx_Get16.9f := LINKSRC = csx_Get8.9f csx_Get32.9f := LINKSRC = csx_Get8.9f csx_Get64.9f := LINKSRC = csx_Get8.9f diff --git a/usr/src/man/man9f/credentials.9f b/usr/src/man/man9f/credentials.9f new file mode 100644 index 0000000000..89d2138c95 --- /dev/null +++ b/usr/src/man/man9f/credentials.9f @@ -0,0 +1,201 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright 2020 Nexenta by DDN, Inc. All rights reserved. +.\" +.Dd Aug 21, 2020 +.Dt CREDENTIALS 9F +.Os +.Sh NAME +.Nm credentials , +.Nm CRED , +.Nm crdup , +.Nm crfree , +.Nm crget , +.Nm crhold , +.Nm kcred , +.Nm zone_kcred +.Nd Functions for obtaining credentials in the kernel +.Sh SYNOPSIS +.In sys/cred.h +.Ft "cred_t *" +.Fo CRED +.Fc +.Ft "cred_t *" +.Fo crdup +.Fa "cred_t *cr" +.Fc +.Ft "void" +.Fo crfree +.Fa "cred_t *cr" +.Fc +.Ft "cred_t *" +.Fo crget +.Fc +.Ft "void" +.Fo crhold +.Fa "cred_t *cr" +.Fc +.Ft "cred_t *" +.Fo zone_kcred +.Fc +.Vt cred_t *kcred +.Sh INTERFACE LEVEL +.Sy Volatile - +This interface is still evolving in illumos. +API and ABI stability is not guaranteed. +.Sh PARAMETERS +.Bl -tag -width Fa +.It Fa cr +pointer to the user credential structure. +.El +.Sh DESCRIPTION +Some kernel interfaces require a credential as an argument. +This page documents the credentials available in the system, +as well as interfaces for creating new credentials. +.Pp +Most users do not need to create new credentials. +Instead, users should generally use the credentials of the executing context. +.Pp +This interface is primarily intended for services that must perform operations +on behalf of multiple remotely authenticated users, whose authentication context +is unrelated to the context of the executing thread or process. +Drivers MUST NOT create new credentials. +Drivers should use the provided credential. +.Pp +For functions that do not return new credentials, +if the credentials will be used outside of their context +(i.e. if the output of zone_kcred() is referenced outside of the zone), +then one should use +.Fn crdup +or +.Fn crhold +to ensure that the credentials remain valid. +.Ss Fn CRED +The +.Fn CRED +function returns the credential of the calling thread. +Its contents depend on the calling context (user, kernel, interrupt). +.Ss Fn crdup +.Fn crdup +returns a newly-allocated copy of +.Fa cr +with reference count of 1. +It sleeps until the allocation succeeds. +.Ss Fn crfree +.Fn crfree +releases a reference to +.Fa cr . +If this is the last reference, the credential is destroyed. +.Ss Fn crhold +.Fn crhold +takes a reference to +.Fa cr . +.Ss Va kcred +.Va kcred +is the root credential of the global zone. +Its UIDs and GIDs are all 0. +It has the following privilege sets by default: +.Bd -literal -offset indent +E: basic,proc_secflags +I: basic,proc_secflags +P: basic,proc_secflags +L: all +.Ed +.Pp +.Dv NET_MAC_AWARE +is set in the credential's flags. +It is not marked privilege-aware. +.Pp +.Va kcred +will never be freed by the system. +.Ss Fn zone_kcred +The +.Fn zone_kcred +function returns the root credential of the zone to which the calling thread belongs. +This cred is derived from the global kcred, minus any privileges denied to the zone. +.Ss Fn crget +The +.Fn crget +function returns a copy of +.Fn zone_kcred +suitable for modification by the caller. +This is useful for obtaining a default, +well-initialized credential in the appropriate zone context, +that is free of privileges or limitations of the originating thread. +It must be freed with +.Fn crfree . +It sleeps until the allocation succeeds. +.Ss Considerations +.Va kcred +and +.Fn zone_kcred +are not privilege-aware, and have all IDs set to 0. +This causes their Limit set to be used in place of the Effective and Permitted sets, +which significantly expands their privileges. +.Pp +If the output of +.Fn crget +is not later marked as privilege aware, and its UID is not set to a non-zero value, +then its Limit set will be used in place of its Effective and Permitted sets, +significantly expanding its privileges. +Callers should either mark the credential as privilege-aware, +reduce the Limit set appropriately, +or ensure that they intend for zero-uid users to have expanded privileges. +.Pp +.Va kcred , +.Fn zone_kcred , +and +.Fn CRED +are not suitable for modfication by the caller. +Callers must use +.Fn crdup +to create a copy of these credentials that are suitable for modification. +.Pp +Callers of +.Fn zone_kcred +and +.Fn crget +must take care to ensure that the calling thread is +executing in the context of the appropriate zone. +If the thread is performing work on behalf of a different zone, +or if one is uncertain of the zone context of the calling thread, +then one should find the appropriate zone by other means, and reference +.Em zone->zone_kcred +explicitly. +.Sh CONTEXT +These functions can be called from +.Sy user +and +.Sy kernel +contexts. +.Sh RETURN VALUES +.Fn zone_kcred +and +.Fn CRED +return a pointer to a +.Vt cred_t +that should not be modified. +.Pp +.Fn crget +and +.Fn crdup +return a pointer to a newly allocated +.Vt cred_t . +.Pp +.Fn zone_kcred , +.Fn CRED , +.Fn crdup , +and +.Fn crget +can never fail, and always return a valid credential. +.Sh SEE ALSO +.Xr ddi_cred 9f diff --git a/usr/src/pkg/manifests/system-kernel.man9f.inc b/usr/src/pkg/manifests/system-kernel.man9f.inc index f02a356bd8..1eb70a7e0a 100644 --- a/usr/src/pkg/manifests/system-kernel.man9f.inc +++ b/usr/src/pkg/manifests/system-kernel.man9f.inc @@ -12,7 +12,7 @@ # # Copyright 2017, Richard Lowe # Copyright 2014 Garrett D'Amore <garrett@damore.org> -# Copyright 2016 Nexenta Systems, Inc. +# Copyright 2020 Nexenta by DDN, Inc. All rights reserved. # Copyright 2016 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org> # Copyright 2019 Joyent, Inc. # @@ -66,6 +66,7 @@ file path=usr/share/man/man9f/copyb.9f file path=usr/share/man/man9f/copyin.9f file path=usr/share/man/man9f/copymsg.9f file path=usr/share/man/man9f/copyout.9f +file path=usr/share/man/man9f/credentials.9f file path=usr/share/man/man9f/csx_AccessConfigurationRegister.9f file path=usr/share/man/man9f/csx_CS_DDI_Info.9f file path=usr/share/man/man9f/csx_ConvertSize.9f @@ -513,6 +514,7 @@ file path=usr/share/man/man9f/vmem_create.9f file path=usr/share/man/man9f/vmem_walk.9f link path=usr/share/man/man9f/AVL_NEXT.9f target=avl.9f link path=usr/share/man/man9f/AVL_PREV.9f target=avl.9f +link path=usr/share/man/man9f/CRED.9f target=credentials.9f link path=usr/share/man/man9f/SIZEOF_PTR.9f target=STRUCT_DECL.9f link path=usr/share/man/man9f/SIZEOF_STRUCT.9f target=STRUCT_DECL.9f link path=usr/share/man/man9f/STRUCT_BUF.9f target=STRUCT_DECL.9f @@ -647,6 +649,9 @@ link path=usr/share/man/man9f/avl_numnodes.9f target=avl.9f link path=usr/share/man/man9f/avl_remove.9f target=avl.9f link path=usr/share/man/man9f/avl_swap.9f target=avl.9f link path=usr/share/man/man9f/bcanputnext.9f target=canputnext.9f +link path=usr/share/man/man9f/crdup.9f target=credentials.9f +link path=usr/share/man/man9f/crfree.9f target=credentials.9f +link path=usr/share/man/man9f/crget.9f target=credentials.9f link path=usr/share/man/man9f/crgetgid.9f target=ddi_cred.9f link path=usr/share/man/man9f/crgetgroups.9f target=ddi_cred.9f link path=usr/share/man/man9f/crgetngroups.9f target=ddi_cred.9f @@ -656,6 +661,7 @@ link path=usr/share/man/man9f/crgetsgid.9f target=ddi_cred.9f link path=usr/share/man/man9f/crgetsuid.9f target=ddi_cred.9f link path=usr/share/man/man9f/crgetuid.9f target=ddi_cred.9f link path=usr/share/man/man9f/crgetzoneid.9f target=ddi_cred.9f +link path=usr/share/man/man9f/crhold.9f target=credentials.9f link path=usr/share/man/man9f/csx_Get16.9f target=csx_Get8.9f link path=usr/share/man/man9f/csx_Get32.9f target=csx_Get8.9f link path=usr/share/man/man9f/csx_Get64.9f target=csx_Get8.9f @@ -971,6 +977,7 @@ link path=usr/share/man/man9f/id_space_extend.9f target=id_space.9f link path=usr/share/man/man9f/inl.9f target=inb.9f link path=usr/share/man/man9f/intro.9f target=Intro.9f link path=usr/share/man/man9f/inw.9f target=inb.9f +link path=usr/share/man/man9f/kcred.9f target=credentials.9f link path=usr/share/man/man9f/kmem_cache_alloc.9f target=kmem_cache_create.9f link path=usr/share/man/man9f/kmem_cache_destroy.9f \ target=kmem_cache_create.9f @@ -1404,3 +1411,4 @@ link path=usr/share/man/man9f/vsprintf.9f target=sprintf.9f link path=usr/share/man/man9f/vzcmn_err.9f target=cmn_err.9f link path=usr/share/man/man9f/wr.9f target=WR.9f link path=usr/share/man/man9f/zcmn_err.9f target=cmn_err.9f +link path=usr/share/man/man9f/zone_kcred.9f target=credentials.9f diff --git a/usr/src/uts/common/fs/smbsrv/smb2_fsctl_copychunk.c b/usr/src/uts/common/fs/smbsrv/smb2_fsctl_copychunk.c index 4240328207..4a657bbf19 100644 --- a/usr/src/uts/common/fs/smbsrv/smb2_fsctl_copychunk.c +++ b/usr/src/uts/common/fs/smbsrv/smb2_fsctl_copychunk.c @@ -447,6 +447,8 @@ smb2_fsctl_copychunk_meta(smb_request_t *sr, smb_ofile_t *src_of) * here don't generally have WRITE_DAC access (sigh) so we * have to bypass ofile access checks for this operation. * The file-system level still does its access checking. + * + * TODO: this should really copy the SACL, too. */ smb_fssd_init(&fs_sd, secinfo, sd_flags); sr->fid_ofile = NULL; diff --git a/usr/src/uts/common/fs/smbsrv/smb_fsops.c b/usr/src/uts/common/fs/smbsrv/smb_fsops.c index 8fafac5f60..43b513e840 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_fsops.c +++ b/usr/src/uts/common/fs/smbsrv/smb_fsops.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2018 Nexenta Systems, Inc. All rights reserved. + * Copyright 2020 Nexenta by DDN, Inc. All rights reserved. */ #include <sys/sid.h> @@ -147,10 +147,9 @@ smb_fsop_create_with_sd(smb_request_t *sr, cred_t *cr, is_dir = ((fs_sd->sd_flags & SMB_FSSD_FLAGS_DIR) != 0); if (smb_tree_has_feature(sr->tid_tree, SMB_TREE_ACLONCREATE)) { - if (fs_sd->sd_secinfo & SMB_ACL_SECINFO) { - dacl = fs_sd->sd_zdacl; - sacl = fs_sd->sd_zsacl; - ASSERT(dacl || sacl); + dacl = fs_sd->sd_zdacl; + sacl = fs_sd->sd_zsacl; + if (dacl != NULL || sacl != NULL) { if (dacl && sacl) { acl = smb_fsacl_merge(dacl, sacl); } else if (dacl) { @@ -466,15 +465,20 @@ smb_fsop_create_file(smb_request_t *sr, cred_t *cr, if (op->sd) { /* * SD sent by client in Windows format. Needs to be - * converted to FS format. No inheritance. + * converted to FS format. Inherit DACL/SACL if they're not + * specified. */ secinfo = smb_sd_get_secinfo(op->sd); + smb_fssd_init(&fs_sd, secinfo, 0); status = smb_sd_tofs(op->sd, &fs_sd); if (status == NT_STATUS_SUCCESS) { - rc = smb_fsop_create_with_sd(sr, cr, dnode, - name, attr, ret_snode, &fs_sd); + rc = smb_fsop_sdinherit(sr, dnode, &fs_sd); + if (rc == 0) + rc = smb_fsop_create_with_sd(sr, cr, dnode, + name, attr, ret_snode, &fs_sd); + } else { rc = EINVAL; } @@ -485,7 +489,7 @@ smb_fsop_create_file(smb_request_t *sr, cred_t *cr, * Server applies Windows inheritance rules, * see smb_fsop_sdinherit() comments as to why. */ - smb_fssd_init(&fs_sd, SMB_ACL_SECINFO, 0); + smb_fssd_init(&fs_sd, 0, 0); rc = smb_fsop_sdinherit(sr, dnode, &fs_sd); if (rc == 0) { rc = smb_fsop_create_with_sd(sr, cr, dnode, @@ -607,15 +611,19 @@ smb_fsop_mkdir( if (op->sd) { /* * SD sent by client in Windows format. Needs to be - * converted to FS format. No inheritance. + * converted to FS format. Inherit DACL/SACL if they're not + * specified. */ secinfo = smb_sd_get_secinfo(op->sd); + smb_fssd_init(&fs_sd, secinfo, SMB_FSSD_FLAGS_DIR); status = smb_sd_tofs(op->sd, &fs_sd); if (status == NT_STATUS_SUCCESS) { - rc = smb_fsop_create_with_sd(sr, cr, dnode, - name, attr, ret_snode, &fs_sd); + rc = smb_fsop_sdinherit(sr, dnode, &fs_sd); + if (rc == 0) + rc = smb_fsop_create_with_sd(sr, cr, dnode, + name, attr, ret_snode, &fs_sd); } else rc = EINVAL; @@ -626,7 +634,7 @@ smb_fsop_mkdir( * Server applies Windows inheritance rules, * see smb_fsop_sdinherit() comments as to why. */ - smb_fssd_init(&fs_sd, SMB_ACL_SECINFO, SMB_FSSD_FLAGS_DIR); + smb_fssd_init(&fs_sd, 0, SMB_FSSD_FLAGS_DIR); rc = smb_fsop_sdinherit(sr, dnode, &fs_sd); if (rc == 0) { rc = smb_fsop_create_with_sd(sr, cr, dnode, @@ -2391,6 +2399,8 @@ smb_fsop_sdmerge(smb_request_t *sr, smb_node_t *snode, smb_fssd_t *fs_sd) * owner has been specified. Callers should translate this to * STATUS_INVALID_OWNER which is not the normal mapping for EPERM * in upper layers, so EPERM is mapped to EBADE. + * + * If 'overwrite' is non-zero, then the existing ACL is ignored. */ int smb_fsop_sdwrite(smb_request_t *sr, cred_t *cr, smb_node_t *snode, @@ -2456,14 +2466,13 @@ smb_fsop_sdwrite(smb_request_t *sr, cred_t *cr, smb_node_t *snode, } if (fs_sd->sd_secinfo & SMB_ACL_SECINFO) { - if (overwrite == 0) { + if (overwrite == 0) error = smb_fsop_sdmerge(sr, snode, fs_sd); - if (error) - return (error); - } - error = smb_fsop_aclwrite(sr, cr, snode, fs_sd); - if (error) { + if (error == 0) + error = smb_fsop_aclwrite(sr, cr, snode, fs_sd); + + if (error != 0) { /* * Revert uid/gid changes if required. */ @@ -2511,39 +2520,46 @@ smb_fsop_sdinherit(smb_request_t *sr, smb_node_t *dnode, smb_fssd_t *fs_sd) acl_t *sacl = NULL; int is_dir; int error; + uint32_t secinfo; + smb_fssd_t pfs_sd; ASSERT(fs_sd); - if (sr->tid_tree->t_acltype != ACE_T) { - /* - * No forced inheritance for non-ZFS filesystems. - */ - fs_sd->sd_secinfo = 0; + secinfo = fs_sd->sd_secinfo; + + /* Anything to do? */ + if ((secinfo & SMB_ACL_SECINFO) == SMB_ACL_SECINFO) + return (0); + + /* + * No forced inheritance for non-ZFS filesystems. + */ + if (sr->tid_tree->t_acltype != ACE_T) return (0); - } + smb_fssd_init(&pfs_sd, SMB_ACL_SECINFO, fs_sd->sd_flags); /* Fetch parent directory's ACL */ - error = smb_fsop_sdread(sr, zone_kcred(), dnode, fs_sd); + error = smb_fsop_sdread(sr, zone_kcred(), dnode, &pfs_sd); if (error) { return (error); } is_dir = (fs_sd->sd_flags & SMB_FSSD_FLAGS_DIR); - dacl = smb_fsacl_inherit(fs_sd->sd_zdacl, is_dir, SMB_DACL_SECINFO, - sr->user_cr); - sacl = smb_fsacl_inherit(fs_sd->sd_zsacl, is_dir, SMB_SACL_SECINFO, - sr->user_cr); - - if (sacl == NULL) - fs_sd->sd_secinfo &= ~SMB_SACL_SECINFO; - - smb_fsacl_free(fs_sd->sd_zdacl); - smb_fsacl_free(fs_sd->sd_zsacl); + if ((secinfo & SMB_DACL_SECINFO) == 0) { + dacl = smb_fsacl_inherit(pfs_sd.sd_zdacl, is_dir, + SMB_DACL_SECINFO, sr->user_cr); + fs_sd->sd_zdacl = dacl; + } - fs_sd->sd_zdacl = dacl; - fs_sd->sd_zsacl = sacl; + if ((secinfo & SMB_SACL_SECINFO) == 0) { + sacl = smb_fsacl_inherit(pfs_sd.sd_zsacl, is_dir, + SMB_SACL_SECINFO, sr->user_cr); + fs_sd->sd_zsacl = sacl; + } + smb_fsacl_free(pfs_sd.sd_zdacl); + smb_fsacl_free(pfs_sd.sd_zsacl); return (0); } #endif /* _KERNEL */ diff --git a/usr/src/uts/common/fs/smbsrv/smb_idmap.c b/usr/src/uts/common/fs/smbsrv/smb_idmap.c index b9bfa991c4..e6c04193b0 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_idmap.c +++ b/usr/src/uts/common/fs/smbsrv/smb_idmap.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2018 Nexenta Systems, Inc. All rights reserved. + * Copyright 2020 Nexenta by DDN, Inc. All rights reserved. */ /* @@ -83,12 +83,12 @@ smb_idmap_getsid(uid_t id, int idtype, smb_sid_t **sid) switch (idtype) { case SMB_IDMAP_USER: - sim.sim_stat = kidmap_getsidbyuid(global_zone, id, + sim.sim_stat = kidmap_getsidbyuid(curzone, id, (const char **)&sim.sim_domsid, &sim.sim_rid); break; case SMB_IDMAP_GROUP: - sim.sim_stat = kidmap_getsidbygid(global_zone, id, + sim.sim_stat = kidmap_getsidbygid(curzone, id, (const char **)&sim.sim_domsid, &sim.sim_rid); break; @@ -150,17 +150,17 @@ smb_idmap_getid(smb_sid_t *sid, uid_t *id, int *idtype) switch (*idtype) { case SMB_IDMAP_USER: - sim.sim_stat = kidmap_getuidbysid(global_zone, sim.sim_domsid, + sim.sim_stat = kidmap_getuidbysid(curzone, sim.sim_domsid, sim.sim_rid, sim.sim_id); break; case SMB_IDMAP_GROUP: - sim.sim_stat = kidmap_getgidbysid(global_zone, sim.sim_domsid, + sim.sim_stat = kidmap_getgidbysid(curzone, sim.sim_domsid, sim.sim_rid, sim.sim_id); break; case SMB_IDMAP_UNKNOWN: - sim.sim_stat = kidmap_getpidbysid(global_zone, sim.sim_domsid, + sim.sim_stat = kidmap_getpidbysid(curzone, sim.sim_domsid, sim.sim_rid, sim.sim_id, &sim.sim_idtype); break; @@ -186,7 +186,7 @@ smb_idmap_batch_create(smb_idmap_batch_t *sib, uint16_t nmap, int flags) bzero(sib, sizeof (smb_idmap_batch_t)); - sib->sib_idmaph = kidmap_get_create(global_zone); + sib->sib_idmaph = kidmap_get_create(curzone); sib->sib_flags = flags; sib->sib_nmap = nmap; diff --git a/usr/src/uts/common/fs/smbsrv/smb_sd.c b/usr/src/uts/common/fs/smbsrv/smb_sd.c index ddbd7b9413..f7e056c511 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_sd.c +++ b/usr/src/uts/common/fs/smbsrv/smb_sd.c @@ -22,7 +22,7 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * - * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright 2020 Nexenta by DDN, Inc. All rights reserved. */ /* @@ -243,16 +243,29 @@ smb_sd_tofs(smb_sd_t *sd, smb_fssd_t *fs_sd) } } + /* + * In SMB, the 'secinfo' determines which parts of the SD the client + * intends to change. Notably, this includes changing the DACL_PRESENT + * and SACL_PRESENT control bits. The client can specify e.g. + * SACL_SECINFO, but not SACL_PRESENT, and this means the client intends + * to remove the SACL. + * + * If the *_PRESENT bit isn't set, then the respective ACL will be NULL. + * [MS-DTYP] disallows providing an ACL when the PRESENT bit isn't set. + * This is enforced by smb_decode_sd(). + * + * We allow the SACL to be NULL, but we MUST have a DACL. + * If the DACL is NULL, that's equivalent to "everyone:full_set:allow". + */ + /* DACL */ if (fs_sd->sd_secinfo & SMB_DACL_SECINFO) { - if (sd->sd_control & SE_DACL_PRESENT) { - status = smb_acl_to_zfs(sd->sd_dacl, flags, - SMB_DACL_SECINFO, &fs_sd->sd_zdacl); - if (status != NT_STATUS_SUCCESS) - return (status); - } - else - return (NT_STATUS_INVALID_ACL); + ASSERT3U(((sd->sd_control & SE_DACL_PRESENT) != 0), ==, + (sd->sd_dacl != NULL)); + status = smb_acl_to_zfs(sd->sd_dacl, flags, + SMB_DACL_SECINFO, &fs_sd->sd_zdacl); + if (status != NT_STATUS_SUCCESS) + return (status); } /* SACL */ @@ -263,8 +276,6 @@ smb_sd_tofs(smb_sd_t *sd, smb_fssd_t *fs_sd) if (status != NT_STATUS_SUCCESS) { return (status); } - } else { - return (NT_STATUS_INVALID_ACL); } } diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c index dc7317b411..4a060403da 100644 --- a/usr/src/uts/common/fs/zfs/dnode_sync.c +++ b/usr/src/uts/common/fs/zfs/dnode_sync.c @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. + * Copyright 2020 Oxide Computer Company */ #include <sys/zfs_context.h> @@ -736,13 +737,22 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) dsfra.dsfra_dnode = dn; dsfra.dsfra_tx = tx; dsfra.dsfra_free_indirects = freeing_dnode; + mutex_enter(&dn->dn_mtx); if (freeing_dnode) { ASSERT(range_tree_contains(dn->dn_free_ranges[txgoff], 0, dn->dn_maxblkid + 1)); } - mutex_enter(&dn->dn_mtx); - range_tree_vacate(dn->dn_free_ranges[txgoff], + /* + * Because dnode_sync_free_range() must drop dn_mtx during its + * processing, using it as a callback to range_tree_vacate() is + * not safe. No other operations (besides destroy) are allowed + * once range_tree_vacate() has begun, and dropping dn_mtx + * would leave a window open for another thread to observe that + * invalid (and unsafe) state. + */ + range_tree_walk(dn->dn_free_ranges[txgoff], dnode_sync_free_range, &dsfra); + range_tree_vacate(dn->dn_free_ranges[txgoff], NULL, NULL); range_tree_destroy(dn->dn_free_ranges[txgoff]); dn->dn_free_ranges[txgoff] = NULL; mutex_exit(&dn->dn_mtx); diff --git a/usr/src/uts/common/os/cred.c b/usr/src/uts/common/os/cred.c index 0bd6cfd44f..5e909667de 100644 --- a/usr/src/uts/common/os/cred.c +++ b/usr/src/uts/common/os/cred.c @@ -20,13 +20,14 @@ */ /* * Copyright (c) 2013, Ira Cooper. All rights reserved. + * Copyright 2020 Nexenta by DDN, Inc. All rights reserved. */ /* * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ +/* All Rights Reserved */ /* * University Copyright- Copyright (c) 1982, 1986, 1988 @@ -288,7 +289,7 @@ crget(void) { cred_t *cr = kmem_cache_alloc(cred_cache, KM_SLEEP); - bcopy(kcred, cr, crsize); + bcopy(zone_kcred(), cr, crsize); cr->cr_ref = 1; zone_cred_hold(cr->cr_zone); if (cr->cr_label) @@ -377,7 +378,7 @@ crfree(cred_t *cr) /* * Copy a cred structure to a new one and free the old one. * The new cred will have two references. One for the calling process, - * and one for the thread. + * and one for the thread. */ cred_t * crcopy(cred_t *cr) @@ -404,7 +405,7 @@ crcopy(cred_t *cr) /* * Copy a cred structure to a new one and free the old one. * The new cred will have two references. One for the calling process, - * and one for the thread. + * and one for the thread. * This variation on crcopy uses a pre-allocated structure for the * "new" cred. */ diff --git a/usr/src/uts/i86pc/io/vmm/intel/ept.c b/usr/src/uts/i86pc/io/vmm/intel/ept.c index 5e5253780e..5e3bd6d309 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/ept.c +++ b/usr/src/uts/i86pc/io/vmm/intel/ept.c @@ -59,7 +59,6 @@ __FBSDID("$FreeBSD$"); #include <machine/vmm.h> -#include "vmx_cpufunc.h" #include "ept.h" #define EPT_SUPPORTS_EXEC_ONLY(cap) ((cap) & (1UL << 0)) @@ -171,31 +170,12 @@ ept_dump(uint64_t *ptp, int nlevels) } #endif -#ifdef __FreeBSD__ -static void -invept_single_context(void *arg) -{ - struct invept_desc desc = *(struct invept_desc *)arg; - - invept(INVEPT_TYPE_SINGLE_CONTEXT, desc); -} - -void -ept_invalidate_mappings(u_long eptp) -{ - struct invept_desc invept_desc = { 0 }; - invept_desc.eptp = eptp; - - smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc); -} -#else /* __FreeBSD__ */ void ept_invalidate_mappings(u_long eptp) { hma_vmx_invept_allcpus((uintptr_t)eptp); } -#endif /* __FreeBSD__ */ static int ept_pinit(pmap_t pmap) diff --git a/usr/src/uts/i86pc/io/vmm/intel/offsets.in b/usr/src/uts/i86pc/io/vmm/intel/offsets.in index cc041eaefc..ca7f967f3b 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/offsets.in +++ b/usr/src/uts/i86pc/io/vmm/intel/offsets.in @@ -22,7 +22,6 @@ #include <machine/pmap.h> #include <machine/vmm.h> -#include "intel/vmx_cpufunc.h" #include "intel/vmx.h" #include "vm/vm_glue.h" diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c index f1a08cc57d..51ae5fbd0c 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.c @@ -39,59 +39,24 @@ * * Copyright 2014 Pluribus Networks Inc. * Copyright 2017 Joyent, Inc. + * Copyright 2020 Oxide Computer Company */ -#ifdef __FreeBSD__ -#include "opt_ddb.h" -#endif - #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include <sys/param.h> -#include <sys/sysctl.h> #include <sys/systm.h> -#include <sys/pcpu.h> #include <vm/vm.h> -#include <vm/pmap.h> -#include <machine/segments.h> #include <machine/vmm.h> -#include "vmm_host.h" -#include "vmx_cpufunc.h" -#include "vmcs.h" -#include "ept.h" #include "vmx.h" -#ifdef DDB -#include <ddb/ddb.h> -#endif - -SYSCTL_DECL(_hw_vmm_vmx); - -static int no_flush_rsb; -SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW, - &no_flush_rsb, 0, "Do not flush RSB upon vmexit"); - -static uint64_t -vmcs_fix_regval(uint32_t encoding, uint64_t val) -{ - - switch (encoding) { - case VMCS_GUEST_CR0: - val = vmx_fix_cr0(val); - break; - case VMCS_GUEST_CR4: - val = vmx_fix_cr4(val); - break; - default: - break; - } - return (val); -} +/* Bits 0-30 of VMX_BASIC MSR contain VMCS revision identifier */ +#define VMX_BASIC_REVISION(v) ((v) & 0x7fffffff) -static uint32_t +uint32_t vmcs_field_encoding(int ident) { switch (ident) { @@ -138,15 +103,13 @@ vmcs_field_encoding(int ident) case VM_REG_GUEST_ENTRY_INST_LENGTH: return (VMCS_ENTRY_INST_LENGTH); default: - return (-1); + return (VMCS_INVALID_ENCODING); } - } -static int +void vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc) { - switch (seg) { case VM_REG_GUEST_ES: *base = VMCS_GUEST_ES_BASE; @@ -199,364 +162,111 @@ vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc) *acc = VMCS_INVALID_ENCODING; break; default: - return (EINVAL); + panic("invalid segment register %d", seg); } - - return (0); } -int -vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval) +void +vmcs_clear(uintptr_t vmcs_pa) { - int error; - uint32_t encoding; - - /* - * If we need to get at vmx-specific state in the VMCS we can bypass - * the translation of 'ident' to 'encoding' by simply setting the - * sign bit. As it so happens the upper 16 bits are reserved (i.e - * set to 0) in the encodings for the VMCS so we are free to use the - * sign bit. - */ - if (ident < 0) - encoding = ident & 0x7fffffff; - else - encoding = vmcs_field_encoding(ident); - - if (encoding == (uint32_t)-1) - return (EINVAL); + int err; - if (!running) - VMPTRLD(vmcs); + __asm __volatile("vmclear %[addr];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (err) + : [addr] "m" (vmcs_pa) + : "memory"); - error = vmread(encoding, retval); - - if (!running) - VMCLEAR(vmcs); - - return (error); -} - -int -vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val) -{ - int error; - uint32_t encoding; - - if (ident < 0) - encoding = ident & 0x7fffffff; - else - encoding = vmcs_field_encoding(ident); - - if (encoding == (uint32_t)-1) - return (EINVAL); - - val = vmcs_fix_regval(encoding, val); - - if (!running) - VMPTRLD(vmcs); - - error = vmwrite(encoding, val); - - if (!running) - VMCLEAR(vmcs); - - return (error); -} - -int -vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) -{ - int error; - uint32_t base, limit, access; - - error = vmcs_seg_desc_encoding(seg, &base, &limit, &access); - if (error != 0) - panic("vmcs_setdesc: invalid segment register %d", seg); - - if (!running) - VMPTRLD(vmcs); - if ((error = vmwrite(base, desc->base)) != 0) - goto done; - - if ((error = vmwrite(limit, desc->limit)) != 0) - goto done; - - if (access != VMCS_INVALID_ENCODING) { - if ((error = vmwrite(access, desc->access)) != 0) - goto done; + if (err != 0) { + panic("vmclear(%p) error %d", vmcs_pa, err); } -done: - if (!running) - VMCLEAR(vmcs); - return (error); -} - -int -vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc) -{ - int error; - uint32_t base, limit, access; - uint64_t u64; - - error = vmcs_seg_desc_encoding(seg, &base, &limit, &access); - if (error != 0) - panic("vmcs_getdesc: invalid segment register %d", seg); - if (!running) - VMPTRLD(vmcs); - if ((error = vmread(base, &u64)) != 0) - goto done; - desc->base = u64; - - if ((error = vmread(limit, &u64)) != 0) - goto done; - desc->limit = u64; - - if (access != VMCS_INVALID_ENCODING) { - if ((error = vmread(access, &u64)) != 0) - goto done; - desc->access = u64; - } -done: - if (!running) - VMCLEAR(vmcs); - return (error); + /* + * A call to critical_enter() was made in vmcs_load() to prevent + * preemption. Now that the VMCS is unloaded, it is safe to relax that + * restriction. + */ + critical_exit(); } -int -vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count) +void +vmcs_initialize(struct vmcs *vmcs, uintptr_t vmcs_pa) { - int error; + int err; - VMPTRLD(vmcs); + /* set to VMCS revision */ + vmcs->identifier = VMX_BASIC_REVISION(rdmsr(MSR_VMX_BASIC)); /* - * Guest MSRs are saved in the VM-exit MSR-store area. - * Guest MSRs are loaded from the VM-entry MSR-load area. - * Both areas point to the same location in memory. + * Perform a vmclear on the VMCS, but without the critical section + * manipulation as done by vmcs_clear() above. */ - if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0) - goto done; - if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0) - goto done; - - if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0) - goto done; - if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0) - goto done; - - error = 0; -done: - VMCLEAR(vmcs); - return (error); + __asm __volatile("vmclear %[addr];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (err) + : [addr] "m" (vmcs_pa) + : "memory"); + + if (err != 0) { + panic("vmclear(%p) error %d", vmcs_pa, err); + } } -int -vmcs_init(struct vmcs *vmcs) +void +vmcs_load(uintptr_t vmcs_pa) { - int error, codesel, datasel, tsssel; - u_long cr0, cr4, efer; - uint64_t pat; -#ifdef __FreeBSD__ - uint64_t fsbase, idtrbase; -#endif - - codesel = vmm_get_host_codesel(); - datasel = vmm_get_host_datasel(); - tsssel = vmm_get_host_tsssel(); + int err; /* - * Make sure we have a "current" VMCS to work with. + * While the VMCS is loaded on the CPU for subsequent operations, it is + * important that the thread not be preempted. That is ensured with + * critical_enter() here, with a matching critical_exit() call in + * vmcs_clear() once the VMCS is unloaded. */ - VMPTRLD(vmcs); - - /* Host state */ - - /* Initialize host IA32_PAT MSR */ - pat = vmm_get_host_pat(); - if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0) - goto done; + critical_enter(); - /* Load the IA32_EFER MSR */ - efer = vmm_get_host_efer(); - if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0) - goto done; + __asm __volatile("vmptrld %[addr];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (err) + : [addr] "m" (vmcs_pa) + : "memory"); - /* Load the control registers */ - - cr0 = vmm_get_host_cr0(); - if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0) - goto done; - - cr4 = vmm_get_host_cr4() | CR4_VMXE; - if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0) - goto done; - - /* Load the segment selectors */ - if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0) - goto done; - - if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0) - goto done; - - if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0) - goto done; - - if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0) - goto done; - -#ifdef __FreeBSD__ - if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0) - goto done; - - if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0) - goto done; -#else - if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, vmm_get_host_fssel())) != 0) - goto done; - - if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, vmm_get_host_gssel())) != 0) - goto done; -#endif - - if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0) - goto done; - -#ifdef __FreeBSD__ - /* - * Load the Base-Address for %fs and idtr. - * - * Note that we exclude %gs, tss and gdtr here because their base - * address is pcpu specific. - */ - fsbase = vmm_get_host_fsbase(); - if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0) - goto done; - - idtrbase = vmm_get_host_idtrbase(); - if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0) - goto done; + if (err != 0) { + panic("vmptrld(%p) error %d", vmcs_pa, err); + } +} -#else /* __FreeBSD__ */ - /* - * Configure host sysenter MSRs to be restored on VM exit. - * The thread-specific MSR_INTC_SEP_ESP value is loaded in vmx_run. - */ - if ((error = vmwrite(VMCS_HOST_IA32_SYSENTER_CS, KCS_SEL)) != 0) - goto done; - /* Natively defined as MSR_INTC_SEP_EIP */ - if ((error = vmwrite(VMCS_HOST_IA32_SYSENTER_EIP, - rdmsr(MSR_SYSENTER_EIP_MSR))) != 0) - goto done; +uint64_t +vmcs_read(uint32_t encoding) +{ + int error; + uint64_t val; -#endif /* __FreeBSD__ */ + __asm __volatile("vmread %[enc], %[val];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (error), [val] "=r" (val) + : [enc] "r" ((uint64_t)encoding) + : "memory"); - /* instruction pointer */ - if (no_flush_rsb) { - if ((error = vmwrite(VMCS_HOST_RIP, - (u_long)vmx_exit_guest)) != 0) - goto done; - } else { - if ((error = vmwrite(VMCS_HOST_RIP, - (u_long)vmx_exit_guest_flush_rsb)) != 0) - goto done; + if (error != 0) { + panic("vmread(%x) error %d", encoding, error); } - /* link pointer */ - if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0) - goto done; -done: - VMCLEAR(vmcs); - return (error); + return (val); } -#ifdef DDB -extern int vmxon_enabled[]; - -DB_SHOW_COMMAND(vmcs, db_show_vmcs) +void +vmcs_write(uint32_t encoding, uint64_t val) { - uint64_t cur_vmcs, val; - uint32_t exit; - - if (!vmxon_enabled[curcpu]) { - db_printf("VMX not enabled\n"); - return; - } + int error; - if (have_addr) { - db_printf("Only current VMCS supported\n"); - return; - } + __asm __volatile("vmwrite %[val], %[enc];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (error) + : [val] "r" (val), [enc] "r" ((uint64_t)encoding) + : "memory"); - vmptrst(&cur_vmcs); - if (cur_vmcs == VMCS_INITIAL) { - db_printf("No current VM context\n"); - return; - } - db_printf("VMCS: %jx\n", cur_vmcs); - db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID)); - db_printf("Activity: "); - val = vmcs_read(VMCS_GUEST_ACTIVITY); - switch (val) { - case 0: - db_printf("Active"); - break; - case 1: - db_printf("HLT"); - break; - case 2: - db_printf("Shutdown"); - break; - case 3: - db_printf("Wait for SIPI"); - break; - default: - db_printf("Unknown: %#lx", val); - } - db_printf("\n"); - exit = vmcs_read(VMCS_EXIT_REASON); - if (exit & 0x80000000) - db_printf("Entry Failure Reason: %u\n", exit & 0xffff); - else - db_printf("Exit Reason: %u\n", exit & 0xffff); - db_printf("Qualification: %#lx\n", vmcs_exit_qualification()); - db_printf("Guest Linear Address: %#lx\n", - vmcs_read(VMCS_GUEST_LINEAR_ADDRESS)); - switch (exit & 0x8000ffff) { - case EXIT_REASON_EXCEPTION: - case EXIT_REASON_EXT_INTR: - val = vmcs_read(VMCS_EXIT_INTR_INFO); - db_printf("Interrupt Type: "); - switch (val >> 8 & 0x7) { - case 0: - db_printf("external"); - break; - case 2: - db_printf("NMI"); - break; - case 3: - db_printf("HW exception"); - break; - case 4: - db_printf("SW exception"); - break; - default: - db_printf("?? %lu", val >> 8 & 0x7); - break; - } - db_printf(" Vector: %lu", val & 0xff); - if (val & 0x800) - db_printf(" Error Code: %lx", - vmcs_read(VMCS_EXIT_INTR_ERRCODE)); - db_printf("\n"); - break; - case EXIT_REASON_EPT_FAULT: - case EXIT_REASON_EPT_MISCONFIG: - db_printf("Guest Physical Address: %#lx\n", - vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS)); - break; + if (error != 0) { + panic("vmwrite(%x, %x) error %d", encoding, val, error); } - db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error()); } -#endif diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h index edde5c6dd5..1713872556 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmcs.h +++ b/usr/src/uts/i86pc/io/vmm/intel/vmcs.h @@ -30,6 +30,7 @@ /* * Copyright 2017 Joyent, Inc. + * Copyright 2020 Oxide Computer Company */ #ifndef _VMCS_H_ @@ -41,125 +42,20 @@ struct vmcs { uint32_t identifier; uint32_t abort_code; char _impl_specific[PAGE_SIZE - sizeof(uint32_t) * 2]; -#ifndef __FreeBSD__ - /* - * Keep the physical address of the VMCS cached adjacent for the - * structure so it can be referenced in contexts which are too delicate - * for a call into the HAT. For the moment it means wasting a whole - * page on padding for the PA value to maintain alignment, but it - * allows the consumers of 'struct vmcs *' to easily access the value - * without a significant change to the interface. - */ - uint64_t vmcs_pa; - char _pa_pad[PAGE_SIZE - sizeof (vm_paddr_t)]; -#endif }; -#ifdef __FreeBSD__ -CTASSERT(sizeof(struct vmcs) == PAGE_SIZE); -#else -CTASSERT(sizeof(struct vmcs) == (2*PAGE_SIZE)); -#endif +CTASSERT(sizeof (struct vmcs) == PAGE_SIZE); -/* MSR save region is composed of an array of 'struct msr_entry' */ -struct msr_entry { - uint32_t index; - uint32_t reserved; - uint64_t val; +uint32_t vmcs_field_encoding(int ident); +void vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, + uint32_t *acc); -}; +void vmcs_initialize(struct vmcs *vmcs, uintptr_t vmcs_pa); -int vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count); -int vmcs_init(struct vmcs *vmcs); -int vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *rv); -int vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val); -int vmcs_getdesc(struct vmcs *vmcs, int running, int ident, - struct seg_desc *desc); -int vmcs_setdesc(struct vmcs *vmcs, int running, int ident, - struct seg_desc *desc); +void vmcs_load(uintptr_t vmcs_pa); +void vmcs_clear(uintptr_t vmcs_pa); -/* - * Avoid header pollution caused by inline use of 'vtophys()' in vmx_cpufunc.h - */ -#ifdef _VMX_CPUFUNC_H_ -static __inline uint64_t -vmcs_read(uint32_t encoding) -{ - int error; - uint64_t val; - - error = vmread(encoding, &val); - KASSERT(error == 0, ("vmcs_read(%u) error %d", encoding, error)); - return (val); -} - -static __inline void -vmcs_write(uint32_t encoding, uint64_t val) -{ - int error; - - error = vmwrite(encoding, val); - KASSERT(error == 0, ("vmcs_write(%u) error %d", encoding, error)); -} - -#ifndef __FreeBSD__ -/* - * Due to header complexity combined with the need to cache the physical - * address for the VMCS, these must be defined here rather than vmx_cpufunc.h. - */ -static __inline int -vmclear(struct vmcs *vmcs) -{ - int error; - uint64_t addr = vmcs->vmcs_pa; - - __asm __volatile("vmclear %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [addr] "m" (*(uint64_t *)&addr) - : "memory"); - return (error); -} - -static __inline int -vmptrld(struct vmcs *vmcs) -{ - int error; - uint64_t addr = vmcs->vmcs_pa; - - __asm __volatile("vmptrld %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [addr] "m" (*(uint64_t *)&addr) - : "memory"); - return (error); -} - -static __inline void -VMCLEAR(struct vmcs *vmcs) -{ - int err; - - err = vmclear(vmcs); - if (err != 0) - panic("%s: vmclear(%p) error %d", __func__, vmcs, err); - - critical_exit(); -} - -static __inline void -VMPTRLD(struct vmcs *vmcs) -{ - int err; - - critical_enter(); - - err = vmptrld(vmcs); - if (err != 0) - panic("%s: vmptrld(%p) error %d", __func__, vmcs, err); -} -#endif /* __FreeBSD__ */ - -#endif /* _VMX_CPUFUNC_H_ */ +uint64_t vmcs_read(uint32_t encoding); +void vmcs_write(uint32_t encoding, uint64_t val); #define vmexit_instruction_length() vmcs_read(VMCS_EXIT_INSTRUCTION_LENGTH) #define vmcs_guest_rip() vmcs_read(VMCS_GUEST_RIP) @@ -177,7 +73,6 @@ VMPTRLD(struct vmcs *vmcs) #define VMCS_INITIAL 0xffffffffffffffff -#define VMCS_IDENT(encoding) ((encoding) | 0x80000000) /* * VMCS field encodings from Appendix H, Intel Architecture Manual Vol3B. */ diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.c b/usr/src/uts/i86pc/io/vmm/intel/vmx.c index c46560948e..50001c0735 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.c @@ -88,7 +88,6 @@ __FBSDID("$FreeBSD$"); #include "vlapic_priv.h" #include "ept.h" -#include "vmx_cpufunc.h" #include "vmcs.h" #include "vmx.h" #include "vmx_msr.h" @@ -172,11 +171,6 @@ SYSCTL_DECL(_hw_vmm); SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL); -#ifdef __FreeBSD__ -int vmxon_enabled[MAXCPU]; -static char vmxon_region[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE); -#endif /*__FreeBSD__ */ - static uint32_t pinbased_ctls, procbased_ctls, procbased_ctls2; static uint32_t exit_ctls, entry_ctls; @@ -196,10 +190,15 @@ static int vmx_initialized; SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, initialized, CTLFLAG_RD, &vmx_initialized, 0, "Intel VMX initialized"); +static int no_flush_rsb; +SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW, + &no_flush_rsb, 0, "Do not flush RSB upon vmexit"); + /* * Optional capabilities */ #ifdef __FreeBSD__ +SYSCTL_DECL(_hw_vmm_vmx); static SYSCTL_NODE(_hw_vmm_vmx, OID_AUTO, cap, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL); @@ -240,6 +239,13 @@ int guest_l1d_flush_sw; SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, l1d_flush_sw, CTLFLAG_RD, &guest_l1d_flush_sw, 0, NULL); +/* MSR save region is composed of an array of 'struct msr_entry' */ +struct msr_entry { + uint32_t index; + uint32_t reserved; + uint64_t val; +}; + static struct msr_entry msr_load_list[1] __aligned(16); /* @@ -330,11 +336,8 @@ SDT_PROBE_DEFINE4(vmm, vmx, exit, return, static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval); -static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val); static void vmx_inject_pir(struct vlapic *vlapic); -#ifndef __FreeBSD__ -static int vmx_apply_tsc_adjust(struct vmx *, int); -#endif /* __FreeBSD__ */ +static void vmx_apply_tsc_adjust(struct vmx *, int); #ifdef KTR static const char * @@ -504,17 +507,15 @@ vmx_allow_x2apic_msrs(struct vmx *vmx) return (error); } -u_long +static u_long vmx_fix_cr0(u_long cr0) { - return ((cr0 | cr0_ones_mask) & ~cr0_zeros_mask); } -u_long +static u_long vmx_fix_cr4(u_long cr4) { - return ((cr4 | cr4_ones_mask) & ~cr4_zeros_mask); } @@ -845,45 +846,12 @@ vmx_trigger_hostintr(int vector) #endif /* __FreeBSD__ */ } -static int -vmx_setup_cr_shadow(int which, struct vmcs *vmcs, uint32_t initial) -{ - int error, mask_ident, shadow_ident; - uint64_t mask_value; - - if (which != 0 && which != 4) - panic("vmx_setup_cr_shadow: unknown cr%d", which); - - if (which == 0) { - mask_ident = VMCS_CR0_MASK; - mask_value = cr0_ones_mask | cr0_zeros_mask; - shadow_ident = VMCS_CR0_SHADOW; - } else { - mask_ident = VMCS_CR4_MASK; - mask_value = cr4_ones_mask | cr4_zeros_mask; - shadow_ident = VMCS_CR4_SHADOW; - } - - error = vmcs_setreg(vmcs, 0, VMCS_IDENT(mask_ident), mask_value); - if (error) - return (error); - - error = vmcs_setreg(vmcs, 0, VMCS_IDENT(shadow_ident), initial); - if (error) - return (error); - - return (0); -} -#define vmx_setup_cr0_shadow(vmcs,init) vmx_setup_cr_shadow(0, (vmcs), (init)) -#define vmx_setup_cr4_shadow(vmcs,init) vmx_setup_cr_shadow(4, (vmcs), (init)) - static void * vmx_vminit(struct vm *vm, pmap_t pmap) { uint16_t vpid[VM_MAXCPU]; - int i, error; + int i, error, datasel; struct vmx *vmx; - struct vmcs *vmcs; uint32_t exc_bitmap; uint16_t maxcpus; uint32_t proc_ctls, proc2_ctls, pin_ctls; @@ -972,6 +940,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap) } maxcpus = vm_get_maxcpus(vm); + datasel = vmm_get_host_datasel(); for (i = 0; i < maxcpus; i++) { /* * Cache physical address lookups for various components which @@ -982,31 +951,58 @@ vmx_vminit(struct vm *vm, pmap_t pmap) vm_paddr_t apic_page_pa = vtophys(&vmx->apic_page[i]); vm_paddr_t pir_desc_pa = vtophys(&vmx->pir_desc[i]); - vmcs = &vmx->vmcs[i]; - vmcs->identifier = vmx_revision(); - vmcs->vmcs_pa = (uint64_t)vtophys(vmcs); - error = vmclear(vmcs); - if (error != 0) { - panic("vmx_vminit: vmclear error %d on vcpu %d\n", - error, i); - } + vmx->vmcs_pa[i] = (uintptr_t)vtophys(&vmx->vmcs[i]); + vmcs_initialize(&vmx->vmcs[i], vmx->vmcs_pa[i]); vmx_msr_guest_init(vmx, i); - error = vmcs_init(vmcs); - KASSERT(error == 0, ("vmcs_init error %d", error)); + vmcs_load(vmx->vmcs_pa[i]); - VMPTRLD(vmcs); - error = 0; + vmcs_write(VMCS_HOST_IA32_PAT, vmm_get_host_pat()); + vmcs_write(VMCS_HOST_IA32_EFER, vmm_get_host_efer()); + + /* Load the control registers */ + vmcs_write(VMCS_HOST_CR0, vmm_get_host_cr0()); + vmcs_write(VMCS_HOST_CR4, vmm_get_host_cr4() | CR4_VMXE); + + /* Load the segment selectors */ + vmcs_write(VMCS_HOST_CS_SELECTOR, vmm_get_host_codesel()); + + vmcs_write(VMCS_HOST_ES_SELECTOR, datasel); + vmcs_write(VMCS_HOST_SS_SELECTOR, datasel); + vmcs_write(VMCS_HOST_DS_SELECTOR, datasel); + + vmcs_write(VMCS_HOST_FS_SELECTOR, vmm_get_host_fssel()); + vmcs_write(VMCS_HOST_GS_SELECTOR, vmm_get_host_gssel()); + vmcs_write(VMCS_HOST_TR_SELECTOR, vmm_get_host_tsssel()); + + /* + * Configure host sysenter MSRs to be restored on VM exit. + * The thread-specific MSR_INTC_SEP_ESP value is loaded in vmx_run. + */ + vmcs_write(VMCS_HOST_IA32_SYSENTER_CS, KCS_SEL); + vmcs_write(VMCS_HOST_IA32_SYSENTER_EIP, + rdmsr(MSR_SYSENTER_EIP_MSR)); + + /* instruction pointer */ + if (no_flush_rsb) { + vmcs_write(VMCS_HOST_RIP, (uint64_t)vmx_exit_guest); + } else { + vmcs_write(VMCS_HOST_RIP, + (uint64_t)vmx_exit_guest_flush_rsb); + } - error += vmwrite(VMCS_EPTP, vmx->eptp); - error += vmwrite(VMCS_PIN_BASED_CTLS, pin_ctls); - error += vmwrite(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); - error += vmwrite(VMCS_SEC_PROC_BASED_CTLS, proc2_ctls); - error += vmwrite(VMCS_EXIT_CTLS, exit_ctls); - error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls); - error += vmwrite(VMCS_MSR_BITMAP, msr_bitmap_pa); - error += vmwrite(VMCS_VPID, vpid[i]); + /* link pointer */ + vmcs_write(VMCS_LINK_POINTER, ~0); + + vmcs_write(VMCS_EPTP, vmx->eptp); + vmcs_write(VMCS_PIN_BASED_CTLS, pin_ctls); + vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); + vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc2_ctls); + vmcs_write(VMCS_EXIT_CTLS, exit_ctls); + vmcs_write(VMCS_ENTRY_CTLS, entry_ctls); + vmcs_write(VMCS_MSR_BITMAP, msr_bitmap_pa); + vmcs_write(VMCS_VPID, vpid[i]); if (guest_l1d_flush && !guest_l1d_flush_sw) { vmcs_write(VMCS_ENTRY_MSR_LOAD, pmap_kextract( @@ -1022,28 +1018,39 @@ vmx_vminit(struct vm *vm, pmap_t pmap) exc_bitmap = 0xffffffff; else exc_bitmap = 1 << IDT_MC; - error += vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap); + vmcs_write(VMCS_EXCEPTION_BITMAP, exc_bitmap); vmx->ctx[i].guest_dr6 = DBREG_DR6_RESERVED1; - error += vmwrite(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1); + vmcs_write(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1); if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)) { - error += vmwrite(VMCS_VIRTUAL_APIC, apic_page_pa); + vmcs_write(VMCS_VIRTUAL_APIC, apic_page_pa); } if (vmx_cap_en(vmx, VMX_CAP_APICV)) { - error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS); - error += vmwrite(VMCS_EOI_EXIT0, 0); - error += vmwrite(VMCS_EOI_EXIT1, 0); - error += vmwrite(VMCS_EOI_EXIT2, 0); - error += vmwrite(VMCS_EOI_EXIT3, 0); + vmcs_write(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS); + vmcs_write(VMCS_EOI_EXIT0, 0); + vmcs_write(VMCS_EOI_EXIT1, 0); + vmcs_write(VMCS_EOI_EXIT2, 0); + vmcs_write(VMCS_EOI_EXIT3, 0); } if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) { - error += vmwrite(VMCS_PIR_VECTOR, pirvec); - error += vmwrite(VMCS_PIR_DESC, pir_desc_pa); + vmcs_write(VMCS_PIR_VECTOR, pirvec); + vmcs_write(VMCS_PIR_DESC, pir_desc_pa); } - VMCLEAR(vmcs); - KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs")); + + /* + * Set up the CR0/4 masks and configure the read shadow state + * to the power-on register value from the Intel Sys Arch. + * CR0 - 0x60000010 + * CR4 - 0 + */ + vmcs_write(VMCS_CR0_MASK, cr0_ones_mask | cr0_zeros_mask); + vmcs_write(VMCS_CR0_SHADOW, 0x60000010); + vmcs_write(VMCS_CR4_MASK, cr4_ones_mask | cr4_zeros_mask); + vmcs_write(VMCS_CR4_SHADOW, 0); + + vmcs_clear(vmx->vmcs_pa[i]); vmx->cap[i].set = 0; vmx->cap[i].proc_ctls = proc_ctls; @@ -1054,19 +1061,6 @@ vmx_vminit(struct vm *vm, pmap_t pmap) vmx->state[i].lastcpu = NOCPU; vmx->state[i].vpid = vpid[i]; - /* - * Set up the CR0/4 shadows, and init the read shadow - * to the power-on register value from the Intel Sys Arch. - * CR0 - 0x60000010 - * CR4 - 0 - */ - error = vmx_setup_cr0_shadow(vmcs, 0x60000010); - if (error != 0) - panic("vmx_setup_cr0_shadow %d", error); - - error = vmx_setup_cr4_shadow(vmcs, 0); - if (error != 0) - panic("vmx_setup_cr4_shadow %d", error); vmx->ctx[i].pmap = pmap; } @@ -1123,6 +1117,33 @@ vmx_astpending_trace(struct vmx *vmx, int vcpu, uint64_t rip) static VMM_STAT_INTEL(VCPU_INVVPID_SAVED, "Number of vpid invalidations saved"); static VMM_STAT_INTEL(VCPU_INVVPID_DONE, "Number of vpid invalidations done"); +#define INVVPID_TYPE_ADDRESS 0UL +#define INVVPID_TYPE_SINGLE_CONTEXT 1UL +#define INVVPID_TYPE_ALL_CONTEXTS 2UL + +struct invvpid_desc { + uint16_t vpid; + uint16_t _res1; + uint32_t _res2; + uint64_t linear_addr; +}; +CTASSERT(sizeof(struct invvpid_desc) == 16); + +static __inline void +invvpid(uint64_t type, struct invvpid_desc desc) +{ + int error; + + __asm __volatile("invvpid %[desc], %[type];" + VMX_SET_ERROR_CODE_ASM + : [error] "=r" (error) + : [desc] "m" (desc), [type] "r" (type) + : "memory"); + + if (error) + panic("invvpid error %d", error); +} + /* * Invalidate guest mappings identified by its vpid from the TLB. */ @@ -1190,7 +1211,6 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap) { struct vmxstate *vmxstate; -#ifndef __FreeBSD__ /* * Regardless of whether the VM appears to have migrated between CPUs, * save the host sysenter stack pointer. As it points to the kernel @@ -1203,8 +1223,7 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap) * Perform any needed TSC_OFFSET adjustment based on TSC_MSR writes or * migration between host CPUs with differing TSC values. */ - VERIFY0(vmx_apply_tsc_adjust(vmx, vcpu)); -#endif + vmx_apply_tsc_adjust(vmx, vcpu); vmxstate = &vmx->state[vcpu]; if (vmxstate->lastcpu == curcpu) @@ -1214,10 +1233,8 @@ vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu, pmap_t pmap) vmm_stat_incr(vmx->vm, vcpu, VCPU_MIGRATIONS, 1); -#ifndef __FreeBSD__ /* Load the per-CPU IDT address */ vmcs_write(VMCS_HOST_IDTR_BASE, vmm_get_host_idtrbase()); -#endif vmcs_write(VMCS_HOST_TR_BASE, vmm_get_host_trbase()); vmcs_write(VMCS_HOST_GDTR_BASE, vmm_get_host_gdtrbase()); vmcs_write(VMCS_HOST_GS_BASE, vmm_get_host_gsbase()); @@ -1273,23 +1290,6 @@ vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu) VCPU_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting"); } -#ifdef __FreeBSD__ -int -vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset) -{ - int error; - - if ((vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET) == 0) { - vmx->cap[vcpu].proc_ctls |= PROCBASED_TSC_OFFSET; - vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); - VCPU_CTR0(vmx->vm, vcpu, "Enabling TSC offsetting"); - } - - error = vmwrite(VMCS_TSC_OFFSET, offset); - - return (error); -} -#else /* __FreeBSD__ */ /* * Set the TSC adjustment, taking into account the offsets measured between * host physical CPUs. This is required even if the guest has not set a TSC @@ -1297,24 +1297,20 @@ vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset) * migrated onto. Without this mitigation, un-synched host TSCs will convey * the appearance of TSC time-travel to the guest as its vCPUs migrate. */ -static int +static void vmx_apply_tsc_adjust(struct vmx *vmx, int vcpu) { extern hrtime_t tsc_gethrtime_tick_delta(void); const uint64_t target_offset = (vcpu_tsc_offset(vmx->vm, vcpu) + (uint64_t)tsc_gethrtime_tick_delta()); - int error = 0; ASSERT(vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET); if (vmx->tsc_offset_active[vcpu] != target_offset) { - error = vmwrite(VMCS_TSC_OFFSET, target_offset); + vmcs_write(VMCS_TSC_OFFSET, target_offset); vmx->tsc_offset_active[vcpu] = target_offset; } - - return (error); } -#endif /* __FreeBSD__ */ #define NMI_BLOCKING (VMCS_INTERRUPTIBILITY_NMI_BLOCKING | \ VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING) @@ -2224,9 +2220,7 @@ emulate_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) static int emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu) { - struct vmxctx *vmxctx; uint64_t result; - uint32_t eax, edx; int error; if (lapic_msr(num)) @@ -2235,14 +2229,8 @@ emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu) error = vmx_rdmsr(vmx, vcpuid, num, &result, retu); if (error == 0) { - eax = result; - vmxctx = &vmx->ctx[vcpuid]; - error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RAX, eax); - KASSERT(error == 0, ("vmxctx_setreg(rax) error %d", error)); - - edx = result >> 32; - error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RDX, edx); - KASSERT(error == 0, ("vmxctx_setreg(rdx) error %d", error)); + vmx->ctx[vcpuid].guest_rax = (uint32_t)result; + vmx->ctx[vcpuid].guest_rdx = result >> 32; } return (error); @@ -2580,9 +2568,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) } if (intr_vec == IDT_PF) { - error = vmxctx_setreg(vmxctx, VM_REG_GUEST_CR2, qual); - KASSERT(error == 0, ("%s: vmxctx_setreg(cr2) error %d", - __func__, error)); + vmxctx->guest_cr2 = qual; } /* @@ -2879,7 +2865,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, struct vmx *vmx; struct vm *vm; struct vmxctx *vmxctx; - struct vmcs *vmcs; + uintptr_t vmcs_pa; struct vm_exit *vmexit; struct vlapic *vlapic; uint32_t exit_reason; @@ -2890,7 +2876,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, vmx = arg; vm = vmx->vm; - vmcs = &vmx->vmcs[vcpu]; + vmcs_pa = vmx->vmcs_pa[vcpu]; vmxctx = &vmx->ctx[vcpu]; vlapic = vm_lapic(vm, vcpu); vmexit = vm_exitinfo(vm, vcpu); @@ -2901,7 +2887,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, vmx_msr_guest_enter(vmx, vcpu); - VMPTRLD(vmcs); + vmcs_load(vmcs_pa); #ifndef __FreeBSD__ VERIFY(vmx->vmcs_state[vcpu] == VS_NONE && curthread->t_preempt != 0); @@ -3115,7 +3101,7 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap, VCPU_CTR1(vm, vcpu, "returning from vmx_run: exitcode %d", vmexit->exitcode); - VMCLEAR(vmcs); + vmcs_clear(vmcs_pa); vmx_msr_guest_exit(vmx, vcpu); #ifndef __FreeBSD__ @@ -3148,7 +3134,6 @@ vmx_vmcleanup(void *arg) static register_t * vmxctx_regptr(struct vmxctx *vmxctx, int reg) { - switch (reg) { case VM_REG_GUEST_RAX: return (&vmxctx->guest_rax); @@ -3199,157 +3184,129 @@ vmxctx_regptr(struct vmxctx *vmxctx, int reg) } static int -vmxctx_getreg(struct vmxctx *vmxctx, int reg, uint64_t *retval) +vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval) { + int running, hostcpu, err; + struct vmx *vmx = arg; register_t *regp; - if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) { - *retval = *regp; - return (0); - } else - return (EINVAL); -} - -static int -vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val) -{ - register_t *regp; + running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); + if (running && hostcpu != curcpu) + panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu); - if ((regp = vmxctx_regptr(vmxctx, reg)) != NULL) { - *regp = val; + /* VMCS access not required for ctx reads */ + if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) { + *retval = *regp; return (0); - } else - return (EINVAL); -} - -static int -vmx_get_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t *retval) -{ - uint64_t gi; - int error; - - error = vmcs_getreg(&vmx->vmcs[vcpu], running, - VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY), &gi); - *retval = (gi & HWINTR_BLOCKING) ? 1 : 0; - return (error); -} - -static int -vmx_modify_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t val) -{ - struct vmcs *vmcs; - uint64_t gi; - int error, ident; - - /* - * Forcing the vcpu into an interrupt shadow is not supported. - */ - if (val) { - error = EINVAL; - goto done; } - vmcs = &vmx->vmcs[vcpu]; - ident = VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY); - error = vmcs_getreg(vmcs, running, ident, &gi); - if (error == 0) { - gi &= ~HWINTR_BLOCKING; - error = vmcs_setreg(vmcs, running, ident, gi); + if (!running) { + vmcs_load(vmx->vmcs_pa[vcpu]); } -done: - VCPU_CTR2(vmx->vm, vcpu, "Setting intr_shadow to %#lx %s", val, - error ? "failed" : "succeeded"); - return (error); -} - -static int -vmx_shadow_reg(int reg) -{ - int shreg; - shreg = -1; + err = EINVAL; + if (reg == VM_REG_GUEST_INTR_SHADOW) { + uint64_t gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); + *retval = (gi & HWINTR_BLOCKING) ? 1 : 0; + err = 0; + } else { + uint32_t encoding; - switch (reg) { - case VM_REG_GUEST_CR0: - shreg = VMCS_CR0_SHADOW; - break; - case VM_REG_GUEST_CR4: - shreg = VMCS_CR4_SHADOW; - break; - default: - break; + encoding = vmcs_field_encoding(reg); + if (encoding != VMCS_INVALID_ENCODING) { + *retval = vmcs_read(encoding); + err = 0; + } } - return (shreg); -} - -static int -vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval) -{ - int running, hostcpu; - struct vmx *vmx = arg; - - running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); - if (running && hostcpu != curcpu) - panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu); - - if (reg == VM_REG_GUEST_INTR_SHADOW) - return (vmx_get_intr_shadow(vmx, vcpu, running, retval)); - - if (vmxctx_getreg(&vmx->ctx[vcpu], reg, retval) == 0) - return (0); + if (!running) { + vmcs_clear(vmx->vmcs_pa[vcpu]); + } - return (vmcs_getreg(&vmx->vmcs[vcpu], running, reg, retval)); + return (err); } static int vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) { - int error, hostcpu, running, shadow; - uint64_t ctls; - pmap_t pmap; + int running, hostcpu, error; struct vmx *vmx = arg; + register_t *regp; running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu); - if (reg == VM_REG_GUEST_INTR_SHADOW) - return (vmx_modify_intr_shadow(vmx, vcpu, running, val)); - - if (vmxctx_setreg(&vmx->ctx[vcpu], reg, val) == 0) + /* VMCS access not required for ctx writes */ + if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) { + *regp = val; return (0); + } - error = vmcs_setreg(&vmx->vmcs[vcpu], running, reg, val); - - if (error == 0) { - /* - * If the "load EFER" VM-entry control is 1 then the - * value of EFER.LMA must be identical to "IA-32e mode guest" - * bit in the VM-entry control. - */ - if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0 && - (reg == VM_REG_GUEST_EFER)) { - vmcs_getreg(&vmx->vmcs[vcpu], running, - VMCS_IDENT(VMCS_ENTRY_CTLS), &ctls); - if (val & EFER_LMA) - ctls |= VM_ENTRY_GUEST_LMA; - else - ctls &= ~VM_ENTRY_GUEST_LMA; - vmcs_setreg(&vmx->vmcs[vcpu], running, - VMCS_IDENT(VMCS_ENTRY_CTLS), ctls); - } + if (!running) { + vmcs_load(vmx->vmcs_pa[vcpu]); + } - shadow = vmx_shadow_reg(reg); - if (shadow > 0) { + if (reg == VM_REG_GUEST_INTR_SHADOW) { + if (val != 0) { /* - * Store the unmodified value in the shadow + * Forcing the vcpu into an interrupt shadow is not + * presently supported. */ - error = vmcs_setreg(&vmx->vmcs[vcpu], running, - VMCS_IDENT(shadow), val); + error = EINVAL; + } else { + uint64_t gi; + + gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); + gi &= ~HWINTR_BLOCKING; + vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); + error = 0; } + } else { + uint32_t encoding; - if (reg == VM_REG_GUEST_CR3) { + error = 0; + encoding = vmcs_field_encoding(reg); + switch (encoding) { + case VMCS_GUEST_IA32_EFER: + /* + * If the "load EFER" VM-entry control is 1 then the + * value of EFER.LMA must be identical to "IA-32e mode + * guest" bit in the VM-entry control. + */ + if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0) { + uint64_t ctls; + + ctls = vmcs_read(VMCS_ENTRY_CTLS); + if (val & EFER_LMA) { + ctls |= VM_ENTRY_GUEST_LMA; + } else { + ctls &= ~VM_ENTRY_GUEST_LMA; + } + vmcs_write(VMCS_ENTRY_CTLS, ctls); + } + vmcs_write(encoding, val); + break; + case VMCS_GUEST_CR0: + /* + * The guest is not allowed to modify certain bits in + * %cr0 and %cr4. To maintain the illusion of full + * control, they have shadow versions which contain the + * guest-perceived (via reads from the register) values + * as opposed to the guest-effective values. + * + * This is detailed in the SDM: Vol. 3 Ch. 24.6.6. + */ + vmcs_write(VMCS_CR0_SHADOW, val); + vmcs_write(encoding, vmx_fix_cr0(val)); + break; + case VMCS_GUEST_CR4: + /* See above for detail on %cr4 shadowing */ + vmcs_write(VMCS_CR4_SHADOW, val); + vmcs_write(encoding, vmx_fix_cr4(val)); + break; + case VMCS_GUEST_CR3: + vmcs_write(encoding, val); /* * Invalidate the guest vcpu's TLB mappings to emulate * the behavior of updating %cr3. @@ -3357,38 +3314,80 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) * XXX the processor retains global mappings when %cr3 * is updated but vmx_invvpid() does not. */ - pmap = vmx->ctx[vcpu].pmap; - vmx_invvpid(vmx, vcpu, pmap, running); + vmx_invvpid(vmx, vcpu, vmx->ctx[vcpu].pmap, running); + break; + case VMCS_INVALID_ENCODING: + error = EINVAL; + break; + default: + vmcs_write(encoding, val); + break; } } + if (!running) { + vmcs_clear(vmx->vmcs_pa[vcpu]); + } + return (error); } static int -vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc) { int hostcpu, running; struct vmx *vmx = arg; + uint32_t base, limit, access; running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("vmx_getdesc: %s%d is running", vm_name(vmx->vm), vcpu); - return (vmcs_getdesc(&vmx->vmcs[vcpu], running, reg, desc)); + if (!running) { + vmcs_load(vmx->vmcs_pa[vcpu]); + } + + vmcs_seg_desc_encoding(seg, &base, &limit, &access); + desc->base = vmcs_read(base); + desc->limit = vmcs_read(limit); + if (access != VMCS_INVALID_ENCODING) { + desc->access = vmcs_read(access); + } else { + desc->access = 0; + } + + if (!running) { + vmcs_clear(vmx->vmcs_pa[vcpu]); + } + return (0); } static int -vmx_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) +vmx_setdesc(void *arg, int vcpu, int seg, struct seg_desc *desc) { int hostcpu, running; struct vmx *vmx = arg; + uint32_t base, limit, access; running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); if (running && hostcpu != curcpu) panic("vmx_setdesc: %s%d is running", vm_name(vmx->vm), vcpu); - return (vmcs_setdesc(&vmx->vmcs[vcpu], running, reg, desc)); + if (!running) { + vmcs_load(vmx->vmcs_pa[vcpu]); + } + + vmcs_seg_desc_encoding(seg, &base, &limit, &access); + vmcs_write(base, desc->base); + vmcs_write(limit, desc->limit); + if (access != VMCS_INVALID_ENCODING) { + vmcs_write(access, desc->access); + } + + if (!running) { + vmcs_clear(vmx->vmcs_pa[vcpu]); + } + return (0); } static int @@ -3436,21 +3435,17 @@ static int vmx_setcap(void *arg, int vcpu, int type, int val) { struct vmx *vmx = arg; - struct vmcs *vmcs = &vmx->vmcs[vcpu]; - uint32_t baseval; + uint32_t baseval, reg, flag; uint32_t *pptr; int error; - int flag; - int reg; - int retval; - retval = ENOENT; + error = ENOENT; pptr = NULL; switch (type) { case VM_CAP_HALT_EXIT: if (cap_halt_exit) { - retval = 0; + error = 0; pptr = &vmx->cap[vcpu].proc_ctls; baseval = *pptr; flag = PROCBASED_HLT_EXITING; @@ -3459,7 +3454,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) break; case VM_CAP_MTRAP_EXIT: if (cap_monitor_trap) { - retval = 0; + error = 0; pptr = &vmx->cap[vcpu].proc_ctls; baseval = *pptr; flag = PROCBASED_MTF; @@ -3468,7 +3463,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) break; case VM_CAP_PAUSE_EXIT: if (cap_pause_exit) { - retval = 0; + error = 0; pptr = &vmx->cap[vcpu].proc_ctls; baseval = *pptr; flag = PROCBASED_PAUSE_EXITING; @@ -3477,7 +3472,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) break; case VM_CAP_ENABLE_INVPCID: if (cap_invpcid) { - retval = 0; + error = 0; pptr = &vmx->cap[vcpu].proc_ctls2; baseval = *pptr; flag = PROCBASED2_ENABLE_INVPCID; @@ -3485,7 +3480,7 @@ vmx_setcap(void *arg, int vcpu, int type, int val) } break; case VM_CAP_BPT_EXIT: - retval = 0; + error = 0; /* Don't change the bitmap if we are tracing all exceptions. */ if (vmx->cap[vcpu].exc_bitmap != 0xffffffff) { @@ -3499,8 +3494,9 @@ vmx_setcap(void *arg, int vcpu, int type, int val) break; } - if (retval) - return (retval); + if (error != 0) { + return (error); + } if (pptr != NULL) { if (val) { @@ -3508,12 +3504,9 @@ vmx_setcap(void *arg, int vcpu, int type, int val) } else { baseval &= ~flag; } - VMPTRLD(vmcs); - error = vmwrite(reg, baseval); - VMCLEAR(vmcs); - - if (error) - return (error); + vmcs_load(vmx->vmcs_pa[vcpu]); + vmcs_write(reg, baseval); + vmcs_clear(vmx->vmcs_pa[vcpu]); /* * Update optional stored flags, and record @@ -3715,13 +3708,11 @@ static void vmx_enable_x2apic_mode_ts(struct vlapic *vlapic) { struct vmx *vmx; - struct vmcs *vmcs; uint32_t proc_ctls; int vcpuid; vcpuid = vlapic->vcpuid; vmx = ((struct vlapic_vtx *)vlapic)->vmx; - vmcs = &vmx->vmcs[vcpuid]; proc_ctls = vmx->cap[vcpuid].proc_ctls; proc_ctls &= ~PROCBASED_USE_TPR_SHADOW; @@ -3729,22 +3720,20 @@ vmx_enable_x2apic_mode_ts(struct vlapic *vlapic) proc_ctls |= PROCBASED_CR8_STORE_EXITING; vmx->cap[vcpuid].proc_ctls = proc_ctls; - VMPTRLD(vmcs); + vmcs_load(vmx->vmcs_pa[vcpuid]); vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); - VMCLEAR(vmcs); + vmcs_clear(vmx->vmcs_pa[vcpuid]); } static void vmx_enable_x2apic_mode_vid(struct vlapic *vlapic) { struct vmx *vmx; - struct vmcs *vmcs; uint32_t proc_ctls2; int vcpuid, error; vcpuid = vlapic->vcpuid; vmx = ((struct vlapic_vtx *)vlapic)->vmx; - vmcs = &vmx->vmcs[vcpuid]; proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; KASSERT((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) != 0, @@ -3754,9 +3743,9 @@ vmx_enable_x2apic_mode_vid(struct vlapic *vlapic) proc_ctls2 |= PROCBASED2_VIRTUALIZE_X2APIC_MODE; vmx->cap[vcpuid].proc_ctls2 = proc_ctls2; - VMPTRLD(vmcs); + vmcs_load(vmx->vmcs_pa[vcpuid]); vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc_ctls2); - VMCLEAR(vmcs); + vmcs_clear(vmx->vmcs_pa[vcpuid]); if (vlapic->vcpuid == 0) { /* @@ -3932,10 +3921,9 @@ static void vmx_savectx(void *arg, int vcpu) { struct vmx *vmx = arg; - struct vmcs *vmcs = &vmx->vmcs[vcpu]; if ((vmx->vmcs_state[vcpu] & VS_LOADED) != 0) { - VERIFY3U(vmclear(vmcs), ==, 0); + vmcs_clear(vmx->vmcs_pa[vcpu]); vmx_msr_guest_exit(vmx, vcpu); /* * Having VMCLEARed the VMCS, it can no longer be re-entered @@ -3951,13 +3939,12 @@ static void vmx_restorectx(void *arg, int vcpu) { struct vmx *vmx = arg; - struct vmcs *vmcs = &vmx->vmcs[vcpu]; ASSERT0(vmx->vmcs_state[vcpu] & VS_LAUNCHED); if ((vmx->vmcs_state[vcpu] & VS_LOADED) != 0) { vmx_msr_guest_enter(vmx, vcpu); - VERIFY3U(vmptrld(vmcs), ==, 0); + vmcs_load(vmx->vmcs_pa[vcpu]); } } #endif /* __FreeBSD__ */ diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx.h b/usr/src/uts/i86pc/io/vmm/intel/vmx.h index 0fd723f9c9..a5647e0b87 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx.h +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx.h @@ -156,6 +156,7 @@ struct vmx { uint64_t host_msrs[VM_MAXCPU][GUEST_MSR_NUM]; uint64_t tsc_offset_active[VM_MAXCPU]; vmcs_state_t vmcs_state[VM_MAXCPU]; + uintptr_t vmcs_pa[VM_MAXCPU]; #endif struct vmxctx ctx[VM_MAXCPU]; struct vmxcap cap[VM_MAXCPU]; @@ -175,17 +176,38 @@ vmx_cap_en(const struct vmx *vmx, enum vmx_caps cap) return ((vmx->vmx_caps & cap) == cap); } + +/* + * Section 5.2 "Conventions" from Intel Architecture Manual 2B. + * + * error + * VMsucceed 0 + * VMFailInvalid 1 + * VMFailValid 2 see also VMCS VM-Instruction Error Field + */ +#define VM_SUCCESS 0 +#define VM_FAIL_INVALID 1 +#define VM_FAIL_VALID 2 +#define VMX_SET_ERROR_CODE_ASM \ + " jnc 1f;" \ + " mov $1, %[error];" /* CF: error = 1 */ \ + " jmp 3f;" \ + "1: jnz 2f;" \ + " mov $2, %[error];" /* ZF: error = 2 */ \ + " jmp 3f;" \ + "2: mov $0, %[error];" \ + "3:" + + #define VMX_GUEST_VMEXIT 0 #define VMX_VMRESUME_ERROR 1 #define VMX_VMLAUNCH_ERROR 2 #define VMX_INVEPT_ERROR 3 #define VMX_VMWRITE_ERROR 4 + int vmx_enter_guest(struct vmxctx *ctx, struct vmx *vmx, int launched); void vmx_call_isr(uintptr_t entry); -u_long vmx_fix_cr0(u_long cr0); -u_long vmx_fix_cr4(u_long cr4); - int vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset); extern char vmx_exit_guest[]; diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h b/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h deleted file mode 100644 index f0c5ba7691..0000000000 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx_cpufunc.h +++ /dev/null @@ -1,244 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ -/* - * This file and its contents are supplied under the terms of the - * Common Development and Distribution License ("CDDL"), version 1.0. - * You may only use this file in accordance with the terms of version - * 1.0 of the CDDL. - * - * A full copy of the text of the CDDL should have accompanied this - * source. A copy of the CDDL is also available via the Internet at - * http://www.illumos.org/license/CDDL. - * - * Copyright 2014 Pluribus Networks Inc. - * Copyright 2017 Joyent, Inc. - */ - -#ifndef _VMX_CPUFUNC_H_ -#define _VMX_CPUFUNC_H_ - -struct vmcs; - -/* - * Section 5.2 "Conventions" from Intel Architecture Manual 2B. - * - * error - * VMsucceed 0 - * VMFailInvalid 1 - * VMFailValid 2 see also VMCS VM-Instruction Error Field - */ -#define VM_SUCCESS 0 -#define VM_FAIL_INVALID 1 -#define VM_FAIL_VALID 2 -#define VMX_SET_ERROR_CODE \ - " jnc 1f;" \ - " mov $1, %[error];" /* CF: error = 1 */ \ - " jmp 3f;" \ - "1: jnz 2f;" \ - " mov $2, %[error];" /* ZF: error = 2 */ \ - " jmp 3f;" \ - "2: mov $0, %[error];" \ - "3:" - -/* returns 0 on success and non-zero on failure */ -static __inline int -vmxon(char *region) -{ - int error; - uint64_t addr; - -#ifdef __FreeBSD__ - addr = vtophys(region); -#else - /* This is pre-translated in illumos */ - addr = (uint64_t)region; -#endif - __asm __volatile("vmxon %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [addr] "m" (*(uint64_t *)&addr) - : "memory"); - - return (error); -} - -#ifdef __FreeBSD__ -/* returns 0 on success and non-zero on failure */ -static __inline int -vmclear(struct vmcs *vmcs) -{ - int error; - uint64_t addr; - - addr = vtophys(vmcs); - __asm __volatile("vmclear %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [addr] "m" (*(uint64_t *)&addr) - : "memory"); - return (error); -} -#endif /* __FreeBSD__ */ - -static __inline void -vmxoff(void) -{ - - __asm __volatile("vmxoff"); -} - -static __inline void -vmptrst(uint64_t *addr) -{ - - __asm __volatile("vmptrst %[addr]" :: [addr]"m" (*addr) : "memory"); -} - -#ifdef __FreeBSD__ -static __inline int -vmptrld(struct vmcs *vmcs) -{ - int error; - uint64_t addr; - - addr = vtophys(vmcs); - __asm __volatile("vmptrld %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [addr] "m" (*(uint64_t *)&addr) - : "memory"); - return (error); -} -#endif /* __FreeBSD__ */ - -static __inline int -vmwrite(uint64_t reg, uint64_t val) -{ - int error; - - __asm __volatile("vmwrite %[val], %[reg];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [val] "r" (val), [reg] "r" (reg) - : "memory"); - - return (error); -} - -static __inline int -vmread(uint64_t r, uint64_t *addr) -{ - int error; - - __asm __volatile("vmread %[r], %[addr];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [r] "r" (r), [addr] "m" (*addr) - : "memory"); - - return (error); -} - -#ifdef __FreeBSD__ -static __inline void -VMCLEAR(struct vmcs *vmcs) -{ - int err; - - err = vmclear(vmcs); - if (err != 0) - panic("%s: vmclear(%p) error %d", __func__, vmcs, err); - - critical_exit(); -} - -static __inline void -VMPTRLD(struct vmcs *vmcs) -{ - int err; - - critical_enter(); - - err = vmptrld(vmcs); - if (err != 0) - panic("%s: vmptrld(%p) error %d", __func__, vmcs, err); -} -#endif /* __FreeBSD__ */ - -#define INVVPID_TYPE_ADDRESS 0UL -#define INVVPID_TYPE_SINGLE_CONTEXT 1UL -#define INVVPID_TYPE_ALL_CONTEXTS 2UL - -struct invvpid_desc { - uint16_t vpid; - uint16_t _res1; - uint32_t _res2; - uint64_t linear_addr; -}; -CTASSERT(sizeof(struct invvpid_desc) == 16); - -static __inline void -invvpid(uint64_t type, struct invvpid_desc desc) -{ - int error; - - __asm __volatile("invvpid %[desc], %[type];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [desc] "m" (desc), [type] "r" (type) - : "memory"); - - if (error) - panic("invvpid error %d", error); -} - -#define INVEPT_TYPE_SINGLE_CONTEXT 1UL -#define INVEPT_TYPE_ALL_CONTEXTS 2UL -struct invept_desc { - uint64_t eptp; - uint64_t _res; -}; -CTASSERT(sizeof(struct invept_desc) == 16); - -static __inline void -invept(uint64_t type, struct invept_desc desc) -{ - int error; - - __asm __volatile("invept %[desc], %[type];" - VMX_SET_ERROR_CODE - : [error] "=r" (error) - : [desc] "m" (desc), [type] "r" (type) - : "memory"); - - if (error) - panic("invept error %d", error); -} -#endif diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c index 6c37c9c234..cfdf2bfe05 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.c @@ -62,13 +62,6 @@ vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos) return ((msr_val & (1UL << bitpos)) == 0); } -uint32_t -vmx_revision(void) -{ - - return (rdmsr(MSR_VMX_BASIC) & 0xffffffff); -} - /* * Generate a bitmask to be used for the VMCS execution control fields. * diff --git a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h index ac2adb0dd1..848cdea26b 100644 --- a/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h +++ b/usr/src/uts/i86pc/io/vmm/intel/vmx_msr.h @@ -40,8 +40,6 @@ void vmx_msr_guest_exit(struct vmx *vmx, int vcpuid); int vmx_rdmsr(struct vmx *, int vcpuid, u_int num, uint64_t *val, bool *retu); int vmx_wrmsr(struct vmx *, int vcpuid, u_int num, uint64_t val, bool *retu); -uint32_t vmx_revision(void); - int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask, uint32_t zeros_mask, uint32_t *retval); diff --git a/usr/src/uts/i86pc/os/hma.c b/usr/src/uts/i86pc/os/hma.c index a41ff3e0d1..0e84030ac1 100644 --- a/usr/src/uts/i86pc/os/hma.c +++ b/usr/src/uts/i86pc/os/hma.c @@ -11,6 +11,7 @@ /* * Copyright 2019 Joyent, Inc. + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. */ #include <sys/cpuvar.h> @@ -33,6 +34,7 @@ struct hma_reg { static kmutex_t hma_lock; static list_t hma_registrations; static boolean_t hma_exclusive = B_FALSE; +int hma_disable = 0; static boolean_t hma_vmx_ready = B_FALSE; static const char *hma_vmx_error = NULL; @@ -89,6 +91,11 @@ hma_init(void) list_create(&hma_registrations, sizeof (struct hma_reg), offsetof(struct hma_reg, hr_node)); + if (hma_disable != 0) { + cmn_err(CE_CONT, "?hma_init: disabled"); + return; + } + switch (cpuid_getvendor(CPU)) { case X86_VENDOR_Intel: (void) hma_vmx_init(); diff --git a/usr/src/uts/sun4u/chicago/io/fpc/fpc-impl-4u.c b/usr/src/uts/sun4u/chicago/io/fpc/fpc-impl-4u.c index aa96f19079..a8ceea0344 100644 --- a/usr/src/uts/sun4u/chicago/io/fpc/fpc-impl-4u.c +++ b/usr/src/uts/sun4u/chicago/io/fpc/fpc-impl-4u.c @@ -24,6 +24,10 @@ * Use is subject to license terms. */ +/* + * Copyright 2020 Nexenta by DDN, Inc. All rights reserved. + */ + #include <sys/file.h> #include <sys/sunndi.h> #include <sys/sunddi.h> @@ -101,7 +105,6 @@ static uint64_t counter_reg_offsets[] = { static ldi_ident_t ldi_identifier; static boolean_t ldi_identifier_valid = B_FALSE; -static cred_t *credentials = NULL; /* Called by _init to determine if it is OK to install driver. */ int @@ -116,7 +119,6 @@ fpc_platform_module_init(dev_info_t *dip) { int status; - credentials = crget(); status = ldi_ident_from_dip(dip, &ldi_identifier); if (status == 0) ldi_identifier_valid = B_TRUE; @@ -211,8 +213,6 @@ fpc_platform_module_fini(dev_info_t *dip) { if (ldi_identifier_valid) ldi_ident_release(ldi_identifier); - if (credentials) - crfree(credentials); } fire_perfreg_handle_t @@ -226,7 +226,7 @@ fpc_get_perfreg_handle(int devnum) if ((handle_impl->devspec = fpc_get_platform_data_by_number(devnum)) != NULL) { rval = ldi_open_by_name(handle_impl->devspec->nodename, - OPEN_FLAGS, credentials, &handle_impl->devhandle, + OPEN_FLAGS, kcred, &handle_impl->devhandle, ldi_identifier); } @@ -243,7 +243,7 @@ fpc_free_counter_handle(fire_perfreg_handle_t handle) { fire_counter_handle_impl_t *handle_impl = (fire_counter_handle_impl_t *)handle; - (void) ldi_close(handle_impl->devhandle, OPEN_FLAGS, credentials); + (void) ldi_close(handle_impl->devhandle, OPEN_FLAGS, kcred); kmem_free(handle_impl, sizeof (fire_counter_handle_impl_t)); return (SUCCESS); } @@ -281,7 +281,7 @@ fpc_event_io(fire_perfreg_handle_t handle, fire_perfcnt_t group, /* Read original value. */ if (((rval = ldi_ioctl(handle_impl->devhandle, cmd, (intptr_t)&prg, - FKIOCTL, credentials, &ioctl_rval)) == SUCCESS) && (!is_write)) { + FKIOCTL, kcred, &ioctl_rval)) == SUCCESS) && (!is_write)) { *reg_data = prg.data; } @@ -322,7 +322,7 @@ fpc_counter_io(fire_perfreg_handle_t handle, fire_perfcnt_t group, prg.data = *value; if (((rval = ldi_ioctl(handle_impl->devhandle, command, (intptr_t)&prg, - FKIOCTL, credentials, &ioctl_rval)) == SUCCESS) && (!is_write)) { + FKIOCTL, kcred, &ioctl_rval)) == SUCCESS) && (!is_write)) { *value = prg.data; } |