Diffstat (limited to 'sysutils/xenkernel411/patches/patch-XSA321')
-rw-r--r-- | sysutils/xenkernel411/patches/patch-XSA321 | 586
1 file changed, 586 insertions, 0 deletions
diff --git a/sysutils/xenkernel411/patches/patch-XSA321 b/sysutils/xenkernel411/patches/patch-XSA321
new file mode 100644
index 00000000000..97c42b50a3f
--- /dev/null
+++ b/sysutils/xenkernel411/patches/patch-XSA321
@@ -0,0 +1,586 @@
+$NetBSD: patch-XSA321,v 1.2.2.2 2020/08/28 15:37:49 bsiegert Exp $
+
+From: Jan Beulich <jbeulich@suse.com>
+Subject: vtd: improve IOMMU TLB flush
+
+Do not limit PSI flushes to order 0 pages, in order to avoid doing a
+full TLB flush if the passed in page has an order greater than 0 and
+is aligned. Should increase the performance of IOMMU TLB flushes when
+dealing with page orders greater than 0.
+
+This is part of XSA-321.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+
+--- xen/drivers/passthrough/vtd/iommu.c.orig
++++ xen/drivers/passthrough/vtd/iommu.c
+@@ -612,13 +612,14 @@ static int __must_check iommu_flush_iotl
+         if ( iommu_domid == -1 )
+             continue;
+ 
+-        if ( page_count != 1 || gfn == gfn_x(INVALID_GFN) )
++        if ( !page_count || (page_count & (page_count - 1)) ||
++             gfn == gfn_x(INVALID_GFN) || !IS_ALIGNED(gfn, page_count) )
+             rc = iommu_flush_iotlb_dsi(iommu, iommu_domid,
+                                        0, flush_dev_iotlb);
+         else
+             rc = iommu_flush_iotlb_psi(iommu, iommu_domid,
+                                        (paddr_t)gfn << PAGE_SHIFT_4K,
+-                                       PAGE_ORDER_4K,
++                                       get_order_from_pages(page_count),
+                                        !dma_old_pte_present,
+                                        flush_dev_iotlb);
+ 
+From: <security@xenproject.org>
+Subject: vtd: prune (and rename) cache flush functions
+
+Rename __iommu_flush_cache to iommu_sync_cache and remove
+iommu_flush_cache_page. Also remove the iommu_flush_cache_entry
+wrapper and just use iommu_sync_cache instead. Note the _entry suffix
+was meaningless as the wrapper was already taking a size parameter in
+bytes. While there also constify the addr parameter.
+
+No functional change intended.
+
+This is part of XSA-321.
+
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+--- xen/drivers/passthrough/vtd/extern.h.orig
++++ xen/drivers/passthrough/vtd/extern.h
+@@ -37,8 +37,7 @@ void disable_qinval(struct iommu *iommu)
+ int enable_intremap(struct iommu *iommu, int eim);
+ void disable_intremap(struct iommu *iommu);
+ 
+-void iommu_flush_cache_entry(void *addr, unsigned int size);
+-void iommu_flush_cache_page(void *addr, unsigned long npages);
++void iommu_sync_cache(const void *addr, unsigned int size);
+ int iommu_alloc(struct acpi_drhd_unit *drhd);
+ void iommu_free(struct acpi_drhd_unit *drhd);
+ 
+--- xen/drivers/passthrough/vtd/intremap.c.orig
++++ xen/drivers/passthrough/vtd/intremap.c
+@@ -231,7 +231,7 @@ static void free_remap_entry(struct iomm
+                      iremap_entries, iremap_entry);
+ 
+     update_irte(iommu, iremap_entry, &new_ire, false);
+-    iommu_flush_cache_entry(iremap_entry, sizeof(*iremap_entry));
++    iommu_sync_cache(iremap_entry, sizeof(*iremap_entry));
+     iommu_flush_iec_index(iommu, 0, index);
+ 
+     unmap_vtd_domain_page(iremap_entries);
+@@ -403,7 +403,7 @@ static int ioapic_rte_to_remap_entry(str
+     }
+ 
+     update_irte(iommu, iremap_entry, &new_ire, !init);
+-    iommu_flush_cache_entry(iremap_entry, sizeof(*iremap_entry));
++    iommu_sync_cache(iremap_entry, sizeof(*iremap_entry));
+     iommu_flush_iec_index(iommu, 0, index);
+ 
+     unmap_vtd_domain_page(iremap_entries);
+@@ -694,7 +694,7 @@ static int msi_msg_to_remap_entry(
+     update_irte(iommu, iremap_entry, &new_ire, msi_desc->irte_initialized);
+     msi_desc->irte_initialized = true;
+ 
+-    iommu_flush_cache_entry(iremap_entry, sizeof(*iremap_entry));
++    iommu_sync_cache(iremap_entry, sizeof(*iremap_entry));
+     iommu_flush_iec_index(iommu, 0, index);
+ 
+     unmap_vtd_domain_page(iremap_entries);
+--- xen/drivers/passthrough/vtd/iommu.c.orig
++++ xen/drivers/passthrough/vtd/iommu.c
+@@ -158,7 +158,8 @@ static void __init free_intel_iommu(stru
+ }
+ 
+ static int iommus_incoherent;
+-static void __iommu_flush_cache(void *addr, unsigned int size)
++
++void iommu_sync_cache(const void *addr, unsigned int size)
+ {
+     int i;
+     static unsigned int clflush_size = 0;
+@@ -173,16 +174,6 @@ static void __iommu_flush_cache(void *ad
+         cacheline_flush((char *)addr + i);
+ }
+ 
+-void iommu_flush_cache_entry(void *addr, unsigned int size)
+-{
+-    __iommu_flush_cache(addr, size);
+-}
+-
+-void iommu_flush_cache_page(void *addr, unsigned long npages)
+-{
+-    __iommu_flush_cache(addr, PAGE_SIZE * npages);
+-}
+-
+ /* Allocate page table, return its machine address */
+ u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages)
+ {
+@@ -207,7 +198,7 @@ u64 alloc_pgtable_maddr(struct acpi_drhd
+         vaddr = __map_domain_page(cur_pg);
+         memset(vaddr, 0, PAGE_SIZE);
+ 
+-        iommu_flush_cache_page(vaddr, 1);
++        iommu_sync_cache(vaddr, PAGE_SIZE);
+         unmap_domain_page(vaddr);
+         cur_pg++;
+     }
+@@ -242,7 +233,7 @@ static u64 bus_to_context_maddr(struct i
+         }
+         set_root_value(*root, maddr);
+         set_root_present(*root);
+-        iommu_flush_cache_entry(root, sizeof(struct root_entry));
++        iommu_sync_cache(root, sizeof(struct root_entry));
+     }
+     maddr = (u64) get_context_addr(*root);
+     unmap_vtd_domain_page(root_entries);
+@@ -300,7 +291,7 @@ static u64 addr_to_dma_page_maddr(struct
+          */
+         dma_set_pte_readable(*pte);
+         dma_set_pte_writable(*pte);
+-        iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
++        iommu_sync_cache(pte, sizeof(struct dma_pte));
+     }
+ 
+     if ( level == 2 )
+@@ -674,7 +665,7 @@ static int __must_check dma_pte_clear_on
+ 
+     dma_clear_pte(*pte);
+     spin_unlock(&hd->arch.mapping_lock);
+-    iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
++    iommu_sync_cache(pte, sizeof(struct dma_pte));
+ 
+     if ( !this_cpu(iommu_dont_flush_iotlb) )
+         rc = iommu_flush_iotlb_pages(domain, addr >> PAGE_SHIFT_4K, 1);
+@@ -716,7 +707,7 @@ static void iommu_free_page_table(struct
+             iommu_free_pagetable(dma_pte_addr(*pte), next_level);
+ 
+         dma_clear_pte(*pte);
+-        iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
++        iommu_sync_cache(pte, sizeof(struct dma_pte));
+     }
+ 
+     unmap_vtd_domain_page(pt_vaddr);
+@@ -1449,7 +1440,7 @@ int domain_context_mapping_one(
+     context_set_address_width(*context, agaw);
+     context_set_fault_enable(*context);
+     context_set_present(*context);
+-    iommu_flush_cache_entry(context, sizeof(struct context_entry));
++    iommu_sync_cache(context, sizeof(struct context_entry));
+     spin_unlock(&iommu->lock);
+ 
+     /* Context entry was previously non-present (with domid 0). */
+@@ -1602,7 +1593,7 @@ int domain_context_unmap_one(
+ 
+     context_clear_present(*context);
+     context_clear_entry(*context);
+-    iommu_flush_cache_entry(context, sizeof(struct context_entry));
++    iommu_sync_cache(context, sizeof(struct context_entry));
+ 
+     iommu_domid= domain_iommu_domid(domain, iommu);
+     if ( iommu_domid == -1 )
+@@ -1828,7 +1819,7 @@ static int __must_check intel_iommu_map_
+ 
+     *pte = new;
+ 
+-    iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
++    iommu_sync_cache(pte, sizeof(struct dma_pte));
+     spin_unlock(&hd->arch.mapping_lock);
+     unmap_vtd_domain_page(page);
+ 
+@@ -1862,7 +1853,7 @@ int iommu_pte_flush(struct domain *d, u6
+     int iommu_domid;
+     int rc = 0;
+ 
+-    iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
++    iommu_sync_cache(pte, sizeof(struct dma_pte));
+ 
+     for_each_drhd_unit ( drhd )
+     {
+From: <security@xenproject.org>
+Subject: x86/iommu: introduce a cache sync hook
+
+The hook is only implemented for VT-d and it uses the already existing
+iommu_sync_cache function present in VT-d code. The new hook is
+added so that the cache can be flushed by code outside of VT-d when
+using shared page tables.
+
+Note that alloc_pgtable_maddr must use the now locally defined
+sync_cache function, because IOMMU ops are not yet set up the first
+time the function gets called during IOMMU initialization.
+
+No functional change intended.
+
+This is part of XSA-321.
+
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+--- xen/drivers/passthrough/vtd/extern.h.orig
++++ xen/drivers/passthrough/vtd/extern.h
+@@ -37,7 +37,6 @@ void disable_qinval(struct iommu *iommu)
+ int enable_intremap(struct iommu *iommu, int eim);
+ void disable_intremap(struct iommu *iommu);
+ 
+-void iommu_sync_cache(const void *addr, unsigned int size);
+ int iommu_alloc(struct acpi_drhd_unit *drhd);
+ void iommu_free(struct acpi_drhd_unit *drhd);
+ 
+--- xen/drivers/passthrough/vtd/iommu.c.orig
++++ xen/drivers/passthrough/vtd/iommu.c
+@@ -159,7 +159,7 @@ static void __init free_intel_iommu(stru
+ 
+ static int iommus_incoherent;
+ 
+-void iommu_sync_cache(const void *addr, unsigned int size)
++static void sync_cache(const void *addr, unsigned int size)
+ {
+     int i;
+     static unsigned int clflush_size = 0;
+@@ -198,7 +198,7 @@ u64 alloc_pgtable_maddr(struct acpi_drhd
+         vaddr = __map_domain_page(cur_pg);
+         memset(vaddr, 0, PAGE_SIZE);
+ 
+-        iommu_sync_cache(vaddr, PAGE_SIZE);
++        sync_cache(vaddr, PAGE_SIZE);
+         unmap_domain_page(vaddr);
+         cur_pg++;
+     }
+@@ -2760,6 +2760,7 @@ const struct iommu_ops intel_iommu_ops =
+     .iotlb_flush_all = iommu_flush_iotlb_all,
+     .get_reserved_device_memory = intel_iommu_get_reserved_device_memory,
+     .dump_p2m_table = vtd_dump_p2m_table,
++    .sync_cache = sync_cache,
+ };
+ 
+ /*
+--- xen/include/asm-x86/iommu.h.orig
++++ xen/include/asm-x86/iommu.h
+@@ -98,6 +98,13 @@ extern bool untrusted_msi;
+ int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
+                    const uint8_t gvec);
+ 
++#define iommu_sync_cache(addr, size) ({                 \
++    const struct iommu_ops *ops = iommu_get_ops();      \
++                                                        \
++    if ( ops->sync_cache )                              \
++        ops->sync_cache(addr, size);                    \
++})
++
+ #endif /* !__ARCH_X86_IOMMU_H__ */
+ /*
+  * Local variables:
+--- xen/include/xen/iommu.h.orig
++++ xen/include/xen/iommu.h
+@@ -161,6 +161,7 @@ struct iommu_ops {
+     void (*update_ire_from_apic)(unsigned int apic, unsigned int reg, unsigned int value);
+     unsigned int (*read_apic_from_ire)(unsigned int apic, unsigned int reg);
+     int (*setup_hpet_msi)(struct msi_desc *);
++    void (*sync_cache)(const void *addr, unsigned int size);
+ #endif /* CONFIG_X86 */
+     int __must_check (*suspend)(void);
+     void (*resume)(void);
+From: <security@xenproject.org>
+Subject: vtd: don't assume addresses are aligned in sync_cache
+
+Current code in sync_cache assumes that the address passed in is
+aligned to a cache line size. Fix the code to support passing in
+arbitrary addresses not necessarily aligned to a cache line size.
+
+This is part of XSA-321.
+
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+--- xen/drivers/passthrough/vtd/iommu.c.orig
++++ xen/drivers/passthrough/vtd/iommu.c
+@@ -161,8 +161,8 @@ static int iommus_incoherent;
+ 
+ static void sync_cache(const void *addr, unsigned int size)
+ {
+-    int i;
+-    static unsigned int clflush_size = 0;
++    static unsigned long clflush_size = 0;
++    const void *end = addr + size;
+ 
+     if ( !iommus_incoherent )
+         return;
+@@ -170,8 +170,9 @@ static void sync_cache(const void *addr,
+     if ( clflush_size == 0 )
+         clflush_size = get_cache_line_size();
+ 
+-    for ( i = 0; i < size; i += clflush_size )
+-        cacheline_flush((char *)addr + i);
++    addr -= (unsigned long)addr & (clflush_size - 1);
++    for ( ; addr < end; addr += clflush_size )
++        cacheline_flush((char *)addr);
+ }
+ 
+ /* Allocate page table, return its machine address */
+From: <security@xenproject.org>
+Subject: x86/alternative: introduce alternative_2
+
+It's based on alternative_io_2 without inputs or outputs but with an
+added memory clobber.
+
+This is part of XSA-321.
+
+Acked-by: Jan Beulich <jbeulich@suse.com>
+
+--- xen/include/asm-x86/alternative.h.orig
++++ xen/include/asm-x86/alternative.h
+@@ -113,6 +113,11 @@ extern void alternative_instructions(voi
+ #define alternative(oldinstr, newinstr, feature)                        \
+     asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
+ 
++#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
++    asm volatile (ALTERNATIVE_2(oldinstr, newinstr1, feature1,             \
++                                newinstr2, feature2)                       \
++                  : : : "memory")
++
+ /*
+  * Alternative inline assembly with input.
+  *
+From: <security@xenproject.org>
+Subject: vtd: optimize CPU cache sync
+
+Some VT-d IOMMUs are non-coherent, which requires a cache write back
+in order for the changes made by the CPU to be visible to the IOMMU.
+This cache write back was unconditionally done using clflush, but there are
+other more efficient instructions to do so, hence implement support
+for them using the alternative framework.
+
+This is part of XSA-321.
+
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+--- xen/drivers/passthrough/vtd/extern.h.orig
++++ xen/drivers/passthrough/vtd/extern.h
+@@ -63,7 +63,6 @@ int __must_check qinval_device_iotlb_syn
+                                           u16 did, u16 size, u64 addr);
+ 
+ unsigned int get_cache_line_size(void);
+-void cacheline_flush(char *);
+ void flush_all_cache(void);
+ 
+ u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages);
+--- xen/drivers/passthrough/vtd/iommu.c.orig
++++ xen/drivers/passthrough/vtd/iommu.c
+@@ -31,6 +31,7 @@
+ #include <xen/pci_regs.h>
+ #include <xen/keyhandler.h>
+ #include <asm/msi.h>
++#include <asm/nops.h>
+ #include <asm/irq.h>
+ #include <asm/hvm/vmx/vmx.h>
+ #include <asm/p2m.h>
+@@ -172,7 +173,42 @@ static void sync_cache(const void *addr,
+ 
+     addr -= (unsigned long)addr & (clflush_size - 1);
+     for ( ; addr < end; addr += clflush_size )
+-        cacheline_flush((char *)addr);
++/*
++ * The arguments to a macro must not include preprocessor directives. Doing so
++ * results in undefined behavior, so we have to create some defines here in
++ * order to avoid it.
++ */
++#if defined(HAVE_AS_CLWB)
++# define CLWB_ENCODING "clwb %[p]"
++#elif defined(HAVE_AS_XSAVEOPT)
++# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
++#else
++# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
++#endif
++
++#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
++#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
++# define INPUT BASE_INPUT
++#else
++# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
++#endif
++        /*
++         * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
++         * + prefix than a clflush + nop, and hence the prefix is added instead
++         * of letting the alternative framework fill the gap by appending nops.
++         */
++        alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
++                         "data16 clflush %[p]", /* clflushopt */
++                         X86_FEATURE_CLFLUSHOPT,
++                         CLWB_ENCODING,
++                         X86_FEATURE_CLWB, /* no outputs */,
++                         INPUT(addr));
++#undef INPUT
++#undef BASE_INPUT
++#undef CLWB_ENCODING
++
++    alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
++                  "sfence", X86_FEATURE_CLWB);
+ }
+ 
+ /* Allocate page table, return its machine address */
+--- xen/drivers/passthrough/vtd/x86/vtd.c.orig
++++ xen/drivers/passthrough/vtd/x86/vtd.c
+@@ -53,11 +53,6 @@ unsigned int get_cache_line_size(void)
+     return ((cpuid_ebx(1) >> 8) & 0xff) * 8;
+ }
+ 
+-void cacheline_flush(char * addr)
+-{
+-    clflush(addr);
+-}
+-
+ void flush_all_cache()
+ {
+     wbinvd();
+From: <security@xenproject.org>
+Subject: x86/ept: flush cache when modifying PTEs and sharing page tables
+
+Modifications made to the page tables by EPT code need to be written
+to memory when the page tables are shared with the IOMMU, as Intel
+IOMMUs can be non-coherent and thus require changes to be written to
+memory in order to be visible to the IOMMU.
+
+In order to achieve this make sure data is written back to memory
+after writing an EPT entry when the recalc bit is not set in
+atomic_write_ept_entry. If such bit is set, the entry will be
+adjusted and atomic_write_ept_entry will be called a second time
+without the recalc bit set. Note that when splitting a super page the
+new tables resulting from the split should also be written back.
+
+Failure to do so can allow devices behind the IOMMU access to the
+stale super page, or cause coherency issues as changes made by the
+processor to the page tables are not visible to the IOMMU.
+
+This allows removing the VT-d specific iommu_pte_flush helper, since
+the cache write back is now performed by atomic_write_ept_entry, and
+hence iommu_iotlb_flush can be used to flush the IOMMU TLB. The newly
+used method (iommu_iotlb_flush) can result in fewer flushes, since it
+might sometimes rightly be called with 0 flags, in which case it
+becomes a no-op.
+
+This is part of XSA-321.
+
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+
+--- xen/arch/x86/mm/p2m-ept.c.orig
++++ xen/arch/x86/mm/p2m-ept.c
+@@ -90,6 +90,19 @@ static int atomic_write_ept_entry(ept_en
+ 
+     write_atomic(&entryptr->epte, new.epte);
+ 
++    /*
++     * The recalc field on the EPT is used to signal either that a
++     * recalculation of the EMT field is required (which doesn't affect the
++     * IOMMU), or a type change. Type changes can only be between ram_rw,
++     * logdirty and ioreq_server: changes to/from logdirty won't work well with
++     * an IOMMU anyway, as IOMMU #PFs are not synchronous and will lead to
++     * aborts, and changes to/from ioreq_server are already fully flushed
++     * before returning to guest context (see
++     * XEN_DMOP_map_mem_type_to_ioreq_server).
++     */
++    if ( !new.recalc && iommu_hap_pt_share )
++        iommu_sync_cache(entryptr, sizeof(*entryptr));
++
+     if ( unlikely(oldmfn != mfn_x(INVALID_MFN)) )
+         put_page(mfn_to_page(_mfn(oldmfn)));
+ 
+@@ -319,6 +332,9 @@ static bool_t ept_split_super_page(struc
+         break;
+     }
+ 
++    if ( iommu_hap_pt_share )
++        iommu_sync_cache(table, EPT_PAGETABLE_ENTRIES * sizeof(ept_entry_t));
++
+     unmap_domain_page(table);
+ 
+     /* Even failed we should install the newly allocated ept page. */
+@@ -875,7 +894,7 @@ out:
+          need_modify_vtd_table )
+     {
+         if ( iommu_hap_pt_share )
+-            rc = iommu_pte_flush(d, gfn, &ept_entry->epte, order, vtd_pte_present);
++            rc = iommu_flush_iotlb(d, gfn, vtd_pte_present, 1u << order);
+         else
+         {
+             if ( iommu_flags )
+--- xen/drivers/passthrough/vtd/iommu.c.orig
++++ xen/drivers/passthrough/vtd/iommu.c
+@@ -612,10 +612,8 @@ static int __must_check iommu_flush_all(
+     return rc;
+ }
+ 
+-static int __must_check iommu_flush_iotlb(struct domain *d,
+-                                          unsigned long gfn,
+-                                          bool_t dma_old_pte_present,
+-                                          unsigned int page_count)
++int iommu_flush_iotlb(struct domain *d, unsigned long gfn,
++                      bool dma_old_pte_present, unsigned int page_count)
+ {
+     struct domain_iommu *hd = dom_iommu(d);
+     struct acpi_drhd_unit *drhd;
+@@ -1880,53 +1878,6 @@ static int __must_check intel_iommu_unma
+     return dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K);
+ }
+ 
+-int iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte,
+-                    int order, int present)
+-{
+-    struct acpi_drhd_unit *drhd;
+-    struct iommu *iommu = NULL;
+-    struct domain_iommu *hd = dom_iommu(d);
+-    bool_t flush_dev_iotlb;
+-    int iommu_domid;
+-    int rc = 0;
+-
+-    iommu_sync_cache(pte, sizeof(struct dma_pte));
+-
+-    for_each_drhd_unit ( drhd )
+-    {
+-        iommu = drhd->iommu;
+-        if ( !test_bit(iommu->index, &hd->arch.iommu_bitmap) )
+-            continue;
+-
+-        flush_dev_iotlb = !!find_ats_dev_drhd(iommu);
+-        iommu_domid= domain_iommu_domid(d, iommu);
+-        if ( iommu_domid == -1 )
+-            continue;
+-
+-        rc = iommu_flush_iotlb_psi(iommu, iommu_domid,
+-                                   (paddr_t)gfn << PAGE_SHIFT_4K,
+-                                   order, !present, flush_dev_iotlb);
+-        if ( rc > 0 )
+-        {
+-            iommu_flush_write_buffer(iommu);
+-            rc = 0;
+-        }
+-    }
+-
+-    if ( unlikely(rc) )
+-    {
+-        if ( !d->is_shutting_down && printk_ratelimit() )
+-            printk(XENLOG_ERR VTDPREFIX
+-                   " d%d: IOMMU pages flush failed: %d\n",
+-                   d->domain_id, rc);
+-
+-        if ( !is_hardware_domain(d) )
+-            domain_crash(d);
+-    }
+-
+-    return rc;
+-}
+-
+ static int __init vtd_ept_page_compatible(struct iommu *iommu)
+ {
+     u64 ept_cap, vtd_cap = iommu->cap;
+--- xen/include/asm-x86/iommu.h.orig
++++ xen/include/asm-x86/iommu.h
+@@ -87,8 +87,9 @@ int iommu_setup_hpet_msi(struct msi_desc
+ 
+ /* While VT-d specific, this must get declared in a generic header. */
+ int adjust_vtd_irq_affinities(void);
+-int __must_check iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte,
+-                                 int order, int present);
++int __must_check iommu_flush_iotlb(struct domain *d, unsigned long gfn,
++                                   bool dma_old_pte_present,
++                                   unsigned int page_count);
+ bool_t iommu_supports_eim(void);
+ int iommu_enable_x2apic_IR(void);
+ void iommu_disable_x2apic_IR(void);
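
The eligibility test added by the first patch is easiest to read outside the diff. Below is a minimal standalone sketch, not Xen code; the function names can_use_psi_flush and order_from_pages are illustrative. A page-selective invalidation (PSI) is only usable when the page count is a non-zero power of two and the gfn is aligned to that count (the real code also falls back for INVALID_GFN); the order handed to iommu_flush_iotlb_psi is then log2 of the count, which is what get_order_from_pages() yields for such power-of-two counts.

#include <stdbool.h>
#include <stdint.h>

/* Illustrative: PSI is only valid for a non-zero, power-of-two page
 * count with a gfn aligned to that count; otherwise the caller must
 * fall back to a domain-selective (DSI) flush. */
static bool can_use_psi_flush(uint64_t gfn, unsigned int page_count)
{
    if ( !page_count || (page_count & (page_count - 1)) )
        return false;                     /* zero or not a power of two */
    return !(gfn & (page_count - 1));     /* IS_ALIGNED(gfn, page_count) */
}

/* Illustrative: for the power-of-two counts accepted above, the flush
 * order is simply the base-2 logarithm of the page count. */
static unsigned int order_from_pages(unsigned int page_count)
{
    unsigned int order = 0;

    while ( (1u << order) < page_count )
        order++;
    return order;
}

The point of the combined check is that a PSI covers a naturally aligned power-of-two range; any other (gfn, count) pair cannot be expressed as a single PSI, so the old code's order-0-only limitation is lifted without ever issuing an invalid PSI.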
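
The cache sync hook introduced by the third patch follows the optional-ops pattern: the generic iommu_sync_cache() wrapper is a no-op unless the active driver installs a handler, which is why only VT-d (whose page-table walks may be non-coherent) provides one. A reduced sketch of that pattern, with illustrative struct and function names rather than Xen's:

/* Reduced model of the hook added to struct iommu_ops: drivers whose
 * page-table walks are always coherent simply leave it NULL. */
struct iommu_ops_sketch {
    void (*sync_cache)(const void *addr, unsigned int size);
};

/* Mirrors the iommu_sync_cache() wrapper macro: invoke the hook only
 * when the active driver provides one. */
static void iommu_sync_cache_sketch(const struct iommu_ops_sketch *ops,
                                    const void *addr, unsigned int size)
{
    if ( ops && ops->sync_cache )
        ops->sync_cache(addr, size);
}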
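
The alignment fix in the fourth patch rounds the start address down to a cache-line boundary and then flushes line by line up to the end of the range. A self-contained sketch of the same walk, assuming x86-64 with GCC/Clang and using plain clflush via a compiler builtin as a stand-in for the alternatives-patched clflush/clflushopt/clwb sequence the later patch installs:

#include <stdint.h>

/* Stand-in for the per-line flush primitive; plain clflush is the
 * baseline instruction, available on all x86-64 CPUs. */
static inline void cacheline_flush(const char *line)
{
    __builtin_ia32_clflush(line);
}

/* Sketch of the corrected sync_cache walk: align the start down to a
 * cache-line boundary, then flush every line until the end of the
 * range, so unaligned (addr, size) pairs are handled correctly. */
static void sync_cache_sketch(const void *addr, unsigned int size,
                              unsigned long clflush_size)
{
    const char *p = addr;
    const char *end = p + size;

    p -= (uintptr_t)p & (clflush_size - 1);    /* align down */
    for ( ; p < end; p += clflush_size )
        cacheline_flush(p);
}

Without the align-down step, an entry straddling two cache lines could leave its tail line unflushed, which is exactly the kind of stale-visibility bug this part of XSA-321 addresses.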