$NetBSD: patch-XSA347,v 1.1.2.2 2020/10/22 16:29:05 bsiegert Exp $

From: Jan Beulich <jbeulich@suse.com>
Subject: AMD/IOMMU: convert amd_iommu_pte from struct to union

This is to add a "raw" counterpart to the bitfield equivalent. Take the
opportunity and
 - convert fields to bool / unsigned int,
 - drop the naming of the reserved field,
 - shorten the names of the ignored ones.

This is part of XSA-347.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Paul Durrant <paul@xen.org>

--- xen/drivers/passthrough/amd/iommu_map.c.orig
+++ xen/drivers/passthrough/amd/iommu_map.c
@@ -38,7 +38,7 @@ static unsigned int pfn_to_pde_idx(unsig
 static unsigned int clear_iommu_pte_present(unsigned long l1_mfn,
                                             unsigned long dfn)
 {
-    struct amd_iommu_pte *table, *pte;
+    union amd_iommu_pte *table, *pte;
     unsigned int flush_flags;
 
     table = map_domain_page(_mfn(l1_mfn));
@@ -52,7 +52,7 @@ static unsigned int clear_iommu_pte_pres
     return flush_flags;
 }
 
-static unsigned int set_iommu_pde_present(struct amd_iommu_pte *pte,
+static unsigned int set_iommu_pde_present(union amd_iommu_pte *pte,
                                           unsigned long next_mfn,
                                           unsigned int next_level, bool iw,
                                           bool ir)
@@ -87,7 +87,7 @@ static unsigned int set_iommu_pte_presen
                                           int pde_level,
                                           bool iw, bool ir)
 {
-    struct amd_iommu_pte *table, *pde;
+    union amd_iommu_pte *table, *pde;
     unsigned int flush_flags;
 
     table = map_domain_page(_mfn(pt_mfn));
@@ -178,7 +178,7 @@ void iommu_dte_set_guest_cr3(struct amd_
 static int iommu_pde_from_dfn(struct domain *d, unsigned long dfn,
                               unsigned long pt_mfn[], bool map)
 {
-    struct amd_iommu_pte *pde, *next_table_vaddr;
+    union amd_iommu_pte *pde, *next_table_vaddr;
     unsigned long  next_table_mfn;
     unsigned int level;
     struct page_info *table;
@@ -458,7 +458,7 @@ int __init amd_iommu_quarantine_init(str
     unsigned long end_gfn =
         1ul << (DEFAULT_DOMAIN_ADDRESS_WIDTH - PAGE_SHIFT);
     unsigned int level = amd_iommu_get_paging_mode(end_gfn);
-    struct amd_iommu_pte *table;
+    union amd_iommu_pte *table;
 
     if ( hd->arch.root_table )
     {
@@ -489,7 +489,7 @@ int __init amd_iommu_quarantine_init(str
 
         for ( i = 0; i < PTE_PER_TABLE_SIZE; i++ )
         {
-            struct amd_iommu_pte *pde = &table[i];
+            union amd_iommu_pte *pde = &table[i];
 
             /*
              * PDEs are essentially a subset of PTEs, so this function
--- xen/drivers/passthrough/amd/pci_amd_iommu.c.orig
+++ xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -390,7 +390,7 @@ static void deallocate_next_page_table(s
 
 static void deallocate_page_table(struct page_info *pg)
 {
-    struct amd_iommu_pte *table_vaddr;
+    union amd_iommu_pte *table_vaddr;
     unsigned int index, level = PFN_ORDER(pg);
 
     PFN_ORDER(pg) = 0;
@@ -405,7 +405,7 @@ static void deallocate_page_table(struct
 
     for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
     {
-        struct amd_iommu_pte *pde = &table_vaddr[index];
+        union amd_iommu_pte *pde = &table_vaddr[index];
 
         if ( pde->mfn && pde->next_level && pde->pr )
         {
@@ -557,7 +557,7 @@ static void amd_dump_p2m_table_level(str
                                      paddr_t gpa, int indent)
 {
     paddr_t address;
-    struct amd_iommu_pte *table_vaddr;
+    const union amd_iommu_pte *table_vaddr;
     int index;
 
     if ( level < 1 )
@@ -573,7 +573,7 @@ static void amd_dump_p2m_table_level(str
 
     for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
     {
-        struct amd_iommu_pte *pde = &table_vaddr[index];
+        const union amd_iommu_pte *pde = &table_vaddr[index];
 
         if ( !(index % 2) )
             process_pending_softirqs();
--- xen/include/asm-x86/hvm/svm/amd-iommu-defs.h.orig
+++ xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -465,20 +465,23 @@ union amd_iommu_x2apic_control {
 #define IOMMU_PAGE_TABLE_U32_PER_ENTRY	(IOMMU_PAGE_TABLE_ENTRY_SIZE / 4)
 #define IOMMU_PAGE_TABLE_ALIGNMENT	4096
 
-struct amd_iommu_pte {
-    uint64_t pr:1;
-    uint64_t ignored0:4;
-    uint64_t a:1;
-    uint64_t d:1;
-    uint64_t ignored1:2;
-    uint64_t next_level:3;
-    uint64_t mfn:40;
-    uint64_t reserved:7;
-    uint64_t u:1;
-    uint64_t fc:1;
-    uint64_t ir:1;
-    uint64_t iw:1;
-    uint64_t ignored2:1;
+union amd_iommu_pte {
+    uint64_t raw;
+    struct {
+        bool pr:1;
+        unsigned int ign0:4;
+        bool a:1;
+        bool d:1;
+        unsigned int ign1:2;
+        unsigned int next_level:3;
+        uint64_t mfn:40;
+        unsigned int :7;
+        bool u:1;
+        bool fc:1;
+        bool ir:1;
+        bool iw:1;
+        unsigned int ign2:1;
+    };
 };
 
 /* Paging modes */
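
The payoff of the union is that the same 64 bits can be addressed either field by field or as one whole word. A minimal standalone sketch of the pattern (the layout mirrors the union above; pte_sketch and build_pte() are hypothetical names, not Xen code):

#include <stdbool.h>
#include <stdint.h>

union pte_sketch {
    uint64_t raw;                    /* whole-entry view */
    struct {                         /* field-by-field view, as in the patch */
        bool pr:1;
        unsigned int ign0:4;
        bool a:1, d:1;
        unsigned int ign1:2;
        unsigned int next_level:3;
        uint64_t mfn:40;
        unsigned int :7;
        bool u:1, fc:1, ir:1, iw:1;
        unsigned int ign2:1;
    };
};
_Static_assert(sizeof(union pte_sketch) == 8, "PTE must stay a single 64-bit word");

/* Compose an entry field by field in a local variable and hand back the
 * finished value as one 64-bit word, ready to be written with one store. */
static uint64_t build_pte(uint64_t mfn, unsigned int next_level, bool iw, bool ir)
{
    union pte_sketch pte = { .raw = 0 };

    pte.fc = !next_level;
    pte.mfn = mfn;
    pte.next_level = next_level;
    pte.iw = iw;
    pte.ir = ir;
    pte.pr = true;

    return pte.raw;
}
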
From: Jan Beulich <jbeulich@suse.com>
Subject: AMD/IOMMU: update live PTEs atomically

Updating a live PTE bitfield by bitfield risks the compiler re-ordering
the individual updates as well as splitting individual updates into
multiple memory writes. Construct the new entry fully in a local
variable, do the check to determine the flushing needs on the thus
established new entry, and then write the new entry by a single insn.

Similarly using memset() to clear a PTE is unsafe, as the order of
writes the function does is, at least in principle, undefined.

This is part of XSA-347.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul@xen.org>

--- xen/drivers/passthrough/amd/iommu_map.c.orig
+++ xen/drivers/passthrough/amd/iommu_map.c
@@ -45,7 +45,7 @@ static unsigned int clear_iommu_pte_pres
     pte = &table[pfn_to_pde_idx(dfn, 1)];
 
     flush_flags = pte->pr ? IOMMU_FLUSHF_modified : 0;
-    memset(pte, 0, sizeof(*pte));
+    write_atomic(&pte->raw, 0);
 
     unmap_domain_page(table);
 
@@ -57,26 +57,30 @@ static unsigned int set_iommu_pde_presen
                                           unsigned int next_level, bool iw,
                                           bool ir)
 {
+    union amd_iommu_pte new = {}, old;
     unsigned int flush_flags = IOMMU_FLUSHF_added;
 
-    if ( pte->pr &&
-         (pte->mfn != next_mfn ||
-          pte->iw != iw ||
-          pte->ir != ir ||
-          pte->next_level != next_level) )
-            flush_flags |= IOMMU_FLUSHF_modified;
-
     /*
      * FC bit should be enabled in PTE, this helps to solve potential
      * issues with ATS devices
      */
-    pte->fc = !next_level;
+    new.fc = !next_level;
+
+    new.mfn = next_mfn;
+    new.iw = iw;
+    new.ir = ir;
+    new.next_level = next_level;
+    new.pr = true;
+
+    old.raw = read_atomic(&pte->raw);
+    old.ign0 = 0;
+    old.ign1 = 0;
+    old.ign2 = 0;
+
+    if ( old.pr && old.raw != new.raw )
+        flush_flags |= IOMMU_FLUSHF_modified;
 
-    pte->mfn = next_mfn;
-    pte->iw = iw;
-    pte->ir = ir;
-    pte->next_level = next_level;
-    pte->pr = 1;
+    write_atomic(&pte->raw, new.raw);
 
     return flush_flags;
 }
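
The hazard described above can be made concrete: every bitfield assignment to a live entry is a separate read-modify-write of the underlying word, and the IOMMU walks the table concurrently without taking any lock, so it can observe a half-updated entry (for instance a present PTE whose mfn already points at the new page while the permissions are still the old ones). A minimal sketch of the unsafe and the safe variant, reusing the pte_sketch union from the previous sketch and using the GCC __atomic builtins as stand-ins for Xen's read_atomic()/write_atomic(); both update functions are hypothetical names:

/* Unsafe: five independent read-modify-write accesses; nothing stops the
 * compiler from reordering or further splitting them, and the IOMMU can
 * walk the table between any two of them. */
static void update_pte_unsafe(union pte_sketch *pte, uint64_t mfn, bool iw, bool ir)
{
    pte->mfn = mfn;
    pte->iw = iw;
    pte->ir = ir;
    pte->next_level = 0;
    pte->pr = true;
}

/* Safe: snapshot the old entry once, compose the new one in a local, and
 * publish it with a single 64-bit store. */
static bool update_pte_single_store(union pte_sketch *pte, uint64_t mfn, bool iw, bool ir)
{
    union pte_sketch old, new = { .raw = 0 };

    new.fc = true;
    new.mfn = mfn;
    new.iw = iw;
    new.ir = ir;
    new.pr = true;

    old.raw = __atomic_load_n(&pte->raw, __ATOMIC_RELAXED);
    old.ign0 = old.ign1 = old.ign2 = 0;      /* ignored bits must not force a flush */

    __atomic_store_n(&pte->raw, new.raw, __ATOMIC_RELEASE);

    return old.pr && old.raw != new.raw;     /* caller must flush if a live entry changed */
}
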
From: Jan Beulich <jbeulich@suse.com>
Subject: AMD/IOMMU: ensure suitable ordering of DTE modifications

DMA and interrupt translation should be enabled only after other
applicable DTE fields have been written. Similarly when disabling
translation or when moving a device between domains, translation should
first be disabled, before other entry fields get modified. Note however
that the "moving" aspect doesn't apply to the interrupt remapping side,
as domain specifics are maintained in the IRTEs here, not the DTE. We
also never disable interrupt remapping once it got enabled for a device
(the respective argument passed is always the immutable iommu_intremap).

This is part of XSA-347.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul@xen.org>

--- xen/drivers/passthrough/amd/iommu_map.c.orig
+++ xen/drivers/passthrough/amd/iommu_map.c
@@ -107,11 +107,18 @@ void amd_iommu_set_root_page_table(struc
                                    uint64_t root_ptr, uint16_t domain_id,
                                    uint8_t paging_mode, bool valid)
 {
+    if ( valid || dte->v )
+    {
+        dte->tv = false;
+        dte->v = true;
+        smp_wmb();
+    }
     dte->domain_id = domain_id;
     dte->pt_root = paddr_to_pfn(root_ptr);
     dte->iw = true;
     dte->ir = true;
     dte->paging_mode = paging_mode;
+    smp_wmb();
     dte->tv = true;
     dte->v = valid;
 }
@@ -134,6 +141,7 @@ void amd_iommu_set_intremap_table(
     }
 
     dte->ig = false; /* unmapped interrupts result in i/o page faults */
+    smp_wmb();
     dte->iv = valid;
 }
 
--- xen/drivers/passthrough/amd/pci_amd_iommu.c.orig
+++ xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -120,7 +120,10 @@ static void amd_iommu_setup_domain_devic
         /* Undo what amd_iommu_disable_domain_device() may have done. */
         ivrs_dev = &get_ivrs_mappings(iommu->seg)[req_id];
         if ( dte->it_root )
+        {
             dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED;
+            smp_wmb();
+        }
         dte->iv = iommu_intremap;
         dte->ex = ivrs_dev->dte_allow_exclusion;
         dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
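
The same publish-after-payload discipline in standalone form: the bit that makes the IOMMU act on an entry is written last, behind a write barrier, and an entry that may already be live is disabled first, before any payload field changes. A minimal sketch with a simplified device table entry carrying a single enable bit instead of the real v/tv pair; dte_sketch and dte_set_root() are hypothetical names, and __atomic_thread_fence(__ATOMIC_RELEASE) stands in for Xen's smp_wmb():

#include <stdbool.h>
#include <stdint.h>

struct dte_sketch {
    bool v;                  /* the IOMMU honours the other fields only if set */
    uint16_t domain_id;
    uint64_t pt_root;
    unsigned int paging_mode;
};

static void dte_set_root(struct dte_sketch *dte, uint16_t domain_id,
                         uint64_t pt_root, unsigned int paging_mode)
{
    /* If the entry might already be live, stop translation before touching
     * the payload, so the IOMMU never mixes old and new fields. */
    if ( dte->v )
    {
        dte->v = false;
        __atomic_thread_fence(__ATOMIC_RELEASE);   /* stand-in for smp_wmb() */
    }

    dte->domain_id = domain_id;
    dte->pt_root = pt_root;
    dte->paging_mode = paging_mode;

    __atomic_thread_fence(__ATOMIC_RELEASE);       /* payload lands before the enable bit */
    dte->v = true;
}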