author     bouyer <bouyer@pkgsrc.org>    2017-12-15 14:00:44 +0000
committer  bouyer <bouyer@pkgsrc.org>    2017-12-15 14:00:44 +0000
commit     63fe43d4f2340bd60b6f9eb71126e0a7b9b34eb8 (patch)
tree       58675c1c9ad24e057536b4925be1362e3f63b6e9 /sysutils
parent     805093f1da918a4c25c2e80f9a34c94b9df112bb (diff)
download   pkgsrc-63fe43d4f2340bd60b6f9eb71126e0a7b9b34eb8.tar.gz
Apply patches from upstream, fixing security issues XSA-246 through XSA-251.
Also update patch-XSA240 from upstream, fixing issues in linear page table
handling introduced by the original XSA-240 patch.
Bump PKGREVISION.
Diffstat (limited to 'sysutils')
21 files changed, 1550 insertions, 74 deletions
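For reference, a minimal sketch of how a pkgsrc user would pick up these fixes once the tree contains this commit. The checkout location and use of CVS are assumptions; adjust for your setup. `make replace` rebuilds the package at the bumped PKGREVISION and swaps it for the installed one; sysutils/xenkernel46 is updated the same way.

```sh
# Assumptions: pkgsrc CVS checkout at /usr/pkgsrc, xenkernel48 already
# installed from pkgsrc. The same steps apply to sysutils/xenkernel46.
cd /usr/pkgsrc
cvs update -dP sysutils/xenkernel48   # pull a tree that includes this commit
cd sysutils/xenkernel48
make clean
make replace                          # rebuild (now PKGREVISION 2) and replace the installed package
```

After installing the rebuilt package, copy the new hypervisor image into place as usual for your boot setup and reboot into it so the patched Xen kernel is actually running.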
diff --git a/sysutils/xenkernel46/Makefile b/sysutils/xenkernel46/Makefile index 5059c0d4797..dd4ccbd4b78 100644 --- a/sysutils/xenkernel46/Makefile +++ b/sysutils/xenkernel46/Makefile @@ -1,9 +1,9 @@ -# $NetBSD: Makefile,v 1.16 2017/10/17 11:10:35 bouyer Exp $ +# $NetBSD: Makefile,v 1.17 2017/12/15 14:00:44 bouyer Exp $ VERSION= 4.6.6 DISTNAME= xen-${VERSION} PKGNAME= xenkernel46-${VERSION} -PKGREVISION= 1 +PKGREVISION= 2 CATEGORIES= sysutils MASTER_SITES= https://downloads.xenproject.org/release/xen/${VERSION}/ diff --git a/sysutils/xenkernel46/distinfo b/sysutils/xenkernel46/distinfo index 04251e1933a..0b2aca0b3cc 100644 --- a/sysutils/xenkernel46/distinfo +++ b/sysutils/xenkernel46/distinfo @@ -1,4 +1,4 @@ -$NetBSD: distinfo,v 1.10 2017/10/17 10:57:34 bouyer Exp $ +$NetBSD: distinfo,v 1.11 2017/12/15 14:00:44 bouyer Exp $ SHA1 (xen-4.6.6.tar.gz) = 82f39ef4bf754ffd679ab5d15709bc34a98fccb7 RMD160 (xen-4.6.6.tar.gz) = 6412f75183647172d72597e8779235b60e1c00f3 @@ -15,11 +15,17 @@ SHA1 (patch-XSA234) = 0b5973597e3a15fb9ce93d6a735f32794983cfc7 SHA1 (patch-XSA237) = 2a5cd048a04b8cadc67905b9001689b1221edd3e SHA1 (patch-XSA238) = e2059991d12f31740650136ec59c62da20c79633 SHA1 (patch-XSA239) = 10619718e8a1536a7f52eb3838cdb490e6ba8c97 -SHA1 (patch-XSA240) = af3d204e9873fe79b23c714d60dfa91fcbe46ec5 -SHA1 (patch-XSA241) = b506425ca7382190435df6f96800cb0a24aff23e +SHA1 (patch-XSA240) = 9677ebc1ee535b11ae1248325ad63ea213677561 +SHA1 (patch-XSA241) = bf9a488d2da40be0e4aed5270e25c64a9c673ca4 SHA1 (patch-XSA242) = afff314771d78ee2482aec3b7693c12bfe00e0ec SHA1 (patch-XSA243) = ffe83e9e443a2582047f1d17673d39d6746f4b75 SHA1 (patch-XSA244) = 95077513502c26f8d6dae7964a0e422556be322a +SHA1 (patch-XSA246) = a7eb9365cad042f5b1aa3112df6adf8421a3a6e4 +SHA1 (patch-XSA247) = 5a03a8ef20db5cd55fa39314a15f80175be78b94 +SHA1 (patch-XSA248) = d5787fa7fc48449ca90200811b66cb6278c750aa +SHA1 (patch-XSA249) = 7037a35f37eb866f16fe90482e66d0eca95944c4 +SHA1 (patch-XSA250) = 25ab2e8c67ebe2b40cf073197c17f1625f5581f6 +SHA1 (patch-XSA251) = dc0786c85bcfbdd3f7a1c97a3af32c10deea8276 SHA1 (patch-tools_xentrace_xenalyze.c) = ab973cb7090dc90867dcddf9ab8965f8f2f36c46 SHA1 (patch-xen_Makefile) = be3f4577a205b23187b91319f91c50720919f70b SHA1 (patch-xen_arch_arm_xen.lds.S) = df0e4a13b9b3ae863448172bea28b1b92296327b diff --git a/sysutils/xenkernel46/patches/patch-XSA240 b/sysutils/xenkernel46/patches/patch-XSA240 index 6963c280254..136aabdd675 100644 --- a/sysutils/xenkernel46/patches/patch-XSA240 +++ b/sysutils/xenkernel46/patches/patch-XSA240 @@ -1,4 +1,4 @@ -$NetBSD: patch-XSA240,v 1.1 2017/10/17 10:57:34 bouyer Exp $ +$NetBSD: patch-XSA240,v 1.2 2017/12/15 14:00:44 bouyer Exp $ From ce31198dd811479da34dfb66315f399dc4b98055 Mon Sep 17 00:00:00 2001 From: Jan Beulich <jbeulich@suse.com> @@ -532,7 +532,7 @@ index d99a20a44b..c91acaa464 100644 +### pv-linear-pt +> `= <boolean>` + -+> Default: `true` ++> Default: `false` + +Allow PV guests to have pagetable entries pointing to other pagetables +of the same level (i.e., allowing L2 PTEs to point to other L2 pages). @@ -540,9 +540,9 @@ index d99a20a44b..c91acaa464 100644 +used to allow operating systems a simple way to consistently map the +current process's pagetables into its own virtual address space. + -+None of the most common PV operating systems (Linux, MiniOS) -+use this technique, but NetBSD in PV mode, and maybe custom operating -+systems do. 
++None of the most common PV operating systems (Linux, NetBSD, MiniOS) ++use this technique, but there may be custom operating systems which ++do. ### reboot > `= t[riple] | k[bd] | a[cpi] | p[ci] | P[ower] | e[fi] | n[o] [, [w]arm | [c]old]` @@ -576,3 +576,91 @@ index 81074aa473..75dd077046 100644 -- 2.14.1 +From: Jan Beulich <jbeulich@suse.com> +Subject: x86: don't wrongly trigger linear page table assertion + +_put_page_type() may do multiple iterations until its cmpxchg() +succeeds. It invokes set_tlbflush_timestamp() on the first +iteration, however. Code inside the function takes care of this, but +- the assertion in _put_final_page_type() would trigger on the second + iteration if time stamps in a debug build are permitted to be + sufficiently much wider than the default 6 bits (see WRAP_MASK in + flushtlb.c), +- it returning -EINTR (for a continuation to be scheduled) would leave + the page inconsistent state (until the re-invocation completes). +Make the set_tlbflush_timestamp() invocation conditional, bypassing it +(for now) only in the case we really can't tolerate the stamp to be +stored. + +This is part of XSA-240. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: George Dunlap <george.dunlap@citrix.com> + +--- xen/arch/x86/mm.c.orig ++++ xen/arch/x86/mm.c +--- xen/arch/x86/mm.c.orig 2017-12-15 10:18:25.000000000 +0100 ++++ xen/arch/x86/mm.c 2017-12-15 10:20:53.000000000 +0100 +@@ -2494,29 +2494,20 @@ + break; + } + +- if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) +- { +- /* +- * page_set_tlbflush_timestamp() accesses the same union +- * linear_pt_count lives in. Unvalidated page table pages, +- * however, should occur during domain destruction only +- * anyway. Updating of linear_pt_count luckily is not +- * necessary anymore for a dying domain. +- */ +- ASSERT(page_get_owner(page)->is_dying); +- ASSERT(page->linear_pt_count < 0); +- ASSERT(ptpg->linear_pt_count > 0); +- ptpg = NULL; +- } +- + /* + * Record TLB information for flush later. We do not stamp page + * tables when running in shadow mode: + * 1. Pointless, since it's the shadow pt's which must be tracked. + * 2. Shadow mode reuses this field for shadowed page tables to + * store flags info -- we don't want to conflict with that. ++ * Also page_set_tlbflush_timestamp() accesses the same union ++ * linear_pt_count lives in. Pages (including page table ones), ++ * however, don't need their flush time stamp set except when ++ * the last reference is being dropped. For page table pages ++ * this happens in _put_final_page_type(). + */ +- if ( !(shadow_mode_enabled(page_get_owner(page)) && ++ if ( (!ptpg || !PGT_type_equal(x, ptpg->u.inuse.type_info)) && ++ !(shadow_mode_enabled(page_get_owner(page)) && + (page->count_info & PGC_page_table)) ) + page->tlbflush_timestamp = tlbflush_current_time(); + } +From: Jan Beulich <jbeulich@suse.com> +Subject: x86: don't wrongly trigger linear page table assertion (2) + +_put_final_page_type(), when free_page_type() has exited early to allow +for preemption, should not update the time stamp, as the page continues +to retain the typ which is in the process of being unvalidated. I can't +see why the time stamp update was put on that path in the first place +(albeit it may well have been me who had put it there years ago). + +This is part of XSA-240. 
+ +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: <George Dunlap <george.dunlap.com> + +--- xen/arch/x86/mm.c.orig 2017-12-15 10:20:53.000000000 +0100 ++++ xen/arch/x86/mm.c 2017-12-15 10:25:32.000000000 +0100 +@@ -2441,9 +2441,6 @@ + { + ASSERT((page->u.inuse.type_info & + (PGT_count_mask|PGT_validated|PGT_partial)) == 1); +- if ( !(shadow_mode_enabled(page_get_owner(page)) && +- (page->count_info & PGC_page_table)) ) +- page->tlbflush_timestamp = tlbflush_current_time(); + wmb(); + page->u.inuse.type_info |= PGT_validated; + } diff --git a/sysutils/xenkernel46/patches/patch-XSA241 b/sysutils/xenkernel46/patches/patch-XSA241 index 8a220e0b451..5b227a238e1 100644 --- a/sysutils/xenkernel46/patches/patch-XSA241 +++ b/sysutils/xenkernel46/patches/patch-XSA241 @@ -1,4 +1,4 @@ -$NetBSD: patch-XSA241,v 1.1 2017/10/17 10:57:34 bouyer Exp $ +$NetBSD: patch-XSA241,v 1.2 2017/12/15 14:00:44 bouyer Exp $ x86: don't store possibly stale TLB flush time stamp @@ -25,7 +25,7 @@ Reviewed-by: George Dunlap <george.dunlap@citrix.com> #include <asm/cpregs.h> --- xen/arch/x86/mm.c.orig +++ xen/arch/x86/mm.c -@@ -2524,7 +2524,7 @@ static int _put_final_page_type(struct p +@@ -2440,7 +2440,7 @@ static int _put_final_page_type(struct p */ if ( !(shadow_mode_enabled(page_get_owner(page)) && (page->count_info & PGC_page_table)) ) @@ -34,27 +34,9 @@ Reviewed-by: George Dunlap <george.dunlap@citrix.com> wmb(); page->u.inuse.type_info--; } -@@ -2534,7 +2534,7 @@ static int _put_final_page_type(struct p - (PGT_count_mask|PGT_validated|PGT_partial)) == 1); - if ( !(shadow_mode_enabled(page_get_owner(page)) && - (page->count_info & PGC_page_table)) ) -- page->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(page); - wmb(); - page->u.inuse.type_info |= PGT_validated; - } -@@ -2588,7 +2588,7 @@ static int _put_page_type(struct page_in - if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) - { - /* -- * page_set_tlbflush_timestamp() accesses the same union -+ * set_tlbflush_timestamp() accesses the same union - * linear_pt_count lives in. Unvalidated page table pages, - * however, should occur during domain destruction only - * anyway. Updating of linear_pt_count luckily is not -@@ -2609,7 +2609,7 @@ static int _put_page_type(struct page_in - */ - if ( !(shadow_mode_enabled(page_get_owner(page)) && +@@ -2510,7 +2510,7 @@ + if ( (!ptpg || !PGT_type_equal(x, ptpg->u.inuse.type_info)) && + !(shadow_mode_enabled(page_get_owner(page)) && (page->count_info & PGC_page_table)) ) - page->tlbflush_timestamp = tlbflush_current_time(); + page_set_tlbflush_timestamp(page); diff --git a/sysutils/xenkernel46/patches/patch-XSA246 b/sysutils/xenkernel46/patches/patch-XSA246 new file mode 100644 index 00000000000..39cfd7ec120 --- /dev/null +++ b/sysutils/xenkernel46/patches/patch-XSA246 @@ -0,0 +1,76 @@ +$NetBSD: patch-XSA246,v 1.1 2017/12/15 14:00:44 bouyer Exp $ + +From: Julien Grall <julien.grall@linaro.org> +Subject: x86/pod: prevent infinite loop when shattering large pages + +When populating pages, the PoD may need to split large ones using +p2m_set_entry and request the caller to retry (see ept_get_entry for +instance). + +p2m_set_entry may fail to shatter if it is not possible to allocate +memory for the new page table. However, the error is not propagated +resulting to the callers to retry infinitely the PoD. + +Prevent the infinite loop by return false when it is not possible to +shatter the large mapping. + +This is XSA-246. 
+ +Signed-off-by: Julien Grall <julien.grall@linaro.org> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: George Dunlap <george.dunlap@citrix.com> + +--- xen/arch/x86/mm/p2m-pod.c.orig ++++ xen/arch/x86/mm/p2m-pod.c +@@ -1073,9 +1073,8 @@ p2m_pod_demand_populate(struct p2m_domai + * NOTE: In a fine-grained p2m locking scenario this operation + * may need to promote its locking from gfn->1g superpage + */ +- p2m_set_entry(p2m, gfn_aligned, _mfn(INVALID_MFN), PAGE_ORDER_2M, +- p2m_populate_on_demand, p2m->default_access); +- return 0; ++ return p2m_set_entry(p2m, gfn_aligned, _mfn(INVALID_MFN), PAGE_ORDER_2M, ++ p2m_populate_on_demand, p2m->default_access); + } + + /* Only reclaim if we're in actual need of more cache. */ +@@ -1106,8 +1105,12 @@ p2m_pod_demand_populate(struct p2m_domai + + gfn_aligned = (gfn >> order) << order; + +- p2m_set_entry(p2m, gfn_aligned, mfn, order, p2m_ram_rw, +- p2m->default_access); ++ if ( p2m_set_entry(p2m, gfn_aligned, mfn, order, p2m_ram_rw, ++ p2m->default_access) ) ++ { ++ p2m_pod_cache_add(p2m, p, order); ++ goto out_fail; ++ } + + for( i = 0; i < (1UL << order); i++ ) + { +@@ -1152,13 +1155,18 @@ remap_and_retry: + BUG_ON(order != PAGE_ORDER_2M); + pod_unlock(p2m); + +- /* Remap this 2-meg region in singleton chunks */ +- /* NOTE: In a p2m fine-grained lock scenario this might +- * need promoting the gfn lock from gfn->2M superpage */ ++ /* ++ * Remap this 2-meg region in singleton chunks. See the comment on the ++ * 1G page splitting path above for why a single call suffices. ++ * ++ * NOTE: In a p2m fine-grained lock scenario this might ++ * need promoting the gfn lock from gfn->2M superpage. ++ */ + gfn_aligned = (gfn>>order)<<order; +- for(i=0; i<(1<<order); i++) +- p2m_set_entry(p2m, gfn_aligned + i, _mfn(INVALID_MFN), PAGE_ORDER_4K, +- p2m_populate_on_demand, p2m->default_access); ++ if ( p2m_set_entry(p2m, gfn_aligned, _mfn(INVALID_MFN), PAGE_ORDER_4K, ++ p2m_populate_on_demand, p2m->default_access) ) ++ return -1; ++ + if ( tb_init_done ) + { + struct { diff --git a/sysutils/xenkernel46/patches/patch-XSA247 b/sysutils/xenkernel46/patches/patch-XSA247 new file mode 100644 index 00000000000..65103360865 --- /dev/null +++ b/sysutils/xenkernel46/patches/patch-XSA247 @@ -0,0 +1,286 @@ +$NetBSD: patch-XSA247,v 1.1 2017/12/15 14:00:44 bouyer Exp $ + +From 6208d2d761ca4cec3560322222532c4a5ba1b375 Mon Sep 17 00:00:00 2001 +From: George Dunlap <george.dunlap@citrix.com> +Date: Fri, 10 Nov 2017 16:53:54 +0000 +Subject: [PATCH 1/2] p2m: Always check to see if removing a p2m entry actually + worked + +The PoD zero-check functions speculatively remove memory from the p2m, +then check to see if it's completely zeroed, before putting it in the +cache. + +Unfortunately, the p2m_set_entry() calls may fail if the underlying +pagetable structure needs to change and the domain has exhausted its +p2m memory pool: for instance, if we're removing a 2MiB region out of +a 1GiB entry (in the p2m_pod_zero_check_superpage() case), or a 4k +region out of a 2MiB or larger entry (in the p2m_pod_zero_check() +case); and the return value is not checked. + +The underlying mfn will then be added into the PoD cache, and at some +point mapped into another location in the p2m. If the guest +afterwards ballons out this memory, it will be freed to the hypervisor +and potentially reused by another domain, in spite of the fact that +the original domain still has writable mappings to it. 
+ +There are several places where p2m_set_entry() shouldn't be able to +fail, as it is guaranteed to write an entry of the same order that +succeeded before. Add a backstop of crashing the domain just in case, +and an ASSERT_UNREACHABLE() to flag up the broken assumption on debug +builds. + +While we're here, use PAGE_ORDER_2M rather than a magic constant. + +This is part of XSA-247. + +Reported-by: George Dunlap <george.dunlap.com> +Signed-off-by: George Dunlap <george.dunlap@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +--- +v4: +- Removed some training whitespace +v3: +- Reformat reset clause to be more compact +- Make sure to set map[i] = NULL when unmapping in case we need to bail +v2: +- Crash a domain if a p2m_set_entry we think cannot fail fails anyway. +--- + xen/arch/x86/mm/p2m-pod.c | 76 +++++++++++++++++++++++++++++++++++++---------- + 1 file changed, 60 insertions(+), 16 deletions(-) + +diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c +index 519b80cc3d..b1f0abe02d 100644 +--- xen/arch/x86/mm/p2m-pod.c.orig ++++ xen/arch/x86/mm/p2m-pod.c +@@ -729,8 +729,9 @@ p2m_pod_zero_check_superpage(struct p2m_domain *p2m, unsigned long gfn) + } + + /* Try to remove the page, restoring old mapping if it fails. */ +- p2m_set_entry(p2m, gfn, _mfn(INVALID_MFN), PAGE_ORDER_2M, +- p2m_populate_on_demand, p2m->default_access); ++ if ( p2m_set_entry(p2m, gfn, _mfn(INVALID_MFN), PAGE_ORDER_2M, ++ p2m_populate_on_demand, p2m->default_access) ) ++ goto out; + + /* Make none of the MFNs are used elsewhere... for example, mapped + * via the grant table interface, or by qemu. Allow one refcount for +@@ -786,9 +787,18 @@ p2m_pod_zero_check_superpage(struct p2m_domain *p2m, unsigned long gfn) + ret = SUPERPAGE_PAGES; + + out_reset: +- if ( reset ) +- p2m_set_entry(p2m, gfn, mfn0, 9, type0, p2m->default_access); +- ++ /* ++ * This p2m_set_entry() call shouldn't be able to fail, since the same order ++ * on the same gfn succeeded above. If that turns out to be false, crashing ++ * the domain should be the safest way of making sure we don't leak memory. ++ */ ++ if ( reset && p2m_set_entry(p2m, gfn, mfn0, PAGE_ORDER_2M, ++ type0, p2m->default_access) ) ++ { ++ ASSERT_UNREACHABLE(); ++ domain_crash(d); ++ } ++ + out: + gfn_unlock(p2m, gfn, SUPERPAGE_ORDER); + return ret; +@@ -845,19 +855,30 @@ p2m_pod_zero_check(struct p2m_domain *p2m, unsigned long *gfns, int count) + } + + /* Try to remove the page, restoring old mapping if it fails. */ +- p2m_set_entry(p2m, gfns[i], _mfn(INVALID_MFN), PAGE_ORDER_4K, +- p2m_populate_on_demand, p2m->default_access); ++ if ( p2m_set_entry(p2m, gfns[i], _mfn(INVALID_MFN), PAGE_ORDER_4K, ++ p2m_populate_on_demand, p2m->default_access) ) ++ goto skip; + + /* See if the page was successfully unmapped. (Allow one refcount + * for being allocated to a domain.) */ + if ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) > 1 ) + { ++ /* ++ * If the previous p2m_set_entry call succeeded, this one shouldn't ++ * be able to fail. If it does, crashing the domain should be safe. 
++ */ ++ if ( p2m_set_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K, ++ types[i], p2m->default_access) ) ++ { ++ ASSERT_UNREACHABLE(); ++ domain_crash(d); ++ goto out_unmap; ++ } ++ ++ skip: + unmap_domain_page(map[i]); + map[i] = NULL; + +- p2m_set_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K, +- types[i], p2m->default_access); +- + continue; + } + } +@@ -874,12 +895,25 @@ p2m_pod_zero_check(struct p2m_domain *p2m, unsigned long *gfns, int count) + + unmap_domain_page(map[i]); + +- /* See comment in p2m_pod_zero_check_superpage() re gnttab +- * check timing. */ +- if ( j < PAGE_SIZE/sizeof(*map[i]) ) ++ map[i] = NULL; ++ ++ /* ++ * See comment in p2m_pod_zero_check_superpage() re gnttab ++ * check timing. ++ */ ++ if ( j < (PAGE_SIZE / sizeof(*map[i])) ) + { +- p2m_set_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K, +- types[i], p2m->default_access); ++ /* ++ * If the previous p2m_set_entry call succeeded, this one shouldn't ++ * be able to fail. If it does, crashing the domain should be safe. ++ */ ++ if ( p2m_set_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K, ++ types[i], p2m->default_access) ) ++ { ++ ASSERT_UNREACHABLE(); ++ domain_crash(d); ++ goto out_unmap; ++ } + } + else + { +@@ -903,7 +937,17 @@ p2m_pod_zero_check(struct p2m_domain *p2m, unsigned long *gfns, int count) + p2m->pod.entry_count++; + } + } +- ++ ++ return; ++ ++out_unmap: ++ /* ++ * Something went wrong, probably crashing the domain. Unmap ++ * everything and return. ++ */ ++ for ( i = 0; i < count; i++ ) ++ if ( map[i] ) ++ unmap_domain_page(map[i]); + } + + #define POD_SWEEP_LIMIT 1024 +-- +2.15.0 + +From d65a029d34e3d6157c87ac343dc8eefa1b12818e Mon Sep 17 00:00:00 2001 +From: George Dunlap <george.dunlap@citrix.com> +Date: Fri, 10 Nov 2017 16:53:55 +0000 +Subject: [PATCH 2/2] p2m: Check return value of p2m_set_entry() when + decreasing reservation + +If the entire range specified to p2m_pod_decrease_reservation() is marked +populate-on-demand, then it will make a single p2m_set_entry() call, +reducing its PoD entry count. + +Unfortunately, in the right circumstances, this p2m_set_entry() call +may fail. It that case, repeated calls to decrease_reservation() may +cause p2m->pod.entry_count to fall below zero, potentially tripping +over BUG_ON()s to the contrary. + +Instead, check to see if the entry succeeded, and return false if not. +The caller will then call guest_remove_page() on the gfns, which will +return -EINVAL upon finding no valid memory there to return. + +Unfortunately if the order > 0, the entry may have partially changed. +A domain_crash() is probably the safest thing in that case. + +Other p2m_set_entry() calls in the same function should be fine, +because they are writing the entry at its current order. Nonetheless, +check the return value and crash if our assumption turns otu to be +wrong. + +This is part of XSA-247. + +Reported-by: George Dunlap <george.dunlap.com> +Signed-off-by: George Dunlap <george.dunlap@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +--- +v2: Crash the domain if we're not sure it's safe (or if we think it +can't happen) +--- + xen/arch/x86/mm/p2m-pod.c | 42 +++++++++++++++++++++++++++++++++--------- + 1 file changed, 33 insertions(+), 9 deletions(-) + +diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c +index b1f0abe02d..9324f16c91 100644 +--- xen/arch/x86/mm/p2m-pod.c.orig ++++ xen/arch/x86/mm/p2m-pod.c +@@ -559,11 +559,23 @@ recount: + + if ( !nonpod ) + { +- /* All PoD: Mark the whole region invalid and tell caller +- * we're done. 
*/ +- p2m_set_entry(p2m, gpfn, _mfn(INVALID_MFN), order, p2m_invalid, +- p2m->default_access); +- p2m->pod.entry_count-=(1<<order); ++ /* ++ * All PoD: Mark the whole region invalid and tell caller ++ * we're done. ++ */ ++ if ( p2m_set_entry(p2m, gpfn, _mfn(INVALID_MFN), order, p2m_invalid, ++ p2m->default_access) ) ++ { ++ /* ++ * If this fails, we can't tell how much of the range was changed. ++ * Best to crash the domain unless we're sure a partial change is ++ * impossible. ++ */ ++ if ( order != 0 ) ++ domain_crash(d); ++ goto out_unlock; ++ } ++ p2m->pod.entry_count -= 1UL << order; + BUG_ON(p2m->pod.entry_count < 0); + ret = 1; + goto out_entry_check; +@@ -595,8 +607,14 @@ recount: + mfn = p2m->get_entry(p2m, gpfn + i, &t, &a, 0, NULL, NULL); + if ( t == p2m_populate_on_demand ) + { +- p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid, +- p2m->default_access); ++ /* This shouldn't be able to fail */ ++ if ( p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0, ++ p2m_invalid, p2m->default_access) ) ++ { ++ ASSERT_UNREACHABLE(); ++ domain_crash(d); ++ goto out_unlock; ++ } + p2m->pod.entry_count--; + BUG_ON(p2m->pod.entry_count < 0); + pod--; +@@ -609,8 +627,14 @@ recount: + + page = mfn_to_page(mfn); + +- p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid, +- p2m->default_access); ++ /* This shouldn't be able to fail */ ++ if ( p2m_set_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0, ++ p2m_invalid, p2m->default_access) ) ++ { ++ ASSERT_UNREACHABLE(); ++ domain_crash(d); ++ goto out_unlock; ++ } + set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY); + + p2m_pod_cache_add(p2m, page, 0); +-- +2.15.0 + diff --git a/sysutils/xenkernel46/patches/patch-XSA248 b/sysutils/xenkernel46/patches/patch-XSA248 new file mode 100644 index 00000000000..87655c6d29b --- /dev/null +++ b/sysutils/xenkernel46/patches/patch-XSA248 @@ -0,0 +1,164 @@ +$NetBSD: patch-XSA248,v 1.1 2017/12/15 14:00:44 bouyer Exp $ + +From: Jan Beulich <jbeulich@suse.com> +Subject: x86/mm: don't wrongly set page ownership + +PV domains can obtain mappings of any pages owned by the correct domain, +including ones that aren't actually assigned as "normal" RAM, but used +by Xen internally. At the moment such "internal" pages marked as owned +by a guest include pages used to track logdirty bits, as well as p2m +pages and the "unpaged pagetable" for HVM guests. Since the PV memory +management and shadow code conflict in their use of struct page_info +fields, and since shadow code is being used for log-dirty handling for +PV domains, pages coming from the shadow pool must, for PV domains, not +have the domain set as their owner. + +While the change could be done conditionally for just the PV case in +shadow code, do it unconditionally (and for consistency also for HAP), +just to be on the safe side. + +There's one special case though for shadow code: The page table used for +running a HVM guest in unpaged mode is subject to get_page() (in +set_shadow_status()) and hence must have its owner set. + +This is XSA-248. 
+ +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Tim Deegan <tim@xen.org> +Reviewed-by: George Dunlap <george.dunlap@citrix.com> + +--- xen/arch/x86/mm/hap/hap.c.orig ++++ xen/arch/x86/mm/hap/hap.c +@@ -283,8 +283,7 @@ static struct page_info *hap_alloc_p2m_p + { + d->arch.paging.hap.total_pages--; + d->arch.paging.hap.p2m_pages++; +- page_set_owner(pg, d); +- pg->count_info |= 1; ++ ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask)); + } + else if ( !d->arch.paging.p2m_alloc_failed ) + { +@@ -299,21 +298,23 @@ static struct page_info *hap_alloc_p2m_p + + static void hap_free_p2m_page(struct domain *d, struct page_info *pg) + { ++ struct domain *owner = page_get_owner(pg); ++ + /* This is called both from the p2m code (which never holds the + * paging lock) and the log-dirty code (which always does). */ + paging_lock_recursive(d); + +- ASSERT(page_get_owner(pg) == d); +- /* Should have just the one ref we gave it in alloc_p2m_page() */ +- if ( (pg->count_info & PGC_count_mask) != 1 ) { +- HAP_ERROR("Odd p2m page %p count c=%#lx t=%"PRtype_info"\n", +- pg, pg->count_info, pg->u.inuse.type_info); ++ /* Should still have no owner and count zero. */ ++ if ( owner || (pg->count_info & PGC_count_mask) ) ++ { ++ HAP_ERROR("d%d: Odd p2m page %"PRI_mfn" d=%d c=%lx t=%"PRtype_info"\n", ++ d->domain_id, mfn_x(page_to_mfn(pg)), ++ owner ? owner->domain_id : DOMID_INVALID, ++ pg->count_info, pg->u.inuse.type_info); + WARN(); ++ pg->count_info &= ~PGC_count_mask; ++ page_set_owner(pg, NULL); + } +- pg->count_info &= ~PGC_count_mask; +- /* Free should not decrement domain's total allocation, since +- * these pages were allocated without an owner. */ +- page_set_owner(pg, NULL); + d->arch.paging.hap.p2m_pages--; + d->arch.paging.hap.total_pages++; + hap_free(d, page_to_mfn(pg)); +--- xen/arch/x86/mm/shadow/common.c.orig ++++ xen/arch/x86/mm/shadow/common.c +@@ -1573,32 +1573,29 @@ shadow_alloc_p2m_page(struct domain *d) + pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0)); + d->arch.paging.shadow.p2m_pages++; + d->arch.paging.shadow.total_pages--; ++ ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask)); + + paging_unlock(d); + +- /* Unlike shadow pages, mark p2m pages as owned by the domain. +- * Marking the domain as the owner would normally allow the guest to +- * create mappings of these pages, but these p2m pages will never be +- * in the domain's guest-physical address space, and so that is not +- * believed to be a concern. */ +- page_set_owner(pg, d); +- pg->count_info |= 1; + return pg; + } + + static void + shadow_free_p2m_page(struct domain *d, struct page_info *pg) + { +- ASSERT(page_get_owner(pg) == d); +- /* Should have just the one ref we gave it in alloc_p2m_page() */ +- if ( (pg->count_info & PGC_count_mask) != 1 ) ++ struct domain *owner = page_get_owner(pg); ++ ++ /* Should still have no owner and count zero. */ ++ if ( owner || (pg->count_info & PGC_count_mask) ) + { +- SHADOW_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n", ++ SHADOW_ERROR("d%d: Odd p2m page %"PRI_mfn" d=%d c=%lx t=%"PRtype_info"\n", ++ d->domain_id, mfn_x(page_to_mfn(pg)), ++ owner ? 
owner->domain_id : DOMID_INVALID, + pg->count_info, pg->u.inuse.type_info); ++ pg->count_info &= ~PGC_count_mask; ++ page_set_owner(pg, NULL); + } +- pg->count_info &= ~PGC_count_mask; + pg->u.sh.type = SH_type_p2m_table; /* p2m code reuses type-info */ +- page_set_owner(pg, NULL); + + /* This is called both from the p2m code (which never holds the + * paging lock) and the log-dirty code (which always does). */ +@@ -3216,7 +3213,9 @@ int shadow_enable(struct domain *d, u32 + | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER + | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); + unmap_domain_page(e); ++ pg->count_info = 1; + pg->u.inuse.type_info = PGT_l2_page_table | 1 | PGT_validated; ++ page_set_owner(pg, d); + } + + paging_lock(d); +@@ -3254,7 +3253,11 @@ int shadow_enable(struct domain *d, u32 + if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) ) + p2m_teardown(p2m); + if ( rv != 0 && pg != NULL ) ++ { ++ pg->count_info &= ~PGC_count_mask; ++ page_set_owner(pg, NULL); + shadow_free_p2m_page(d, pg); ++ } + domain_unpause(d); + return rv; + } +@@ -3363,7 +3366,22 @@ out: + + /* Must be called outside the lock */ + if ( unpaged_pagetable ) ++ { ++ if ( page_get_owner(unpaged_pagetable) == d && ++ (unpaged_pagetable->count_info & PGC_count_mask) == 1 ) ++ { ++ unpaged_pagetable->count_info &= ~PGC_count_mask; ++ page_set_owner(unpaged_pagetable, NULL); ++ } ++ /* Complain here in cases where shadow_free_p2m_page() won't. */ ++ else if ( !page_get_owner(unpaged_pagetable) && ++ !(unpaged_pagetable->count_info & PGC_count_mask) ) ++ SHADOW_ERROR("d%d: Odd unpaged pt %"PRI_mfn" c=%lx t=%"PRtype_info"\n", ++ d->domain_id, mfn_x(page_to_mfn(unpaged_pagetable)), ++ unpaged_pagetable->count_info, ++ unpaged_pagetable->u.inuse.type_info); + shadow_free_p2m_page(d, unpaged_pagetable); ++ } + } + + void shadow_final_teardown(struct domain *d) diff --git a/sysutils/xenkernel46/patches/patch-XSA249 b/sysutils/xenkernel46/patches/patch-XSA249 new file mode 100644 index 00000000000..462a4b38abd --- /dev/null +++ b/sysutils/xenkernel46/patches/patch-XSA249 @@ -0,0 +1,44 @@ +$NetBSD: patch-XSA249,v 1.1 2017/12/15 14:00:44 bouyer Exp $ + +From: Jan Beulich <jbeulich@suse.com> +Subject: x86/shadow: fix refcount overflow check + +Commit c385d27079 ("x86 shadow: for multi-page shadows, explicitly track +the first page") reduced the refcount width to 25, without adjusting the +overflow check. Eliminate the disconnect by using a manifest constant. + +Interestingly, up to commit 047782fa01 ("Out-of-sync L1 shadows: OOS +snapshot") the refcount was 27 bits wide, yet the check was already +using 26. + +This is XSA-249. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: George Dunlap <george.dunlap@citrix.com> +Reviewed-by: Tim Deegan <tim@xen.org> +--- +v2: Simplify expression back to the style it was. + +--- xen/arch/x86/mm/shadow/private.h.orig ++++ xen/arch/x86/mm/shadow/private.h +@@ -529,7 +529,7 @@ static inline int sh_get_ref(struct doma + x = sp->u.sh.count; + nx = x + 1; + +- if ( unlikely(nx >= 1U<<26) ) ++ if ( unlikely(nx >= (1U << PAGE_SH_REFCOUNT_WIDTH)) ) + { + SHADOW_PRINTK("shadow ref overflow, gmfn=%lx smfn=%lx\n", + __backpointer(sp), mfn_x(smfn)); +--- xen/include/asm-x86/mm.h.orig ++++ xen/include/asm-x86/mm.h +@@ -82,7 +82,8 @@ struct page_info + unsigned long type:5; /* What kind of shadow is this? */ + unsigned long pinned:1; /* Is the shadow pinned? */ + unsigned long head:1; /* Is this the first page of the shadow? 
*/ +- unsigned long count:25; /* Reference count */ ++#define PAGE_SH_REFCOUNT_WIDTH 25 ++ unsigned long count:PAGE_SH_REFCOUNT_WIDTH; /* Reference count */ + } sh; + + /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */ diff --git a/sysutils/xenkernel46/patches/patch-XSA250 b/sysutils/xenkernel46/patches/patch-XSA250 new file mode 100644 index 00000000000..1f31945f4fb --- /dev/null +++ b/sysutils/xenkernel46/patches/patch-XSA250 @@ -0,0 +1,69 @@ +$NetBSD: patch-XSA250,v 1.1 2017/12/15 14:00:44 bouyer Exp $ + +From: Jan Beulich <jbeulich@suse.com> +Subject: x86/shadow: fix ref-counting error handling + +The old-Linux handling in shadow_set_l4e() mistakenly ORed together the +results of sh_get_ref() and sh_pin(). As the latter failing is not a +correctness problem, simply ignore its return value. + +In sh_set_toplevel_shadow() a failing sh_get_ref() must not be +accompanied by installing the entry, despite the domain being crashed. + +This is XSA-250. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Tim Deegan <tim@xen.org> + +--- xen/arch/x86/mm/shadow/multi.c.orig ++++ xen/arch/x86/mm/shadow/multi.c +@@ -923,7 +923,7 @@ static int shadow_set_l4e(struct domain + shadow_l4e_t new_sl4e, + mfn_t sl4mfn) + { +- int flags = 0, ok; ++ int flags = 0; + shadow_l4e_t old_sl4e; + paddr_t paddr; + ASSERT(sl4e != NULL); +@@ -938,15 +938,16 @@ static int shadow_set_l4e(struct domain + { + /* About to install a new reference */ + mfn_t sl3mfn = shadow_l4e_get_mfn(new_sl4e); +- ok = sh_get_ref(d, sl3mfn, paddr); +- /* Are we pinning l3 shadows to handle wierd linux behaviour? */ +- if ( sh_type_is_pinnable(d, SH_type_l3_64_shadow) ) +- ok |= sh_pin(d, sl3mfn); +- if ( !ok ) ++ ++ if ( !sh_get_ref(d, sl3mfn, paddr) ) + { + domain_crash(d); + return SHADOW_SET_ERROR; + } ++ ++ /* Are we pinning l3 shadows to handle weird Linux behaviour? */ ++ if ( sh_type_is_pinnable(d, SH_type_l3_64_shadow) ) ++ sh_pin(d, sl3mfn); + } + + /* Write the new entry */ +@@ -3965,14 +3966,15 @@ sh_set_toplevel_shadow(struct vcpu *v, + + /* Take a ref to this page: it will be released in sh_detach_old_tables() + * or the next call to set_toplevel_shadow() */ +- if ( !sh_get_ref(d, smfn, 0) ) ++ if ( sh_get_ref(d, smfn, 0) ) ++ new_entry = pagetable_from_mfn(smfn); ++ else + { + SHADOW_ERROR("can't install %#lx as toplevel shadow\n", mfn_x(smfn)); + domain_crash(d); ++ new_entry = pagetable_null(); + } + +- new_entry = pagetable_from_mfn(smfn); +- + install_new_entry: + /* Done. Install it */ + SHADOW_PRINTK("%u/%u [%u] gmfn %#"PRI_mfn" smfn %#"PRI_mfn"\n", diff --git a/sysutils/xenkernel46/patches/patch-XSA251 b/sysutils/xenkernel46/patches/patch-XSA251 new file mode 100644 index 00000000000..9eeb71f942a --- /dev/null +++ b/sysutils/xenkernel46/patches/patch-XSA251 @@ -0,0 +1,23 @@ +$NetBSD: patch-XSA251,v 1.1 2017/12/15 14:00:44 bouyer Exp $ + +From: Jan Beulich <jbeulich@suse.com> +Subject: x86/paging: don't unconditionally BUG() on finding SHARED_M2P_ENTRY + +PV guests can fully control the values written into the P2M. + +This is XSA-251. 
+ +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> + +--- xen/arch/x86/mm/paging.c.orig ++++ xen/arch/x86/mm/paging.c +@@ -276,7 +276,7 @@ void paging_mark_pfn_dirty(struct domain + return; + + /* Shared MFNs should NEVER be marked dirty */ +- BUG_ON(SHARED_M2P(pfn)); ++ BUG_ON(paging_mode_translate(d) && SHARED_M2P(pfn)); + + /* + * Values with the MSB set denote MFNs that aren't really part of the diff --git a/sysutils/xenkernel48/Makefile b/sysutils/xenkernel48/Makefile index 8d561caaee4..ebf064fc9cf 100644 --- a/sysutils/xenkernel48/Makefile +++ b/sysutils/xenkernel48/Makefile @@ -1,9 +1,9 @@ -# $NetBSD: Makefile,v 1.8 2017/10/17 11:10:36 bouyer Exp $ +# $NetBSD: Makefile,v 1.9 2017/12/15 14:02:15 bouyer Exp $ VERSION= 4.8.2 DISTNAME= xen-${VERSION} PKGNAME= xenkernel48-${VERSION} -PKGREVISION= 1 +PKGREVISION= 2 CATEGORIES= sysutils MASTER_SITES= https://downloads.xenproject.org/release/xen/${VERSION}/ DIST_SUBDIR= xen48 diff --git a/sysutils/xenkernel48/distinfo b/sysutils/xenkernel48/distinfo index 08f09502550..1f71e6463a9 100644 --- a/sysutils/xenkernel48/distinfo +++ b/sysutils/xenkernel48/distinfo @@ -1,4 +1,4 @@ -$NetBSD: distinfo,v 1.3 2017/10/17 08:42:30 bouyer Exp $ +$NetBSD: distinfo,v 1.4 2017/12/15 14:02:15 bouyer Exp $ SHA1 (xen48/xen-4.8.2.tar.gz) = 184c57ce9e71e34b3cbdd318524021f44946efbe RMD160 (xen48/xen-4.8.2.tar.gz) = f4126cb0f7ff427ed7d20ce399dcd1077c599343 @@ -11,11 +11,17 @@ SHA1 (patch-XSA234) = acf4170a410d9f314c0cc0c5c092db6bb6cc69a0 SHA1 (patch-XSA237) = 3125554b155bd650480934a37d89d1a7471dfb20 SHA1 (patch-XSA238) = 58b6fcb73d314d7f06256ed3769210e49197aa90 SHA1 (patch-XSA239) = 10619718e8a1536a7f52eb3838cdb490e6ba8c97 -SHA1 (patch-XSA240) = dca90d33d30167edbe07071795f18159e3e20c57 -SHA1 (patch-XSA241) = b506425ca7382190435df6f96800cb0a24aff23e -SHA1 (patch-XSA242) = afff314771d78ee2482aec3b7693c12bfe00e0ec +SHA1 (patch-XSA240) = 77b398914ca79da6cd6abf34674d5476b6d3bcba +SHA1 (patch-XSA241) = 351395135fcd30b7ba35e84a64bf6348214d4fa6 +SHA1 (patch-XSA242) = 77e224f927818adb77b8ef10329fd886ece62835 SHA1 (patch-XSA243) = 75eef49628bc0b3bd4fe8b023cb2da75928103a7 SHA1 (patch-XSA244) = 2739ff8a920630088853a9076f71ca2caf639320 +SHA1 (patch-XSA246) = b48433ee2213340d1bd3c810ea3e5c6de7890fd7 +SHA1 (patch-XSA247) = b92c4a7528ebd121ba2700610589df6fff40cbbf +SHA1 (patch-XSA248) = d5787fa7fc48449ca90200811b66cb6278c750aa +SHA1 (patch-XSA249) = 7037a35f37eb866f16fe90482e66d0eca95944c4 +SHA1 (patch-XSA250) = 25ab2e8c67ebe2b40cf073197c17f1625f5581f6 +SHA1 (patch-XSA251) = dc0786c85bcfbdd3f7a1c97a3af32c10deea8276 SHA1 (patch-xen_Makefile) = be3f4577a205b23187b91319f91c50720919f70b SHA1 (patch-xen_Rules.mk) = 5f33a667bae67c85d997a968c0f8b014b707d13c SHA1 (patch-xen_arch_x86_Rules.mk) = e2d148fb308c37c047ca41a678471217b6166977 diff --git a/sysutils/xenkernel48/patches/patch-XSA240 b/sysutils/xenkernel48/patches/patch-XSA240 index c3c153fbd2a..8bc1f97215f 100644 --- a/sysutils/xenkernel48/patches/patch-XSA240 +++ b/sysutils/xenkernel48/patches/patch-XSA240 @@ -1,4 +1,4 @@ -$NetBSD: patch-XSA240,v 1.1 2017/10/17 08:42:30 bouyer Exp $ +$NetBSD: patch-XSA240,v 1.2 2017/12/15 14:02:15 bouyer Exp $ From 2315b8c651e0cc31c9153d09c9912b8fbe632ad2 Mon Sep 17 00:00:00 2001 From: Jan Beulich <jbeulich@suse.com> @@ -532,7 +532,7 @@ index 54acc60723..ffa66eb146 100644 +### pv-linear-pt +> `= <boolean>` + -+> Default: `true` ++> Default: `false` + +Allow PV guests to have pagetable entries pointing to other pagetables +of the same level 
(i.e., allowing L2 PTEs to point to other L2 pages). @@ -540,9 +540,9 @@ index 54acc60723..ffa66eb146 100644 +used to allow operating systems a simple way to consistently map the +current process's pagetables into its own virtual address space. + -+None of the most common PV operating systems (Linux, MiniOS) -+use this technique, but NetBSD in PV mode, and maybe custom operating -+systems which do. ++None of the most common PV operating systems (Linux, NetBSD, MiniOS) ++use this technique, but there may be custom operating systems which ++do. ### reboot > `= t[riple] | k[bd] | a[cpi] | p[ci] | P[ower] | e[fi] | n[o] [, [w]arm | [c]old]` @@ -576,3 +576,90 @@ index 31d4a03840..5d125cff3a 100644 -- 2.14.1 +From: Jan Beulich <jbeulich@suse.com> +Subject: x86: don't wrongly trigger linear page table assertion + +_put_page_type() may do multiple iterations until its cmpxchg() +succeeds. It invokes set_tlbflush_timestamp() on the first +iteration, however. Code inside the function takes care of this, but +- the assertion in _put_final_page_type() would trigger on the second + iteration if time stamps in a debug build are permitted to be + sufficiently much wider than the default 6 bits (see WRAP_MASK in + flushtlb.c), +- it returning -EINTR (for a continuation to be scheduled) would leave + the page inconsistent state (until the re-invocation completes). +Make the set_tlbflush_timestamp() invocation conditional, bypassing it +(for now) only in the case we really can't tolerate the stamp to be +stored. + +This is part of XSA-240. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: George Dunlap <george.dunlap@citrix.com> + +--- xen/arch/x86/mm.c.orig ++++ xen/arch/x86/mm.c +@@ -2561,30 +2561,21 @@ + break; + } + +- if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) +- { +- /* +- * page_set_tlbflush_timestamp() accesses the same union +- * linear_pt_count lives in. Unvalidated page table pages, +- * however, should occur during domain destruction only +- * anyway. Updating of linear_pt_count luckily is not +- * necessary anymore for a dying domain. +- */ +- ASSERT(page_get_owner(page)->is_dying); +- ASSERT(page->linear_pt_count < 0); +- ASSERT(ptpg->linear_pt_count > 0); +- ptpg = NULL; +- } +- + /* + * Record TLB information for flush later. We do not stamp page + * tables when running in shadow mode: + * 1. Pointless, since it's the shadow pt's which must be tracked. + * 2. Shadow mode reuses this field for shadowed page tables to + * store flags info -- we don't want to conflict with that. ++ * Also page_set_tlbflush_timestamp() accesses the same union ++ * linear_pt_count lives in. Pages (including page table ones), ++ * however, don't need their flush time stamp set except when ++ * the last reference is being dropped. For page table pages ++ * this happens in _put_final_page_type(). + */ +- if ( !(shadow_mode_enabled(page_get_owner(page)) && ++ if ( (!ptpg || !PGT_type_equal(x, ptpg->u.inuse.type_info)) && ++ !(shadow_mode_enabled(page_get_owner(page)) && + (page->count_info & PGC_page_table)) ) + page->tlbflush_timestamp = tlbflush_current_time(); + } + +From: Jan Beulich <jbeulich@suse.com> +Subject: x86: don't wrongly trigger linear page table assertion (2) + +_put_final_page_type(), when free_page_type() has exited early to allow +for preemption, should not update the time stamp, as the page continues +to retain the typ which is in the process of being unvalidated. 
I can't +see why the time stamp update was put on that path in the first place +(albeit it may well have been me who had put it there years ago). + +This is part of XSA-240. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: <George Dunlap <george.dunlap.com> + +--- xen/arch/x86/mm.c.orig ++++ xen/arch/x86/mm.c +@@ -2560,9 +2560,6 @@ static int _put_final_page_type(struct p + { + ASSERT((page->u.inuse.type_info & + (PGT_count_mask|PGT_validated|PGT_partial)) == 1); +- if ( !(shadow_mode_enabled(page_get_owner(page)) && +- (page->count_info & PGC_page_table)) ) +- page->tlbflush_timestamp = tlbflush_current_time(); + wmb(); + page->u.inuse.type_info |= PGT_validated; + } diff --git a/sysutils/xenkernel48/patches/patch-XSA241 b/sysutils/xenkernel48/patches/patch-XSA241 index 5afdee4a48b..840b744fa43 100644 --- a/sysutils/xenkernel48/patches/patch-XSA241 +++ b/sysutils/xenkernel48/patches/patch-XSA241 @@ -1,4 +1,4 @@ -$NetBSD: patch-XSA241,v 1.1 2017/10/17 08:42:30 bouyer Exp $ +$NetBSD: patch-XSA241,v 1.2 2017/12/15 14:02:15 bouyer Exp $ x86: don't store possibly stale TLB flush time stamp @@ -23,9 +23,9 @@ Reviewed-by: George Dunlap <george.dunlap@citrix.com> #include <asm/system.h> #include <asm/smp.h> #include <asm/cpregs.h> ---- xen/arch/x86/mm.c.orig -+++ xen/arch/x86/mm.c -@@ -2524,7 +2524,7 @@ static int _put_final_page_type(struct p +--- xen/arch/x86/mm.c.orig 2017-12-15 14:29:51.000000000 +0100 ++++ xen/arch/x86/mm.c 2017-12-15 14:30:10.000000000 +0100 +@@ -2500,7 +2500,7 @@ */ if ( !(shadow_mode_enabled(page_get_owner(page)) && (page->count_info & PGC_page_table)) ) @@ -34,27 +34,9 @@ Reviewed-by: George Dunlap <george.dunlap@citrix.com> wmb(); page->u.inuse.type_info--; } -@@ -2534,7 +2534,7 @@ static int _put_final_page_type(struct p - (PGT_count_mask|PGT_validated|PGT_partial)) == 1); - if ( !(shadow_mode_enabled(page_get_owner(page)) && - (page->count_info & PGC_page_table)) ) -- page->tlbflush_timestamp = tlbflush_current_time(); -+ page_set_tlbflush_timestamp(page); - wmb(); - page->u.inuse.type_info |= PGT_validated; - } -@@ -2588,7 +2588,7 @@ static int _put_page_type(struct page_in - if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) ) - { - /* -- * page_set_tlbflush_timestamp() accesses the same union -+ * set_tlbflush_timestamp() accesses the same union - * linear_pt_count lives in. Unvalidated page table pages, - * however, should occur during domain destruction only - * anyway. Updating of linear_pt_count luckily is not -@@ -2609,7 +2609,7 @@ static int _put_page_type(struct page_in - */ - if ( !(shadow_mode_enabled(page_get_owner(page)) && +@@ -2573,7 +2573,7 @@ + if ( (!ptpg || !PGT_type_equal(x, ptpg->u.inuse.type_info)) && + !(shadow_mode_enabled(page_get_owner(page)) && (page->count_info & PGC_page_table)) ) - page->tlbflush_timestamp = tlbflush_current_time(); + page_set_tlbflush_timestamp(page); diff --git a/sysutils/xenkernel48/patches/patch-XSA242 b/sysutils/xenkernel48/patches/patch-XSA242 index 23c0996509a..c5614cd0a79 100644 --- a/sysutils/xenkernel48/patches/patch-XSA242 +++ b/sysutils/xenkernel48/patches/patch-XSA242 @@ -1,4 +1,4 @@ -$NetBSD: patch-XSA242,v 1.1 2017/10/17 08:42:30 bouyer Exp $ +$NetBSD: patch-XSA242,v 1.2 2017/12/15 14:02:15 bouyer Exp $ From: Jan Beulich <jbeulich@suse.com> Subject: x86: don't allow page_unlock() to drop the last type reference @@ -11,9 +11,9 @@ This is XSA-242. 
Signed-off-by: Jan Beulich <jbeulich@suse.com> ---- xen/arch/x86/mm.c.orig -+++ xen/arch/x86/mm.c -@@ -1923,7 +1923,11 @@ void page_unlock(struct page_info *page) +--- xen/arch/x86/mm.c.orig 2017-12-15 14:30:10.000000000 +0100 ++++ xen/arch/x86/mm.c 2017-12-15 14:31:32.000000000 +0100 +@@ -1906,7 +1906,11 @@ do { x = y; @@ -25,7 +25,7 @@ Signed-off-by: Jan Beulich <jbeulich@suse.com> } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x ); } -@@ -2611,6 +2615,17 @@ static int _put_page_type(struct page_in +@@ -2575,6 +2579,17 @@ (page->count_info & PGC_page_table)) ) page_set_tlbflush_timestamp(page); } diff --git a/sysutils/xenkernel48/patches/patch-XSA246 b/sysutils/xenkernel48/patches/patch-XSA246 new file mode 100644 index 00000000000..4fedacc0c49 --- /dev/null +++ b/sysutils/xenkernel48/patches/patch-XSA246 @@ -0,0 +1,76 @@ +$NetBSD: patch-XSA246,v 1.1 2017/12/15 14:02:15 bouyer Exp $ + +From: Julien Grall <julien.grall@linaro.org> +Subject: x86/pod: prevent infinite loop when shattering large pages + +When populating pages, the PoD may need to split large ones using +p2m_set_entry and request the caller to retry (see ept_get_entry for +instance). + +p2m_set_entry may fail to shatter if it is not possible to allocate +memory for the new page table. However, the error is not propagated +resulting to the callers to retry infinitely the PoD. + +Prevent the infinite loop by return false when it is not possible to +shatter the large mapping. + +This is XSA-246. + +Signed-off-by: Julien Grall <julien.grall@linaro.org> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: George Dunlap <george.dunlap@citrix.com> + +--- xen/arch/x86/mm/p2m-pod.c.orig ++++ xen/arch/x86/mm/p2m-pod.c +@@ -1071,9 +1071,8 @@ p2m_pod_demand_populate(struct p2m_domai + * NOTE: In a fine-grained p2m locking scenario this operation + * may need to promote its locking from gfn->1g superpage + */ +- p2m_set_entry(p2m, gfn_aligned, INVALID_MFN, PAGE_ORDER_2M, +- p2m_populate_on_demand, p2m->default_access); +- return 0; ++ return p2m_set_entry(p2m, gfn_aligned, INVALID_MFN, PAGE_ORDER_2M, ++ p2m_populate_on_demand, p2m->default_access); + } + + /* Only reclaim if we're in actual need of more cache. */ +@@ -1104,8 +1103,12 @@ p2m_pod_demand_populate(struct p2m_domai + + gfn_aligned = (gfn >> order) << order; + +- p2m_set_entry(p2m, gfn_aligned, mfn, order, p2m_ram_rw, +- p2m->default_access); ++ if ( p2m_set_entry(p2m, gfn_aligned, mfn, order, p2m_ram_rw, ++ p2m->default_access) ) ++ { ++ p2m_pod_cache_add(p2m, p, order); ++ goto out_fail; ++ } + + for( i = 0; i < (1UL << order); i++ ) + { +@@ -1150,13 +1153,18 @@ remap_and_retry: + BUG_ON(order != PAGE_ORDER_2M); + pod_unlock(p2m); + +- /* Remap this 2-meg region in singleton chunks */ +- /* NOTE: In a p2m fine-grained lock scenario this might +- * need promoting the gfn lock from gfn->2M superpage */ ++ /* ++ * Remap this 2-meg region in singleton chunks. See the comment on the ++ * 1G page splitting path above for why a single call suffices. ++ * ++ * NOTE: In a p2m fine-grained lock scenario this might ++ * need promoting the gfn lock from gfn->2M superpage. 
++ */ + gfn_aligned = (gfn>>order)<<order; +- for(i=0; i<(1<<order); i++) +- p2m_set_entry(p2m, gfn_aligned + i, INVALID_MFN, PAGE_ORDER_4K, +- p2m_populate_on_demand, p2m->default_access); ++ if ( p2m_set_entry(p2m, gfn_aligned, INVALID_MFN, PAGE_ORDER_4K, ++ p2m_populate_on_demand, p2m->default_access) ) ++ return -1; ++ + if ( tb_init_done ) + { + struct { diff --git a/sysutils/xenkernel48/patches/patch-XSA247 b/sysutils/xenkernel48/patches/patch-XSA247 new file mode 100644 index 00000000000..248e2702a0d --- /dev/null +++ b/sysutils/xenkernel48/patches/patch-XSA247 @@ -0,0 +1,287 @@ +$NetBSD: patch-XSA247,v 1.1 2017/12/15 14:02:15 bouyer Exp $ + +From 0a004cf322940d99432b84284b22f3a9ea67a282 Mon Sep 17 00:00:00 2001 +From: George Dunlap <george.dunlap@citrix.com> +Date: Fri, 10 Nov 2017 16:53:54 +0000 +Subject: [PATCH 1/2] p2m: Always check to see if removing a p2m entry actually + worked + +The PoD zero-check functions speculatively remove memory from the p2m, +then check to see if it's completely zeroed, before putting it in the +cache. + +Unfortunately, the p2m_set_entry() calls may fail if the underlying +pagetable structure needs to change and the domain has exhausted its +p2m memory pool: for instance, if we're removing a 2MiB region out of +a 1GiB entry (in the p2m_pod_zero_check_superpage() case), or a 4k +region out of a 2MiB or larger entry (in the p2m_pod_zero_check() +case); and the return value is not checked. + +The underlying mfn will then be added into the PoD cache, and at some +point mapped into another location in the p2m. If the guest +afterwards ballons out this memory, it will be freed to the hypervisor +and potentially reused by another domain, in spite of the fact that +the original domain still has writable mappings to it. + +There are several places where p2m_set_entry() shouldn't be able to +fail, as it is guaranteed to write an entry of the same order that +succeeded before. Add a backstop of crashing the domain just in case, +and an ASSERT_UNREACHABLE() to flag up the broken assumption on debug +builds. + +While we're here, use PAGE_ORDER_2M rather than a magic constant. + +This is part of XSA-247. + +Reported-by: George Dunlap <george.dunlap.com> +Signed-off-by: George Dunlap <george.dunlap@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +--- +v4: +- Removed some training whitespace +v3: +- Reformat reset clause to be more compact +- Make sure to set map[i] = NULL when unmapping in case we need to bail +v2: +- Crash a domain if a p2m_set_entry we think cannot fail fails anyway. +--- + xen/arch/x86/mm/p2m-pod.c | 77 +++++++++++++++++++++++++++++++++++++---------- + 1 file changed, 61 insertions(+), 16 deletions(-) + +diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c +index 0e15290390..d73a86dde0 100644 +--- xen/arch/x86/mm/p2m-pod.c.orig ++++ xen/arch/x86/mm/p2m-pod.c +@@ -754,8 +754,10 @@ p2m_pod_zero_check_superpage(struct p2m_domain *p2m, unsigned long gfn) + } + + /* Try to remove the page, restoring old mapping if it fails. */ +- p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_2M, +- p2m_populate_on_demand, p2m->default_access); ++ if ( p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_2M, ++ p2m_populate_on_demand, p2m->default_access) ) ++ goto out; ++ + p2m_tlb_flush_sync(p2m); + + /* Make none of the MFNs are used elsewhere... 
for example, mapped +@@ -812,9 +814,18 @@ p2m_pod_zero_check_superpage(struct p2m_domain *p2m, unsigned long gfn) + ret = SUPERPAGE_PAGES; + + out_reset: +- if ( reset ) +- p2m_set_entry(p2m, gfn, mfn0, 9, type0, p2m->default_access); +- ++ /* ++ * This p2m_set_entry() call shouldn't be able to fail, since the same order ++ * on the same gfn succeeded above. If that turns out to be false, crashing ++ * the domain should be the safest way of making sure we don't leak memory. ++ */ ++ if ( reset && p2m_set_entry(p2m, gfn, mfn0, PAGE_ORDER_2M, ++ type0, p2m->default_access) ) ++ { ++ ASSERT_UNREACHABLE(); ++ domain_crash(d); ++ } ++ + out: + gfn_unlock(p2m, gfn, SUPERPAGE_ORDER); + return ret; +@@ -871,19 +882,30 @@ p2m_pod_zero_check(struct p2m_domain *p2m, unsigned long *gfns, int count) + } + + /* Try to remove the page, restoring old mapping if it fails. */ +- p2m_set_entry(p2m, gfns[i], INVALID_MFN, PAGE_ORDER_4K, +- p2m_populate_on_demand, p2m->default_access); ++ if ( p2m_set_entry(p2m, gfns[i], INVALID_MFN, PAGE_ORDER_4K, ++ p2m_populate_on_demand, p2m->default_access) ) ++ goto skip; + + /* See if the page was successfully unmapped. (Allow one refcount + * for being allocated to a domain.) */ + if ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) > 1 ) + { ++ /* ++ * If the previous p2m_set_entry call succeeded, this one shouldn't ++ * be able to fail. If it does, crashing the domain should be safe. ++ */ ++ if ( p2m_set_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K, ++ types[i], p2m->default_access) ) ++ { ++ ASSERT_UNREACHABLE(); ++ domain_crash(d); ++ goto out_unmap; ++ } ++ ++ skip: + unmap_domain_page(map[i]); + map[i] = NULL; + +- p2m_set_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K, +- types[i], p2m->default_access); +- + continue; + } + } +@@ -902,12 +924,25 @@ p2m_pod_zero_check(struct p2m_domain *p2m, unsigned long *gfns, int count) + + unmap_domain_page(map[i]); + +- /* See comment in p2m_pod_zero_check_superpage() re gnttab +- * check timing. */ +- if ( j < PAGE_SIZE/sizeof(*map[i]) ) ++ map[i] = NULL; ++ ++ /* ++ * See comment in p2m_pod_zero_check_superpage() re gnttab ++ * check timing. ++ */ ++ if ( j < (PAGE_SIZE / sizeof(*map[i])) ) + { +- p2m_set_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K, +- types[i], p2m->default_access); ++ /* ++ * If the previous p2m_set_entry call succeeded, this one shouldn't ++ * be able to fail. If it does, crashing the domain should be safe. ++ */ ++ if ( p2m_set_entry(p2m, gfns[i], mfns[i], PAGE_ORDER_4K, ++ types[i], p2m->default_access) ) ++ { ++ ASSERT_UNREACHABLE(); ++ domain_crash(d); ++ goto out_unmap; ++ } + } + else + { +@@ -931,7 +966,17 @@ p2m_pod_zero_check(struct p2m_domain *p2m, unsigned long *gfns, int count) + p2m->pod.entry_count++; + } + } +- ++ ++ return; ++ ++out_unmap: ++ /* ++ * Something went wrong, probably crashing the domain. Unmap ++ * everything and return. ++ */ ++ for ( i = 0; i < count; i++ ) ++ if ( map[i] ) ++ unmap_domain_page(map[i]); + } + + #define POD_SWEEP_LIMIT 1024 +-- +2.15.0 + +From f01b21460bdd5205e1a92552d37a276866f64f1f Mon Sep 17 00:00:00 2001 +From: George Dunlap <george.dunlap@citrix.com> +Date: Fri, 10 Nov 2017 16:53:55 +0000 +Subject: [PATCH 2/2] p2m: Check return value of p2m_set_entry() when + decreasing reservation + +If the entire range specified to p2m_pod_decrease_reservation() is marked +populate-on-demand, then it will make a single p2m_set_entry() call, +reducing its PoD entry count. + +Unfortunately, in the right circumstances, this p2m_set_entry() call +may fail. 
It that case, repeated calls to decrease_reservation() may +cause p2m->pod.entry_count to fall below zero, potentially tripping +over BUG_ON()s to the contrary. + +Instead, check to see if the entry succeeded, and return false if not. +The caller will then call guest_remove_page() on the gfns, which will +return -EINVAL upon finding no valid memory there to return. + +Unfortunately if the order > 0, the entry may have partially changed. +A domain_crash() is probably the safest thing in that case. + +Other p2m_set_entry() calls in the same function should be fine, +because they are writing the entry at its current order. Nonetheless, +check the return value and crash if our assumption turns otu to be +wrong. + +This is part of XSA-247. + +Reported-by: George Dunlap <george.dunlap.com> +Signed-off-by: George Dunlap <george.dunlap@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +--- +v2: Crash the domain if we're not sure it's safe (or if we think it +can't happen) +--- + xen/arch/x86/mm/p2m-pod.c | 42 +++++++++++++++++++++++++++++++++--------- + 1 file changed, 33 insertions(+), 9 deletions(-) + +diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c +index d73a86dde0..c750d0d8cc 100644 +--- xen/arch/x86/mm/p2m-pod.c.orig ++++ xen/arch/x86/mm/p2m-pod.c +@@ -557,11 +557,23 @@ p2m_pod_decrease_reservation(struct domain *d, + + if ( !nonpod ) + { +- /* All PoD: Mark the whole region invalid and tell caller +- * we're done. */ +- p2m_set_entry(p2m, gpfn, INVALID_MFN, order, p2m_invalid, +- p2m->default_access); +- p2m->pod.entry_count-=(1<<order); ++ /* ++ * All PoD: Mark the whole region invalid and tell caller ++ * we're done. ++ */ ++ if ( p2m_set_entry(p2m, gpfn, INVALID_MFN, order, p2m_invalid, ++ p2m->default_access) ) ++ { ++ /* ++ * If this fails, we can't tell how much of the range was changed. ++ * Best to crash the domain unless we're sure a partial change is ++ * impossible. 
++ */ ++ if ( order != 0 ) ++ domain_crash(d); ++ goto out_unlock; ++ } ++ p2m->pod.entry_count -= 1UL << order; + BUG_ON(p2m->pod.entry_count < 0); + ret = 1; + goto out_entry_check; +@@ -602,8 +614,14 @@ p2m_pod_decrease_reservation(struct domain *d, + n = 1UL << cur_order; + if ( t == p2m_populate_on_demand ) + { +- p2m_set_entry(p2m, gpfn + i, INVALID_MFN, cur_order, +- p2m_invalid, p2m->default_access); ++ /* This shouldn't be able to fail */ ++ if ( p2m_set_entry(p2m, gpfn + i, INVALID_MFN, cur_order, ++ p2m_invalid, p2m->default_access) ) ++ { ++ ASSERT_UNREACHABLE(); ++ domain_crash(d); ++ goto out_unlock; ++ } + p2m->pod.entry_count -= n; + BUG_ON(p2m->pod.entry_count < 0); + pod -= n; +@@ -624,8 +642,14 @@ p2m_pod_decrease_reservation(struct domain *d, + + page = mfn_to_page(mfn); + +- p2m_set_entry(p2m, gpfn + i, INVALID_MFN, cur_order, +- p2m_invalid, p2m->default_access); ++ /* This shouldn't be able to fail */ ++ if ( p2m_set_entry(p2m, gpfn + i, INVALID_MFN, cur_order, ++ p2m_invalid, p2m->default_access) ) ++ { ++ ASSERT_UNREACHABLE(); ++ domain_crash(d); ++ goto out_unlock; ++ } + p2m_tlb_flush_sync(p2m); + for ( j = 0; j < n; ++j ) + set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY); +-- +2.15.0 + diff --git a/sysutils/xenkernel48/patches/patch-XSA248 b/sysutils/xenkernel48/patches/patch-XSA248 new file mode 100644 index 00000000000..b0ccf377bb2 --- /dev/null +++ b/sysutils/xenkernel48/patches/patch-XSA248 @@ -0,0 +1,164 @@ +$NetBSD: patch-XSA248,v 1.1 2017/12/15 14:02:15 bouyer Exp $ + +From: Jan Beulich <jbeulich@suse.com> +Subject: x86/mm: don't wrongly set page ownership + +PV domains can obtain mappings of any pages owned by the correct domain, +including ones that aren't actually assigned as "normal" RAM, but used +by Xen internally. At the moment such "internal" pages marked as owned +by a guest include pages used to track logdirty bits, as well as p2m +pages and the "unpaged pagetable" for HVM guests. Since the PV memory +management and shadow code conflict in their use of struct page_info +fields, and since shadow code is being used for log-dirty handling for +PV domains, pages coming from the shadow pool must, for PV domains, not +have the domain set as their owner. + +While the change could be done conditionally for just the PV case in +shadow code, do it unconditionally (and for consistency also for HAP), +just to be on the safe side. + +There's one special case though for shadow code: The page table used for +running a HVM guest in unpaged mode is subject to get_page() (in +set_shadow_status()) and hence must have its owner set. + +This is XSA-248. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Tim Deegan <tim@xen.org> +Reviewed-by: George Dunlap <george.dunlap@citrix.com> + +--- xen/arch/x86/mm/hap/hap.c.orig ++++ xen/arch/x86/mm/hap/hap.c +@@ -283,8 +283,7 @@ static struct page_info *hap_alloc_p2m_p + { + d->arch.paging.hap.total_pages--; + d->arch.paging.hap.p2m_pages++; +- page_set_owner(pg, d); +- pg->count_info |= 1; ++ ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask)); + } + else if ( !d->arch.paging.p2m_alloc_failed ) + { +@@ -299,21 +298,23 @@ static struct page_info *hap_alloc_p2m_p + + static void hap_free_p2m_page(struct domain *d, struct page_info *pg) + { ++ struct domain *owner = page_get_owner(pg); ++ + /* This is called both from the p2m code (which never holds the + * paging lock) and the log-dirty code (which always does). 
*/ + paging_lock_recursive(d); + +- ASSERT(page_get_owner(pg) == d); +- /* Should have just the one ref we gave it in alloc_p2m_page() */ +- if ( (pg->count_info & PGC_count_mask) != 1 ) { +- HAP_ERROR("Odd p2m page %p count c=%#lx t=%"PRtype_info"\n", +- pg, pg->count_info, pg->u.inuse.type_info); ++ /* Should still have no owner and count zero. */ ++ if ( owner || (pg->count_info & PGC_count_mask) ) ++ { ++ HAP_ERROR("d%d: Odd p2m page %"PRI_mfn" d=%d c=%lx t=%"PRtype_info"\n", ++ d->domain_id, mfn_x(page_to_mfn(pg)), ++ owner ? owner->domain_id : DOMID_INVALID, ++ pg->count_info, pg->u.inuse.type_info); + WARN(); ++ pg->count_info &= ~PGC_count_mask; ++ page_set_owner(pg, NULL); + } +- pg->count_info &= ~PGC_count_mask; +- /* Free should not decrement domain's total allocation, since +- * these pages were allocated without an owner. */ +- page_set_owner(pg, NULL); + d->arch.paging.hap.p2m_pages--; + d->arch.paging.hap.total_pages++; + hap_free(d, page_to_mfn(pg)); +--- xen/arch/x86/mm/shadow/common.c.orig ++++ xen/arch/x86/mm/shadow/common.c +@@ -1573,32 +1573,29 @@ shadow_alloc_p2m_page(struct domain *d) + pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0)); + d->arch.paging.shadow.p2m_pages++; + d->arch.paging.shadow.total_pages--; ++ ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask)); + + paging_unlock(d); + +- /* Unlike shadow pages, mark p2m pages as owned by the domain. +- * Marking the domain as the owner would normally allow the guest to +- * create mappings of these pages, but these p2m pages will never be +- * in the domain's guest-physical address space, and so that is not +- * believed to be a concern. */ +- page_set_owner(pg, d); +- pg->count_info |= 1; + return pg; + } + + static void + shadow_free_p2m_page(struct domain *d, struct page_info *pg) + { +- ASSERT(page_get_owner(pg) == d); +- /* Should have just the one ref we gave it in alloc_p2m_page() */ +- if ( (pg->count_info & PGC_count_mask) != 1 ) ++ struct domain *owner = page_get_owner(pg); ++ ++ /* Should still have no owner and count zero. */ ++ if ( owner || (pg->count_info & PGC_count_mask) ) + { +- SHADOW_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n", ++ SHADOW_ERROR("d%d: Odd p2m page %"PRI_mfn" d=%d c=%lx t=%"PRtype_info"\n", ++ d->domain_id, mfn_x(page_to_mfn(pg)), ++ owner ? owner->domain_id : DOMID_INVALID, + pg->count_info, pg->u.inuse.type_info); ++ pg->count_info &= ~PGC_count_mask; ++ page_set_owner(pg, NULL); + } +- pg->count_info &= ~PGC_count_mask; + pg->u.sh.type = SH_type_p2m_table; /* p2m code reuses type-info */ +- page_set_owner(pg, NULL); + + /* This is called both from the p2m code (which never holds the + * paging lock) and the log-dirty code (which always does). 
*/ +@@ -3216,7 +3213,9 @@ int shadow_enable(struct domain *d, u32 + | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER + | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE); + unmap_domain_page(e); ++ pg->count_info = 1; + pg->u.inuse.type_info = PGT_l2_page_table | 1 | PGT_validated; ++ page_set_owner(pg, d); + } + + paging_lock(d); +@@ -3254,7 +3253,11 @@ int shadow_enable(struct domain *d, u32 + if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) ) + p2m_teardown(p2m); + if ( rv != 0 && pg != NULL ) ++ { ++ pg->count_info &= ~PGC_count_mask; ++ page_set_owner(pg, NULL); + shadow_free_p2m_page(d, pg); ++ } + domain_unpause(d); + return rv; + } +@@ -3363,7 +3366,22 @@ out: + + /* Must be called outside the lock */ + if ( unpaged_pagetable ) ++ { ++ if ( page_get_owner(unpaged_pagetable) == d && ++ (unpaged_pagetable->count_info & PGC_count_mask) == 1 ) ++ { ++ unpaged_pagetable->count_info &= ~PGC_count_mask; ++ page_set_owner(unpaged_pagetable, NULL); ++ } ++ /* Complain here in cases where shadow_free_p2m_page() won't. */ ++ else if ( !page_get_owner(unpaged_pagetable) && ++ !(unpaged_pagetable->count_info & PGC_count_mask) ) ++ SHADOW_ERROR("d%d: Odd unpaged pt %"PRI_mfn" c=%lx t=%"PRtype_info"\n", ++ d->domain_id, mfn_x(page_to_mfn(unpaged_pagetable)), ++ unpaged_pagetable->count_info, ++ unpaged_pagetable->u.inuse.type_info); + shadow_free_p2m_page(d, unpaged_pagetable); ++ } + } + + void shadow_final_teardown(struct domain *d) diff --git a/sysutils/xenkernel48/patches/patch-XSA249 b/sysutils/xenkernel48/patches/patch-XSA249 new file mode 100644 index 00000000000..a0780ca267c --- /dev/null +++ b/sysutils/xenkernel48/patches/patch-XSA249 @@ -0,0 +1,44 @@ +$NetBSD: patch-XSA249,v 1.1 2017/12/15 14:02:15 bouyer Exp $ + +From: Jan Beulich <jbeulich@suse.com> +Subject: x86/shadow: fix refcount overflow check + +Commit c385d27079 ("x86 shadow: for multi-page shadows, explicitly track +the first page") reduced the refcount width to 25, without adjusting the +overflow check. Eliminate the disconnect by using a manifest constant. + +Interestingly, up to commit 047782fa01 ("Out-of-sync L1 shadows: OOS +snapshot") the refcount was 27 bits wide, yet the check was already +using 26. + +This is XSA-249. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: George Dunlap <george.dunlap@citrix.com> +Reviewed-by: Tim Deegan <tim@xen.org> +--- +v2: Simplify expression back to the style it was. + +--- xen/arch/x86/mm/shadow/private.h.orig ++++ xen/arch/x86/mm/shadow/private.h +@@ -529,7 +529,7 @@ static inline int sh_get_ref(struct doma + x = sp->u.sh.count; + nx = x + 1; + +- if ( unlikely(nx >= 1U<<26) ) ++ if ( unlikely(nx >= (1U << PAGE_SH_REFCOUNT_WIDTH)) ) + { + SHADOW_PRINTK("shadow ref overflow, gmfn=%lx smfn=%lx\n", + __backpointer(sp), mfn_x(smfn)); +--- xen/include/asm-x86/mm.h.orig ++++ xen/include/asm-x86/mm.h +@@ -82,7 +82,8 @@ struct page_info + unsigned long type:5; /* What kind of shadow is this? */ + unsigned long pinned:1; /* Is the shadow pinned? */ + unsigned long head:1; /* Is this the first page of the shadow? */ +- unsigned long count:25; /* Reference count */ ++#define PAGE_SH_REFCOUNT_WIDTH 25 ++ unsigned long count:PAGE_SH_REFCOUNT_WIDTH; /* Reference count */ + } sh; + + /* Page is on a free list: ((count_info & PGC_count_mask) == 0). 
*/ diff --git a/sysutils/xenkernel48/patches/patch-XSA250 b/sysutils/xenkernel48/patches/patch-XSA250 new file mode 100644 index 00000000000..0ca2deeda00 --- /dev/null +++ b/sysutils/xenkernel48/patches/patch-XSA250 @@ -0,0 +1,69 @@ +$NetBSD: patch-XSA250,v 1.1 2017/12/15 14:02:15 bouyer Exp $ + +From: Jan Beulich <jbeulich@suse.com> +Subject: x86/shadow: fix ref-counting error handling + +The old-Linux handling in shadow_set_l4e() mistakenly ORed together the +results of sh_get_ref() and sh_pin(). As the latter failing is not a +correctness problem, simply ignore its return value. + +In sh_set_toplevel_shadow() a failing sh_get_ref() must not be +accompanied by installing the entry, despite the domain being crashed. + +This is XSA-250. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Tim Deegan <tim@xen.org> + +--- xen/arch/x86/mm/shadow/multi.c.orig ++++ xen/arch/x86/mm/shadow/multi.c +@@ -923,7 +923,7 @@ static int shadow_set_l4e(struct domain + shadow_l4e_t new_sl4e, + mfn_t sl4mfn) + { +- int flags = 0, ok; ++ int flags = 0; + shadow_l4e_t old_sl4e; + paddr_t paddr; + ASSERT(sl4e != NULL); +@@ -938,15 +938,16 @@ static int shadow_set_l4e(struct domain + { + /* About to install a new reference */ + mfn_t sl3mfn = shadow_l4e_get_mfn(new_sl4e); +- ok = sh_get_ref(d, sl3mfn, paddr); +- /* Are we pinning l3 shadows to handle wierd linux behaviour? */ +- if ( sh_type_is_pinnable(d, SH_type_l3_64_shadow) ) +- ok |= sh_pin(d, sl3mfn); +- if ( !ok ) ++ ++ if ( !sh_get_ref(d, sl3mfn, paddr) ) + { + domain_crash(d); + return SHADOW_SET_ERROR; + } ++ ++ /* Are we pinning l3 shadows to handle weird Linux behaviour? */ ++ if ( sh_type_is_pinnable(d, SH_type_l3_64_shadow) ) ++ sh_pin(d, sl3mfn); + } + + /* Write the new entry */ +@@ -3965,14 +3966,15 @@ sh_set_toplevel_shadow(struct vcpu *v, + + /* Take a ref to this page: it will be released in sh_detach_old_tables() + * or the next call to set_toplevel_shadow() */ +- if ( !sh_get_ref(d, smfn, 0) ) ++ if ( sh_get_ref(d, smfn, 0) ) ++ new_entry = pagetable_from_mfn(smfn); ++ else + { + SHADOW_ERROR("can't install %#lx as toplevel shadow\n", mfn_x(smfn)); + domain_crash(d); ++ new_entry = pagetable_null(); + } + +- new_entry = pagetable_from_mfn(smfn); +- + install_new_entry: + /* Done. Install it */ + SHADOW_PRINTK("%u/%u [%u] gmfn %#"PRI_mfn" smfn %#"PRI_mfn"\n", diff --git a/sysutils/xenkernel48/patches/patch-XSA251 b/sysutils/xenkernel48/patches/patch-XSA251 new file mode 100644 index 00000000000..929c0901897 --- /dev/null +++ b/sysutils/xenkernel48/patches/patch-XSA251 @@ -0,0 +1,23 @@ +$NetBSD: patch-XSA251,v 1.1 2017/12/15 14:02:15 bouyer Exp $ + +From: Jan Beulich <jbeulich@suse.com> +Subject: x86/paging: don't unconditionally BUG() on finding SHARED_M2P_ENTRY + +PV guests can fully control the values written into the P2M. + +This is XSA-251. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> + +--- xen/arch/x86/mm/paging.c.orig ++++ xen/arch/x86/mm/paging.c +@@ -276,7 +276,7 @@ void paging_mark_pfn_dirty(struct domain + return; + + /* Shared MFNs should NEVER be marked dirty */ +- BUG_ON(SHARED_M2P(pfn)); ++ BUG_ON(paging_mode_translate(d) && SHARED_M2P(pfn)); + + /* + * Values with the MSB set denote MFNs that aren't really part of the |