summaryrefslogtreecommitdiffstats
path: root/system/xen/xsa/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch
diff options
context:
space:
mode:
Diffstat (limited to 'system/xen/xsa/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch')
-rw-r--r--system/xen/xsa/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch494
1 files changed, 0 insertions, 494 deletions
diff --git a/system/xen/xsa/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch b/system/xen/xsa/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch
deleted file mode 100644
index 515ad22b66..0000000000
--- a/system/xen/xsa/xsa240-4.9-0001-x86-limit-linear-page-table-use-to-a-single-level.patch
+++ /dev/null
@@ -1,494 +0,0 @@
-From 867988237d3e472fe2c99e81ae733e103422566c Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Thu, 28 Sep 2017 15:17:25 +0100
-Subject: [PATCH 1/2] x86: limit linear page table use to a single level
-
-That's the only way that they're meant to be used. Without such a
-restriction arbitrarily long chains of same-level page tables can be
-built, tearing down of which may then cause arbitrarily deep recursion,
-causing a stack overflow. To facilitate this restriction, a counter is
-being introduced to track both the number of same-level entries in a
-page table as well as the number of uses of a page table in another
-same-level one (counting into positive and negative direction
-respectively, utilizing the fact that both counts can't be non-zero at
-the same time).
-
-Note that the added accounting introduces a restriction on the number
-of times a page can be used in other same-level page tables - more than
-32k of such uses are no longer possible.
-
-Note also that some put_page_and_type[_preemptible]() calls are
-replaced with open-coded equivalents. This seemed preferrable to
-adding "parent_table" to the matrix of functions.
-
-Note further that cross-domain same-level page table references are no
-longer permitted (they probably never should have been).
-
-This is XSA-240.
-
-Reported-by: Jann Horn <jannh@google.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Signed-off-by: George Dunlap <george.dunlap@citrix.com>
----
- xen/arch/x86/domain.c | 1 +
- xen/arch/x86/mm.c | 171 ++++++++++++++++++++++++++++++++++++++-----
- xen/include/asm-x86/domain.h | 2 +
- xen/include/asm-x86/mm.h | 25 +++++--
- 4 files changed, 175 insertions(+), 24 deletions(-)
-
-diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
-index d7e699228c..d7ed72c246 100644
---- a/xen/arch/x86/domain.c
-+++ b/xen/arch/x86/domain.c
-@@ -1226,6 +1226,7 @@ int arch_set_info_guest(
- rc = -ERESTART;
- /* Fallthrough */
- case -ERESTART:
-+ v->arch.old_guest_ptpg = NULL;
- v->arch.old_guest_table =
- pagetable_get_page(v->arch.guest_table);
- v->arch.guest_table = pagetable_null();
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index 86f5eda52d..1e469bd354 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -747,6 +747,61 @@ static void put_data_page(
- put_page(page);
- }
-
-+static bool inc_linear_entries(struct page_info *pg)
-+{
-+ typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc;
-+
-+ do {
-+ /*
-+ * The check below checks for the "linear use" count being non-zero
-+ * as well as overflow. Signed integer overflow is undefined behavior
-+ * according to the C spec. However, as long as linear_pt_count is
-+ * smaller in size than 'int', the arithmetic operation of the
-+ * increment below won't overflow; rather the result will be truncated
-+ * when stored. Ensure that this is always true.
-+ */
-+ BUILD_BUG_ON(sizeof(nc) >= sizeof(int));
-+ oc = nc++;
-+ if ( nc <= 0 )
-+ return false;
-+ nc = cmpxchg(&pg->linear_pt_count, oc, nc);
-+ } while ( oc != nc );
-+
-+ return true;
-+}
-+
-+static void dec_linear_entries(struct page_info *pg)
-+{
-+ typeof(pg->linear_pt_count) oc;
-+
-+ oc = arch_fetch_and_add(&pg->linear_pt_count, -1);
-+ ASSERT(oc > 0);
-+}
-+
-+static bool inc_linear_uses(struct page_info *pg)
-+{
-+ typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc;
-+
-+ do {
-+ /* See the respective comment in inc_linear_entries(). */
-+ BUILD_BUG_ON(sizeof(nc) >= sizeof(int));
-+ oc = nc--;
-+ if ( nc >= 0 )
-+ return false;
-+ nc = cmpxchg(&pg->linear_pt_count, oc, nc);
-+ } while ( oc != nc );
-+
-+ return true;
-+}
-+
-+static void dec_linear_uses(struct page_info *pg)
-+{
-+ typeof(pg->linear_pt_count) oc;
-+
-+ oc = arch_fetch_and_add(&pg->linear_pt_count, 1);
-+ ASSERT(oc < 0);
-+}
-+
- /*
- * We allow root tables to map each other (a.k.a. linear page tables). It
- * needs some special care with reference counts and access permissions:
-@@ -777,15 +832,35 @@ get_##level##_linear_pagetable( \
- \
- if ( (pfn = level##e_get_pfn(pde)) != pde_pfn ) \
- { \
-+ struct page_info *ptpg = mfn_to_page(pde_pfn); \
-+ \
-+ /* Make sure the page table belongs to the correct domain. */ \
-+ if ( unlikely(page_get_owner(ptpg) != d) ) \
-+ return 0; \
-+ \
- /* Make sure the mapped frame belongs to the correct domain. */ \
- if ( unlikely(!get_page_from_pagenr(pfn, d)) ) \
- return 0; \
- \
- /* \
-- * Ensure that the mapped frame is an already-validated page table. \
-+ * Ensure that the mapped frame is an already-validated page table \
-+ * and is not itself having linear entries, as well as that the \
-+ * containing page table is not iself in use as a linear page table \
-+ * elsewhere. \
- * If so, atomically increment the count (checking for overflow). \
- */ \
- page = mfn_to_page(pfn); \
-+ if ( !inc_linear_entries(ptpg) ) \
-+ { \
-+ put_page(page); \
-+ return 0; \
-+ } \
-+ if ( !inc_linear_uses(page) ) \
-+ { \
-+ dec_linear_entries(ptpg); \
-+ put_page(page); \
-+ return 0; \
-+ } \
- y = page->u.inuse.type_info; \
- do { \
- x = y; \
-@@ -793,6 +868,8 @@ get_##level##_linear_pagetable( \
- unlikely((x & (PGT_type_mask|PGT_validated)) != \
- (PGT_##level##_page_table|PGT_validated)) ) \
- { \
-+ dec_linear_uses(page); \
-+ dec_linear_entries(ptpg); \
- put_page(page); \
- return 0; \
- } \
-@@ -1226,6 +1303,9 @@ get_page_from_l4e(
- l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \
- } while ( 0 )
-
-+static int _put_page_type(struct page_info *page, bool preemptible,
-+ struct page_info *ptpg);
-+
- void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner)
- {
- unsigned long pfn = l1e_get_pfn(l1e);
-@@ -1296,17 +1376,22 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
- if ( l2e_get_flags(l2e) & _PAGE_PSE )
- put_superpage(l2e_get_pfn(l2e));
- else
-- put_page_and_type(l2e_get_page(l2e));
-+ {
-+ struct page_info *pg = l2e_get_page(l2e);
-+ int rc = _put_page_type(pg, false, mfn_to_page(pfn));
-+
-+ ASSERT(!rc);
-+ put_page(pg);
-+ }
-
- return 0;
- }
-
--static int __put_page_type(struct page_info *, int preemptible);
--
- static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
- int partial, bool_t defer)
- {
- struct page_info *pg;
-+ int rc;
-
- if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) )
- return 1;
-@@ -1329,21 +1414,28 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
- if ( unlikely(partial > 0) )
- {
- ASSERT(!defer);
-- return __put_page_type(pg, 1);
-+ return _put_page_type(pg, true, mfn_to_page(pfn));
- }
-
- if ( defer )
- {
-+ current->arch.old_guest_ptpg = mfn_to_page(pfn);
- current->arch.old_guest_table = pg;
- return 0;
- }
-
-- return put_page_and_type_preemptible(pg);
-+ rc = _put_page_type(pg, true, mfn_to_page(pfn));
-+ if ( likely(!rc) )
-+ put_page(pg);
-+
-+ return rc;
- }
-
- static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
- int partial, bool_t defer)
- {
-+ int rc = 1;
-+
- if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
- (l4e_get_pfn(l4e) != pfn) )
- {
-@@ -1352,18 +1444,22 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
- if ( unlikely(partial > 0) )
- {
- ASSERT(!defer);
-- return __put_page_type(pg, 1);
-+ return _put_page_type(pg, true, mfn_to_page(pfn));
- }
-
- if ( defer )
- {
-+ current->arch.old_guest_ptpg = mfn_to_page(pfn);
- current->arch.old_guest_table = pg;
- return 0;
- }
-
-- return put_page_and_type_preemptible(pg);
-+ rc = _put_page_type(pg, true, mfn_to_page(pfn));
-+ if ( likely(!rc) )
-+ put_page(pg);
- }
-- return 1;
-+
-+ return rc;
- }
-
- static int alloc_l1_table(struct page_info *page)
-@@ -1561,6 +1657,7 @@ static int alloc_l3_table(struct page_info *page)
- {
- page->nr_validated_ptes = i;
- page->partial_pte = 0;
-+ current->arch.old_guest_ptpg = NULL;
- current->arch.old_guest_table = page;
- }
- while ( i-- > 0 )
-@@ -1654,6 +1751,7 @@ static int alloc_l4_table(struct page_info *page)
- {
- if ( current->arch.old_guest_table )
- page->nr_validated_ptes++;
-+ current->arch.old_guest_ptpg = NULL;
- current->arch.old_guest_table = page;
- }
- }
-@@ -2403,14 +2501,20 @@ int free_page_type(struct page_info *pag
- }
-
-
--static int __put_final_page_type(
-- struct page_info *page, unsigned long type, int preemptible)
-+static int _put_final_page_type(struct page_info *page, unsigned long type,
-+ bool preemptible, struct page_info *ptpg)
- {
- int rc = free_page_type(page, type, preemptible);
-
- /* No need for atomic update of type_info here: noone else updates it. */
- if ( rc == 0 )
- {
-+ if ( ptpg && PGT_type_equal(type, ptpg->u.inuse.type_info) )
-+ {
-+ dec_linear_uses(page);
-+ dec_linear_entries(ptpg);
-+ }
-+ ASSERT(!page->linear_pt_count || page_get_owner(page)->is_dying);
- /*
- * Record TLB information for flush later. We do not stamp page tables
- * when running in shadow mode:
-@@ -2446,8 +2550,8 @@ static int __put_final_page_type(
- }
-
-
--static int __put_page_type(struct page_info *page,
-- int preemptible)
-+static int _put_page_type(struct page_info *page, bool preemptible,
-+ struct page_info *ptpg)
- {
- unsigned long nx, x, y = page->u.inuse.type_info;
- int rc = 0;
-@@ -2474,12 +2578,28 @@ static int __put_page_type(struct page_info *page,
- x, nx)) != x) )
- continue;
- /* We cleared the 'valid bit' so we do the clean up. */
-- rc = __put_final_page_type(page, x, preemptible);
-+ rc = _put_final_page_type(page, x, preemptible, ptpg);
-+ ptpg = NULL;
- if ( x & PGT_partial )
- put_page(page);
- break;
- }
-
-+ if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) )
-+ {
-+ /*
-+ * page_set_tlbflush_timestamp() accesses the same union
-+ * linear_pt_count lives in. Unvalidated page table pages,
-+ * however, should occur during domain destruction only
-+ * anyway. Updating of linear_pt_count luckily is not
-+ * necessary anymore for a dying domain.
-+ */
-+ ASSERT(page_get_owner(page)->is_dying);
-+ ASSERT(page->linear_pt_count < 0);
-+ ASSERT(ptpg->linear_pt_count > 0);
-+ ptpg = NULL;
-+ }
-+
- /*
- * Record TLB information for flush later. We do not stamp page
- * tables when running in shadow mode:
-@@ -2499,6 +2619,13 @@ static int __put_page_type(struct page_info *page,
- return -EINTR;
- }
-
-+ if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) )
-+ {
-+ ASSERT(!rc);
-+ dec_linear_uses(page);
-+ dec_linear_entries(ptpg);
-+ }
-+
- return rc;
- }
-
-@@ -2638,6 +2765,7 @@ static int __get_page_type(struct page_info *page, unsigned long type,
- page->nr_validated_ptes = 0;
- page->partial_pte = 0;
- }
-+ page->linear_pt_count = 0;
- rc = alloc_page_type(page, type, preemptible);
- }
-
-@@ -2652,7 +2780,7 @@ static int __get_page_type(struct page_info *page, unsigned long type,
-
- void put_page_type(struct page_info *page)
- {
-- int rc = __put_page_type(page, 0);
-+ int rc = _put_page_type(page, false, NULL);
- ASSERT(rc == 0);
- (void)rc;
- }
-@@ -2668,7 +2796,7 @@ int get_page_type(struct page_info *page, unsigned long type)
-
- int put_page_type_preemptible(struct page_info *page)
- {
-- return __put_page_type(page, 1);
-+ return _put_page_type(page, true, NULL);
- }
-
- int get_page_type_preemptible(struct page_info *page, unsigned long type)
-@@ -2878,11 +3006,14 @@ int put_old_guest_table(struct vcpu *v)
- if ( !v->arch.old_guest_table )
- return 0;
-
-- switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table) )
-+ switch ( rc = _put_page_type(v->arch.old_guest_table, true,
-+ v->arch.old_guest_ptpg) )
- {
- case -EINTR:
- case -ERESTART:
- return -ERESTART;
-+ case 0:
-+ put_page(v->arch.old_guest_table);
- }
-
- v->arch.old_guest_table = NULL;
-@@ -3042,6 +3173,7 @@ int new_guest_cr3(unsigned long mfn)
- rc = -ERESTART;
- /* fallthrough */
- case -ERESTART:
-+ curr->arch.old_guest_ptpg = NULL;
- curr->arch.old_guest_table = page;
- break;
- default:
-@@ -3310,7 +3442,10 @@ long do_mmuext_op(
- if ( type == PGT_l1_page_table )
- put_page_and_type(page);
- else
-+ {
-+ curr->arch.old_guest_ptpg = NULL;
- curr->arch.old_guest_table = page;
-+ }
- }
- }
-
-@@ -3346,6 +3481,7 @@ long do_mmuext_op(
- {
- case -EINTR:
- case -ERESTART:
-+ curr->arch.old_guest_ptpg = NULL;
- curr->arch.old_guest_table = page;
- rc = 0;
- break;
-@@ -3425,6 +3561,7 @@ long do_mmuext_op(
- rc = -ERESTART;
- /* fallthrough */
- case -ERESTART:
-+ curr->arch.old_guest_ptpg = NULL;
- curr->arch.old_guest_table = page;
- break;
- default:
-diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
-index 924caac834..5a512918cc 100644
---- a/xen/include/asm-x86/domain.h
-+++ b/xen/include/asm-x86/domain.h
-@@ -527,6 +527,8 @@ struct arch_vcpu
- pagetable_t guest_table_user; /* (MFN) x86/64 user-space pagetable */
- pagetable_t guest_table; /* (MFN) guest notion of cr3 */
- struct page_info *old_guest_table; /* partially destructed pagetable */
-+ struct page_info *old_guest_ptpg; /* containing page table of the */
-+ /* former, if any */
- /* guest_table holds a ref to the page, and also a type-count unless
- * shadow refcounts are in use */
- pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */
-diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
-index 119d7dec6b..445da50d47 100644
---- a/xen/include/asm-x86/mm.h
-+++ b/xen/include/asm-x86/mm.h
-@@ -124,11 +124,11 @@ struct page_info
- u32 tlbflush_timestamp;
-
- /*
-- * When PGT_partial is true then this field is valid and indicates
-- * that PTEs in the range [0, @nr_validated_ptes) have been validated.
-- * An extra page reference must be acquired (or not dropped) whenever
-- * PGT_partial gets set, and it must be dropped when the flag gets
-- * cleared. This is so that a get() leaving a page in partially
-+ * When PGT_partial is true then the first two fields are valid and
-+ * indicate that PTEs in the range [0, @nr_validated_ptes) have been
-+ * validated. An extra page reference must be acquired (or not dropped)
-+ * whenever PGT_partial gets set, and it must be dropped when the flag
-+ * gets cleared. This is so that a get() leaving a page in partially
- * validated state (where the caller would drop the reference acquired
- * due to the getting of the type [apparently] failing [-ERESTART])
- * would not accidentally result in a page left with zero general
-@@ -152,10 +152,18 @@ struct page_info
- * put_page_from_lNe() (due to the apparent failure), and hence it
- * must be dropped when the put operation is resumed (and completes),
- * but it must not be acquired if picking up the page for validation.
-+ *
-+ * The 3rd field, @linear_pt_count, indicates
-+ * - by a positive value, how many same-level page table entries a page
-+ * table has,
-+ * - by a negative value, in how many same-level page tables a page is
-+ * in use.
- */
- struct {
-- u16 nr_validated_ptes;
-- s8 partial_pte;
-+ u16 nr_validated_ptes:PAGETABLE_ORDER + 1;
-+ u16 :16 - PAGETABLE_ORDER - 1 - 2;
-+ s16 partial_pte:2;
-+ s16 linear_pt_count;
- };
-
- /*
-@@ -206,6 +214,9 @@ struct page_info
- #define PGT_count_width PG_shift(9)
- #define PGT_count_mask ((1UL<<PGT_count_width)-1)
-
-+/* Are the 'type mask' bits identical? */
-+#define PGT_type_equal(x, y) (!(((x) ^ (y)) & PGT_type_mask))
-+
- /* Cleared when the owning guest 'frees' this page. */
- #define _PGC_allocated PG_shift(1)
- #define PGC_allocated PG_mask(1, 1)
---
-2.14.1
-