diff options
Diffstat (limited to 'system/xen/xsa/xsa299-4.12-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch')
-rw-r--r-- | system/xen/xsa/xsa299-4.12-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch | 413 |
1 files changed, 0 insertions, 413 deletions
diff --git a/system/xen/xsa/xsa299-4.12-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch b/system/xen/xsa/xsa299-4.12-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch deleted file mode 100644 index ad7e6fee1b..0000000000 --- a/system/xen/xsa/xsa299-4.12-0011-x86-mm-Don-t-drop-a-type-ref-unless-you-held-a-ref-t.patch +++ /dev/null @@ -1,413 +0,0 @@ -From ea3dc624c5e6325a9c2f079e52a85965d4ab6ce8 Mon Sep 17 00:00:00 2001 -From: George Dunlap <george.dunlap@citrix.com> -Date: Thu, 10 Oct 2019 17:57:50 +0100 -Subject: [PATCH 11/11] x86/mm: Don't drop a type ref unless you held a ref to - begin with - -Validation and de-validation of pagetable trees may take arbitrarily -large amounts of time, and so must be preemptible. This is indicated -by setting the PGT_partial bit in the type_info, and setting -nr_validated_entries and partial_flags appropriately. Specifically, -if the entry at [nr_validated_entries] is partially validated, -partial_flags should have the PGT_partial_set bit set, and the entry -should hold a general reference count. During de-validation, -put_page_type() is called on partially validated entries. - -Unfortunately, there are a number of issues with the current algorithm. - -First, doing a "normal" put_page_type() is not safe when no type ref -is held: there is nothing to stop another vcpu from coming along and -picking up validation again: at which point the put_page_type may drop -the only page ref on an in-use page. Some examples are listed in the -appendix. - -The core issue is that put_page_type() is being called both to clean -up PGT_partial, and to drop a type count; and has no way of knowing -which is which; and so if in between, PGT_partial is cleared, -put_page_type() will drop the type ref erroneously. - -What is needed is to distinguish between two states: -- Dropping a type ref which is held -- Cleaning up a page which has been partially de/validated - -Fix this by telling put_page_type() which of the two activities you -intend. - -When cleaning up a partial de/validation, take no action unless you -find a page partially validated. - -If put_page_type() is called without PTF_partial_set, and finds the -page in a PGT_partial state anyway, then there's certainly been a -misaccounting somewhere, and carrying on would almost certainly cause -a security issue, so crash the host instead. - -In put_page_from_lNe, pass partial_flags on to _put_page_type(). - -old_guest_table may be set either with a fully validated page (when -using the "deferred put" pattern), or with a partially validated page -(when a normal "de-validation" is interrupted, or when a validation -fails part-way through due to invalid entries). Add a flag, -old_guest_table_partial, to indicate which of these it is, and use -that to pass the appropriate flag to _put_page_type(). - -While here, delete stray trailing whitespace. - -This is part of XSA-299. - -Reported-by: George Dunlap <george.dunlap@citrix.com> -Signed-off-by: George Dunlap <george.dunlap@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> ------ -Appendix: - -Suppose page A, when interpreted as an l3 pagetable, contains all -valid entries; and suppose A[x] points to page B, which when -interpreted as an l2 pagetable, contains all valid entries. - -P1: PIN_L3_TABLE - A -> PGT_l3_table | 1 | valid - B -> PGT_l2_table | 1 | valid - -P1: UNPIN_TABLE - > Arrange to interrupt after B has been de-validated - B: - type_info -> PGT_l2_table | 0 - A: - type_info -> PGT_l3_table | 1 | partial - nr_validated_enties -> (less than x) - -P2: mod_l4_entry to point to A - > Arrange for this to be interrupted while B is being validated - B: - type_info -> PGT_l2_table | 1 | partial - (nr_validated_entires &c set as appropriate) - A: - type_info -> PGT_l3_table | 1 | partial - nr_validated_entries -> x - partial_pte = 1 - -P3: mod_l3_entry some other unrelated l3 to point to B: - B: - type_info -> PGT_l2_table | 1 - -P1: Restart UNPIN_TABLE - -At this point, since A.nr_validate_entries == x and A.partial_pte != -0, free_l3_table() will call put_page_from_l3e() on pl3e[x], dropping -its type count to 0 while it's still being pointed to by some other l3 - -A similar issue arises with old_guest_table. Consider the following -scenario: - -Suppose A is a page which, when interpreted as an l2, has valid entries -until entry x, which is invalid. - -V1: PIN_L2_TABLE(A) - <Validate until we try to validate [x], get -EINVAL> - A -> PGT_l2_table | 1 | PGT_partial - V1 -> old_guest_table = A - <delayed> - -V2: PIN_L2_TABLE(A) - <Pick up where V1 left off, try to re-validate [x], get -EINVAL> - A -> PGT_l2_table | 1 | PGT_partial - V2 -> old_guest_table = A - <restart> - put_old_guest_table() - _put_page_type(A) - A -> PGT_l2_table | 0 - -V1: <restart> - put_old_guest_table() - _put_page_type(A) # UNDERFLOW - -Indeed, it is possible to engineer for old_guest_table for every vcpu -a guest has to point to the same page. ---- - xen/arch/x86/domain.c | 6 +++ - xen/arch/x86/mm.c | 99 +++++++++++++++++++++++++++++++----- - xen/include/asm-x86/domain.h | 4 +- - 3 files changed, 95 insertions(+), 14 deletions(-) - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index 59df8a6d8d..f1ae5f89f5 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -1104,9 +1104,15 @@ int arch_set_info_guest( - rc = -ERESTART; - /* Fallthrough */ - case -ERESTART: -+ /* -+ * NB that we're putting the kernel-mode table -+ * here, which we've already successfully -+ * validated above; hence partial = false; -+ */ - v->arch.old_guest_ptpg = NULL; - v->arch.old_guest_table = - pagetable_get_page(v->arch.guest_table); -+ v->arch.old_guest_table_partial = false; - v->arch.guest_table = pagetable_null(); - break; - default: -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index a432e69c74..81774368a0 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -1359,10 +1359,11 @@ static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn, - { - current->arch.old_guest_ptpg = ptpg; - current->arch.old_guest_table = pg; -+ current->arch.old_guest_table_partial = false; - } - else - { -- rc = _put_page_type(pg, PTF_preemptible, ptpg); -+ rc = _put_page_type(pg, flags | PTF_preemptible, ptpg); - if ( likely(!rc) ) - put_page(pg); - } -@@ -1385,6 +1386,7 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, - unsigned long mfn = l3e_get_pfn(l3e); - bool writeable = l3e_get_flags(l3e) & _PAGE_RW; - -+ ASSERT(!(flags & PTF_partial_set)); - ASSERT(!(mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1))); - do { - put_data_page(mfn_to_page(_mfn(mfn)), writeable); -@@ -1397,12 +1399,14 @@ static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, - - if ( flags & PTF_defer ) - { -+ ASSERT(!(flags & PTF_partial_set)); - current->arch.old_guest_ptpg = mfn_to_page(_mfn(pfn)); - current->arch.old_guest_table = pg; -+ current->arch.old_guest_table_partial = false; - return 0; - } - -- rc = _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn))); -+ rc = _put_page_type(pg, flags | PTF_preemptible, mfn_to_page(_mfn(pfn))); - if ( likely(!rc) ) - put_page(pg); - -@@ -1421,12 +1425,15 @@ static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, - - if ( flags & PTF_defer ) - { -+ ASSERT(!(flags & PTF_partial_set)); - current->arch.old_guest_ptpg = mfn_to_page(_mfn(pfn)); - current->arch.old_guest_table = pg; -+ current->arch.old_guest_table_partial = false; - return 0; - } - -- rc = _put_page_type(pg, PTF_preemptible, mfn_to_page(_mfn(pfn))); -+ rc = _put_page_type(pg, flags | PTF_preemptible, -+ mfn_to_page(_mfn(pfn))); - if ( likely(!rc) ) - put_page(pg); - } -@@ -1535,6 +1542,14 @@ static int alloc_l2_table(struct page_info *page, unsigned long type) - - pl2e = map_domain_page(_mfn(pfn)); - -+ /* -+ * NB that alloc_l2_table will never set partial_pte on an l2; but -+ * free_l2_table might if a linear_pagetable entry is interrupted -+ * partway through de-validation. In that circumstance, -+ * get_page_from_l2e() will always return -EINVAL; and we must -+ * retain the type ref by doing the normal partial_flags tracking. -+ */ -+ - for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; - i++, partial_flags = 0 ) - { -@@ -1598,6 +1613,7 @@ static int alloc_l2_table(struct page_info *page, unsigned long type) - page->partial_flags = partial_flags; - current->arch.old_guest_ptpg = NULL; - current->arch.old_guest_table = page; -+ current->arch.old_guest_table_partial = true; - } - } - if ( rc < 0 ) -@@ -1704,12 +1720,16 @@ static int alloc_l3_table(struct page_info *page) - * builds. - */ - if ( current->arch.old_guest_table == l3e_get_page(l3e) ) -+ { -+ ASSERT(current->arch.old_guest_table_partial); - page->partial_flags = PTF_partial_set; -+ } - else - ASSERT_UNREACHABLE(); - } - current->arch.old_guest_ptpg = NULL; - current->arch.old_guest_table = page; -+ current->arch.old_guest_table_partial = true; - } - while ( i-- > 0 ) - pl3e[i] = unadjust_guest_l3e(pl3e[i], d); -@@ -1897,12 +1917,16 @@ static int alloc_l4_table(struct page_info *page) - * builds. - */ - if ( current->arch.old_guest_table == l4e_get_page(l4e) ) -+ { -+ ASSERT(current->arch.old_guest_table_partial); - page->partial_flags = PTF_partial_set; -+ } - else - ASSERT_UNREACHABLE(); - } - current->arch.old_guest_ptpg = NULL; - current->arch.old_guest_table = page; -+ current->arch.old_guest_table_partial = true; - } - } - } -@@ -2831,6 +2855,28 @@ static int _put_page_type(struct page_info *page, unsigned int flags, - x = y; - nx = x - 1; - -+ /* -+ * Is this expected to do a full reference drop, or only -+ * cleanup partial validation / devalidation? -+ * -+ * If the former, the caller must hold a "full" type ref; -+ * which means the page must be validated. If the page is -+ * *not* fully validated, continuing would almost certainly -+ * open up a security hole. An exception to this is during -+ * domain destruction, where PGT_validated can be dropped -+ * without dropping a type ref. -+ * -+ * If the latter, do nothing unless type PGT_partial is set. -+ * If it is set, the type count must be 1. -+ */ -+ if ( !(flags & PTF_partial_set) ) -+ BUG_ON((x & PGT_partial) || -+ !((x & PGT_validated) || page_get_owner(page)->is_dying)); -+ else if ( !(x & PGT_partial) ) -+ return 0; -+ else -+ BUG_ON((x & PGT_count_mask) != 1); -+ - ASSERT((x & PGT_count_mask) != 0); - - switch ( nx & (PGT_locked | PGT_count_mask) ) -@@ -3092,17 +3138,34 @@ int put_old_guest_table(struct vcpu *v) - if ( !v->arch.old_guest_table ) - return 0; - -- switch ( rc = _put_page_type(v->arch.old_guest_table, PTF_preemptible, -- v->arch.old_guest_ptpg) ) -+ rc = _put_page_type(v->arch.old_guest_table, -+ PTF_preemptible | -+ ( v->arch.old_guest_table_partial ? -+ PTF_partial_set : 0 ), -+ v->arch.old_guest_ptpg); -+ -+ if ( rc == -ERESTART || rc == -EINTR ) - { -- case -EINTR: -- case -ERESTART: -+ v->arch.old_guest_table_partial = (rc == -ERESTART); - return -ERESTART; -- case 0: -- put_page(v->arch.old_guest_table); - } - -+ /* -+ * It shouldn't be possible for _put_page_type() to return -+ * anything else at the moment; but if it does happen in -+ * production, leaking the type ref is probably the best thing to -+ * do. Either way, drop the general ref held by old_guest_table. -+ */ -+ ASSERT(rc == 0); -+ -+ put_page(v->arch.old_guest_table); - v->arch.old_guest_table = NULL; -+ v->arch.old_guest_ptpg = NULL; -+ /* -+ * Safest default if someone sets old_guest_table without -+ * explicitly setting old_guest_table_partial. -+ */ -+ v->arch.old_guest_table_partial = true; - - return rc; - } -@@ -3253,11 +3316,11 @@ int new_guest_cr3(mfn_t mfn) - switch ( rc = put_page_and_type_preemptible(page) ) - { - case -EINTR: -- rc = -ERESTART; -- /* fallthrough */ - case -ERESTART: - curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; -+ curr->arch.old_guest_table_partial = (rc == -ERESTART); -+ rc = -ERESTART; - break; - default: - BUG_ON(rc); -@@ -3494,6 +3557,7 @@ long do_mmuext_op( - { - curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; -+ curr->arch.old_guest_table_partial = false; - } - } - } -@@ -3528,6 +3592,11 @@ long do_mmuext_op( - case -ERESTART: - curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; -+ /* -+ * EINTR means we still hold the type ref; ERESTART -+ * means PGT_partial holds the type ref -+ */ -+ curr->arch.old_guest_table_partial = (rc == -ERESTART); - rc = 0; - break; - default: -@@ -3596,11 +3665,15 @@ long do_mmuext_op( - switch ( rc = put_page_and_type_preemptible(page) ) - { - case -EINTR: -- rc = -ERESTART; -- /* fallthrough */ - case -ERESTART: - curr->arch.old_guest_ptpg = NULL; - curr->arch.old_guest_table = page; -+ /* -+ * EINTR means we still hold the type ref; -+ * ERESTART means PGT_partial holds the ref -+ */ -+ curr->arch.old_guest_table_partial = (rc == -ERESTART); -+ rc = -ERESTART; - break; - default: - BUG_ON(rc); -diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h -index 214e44ce1c..2cfce7b36b 100644 ---- a/xen/include/asm-x86/domain.h -+++ b/xen/include/asm-x86/domain.h -@@ -307,7 +307,7 @@ struct arch_domain - - struct paging_domain paging; - struct p2m_domain *p2m; -- /* To enforce lock ordering in the pod code wrt the -+ /* To enforce lock ordering in the pod code wrt the - * page_alloc lock */ - int page_alloc_unlock_level; - -@@ -581,6 +581,8 @@ struct arch_vcpu - struct page_info *old_guest_table; /* partially destructed pagetable */ - struct page_info *old_guest_ptpg; /* containing page table of the */ - /* former, if any */ -+ bool old_guest_table_partial; /* Are we dropping a type ref, or just -+ * finishing up a partial de-validation? */ - /* guest_table holds a ref to the page, and also a type-count unless - * shadow refcounts are in use */ - pagetable_t shadow_table[4]; /* (MFN) shadow(s) of guest */ --- -2.23.0 - |