Diffstat (limited to 'system/xen/xsa/xsa401-4.16-1.patch')
-rw-r--r--  system/xen/xsa/xsa401-4.16-1.patch  170
1 file changed, 170 insertions, 0 deletions
diff --git a/system/xen/xsa/xsa401-4.16-1.patch b/system/xen/xsa/xsa401-4.16-1.patch
new file mode 100644
index 0000000000..5c8c50617a
--- /dev/null
+++ b/system/xen/xsa/xsa401-4.16-1.patch
@@ -0,0 +1,170 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/pv: Clean up _get_page_type()
+
+Various fixes for clarity, ahead of making complicated changes.
+
+ * Split the overflow check out of the if/else chain for type handling, as
+ it's somewhat unrelated.
+ * Comment the main if/else chain to explain what is going on.  Adjust one
+ ASSERT() and state the bit layout for validate-locked and partial states
+ (summarised in a sketch ahead of the diff).
+ * Correct the comment about TLB flushing, as it's backwards.  The problem
+ case is when writeable mappings are retained to a page which is becoming
+ read-only, as this allows the guest to bypass Xen's safety checks on updates.
+ * Reduce the scope of 'y'.  It is an artefact of the cmpxchg loop and not
+ valid for use by subsequent logic.  Switch to using ACCESS_ONCE() to treat
+ all reads as explicitly volatile.  The only thing preventing the validation
+ wait-loop from being infinite is the compiler barrier hidden in cpu_relax()
+ (see the standalone sketch after this list).
+ * Replace one page_get_owner(page) with the 'd' already in scope.
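+
+As an aside, the 'y' scoping and ACCESS_ONCE() points are easiest to see in
+a minimal standalone sketch (below; not Xen code).  ACCESS_ONCE() is spelled
+out as a volatile cast, cmpxchg() is approximated with a GCC builtin, and
+all of the type handling is reduced to a bare count increment.  The shape is
+what matters: 'y' lives only inside the loop, every re-read of the shared
+word is explicitly volatile, and cpu_relax() supplies the compiler barrier
+that keeps a retry loop from collapsing into an infinite spin.
+
+  #define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))
+
+  static unsigned long type_info;  /* stand-in for page->u.inuse.type_info */
+
+  static void cpu_relax(void)
+  {
+      /* Compiler barrier; real implementations add a pause hint too. */
+      __asm__ __volatile__ ( "" ::: "memory" );
+  }
+
+  static int get_ref(void)
+  {
+      unsigned long x, nx;
+
+      for ( unsigned long y = ACCESS_ONCE(type_info); ; )
+      {
+          x = y;               /* snapshot used for this iteration only */
+          nx = x + 1;          /* proposed new value */
+
+          /* Type and validation checks on x would go here. */
+
+          /* Install nx only if type_info still holds x. */
+          y = __sync_val_compare_and_swap(&type_info, x, nx);
+          if ( y == x )
+              return 0;        /* our update won the race */
+
+          cpu_relax();         /* barrier + polite spin, then retry */
+      }
+  }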
+
+No functional change.
+
+This is part of XSA-401 / CVE-2022-26362.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: George Dunlap <george.dunlap@citrix.com>
+
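+For reference while reading the hunks below, the states that the new
+comments name can be summarised as follows.  The mask values here are
+illustrative only; the real PGT_* constants live in
+xen/include/asm-x86/mm.h and differ in detail.
+
+  #define PGT_type_mask   0xf000UL  /* what the frame is being used as    */
+  #define PGT_validated   0x0800UL  /* contents have been checked as safe */
+  #define PGT_partial     0x0400UL  /* validation was preempted part-way  */
+  #define PGT_count_mask  0x03ffUL  /* outstanding type references        */
+
+  /*
+   * "validate locked": PGT_[type] | 1
+   *     A concurrent caller is in the middle of validation.
+   * "partial":         PGT_[type] | PGT_partial | 1
+   *     Validation was interrupted; the PGT_partial bit carries an
+   *     extra general page reference.
+   * Fully validated:   PGT_[type] | PGT_validated | n
+   *     The normal in-use state, with n typerefs outstanding.
+   */
+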
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 796faca64103..ddd32f88c798 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -2935,16 +2935,17 @@ static int _put_page_type(struct page_info *page, unsigned int flags,
+ static int _get_page_type(struct page_info *page, unsigned long type,
+ bool preemptible)
+ {
+- unsigned long nx, x, y = page->u.inuse.type_info;
++ unsigned long nx, x;
+ int rc = 0;
+
+ ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
+ ASSERT(!in_irq());
+
+- for ( ; ; )
++ for ( unsigned long y = ACCESS_ONCE(page->u.inuse.type_info); ; )
+ {
+ x = y;
+ nx = x + 1;
++
+ if ( unlikely((nx & PGT_count_mask) == 0) )
+ {
+ gdprintk(XENLOG_WARNING,
+@@ -2952,8 +2953,15 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+ mfn_x(page_to_mfn(page)));
+ return -EINVAL;
+ }
+- else if ( unlikely((x & PGT_count_mask) == 0) )
++
++ if ( unlikely((x & PGT_count_mask) == 0) )
+ {
++ /*
++ * Typeref 0 -> 1.
++ *
++ * Type changes are permitted when the typeref is 0. If the type
++ * actually changes, the page needs re-validating.
++ */
+ struct domain *d = page_get_owner(page);
+
+ if ( d && shadow_mode_enabled(d) )
+@@ -2964,8 +2972,8 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+ {
+ /*
+ * On type change we check to flush stale TLB entries. It is
+- * vital that no other CPUs are left with mappings of a frame
+- * which is about to become writeable to the guest.
++ * vital that no other CPUs are left with writeable mappings
++ * to a frame which is intending to become pgtable/segdesc.
+ */
+ cpumask_t *mask = this_cpu(scratch_cpumask);
+
+@@ -2977,7 +2985,7 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+
+ if ( unlikely(!cpumask_empty(mask)) &&
+ /* Shadow mode: track only writable pages. */
+- (!shadow_mode_enabled(page_get_owner(page)) ||
++ (!shadow_mode_enabled(d) ||
+ ((nx & PGT_type_mask) == PGT_writable_page)) )
+ {
+ perfc_incr(need_flush_tlb_flush);
+@@ -3008,7 +3016,14 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+ }
+ else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) )
+ {
+- /* Don't log failure if it could be a recursive-mapping attempt. */
++ /*
++ * else, we're trying to take a new reference, of the wrong type.
++ *
++ * This (being able to prohibit use of the wrong type) is what the
++ * typeref system exists for, but skip printing the failure if it
++ * looks like a recursive mapping, as subsequent logic might
++ * ultimately permit the attempt.
++ */
+ if ( ((x & PGT_type_mask) == PGT_l2_page_table) &&
+ (type == PGT_l1_page_table) )
+ return -EINVAL;
+@@ -3027,18 +3042,46 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+ }
+ else if ( unlikely(!(x & PGT_validated)) )
+ {
++ /*
++ * else, the count is non-zero, and we're grabbing the right type;
++ * but the page hasn't been validated yet.
++ *
++ * The page is in one of two states (depending on PGT_partial),
++ * and should have exactly one reference.
++ */
++ ASSERT((x & (PGT_type_mask | PGT_count_mask)) == (type | 1));
++
+ if ( !(x & PGT_partial) )
+ {
+- /* Someone else is updating validation of this page. Wait... */
++ /*
++ * The page has been left in the "validate locked" state
++ * (i.e. PGT_[type] | 1) which means that a concurrent caller
++ * of _get_page_type() is in the middle of validation.
++ *
++ * Spin waiting for the concurrent user to complete (leaving the
++ * page partially or fully validated), then restart our attempt
++ * to acquire a type reference.
++ */
+ do {
+ if ( preemptible && hypercall_preempt_check() )
+ return -EINTR;
+ cpu_relax();
+- } while ( (y = page->u.inuse.type_info) == x );
++ } while ( (y = ACCESS_ONCE(page->u.inuse.type_info)) == x );
+ continue;
+ }
+- /* Type ref count was left at 1 when PGT_partial got set. */
+- ASSERT((x & PGT_count_mask) == 1);
++
++ /*
++ * The page has been left in the "partial" state
++ * (i.e., PGT_[type] | PGT_partial | 1).
++ *
++ * Rather than bumping the type count, we need to try to grab the
++ * validation lock; if we succeed, we need to validate the page,
++ * then drop the general ref associated with the PGT_partial bit.
++ *
++ * We grab the validation lock by setting nx to (PGT_[type] | 1)
++ * (i.e., non-zero type count, neither PGT_validated nor
++ * PGT_partial set).
++ */
+ nx = x & ~PGT_partial;
+ }
+
+@@ -3087,6 +3130,13 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+ }
+
+ out:
++ /*
++ * Did we drop the PGT_partial bit when acquiring the typeref? If so,
++ * drop the general reference that went along with it.
++ *
++ * N.B. validate_page() may have re-set PGT_partial, not reflected in
++ * nx, but will have taken an extra ref when doing so.
++ */
+ if ( (x & PGT_partial) && !(nx & PGT_partial) )
+ put_page(page);
+