Diffstat (limited to 'system/xen/xsa')
-rw-r--r--  system/xen/xsa/xsa317.patch                             50
-rw-r--r--  system/xen/xsa/xsa319.patch                             27
-rw-r--r--  system/xen/xsa/xsa320-4.13-1.patch                     117
-rw-r--r--  system/xen/xsa/xsa320-4.13-2.patch                     179
-rw-r--r--  system/xen/xsa/xsa320-4.13-3.patch                      36
-rw-r--r--  system/xen/xsa/xsa327.patch                             63
-rw-r--r--  system/xen/xsa/xsa328-4.13-1.patch                     118
-rw-r--r--  system/xen/xsa/xsa328-4.13-2.patch                      48
-rw-r--r--  system/xen/xsa/xsa328-post-xsa321-4.13-1.patch          31
-rw-r--r--  system/xen/xsa/xsa328-post-xsa321-4.13-2.patch         175
-rw-r--r--  system/xen/xsa/xsa328-post-xsa321-4.13-3.patch          82
-rw-r--r--  system/xen/xsa/xsa328-post-xsa321-4.13-4.patch          36
-rw-r--r--  system/xen/xsa/xsa328-post-xsa321-4.13-5.patch          24
-rw-r--r--  system/xen/xsa/xsa328-post-xsa321-4.13-6.patch          91
-rw-r--r--  system/xen/xsa/xsa328-post-xsa321-4.13-7.patch         153
-rw-r--r--  system/xen/xsa/xsa333.patch                             39
-rw-r--r--  system/xen/xsa/xsa334.patch                             51
-rw-r--r--  system/xen/xsa/xsa335-qemu.patch                        84
-rw-r--r--  system/xen/xsa/xsa336.patch                            283
-rw-r--r--  system/xen/xsa/xsa337-4.13-1.patch                      87
-rw-r--r--  system/xen/xsa/xsa337-4.13-2.patch                     181
-rw-r--r--  system/xen/xsa/xsa338.patch                             42
-rw-r--r--  system/xen/xsa/xsa339.patch                             76
-rw-r--r--  system/xen/xsa/xsa340.patch                             65
-rw-r--r--  system/xen/xsa/xsa342-4.13.patch                       145
-rw-r--r--  system/xen/xsa/xsa343-1.patch                          199
-rw-r--r--  system/xen/xsa/xsa343-2.patch                          295
-rw-r--r--  system/xen/xsa/xsa343-3.patch                          392
-rw-r--r--  system/xen/xsa/xsa344-4.13-1.patch                     130
-rw-r--r--  system/xen/xsa/xsa344-4.13-2.patch                     203
-rw-r--r--  system/xen/xsa/xsa345-0001-x86-mm-Refactor-map_pages_to_xen-to-have-only-a-sing.patch    94
-rw-r--r--  system/xen/xsa/xsa345-0002-x86-mm-Refactor-modify_xen_mappings-to-have-one-exit.patch    68
-rw-r--r--  system/xen/xsa/xsa345-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch   249
-rw-r--r--  system/xen/xsa/xsa346-4.13-1.patch                      50
-rw-r--r--  system/xen/xsa/xsa346-4.13-2.patch                     204
-rw-r--r--  system/xen/xsa/xsa347-4.13-1.patch                     149
-rw-r--r--  system/xen/xsa/xsa347-4.13-2.patch                      72
-rw-r--r--  system/xen/xsa/xsa347-4.13-3.patch                      59
38 files changed, 0 insertions, 4447 deletions
diff --git a/system/xen/xsa/xsa317.patch b/system/xen/xsa/xsa317.patch
deleted file mode 100644
index 20e2c643d0..0000000000
--- a/system/xen/xsa/xsa317.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From aeb46e92f915f19a61d5a8a1f4b696793f64e6fb Mon Sep 17 00:00:00 2001
-From: Julien Grall <jgrall@amazon.com>
-Date: Thu, 19 Mar 2020 13:17:31 +0000
-Subject: [PATCH] xen/common: event_channel: Don't ignore error in
- get_free_port()
-
-Currently, get_free_port() assumes that the port has been allocated
-when evtchn_allocate_port() does not return -EBUSY.
-
-However, the function may return an error when:
- - We have exhausted all the event channels. This can happen if the
-   limit configured by the administrator for the guest
-   ('max_event_channels' in xl cfg) is higher than what the ABI used
-   by the guest supports. For instance, if the guest is using 2L, the
-   limit should not be higher than 4095.
- - We cannot allocate memory (e.g. Xen has no more memory).
-
-Users of get_free_port() (such as EVTCHNOP_alloc_unbound) will then
-assume the port is valid and will next call evtchn_from_port(). This
-will result in a crash, as the memory backing the event channel
-structure is not present.
-
-Fixes: 368ae9a05fe ("xen/pvshim: forward evtchn ops between L0 Xen and L2 DomU")
-Signed-off-by: Julien Grall <jgrall@amazon.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
----
- xen/common/event_channel.c | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
-index e86e2bfab0..a8d182b584 100644
---- a/xen/common/event_channel.c
-+++ b/xen/common/event_channel.c
-@@ -195,10 +195,10 @@ static int get_free_port(struct domain *d)
- {
- int rc = evtchn_allocate_port(d, port);
-
-- if ( rc == -EBUSY )
-- continue;
--
-- return port;
-+ if ( rc == 0 )
-+ return port;
-+ else if ( rc != -EBUSY )
-+ return rc;
- }
-
- return -ENOSPC;
---
-2.17.1
-
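
A minimal user-space sketch (not Xen code) of the corrected pattern above: -EBUSY means "try the next port", success hands the port back, and any other error is propagated instead of being mistaken for a valid port number. The allocator is a made-up stand-in.

    #include <errno.h>
    #include <stdio.h>

    #define MAX_PORTS 8

    /* Hypothetical allocator: 0 on success, -EBUSY if the port is taken,
     * -ENOMEM to simulate an allocation failure. */
    static int allocate_port(int port)
    {
        if (port < 3)
            return -EBUSY;
        if (port == 3)
            return -ENOMEM;
        return 0;
    }

    static int get_free_port(void)
    {
        for (int port = 0; port < MAX_PORTS; port++) {
            int rc = allocate_port(port);

            if (rc == 0)
                return port;   /* success: hand the port back */
            else if (rc != -EBUSY)
                return rc;     /* real error: propagate it */
            /* -EBUSY: slot already taken, keep scanning */
        }

        return -ENOSPC;
    }

    int main(void)
    {
        printf("get_free_port() = %d\n", get_free_port()); /* -12 (-ENOMEM) */
        return 0;
    }
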
diff --git a/system/xen/xsa/xsa319.patch b/system/xen/xsa/xsa319.patch
deleted file mode 100644
index 769443c900..0000000000
--- a/system/xen/xsa/xsa319.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: x86/shadow: correct an inverted conditional in dirty VRAM tracking
-
-This originally was "mfn_x(mfn) == INVALID_MFN". Make it like this
-again, taking the opportunity to also drop the unnecessary nearby
-braces.
-
-This is XSA-319.
-
-Fixes: 246a5a3377c2 ("xen: Use a typesafe to define INVALID_MFN")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-
---- a/xen/arch/x86/mm/shadow/common.c
-+++ b/xen/arch/x86/mm/shadow/common.c
-@@ -3252,10 +3252,8 @@ int shadow_track_dirty_vram(struct domai
- int dirty = 0;
- paddr_t sl1ma = dirty_vram->sl1ma[i];
-
-- if ( !mfn_eq(mfn, INVALID_MFN) )
-- {
-+ if ( mfn_eq(mfn, INVALID_MFN) )
- dirty = 1;
-- }
- else
- {
- page = mfn_to_page(mfn);
diff --git a/system/xen/xsa/xsa320-4.13-1.patch b/system/xen/xsa/xsa320-4.13-1.patch
deleted file mode 100644
index 09eb8ea98e..0000000000
--- a/system/xen/xsa/xsa320-4.13-1.patch
+++ /dev/null
@@ -1,117 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/spec-ctrl: CPUID/MSR definitions for Special Register Buffer Data Sampling
-
-This is part of XSA-320 / CVE-2020-0543
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Wei Liu <wl@xen.org>
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index 1d9d816622..9268454297 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -483,10 +483,10 @@ accounting for hardware capabilities as enumerated via CPUID.
-
- Currently accepted:
-
--The Speculation Control hardware features `md-clear`, `ibrsb`, `stibp`, `ibpb`,
--`l1d-flush` and `ssbd` are used by default if available and applicable. They can
--be ignored, e.g. `no-ibrsb`, at which point Xen won't use them itself, and
--won't offer them to guests.
-+The Speculation Control hardware features `srbds-ctrl`, `md-clear`, `ibrsb`,
-+`stibp`, `ibpb`, `l1d-flush` and `ssbd` are used by default if available and
-+applicable. They can be ignored, e.g. `no-ibrsb`, at which point Xen won't
-+use them itself, and won't offer them to guests.
-
- ### cpuid_mask_cpu
- > `= fam_0f_rev_[cdefg] | fam_10_rev_[bc] | fam_11_rev_b`
-diff --git a/tools/libxl/libxl_cpuid.c b/tools/libxl/libxl_cpuid.c
-index 6cea4227ba..a78f08b927 100644
---- a/tools/libxl/libxl_cpuid.c
-+++ b/tools/libxl/libxl_cpuid.c
-@@ -213,6 +213,7 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str)
-
- {"avx512-4vnniw",0x00000007, 0, CPUID_REG_EDX, 2, 1},
- {"avx512-4fmaps",0x00000007, 0, CPUID_REG_EDX, 3, 1},
-+ {"srbds-ctrl", 0x00000007, 0, CPUID_REG_EDX, 9, 1},
- {"md-clear", 0x00000007, 0, CPUID_REG_EDX, 10, 1},
- {"cet-ibt", 0x00000007, 0, CPUID_REG_EDX, 20, 1},
- {"ibrsb", 0x00000007, 0, CPUID_REG_EDX, 26, 1},
-diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
-index 603e1d65fd..a09440813b 100644
---- a/tools/misc/xen-cpuid.c
-+++ b/tools/misc/xen-cpuid.c
-@@ -157,6 +157,7 @@ static const char *const str_7d0[32] =
- [ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps",
- [ 4] = "fsrm",
-
-+ /* 8 */ [ 9] = "srbds-ctrl",
- [10] = "md-clear",
- /* 12 */ [13] = "tsx-force-abort",
-
-diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
-index 4b12103482..0cded3c0ad 100644
---- a/xen/arch/x86/msr.c
-+++ b/xen/arch/x86/msr.c
-@@ -134,6 +134,7 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val)
- /* Write-only */
- case MSR_TSX_FORCE_ABORT:
- case MSR_TSX_CTRL:
-+ case MSR_MCU_OPT_CTRL:
- case MSR_U_CET:
- case MSR_S_CET:
- case MSR_PL0_SSP ... MSR_INTERRUPT_SSP_TABLE:
-@@ -288,6 +289,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
- /* Read-only */
- case MSR_TSX_FORCE_ABORT:
- case MSR_TSX_CTRL:
-+ case MSR_MCU_OPT_CTRL:
- case MSR_U_CET:
- case MSR_S_CET:
- case MSR_PL0_SSP ... MSR_INTERRUPT_SSP_TABLE:
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 6656c44aec..5fc1c6827e 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -312,12 +312,13 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
- printk("Speculative mitigation facilities:\n");
-
- /* Hardware features which pertain to speculative mitigations. */
-- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
-+ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
- (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "",
- (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP" : "",
- (_7d0 & cpufeat_mask(X86_FEATURE_L1D_FLUSH)) ? " L1D_FLUSH" : "",
- (_7d0 & cpufeat_mask(X86_FEATURE_SSBD)) ? " SSBD" : "",
- (_7d0 & cpufeat_mask(X86_FEATURE_MD_CLEAR)) ? " MD_CLEAR" : "",
-+ (_7d0 & cpufeat_mask(X86_FEATURE_SRBDS_CTRL)) ? " SRBDS_CTRL" : "",
- (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ? " IBPB" : "",
- (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "",
- (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "",
-diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
-index 7693c4a71a..91994669e1 100644
---- a/xen/include/asm-x86/msr-index.h
-+++ b/xen/include/asm-x86/msr-index.h
-@@ -179,6 +179,9 @@
- #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x490
- #define MSR_IA32_VMX_VMFUNC 0x491
-
-+#define MSR_MCU_OPT_CTRL 0x00000123
-+#define MCU_OPT_CTRL_RNGDS_MITG_DIS (_AC(1, ULL) << 0)
-+
- #define MSR_U_CET 0x000006a0
- #define MSR_S_CET 0x000006a2
- #define MSR_PL0_SSP 0x000006a4
-diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
-index 2835688f1c..a2482c3627 100644
---- a/xen/include/public/arch-x86/cpufeatureset.h
-+++ b/xen/include/public/arch-x86/cpufeatureset.h
-@@ -252,6 +252,7 @@ XEN_CPUFEATURE(IBPB, 8*32+12) /*A IBPB support only (no IBRS, used by
- /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
- XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */
- XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single Precision */
-+XEN_CPUFEATURE(SRBDS_CTRL, 9*32+ 9) /* MSR_MCU_OPT_CTRL and RNGDS_MITG_DIS. */
- XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*A VERW clears microarchitectural buffers */
- XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */
- XEN_CPUFEATURE(CET_IBT, 9*32+20) /* CET - Indirect Branch Tracking */
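
A hedged aside (not part of the patch): the bit the patch wires up as `srbds-ctrl` is CPUID.(EAX=7,ECX=0):EDX[9], which can be probed from user space with the compiler's cpuid helper. MSR_MCU_OPT_CTRL (0x123) itself is ring-0 only, so only the CPUID side is shown.

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* Leaf 7, subleaf 0: structured extended feature flags. */
        if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
            return 1;

        printf("SRBDS_CTRL: %s\n", (edx & (1u << 9))  ? "yes" : "no");
        printf("MD_CLEAR:   %s\n", (edx & (1u << 10)) ? "yes" : "no");
        return 0;
    }
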
diff --git a/system/xen/xsa/xsa320-4.13-2.patch b/system/xen/xsa/xsa320-4.13-2.patch
deleted file mode 100644
index 8a8080a312..0000000000
--- a/system/xen/xsa/xsa320-4.13-2.patch
+++ /dev/null
@@ -1,179 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/spec-ctrl: Mitigate the Special Register Buffer Data Sampling sidechannel
-
-See patch documentation and comments.
-
-This is part of XSA-320 / CVE-2020-0543
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index 9268454297..c780312531 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -1991,7 +1991,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
- ### spec-ctrl (x86)
- > `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb,md-clear}=<bool>,
- > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,eager-fpu,
--> l1d-flush,branch-harden}=<bool> ]`
-+> l1d-flush,branch-harden,srb-lock}=<bool> ]`
-
- Controls for speculative execution sidechannel mitigations. By default, Xen
- will pick the most appropriate mitigations based on compiled in support,
-@@ -2068,6 +2068,12 @@ If Xen is compiled with `CONFIG_SPECULATIVE_HARDEN_BRANCH`, the
- speculation barriers to protect selected conditional branches. By default,
- Xen will enable this mitigation.
-
-+On hardware supporting SRBDS_CTRL, the `srb-lock=` option can be used to force
-+or prevent Xen from protecting the Special Register Buffer from leaking stale
-+data. By default, Xen will enable this mitigation, except on parts where MDS
-+is fixed and TAA is fixed/mitigated (in which case, there is believed to be no
-+way for an attacker to obtain the stale data).
-+
- ### sync_console
- > `= <boolean>`
-
-diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
-index feb0f6ce20..75c6e34164 100644
---- a/xen/arch/x86/acpi/power.c
-+++ b/xen/arch/x86/acpi/power.c
-@@ -295,6 +295,9 @@ static int enter_state(u32 state)
- ci->spec_ctrl_flags |= (default_spec_ctrl_flags & SCF_ist_wrmsr);
- spec_ctrl_exit_idle(ci);
-
-+ if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) )
-+ wrmsrl(MSR_MCU_OPT_CTRL, default_xen_mcu_opt_ctrl);
-+
- done:
- spin_debug_enable();
- local_irq_restore(flags);
-diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
-index dc8fdac1a1..b1e51b3aff 100644
---- a/xen/arch/x86/smpboot.c
-+++ b/xen/arch/x86/smpboot.c
-@@ -361,12 +361,14 @@ void start_secondary(void *unused)
- microcode_update_one(false);
-
- /*
-- * If MSR_SPEC_CTRL is available, apply Xen's default setting and discard
-- * any firmware settings. Note: MSR_SPEC_CTRL may only become available
-- * after loading microcode.
-+ * If any speculative control MSRs are available, apply Xen's default
-+ * settings. Note: These MSRs may only become available after loading
-+ * microcode.
- */
- if ( boot_cpu_has(X86_FEATURE_IBRSB) )
- wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl);
-+ if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) )
-+ wrmsrl(MSR_MCU_OPT_CTRL, default_xen_mcu_opt_ctrl);
-
- tsx_init(); /* Needs microcode. May change HLE/RTM feature bits. */
-
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 5fc1c6827e..33343062a7 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -65,6 +65,9 @@ static unsigned int __initdata l1d_maxphysaddr;
- static bool __initdata cpu_has_bug_msbds_only; /* => minimal HT impact. */
- static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination. */
-
-+static int8_t __initdata opt_srb_lock = -1;
-+uint64_t __read_mostly default_xen_mcu_opt_ctrl;
-+
- static int __init parse_spec_ctrl(const char *s)
- {
- const char *ss;
-@@ -112,6 +115,7 @@ static int __init parse_spec_ctrl(const char *s)
- opt_ssbd = false;
- opt_l1d_flush = 0;
- opt_branch_harden = false;
-+ opt_srb_lock = 0;
- }
- else if ( val > 0 )
- rc = -EINVAL;
-@@ -178,6 +182,8 @@ static int __init parse_spec_ctrl(const char *s)
- opt_l1d_flush = val;
- else if ( (val = parse_boolean("branch-harden", s, ss)) >= 0 )
- opt_branch_harden = val;
-+ else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 )
-+ opt_srb_lock = val;
- else
- rc = -EINVAL;
-
-@@ -341,7 +347,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
- "\n");
-
- /* Settings for Xen's protection, irrespective of guests. */
-- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s, Other:%s%s%s%s\n",
-+ printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s, Other:%s%s%s%s%s\n",
- thunk == THUNK_NONE ? "N/A" :
- thunk == THUNK_RETPOLINE ? "RETPOLINE" :
- thunk == THUNK_LFENCE ? "LFENCE" :
-@@ -352,6 +358,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
- (default_xen_spec_ctrl & SPEC_CTRL_SSBD) ? " SSBD+" : " SSBD-",
- !(caps & ARCH_CAPS_TSX_CTRL) ? "" :
- (opt_tsx & 1) ? " TSX+" : " TSX-",
-+ !boot_cpu_has(X86_FEATURE_SRBDS_CTRL) ? "" :
-+ opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-",
- opt_ibpb ? " IBPB" : "",
- opt_l1d_flush ? " L1D_FLUSH" : "",
- opt_md_clear_pv || opt_md_clear_hvm ? " VERW" : "",
-@@ -1149,6 +1157,34 @@ void __init init_speculation_mitigations(void)
- tsx_init();
- }
-
-+ /* Calculate suitable defaults for MSR_MCU_OPT_CTRL */
-+ if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) )
-+ {
-+ uint64_t val;
-+
-+ rdmsrl(MSR_MCU_OPT_CTRL, val);
-+
-+ /*
-+ * On some SRBDS-affected hardware, it may be safe to relax srb-lock
-+ * by default.
-+ *
-+ * On parts which enumerate MDS_NO and not TAA_NO, TSX is the only way
-+ * to access the Fill Buffer. If TSX isn't available (inc. SKU
-+ * reasons on some models), or TSX is explicitly disabled, then there
-+ * is no need for the extra overhead to protect RDRAND/RDSEED.
-+ */
-+ if ( opt_srb_lock == -1 &&
-+ (caps & (ARCH_CAPS_MDS_NO|ARCH_CAPS_TAA_NO)) == ARCH_CAPS_MDS_NO &&
-+ (!cpu_has_hle || ((caps & ARCH_CAPS_TSX_CTRL) && opt_tsx == 0)) )
-+ opt_srb_lock = 0;
-+
-+ val &= ~MCU_OPT_CTRL_RNGDS_MITG_DIS;
-+ if ( !opt_srb_lock )
-+ val |= MCU_OPT_CTRL_RNGDS_MITG_DIS;
-+
-+ default_xen_mcu_opt_ctrl = val;
-+ }
-+
- print_details(thunk, caps);
-
- /*
-@@ -1180,6 +1216,9 @@ void __init init_speculation_mitigations(void)
-
- wrmsrl(MSR_SPEC_CTRL, bsp_delay_spec_ctrl ? 0 : default_xen_spec_ctrl);
- }
-+
-+ if ( boot_cpu_has(X86_FEATURE_SRBDS_CTRL) )
-+ wrmsrl(MSR_MCU_OPT_CTRL, default_xen_mcu_opt_ctrl);
- }
-
- static void __init __maybe_unused build_assertions(void)
-diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
-index 9caecddfec..b252bb8631 100644
---- a/xen/include/asm-x86/spec_ctrl.h
-+++ b/xen/include/asm-x86/spec_ctrl.h
-@@ -54,6 +54,8 @@ extern int8_t opt_pv_l1tf_hwdom, opt_pv_l1tf_domu;
- */
- extern paddr_t l1tf_addr_mask, l1tf_safe_maddr;
-
-+extern uint64_t default_xen_mcu_opt_ctrl;
-+
- static inline void init_shadow_spec_ctrl_state(void)
- {
- struct cpu_info *info = get_cpu_info();
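
A hedged sketch mirroring the default-calculation hunk above: srb-lock is relaxed only when MDS_NO is enumerated without TAA_NO and TSX is unavailable or disabled, i.e. when no path to the fill buffer remains. The flag values are stand-ins, not the real ARCH_CAPS_* encoding.

    #include <stdbool.h>

    #define MDS_NO (1u << 0)   /* stand-in for ARCH_CAPS_MDS_NO */
    #define TAA_NO (1u << 1)   /* stand-in for ARCH_CAPS_TAA_NO */

    /* Returns true when the SRB lock should stay enabled by default. */
    static bool srb_lock_default(unsigned int caps, bool has_hle,
                                 bool tsx_disabled)
    {
        if ((caps & (MDS_NO | TAA_NO)) == MDS_NO &&
            (!has_hle || tsx_disabled))
            return false;   /* stale RDRAND/RDSEED data is unreachable */

        return true;        /* keep the mitigation on */
    }

    int main(void)
    {
        /* MDS fixed, no TSX: the lock may safely be relaxed. */
        return srb_lock_default(MDS_NO, false, false) ? 1 : 0;
    }
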
diff --git a/system/xen/xsa/xsa320-4.13-3.patch b/system/xen/xsa/xsa320-4.13-3.patch
deleted file mode 100644
index b52eeb338a..0000000000
--- a/system/xen/xsa/xsa320-4.13-3.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/spec-ctrl: Update docs with SRBDS workaround
-
-RDRAND/RDSEED can be hidden using cpuid= to mitigate SRBDS if microcode
-isn't available.
-
-This is part of XSA-320 / CVE-2020-0543.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Acked-by: Julien Grall <jgrall@amazon.com>
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index c780312531..81e12d053c 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -481,12 +481,18 @@ choice of `dom0-kernel` is deprecated and not supported by all Dom0 kernels.
- This option allows for fine tuning of the facilities Xen will use, after
- accounting for hardware capabilities as enumerated via CPUID.
-
-+Unless otherwise noted, options only have any effect in their negative form,
-+to hide the named feature(s). Ignoring a feature using this mechanism will
-+cause Xen not to use the feature, nor offer it as usable to guests.
-+
- Currently accepted:
-
- The Speculation Control hardware features `srbds-ctrl`, `md-clear`, `ibrsb`,
- `stibp`, `ibpb`, `l1d-flush` and `ssbd` are used by default if available and
--applicable. They can be ignored, e.g. `no-ibrsb`, at which point Xen won't
--use them itself, and won't offer them to guests.
-+applicable. They can all be ignored.
-+
-+`rdrand` and `rdseed` can be ignored, as a mitigation to XSA-320 /
-+CVE-2020-0543.
-
- ### cpuid_mask_cpu
- > `= fam_0f_rev_[cdefg] | fam_10_rev_[bc] | fam_11_rev_b`
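
As an illustration of the documented workaround (an assumption based on the `no-<feature>` negation style shown above, not text from the patch), hiding RDRAND/RDSEED when updated microcode is unavailable would look like this on the Xen command line:

    cpuid=no-rdrand,no-rdseed
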
diff --git a/system/xen/xsa/xsa327.patch b/system/xen/xsa/xsa327.patch
deleted file mode 100644
index 0541cfa0df..0000000000
--- a/system/xen/xsa/xsa327.patch
+++ /dev/null
@@ -1,63 +0,0 @@
-From 030300ebbb86c40c12db038714479d746167c767 Mon Sep 17 00:00:00 2001
-From: Julien Grall <jgrall@amazon.com>
-Date: Tue, 26 May 2020 18:31:33 +0100
-Subject: [PATCH] xen: Check the alignment of the offset passed via
- VCPUOP_register_vcpu_info
-
-Currently a guest is able to register any guest physical address to use
-for the vcpu_info structure as long as the structure can fit in the
-rest of the frame.
-
-This means a guest can provide an address that is not aligned to the
-natural alignment of the structure.
-
-On Arm 32-bit, unaligned accesses are completely forbidden by the
-hypervisor. This will result in a data abort, which is fatal.
-
-On Arm 64-bit, unaligned accesses are only forbidden when used for atomic
-access. As the structure contains fields (such as evtchn_pending_sel)
-that are updated using atomic operations, any unaligned access will be
-fatal as well.
-
-While the misalignment is only fatal on Arm, a generic check is added,
-as an x86 guest shouldn't sensibly pass an unaligned address (this
-would result in a split lock).
-
-This is XSA-327.
-
-Reported-by: Julien Grall <jgrall@amazon.com>
-Signed-off-by: Julien Grall <jgrall@amazon.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
----
- xen/common/domain.c | 10 ++++++++++
- 1 file changed, 10 insertions(+)
-
-diff --git a/xen/common/domain.c b/xen/common/domain.c
-index 7cc9526139a6..e9be05f1d05f 100644
---- a/xen/common/domain.c
-+++ b/xen/common/domain.c
-@@ -1227,10 +1227,20 @@ int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned offset)
- void *mapping;
- vcpu_info_t *new_info;
- struct page_info *page;
-+ unsigned int align;
-
- if ( offset > (PAGE_SIZE - sizeof(vcpu_info_t)) )
- return -EINVAL;
-
-+#ifdef CONFIG_COMPAT
-+ if ( has_32bit_shinfo(d) )
-+ align = alignof(new_info->compat);
-+ else
-+#endif
-+ align = alignof(*new_info);
-+ if ( offset & (align - 1) )
-+ return -EINVAL;
-+
- if ( !mfn_eq(v->vcpu_info_mfn, INVALID_MFN) )
- return -EINVAL;
-
---
-2.17.1
-
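
A minimal user-space sketch of the check the patch adds: reject any offset that is not a multiple of the structure's natural alignment. The structure is a stand-in, not the real vcpu_info_t layout.

    #include <stdalign.h>
    #include <stdio.h>

    struct info_like {
        unsigned long pending_sel;   /* updated atomically in the real code */
        unsigned char pad[56];
    };

    /* 0 if acceptably aligned, -1 (-EINVAL in the real code) otherwise. */
    static int check_offset(unsigned long offset)
    {
        unsigned long align = alignof(struct info_like);

        return (offset & (align - 1)) ? -1 : 0;
    }

    int main(void)
    {
        printf("%d %d\n", check_offset(64), check_offset(62)); /* 0 -1 */
        return 0;
    }
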
diff --git a/system/xen/xsa/xsa328-4.13-1.patch b/system/xen/xsa/xsa328-4.13-1.patch
deleted file mode 100644
index 56e48de3e9..0000000000
--- a/system/xen/xsa/xsa328-4.13-1.patch
+++ /dev/null
@@ -1,118 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: x86/EPT: ept_set_middle_entry() related adjustments
-
-ept_split_super_page() wants to further modify the newly allocated
-table, so have ept_set_middle_entry() return the mapped pointer rather
-than tearing the mapping down only for it to be re-established right
-away.
-
-Similarly ept_next_level() wants to hand back a mapped pointer of
-the next level page, so re-use the one established by
-ept_set_middle_entry() in case that path was taken.
-
-Pull the setting of suppress_ve ahead of insertion into the higher level
-table, and don't have ept_split_super_page() set the field a 2nd time.
-
-This is part of XSA-328.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-
---- a/xen/arch/x86/mm/p2m-ept.c
-+++ b/xen/arch/x86/mm/p2m-ept.c
-@@ -187,8 +187,9 @@ static void ept_p2m_type_to_flags(struct
- #define GUEST_TABLE_SUPER_PAGE 2
- #define GUEST_TABLE_POD_PAGE 3
-
--/* Fill in middle levels of ept table */
--static int ept_set_middle_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry)
-+/* Fill in middle level of ept table; return pointer to mapped new table. */
-+static ept_entry_t *ept_set_middle_entry(struct p2m_domain *p2m,
-+ ept_entry_t *ept_entry)
- {
- mfn_t mfn;
- ept_entry_t *table;
-@@ -196,7 +197,12 @@ static int ept_set_middle_entry(struct p
-
- mfn = p2m_alloc_ptp(p2m, 0);
- if ( mfn_eq(mfn, INVALID_MFN) )
-- return 0;
-+ return NULL;
-+
-+ table = map_domain_page(mfn);
-+
-+ for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
-+ table[i].suppress_ve = 1;
-
- ept_entry->epte = 0;
- ept_entry->mfn = mfn_x(mfn);
-@@ -208,14 +214,7 @@ static int ept_set_middle_entry(struct p
-
- ept_entry->suppress_ve = 1;
-
-- table = map_domain_page(mfn);
--
-- for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
-- table[i].suppress_ve = 1;
--
-- unmap_domain_page(table);
--
-- return 1;
-+ return table;
- }
-
- /* free ept sub tree behind an entry */
-@@ -253,10 +252,10 @@ static bool_t ept_split_super_page(struc
-
- ASSERT(is_epte_superpage(ept_entry));
-
-- if ( !ept_set_middle_entry(p2m, &new_ept) )
-+ table = ept_set_middle_entry(p2m, &new_ept);
-+ if ( !table )
- return 0;
-
-- table = map_domain_page(_mfn(new_ept.mfn));
- trunk = 1UL << ((level - 1) * EPT_TABLE_ORDER);
-
- for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
-@@ -267,7 +266,6 @@ static bool_t ept_split_super_page(struc
- epte->sp = (level > 1);
- epte->mfn += i * trunk;
- epte->snp = is_iommu_enabled(p2m->domain) && iommu_snoop;
-- epte->suppress_ve = 1;
-
- ept_p2m_type_to_flags(p2m, epte, epte->sa_p2mt, epte->access);
-
-@@ -306,8 +304,7 @@ static int ept_next_level(struct p2m_dom
- ept_entry_t **table, unsigned long *gfn_remainder,
- int next_level)
- {
-- unsigned long mfn;
-- ept_entry_t *ept_entry, e;
-+ ept_entry_t *ept_entry, *next = NULL, e;
- u32 shift, index;
-
- shift = next_level * EPT_TABLE_ORDER;
-@@ -332,19 +329,17 @@ static int ept_next_level(struct p2m_dom
- if ( read_only )
- return GUEST_TABLE_MAP_FAILED;
-
-- if ( !ept_set_middle_entry(p2m, ept_entry) )
-+ next = ept_set_middle_entry(p2m, ept_entry);
-+ if ( !next )
- return GUEST_TABLE_MAP_FAILED;
-- else
-- e = atomic_read_ept_entry(ept_entry); /* Refresh */
-+ /* e is now stale and hence may not be used anymore below. */
- }
--
- /* The only time sp would be set here is if we had hit a superpage */
-- if ( is_epte_superpage(&e) )
-+ else if ( is_epte_superpage(&e) )
- return GUEST_TABLE_SUPER_PAGE;
-
-- mfn = e.mfn;
- unmap_domain_page(*table);
-- *table = map_domain_page(_mfn(mfn));
-+ *table = next ?: map_domain_page(_mfn(e.mfn));
- *gfn_remainder &= (1UL << shift) - 1;
- return GUEST_TABLE_NORMAL_PAGE;
- }
diff --git a/system/xen/xsa/xsa328-4.13-2.patch b/system/xen/xsa/xsa328-4.13-2.patch
deleted file mode 100644
index c4f437f625..0000000000
--- a/system/xen/xsa/xsa328-4.13-2.patch
+++ /dev/null
@@ -1,48 +0,0 @@
-From: <security@xenproject.org>
-Subject: x86/ept: atomically modify entries in ept_next_level
-
-ept_next_level was passing a live PTE pointer to ept_set_middle_entry,
-which was then modified without taking into account that the PTE could
-be part of a live EPT table. This wasn't a security issue because the
-pages returned by p2m_alloc_ptp are zeroed, so adding such an entry
-before actually initializing it didn't allow a guest to access
-physical memory addresses it wasn't supposed to access.
-
-This is part of XSA-328.
-
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
---- a/xen/arch/x86/mm/p2m-ept.c
-+++ b/xen/arch/x86/mm/p2m-ept.c
-@@ -307,6 +307,8 @@ static int ept_next_level(struct p2m_dom
- ept_entry_t *ept_entry, *next = NULL, e;
- u32 shift, index;
-
-+ ASSERT(next_level);
-+
- shift = next_level * EPT_TABLE_ORDER;
-
- index = *gfn_remainder >> shift;
-@@ -323,16 +325,20 @@ static int ept_next_level(struct p2m_dom
-
- if ( !is_epte_present(&e) )
- {
-+ int rc;
-+
- if ( e.sa_p2mt == p2m_populate_on_demand )
- return GUEST_TABLE_POD_PAGE;
-
- if ( read_only )
- return GUEST_TABLE_MAP_FAILED;
-
-- next = ept_set_middle_entry(p2m, ept_entry);
-+ next = ept_set_middle_entry(p2m, &e);
- if ( !next )
- return GUEST_TABLE_MAP_FAILED;
-- /* e is now stale and hence may not be used anymore below. */
-+
-+ rc = atomic_write_ept_entry(p2m, ept_entry, e, next_level);
-+ ASSERT(rc == 0);
- }
- /* The only time sp would be set here is if we had hit a superpage */
- else if ( is_epte_superpage(&e) )
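
A hedged sketch of the pattern this fix applies: build the new entry completely in a local variable and publish it to the live table with a single atomic store, rather than editing the live entry field by field. Types and the bit layout are stand-ins.

    #include <stdatomic.h>
    #include <stdint.h>

    typedef struct { _Atomic uint64_t epte; } entry_like;

    static void publish(entry_like *live, uint64_t mfn, uint64_t flags)
    {
        uint64_t e = 0;

        e |= flags;        /* fully initialize the local copy first */
        e |= mfn << 12;

        /* A single ordered store makes the finished entry visible. */
        atomic_store_explicit(&live->epte, e, memory_order_release);
    }

    int main(void)
    {
        entry_like slot = { 0 };

        publish(&slot, 0x1234, 0x7);
        return 0;
    }
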
diff --git a/system/xen/xsa/xsa328-post-xsa321-4.13-1.patch b/system/xen/xsa/xsa328-post-xsa321-4.13-1.patch
deleted file mode 100644
index 9a08ab240e..0000000000
--- a/system/xen/xsa/xsa328-post-xsa321-4.13-1.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: vtd: improve IOMMU TLB flush
-
-Do not limit PSI flushes to order 0 pages, in order to avoid doing a
-full TLB flush if the passed in page has an order greater than 0 and
-is aligned. Should increase the performance of IOMMU TLB flushes when
-dealing with page orders greater than 0.
-
-This is part of XSA-321.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-
---- a/xen/drivers/passthrough/vtd/iommu.c
-+++ b/xen/drivers/passthrough/vtd/iommu.c
-@@ -570,13 +570,14 @@ static int __must_check iommu_flush_iotl
- if ( iommu_domid == -1 )
- continue;
-
-- if ( page_count != 1 || dfn_eq(dfn, INVALID_DFN) )
-+ if ( !page_count || (page_count & (page_count - 1)) ||
-+ dfn_eq(dfn, INVALID_DFN) || !IS_ALIGNED(dfn_x(dfn), page_count) )
- rc = iommu_flush_iotlb_dsi(iommu, iommu_domid,
- 0, flush_dev_iotlb);
- else
- rc = iommu_flush_iotlb_psi(iommu, iommu_domid,
- dfn_to_daddr(dfn),
-- PAGE_ORDER_4K,
-+ get_order_from_pages(page_count),
- !dma_old_pte_present,
- flush_dev_iotlb);
-
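
A small runnable sketch of the flush-selection test added above: page selective invalidation (PSI) is only usable when the page count is a non-zero power of two and the DFN is aligned to that count; anything else falls back to a domain-selective flush.

    #include <stdbool.h>
    #include <stdio.h>

    static bool can_use_psi(unsigned long dfn, unsigned long page_count)
    {
        /* Reject zero and non-powers-of-two. */
        if (!page_count || (page_count & (page_count - 1)))
            return false;

        /* The start of the run must be aligned to its size. */
        return (dfn & (page_count - 1)) == 0;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               can_use_psi(0x200, 512),    /* 1: aligned power of two */
               can_use_psi(0x200, 300),    /* 0: not a power of two */
               can_use_psi(0x201, 512));   /* 0: misaligned start */
        return 0;
    }
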
diff --git a/system/xen/xsa/xsa328-post-xsa321-4.13-2.patch b/system/xen/xsa/xsa328-post-xsa321-4.13-2.patch
deleted file mode 100644
index 1e48615f2b..0000000000
--- a/system/xen/xsa/xsa328-post-xsa321-4.13-2.patch
+++ /dev/null
@@ -1,175 +0,0 @@
-From: <security@xenproject.org>
-Subject: vtd: prune (and rename) cache flush functions
-
-Rename __iommu_flush_cache to iommu_sync_cache and remove
-iommu_flush_cache_page. Also remove the iommu_flush_cache_entry
-wrapper and just use iommu_sync_cache instead. Note the _entry suffix
-was meaningless as the wrapper was already taking a size parameter in
-bytes. While there also constify the addr parameter.
-
-No functional change intended.
-
-This is part of XSA-321.
-
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
---- a/xen/drivers/passthrough/vtd/extern.h
-+++ b/xen/drivers/passthrough/vtd/extern.h
-@@ -43,8 +43,7 @@ void disable_qinval(struct vtd_iommu *io
- int enable_intremap(struct vtd_iommu *iommu, int eim);
- void disable_intremap(struct vtd_iommu *iommu);
-
--void iommu_flush_cache_entry(void *addr, unsigned int size);
--void iommu_flush_cache_page(void *addr, unsigned long npages);
-+void iommu_sync_cache(const void *addr, unsigned int size);
- int iommu_alloc(struct acpi_drhd_unit *drhd);
- void iommu_free(struct acpi_drhd_unit *drhd);
-
---- a/xen/drivers/passthrough/vtd/intremap.c
-+++ b/xen/drivers/passthrough/vtd/intremap.c
-@@ -230,7 +230,7 @@ static void free_remap_entry(struct vtd_
- iremap_entries, iremap_entry);
-
- update_irte(iommu, iremap_entry, &new_ire, false);
-- iommu_flush_cache_entry(iremap_entry, sizeof(*iremap_entry));
-+ iommu_sync_cache(iremap_entry, sizeof(*iremap_entry));
- iommu_flush_iec_index(iommu, 0, index);
-
- unmap_vtd_domain_page(iremap_entries);
-@@ -406,7 +406,7 @@ static int ioapic_rte_to_remap_entry(str
- }
-
- update_irte(iommu, iremap_entry, &new_ire, !init);
-- iommu_flush_cache_entry(iremap_entry, sizeof(*iremap_entry));
-+ iommu_sync_cache(iremap_entry, sizeof(*iremap_entry));
- iommu_flush_iec_index(iommu, 0, index);
-
- unmap_vtd_domain_page(iremap_entries);
-@@ -695,7 +695,7 @@ static int msi_msg_to_remap_entry(
- update_irte(iommu, iremap_entry, &new_ire, msi_desc->irte_initialized);
- msi_desc->irte_initialized = true;
-
-- iommu_flush_cache_entry(iremap_entry, sizeof(*iremap_entry));
-+ iommu_sync_cache(iremap_entry, sizeof(*iremap_entry));
- iommu_flush_iec_index(iommu, 0, index);
-
- unmap_vtd_domain_page(iremap_entries);
---- a/xen/drivers/passthrough/vtd/iommu.c
-+++ b/xen/drivers/passthrough/vtd/iommu.c
-@@ -140,7 +140,8 @@ static int context_get_domain_id(struct
- }
-
- static int iommus_incoherent;
--static void __iommu_flush_cache(void *addr, unsigned int size)
-+
-+void iommu_sync_cache(const void *addr, unsigned int size)
- {
- int i;
- static unsigned int clflush_size = 0;
-@@ -155,16 +156,6 @@ static void __iommu_flush_cache(void *ad
- cacheline_flush((char *)addr + i);
- }
-
--void iommu_flush_cache_entry(void *addr, unsigned int size)
--{
-- __iommu_flush_cache(addr, size);
--}
--
--void iommu_flush_cache_page(void *addr, unsigned long npages)
--{
-- __iommu_flush_cache(addr, PAGE_SIZE * npages);
--}
--
- /* Allocate page table, return its machine address */
- uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node)
- {
-@@ -183,7 +174,7 @@ uint64_t alloc_pgtable_maddr(unsigned lo
- vaddr = __map_domain_page(cur_pg);
- memset(vaddr, 0, PAGE_SIZE);
-
-- iommu_flush_cache_page(vaddr, 1);
-+ iommu_sync_cache(vaddr, PAGE_SIZE);
- unmap_domain_page(vaddr);
- cur_pg++;
- }
-@@ -216,7 +207,7 @@ static u64 bus_to_context_maddr(struct v
- }
- set_root_value(*root, maddr);
- set_root_present(*root);
-- iommu_flush_cache_entry(root, sizeof(struct root_entry));
-+ iommu_sync_cache(root, sizeof(struct root_entry));
- }
- maddr = (u64) get_context_addr(*root);
- unmap_vtd_domain_page(root_entries);
-@@ -263,7 +254,7 @@ static u64 addr_to_dma_page_maddr(struct
- */
- dma_set_pte_readable(*pte);
- dma_set_pte_writable(*pte);
-- iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
-+ iommu_sync_cache(pte, sizeof(struct dma_pte));
- }
-
- if ( level == 2 )
-@@ -640,7 +631,7 @@ static int __must_check dma_pte_clear_on
- *flush_flags |= IOMMU_FLUSHF_modified;
-
- spin_unlock(&hd->arch.mapping_lock);
-- iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
-+ iommu_sync_cache(pte, sizeof(struct dma_pte));
-
- unmap_vtd_domain_page(page);
-
-@@ -679,7 +670,7 @@ static void iommu_free_page_table(struct
- iommu_free_pagetable(dma_pte_addr(*pte), next_level);
-
- dma_clear_pte(*pte);
-- iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
-+ iommu_sync_cache(pte, sizeof(struct dma_pte));
- }
-
- unmap_vtd_domain_page(pt_vaddr);
-@@ -1400,7 +1391,7 @@ int domain_context_mapping_one(
- context_set_address_width(*context, agaw);
- context_set_fault_enable(*context);
- context_set_present(*context);
-- iommu_flush_cache_entry(context, sizeof(struct context_entry));
-+ iommu_sync_cache(context, sizeof(struct context_entry));
- spin_unlock(&iommu->lock);
-
- /* Context entry was previously non-present (with domid 0). */
-@@ -1564,7 +1555,7 @@ int domain_context_unmap_one(
-
- context_clear_present(*context);
- context_clear_entry(*context);
-- iommu_flush_cache_entry(context, sizeof(struct context_entry));
-+ iommu_sync_cache(context, sizeof(struct context_entry));
-
- iommu_domid= domain_iommu_domid(domain, iommu);
- if ( iommu_domid == -1 )
-@@ -1791,7 +1782,7 @@ static int __must_check intel_iommu_map_
-
- *pte = new;
-
-- iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
-+ iommu_sync_cache(pte, sizeof(struct dma_pte));
- spin_unlock(&hd->arch.mapping_lock);
- unmap_vtd_domain_page(page);
-
-@@ -1866,7 +1857,7 @@ int iommu_pte_flush(struct domain *d, ui
- int iommu_domid;
- int rc = 0;
-
-- iommu_flush_cache_entry(pte, sizeof(struct dma_pte));
-+ iommu_sync_cache(pte, sizeof(struct dma_pte));
-
- for_each_drhd_unit ( drhd )
- {
-@@ -2724,7 +2715,7 @@ static int __init intel_iommu_quarantine
- dma_set_pte_addr(*pte, maddr);
- dma_set_pte_readable(*pte);
- }
-- iommu_flush_cache_page(parent, 1);
-+ iommu_sync_cache(parent, PAGE_SIZE);
-
- unmap_vtd_domain_page(parent);
- parent = map_vtd_domain_page(maddr);
diff --git a/system/xen/xsa/xsa328-post-xsa321-4.13-3.patch b/system/xen/xsa/xsa328-post-xsa321-4.13-3.patch
deleted file mode 100644
index c141c4b785..0000000000
--- a/system/xen/xsa/xsa328-post-xsa321-4.13-3.patch
+++ /dev/null
@@ -1,82 +0,0 @@
-From: <security@xenproject.org>
-Subject: x86/iommu: introduce a cache sync hook
-
-The hook is only implemented for VT-d and it uses the already existing
-iommu_sync_cache function present in VT-d code. The new hook is
-added so that the cache can be flushed by code outside of VT-d when
-using shared page tables.
-
-Note that alloc_pgtable_maddr must use the now locally defined
-sync_cache function, because IOMMU ops are not yet set up the first
-time the function gets called during IOMMU initialization.
-
-No functional change intended.
-
-This is part of XSA-321.
-
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
---- a/xen/drivers/passthrough/vtd/extern.h
-+++ b/xen/drivers/passthrough/vtd/extern.h
-@@ -43,7 +43,6 @@ void disable_qinval(struct vtd_iommu *io
- int enable_intremap(struct vtd_iommu *iommu, int eim);
- void disable_intremap(struct vtd_iommu *iommu);
-
--void iommu_sync_cache(const void *addr, unsigned int size);
- int iommu_alloc(struct acpi_drhd_unit *drhd);
- void iommu_free(struct acpi_drhd_unit *drhd);
-
---- a/xen/drivers/passthrough/vtd/iommu.c
-+++ b/xen/drivers/passthrough/vtd/iommu.c
-@@ -141,7 +141,7 @@ static int context_get_domain_id(struct
-
- static int iommus_incoherent;
-
--void iommu_sync_cache(const void *addr, unsigned int size)
-+static void sync_cache(const void *addr, unsigned int size)
- {
- int i;
- static unsigned int clflush_size = 0;
-@@ -174,7 +174,7 @@ uint64_t alloc_pgtable_maddr(unsigned lo
- vaddr = __map_domain_page(cur_pg);
- memset(vaddr, 0, PAGE_SIZE);
-
-- iommu_sync_cache(vaddr, PAGE_SIZE);
-+ sync_cache(vaddr, PAGE_SIZE);
- unmap_domain_page(vaddr);
- cur_pg++;
- }
-@@ -2763,6 +2763,7 @@ const struct iommu_ops __initconstrel in
- .iotlb_flush_all = iommu_flush_iotlb_all,
- .get_reserved_device_memory = intel_iommu_get_reserved_device_memory,
- .dump_p2m_table = vtd_dump_p2m_table,
-+ .sync_cache = sync_cache,
- };
-
- const struct iommu_init_ops __initconstrel intel_iommu_init_ops = {
---- a/xen/include/asm-x86/iommu.h
-+++ b/xen/include/asm-x86/iommu.h
-@@ -121,6 +121,13 @@ extern bool untrusted_msi;
- int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
- const uint8_t gvec);
-
-+#define iommu_sync_cache(addr, size) ({ \
-+ const struct iommu_ops *ops = iommu_get_ops(); \
-+ \
-+ if ( ops->sync_cache ) \
-+ iommu_vcall(ops, sync_cache, addr, size); \
-+})
-+
- #endif /* !__ARCH_X86_IOMMU_H__ */
- /*
- * Local variables:
---- a/xen/include/xen/iommu.h
-+++ b/xen/include/xen/iommu.h
-@@ -250,6 +250,7 @@ struct iommu_ops {
- int (*setup_hpet_msi)(struct msi_desc *);
-
- int (*adjust_irq_affinities)(void);
-+ void (*sync_cache)(const void *addr, unsigned int size);
- #endif /* CONFIG_X86 */
-
- int __must_check (*suspend)(void);
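
A hedged sketch of the hook shape introduced above: an optional function pointer in an ops table, invoked only when the backend provides it, so coherent IOMMUs pay nothing. Names are stand-ins for the real iommu_ops plumbing.

    #include <stdio.h>

    struct ops_like {
        void (*sync_cache)(const void *addr, unsigned int size);
    };

    static void vtd_sync_cache(const void *addr, unsigned int size)
    {
        printf("sync %u bytes at %p\n", size, addr);
    }

    static void maybe_sync(const struct ops_like *ops,
                           const void *addr, unsigned int size)
    {
        if (ops->sync_cache)             /* no-op when not implemented */
            ops->sync_cache(addr, size);
    }

    int main(void)
    {
        struct ops_like vtd      = { .sync_cache = vtd_sync_cache };
        struct ops_like coherent = { 0 };
        char buf[32];

        maybe_sync(&vtd, buf, sizeof(buf));       /* calls the hook */
        maybe_sync(&coherent, buf, sizeof(buf));  /* does nothing */
        return 0;
    }
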
diff --git a/system/xen/xsa/xsa328-post-xsa321-4.13-4.patch b/system/xen/xsa/xsa328-post-xsa321-4.13-4.patch
deleted file mode 100644
index 62bbcc7271..0000000000
--- a/system/xen/xsa/xsa328-post-xsa321-4.13-4.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From: <security@xenproject.org>
-Subject: vtd: don't assume addresses are aligned in sync_cache
-
-Current code in sync_cache assumes that the address passed in is
-aligned to a cache line size. Fix the code to support passing in
-arbitrary addresses not necessarily aligned to a cache line size.
-
-This is part of XSA-321.
-
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
---- a/xen/drivers/passthrough/vtd/iommu.c
-+++ b/xen/drivers/passthrough/vtd/iommu.c
-@@ -143,8 +143,8 @@ static int iommus_incoherent;
-
- static void sync_cache(const void *addr, unsigned int size)
- {
-- int i;
-- static unsigned int clflush_size = 0;
-+ static unsigned long clflush_size = 0;
-+ const void *end = addr + size;
-
- if ( !iommus_incoherent )
- return;
-@@ -152,8 +152,9 @@ static void sync_cache(const void *addr,
- if ( clflush_size == 0 )
- clflush_size = get_cache_line_size();
-
-- for ( i = 0; i < size; i += clflush_size )
-- cacheline_flush((char *)addr + i);
-+ addr -= (unsigned long)addr & (clflush_size - 1);
-+ for ( ; addr < end; addr += clflush_size )
-+ cacheline_flush((char *)addr);
- }
-
- /* Allocate page table, return its machine address */
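
A runnable sketch of the round-down fix above: flush from the start of the cache line containing the first byte through the line containing the last byte, instead of assuming the start address is already line-aligned.

    #include <stdint.h>
    #include <stdio.h>

    static void flush_range(uintptr_t addr, unsigned int size,
                            unsigned int line /* cache line size */)
    {
        uintptr_t end = addr + size;

        addr &= ~(uintptr_t)(line - 1);   /* round down to line start */
        for (; addr < end; addr += line)
            printf("flush line at 0x%lx\n", (unsigned long)addr);
    }

    int main(void)
    {
        flush_range(0x1038, 8, 64);    /* one line:    0x1000 */
        flush_range(0x1038, 80, 64);   /* three lines: 0x1000..0x1080 */
        return 0;
    }
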
diff --git a/system/xen/xsa/xsa328-post-xsa321-4.13-5.patch b/system/xen/xsa/xsa328-post-xsa321-4.13-5.patch
deleted file mode 100644
index 60cfe6ccdf..0000000000
--- a/system/xen/xsa/xsa328-post-xsa321-4.13-5.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-From: <security@xenproject.org>
-Subject: x86/alternative: introduce alternative_2
-
-It's based on alternative_io_2 without inputs or outputs but with an
-added memory clobber.
-
-This is part of XSA-321.
-
-Acked-by: Jan Beulich <jbeulich@suse.com>
-
---- a/xen/include/asm-x86/alternative.h
-+++ b/xen/include/asm-x86/alternative.h
-@@ -114,6 +114,11 @@ extern void alternative_branches(void);
- #define alternative(oldinstr, newinstr, feature) \
- asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
-
-+#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
-+ asm volatile (ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
-+ newinstr2, feature2) \
-+ : : : "memory")
-+
- /*
- * Alternative inline assembly with input.
- *
diff --git a/system/xen/xsa/xsa328-post-xsa321-4.13-6.patch b/system/xen/xsa/xsa328-post-xsa321-4.13-6.patch
deleted file mode 100644
index 4c5c5ab0ba..0000000000
--- a/system/xen/xsa/xsa328-post-xsa321-4.13-6.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From: <security@xenproject.org>
-Subject: vtd: optimize CPU cache sync
-
-Some VT-d IOMMUs are non-coherent, which requires a cache write back
-in order for the changes made by the CPU to be visible to the IOMMU.
-This cache write back was unconditionally done using clflush, but there are
-other more efficient instructions to do so, hence implement support
-for them using the alternative framework.
-
-This is part of XSA-321.
-
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
---- a/xen/drivers/passthrough/vtd/extern.h
-+++ b/xen/drivers/passthrough/vtd/extern.h
-@@ -68,7 +68,6 @@ int __must_check qinval_device_iotlb_syn
- u16 did, u16 size, u64 addr);
-
- unsigned int get_cache_line_size(void);
--void cacheline_flush(char *);
- void flush_all_cache(void);
-
- uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node);
---- a/xen/drivers/passthrough/vtd/iommu.c
-+++ b/xen/drivers/passthrough/vtd/iommu.c
-@@ -31,6 +31,7 @@
- #include <xen/pci_regs.h>
- #include <xen/keyhandler.h>
- #include <asm/msi.h>
-+#include <asm/nops.h>
- #include <asm/irq.h>
- #include <asm/hvm/vmx/vmx.h>
- #include <asm/p2m.h>
-@@ -154,7 +155,42 @@ static void sync_cache(const void *addr,
-
- addr -= (unsigned long)addr & (clflush_size - 1);
- for ( ; addr < end; addr += clflush_size )
-- cacheline_flush((char *)addr);
-+/*
-+ * The arguments to a macro must not include preprocessor directives. Doing so
-+ * results in undefined behavior, so we have to create some defines here in
-+ * order to avoid it.
-+ */
-+#if defined(HAVE_AS_CLWB)
-+# define CLWB_ENCODING "clwb %[p]"
-+#elif defined(HAVE_AS_XSAVEOPT)
-+# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
-+#else
-+# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
-+#endif
-+
-+#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
-+#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
-+# define INPUT BASE_INPUT
-+#else
-+# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
-+#endif
-+ /*
-+ * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
-+ * + prefix than a clflush + nop, and hence the prefix is added instead
-+ * of letting the alternative framework fill the gap by appending nops.
-+ */
-+ alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
-+ "data16 clflush %[p]", /* clflushopt */
-+ X86_FEATURE_CLFLUSHOPT,
-+ CLWB_ENCODING,
-+ X86_FEATURE_CLWB, /* no outputs */,
-+ INPUT(addr));
-+#undef INPUT
-+#undef BASE_INPUT
-+#undef CLWB_ENCODING
-+
-+ alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
-+ "sfence", X86_FEATURE_CLWB);
- }
-
- /* Allocate page table, return its machine address */
---- a/xen/drivers/passthrough/vtd/x86/vtd.c
-+++ b/xen/drivers/passthrough/vtd/x86/vtd.c
-@@ -51,11 +51,6 @@ unsigned int get_cache_line_size(void)
- return ((cpuid_ebx(1) >> 8) & 0xff) * 8;
- }
-
--void cacheline_flush(char * addr)
--{
-- clflush(addr);
--}
--
- void flush_all_cache()
- {
- wbinvd();
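
A hedged illustration, via compiler intrinsics rather than the Xen alternatives framework used above, of the three write-back instructions being selected between. Build with e.g. gcc -mclflushopt -mclwb; in real code the two availability flags would come from CPUID.

    #include <immintrin.h>

    void writeback_line(void *p, int have_clwb, int have_clflushopt)
    {
        if (have_clwb)
            _mm_clwb(p);            /* write back, keep the line cached */
        else if (have_clflushopt)
            _mm_clflushopt(p);      /* write back + evict, weakly ordered */
        else
            _mm_clflush(p);         /* write back + evict, strongly ordered */

        /* clflushopt/clwb are weakly ordered, so fence afterwards,
         * matching the trailing alternative_2() in the patch. */
        if (have_clwb || have_clflushopt)
            _mm_sfence();
    }
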
diff --git a/system/xen/xsa/xsa328-post-xsa321-4.13-7.patch b/system/xen/xsa/xsa328-post-xsa321-4.13-7.patch
deleted file mode 100644
index 0bd018f972..0000000000
--- a/system/xen/xsa/xsa328-post-xsa321-4.13-7.patch
+++ /dev/null
@@ -1,153 +0,0 @@
-From: <security@xenproject.org>
-Subject: x86/ept: flush cache when modifying PTEs and sharing page tables
-
-Modifications made to the page tables by EPT code need to be written
-to memory when the page tables are shared with the IOMMU, as Intel
-IOMMUs can be non-coherent and thus require changes to be written to
-memory in order to be visible to the IOMMU.
-
-In order to achieve this make sure data is written back to memory
-after writing an EPT entry when the recalc bit is not set in
-atomic_write_ept_entry. If such bit is set, the entry will be
-adjusted and atomic_write_ept_entry will be called a second time
-without the recalc bit set. Note that when splitting a super page the
-new tables resulting of the split should also be written back.
-
-Failure to do so can allow devices behind the IOMMU access to the
-stale super page, or cause coherency issues as changes made by the
-processor to the page tables are not visible to the IOMMU.
-
-This allows removing the VT-d specific iommu_pte_flush helper, since
-the cache write back is now performed by atomic_write_ept_entry, and
-hence iommu_iotlb_flush can be used to flush the IOMMU TLB. The newly
-used method (iommu_iotlb_flush) can result in fewer flushes, since it
-might sometimes legitimately be called with 0 flags, in which case it
-becomes a no-op.
-
-This is part of XSA-321.
-
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
---- a/xen/arch/x86/mm/p2m-ept.c
-+++ b/xen/arch/x86/mm/p2m-ept.c
-@@ -58,6 +58,19 @@ static int atomic_write_ept_entry(struct
-
- write_atomic(&entryptr->epte, new.epte);
-
-+ /*
-+ * The recalc field on the EPT is used to signal either that a
-+ * recalculation of the EMT field is required (which doesn't affect the
-+ * IOMMU), or a type change. Type changes can only be between ram_rw,
-+ * logdirty and ioreq_server: changes to/from logdirty won't work well with
-+ * an IOMMU anyway, as IOMMU #PFs are not synchronous and will lead to
-+ * aborts, and changes to/from ioreq_server are already fully flushed
-+ * before returning to guest context (see
-+ * XEN_DMOP_map_mem_type_to_ioreq_server).
-+ */
-+ if ( !new.recalc && iommu_use_hap_pt(p2m->domain) )
-+ iommu_sync_cache(entryptr, sizeof(*entryptr));
-+
- return 0;
- }
-
-@@ -278,6 +291,9 @@ static bool_t ept_split_super_page(struc
- break;
- }
-
-+ if ( iommu_use_hap_pt(p2m->domain) )
-+ iommu_sync_cache(table, EPT_PAGETABLE_ENTRIES * sizeof(ept_entry_t));
-+
- unmap_domain_page(table);
-
- /* Even failed we should install the newly allocated ept page. */
-@@ -337,6 +353,9 @@ static int ept_next_level(struct p2m_dom
- if ( !next )
- return GUEST_TABLE_MAP_FAILED;
-
-+ if ( iommu_use_hap_pt(p2m->domain) )
-+ iommu_sync_cache(next, EPT_PAGETABLE_ENTRIES * sizeof(ept_entry_t));
-+
- rc = atomic_write_ept_entry(p2m, ept_entry, e, next_level);
- ASSERT(rc == 0);
- }
-@@ -821,7 +840,10 @@ out:
- need_modify_vtd_table )
- {
- if ( iommu_use_hap_pt(d) )
-- rc = iommu_pte_flush(d, gfn, &ept_entry->epte, order, vtd_pte_present);
-+ rc = iommu_iotlb_flush(d, _dfn(gfn), (1u << order),
-+ (iommu_flags ? IOMMU_FLUSHF_added : 0) |
-+ (vtd_pte_present ? IOMMU_FLUSHF_modified
-+ : 0));
- else if ( need_iommu_pt_sync(d) )
- rc = iommu_flags ?
- iommu_legacy_map(d, _dfn(gfn), mfn, order, iommu_flags) :
---- a/xen/drivers/passthrough/vtd/iommu.c
-+++ b/xen/drivers/passthrough/vtd/iommu.c
-@@ -1884,53 +1884,6 @@ static int intel_iommu_lookup_page(struc
- return 0;
- }
-
--int iommu_pte_flush(struct domain *d, uint64_t dfn, uint64_t *pte,
-- int order, int present)
--{
-- struct acpi_drhd_unit *drhd;
-- struct vtd_iommu *iommu = NULL;
-- struct domain_iommu *hd = dom_iommu(d);
-- bool_t flush_dev_iotlb;
-- int iommu_domid;
-- int rc = 0;
--
-- iommu_sync_cache(pte, sizeof(struct dma_pte));
--
-- for_each_drhd_unit ( drhd )
-- {
-- iommu = drhd->iommu;
-- if ( !test_bit(iommu->index, &hd->arch.iommu_bitmap) )
-- continue;
--
-- flush_dev_iotlb = !!find_ats_dev_drhd(iommu);
-- iommu_domid= domain_iommu_domid(d, iommu);
-- if ( iommu_domid == -1 )
-- continue;
--
-- rc = iommu_flush_iotlb_psi(iommu, iommu_domid,
-- __dfn_to_daddr(dfn),
-- order, !present, flush_dev_iotlb);
-- if ( rc > 0 )
-- {
-- iommu_flush_write_buffer(iommu);
-- rc = 0;
-- }
-- }
--
-- if ( unlikely(rc) )
-- {
-- if ( !d->is_shutting_down && printk_ratelimit() )
-- printk(XENLOG_ERR VTDPREFIX
-- " d%d: IOMMU pages flush failed: %d\n",
-- d->domain_id, rc);
--
-- if ( !is_hardware_domain(d) )
-- domain_crash(d);
-- }
--
-- return rc;
--}
--
- static int __init vtd_ept_page_compatible(struct vtd_iommu *iommu)
- {
- u64 ept_cap, vtd_cap = iommu->cap;
---- a/xen/include/asm-x86/iommu.h
-+++ b/xen/include/asm-x86/iommu.h
-@@ -97,10 +97,6 @@ static inline int iommu_adjust_irq_affin
- : 0;
- }
-
--/* While VT-d specific, this must get declared in a generic header. */
--int __must_check iommu_pte_flush(struct domain *d, u64 gfn, u64 *pte,
-- int order, int present);
--
- static inline bool iommu_supports_x2apic(void)
- {
- return iommu_init_ops && iommu_init_ops->supports_x2apic
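
A hedged sketch tying the series together: after publishing an EPT entry with an atomic store, write the line back to memory when, and only when, the tables are shared with a possibly non-coherent IOMMU. sync_cache() is a stand-in for iommu_sync_cache().

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef struct { _Atomic uint64_t epte; } entry_like;

    /* Stand-in for iommu_sync_cache(); would clflush/clwb the line. */
    static void sync_cache(const void *addr, unsigned int size)
    {
        (void)addr; (void)size;
    }

    static void write_entry(entry_like *e, uint64_t val, bool shared)
    {
        atomic_store_explicit(&e->epte, val, memory_order_release);

        /* A non-coherent IOMMU only sees the change once it hits memory. */
        if (shared)
            sync_cache(e, sizeof(*e));
    }

    int main(void)
    {
        entry_like e = { 0 };

        write_entry(&e, 0x1234007, true);
        return 0;
    }
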
diff --git a/system/xen/xsa/xsa333.patch b/system/xen/xsa/xsa333.patch
deleted file mode 100644
index 6b86c942fa..0000000000
--- a/system/xen/xsa/xsa333.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/pv: Handle the Intel-specific MSR_MISC_ENABLE correctly
-
-This MSR doesn't exist on AMD hardware, and switching away from the safe
-functions in the common MSR path was an erroneous change.
-
-Partially revert the change.
-
-This is XSA-333.
-
-Fixes: 4fdc932b3cc ("x86/Intel: drop another 32-bit leftover")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Wei Liu <wl@xen.org>
-
-diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
-index efeb2a727e..6332c74b80 100644
---- a/xen/arch/x86/pv/emul-priv-op.c
-+++ b/xen/arch/x86/pv/emul-priv-op.c
-@@ -924,7 +924,8 @@ static int read_msr(unsigned int reg, uint64_t *val,
- return X86EMUL_OKAY;
-
- case MSR_IA32_MISC_ENABLE:
-- rdmsrl(reg, *val);
-+ if ( rdmsr_safe(reg, *val) )
-+ break;
- *val = guest_misc_enable(*val);
- return X86EMUL_OKAY;
-
-@@ -1059,7 +1060,8 @@ static int write_msr(unsigned int reg, uint64_t val,
- break;
-
- case MSR_IA32_MISC_ENABLE:
-- rdmsrl(reg, temp);
-+ if ( rdmsr_safe(reg, temp) )
-+ break;
- if ( val != guest_misc_enable(temp) )
- goto invalid;
- return X86EMUL_OKAY;
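
A hedged user-space sketch of the distinction the fix restores: rdmsrl() assumes the register exists (a #GP on AMD hardware is fatal in that path), while rdmsr_safe() reports failure so the emulator can break out and inject #GP to the guest instead. The MSR model below is made up.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MSR_IA32_MISC_ENABLE 0x1a0

    /* Pretend only Intel's MISC_ENABLE exists on this "CPU". */
    static bool msr_exists(uint32_t reg)
    {
        return reg == MSR_IA32_MISC_ENABLE;
    }

    /* rdmsr_safe()-like: 0 on success, nonzero if the read would #GP. */
    static int rdmsr_safe_like(uint32_t reg, uint64_t *val)
    {
        if (!msr_exists(reg))
            return 1;          /* caller breaks out and injects #GP */
        *val = 0x1;            /* dummy value */
        return 0;
    }

    int main(void)
    {
        uint64_t v;

        printf("%d %d\n",
               rdmsr_safe_like(MSR_IA32_MISC_ENABLE, &v),   /* 0 */
               rdmsr_safe_like(0x123, &v));                 /* 1 */
        return 0;
    }
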
diff --git a/system/xen/xsa/xsa334.patch b/system/xen/xsa/xsa334.patch
deleted file mode 100644
index 4260cdb2b2..0000000000
--- a/system/xen/xsa/xsa334.patch
+++ /dev/null
@@ -1,51 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: xen/memory: Don't skip the RCU unlock path in acquire_resource()
-
-In the case that an HVM Stubdomain makes an XENMEM_acquire_resource hypercall,
-the FIXME path will bypass rcu_unlock_domain() on the way out of the function.
-
-Move the check to the start of the function. This does change the behaviour
-of the get-size path for HVM Stubdomains, but that functionality is currently
-broken and unused anyway, as well as being quite useless to entities which
-can't actually map the resource.
-
-This is XSA-334.
-
-Fixes: 83fa6552ce ("common: add a new mappable resource type: XENMEM_resource_grant_table")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
-diff --git a/xen/common/memory.c b/xen/common/memory.c
-index 1a3c9ffb30..29741d8904 100644
---- a/xen/common/memory.c
-+++ b/xen/common/memory.c
-@@ -1058,6 +1058,14 @@ static int acquire_resource(
- xen_pfn_t mfn_list[32];
- int rc;
-
-+ /*
-+ * FIXME: Until foreign pages inserted into the P2M are properly
-+ * reference counted, it is unsafe to allow mapping of
-+ * resource pages unless the caller is the hardware domain.
-+ */
-+ if ( paging_mode_translate(currd) && !is_hardware_domain(currd) )
-+ return -EACCES;
-+
- if ( copy_from_guest(&xmar, arg, 1) )
- return -EFAULT;
-
-@@ -1114,14 +1122,6 @@ static int acquire_resource(
- xen_pfn_t gfn_list[ARRAY_SIZE(mfn_list)];
- unsigned int i;
-
-- /*
-- * FIXME: Until foreign pages inserted into the P2M are properly
-- * reference counted, it is unsafe to allow mapping of
-- * resource pages unless the caller is the hardware domain.
-- */
-- if ( !is_hardware_domain(currd) )
-- return -EACCES;
--
- if ( copy_from_guest(gfn_list, xmar.frame_list, xmar.nr_frames) )
- rc = -EFAULT;
-
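
A hedged sketch of the bug class fixed above: a permission check placed after a resource has been acquired, with an early return that skips the release. Moving the check ahead of the acquisition, as the patch does, keeps every exit path balanced. A pthread mutex stands in for the RCU domain lock.

    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static int acquire_resource_fixed(int allowed)
    {
        if (!allowed)                    /* validate before acquiring */
            return -13;                  /* -EACCES */

        pthread_mutex_lock(&lock);
        /* ... operate on the resource ... */
        pthread_mutex_unlock(&lock);     /* every path now releases */
        return 0;
    }

    int main(void)
    {
        return acquire_resource_fixed(1);
    }
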
diff --git a/system/xen/xsa/xsa335-qemu.patch b/system/xen/xsa/xsa335-qemu.patch
deleted file mode 100644
index 5617502359..0000000000
--- a/system/xen/xsa/xsa335-qemu.patch
+++ /dev/null
@@ -1,84 +0,0 @@
-From c5bd2924c6d6a5bcbffb8b5e7798a88970131c07 Mon Sep 17 00:00:00 2001
-From: Gerd Hoffmann <kraxel@redhat.com>
-Date: Mon, 17 Aug 2020 08:34:22 +0200
-Subject: [PATCH] usb: fix setup_len init (CVE-2020-14364)
-
-Store calculated setup_len in a local variable, verify it, and only
-write it to the struct (USBDevice->setup_len) in case it passed the
-sanity checks.
-
-This prevents other code (do_token_{in,out} functions specifically)
-from working with invalid USBDevice->setup_len values and overrunning
-the USBDevice->setup_buf[] buffer.
-
-Fixes: CVE-2020-14364
-Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
----
- hw/usb/core.c | 16 ++++++++++------
- 1 file changed, 10 insertions(+), 6 deletions(-)
-
-diff --git a/hw/usb/core.c b/hw/usb/core.c
-index 5abd128b6bc5..5234dcc73fea 100644
---- a/hw/usb/core.c
-+++ b/hw/usb/core.c
-@@ -129,6 +129,7 @@ void usb_wakeup(USBEndpoint *ep, unsigned int stream)
- static void do_token_setup(USBDevice *s, USBPacket *p)
- {
- int request, value, index;
-+ unsigned int setup_len;
-
- if (p->iov.size != 8) {
- p->status = USB_RET_STALL;
-@@ -138,14 +139,15 @@ static void do_token_setup(USBDevice *s, USBPacket *p)
- usb_packet_copy(p, s->setup_buf, p->iov.size);
- s->setup_index = 0;
- p->actual_length = 0;
-- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6];
-- if (s->setup_len > sizeof(s->data_buf)) {
-+ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6];
-+ if (setup_len > sizeof(s->data_buf)) {
- fprintf(stderr,
- "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n",
-- s->setup_len, sizeof(s->data_buf));
-+ setup_len, sizeof(s->data_buf));
- p->status = USB_RET_STALL;
- return;
- }
-+ s->setup_len = setup_len;
-
- request = (s->setup_buf[0] << 8) | s->setup_buf[1];
- value = (s->setup_buf[3] << 8) | s->setup_buf[2];
-@@ -259,26 +261,28 @@ static void do_token_out(USBDevice *s, USBPacket *p)
- static void do_parameter(USBDevice *s, USBPacket *p)
- {
- int i, request, value, index;
-+ unsigned int setup_len;
-
- for (i = 0; i < 8; i++) {
- s->setup_buf[i] = p->parameter >> (i*8);
- }
-
- s->setup_state = SETUP_STATE_PARAM;
-- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6];
- s->setup_index = 0;
-
- request = (s->setup_buf[0] << 8) | s->setup_buf[1];
- value = (s->setup_buf[3] << 8) | s->setup_buf[2];
- index = (s->setup_buf[5] << 8) | s->setup_buf[4];
-
-- if (s->setup_len > sizeof(s->data_buf)) {
-+ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6];
-+ if (setup_len > sizeof(s->data_buf)) {
- fprintf(stderr,
- "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n",
-- s->setup_len, sizeof(s->data_buf));
-+ setup_len, sizeof(s->data_buf));
- p->status = USB_RET_STALL;
- return;
- }
-+ s->setup_len = setup_len;
-
- if (p->pid == USB_TOKEN_OUT) {
- usb_packet_copy(p, s->data_buf, s->setup_len);
---
-2.18.4
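
A minimal sketch of the validate-then-commit pattern the QEMU fix applies: compute the length into a local, bounds-check it, and only then store it in the long-lived device state, so do_token_{in,out} can never observe an out-of-range setup_len. The struct is a trimmed stand-in for USBDevice.

    #include <stdio.h>

    struct dev_like {
        unsigned int setup_len;
        unsigned char setup_buf[8];
        unsigned char data_buf[64];
    };

    static int set_setup_len(struct dev_like *s)
    {
        unsigned int setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6];

        if (setup_len > sizeof(s->data_buf))
            return -1;               /* reject before touching state */

        s->setup_len = setup_len;    /* commit only validated values */
        return 0;
    }

    int main(void)
    {
        struct dev_like d = { .setup_buf = { [6] = 16 } };

        printf("%d len=%u\n", set_setup_len(&d), d.setup_len);  /* 0 len=16 */
        d.setup_buf[7] = 4;          /* requested length 0x0410 = 1040 */
        printf("%d len=%u\n", set_setup_len(&d), d.setup_len);  /* -1 len=16 */
        return 0;
    }
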
diff --git a/system/xen/xsa/xsa336.patch b/system/xen/xsa/xsa336.patch
deleted file mode 100644
index b44c298b70..0000000000
--- a/system/xen/xsa/xsa336.patch
+++ /dev/null
@@ -1,283 +0,0 @@
-From: Roger Pau Monné <roger.pau@citrix.com>
-Subject: x86/vpt: fix race when migrating timers between vCPUs
-
-The current vPT code will migrate the emulated timers between vCPUs
-(change the pt->vcpu field) while just holding the destination lock,
-either from create_periodic_time or pt_adjust_global_vcpu_target if
-the global target is adjusted. Changing the periodic_timer vCPU field
-in this way creates a race where a third party could grab the lock in
-the unlocked region of pt_adjust_global_vcpu_target (or before
-create_periodic_time performs the vcpu change) and then release the
-lock from a different vCPU, creating a locking imbalance.
-
-Introduce a per-domain rwlock in order to protect periodic_time
-migration between vCPU lists. Taking the lock in read mode prevents
-any timer from being migrated to a different vCPU, while taking it in
-write mode allows performing migration of timers across vCPUs. The
-per-vcpu locks are still used to protect all the other fields from the
-periodic_timer struct.
-
-Note that such migration shouldn't happen frequently, and hence
-there's no performance drop as a result of such locking.
-
-This is XSA-336.
-
-Reported-by: Igor Druzhinin <igor.druzhinin@citrix.com>
-Tested-by: Igor Druzhinin <igor.druzhinin@citrix.com>
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
----
-Changes since v2:
- - Re-order pt_adjust_vcpu to remove one if.
- - Fix pt_lock to not call pt_vcpu_lock, as we might end up using a
- stale value of pt->vcpu when taking the per-vcpu lock.
-
-Changes since v1:
- - Use a per-domain rwlock to protect timer vCPU migration.
-
---- a/xen/arch/x86/hvm/hvm.c
-+++ b/xen/arch/x86/hvm/hvm.c
-@@ -658,6 +658,8 @@ int hvm_domain_initialise(struct domain
- /* need link to containing domain */
- d->arch.hvm.pl_time->domain = d;
-
-+ rwlock_init(&d->arch.hvm.pl_time->pt_migrate);
-+
- /* Set the default IO Bitmap. */
- if ( is_hardware_domain(d) )
- {
---- a/xen/arch/x86/hvm/vpt.c
-+++ b/xen/arch/x86/hvm/vpt.c
-@@ -153,23 +153,32 @@ static int pt_irq_masked(struct periodic
- return 1;
- }
-
--static void pt_lock(struct periodic_time *pt)
-+static void pt_vcpu_lock(struct vcpu *v)
- {
-- struct vcpu *v;
-+ read_lock(&v->domain->arch.hvm.pl_time->pt_migrate);
-+ spin_lock(&v->arch.hvm.tm_lock);
-+}
-
-- for ( ; ; )
-- {
-- v = pt->vcpu;
-- spin_lock(&v->arch.hvm.tm_lock);
-- if ( likely(pt->vcpu == v) )
-- break;
-- spin_unlock(&v->arch.hvm.tm_lock);
-- }
-+static void pt_vcpu_unlock(struct vcpu *v)
-+{
-+ spin_unlock(&v->arch.hvm.tm_lock);
-+ read_unlock(&v->domain->arch.hvm.pl_time->pt_migrate);
-+}
-+
-+static void pt_lock(struct periodic_time *pt)
-+{
-+ /*
-+ * We cannot use pt_vcpu_lock here, because we need to acquire the
-+ * per-domain lock first and then (re-)fetch the value of pt->vcpu, or
-+ * else we might be using a stale value of pt->vcpu.
-+ */
-+ read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
-+ spin_lock(&pt->vcpu->arch.hvm.tm_lock);
- }
-
- static void pt_unlock(struct periodic_time *pt)
- {
-- spin_unlock(&pt->vcpu->arch.hvm.tm_lock);
-+ pt_vcpu_unlock(pt->vcpu);
- }
-
- static void pt_process_missed_ticks(struct periodic_time *pt)
-@@ -219,7 +228,7 @@ void pt_save_timer(struct vcpu *v)
- if ( v->pause_flags & VPF_blocked )
- return;
-
-- spin_lock(&v->arch.hvm.tm_lock);
-+ pt_vcpu_lock(v);
-
- list_for_each_entry ( pt, head, list )
- if ( !pt->do_not_freeze )
-@@ -227,7 +236,7 @@ void pt_save_timer(struct vcpu *v)
-
- pt_freeze_time(v);
-
-- spin_unlock(&v->arch.hvm.tm_lock);
-+ pt_vcpu_unlock(v);
- }
-
- void pt_restore_timer(struct vcpu *v)
-@@ -235,7 +244,7 @@ void pt_restore_timer(struct vcpu *v)
- struct list_head *head = &v->arch.hvm.tm_list;
- struct periodic_time *pt;
-
-- spin_lock(&v->arch.hvm.tm_lock);
-+ pt_vcpu_lock(v);
-
- list_for_each_entry ( pt, head, list )
- {
-@@ -248,7 +257,7 @@ void pt_restore_timer(struct vcpu *v)
-
- pt_thaw_time(v);
-
-- spin_unlock(&v->arch.hvm.tm_lock);
-+ pt_vcpu_unlock(v);
- }
-
- static void pt_timer_fn(void *data)
-@@ -309,7 +318,7 @@ int pt_update_irq(struct vcpu *v)
- int irq, pt_vector = -1;
- bool level;
-
-- spin_lock(&v->arch.hvm.tm_lock);
-+ pt_vcpu_lock(v);
-
- earliest_pt = NULL;
- max_lag = -1ULL;
-@@ -339,7 +348,7 @@ int pt_update_irq(struct vcpu *v)
-
- if ( earliest_pt == NULL )
- {
-- spin_unlock(&v->arch.hvm.tm_lock);
-+ pt_vcpu_unlock(v);
- return -1;
- }
-
-@@ -347,7 +356,7 @@ int pt_update_irq(struct vcpu *v)
- irq = earliest_pt->irq;
- level = earliest_pt->level;
-
-- spin_unlock(&v->arch.hvm.tm_lock);
-+ pt_vcpu_unlock(v);
-
- switch ( earliest_pt->source )
- {
-@@ -394,7 +403,7 @@ int pt_update_irq(struct vcpu *v)
- time_cb *cb = NULL;
- void *cb_priv;
-
-- spin_lock(&v->arch.hvm.tm_lock);
-+ pt_vcpu_lock(v);
- /* Make sure the timer is still on the list. */
- list_for_each_entry ( pt, &v->arch.hvm.tm_list, list )
- if ( pt == earliest_pt )
-@@ -404,7 +413,7 @@ int pt_update_irq(struct vcpu *v)
- cb_priv = pt->priv;
- break;
- }
-- spin_unlock(&v->arch.hvm.tm_lock);
-+ pt_vcpu_unlock(v);
-
- if ( cb != NULL )
- cb(v, cb_priv);
-@@ -441,12 +450,12 @@ void pt_intr_post(struct vcpu *v, struct
- if ( intack.source == hvm_intsrc_vector )
- return;
-
-- spin_lock(&v->arch.hvm.tm_lock);
-+ pt_vcpu_lock(v);
-
- pt = is_pt_irq(v, intack);
- if ( pt == NULL )
- {
-- spin_unlock(&v->arch.hvm.tm_lock);
-+ pt_vcpu_unlock(v);
- return;
- }
-
-@@ -455,7 +464,7 @@ void pt_intr_post(struct vcpu *v, struct
- cb = pt->cb;
- cb_priv = pt->priv;
-
-- spin_unlock(&v->arch.hvm.tm_lock);
-+ pt_vcpu_unlock(v);
-
- if ( cb != NULL )
- cb(v, cb_priv);
-@@ -466,12 +475,12 @@ void pt_migrate(struct vcpu *v)
- struct list_head *head = &v->arch.hvm.tm_list;
- struct periodic_time *pt;
-
-- spin_lock(&v->arch.hvm.tm_lock);
-+ pt_vcpu_lock(v);
-
- list_for_each_entry ( pt, head, list )
- migrate_timer(&pt->timer, v->processor);
-
-- spin_unlock(&v->arch.hvm.tm_lock);
-+ pt_vcpu_unlock(v);
- }
-
- void create_periodic_time(
-@@ -490,7 +499,7 @@ void create_periodic_time(
-
- destroy_periodic_time(pt);
-
-- spin_lock(&v->arch.hvm.tm_lock);
-+ write_lock(&v->domain->arch.hvm.pl_time->pt_migrate);
-
- pt->pending_intr_nr = 0;
- pt->do_not_freeze = 0;
-@@ -540,7 +549,7 @@ void create_periodic_time(
- init_timer(&pt->timer, pt_timer_fn, pt, v->processor);
- set_timer(&pt->timer, pt->scheduled);
-
-- spin_unlock(&v->arch.hvm.tm_lock);
-+ write_unlock(&v->domain->arch.hvm.pl_time->pt_migrate);
- }
-
- void destroy_periodic_time(struct periodic_time *pt)
-@@ -565,30 +574,20 @@ void destroy_periodic_time(struct period
-
- static void pt_adjust_vcpu(struct periodic_time *pt, struct vcpu *v)
- {
-- int on_list;
--
- ASSERT(pt->source == PTSRC_isa || pt->source == PTSRC_ioapic);
-
- if ( pt->vcpu == NULL )
- return;
-
-- pt_lock(pt);
-- on_list = pt->on_list;
-- if ( pt->on_list )
-- list_del(&pt->list);
-- pt->on_list = 0;
-- pt_unlock(pt);
--
-- spin_lock(&v->arch.hvm.tm_lock);
-+ write_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
- pt->vcpu = v;
-- if ( on_list )
-+ if ( pt->on_list )
- {
-- pt->on_list = 1;
-+ list_del(&pt->list);
- list_add(&pt->list, &v->arch.hvm.tm_list);
--
- migrate_timer(&pt->timer, v->processor);
- }
-- spin_unlock(&v->arch.hvm.tm_lock);
-+ write_unlock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
- }
-
- void pt_adjust_global_vcpu_target(struct vcpu *v)
---- a/xen/include/asm-x86/hvm/vpt.h
-+++ b/xen/include/asm-x86/hvm/vpt.h
-@@ -128,6 +128,13 @@ struct pl_time { /* platform time */
- struct RTCState vrtc;
- struct HPETState vhpet;
- struct PMTState vpmt;
-+ /*
-+ * rwlock to prevent periodic_time vCPU migration. Take the lock in read
-+ * mode in order to prevent the vcpu field of periodic_time from changing.
-+ * Lock must be taken in write mode when changes to the vcpu field are
-+ * performed, as it allows exclusive access to all the timers of a domain.
-+ */
-+ rwlock_t pt_migrate;
- /* guest_time = Xen sys time + stime_offset */
- int64_t stime_offset;
- /* Ensures monotonicity in appropriate timer modes. */
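
The locking scheme this patch introduces can be summarised in a small pthreads sketch (pthread_rwlock_t/pthread_mutex_t stand in for Xen's rwlock_t/spinlock_t, and the direct domain back-pointer is an assumption of the sketch; the real code reaches the lock via pt->vcpu->domain->arch.hvm.pl_time): holding the per-domain lock in read mode pins pt->vcpu, so the per-vCPU lock that follows is taken on a stable vCPU, while timer migration takes the same lock in write mode and may then rewrite the field.

    #include <pthread.h>

    struct vcpu { pthread_mutex_t tm_lock; };
    struct domain { pthread_rwlock_t pt_migrate; };
    struct periodic_time { struct vcpu *vcpu; struct domain *domain; };

    static void pt_lock(struct periodic_time *pt)
    {
        /* Read-lock first, then re-read pt->vcpu: it can no longer change. */
        pthread_rwlock_rdlock(&pt->domain->pt_migrate);
        pthread_mutex_lock(&pt->vcpu->tm_lock);
    }

    static void pt_unlock(struct periodic_time *pt)
    {
        pthread_mutex_unlock(&pt->vcpu->tm_lock);
        pthread_rwlock_unlock(&pt->domain->pt_migrate);
    }

    static void pt_migrate_vcpu(struct periodic_time *pt, struct vcpu *v)
    {
        /* Write mode excludes all readers, so pt->vcpu may change safely. */
        pthread_rwlock_wrlock(&pt->domain->pt_migrate);
        pt->vcpu = v;
        pthread_rwlock_unlock(&pt->domain->pt_migrate);
    }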
diff --git a/system/xen/xsa/xsa337-4.13-1.patch b/system/xen/xsa/xsa337-4.13-1.patch
deleted file mode 100644
index 2091626f4f..0000000000
--- a/system/xen/xsa/xsa337-4.13-1.patch
+++ /dev/null
@@ -1,87 +0,0 @@
-From: Roger Pau Monné <roger.pau@citrix.com>
-Subject: x86/msi: get rid of read_msi_msg
-
-It's safer and faster to just use the cached last written
-(untranslated) MSI message stored in msi_desc for the single user that
-calls read_msi_msg.
-
-This also prevents relying on the data read from the device MSI
-registers in order to figure out the index into the IOMMU interrupt
-remapping table, which is not safe.
-
-This is part of XSA-337.
-
-Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Requested-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
---- a/xen/arch/x86/msi.c
-+++ b/xen/arch/x86/msi.c
-@@ -183,54 +183,6 @@ void msi_compose_msg(unsigned vector, co
- MSI_DATA_VECTOR(vector);
- }
-
--static bool read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
--{
-- switch ( entry->msi_attrib.type )
-- {
-- case PCI_CAP_ID_MSI:
-- {
-- struct pci_dev *dev = entry->dev;
-- int pos = entry->msi_attrib.pos;
-- uint16_t data;
--
-- msg->address_lo = pci_conf_read32(dev->sbdf,
-- msi_lower_address_reg(pos));
-- if ( entry->msi_attrib.is_64 )
-- {
-- msg->address_hi = pci_conf_read32(dev->sbdf,
-- msi_upper_address_reg(pos));
-- data = pci_conf_read16(dev->sbdf, msi_data_reg(pos, 1));
-- }
-- else
-- {
-- msg->address_hi = 0;
-- data = pci_conf_read16(dev->sbdf, msi_data_reg(pos, 0));
-- }
-- msg->data = data;
-- break;
-- }
-- case PCI_CAP_ID_MSIX:
-- {
-- void __iomem *base = entry->mask_base;
--
-- if ( unlikely(!msix_memory_decoded(entry->dev,
-- entry->msi_attrib.pos)) )
-- return false;
-- msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
-- msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
-- msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
-- break;
-- }
-- default:
-- BUG();
-- }
--
-- if ( iommu_intremap )
-- iommu_read_msi_from_ire(entry, msg);
--
-- return true;
--}
--
- static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
- {
- entry->msg = *msg;
-@@ -302,10 +254,7 @@ void set_msi_affinity(struct irq_desc *d
-
- ASSERT(spin_is_locked(&desc->lock));
-
-- memset(&msg, 0, sizeof(msg));
-- if ( !read_msi_msg(msi_desc, &msg) )
-- return;
--
-+ msg = msi_desc->msg;
- msg.data &= ~MSI_DATA_VECTOR_MASK;
- msg.data |= MSI_DATA_VECTOR(desc->arch.vector);
- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
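
In effect the device's MSI registers become write-only once a guest may own the device: updates are derived from the cached copy rather than from a fresh read the guest could have influenced. A hedged sketch of that direction (illustrative structures, not Xen's real msi_desc):

    #include <stdint.h>

    struct msi_msg { uint32_t address_lo, address_hi, data; };
    struct msi_desc { struct msi_msg msg; /* last value written */ };

    #define MSI_DATA_VECTOR_MASK 0xff

    static void set_vector(struct msi_desc *desc, uint8_t vector,
                           void (*write_hw)(const struct msi_msg *))
    {
        struct msi_msg msg = desc->msg;    /* trusted cached copy */

        msg.data = (msg.data & ~MSI_DATA_VECTOR_MASK) | vector;
        desc->msg = msg;                   /* refresh the cache */
        write_hw(&msg);                    /* hardware is write-only here */
    }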
diff --git a/system/xen/xsa/xsa337-4.13-2.patch b/system/xen/xsa/xsa337-4.13-2.patch
deleted file mode 100644
index bdefd37cdc..0000000000
--- a/system/xen/xsa/xsa337-4.13-2.patch
+++ /dev/null
@@ -1,181 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: x86/MSI-X: restrict reading of table/PBA bases from BARs
-
-When assigned to less trusted or un-trusted guests, devices may change
-state behind our backs (they may e.g. get reset by means we may not know
-about). Therefore we should avoid reading BARs from hardware once a
-device is no longer owned by Dom0. Furthermore when we can't read a BAR,
-or when we read zero, we shouldn't instead use the caller provided
-address unless that caller can be trusted.
-
-Re-arrange the logic in msix_capability_init() such that only Dom0 (and
-only if the device isn't DomU-owned yet) or calls through
-PHYSDEVOP_prepare_msix will actually result in the reading of the
-respective BAR register(s). Additionally do so only as long as in-use
-table entries are known (note that invocation of PHYSDEVOP_prepare_msix
-counts as a "pseudo" entry). In all other uses the value already
-recorded will get used instead.
-
-Clear the recorded values in _pci_cleanup_msix() as well as on the one
-affected error path. (Adjust this error path to also avoid blindly
-disabling MSI-X when it was enabled on entry to the function.)
-
-While moving around variable declarations (in many cases to reduce their
-scopes), also adjust some of their types.
-
-This is part of XSA-337.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
-
---- a/xen/arch/x86/msi.c
-+++ b/xen/arch/x86/msi.c
-@@ -769,16 +769,14 @@ static int msix_capability_init(struct p
- {
- struct arch_msix *msix = dev->msix;
- struct msi_desc *entry = NULL;
-- int vf;
- u16 control;
- u64 table_paddr;
- u32 table_offset;
-- u8 bir, pbus, pslot, pfunc;
- u16 seg = dev->seg;
- u8 bus = dev->bus;
- u8 slot = PCI_SLOT(dev->devfn);
- u8 func = PCI_FUNC(dev->devfn);
-- bool maskall = msix->host_maskall;
-+ bool maskall = msix->host_maskall, zap_on_error = false;
- unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
- PCI_CAP_ID_MSIX);
-
-@@ -820,43 +818,45 @@ static int msix_capability_init(struct p
-
- /* Locate MSI-X table region */
- table_offset = pci_conf_read32(dev->sbdf, msix_table_offset_reg(pos));
-- bir = (u8)(table_offset & PCI_MSIX_BIRMASK);
-- table_offset &= ~PCI_MSIX_BIRMASK;
-+ if ( !msix->used_entries &&
-+ (!msi ||
-+ (is_hardware_domain(current->domain) &&
-+ (dev->domain == current->domain || dev->domain == dom_io))) )
-+ {
-+ unsigned int bir = table_offset & PCI_MSIX_BIRMASK, pbus, pslot, pfunc;
-+ int vf;
-+ paddr_t pba_paddr;
-+ unsigned int pba_offset;
-
-- if ( !dev->info.is_virtfn )
-- {
-- pbus = bus;
-- pslot = slot;
-- pfunc = func;
-- vf = -1;
-- }
-- else
-- {
-- pbus = dev->info.physfn.bus;
-- pslot = PCI_SLOT(dev->info.physfn.devfn);
-- pfunc = PCI_FUNC(dev->info.physfn.devfn);
-- vf = PCI_BDF2(dev->bus, dev->devfn);
-- }
--
-- table_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf);
-- WARN_ON(msi && msi->table_base != table_paddr);
-- if ( !table_paddr )
-- {
-- if ( !msi || !msi->table_base )
-+ if ( !dev->info.is_virtfn )
- {
-- pci_conf_write16(dev->sbdf, msix_control_reg(pos),
-- control & ~PCI_MSIX_FLAGS_ENABLE);
-- xfree(entry);
-- return -ENXIO;
-+ pbus = bus;
-+ pslot = slot;
-+ pfunc = func;
-+ vf = -1;
-+ }
-+ else
-+ {
-+ pbus = dev->info.physfn.bus;
-+ pslot = PCI_SLOT(dev->info.physfn.devfn);
-+ pfunc = PCI_FUNC(dev->info.physfn.devfn);
-+ vf = PCI_BDF2(dev->bus, dev->devfn);
- }
-- table_paddr = msi->table_base;
-- }
-- table_paddr += table_offset;
-
-- if ( !msix->used_entries )
-- {
-- u64 pba_paddr;
-- u32 pba_offset;
-+ table_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf);
-+ WARN_ON(msi && msi->table_base != table_paddr);
-+ if ( !table_paddr )
-+ {
-+ if ( !msi || !msi->table_base )
-+ {
-+ pci_conf_write16(dev->sbdf, msix_control_reg(pos),
-+ control & ~PCI_MSIX_FLAGS_ENABLE);
-+ xfree(entry);
-+ return -ENXIO;
-+ }
-+ table_paddr = msi->table_base;
-+ }
-+ table_paddr += table_offset & ~PCI_MSIX_BIRMASK;
-
- msix->table.first = PFN_DOWN(table_paddr);
- msix->table.last = PFN_DOWN(table_paddr +
-@@ -875,7 +875,18 @@ static int msix_capability_init(struct p
- BITS_TO_LONGS(msix->nr_entries) - 1);
- WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, msix->pba.first,
- msix->pba.last));
-+
-+ zap_on_error = true;
-+ }
-+ else if ( !msix->table.first )
-+ {
-+ pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
-+ xfree(entry);
-+ return -ENODATA;
- }
-+ else
-+ table_paddr = (msix->table.first << PAGE_SHIFT) +
-+ (table_offset & ~PCI_MSIX_BIRMASK & ~PAGE_MASK);
-
- if ( entry )
- {
-@@ -886,8 +897,15 @@ static int msix_capability_init(struct p
-
- if ( idx < 0 )
- {
-- pci_conf_write16(dev->sbdf, msix_control_reg(pos),
-- control & ~PCI_MSIX_FLAGS_ENABLE);
-+ if ( zap_on_error )
-+ {
-+ msix->table.first = 0;
-+ msix->pba.first = 0;
-+
-+ control &= ~PCI_MSIX_FLAGS_ENABLE;
-+ }
-+
-+ pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
- xfree(entry);
- return idx;
- }
-@@ -1076,9 +1094,14 @@ static void _pci_cleanup_msix(struct arc
- if ( rangeset_remove_range(mmio_ro_ranges, msix->table.first,
- msix->table.last) )
- WARN();
-+ msix->table.first = 0;
-+ msix->table.last = 0;
-+
- if ( rangeset_remove_range(mmio_ro_ranges, msix->pba.first,
- msix->pba.last) )
- WARN();
-+ msix->pba.first = 0;
-+ msix->pba.last = 0;
- }
- }
-
diff --git a/system/xen/xsa/xsa338.patch b/system/xen/xsa/xsa338.patch
deleted file mode 100644
index 776521990e..0000000000
--- a/system/xen/xsa/xsa338.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: evtchn: relax port_is_valid()
-
-To avoid ports potentially becoming invalid behind the back of certain
-other functions (due to ->max_evtchns shrinking) because of
-- a guest invoking evtchn_reset() and from a 2nd vCPU opening new
- channels in parallel (see also XSA-343),
-- alloc_unbound_xen_event_channel() produced channels living above the
- 2-level range (see also XSA-342),
-drop the max_evtchns check from port_is_valid(). For a port for which
-the function once returned "true", the returned value may not turn into
-"false" later on. The function's result may only depend on bounds which
-can only ever grow (which is the case for d->valid_evtchns).
-
-This also eliminates a false sense of safety, utilized by some of the
-users (see again XSA-343): Without a suitable lock held, d->max_evtchns
-may change at any time, and hence deducing that certain other operations
-are safe when port_is_valid() returned true is not legitimate. The
-opportunities to abuse this may get widened by the change here
-(depending on guest and host configuration), but will be taken care of
-by the other XSA.
-
-This is XSA-338.
-
-Fixes: 48974e6ce52e ("evtchn: use a per-domain variable for the max number of event channels")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-Reviewed-by: Julien Grall <jgrall@amazon.com>
----
-v5: New, split from larger patch.
-
---- a/xen/include/xen/event.h
-+++ b/xen/include/xen/event.h
-@@ -107,8 +107,6 @@ void notify_via_xen_event_channel(struct
-
- static inline bool_t port_is_valid(struct domain *d, unsigned int p)
- {
-- if ( p >= d->max_evtchns )
-- return 0;
- return p < read_atomic(&d->valid_evtchns);
- }
-
diff --git a/system/xen/xsa/xsa339.patch b/system/xen/xsa/xsa339.patch
deleted file mode 100644
index 3311ae093f..0000000000
--- a/system/xen/xsa/xsa339.patch
+++ /dev/null
@@ -1,76 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/pv: Avoid double exception injection
-
-There is at least one path (SYSENTER with NT set, Xen converts to #GP) which
-ends up injecting the #GP fault twice, first in compat_sysenter(), and then a
-second time in compat_test_all_events(), due to the stale TBF_EXCEPTION left
-in TRAPBOUNCE_flags.
-
-The guest kernel sees the second fault first, which is a kernel level #GP
-pointing at the head of the #GP handler, and is therefore a userspace
-trigger-able DoS.
-
-This particular bug has bitten us several times before, so rearrange
-{compat_,}create_bounce_frame() to clobber TRAPBOUNCE on success, rather than
-leaving this task to one area of code which isn't used uniformly.
-
-Other scenarios which might result in a double injection (e.g. two calls
-directly to compat_create_bounce_frame) will now crash the guest, which is far
-more obvious than letting the kernel run with corrupt state.
-
-This is XSA-339
-
-Fixes: fdac9515607b ("x86: clear EFLAGS.NT in SYSENTER entry path")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
-diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
-index c3e62f8734..73619f57ca 100644
---- a/xen/arch/x86/x86_64/compat/entry.S
-+++ b/xen/arch/x86/x86_64/compat/entry.S
-@@ -78,7 +78,6 @@ compat_process_softirqs:
- sti
- .Lcompat_bounce_exception:
- call compat_create_bounce_frame
-- movb $0, TRAPBOUNCE_flags(%rdx)
- jmp compat_test_all_events
-
- ALIGN
-@@ -352,7 +351,13 @@ __UNLIKELY_END(compat_bounce_null_selector)
- movl %eax,UREGS_cs+8(%rsp)
- movl TRAPBOUNCE_eip(%rdx),%eax
- movl %eax,UREGS_rip+8(%rsp)
-+
-+ /* Trapbounce complete. Clobber state to avoid an erroneous second injection. */
-+ xor %eax, %eax
-+ mov %ax, TRAPBOUNCE_cs(%rdx)
-+ mov %al, TRAPBOUNCE_flags(%rdx)
- ret
-+
- .section .fixup,"ax"
- .Lfx13:
- xorl %edi,%edi
-diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
-index 1e880eb9f6..71a00e846b 100644
---- a/xen/arch/x86/x86_64/entry.S
-+++ b/xen/arch/x86/x86_64/entry.S
-@@ -90,7 +90,6 @@ process_softirqs:
- sti
- .Lbounce_exception:
- call create_bounce_frame
-- movb $0, TRAPBOUNCE_flags(%rdx)
- jmp test_all_events
-
- ALIGN
-@@ -512,6 +511,11 @@ UNLIKELY_START(z, create_bounce_frame_bad_bounce_ip)
- jmp asm_domain_crash_synchronous /* Does not return */
- __UNLIKELY_END(create_bounce_frame_bad_bounce_ip)
- movq %rax,UREGS_rip+8(%rsp)
-+
-+ /* Trapbounce complete. Clobber state to avoid an erroneous second injection. */
-+ xor %eax, %eax
-+ mov %rax, TRAPBOUNCE_eip(%rdx)
-+ mov %al, TRAPBOUNCE_flags(%rdx)
- ret
-
- .pushsection .fixup, "ax", @progbits
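
The assembly change amounts to a consume-and-clobber discipline on the one-shot TRAPBOUNCE record: whoever builds the bounce frame also invalidates the record, so a stale TBF_EXCEPTION can never be injected a second time. Roughly, in C (illustrative names, not the real struct trap_bounce):

    #include <assert.h>
    #include <stdint.h>

    #define TBF_EXCEPTION 1

    struct trapbounce { uint16_t cs; uintptr_t eip; uint8_t flags; };

    static void inject_bounce(struct trapbounce *tb)
    {
        assert(tb->flags & TBF_EXCEPTION); /* stale re-use now trips loudly */
        /* ... build the bounce frame from tb->cs / tb->eip ... */
        tb->cs = 0;                        /* consume and clobber, so an */
        tb->eip = 0;                       /* erroneous second call fails */
        tb->flags = 0;                     /* visibly instead of re-injecting */
    }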
diff --git a/system/xen/xsa/xsa340.patch b/system/xen/xsa/xsa340.patch
deleted file mode 100644
index 38d04da465..0000000000
--- a/system/xen/xsa/xsa340.patch
+++ /dev/null
@@ -1,65 +0,0 @@
-From: Julien Grall <jgrall@amazon.com>
-Subject: xen/evtchn: Add missing barriers when accessing/allocating an event channel
-
-While the allocation of a bucket is always performed with the per-domain
-lock, the bucket may be accessed without the lock taken (for instance, see
-evtchn_send()).
-
-Instead such sites rely on port_is_valid() to return a non-zero value
-when the port has a struct evtchn associated to it. The function will
-mostly check whether the port is less than d->valid_evtchns as all the
-buckets/event channels should be allocated up to that point.
-
-Unfortunately a compiler is free to re-order the assignment in
-evtchn_allocate_port() so it would be possible to have d->valid_evtchns
-updated before the new bucket has finished being allocated.
-
-Additionally on Arm, even if this was compiled "correctly", the
-processor can still re-order the memory access.
-
-Add a write memory barrier in the allocation side and a read memory
-barrier when the port is valid to prevent any re-ordering issue.
-
-This is XSA-340.
-
-Reported-by: Julien Grall <jgrall@amazon.com>
-Signed-off-by: Julien Grall <jgrall@amazon.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-
---- a/xen/common/event_channel.c
-+++ b/xen/common/event_channel.c
-@@ -178,6 +178,13 @@ int evtchn_allocate_port(struct domain *
- return -ENOMEM;
- bucket_from_port(d, port) = chn;
-
-+ /*
-+ * d->valid_evtchns is used to check whether the bucket can be
-+ * accessed without the per-domain lock. Therefore,
-+ * d->valid_evtchns should be seen *after* the new bucket has
-+ * been setup.
-+ */
-+ smp_wmb();
- write_atomic(&d->valid_evtchns, d->valid_evtchns + EVTCHNS_PER_BUCKET);
- }
-
---- a/xen/include/xen/event.h
-+++ b/xen/include/xen/event.h
-@@ -107,7 +107,17 @@ void notify_via_xen_event_channel(struct
-
- static inline bool_t port_is_valid(struct domain *d, unsigned int p)
- {
-- return p < read_atomic(&d->valid_evtchns);
-+ if ( p >= read_atomic(&d->valid_evtchns) )
-+ return false;
-+
-+ /*
-+ * The caller will usually access the event channel afterwards and
-+ * may be done without taking the per-domain lock. The barrier is
-+ * going in pair with the smp_wmb() barrier in evtchn_allocate_port().
-+ */
-+ smp_rmb();
-+
-+ return true;
- }
-
- static inline struct evtchn *evtchn_from_port(struct domain *d, unsigned int p)
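
The barrier pairing above is the classic publish/consume ordering; expressed with C11 release/acquire atomics it looks like this (a portable approximation, not Xen's smp_wmb()/smp_rmb() primitives):

    #include <stdatomic.h>
    #include <stdbool.h>

    struct evtchn { int state; };

    static struct evtchn *bucket;        /* stands in for bucket_from_port() */
    static _Atomic unsigned int valid_evtchns;

    static void publish_bucket(struct evtchn *chn, unsigned int new_valid)
    {
        bucket = chn;
        /* smp_wmb() analogue: the bucket store is ordered before the counter. */
        atomic_store_explicit(&valid_evtchns, new_valid, memory_order_release);
    }

    static bool port_is_valid(unsigned int port)
    {
        /* smp_rmb() analogue: acquire pairs with the release above, so a
         * caller that sees the new counter also sees the bucket contents. */
        return port < atomic_load_explicit(&valid_evtchns, memory_order_acquire);
    }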
diff --git a/system/xen/xsa/xsa342-4.13.patch b/system/xen/xsa/xsa342-4.13.patch
deleted file mode 100644
index 334baf1b69..0000000000
--- a/system/xen/xsa/xsa342-4.13.patch
+++ /dev/null
@@ -1,145 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: evtchn/x86: enforce correct upper limit for 32-bit guests
-
-The recording of d->max_evtchns in evtchn_2l_init(), in particular with
-the limited set of callers of the function, is insufficient. Neither for
-PV nor for HVM guests is the bitness known at domain_create() time, yet
-the upper bound in 2-level mode depends upon guest bitness. Recording
-too high a limit "allows" x86 32-bit domains to open not properly usable
-event channels, management of which (inside Xen) would then result in
-corruption of the shared info and vCPU info structures.
-
-Keep the upper limit dynamic for the 2-level case, introducing a helper
-function to retrieve the effective limit. This helper is now supposed to
-be private to the event channel code. The uses in do_poll() and
-domain_dump_evtchn_info() weren't consistent with port uses elsewhere
-and hence get switched to port_is_valid().
-
-Furthermore FIFO mode's setup_ports() gets adjusted to loop only up to
-the prior ABI limit, rather than all the way up to the new one.
-
-Finally a word on the change to do_poll(): Accessing ->max_evtchns
-without holding a suitable lock was never safe, as it as well as
-->evtchn_port_ops may change behind do_poll()'s back. Using
-port_is_valid() instead somewhat widens the window for potential abuse,
-until we've dealt with the race altogether (see XSA-343).
-
-This is XSA-342.
-
-Reported-by: Julien Grall <jgrall@amazon.com>
-Fixes: 48974e6ce52e ("evtchn: use a per-domain variable for the max number of event channels")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-Reviewed-by: Julien Grall <jgrall@amazon.com>
-
---- a/xen/common/event_2l.c
-+++ b/xen/common/event_2l.c
-@@ -103,7 +103,6 @@ static const struct evtchn_port_ops evtc
- void evtchn_2l_init(struct domain *d)
- {
- d->evtchn_port_ops = &evtchn_port_ops_2l;
-- d->max_evtchns = BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d);
- }
-
- /*
---- a/xen/common/event_channel.c
-+++ b/xen/common/event_channel.c
-@@ -151,7 +151,7 @@ static void free_evtchn_bucket(struct do
-
- int evtchn_allocate_port(struct domain *d, evtchn_port_t port)
- {
-- if ( port > d->max_evtchn_port || port >= d->max_evtchns )
-+ if ( port > d->max_evtchn_port || port >= max_evtchns(d) )
- return -ENOSPC;
-
- if ( port_is_valid(d, port) )
-@@ -1396,13 +1396,11 @@ static void domain_dump_evtchn_info(stru
-
- spin_lock(&d->event_lock);
-
-- for ( port = 1; port < d->max_evtchns; ++port )
-+ for ( port = 1; port_is_valid(d, port); ++port )
- {
- const struct evtchn *chn;
- char *ssid;
-
-- if ( !port_is_valid(d, port) )
-- continue;
- chn = evtchn_from_port(d, port);
- if ( chn->state == ECS_FREE )
- continue;
---- a/xen/common/event_fifo.c
-+++ b/xen/common/event_fifo.c
-@@ -478,7 +478,7 @@ static void cleanup_event_array(struct d
- d->evtchn_fifo = NULL;
- }
-
--static void setup_ports(struct domain *d)
-+static void setup_ports(struct domain *d, unsigned int prev_evtchns)
- {
- unsigned int port;
-
-@@ -488,7 +488,7 @@ static void setup_ports(struct domain *d
- * - save its pending state.
- * - set default priority.
- */
-- for ( port = 1; port < d->max_evtchns; port++ )
-+ for ( port = 1; port < prev_evtchns; port++ )
- {
- struct evtchn *evtchn;
-
-@@ -546,6 +546,8 @@ int evtchn_fifo_init_control(struct evtc
- if ( !d->evtchn_fifo )
- {
- struct vcpu *vcb;
-+ /* Latch the value before it changes during setup_event_array(). */
-+ unsigned int prev_evtchns = max_evtchns(d);
-
- for_each_vcpu ( d, vcb ) {
- rc = setup_control_block(vcb);
-@@ -562,8 +564,7 @@ int evtchn_fifo_init_control(struct evtc
- goto error;
-
- d->evtchn_port_ops = &evtchn_port_ops_fifo;
-- d->max_evtchns = EVTCHN_FIFO_NR_CHANNELS;
-- setup_ports(d);
-+ setup_ports(d, prev_evtchns);
- }
- else
- rc = map_control_block(v, gfn, offset);
---- a/xen/common/schedule.c
-+++ b/xen/common/schedule.c
-@@ -1434,7 +1434,7 @@ static long do_poll(struct sched_poll *s
- goto out;
-
- rc = -EINVAL;
-- if ( port >= d->max_evtchns )
-+ if ( !port_is_valid(d, port) )
- goto out;
-
- rc = 0;
---- a/xen/include/xen/event.h
-+++ b/xen/include/xen/event.h
-@@ -105,6 +105,12 @@ void notify_via_xen_event_channel(struct
- #define bucket_from_port(d, p) \
- ((group_from_port(d, p))[((p) % EVTCHNS_PER_GROUP) / EVTCHNS_PER_BUCKET])
-
-+static inline unsigned int max_evtchns(const struct domain *d)
-+{
-+ return d->evtchn_fifo ? EVTCHN_FIFO_NR_CHANNELS
-+ : BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d);
-+}
-+
- static inline bool_t port_is_valid(struct domain *d, unsigned int p)
- {
- if ( p >= read_atomic(&d->valid_evtchns) )
---- a/xen/include/xen/sched.h
-+++ b/xen/include/xen/sched.h
-@@ -382,7 +382,6 @@ struct domain
- /* Event channel information. */
- struct evtchn *evtchn; /* first bucket only */
- struct evtchn **evtchn_group[NR_EVTCHN_GROUPS]; /* all other buckets */
-- unsigned int max_evtchns; /* number supported by ABI */
- unsigned int max_evtchn_port; /* max permitted port number */
- unsigned int valid_evtchns; /* number of allocated event channels */
- spinlock_t event_lock;
diff --git a/system/xen/xsa/xsa343-1.patch b/system/xen/xsa/xsa343-1.patch
deleted file mode 100644
index 0abbc03e8d..0000000000
--- a/system/xen/xsa/xsa343-1.patch
+++ /dev/null
@@ -1,199 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: evtchn: evtchn_reset() shouldn't succeed with still-open ports
-
-While the function closes all ports, it does so without holding any
-lock, and hence racing requests may be issued causing new ports to get
-opened. This would have been problematic in particular if such a newly
-opened port had a port number above the new implementation limit (i.e.
-when switching from FIFO to 2-level) after the reset, as prior to
-"evtchn: relax port_is_valid()" this could have led to e.g.
-evtchn_close()'s "BUG_ON(!port_is_valid(d2, port2))" to trigger.
-
-Introduce a counter of active ports and check that it's (still) no
-larger than the number of Xen internally used ones after obtaining the
-necessary lock in evtchn_reset().
-
-As to the access model of the new {active,xen}_evtchns fields - while
-all writes get done using write_atomic(), reads ought to use
-read_atomic() only when outside of a suitably locked region.
-
-Note that as of now evtchn_bind_virq() and evtchn_bind_ipi() don't have
-a need to call check_free_port().
-
-This is part of XSA-343.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-Reviewed-by: Julien Grall <jgrall@amazon.com>
----
-v7: Drop optimization from evtchn_reset().
-v6: Fix loop exit condition in evtchn_reset(). Use {read,write}_atomic()
- also for xen_evtchns.
-v5: Move increment in alloc_unbound_xen_event_channel() out of the inner
- locked region.
-v4: Account for Xen internal ports.
-v3: Document intended access next to new struct field.
-v2: Add comment to check_free_port(). Drop commented out calls.
-
---- a/xen/common/event_channel.c
-+++ b/xen/common/event_channel.c
-@@ -188,6 +188,8 @@ int evtchn_allocate_port(struct domain *
- write_atomic(&d->valid_evtchns, d->valid_evtchns + EVTCHNS_PER_BUCKET);
- }
-
-+ write_atomic(&d->active_evtchns, d->active_evtchns + 1);
-+
- return 0;
- }
-
-@@ -211,11 +213,26 @@ static int get_free_port(struct domain *
- return -ENOSPC;
- }
-
-+/*
-+ * Check whether a port is still marked free, and if so update the domain
-+ * counter accordingly. To be used on function exit paths.
-+ */
-+static void check_free_port(struct domain *d, evtchn_port_t port)
-+{
-+ if ( port_is_valid(d, port) &&
-+ evtchn_from_port(d, port)->state == ECS_FREE )
-+ write_atomic(&d->active_evtchns, d->active_evtchns - 1);
-+}
-+
- void evtchn_free(struct domain *d, struct evtchn *chn)
- {
- /* Clear pending event to avoid unexpected behavior on re-bind. */
- evtchn_port_clear_pending(d, chn);
-
-+ if ( consumer_is_xen(chn) )
-+ write_atomic(&d->xen_evtchns, d->xen_evtchns - 1);
-+ write_atomic(&d->active_evtchns, d->active_evtchns - 1);
-+
- /* Reset binding to vcpu0 when the channel is freed. */
- chn->state = ECS_FREE;
- chn->notify_vcpu_id = 0;
-@@ -258,6 +275,7 @@ static long evtchn_alloc_unbound(evtchn_
- alloc->port = port;
-
- out:
-+ check_free_port(d, port);
- spin_unlock(&d->event_lock);
- rcu_unlock_domain(d);
-
-@@ -351,6 +369,7 @@ static long evtchn_bind_interdomain(evtc
- bind->local_port = lport;
-
- out:
-+ check_free_port(ld, lport);
- spin_unlock(&ld->event_lock);
- if ( ld != rd )
- spin_unlock(&rd->event_lock);
-@@ -488,7 +507,7 @@ static long evtchn_bind_pirq(evtchn_bind
- struct domain *d = current->domain;
- struct vcpu *v = d->vcpu[0];
- struct pirq *info;
-- int port, pirq = bind->pirq;
-+ int port = 0, pirq = bind->pirq;
- long rc;
-
- if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
-@@ -536,6 +555,7 @@ static long evtchn_bind_pirq(evtchn_bind
- arch_evtchn_bind_pirq(d, pirq);
-
- out:
-+ check_free_port(d, port);
- spin_unlock(&d->event_lock);
-
- return rc;
-@@ -1011,10 +1031,10 @@ int evtchn_unmask(unsigned int port)
- return 0;
- }
-
--
- int evtchn_reset(struct domain *d)
- {
- unsigned int i;
-+ int rc = 0;
-
- if ( d != current->domain && !d->controller_pause_count )
- return -EINVAL;
-@@ -1024,7 +1044,9 @@ int evtchn_reset(struct domain *d)
-
- spin_lock(&d->event_lock);
-
-- if ( d->evtchn_fifo )
-+ if ( d->active_evtchns > d->xen_evtchns )
-+ rc = -EAGAIN;
-+ else if ( d->evtchn_fifo )
- {
- /* Switching back to 2-level ABI. */
- evtchn_fifo_destroy(d);
-@@ -1033,7 +1055,7 @@ int evtchn_reset(struct domain *d)
-
- spin_unlock(&d->event_lock);
-
-- return 0;
-+ return rc;
- }
-
- static long evtchn_set_priority(const struct evtchn_set_priority *set_priority)
-@@ -1219,10 +1241,9 @@ int alloc_unbound_xen_event_channel(
-
- spin_lock(&ld->event_lock);
-
-- rc = get_free_port(ld);
-+ port = rc = get_free_port(ld);
- if ( rc < 0 )
- goto out;
-- port = rc;
- chn = evtchn_from_port(ld, port);
-
- rc = xsm_evtchn_unbound(XSM_TARGET, ld, chn, remote_domid);
-@@ -1238,7 +1259,10 @@ int alloc_unbound_xen_event_channel(
-
- spin_unlock(&chn->lock);
-
-+ write_atomic(&ld->xen_evtchns, ld->xen_evtchns + 1);
-+
- out:
-+ check_free_port(ld, port);
- spin_unlock(&ld->event_lock);
-
- return rc < 0 ? rc : port;
-@@ -1314,6 +1338,7 @@ int evtchn_init(struct domain *d, unsign
- return -EINVAL;
- }
- evtchn_from_port(d, 0)->state = ECS_RESERVED;
-+ write_atomic(&d->active_evtchns, 0);
-
- #if MAX_VIRT_CPUS > BITS_PER_LONG
- d->poll_mask = xzalloc_array(unsigned long, BITS_TO_LONGS(d->max_vcpus));
-@@ -1340,6 +1365,8 @@ void evtchn_destroy(struct domain *d)
- for ( i = 0; port_is_valid(d, i); i++ )
- evtchn_close(d, i, 0);
-
-+ ASSERT(!d->active_evtchns);
-+
- clear_global_virq_handlers(d);
-
- evtchn_fifo_destroy(d);
---- a/xen/include/xen/sched.h
-+++ b/xen/include/xen/sched.h
-@@ -361,6 +361,16 @@ struct domain
- struct evtchn **evtchn_group[NR_EVTCHN_GROUPS]; /* all other buckets */
- unsigned int max_evtchn_port; /* max permitted port number */
- unsigned int valid_evtchns; /* number of allocated event channels */
-+ /*
-+ * Number of in-use event channels. Writers should use write_atomic().
-+ * Readers need to use read_atomic() only when not holding event_lock.
-+ */
-+ unsigned int active_evtchns;
-+ /*
-+ * Number of event channels used internally by Xen (not subject to
-+ * EVTCHNOP_reset). Read/write access like for active_evtchns.
-+ */
-+ unsigned int xen_evtchns;
- spinlock_t event_lock;
- const struct evtchn_port_ops *evtchn_port_ops;
- struct evtchn_fifo_domain *evtchn_fifo;
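
The accounting this patch adds is simple enough to restate in a standalone sketch (plain counters without write_atomic(), single-threaded, names illustrative): every successful allocation bumps the active count, exit paths that may have left the port free undo the bump via the helper, and a reset is refused while more than the Xen-internal channels are active.

    #include <stdbool.h>
    #include <stdio.h>

    enum { ECS_FREE, ECS_UNBOUND };

    static struct { int state; } chans[64];
    static unsigned int active_evtchns;

    static void alloc_port(unsigned int port)
    {
        chans[port].state = ECS_FREE;      /* bound later, by the caller */
        active_evtchns++;                  /* counted as soon as handed out */
    }

    static void check_free_port(unsigned int port)
    {
        /* Exit path: undo the accounting if the port never left ECS_FREE. */
        if (chans[port].state == ECS_FREE)
            active_evtchns--;
    }

    static bool reset_allowed(unsigned int xen_evtchns)
    {
        return active_evtchns <= xen_evtchns;   /* otherwise -EAGAIN */
    }

    int main(void)
    {
        alloc_port(1);                     /* error path: binding failed */
        check_free_port(1);
        printf("reset allowed: %d\n", reset_allowed(0));
        return 0;
    }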
diff --git a/system/xen/xsa/xsa343-2.patch b/system/xen/xsa/xsa343-2.patch
deleted file mode 100644
index b8eb4998f1..0000000000
--- a/system/xen/xsa/xsa343-2.patch
+++ /dev/null
@@ -1,295 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: evtchn: convert per-channel lock to be IRQ-safe
-
-... in order for send_guest_{global,vcpu}_virq() to be able to make use
-of it.
-
-This is part of XSA-343.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Julien Grall <jgrall@amazon.com>
----
-v6: New.
----
-TBD: This is the "dumb" conversion variant. In a couple of cases the
- slightly simpler spin_{,un}lock_irq() could apparently be used.
-
---- a/xen/common/event_channel.c
-+++ b/xen/common/event_channel.c
-@@ -248,6 +248,7 @@ static long evtchn_alloc_unbound(evtchn_
- int port;
- domid_t dom = alloc->dom;
- long rc;
-+ unsigned long flags;
-
- d = rcu_lock_domain_by_any_id(dom);
- if ( d == NULL )
-@@ -263,14 +264,14 @@ static long evtchn_alloc_unbound(evtchn_
- if ( rc )
- goto out;
-
-- spin_lock(&chn->lock);
-+ spin_lock_irqsave(&chn->lock, flags);
-
- chn->state = ECS_UNBOUND;
- if ( (chn->u.unbound.remote_domid = alloc->remote_dom) == DOMID_SELF )
- chn->u.unbound.remote_domid = current->domain->domain_id;
- evtchn_port_init(d, chn);
-
-- spin_unlock(&chn->lock);
-+ spin_unlock_irqrestore(&chn->lock, flags);
-
- alloc->port = port;
-
-@@ -283,26 +284,32 @@ static long evtchn_alloc_unbound(evtchn_
- }
-
-
--static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn)
-+static unsigned long double_evtchn_lock(struct evtchn *lchn,
-+ struct evtchn *rchn)
- {
-- if ( lchn < rchn )
-+ unsigned long flags;
-+
-+ if ( lchn <= rchn )
- {
-- spin_lock(&lchn->lock);
-- spin_lock(&rchn->lock);
-+ spin_lock_irqsave(&lchn->lock, flags);
-+ if ( lchn != rchn )
-+ spin_lock(&rchn->lock);
- }
- else
- {
-- if ( lchn != rchn )
-- spin_lock(&rchn->lock);
-+ spin_lock_irqsave(&rchn->lock, flags);
- spin_lock(&lchn->lock);
- }
-+
-+ return flags;
- }
-
--static void double_evtchn_unlock(struct evtchn *lchn, struct evtchn *rchn)
-+static void double_evtchn_unlock(struct evtchn *lchn, struct evtchn *rchn,
-+ unsigned long flags)
- {
-- spin_unlock(&lchn->lock);
- if ( lchn != rchn )
-- spin_unlock(&rchn->lock);
-+ spin_unlock(&lchn->lock);
-+ spin_unlock_irqrestore(&rchn->lock, flags);
- }
-
- static long evtchn_bind_interdomain(evtchn_bind_interdomain_t *bind)
-@@ -312,6 +319,7 @@ static long evtchn_bind_interdomain(evtc
- int lport, rport = bind->remote_port;
- domid_t rdom = bind->remote_dom;
- long rc;
-+ unsigned long flags;
-
- if ( rdom == DOMID_SELF )
- rdom = current->domain->domain_id;
-@@ -347,7 +355,7 @@ static long evtchn_bind_interdomain(evtc
- if ( rc )
- goto out;
-
-- double_evtchn_lock(lchn, rchn);
-+ flags = double_evtchn_lock(lchn, rchn);
-
- lchn->u.interdomain.remote_dom = rd;
- lchn->u.interdomain.remote_port = rport;
-@@ -364,7 +372,7 @@ static long evtchn_bind_interdomain(evtc
- */
- evtchn_port_set_pending(ld, lchn->notify_vcpu_id, lchn);
-
-- double_evtchn_unlock(lchn, rchn);
-+ double_evtchn_unlock(lchn, rchn, flags);
-
- bind->local_port = lport;
-
-@@ -387,6 +395,7 @@ int evtchn_bind_virq(evtchn_bind_virq_t
- struct domain *d = current->domain;
- int virq = bind->virq, vcpu = bind->vcpu;
- int rc = 0;
-+ unsigned long flags;
-
- if ( (virq < 0) || (virq >= ARRAY_SIZE(v->virq_to_evtchn)) )
- return -EINVAL;
-@@ -424,14 +433,14 @@ int evtchn_bind_virq(evtchn_bind_virq_t
-
- chn = evtchn_from_port(d, port);
-
-- spin_lock(&chn->lock);
-+ spin_lock_irqsave(&chn->lock, flags);
-
- chn->state = ECS_VIRQ;
- chn->notify_vcpu_id = vcpu;
- chn->u.virq = virq;
- evtchn_port_init(d, chn);
-
-- spin_unlock(&chn->lock);
-+ spin_unlock_irqrestore(&chn->lock, flags);
-
- v->virq_to_evtchn[virq] = bind->port = port;
-
-@@ -448,6 +457,7 @@ static long evtchn_bind_ipi(evtchn_bind_
- struct domain *d = current->domain;
- int port, vcpu = bind->vcpu;
- long rc = 0;
-+ unsigned long flags;
-
- if ( domain_vcpu(d, vcpu) == NULL )
- return -ENOENT;
-@@ -459,13 +469,13 @@ static long evtchn_bind_ipi(evtchn_bind_
-
- chn = evtchn_from_port(d, port);
-
-- spin_lock(&chn->lock);
-+ spin_lock_irqsave(&chn->lock, flags);
-
- chn->state = ECS_IPI;
- chn->notify_vcpu_id = vcpu;
- evtchn_port_init(d, chn);
-
-- spin_unlock(&chn->lock);
-+ spin_unlock_irqrestore(&chn->lock, flags);
-
- bind->port = port;
-
-@@ -509,6 +519,7 @@ static long evtchn_bind_pirq(evtchn_bind
- struct pirq *info;
- int port = 0, pirq = bind->pirq;
- long rc;
-+ unsigned long flags;
-
- if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
- return -EINVAL;
-@@ -541,14 +552,14 @@ static long evtchn_bind_pirq(evtchn_bind
- goto out;
- }
-
-- spin_lock(&chn->lock);
-+ spin_lock_irqsave(&chn->lock, flags);
-
- chn->state = ECS_PIRQ;
- chn->u.pirq.irq = pirq;
- link_pirq_port(port, chn, v);
- evtchn_port_init(d, chn);
-
-- spin_unlock(&chn->lock);
-+ spin_unlock_irqrestore(&chn->lock, flags);
-
- bind->port = port;
-
-@@ -569,6 +580,7 @@ int evtchn_close(struct domain *d1, int
- struct evtchn *chn1, *chn2;
- int port2;
- long rc = 0;
-+ unsigned long flags;
-
- again:
- spin_lock(&d1->event_lock);
-@@ -668,14 +680,14 @@ int evtchn_close(struct domain *d1, int
- BUG_ON(chn2->state != ECS_INTERDOMAIN);
- BUG_ON(chn2->u.interdomain.remote_dom != d1);
-
-- double_evtchn_lock(chn1, chn2);
-+ flags = double_evtchn_lock(chn1, chn2);
-
- evtchn_free(d1, chn1);
-
- chn2->state = ECS_UNBOUND;
- chn2->u.unbound.remote_domid = d1->domain_id;
-
-- double_evtchn_unlock(chn1, chn2);
-+ double_evtchn_unlock(chn1, chn2, flags);
-
- goto out;
-
-@@ -683,9 +695,9 @@ int evtchn_close(struct domain *d1, int
- BUG();
- }
-
-- spin_lock(&chn1->lock);
-+ spin_lock_irqsave(&chn1->lock, flags);
- evtchn_free(d1, chn1);
-- spin_unlock(&chn1->lock);
-+ spin_unlock_irqrestore(&chn1->lock, flags);
-
- out:
- if ( d2 != NULL )
-@@ -705,13 +717,14 @@ int evtchn_send(struct domain *ld, unsig
- struct evtchn *lchn, *rchn;
- struct domain *rd;
- int rport, ret = 0;
-+ unsigned long flags;
-
- if ( !port_is_valid(ld, lport) )
- return -EINVAL;
-
- lchn = evtchn_from_port(ld, lport);
-
-- spin_lock(&lchn->lock);
-+ spin_lock_irqsave(&lchn->lock, flags);
-
- /* Guest cannot send via a Xen-attached event channel. */
- if ( unlikely(consumer_is_xen(lchn)) )
-@@ -746,7 +759,7 @@ int evtchn_send(struct domain *ld, unsig
- }
-
- out:
-- spin_unlock(&lchn->lock);
-+ spin_unlock_irqrestore(&lchn->lock, flags);
-
- return ret;
- }
-@@ -1238,6 +1251,7 @@ int alloc_unbound_xen_event_channel(
- {
- struct evtchn *chn;
- int port, rc;
-+ unsigned long flags;
-
- spin_lock(&ld->event_lock);
-
-@@ -1250,14 +1264,14 @@ int alloc_unbound_xen_event_channel(
- if ( rc )
- goto out;
-
-- spin_lock(&chn->lock);
-+ spin_lock_irqsave(&chn->lock, flags);
-
- chn->state = ECS_UNBOUND;
- chn->xen_consumer = get_xen_consumer(notification_fn);
- chn->notify_vcpu_id = lvcpu;
- chn->u.unbound.remote_domid = remote_domid;
-
-- spin_unlock(&chn->lock);
-+ spin_unlock_irqrestore(&chn->lock, flags);
-
- write_atomic(&ld->xen_evtchns, ld->xen_evtchns + 1);
-
-@@ -1280,11 +1294,12 @@ void notify_via_xen_event_channel(struct
- {
- struct evtchn *lchn, *rchn;
- struct domain *rd;
-+ unsigned long flags;
-
- ASSERT(port_is_valid(ld, lport));
- lchn = evtchn_from_port(ld, lport);
-
-- spin_lock(&lchn->lock);
-+ spin_lock_irqsave(&lchn->lock, flags);
-
- if ( likely(lchn->state == ECS_INTERDOMAIN) )
- {
-@@ -1294,7 +1309,7 @@ void notify_via_xen_event_channel(struct
- evtchn_port_set_pending(rd, rchn->notify_vcpu_id, rchn);
- }
-
-- spin_unlock(&lchn->lock);
-+ spin_unlock_irqrestore(&lchn->lock, flags);
- }
-
- void evtchn_check_pollers(struct domain *d, unsigned int port)
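
Besides folding the IRQ flags into the first acquisition, double_evtchn_lock() keeps its deadlock freedom the usual way: the two locks are always taken in address order, and equal pointers are locked only once. A pthreads sketch of that ordering (the IRQ-flags plumbing of the real code is omitted):

    #include <pthread.h>

    struct evtchn { pthread_mutex_t lock; };

    static void double_evtchn_lock(struct evtchn *l, struct evtchn *r)
    {
        if ( l <= r )                      /* lower address first */
        {
            pthread_mutex_lock(&l->lock);
            if ( l != r )
                pthread_mutex_lock(&r->lock);
        }
        else
        {
            pthread_mutex_lock(&r->lock);
            pthread_mutex_lock(&l->lock);
        }
    }

    static void double_evtchn_unlock(struct evtchn *l, struct evtchn *r)
    {
        if ( l != r )
            pthread_mutex_unlock(&l->lock);
        pthread_mutex_unlock(&r->lock);
    }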
diff --git a/system/xen/xsa/xsa343-3.patch b/system/xen/xsa/xsa343-3.patch
deleted file mode 100644
index e513e308eb..0000000000
--- a/system/xen/xsa/xsa343-3.patch
+++ /dev/null
@@ -1,392 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: evtchn: address races with evtchn_reset()
-
-Neither d->evtchn_port_ops nor max_evtchns(d) may be used in an entirely
-lock-less manner, as both may change by a racing evtchn_reset(). In the
-common case, at least one of the domain's event lock or the per-channel
-lock needs to be held. In the specific case of the inter-domain sending
-by evtchn_send() and notify_via_xen_event_channel() holding the other
-side's per-channel lock is sufficient, as the channel can't change state
-without both per-channel locks held. Without such a channel changing
-state, evtchn_reset() can't complete successfully.
-
-Lock-free accesses continue to be permitted for the shim (calling some
-otherwise internal event channel functions), as this happens while the
-domain is in effectively single-threaded mode. Special care also needs
-taking for the shim's marking of in-use ports as ECS_RESERVED (allowing
-use of such ports in the shim case is okay because switching into and
-hence also out of FIFO mode is impossible there).
-
-As a side effect, certain operations on Xen bound event channels which
-were mistakenly permitted so far (e.g. unmask or poll) will be refused
-now.
-
-This is part of XSA-343.
-
-Reported-by: Julien Grall <jgrall@amazon.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Julien Grall <jgrall@amazon.com>
----
-v9: Add arch_evtchn_is_special() to fix PV shim.
-v8: Add BUILD_BUG_ON() in evtchn_usable().
-v7: Add locking related comment ahead of struct evtchn_port_ops.
-v6: New.
----
-TBD: I've been considering to move some of the wrappers from xen/event.h
- into event_channel.c (or even drop them altogether), when they
- require external locking (e.g. evtchn_port_init() or
- evtchn_port_set_priority()). Does anyone have a strong opinion
- either way?
-
---- a/xen/arch/x86/irq.c
-+++ b/xen/arch/x86/irq.c
-@@ -2488,14 +2488,24 @@ static void dump_irqs(unsigned char key)
-
- for ( i = 0; i < action->nr_guests; )
- {
-+ struct evtchn *evtchn;
-+ unsigned int pending = 2, masked = 2;
-+
- d = action->guest[i++];
- pirq = domain_irq_to_pirq(d, irq);
- info = pirq_info(d, pirq);
-+ evtchn = evtchn_from_port(d, info->evtchn);
-+ local_irq_disable();
-+ if ( spin_trylock(&evtchn->lock) )
-+ {
-+ pending = evtchn_is_pending(d, evtchn);
-+ masked = evtchn_is_masked(d, evtchn);
-+ spin_unlock(&evtchn->lock);
-+ }
-+ local_irq_enable();
- printk("d%d:%3d(%c%c%c)%c",
-- d->domain_id, pirq,
-- evtchn_port_is_pending(d, info->evtchn) ? 'P' : '-',
-- evtchn_port_is_masked(d, info->evtchn) ? 'M' : '-',
-- info->masked ? 'M' : '-',
-+ d->domain_id, pirq, "-P?"[pending],
-+ "-M?"[masked], info->masked ? 'M' : '-',
- i < action->nr_guests ? ',' : '\n');
- }
- }
---- a/xen/arch/x86/pv/shim.c
-+++ b/xen/arch/x86/pv/shim.c
-@@ -660,8 +660,11 @@ void pv_shim_inject_evtchn(unsigned int
- if ( port_is_valid(guest, port) )
- {
- struct evtchn *chn = evtchn_from_port(guest, port);
-+ unsigned long flags;
-
-+ spin_lock_irqsave(&chn->lock, flags);
- evtchn_port_set_pending(guest, chn->notify_vcpu_id, chn);
-+ spin_unlock_irqrestore(&chn->lock, flags);
- }
- }
-
---- a/xen/common/event_2l.c
-+++ b/xen/common/event_2l.c
-@@ -63,8 +63,10 @@ static void evtchn_2l_unmask(struct doma
- }
- }
-
--static bool evtchn_2l_is_pending(const struct domain *d, evtchn_port_t port)
-+static bool evtchn_2l_is_pending(const struct domain *d,
-+ const struct evtchn *evtchn)
- {
-+ evtchn_port_t port = evtchn->port;
- unsigned int max_ports = BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d);
-
- ASSERT(port < max_ports);
-@@ -72,8 +74,10 @@ static bool evtchn_2l_is_pending(const s
- guest_test_bit(d, port, &shared_info(d, evtchn_pending)));
- }
-
--static bool evtchn_2l_is_masked(const struct domain *d, evtchn_port_t port)
-+static bool evtchn_2l_is_masked(const struct domain *d,
-+ const struct evtchn *evtchn)
- {
-+ evtchn_port_t port = evtchn->port;
- unsigned int max_ports = BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d);
-
- ASSERT(port < max_ports);
---- a/xen/common/event_channel.c
-+++ b/xen/common/event_channel.c
-@@ -156,8 +156,9 @@ int evtchn_allocate_port(struct domain *
-
- if ( port_is_valid(d, port) )
- {
-- if ( evtchn_from_port(d, port)->state != ECS_FREE ||
-- evtchn_port_is_busy(d, port) )
-+ const struct evtchn *chn = evtchn_from_port(d, port);
-+
-+ if ( chn->state != ECS_FREE || evtchn_is_busy(d, chn) )
- return -EBUSY;
- }
- else
-@@ -774,6 +775,7 @@ void send_guest_vcpu_virq(struct vcpu *v
- unsigned long flags;
- int port;
- struct domain *d;
-+ struct evtchn *chn;
-
- ASSERT(!virq_is_global(virq));
-
-@@ -784,7 +786,10 @@ void send_guest_vcpu_virq(struct vcpu *v
- goto out;
-
- d = v->domain;
-- evtchn_port_set_pending(d, v->vcpu_id, evtchn_from_port(d, port));
-+ chn = evtchn_from_port(d, port);
-+ spin_lock(&chn->lock);
-+ evtchn_port_set_pending(d, v->vcpu_id, chn);
-+ spin_unlock(&chn->lock);
-
- out:
- spin_unlock_irqrestore(&v->virq_lock, flags);
-@@ -813,7 +818,9 @@ void send_guest_global_virq(struct domai
- goto out;
-
- chn = evtchn_from_port(d, port);
-+ spin_lock(&chn->lock);
- evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
-+ spin_unlock(&chn->lock);
-
- out:
- spin_unlock_irqrestore(&v->virq_lock, flags);
-@@ -823,6 +830,7 @@ void send_guest_pirq(struct domain *d, c
- {
- int port;
- struct evtchn *chn;
-+ unsigned long flags;
-
- /*
- * PV guests: It should not be possible to race with __evtchn_close(). The
-@@ -837,7 +845,9 @@ void send_guest_pirq(struct domain *d, c
- }
-
- chn = evtchn_from_port(d, port);
-+ spin_lock_irqsave(&chn->lock, flags);
- evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
-+ spin_unlock_irqrestore(&chn->lock, flags);
- }
-
- static struct domain *global_virq_handlers[NR_VIRQS] __read_mostly;
-@@ -1034,12 +1044,15 @@ int evtchn_unmask(unsigned int port)
- {
- struct domain *d = current->domain;
- struct evtchn *evtchn;
-+ unsigned long flags;
-
- if ( unlikely(!port_is_valid(d, port)) )
- return -EINVAL;
-
- evtchn = evtchn_from_port(d, port);
-+ spin_lock_irqsave(&evtchn->lock, flags);
- evtchn_port_unmask(d, evtchn);
-+ spin_unlock_irqrestore(&evtchn->lock, flags);
-
- return 0;
- }
-@@ -1449,8 +1462,8 @@ static void domain_dump_evtchn_info(stru
-
- printk(" %4u [%d/%d/",
- port,
-- evtchn_port_is_pending(d, port),
-- evtchn_port_is_masked(d, port));
-+ evtchn_is_pending(d, chn),
-+ evtchn_is_masked(d, chn));
- evtchn_port_print_state(d, chn);
- printk("]: s=%d n=%d x=%d",
- chn->state, chn->notify_vcpu_id, chn->xen_consumer);
---- a/xen/common/event_fifo.c
-+++ b/xen/common/event_fifo.c
-@@ -296,23 +296,26 @@ static void evtchn_fifo_unmask(struct do
- evtchn_fifo_set_pending(v, evtchn);
- }
-
--static bool evtchn_fifo_is_pending(const struct domain *d, evtchn_port_t port)
-+static bool evtchn_fifo_is_pending(const struct domain *d,
-+ const struct evtchn *evtchn)
- {
-- const event_word_t *word = evtchn_fifo_word_from_port(d, port);
-+ const event_word_t *word = evtchn_fifo_word_from_port(d, evtchn->port);
-
- return word && guest_test_bit(d, EVTCHN_FIFO_PENDING, word);
- }
-
--static bool_t evtchn_fifo_is_masked(const struct domain *d, evtchn_port_t port)
-+static bool_t evtchn_fifo_is_masked(const struct domain *d,
-+ const struct evtchn *evtchn)
- {
-- const event_word_t *word = evtchn_fifo_word_from_port(d, port);
-+ const event_word_t *word = evtchn_fifo_word_from_port(d, evtchn->port);
-
- return !word || guest_test_bit(d, EVTCHN_FIFO_MASKED, word);
- }
-
--static bool_t evtchn_fifo_is_busy(const struct domain *d, evtchn_port_t port)
-+static bool_t evtchn_fifo_is_busy(const struct domain *d,
-+ const struct evtchn *evtchn)
- {
-- const event_word_t *word = evtchn_fifo_word_from_port(d, port);
-+ const event_word_t *word = evtchn_fifo_word_from_port(d, evtchn->port);
-
- return word && guest_test_bit(d, EVTCHN_FIFO_LINKED, word);
- }
---- a/xen/include/asm-x86/event.h
-+++ b/xen/include/asm-x86/event.h
-@@ -47,4 +47,10 @@ static inline bool arch_virq_is_global(u
- return true;
- }
-
-+#ifdef CONFIG_PV_SHIM
-+# include <asm/pv/shim.h>
-+# define arch_evtchn_is_special(chn) \
-+ (pv_shim && (chn)->port && (chn)->state == ECS_RESERVED)
-+#endif
-+
- #endif
---- a/xen/include/xen/event.h
-+++ b/xen/include/xen/event.h
-@@ -133,6 +133,24 @@ static inline struct evtchn *evtchn_from
- return bucket_from_port(d, p) + (p % EVTCHNS_PER_BUCKET);
- }
-
-+/*
-+ * "usable" as in "by a guest", i.e. Xen consumed channels are assumed to be
-+ * taken care of separately where used for Xen's internal purposes.
-+ */
-+static bool evtchn_usable(const struct evtchn *evtchn)
-+{
-+ if ( evtchn->xen_consumer )
-+ return false;
-+
-+#ifdef arch_evtchn_is_special
-+ if ( arch_evtchn_is_special(evtchn) )
-+ return true;
-+#endif
-+
-+ BUILD_BUG_ON(ECS_FREE > ECS_RESERVED);
-+ return evtchn->state > ECS_RESERVED;
-+}
-+
- /* Wait on a Xen-attached event channel. */
- #define wait_on_xen_event_channel(port, condition) \
- do { \
-@@ -165,19 +183,24 @@ int evtchn_reset(struct domain *d);
-
- /*
- * Low-level event channel port ops.
-+ *
-+ * All hooks have to be called with a lock held which prevents the channel
-+ * from changing state. This may be the domain event lock, the per-channel
-+ * lock, or in the case of sending interdomain events also the other side's
-+ * per-channel lock. Exceptions apply in certain cases for the PV shim.
- */
- struct evtchn_port_ops {
- void (*init)(struct domain *d, struct evtchn *evtchn);
- void (*set_pending)(struct vcpu *v, struct evtchn *evtchn);
- void (*clear_pending)(struct domain *d, struct evtchn *evtchn);
- void (*unmask)(struct domain *d, struct evtchn *evtchn);
-- bool (*is_pending)(const struct domain *d, evtchn_port_t port);
-- bool (*is_masked)(const struct domain *d, evtchn_port_t port);
-+ bool (*is_pending)(const struct domain *d, const struct evtchn *evtchn);
-+ bool (*is_masked)(const struct domain *d, const struct evtchn *evtchn);
- /*
- * Is the port unavailable because it's still being cleaned up
- * after being closed?
- */
-- bool (*is_busy)(const struct domain *d, evtchn_port_t port);
-+ bool (*is_busy)(const struct domain *d, const struct evtchn *evtchn);
- int (*set_priority)(struct domain *d, struct evtchn *evtchn,
- unsigned int priority);
- void (*print_state)(struct domain *d, const struct evtchn *evtchn);
-@@ -193,38 +216,67 @@ static inline void evtchn_port_set_pendi
- unsigned int vcpu_id,
- struct evtchn *evtchn)
- {
-- d->evtchn_port_ops->set_pending(d->vcpu[vcpu_id], evtchn);
-+ if ( evtchn_usable(evtchn) )
-+ d->evtchn_port_ops->set_pending(d->vcpu[vcpu_id], evtchn);
- }
-
- static inline void evtchn_port_clear_pending(struct domain *d,
- struct evtchn *evtchn)
- {
-- d->evtchn_port_ops->clear_pending(d, evtchn);
-+ if ( evtchn_usable(evtchn) )
-+ d->evtchn_port_ops->clear_pending(d, evtchn);
- }
-
- static inline void evtchn_port_unmask(struct domain *d,
- struct evtchn *evtchn)
- {
-- d->evtchn_port_ops->unmask(d, evtchn);
-+ if ( evtchn_usable(evtchn) )
-+ d->evtchn_port_ops->unmask(d, evtchn);
- }
-
--static inline bool evtchn_port_is_pending(const struct domain *d,
-- evtchn_port_t port)
-+static inline bool evtchn_is_pending(const struct domain *d,
-+ const struct evtchn *evtchn)
- {
-- return d->evtchn_port_ops->is_pending(d, port);
-+ return evtchn_usable(evtchn) && d->evtchn_port_ops->is_pending(d, evtchn);
- }
-
--static inline bool evtchn_port_is_masked(const struct domain *d,
-- evtchn_port_t port)
-+static inline bool evtchn_port_is_pending(struct domain *d, evtchn_port_t port)
- {
-- return d->evtchn_port_ops->is_masked(d, port);
-+ struct evtchn *evtchn = evtchn_from_port(d, port);
-+ bool rc;
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&evtchn->lock, flags);
-+ rc = evtchn_is_pending(d, evtchn);
-+ spin_unlock_irqrestore(&evtchn->lock, flags);
-+
-+ return rc;
-+}
-+
-+static inline bool evtchn_is_masked(const struct domain *d,
-+ const struct evtchn *evtchn)
-+{
-+ return !evtchn_usable(evtchn) || d->evtchn_port_ops->is_masked(d, evtchn);
-+}
-+
-+static inline bool evtchn_port_is_masked(struct domain *d, evtchn_port_t port)
-+{
-+ struct evtchn *evtchn = evtchn_from_port(d, port);
-+ bool rc;
-+ unsigned long flags;
-+
-+ spin_lock_irqsave(&evtchn->lock, flags);
-+ rc = evtchn_is_masked(d, evtchn);
-+ spin_unlock_irqrestore(&evtchn->lock, flags);
-+
-+ return rc;
- }
-
--static inline bool evtchn_port_is_busy(const struct domain *d,
-- evtchn_port_t port)
-+static inline bool evtchn_is_busy(const struct domain *d,
-+ const struct evtchn *evtchn)
- {
- return d->evtchn_port_ops->is_busy &&
-- d->evtchn_port_ops->is_busy(d, port);
-+ d->evtchn_port_ops->is_busy(d, evtchn);
- }
-
- static inline int evtchn_port_set_priority(struct domain *d,
-@@ -233,6 +285,8 @@ static inline int evtchn_port_set_priori
- {
- if ( !d->evtchn_port_ops->set_priority )
- return -ENOSYS;
-+ if ( !evtchn_usable(evtchn) )
-+ return -EACCES;
- return d->evtchn_port_ops->set_priority(d, evtchn, priority);
- }
-
diff --git a/system/xen/xsa/xsa344-4.13-1.patch b/system/xen/xsa/xsa344-4.13-1.patch
deleted file mode 100644
index d8e9b3f43f..0000000000
--- a/system/xen/xsa/xsa344-4.13-1.patch
+++ /dev/null
@@ -1,130 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: evtchn: arrange for preemption in evtchn_destroy()
-
-Especially closing of fully established interdomain channels can take
-quite some time, due to the locking involved. Therefore we shouldn't
-assume we can clean up still active ports all in one go. Besides adding
-the necessary preemption check, also avoid pointlessly starting from
-(or now really ending at) 0; 1 is the lowest numbered port which may
-need closing.
-
-Since we're now reducing ->valid_evtchns, free_xen_event_channel(),
-and (at least to be on the safe side) notify_via_xen_event_channel()
-need to cope with attempts to close / unbind from / send through already
-closed (and no longer valid, as per port_is_valid()) ports.
-
-This is part of XSA-344.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Julien Grall <jgrall@amazon.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-
---- a/xen/common/domain.c
-+++ b/xen/common/domain.c
-@@ -770,12 +770,14 @@ int domain_kill(struct domain *d)
- return domain_kill(d);
- d->is_dying = DOMDYING_dying;
- argo_destroy(d);
-- evtchn_destroy(d);
- gnttab_release_mappings(d);
- vnuma_destroy(d->vnuma);
- domain_set_outstanding_pages(d, 0);
- /* fallthrough */
- case DOMDYING_dying:
-+ rc = evtchn_destroy(d);
-+ if ( rc )
-+ break;
- rc = domain_relinquish_resources(d);
- if ( rc != 0 )
- break;
---- a/xen/common/event_channel.c
-+++ b/xen/common/event_channel.c
-@@ -1297,7 +1297,16 @@ int alloc_unbound_xen_event_channel(
-
- void free_xen_event_channel(struct domain *d, int port)
- {
-- BUG_ON(!port_is_valid(d, port));
-+ if ( !port_is_valid(d, port) )
-+ {
-+ /*
-+ * Make sure ->is_dying is read /after/ ->valid_evtchns, pairing
-+ * with the spin_barrier() and BUG_ON() in evtchn_destroy().
-+ */
-+ smp_rmb();
-+ BUG_ON(!d->is_dying);
-+ return;
-+ }
-
- evtchn_close(d, port, 0);
- }
-@@ -1309,7 +1318,17 @@ void notify_via_xen_event_channel(struct
- struct domain *rd;
- unsigned long flags;
-
-- ASSERT(port_is_valid(ld, lport));
-+ if ( !port_is_valid(ld, lport) )
-+ {
-+ /*
-+ * Make sure ->is_dying is read /after/ ->valid_evtchns, pairing
-+ * with the spin_barrier() and BUG_ON() in evtchn_destroy().
-+ */
-+ smp_rmb();
-+ ASSERT(ld->is_dying);
-+ return;
-+ }
-+
- lchn = evtchn_from_port(ld, lport);
-
- spin_lock_irqsave(&lchn->lock, flags);
-@@ -1380,8 +1399,7 @@ int evtchn_init(struct domain *d, unsign
- return 0;
- }
-
--
--void evtchn_destroy(struct domain *d)
-+int evtchn_destroy(struct domain *d)
- {
- unsigned int i;
-
-@@ -1390,14 +1408,29 @@ void evtchn_destroy(struct domain *d)
- spin_barrier(&d->event_lock);
-
- /* Close all existing event channels. */
-- for ( i = 0; port_is_valid(d, i); i++ )
-+ for ( i = d->valid_evtchns; --i; )
-+ {
- evtchn_close(d, i, 0);
-
-+ /*
-+ * Avoid preempting when called from domain_create()'s error path,
-+ * and don't check too often (choice of frequency is arbitrary).
-+ */
-+ if ( i && !(i & 0x3f) && d->is_dying != DOMDYING_dead &&
-+ hypercall_preempt_check() )
-+ {
-+ write_atomic(&d->valid_evtchns, i);
-+ return -ERESTART;
-+ }
-+ }
-+
- ASSERT(!d->active_evtchns);
-
- clear_global_virq_handlers(d);
-
- evtchn_fifo_destroy(d);
-+
-+ return 0;
- }
-
-
---- a/xen/include/xen/sched.h
-+++ b/xen/include/xen/sched.h
-@@ -136,7 +136,7 @@ struct evtchn
- } __attribute__((aligned(64)));
-
- int evtchn_init(struct domain *d, unsigned int max_port);
--void evtchn_destroy(struct domain *d); /* from domain_kill */
-+int evtchn_destroy(struct domain *d); /* from domain_kill */
- void evtchn_destroy_final(struct domain *d); /* from complete_domain_destroy */
-
- struct waitqueue_vcpu;
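The shape of the fix above generalizes: tear the range down from the top,
poll for preemption only every so often, record progress in the shared
bound, and report -ERESTART so the hypercall machinery re-invokes the
operation. A minimal user-space sketch of that loop, assuming stand-ins
(the ERESTART value, preempt_pending(), close_one(), valid_resources) for
the Xen primitives it mirrors:

    #include <stdio.h>
    #include <stdbool.h>

    #define ERESTART 85 /* stand-in for Xen's -ERESTART */

    static unsigned int valid_resources = 1024; /* cf. d->valid_evtchns */

    /* Stand-in for hypercall_preempt_check(): request one preemption. */
    static bool preempt_pending(void)
    {
        static unsigned int calls;
        return ++calls == 3;
    }

    static void close_one(unsigned int i) { (void)i; /* evtchn_close() */ }

    /* Index 0 never needs closing, so the countdown stops above it. */
    static int destroy_all(void)
    {
        for ( unsigned int i = valid_resources; --i; )
        {
            close_one(i);

            /* Check only every 64th entry; record progress before bailing. */
            if ( i && !(i & 0x3f) && preempt_pending() )
            {
                valid_resources = i; /* continuation restarts from here */
                return -ERESTART;
            }
        }
        return 0;
    }

    int main(void)
    {
        while ( destroy_all() == -ERESTART )
            printf("preempted at %u, resuming\n", valid_resources);
        return 0;
    }

Shrinking the bound also explains why free_xen_event_channel() and
notify_via_xen_event_channel() above re-check port validity and pair an
smp_rmb() with the barrier in evtchn_destroy(): a port can legitimately
stop being valid while the domain is dying.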
diff --git a/system/xen/xsa/xsa344-4.13-2.patch b/system/xen/xsa/xsa344-4.13-2.patch
deleted file mode 100644
index 3f0339498f..0000000000
--- a/system/xen/xsa/xsa344-4.13-2.patch
+++ /dev/null
@@ -1,203 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: evtchn: arrange for preemption in evtchn_reset()
-
-Like for evtchn_destroy() looping over all possible event channels to
-close them can take a significant amount of time. Unlike done there, we
-can't alter domain properties (i.e. d->valid_evtchns) here. Borrow, in a
-lightweight form, the paging domctl continuation concept, redirecting
-the continuations to different sub-ops. Just like there, this is to
-allow for predictable overall results of the involved sub-ops:
-Racing requests should either complete or be refused.
-
-Note that a domain can't interfere with an already started (by a remote
-domain) reset, due to being paused. It can prevent a remote reset from
-happening by leaving a reset unfinished, but that's only going to affect
-itself.
-
-This is part of XSA-344.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Julien Grall <jgrall@amazon.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-
---- a/xen/common/domain.c
-+++ b/xen/common/domain.c
-@@ -1214,7 +1214,7 @@ void domain_unpause_except_self(struct d
- domain_unpause(d);
- }
-
--int domain_soft_reset(struct domain *d)
-+int domain_soft_reset(struct domain *d, bool resuming)
- {
- struct vcpu *v;
- int rc;
-@@ -1228,7 +1228,7 @@ int domain_soft_reset(struct domain *d)
- }
- spin_unlock(&d->shutdown_lock);
-
-- rc = evtchn_reset(d);
-+ rc = evtchn_reset(d, resuming);
- if ( rc )
- return rc;
-
---- a/xen/common/domctl.c
-+++ b/xen/common/domctl.c
-@@ -572,12 +572,22 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
- }
-
- case XEN_DOMCTL_soft_reset:
-+ case XEN_DOMCTL_soft_reset_cont:
- if ( d == current->domain ) /* no domain_pause() */
- {
- ret = -EINVAL;
- break;
- }
-- ret = domain_soft_reset(d);
-+ ret = domain_soft_reset(d, op->cmd == XEN_DOMCTL_soft_reset_cont);
-+ if ( ret == -ERESTART )
-+ {
-+ op->cmd = XEN_DOMCTL_soft_reset_cont;
-+ if ( !__copy_field_to_guest(u_domctl, op, cmd) )
-+ ret = hypercall_create_continuation(__HYPERVISOR_domctl,
-+ "h", u_domctl);
-+ else
-+ ret = -EFAULT;
-+ }
- break;
-
- case XEN_DOMCTL_destroydomain:
---- a/xen/common/event_channel.c
-+++ b/xen/common/event_channel.c
-@@ -1057,7 +1057,7 @@ int evtchn_unmask(unsigned int port)
- return 0;
- }
-
--int evtchn_reset(struct domain *d)
-+int evtchn_reset(struct domain *d, bool resuming)
- {
- unsigned int i;
- int rc = 0;
-@@ -1065,11 +1065,40 @@ int evtchn_reset(struct domain *d)
- if ( d != current->domain && !d->controller_pause_count )
- return -EINVAL;
-
-- for ( i = 0; port_is_valid(d, i); i++ )
-+ spin_lock(&d->event_lock);
-+
-+ /*
-+ * If we are resuming, then start where we stopped. Otherwise, check
-+ * that a reset operation is not already in progress, and if none is,
-+ * record that this is now the case.
-+ */
-+ i = resuming ? d->next_evtchn : !d->next_evtchn;
-+ if ( i > d->next_evtchn )
-+ d->next_evtchn = i;
-+
-+ spin_unlock(&d->event_lock);
-+
-+ if ( !i )
-+ return -EBUSY;
-+
-+ for ( ; port_is_valid(d, i); i++ )
-+ {
- evtchn_close(d, i, 1);
-
-+ /* NB: Choice of frequency is arbitrary. */
-+ if ( !(i & 0x3f) && hypercall_preempt_check() )
-+ {
-+ spin_lock(&d->event_lock);
-+ d->next_evtchn = i;
-+ spin_unlock(&d->event_lock);
-+ return -ERESTART;
-+ }
-+ }
-+
- spin_lock(&d->event_lock);
-
-+ d->next_evtchn = 0;
-+
- if ( d->active_evtchns > d->xen_evtchns )
- rc = -EAGAIN;
- else if ( d->evtchn_fifo )
-@@ -1204,7 +1233,8 @@ long do_event_channel_op(int cmd, XEN_GU
- break;
- }
-
-- case EVTCHNOP_reset: {
-+ case EVTCHNOP_reset:
-+ case EVTCHNOP_reset_cont: {
- struct evtchn_reset reset;
- struct domain *d;
-
-@@ -1217,9 +1247,13 @@ long do_event_channel_op(int cmd, XEN_GU
-
- rc = xsm_evtchn_reset(XSM_TARGET, current->domain, d);
- if ( !rc )
-- rc = evtchn_reset(d);
-+ rc = evtchn_reset(d, cmd == EVTCHNOP_reset_cont);
-
- rcu_unlock_domain(d);
-+
-+ if ( rc == -ERESTART )
-+ rc = hypercall_create_continuation(__HYPERVISOR_event_channel_op,
-+ "ih", EVTCHNOP_reset_cont, arg);
- break;
- }
-
---- a/xen/include/public/domctl.h
-+++ b/xen/include/public/domctl.h
-@@ -1152,7 +1152,10 @@ struct xen_domctl {
- #define XEN_DOMCTL_iomem_permission 20
- #define XEN_DOMCTL_ioport_permission 21
- #define XEN_DOMCTL_hypercall_init 22
--#define XEN_DOMCTL_arch_setup 23 /* Obsolete IA64 only */
-+#ifdef __XEN__
-+/* #define XEN_DOMCTL_arch_setup 23 Obsolete IA64 only */
-+#define XEN_DOMCTL_soft_reset_cont 23
-+#endif
- #define XEN_DOMCTL_settimeoffset 24
- #define XEN_DOMCTL_getvcpuaffinity 25
- #define XEN_DOMCTL_real_mode_area 26 /* Obsolete PPC only */
---- a/xen/include/public/event_channel.h
-+++ b/xen/include/public/event_channel.h
-@@ -74,6 +74,9 @@
- #define EVTCHNOP_init_control 11
- #define EVTCHNOP_expand_array 12
- #define EVTCHNOP_set_priority 13
-+#ifdef __XEN__
-+#define EVTCHNOP_reset_cont 14
-+#endif
- /* ` } */
-
- typedef uint32_t evtchn_port_t;
---- a/xen/include/xen/event.h
-+++ b/xen/include/xen/event.h
-@@ -171,7 +171,7 @@ void evtchn_check_pollers(struct domain
- void evtchn_2l_init(struct domain *d);
-
- /* Close all event channels and reset to 2-level ABI. */
--int evtchn_reset(struct domain *d);
-+int evtchn_reset(struct domain *d, bool resuming);
-
- /*
- * Low-level event channel port ops.
---- a/xen/include/xen/sched.h
-+++ b/xen/include/xen/sched.h
-@@ -394,6 +394,8 @@ struct domain
- * EVTCHNOP_reset). Read/write access like for active_evtchns.
- */
- unsigned int xen_evtchns;
-+ /* Port to resume from in evtchn_reset(), when in a continuation. */
-+ unsigned int next_evtchn;
- spinlock_t event_lock;
- const struct evtchn_port_ops *evtchn_port_ops;
- struct evtchn_fifo_domain *evtchn_fifo;
-@@ -663,7 +665,7 @@ int domain_shutdown(struct domain *d, u8
- void domain_resume(struct domain *d);
- void domain_pause_for_debugger(void);
-
--int domain_soft_reset(struct domain *d);
-+int domain_soft_reset(struct domain *d, bool resuming);
-
- int vcpu_start_shutdown_deferral(struct vcpu *v);
- void vcpu_end_shutdown_deferral(struct vcpu *v);
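The continuation mechanism above hinges on a small encoding trick at the
top of evtchn_reset(): i = resuming ? d->next_evtchn : !d->next_evtchn
yields the saved port when resuming, 1 (the first closable port) when no
reset is in progress, and 0 - turned into -EBUSY - when a reset is
already underway. A hedged sketch of just that gate, with next_evtchn as
a plain global rather than the event-lock-protected domain field:

    #include <stdbool.h>
    #include <stdio.h>

    static unsigned int next_evtchn; /* 0: no reset in progress */

    /* Returns the port to start from, or 0 when a reset is in flight. */
    static unsigned int reset_entry(bool resuming)
    {
        unsigned int i = resuming ? next_evtchn : !next_evtchn;

        if ( i > next_evtchn )
            next_evtchn = i; /* mark a fresh reset as started */

        return i;
    }

    int main(void)
    {
        printf("%u\n", reset_entry(false)); /* 1: fresh reset begins */
        printf("%u\n", reset_entry(false)); /* 0: caller returns -EBUSY */
        next_evtchn = 70;                   /* pretend we were preempted */
        printf("%u\n", reset_entry(true));  /* 70: continuation resumes */
        return 0;
    }

Hiding EVTCHNOP_reset_cont and XEN_DOMCTL_soft_reset_cont behind __XEN__
keeps the continuation sub-ops out of the guest-visible ABI while still
letting hypercall_create_continuation() re-enter through them.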
diff --git a/system/xen/xsa/xsa345-0001-x86-mm-Refactor-map_pages_to_xen-to-have-only-a-sing.patch b/system/xen/xsa/xsa345-0001-x86-mm-Refactor-map_pages_to_xen-to-have-only-a-sing.patch
deleted file mode 100644
index d325385a56..0000000000
--- a/system/xen/xsa/xsa345-0001-x86-mm-Refactor-map_pages_to_xen-to-have-only-a-sing.patch
+++ /dev/null
@@ -1,94 +0,0 @@
-From b3e0d4e37b7902533a463812374947d4d6d2e463 Mon Sep 17 00:00:00 2001
-From: Wei Liu <wei.liu2@citrix.com>
-Date: Sat, 11 Jan 2020 21:57:41 +0000
-Subject: [PATCH 1/3] x86/mm: Refactor map_pages_to_xen to have only a single
- exit path
-
-We will soon need to perform clean-ups before returning.
-
-No functional change.
-
-This is part of XSA-345.
-
-Reported-by: Hongyan Xia <hongyxia@amazon.com>
-Signed-off-by: Wei Liu <wei.liu2@citrix.com>
-Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
-Signed-off-by: George Dunlap <george.dunlap@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
----
- xen/arch/x86/mm.c | 17 +++++++++++------
- 1 file changed, 11 insertions(+), 6 deletions(-)
-
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index 30dffb68e8..133a393875 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -5187,6 +5187,7 @@ int map_pages_to_xen(
- l2_pgentry_t *pl2e, ol2e;
- l1_pgentry_t *pl1e, ol1e;
- unsigned int i;
-+ int rc = -ENOMEM;
-
- #define flush_flags(oldf) do { \
- unsigned int o_ = (oldf); \
-@@ -5207,7 +5208,8 @@ int map_pages_to_xen(
- l3_pgentry_t ol3e, *pl3e = virt_to_xen_l3e(virt);
-
- if ( !pl3e )
-- return -ENOMEM;
-+ goto out;
-+
- ol3e = *pl3e;
-
- if ( cpu_has_page1gb &&
-@@ -5295,7 +5297,7 @@ int map_pages_to_xen(
-
- pl2e = alloc_xen_pagetable();
- if ( pl2e == NULL )
-- return -ENOMEM;
-+ goto out;
-
- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
- l2e_write(pl2e + i,
-@@ -5324,7 +5326,7 @@ int map_pages_to_xen(
-
- pl2e = virt_to_xen_l2e(virt);
- if ( !pl2e )
-- return -ENOMEM;
-+ goto out;
-
- if ( ((((virt >> PAGE_SHIFT) | mfn_x(mfn)) &
- ((1u << PAGETABLE_ORDER) - 1)) == 0) &&
-@@ -5367,7 +5369,7 @@ int map_pages_to_xen(
- {
- pl1e = virt_to_xen_l1e(virt);
- if ( pl1e == NULL )
-- return -ENOMEM;
-+ goto out;
- }
- else if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
- {
-@@ -5394,7 +5396,7 @@ int map_pages_to_xen(
-
- pl1e = alloc_xen_pagetable();
- if ( pl1e == NULL )
-- return -ENOMEM;
-+ goto out;
-
- for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
- l1e_write(&pl1e[i],
-@@ -5538,7 +5540,10 @@ int map_pages_to_xen(
-
- #undef flush_flags
-
-- return 0;
-+ rc = 0;
-+
-+ out:
-+ return rc;
- }
-
- int populate_pt_range(unsigned long virt, unsigned long nr_mfns)
---
-2.25.1
-
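The refactor is the classic single-exit idiom: start rc at the common
failure value, turn each early return into goto out, and set rc = 0 only
once the whole body has succeeded, so a later patch can hang clean-ups
off the one exit point. A generic sketch of the shape (malloc/free stand
in for the page-table allocations):

    #include <stdlib.h>

    int do_work(void)
    {
        int rc = -1;            /* common failure value, cf. rc = -ENOMEM */
        void *a = NULL, *b = NULL;

        a = malloc(64);
        if ( !a )
            goto out;           /* was: return -ENOMEM */

        b = malloc(64);
        if ( !b )
            goto out;

        /* ... actual work ... */
        rc = 0;                 /* success is only set at the very end */

     out:
        free(b);                /* single place for clean-ups added later */
        free(a);
        return rc;
    }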
diff --git a/system/xen/xsa/xsa345-0002-x86-mm-Refactor-modify_xen_mappings-to-have-one-exit.patch b/system/xen/xsa/xsa345-0002-x86-mm-Refactor-modify_xen_mappings-to-have-one-exit.patch
deleted file mode 100644
index 836bed681a..0000000000
--- a/system/xen/xsa/xsa345-0002-x86-mm-Refactor-modify_xen_mappings-to-have-one-exit.patch
+++ /dev/null
@@ -1,68 +0,0 @@
-From 9f6f35b833d295acaaa2d8ff8cf309bf688cfd50 Mon Sep 17 00:00:00 2001
-From: Wei Liu <wei.liu2@citrix.com>
-Date: Sat, 11 Jan 2020 21:57:42 +0000
-Subject: [PATCH 2/3] x86/mm: Refactor modify_xen_mappings to have one exit
- path
-
-We will soon need to perform clean-ups before returning.
-
-No functional change.
-
-This is part of XSA-345.
-
-Reported-by: Hongyan Xia <hongyxia@amazon.com>
-Signed-off-by: Wei Liu <wei.liu2@citrix.com>
-Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
-Signed-off-by: George Dunlap <george.dunlap@citrix.com>
-Acked-by: Jan Beulich <jbeulich@suse.com>
----
- xen/arch/x86/mm.c | 12 +++++++++---
- 1 file changed, 9 insertions(+), 3 deletions(-)
-
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index 133a393875..af726d3274 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -5570,6 +5570,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
- l1_pgentry_t *pl1e;
- unsigned int i;
- unsigned long v = s;
-+ int rc = -ENOMEM;
-
- /* Set of valid PTE bits which may be altered. */
- #define FLAGS_MASK (_PAGE_NX|_PAGE_RW|_PAGE_PRESENT)
-@@ -5611,7 +5612,8 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
- /* PAGE1GB: shatter the superpage and fall through. */
- pl2e = alloc_xen_pagetable();
- if ( !pl2e )
-- return -ENOMEM;
-+ goto out;
-+
- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
- l2e_write(pl2e + i,
- l2e_from_pfn(l3e_get_pfn(*pl3e) +
-@@ -5666,7 +5668,8 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
- /* PSE: shatter the superpage and try again. */
- pl1e = alloc_xen_pagetable();
- if ( !pl1e )
-- return -ENOMEM;
-+ goto out;
-+
- for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
- l1e_write(&pl1e[i],
- l1e_from_pfn(l2e_get_pfn(*pl2e) + i,
-@@ -5795,7 +5798,10 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
- flush_area(NULL, FLUSH_TLB_GLOBAL);
-
- #undef FLAGS_MASK
-- return 0;
-+ rc = 0;
-+
-+ out:
-+ return rc;
- }
-
- #undef flush_area
---
-2.25.1
-
diff --git a/system/xen/xsa/xsa345-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch b/system/xen/xsa/xsa345-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch
deleted file mode 100644
index db407416b9..0000000000
--- a/system/xen/xsa/xsa345-0003-x86-mm-Prevent-some-races-in-hypervisor-mapping-upda.patch
+++ /dev/null
@@ -1,249 +0,0 @@
-From 0ff9a8453dc47cd47eee9659d5916afb5094e871 Mon Sep 17 00:00:00 2001
-From: Hongyan Xia <hongyxia@amazon.com>
-Date: Sat, 11 Jan 2020 21:57:43 +0000
-Subject: [PATCH 3/3] x86/mm: Prevent some races in hypervisor mapping updates
-
-map_pages_to_xen will attempt to coalesce mappings into 2MiB and 1GiB
-superpages if possible, to maximize TLB efficiency. This means both
-replacing superpage entries with smaller entries, and replacing
-smaller entries with superpages.
-
-Unfortunately, while some potential races are handled correctly,
-others are not. These include:
-
-1. When one processor modifies a sub-superpage mapping while another
-processor replaces the entire range with a superpage.
-
-Take the following example:
-
-Suppose L3[N] points to L2. And suppose we have two processors, A and
-B.
-
-* A walks the pagetables, gets a pointer to L2.
-* B replaces L3[N] with a 1GiB mapping.
-* B frees L2.
-* A writes L2[M] #
-
-This race is exacerbated by the fact that virt_to_xen_l[21]e doesn't
-handle higher-level superpages properly: If you call virt_to_xen_l2e
-on a virtual address within an L3 superpage, you'll either hit a BUG()
-(most likely), or get a pointer into the middle of a data page; same
-with virt_to_xen_l1e on a virtual address within either an L3 or L2
-superpage.
-
-So take the following example:
-
-* A reads pl3e and discovers it to point to an L2.
-* B replaces L3[N] with a 1GiB mapping
-* A calls virt_to_xen_l2e() and hits the BUG_ON() #
-
-2. When two processors simultaneously try to replace a sub-superpage
-mapping with a superpage mapping.
-
-Take the following example:
-
-Suppose L3[N] points to L2. And suppose we have two processors, A and B,
-both trying to replace L3[N] with a superpage.
-
-* A walks the pagetables, gets a pointer to pl3e, and takes a copy ol3e pointing to L2.
-* B walks the pagetables, gets a pointer to pl3e, and takes a copy ol3e pointing to L2.
-* A writes the new value into L3[N]
-* B writes the new value into L3[N]
-* A recursively frees all the L1's under L2, then frees L2
-* B recursively double-frees all the L1's under L2, then double-frees L2 #
-
-Fix this by grabbing a lock for the entirety of the mapping update
-operation.
-
-Rather than grabbing map_pgdir_lock for the entire operation, however,
-repurpose the PGT_locked bit from L3's page->type_info as a lock.
-This means that rather than locking the entire address space, we
-"only" lock a single 512GiB chunk of hypervisor address space at a
-time.
-
-There was a proposal for a lock-and-reverify approach, where we walk
-the pagetables to the point where we decide what to do; then grab the
-map_pgdir_lock, re-verify the information we collected without the
-lock, and finally make the change (starting over again if anything had
-changed). Without being able to guarantee that the L2 table wasn't
-freed, however, that means every read would need to be considered
-potentially unsafe. Thinking carefully about that is probably
-something that wants to be done in public, not under time pressure.
-
-This is part of XSA-345.
-
-Reported-by: Hongyan Xia <hongyxia@amazon.com>
-Signed-off-by: Hongyan Xia <hongyxia@amazon.com>
-Signed-off-by: George Dunlap <george.dunlap@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
----
- xen/arch/x86/mm.c | 92 +++++++++++++++++++++++++++++++++++++++++++++--
- 1 file changed, 89 insertions(+), 3 deletions(-)
-
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index af726d3274..d6a0761f43 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -2167,6 +2167,50 @@ void page_unlock(struct page_info *page)
- current_locked_page_set(NULL);
- }
-
-+/*
-+ * L3 table locks:
-+ *
-+ * Used for serialization in map_pages_to_xen() and modify_xen_mappings().
-+ *
-+ * For Xen PT pages, the page->u.inuse.type_info is unused and it is safe to
-+ * reuse the PGT_locked flag. This lock is taken only when we move down to L3
-+ * tables and below, since L4 (and above, for 5-level paging) is still globally
-+ * protected by map_pgdir_lock.
-+ *
-+ * PV MMU update hypercalls call map_pages_to_xen while holding a page's page_lock().
-+ * This has two implications:
-+ * - We cannot reuse current_locked_page_* for debugging
-+ * - To avoid the chance of deadlock, even for different pages, we
-+ * must never grab page_lock() after grabbing l3t_lock(). This
-+ * includes any page_lock()-based locks, such as
-+ * mem_sharing_page_lock().
-+ *
-+ * Also note that we grab the map_pgdir_lock while holding the
-+ * l3t_lock(), so to avoid deadlock we must avoid grabbing them in
-+ * reverse order.
-+ */
-+static void l3t_lock(struct page_info *page)
-+{
-+ unsigned long x, nx;
-+
-+ do {
-+ while ( (x = page->u.inuse.type_info) & PGT_locked )
-+ cpu_relax();
-+ nx = x | PGT_locked;
-+ } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x );
-+}
-+
-+static void l3t_unlock(struct page_info *page)
-+{
-+ unsigned long x, nx, y = page->u.inuse.type_info;
-+
-+ do {
-+ x = y;
-+ BUG_ON(!(x & PGT_locked));
-+ nx = x & ~PGT_locked;
-+ } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x );
-+}
-+
- #ifdef CONFIG_PV
- /*
- * PTE flags that a guest may change without re-validating the PTE.
-@@ -5177,6 +5221,23 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v)
- flush_area_local((const void *)v, f) : \
- flush_area_all((const void *)v, f))
-
-+#define L3T_INIT(page) (page) = ZERO_BLOCK_PTR
-+
-+#define L3T_LOCK(page) \
-+ do { \
-+ if ( locking ) \
-+ l3t_lock(page); \
-+ } while ( false )
-+
-+#define L3T_UNLOCK(page) \
-+ do { \
-+ if ( locking && (page) != ZERO_BLOCK_PTR ) \
-+ { \
-+ l3t_unlock(page); \
-+ (page) = ZERO_BLOCK_PTR; \
-+ } \
-+ } while ( false )
-+
- int map_pages_to_xen(
- unsigned long virt,
- mfn_t mfn,
-@@ -5188,6 +5249,7 @@ int map_pages_to_xen(
- l1_pgentry_t *pl1e, ol1e;
- unsigned int i;
- int rc = -ENOMEM;
-+ struct page_info *current_l3page;
-
- #define flush_flags(oldf) do { \
- unsigned int o_ = (oldf); \
-@@ -5203,13 +5265,20 @@ int map_pages_to_xen(
- } \
- } while (0)
-
-+ L3T_INIT(current_l3page);
-+
- while ( nr_mfns != 0 )
- {
-- l3_pgentry_t ol3e, *pl3e = virt_to_xen_l3e(virt);
-+ l3_pgentry_t *pl3e, ol3e;
-
-+ L3T_UNLOCK(current_l3page);
-+
-+ pl3e = virt_to_xen_l3e(virt);
- if ( !pl3e )
- goto out;
-
-+ current_l3page = virt_to_page(pl3e);
-+ L3T_LOCK(current_l3page);
- ol3e = *pl3e;
-
- if ( cpu_has_page1gb &&
-@@ -5543,6 +5612,7 @@ int map_pages_to_xen(
- rc = 0;
-
- out:
-+ L3T_UNLOCK(current_l3page);
- return rc;
- }
-
-@@ -5571,6 +5641,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
- unsigned int i;
- unsigned long v = s;
- int rc = -ENOMEM;
-+ struct page_info *current_l3page;
-
- /* Set of valid PTE bits which may be altered. */
- #define FLAGS_MASK (_PAGE_NX|_PAGE_RW|_PAGE_PRESENT)
-@@ -5579,11 +5650,22 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
- ASSERT(IS_ALIGNED(s, PAGE_SIZE));
- ASSERT(IS_ALIGNED(e, PAGE_SIZE));
-
-+ L3T_INIT(current_l3page);
-+
- while ( v < e )
- {
-- l3_pgentry_t *pl3e = virt_to_xen_l3e(v);
-+ l3_pgentry_t *pl3e;
-+
-+ L3T_UNLOCK(current_l3page);
-
-- if ( !pl3e || !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
-+ pl3e = virt_to_xen_l3e(v);
-+ if ( !pl3e )
-+ goto out;
-+
-+ current_l3page = virt_to_page(pl3e);
-+ L3T_LOCK(current_l3page);
-+
-+ if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
- {
- /* Confirm the caller isn't trying to create new mappings. */
- ASSERT(!(nf & _PAGE_PRESENT));
-@@ -5801,9 +5883,13 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
- rc = 0;
-
- out:
-+ L3T_UNLOCK(current_l3page);
- return rc;
- }
-
-+#undef L3T_LOCK
-+#undef L3T_UNLOCK
-+
- #undef flush_area
-
- int destroy_xen_mappings(unsigned long s, unsigned long e)
---
-2.25.1
-
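l3t_lock()/l3t_unlock() above are a one-bit spinlock built from
compare-and-swap: spin while the bit is set, then try to set it
atomically, retrying if any other bit of type_info changed underneath;
unlock clears the bit the same way so concurrent updates to neighbouring
bits are never lost. A user-space approximation with C11 atomics
(atomic_compare_exchange_weak standing in for Xen's cmpxchg(), an empty
spin body for cpu_relax()):

    #include <stdatomic.h>
    #include <stdbool.h>

    #define PGT_locked (1ul << 21) /* illustrative bit position */

    static void bit_lock(_Atomic unsigned long *w)
    {
        unsigned long x, nx;

        do {
            while ( (x = atomic_load(w)) & PGT_locked )
                ;                    /* cpu_relax() in Xen */
            nx = x | PGT_locked;
        } while ( !atomic_compare_exchange_weak(w, &x, nx) );
    }

    static void bit_unlock(_Atomic unsigned long *w)
    {
        unsigned long x = atomic_load(w), nx;

        do {
            nx = x & ~PGT_locked;
            /* on CAS failure, x is refreshed with the current value */
        } while ( !atomic_compare_exchange_weak(w, &x, nx) );
    }

The ordering rules from the comment block still apply to any such lock:
never take page_lock() after the bit, and acquire map_pgdir_lock only
after it, never the other way round, so the lock graph stays acyclic.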
diff --git a/system/xen/xsa/xsa346-4.13-1.patch b/system/xen/xsa/xsa346-4.13-1.patch
deleted file mode 100644
index a32e658e80..0000000000
--- a/system/xen/xsa/xsa346-4.13-1.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: IOMMU: suppress "iommu_dont_flush_iotlb" when about to free a page
-
-Deferring flushes to a single, wide-range one - as is done when
-handling XENMAPSPACE_gmfn_range - is okay only as long as
-pages don't get freed ahead of the eventual flush. While the only
-function setting the flag (xenmem_add_to_physmap()) suggests by its name
-that it's only mapping new entries, in reality the way
-xenmem_add_to_physmap_one() works means an unmap would happen not only
-for the page being moved (but not freed) but, if the destination GFN is
-populated, also for the page being displaced from that GFN. Collapsing
-the two flushes for this GFN into just one (and even more so deferring
-it to a batched invocation) is not correct.
-
-This is part of XSA-346.
-
-Fixes: cf95b2a9fd5a ("iommu: Introduce per cpu flag (iommu_dont_flush_iotlb) to avoid unnecessary iotlb... ")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-Acked-by: Julien Grall <jgrall@amazon.com>
-
---- a/xen/common/memory.c
-+++ b/xen/common/memory.c
-@@ -292,6 +292,7 @@ int guest_remove_page(struct domain *d,
- p2m_type_t p2mt;
- #endif
- mfn_t mfn;
-+ bool *dont_flush_p, dont_flush;
- int rc;
-
- #ifdef CONFIG_X86
-@@ -378,8 +379,18 @@ int guest_remove_page(struct domain *d,
- return -ENXIO;
- }
-
-+ /*
-+ * Since we're likely to free the page below, we need to suspend
-+ * xenmem_add_to_physmap()'s suppressing of IOMMU TLB flushes.
-+ */
-+ dont_flush_p = &this_cpu(iommu_dont_flush_iotlb);
-+ dont_flush = *dont_flush_p;
-+ *dont_flush_p = false;
-+
- rc = guest_physmap_remove_page(d, _gfn(gmfn), mfn, 0);
-
-+ *dont_flush_p = dont_flush;
-+
- /*
- * With the lack of an IOMMU on some platforms, domains with DMA-capable
- * device must retrieve the same pfn when the hypercall populate_physmap
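The fix is a scoped override of a per-CPU flag: save the caller's
setting of this_cpu(iommu_dont_flush_iotlb), force it off across the
removal so the unmap's IOMMU flush really happens before the page can be
freed, then restore it for the batched caller. A minimal sketch of the
same save/override/restore pattern, with a plain global standing in for
the per-CPU variable and for the real unmap:

    #include <stdbool.h>

    /* stand-in for this_cpu(iommu_dont_flush_iotlb) */
    static bool dont_flush_iotlb;

    static int remove_page_flushing(void)
    {
        bool *dont_flush_p = &dont_flush_iotlb;
        bool dont_flush = *dont_flush_p;  /* save the caller's setting */
        int rc;

        *dont_flush_p = false;   /* the unmap below must flush at once */
        rc = 0;                  /* ... guest_physmap_remove_page() ... */
        *dont_flush_p = dont_flush;       /* restore for the batch */

        return rc;
    }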
diff --git a/system/xen/xsa/xsa346-4.13-2.patch b/system/xen/xsa/xsa346-4.13-2.patch
deleted file mode 100644
index 6371b5c3db..0000000000
--- a/system/xen/xsa/xsa346-4.13-2.patch
+++ /dev/null
@@ -1,204 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: IOMMU: hold page ref until after deferred TLB flush
-
-When moving around a page via XENMAPSPACE_gmfn_range, deferring the TLB
-flush for the "from" GFN range requires that the page remains allocated
-to the guest until the TLB flush has actually occurred. Otherwise a
-parallel hypercall to remove the page would only flush the TLB for the
-GFN it has been moved to, but not the one it was mapped at originally.
-
-This is part of XSA-346.
-
-Fixes: cf95b2a9fd5a ("iommu: Introduce per cpu flag (iommu_dont_flush_iotlb) to avoid unnecessary iotlb... ")
-Reported-by: Julien Grall <jgrall@amazon.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Julien Grall <jgrall@amazon.com>
-
---- a/xen/arch/arm/mm.c
-+++ b/xen/arch/arm/mm.c
-@@ -1407,7 +1407,7 @@ void share_xen_page_with_guest(struct pa
- int xenmem_add_to_physmap_one(
- struct domain *d,
- unsigned int space,
-- union xen_add_to_physmap_batch_extra extra,
-+ union add_to_physmap_extra extra,
- unsigned long idx,
- gfn_t gfn)
- {
-@@ -1480,10 +1480,6 @@ int xenmem_add_to_physmap_one(
- break;
- }
- case XENMAPSPACE_dev_mmio:
-- /* extra should be 0. Reserved for future use. */
-- if ( extra.res0 )
-- return -EOPNOTSUPP;
--
- rc = map_dev_mmio_region(d, gfn, 1, _mfn(idx));
- return rc;
-
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -4617,7 +4617,7 @@ static int handle_iomem_range(unsigned l
- int xenmem_add_to_physmap_one(
- struct domain *d,
- unsigned int space,
-- union xen_add_to_physmap_batch_extra extra,
-+ union add_to_physmap_extra extra,
- unsigned long idx,
- gfn_t gpfn)
- {
-@@ -4701,9 +4701,20 @@ int xenmem_add_to_physmap_one(
- rc = guest_physmap_add_page(d, gpfn, mfn, PAGE_ORDER_4K);
-
- put_both:
-- /* In the XENMAPSPACE_gmfn case, we took a ref of the gfn at the top. */
-+ /*
-+ * In the XENMAPSPACE_gmfn case, we took a ref of the gfn at the top.
-+ * We also may need to transfer ownership of the page reference to our
-+ * caller.
-+ */
- if ( space == XENMAPSPACE_gmfn )
-+ {
- put_gfn(d, gfn);
-+ if ( !rc && extra.ppage )
-+ {
-+ *extra.ppage = page;
-+ page = NULL;
-+ }
-+ }
-
- if ( page )
- put_page(page);
---- a/xen/common/memory.c
-+++ b/xen/common/memory.c
-@@ -814,13 +814,12 @@ int xenmem_add_to_physmap(struct domain
- {
- unsigned int done = 0;
- long rc = 0;
-- union xen_add_to_physmap_batch_extra extra;
-+ union add_to_physmap_extra extra = {};
-+ struct page_info *pages[16];
-
- ASSERT(paging_mode_translate(d));
-
-- if ( xatp->space != XENMAPSPACE_gmfn_foreign )
-- extra.res0 = 0;
-- else
-+ if ( xatp->space == XENMAPSPACE_gmfn_foreign )
- extra.foreign_domid = DOMID_INVALID;
-
- if ( xatp->space != XENMAPSPACE_gmfn_range )
-@@ -835,7 +834,10 @@ int xenmem_add_to_physmap(struct domain
- xatp->size -= start;
-
- if ( is_iommu_enabled(d) )
-+ {
- this_cpu(iommu_dont_flush_iotlb) = 1;
-+ extra.ppage = &pages[0];
-+ }
-
- while ( xatp->size > done )
- {
-@@ -847,8 +849,12 @@ int xenmem_add_to_physmap(struct domain
- xatp->idx++;
- xatp->gpfn++;
-
-+ if ( extra.ppage )
-+ ++extra.ppage;
-+
- /* Check for continuation if it's not the last iteration. */
-- if ( xatp->size > ++done && hypercall_preempt_check() )
-+ if ( (++done > ARRAY_SIZE(pages) && extra.ppage) ||
-+ (xatp->size > done && hypercall_preempt_check()) )
- {
- rc = start + done;
- break;
-@@ -858,6 +864,7 @@ int xenmem_add_to_physmap(struct domain
- if ( is_iommu_enabled(d) )
- {
- int ret;
-+ unsigned int i;
-
- this_cpu(iommu_dont_flush_iotlb) = 0;
-
-@@ -866,6 +873,15 @@ int xenmem_add_to_physmap(struct domain
- if ( unlikely(ret) && rc >= 0 )
- rc = ret;
-
-+ /*
-+ * Now that the IOMMU TLB flush was done for the original GFN, drop
-+ * the page references. The 2nd flush below is fine to make later, as
-+ * whoever removes the page again from its new GFN will have to do
-+ * another flush anyway.
-+ */
-+ for ( i = 0; i < done; ++i )
-+ put_page(pages[i]);
-+
- ret = iommu_iotlb_flush(d, _dfn(xatp->gpfn - done), done,
- IOMMU_FLUSHF_added | IOMMU_FLUSHF_modified);
- if ( unlikely(ret) && rc >= 0 )
-@@ -879,6 +895,8 @@ static int xenmem_add_to_physmap_batch(s
- struct xen_add_to_physmap_batch *xatpb,
- unsigned int extent)
- {
-+ union add_to_physmap_extra extra = {};
-+
- if ( unlikely(xatpb->size < extent) )
- return -EILSEQ;
-
-@@ -890,6 +908,19 @@ static int xenmem_add_to_physmap_batch(s
- !guest_handle_subrange_okay(xatpb->errs, extent, xatpb->size - 1) )
- return -EFAULT;
-
-+ switch ( xatpb->space )
-+ {
-+ case XENMAPSPACE_dev_mmio:
-+ /* res0 is reserved for future use. */
-+ if ( xatpb->u.res0 )
-+ return -EOPNOTSUPP;
-+ break;
-+
-+ case XENMAPSPACE_gmfn_foreign:
-+ extra.foreign_domid = xatpb->u.foreign_domid;
-+ break;
-+ }
-+
- while ( xatpb->size > extent )
- {
- xen_ulong_t idx;
-@@ -902,8 +933,7 @@ static int xenmem_add_to_physmap_batch(s
- extent, 1)) )
- return -EFAULT;
-
-- rc = xenmem_add_to_physmap_one(d, xatpb->space,
-- xatpb->u,
-+ rc = xenmem_add_to_physmap_one(d, xatpb->space, extra,
- idx, _gfn(gpfn));
-
- if ( unlikely(__copy_to_guest_offset(xatpb->errs, extent, &rc, 1)) )
---- a/xen/include/xen/mm.h
-+++ b/xen/include/xen/mm.h
-@@ -588,8 +588,22 @@ void scrub_one_page(struct page_info *);
- &(d)->xenpage_list : &(d)->page_list)
- #endif
-
-+union add_to_physmap_extra {
-+ /*
-+ * XENMAPSPACE_gmfn: When deferring TLB flushes, a page reference needs
-+ * to be kept until after the flush, so the page can't get removed from
-+ * the domain (and re-used for another purpose) beforehand. By passing
-+ * non-NULL, the caller of xenmem_add_to_physmap_one() indicates it wants
-+ * to have ownership of such a reference transferred in the success case.
-+ */
-+ struct page_info **ppage;
-+
-+ /* XENMAPSPACE_gmfn_foreign */
-+ domid_t foreign_domid;
-+};
-+
- int xenmem_add_to_physmap_one(struct domain *d, unsigned int space,
-- union xen_add_to_physmap_batch_extra extra,
-+ union add_to_physmap_extra extra,
- unsigned long idx, gfn_t gfn);
-
- int xenmem_add_to_physmap(struct domain *d, struct xen_add_to_physmap *xatp,
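The second half of the fix is a collect/flush/release loop:
xenmem_add_to_physmap_one() transfers ownership of its page reference
out through extra.ppage, at most ARRAY_SIZE(pages) moves are batched per
continuation, and the references are dropped only after the IOMMU flush
of the original GFNs. A toy rendering of that ordering, with take_ref(),
flush_all() and put_ref() as illustrative stand-ins:

    #include <stdio.h>

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    static int take_ref(unsigned int idx) { return (int)idx; }
    static void flush_all(void) { puts("flush original GFNs"); }
    static void put_ref(int ref) { printf("release page %d\n", ref); }

    static void move_range(unsigned int count)
    {
        int pages[16];
        unsigned int done = 0;

        while ( count > done )
        {
            pages[done] = take_ref(done);
            if ( ++done == ARRAY_SIZE(pages) ) /* batch limit, then continue */
                break;
        }

        flush_all();           /* old GFNs now invisible to device DMA */

        for ( unsigned int i = 0; i < done; ++i )
            put_ref(pages[i]); /* only now may the pages be freed */
    }

    int main(void) { move_range(20); return 0; }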
diff --git a/system/xen/xsa/xsa347-4.13-1.patch b/system/xen/xsa/xsa347-4.13-1.patch
deleted file mode 100644
index e9f31a151f..0000000000
--- a/system/xen/xsa/xsa347-4.13-1.patch
+++ /dev/null
@@ -1,149 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: AMD/IOMMU: convert amd_iommu_pte from struct to union
-
-This is to add a "raw" counterpart to the bitfield equivalent. Take the
-opportunity and
- - convert fields to bool / unsigned int,
- - drop the naming of the reserved field,
- - shorten the names of the ignored ones.
-
-This is part of XSA-347.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-
---- a/xen/drivers/passthrough/amd/iommu_map.c
-+++ b/xen/drivers/passthrough/amd/iommu_map.c
-@@ -38,7 +38,7 @@ static unsigned int pfn_to_pde_idx(unsig
- static unsigned int clear_iommu_pte_present(unsigned long l1_mfn,
- unsigned long dfn)
- {
-- struct amd_iommu_pte *table, *pte;
-+ union amd_iommu_pte *table, *pte;
- unsigned int flush_flags;
-
- table = map_domain_page(_mfn(l1_mfn));
-@@ -52,7 +52,7 @@ static unsigned int clear_iommu_pte_pres
- return flush_flags;
- }
-
--static unsigned int set_iommu_pde_present(struct amd_iommu_pte *pte,
-+static unsigned int set_iommu_pde_present(union amd_iommu_pte *pte,
- unsigned long next_mfn,
- unsigned int next_level, bool iw,
- bool ir)
-@@ -87,7 +87,7 @@ static unsigned int set_iommu_pte_presen
- int pde_level,
- bool iw, bool ir)
- {
-- struct amd_iommu_pte *table, *pde;
-+ union amd_iommu_pte *table, *pde;
- unsigned int flush_flags;
-
- table = map_domain_page(_mfn(pt_mfn));
-@@ -178,7 +178,7 @@ void iommu_dte_set_guest_cr3(struct amd_
- static int iommu_pde_from_dfn(struct domain *d, unsigned long dfn,
- unsigned long pt_mfn[], bool map)
- {
-- struct amd_iommu_pte *pde, *next_table_vaddr;
-+ union amd_iommu_pte *pde, *next_table_vaddr;
- unsigned long next_table_mfn;
- unsigned int level;
- struct page_info *table;
-@@ -458,7 +458,7 @@ int __init amd_iommu_quarantine_init(str
- unsigned long end_gfn =
- 1ul << (DEFAULT_DOMAIN_ADDRESS_WIDTH - PAGE_SHIFT);
- unsigned int level = amd_iommu_get_paging_mode(end_gfn);
-- struct amd_iommu_pte *table;
-+ union amd_iommu_pte *table;
-
- if ( hd->arch.root_table )
- {
-@@ -489,7 +489,7 @@ int __init amd_iommu_quarantine_init(str
-
- for ( i = 0; i < PTE_PER_TABLE_SIZE; i++ )
- {
-- struct amd_iommu_pte *pde = &table[i];
-+ union amd_iommu_pte *pde = &table[i];
-
- /*
- * PDEs are essentially a subset of PTEs, so this function
---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
-+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
-@@ -390,7 +390,7 @@ static void deallocate_next_page_table(s
-
- static void deallocate_page_table(struct page_info *pg)
- {
-- struct amd_iommu_pte *table_vaddr;
-+ union amd_iommu_pte *table_vaddr;
- unsigned int index, level = PFN_ORDER(pg);
-
- PFN_ORDER(pg) = 0;
-@@ -405,7 +405,7 @@ static void deallocate_page_table(struct
-
- for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
- {
-- struct amd_iommu_pte *pde = &table_vaddr[index];
-+ union amd_iommu_pte *pde = &table_vaddr[index];
-
- if ( pde->mfn && pde->next_level && pde->pr )
- {
-@@ -557,7 +557,7 @@ static void amd_dump_p2m_table_level(str
- paddr_t gpa, int indent)
- {
- paddr_t address;
-- struct amd_iommu_pte *table_vaddr;
-+ const union amd_iommu_pte *table_vaddr;
- int index;
-
- if ( level < 1 )
-@@ -573,7 +573,7 @@ static void amd_dump_p2m_table_level(str
-
- for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
- {
-- struct amd_iommu_pte *pde = &table_vaddr[index];
-+ const union amd_iommu_pte *pde = &table_vaddr[index];
-
- if ( !(index % 2) )
- process_pending_softirqs();
---- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
-+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
-@@ -465,20 +465,23 @@ union amd_iommu_x2apic_control {
- #define IOMMU_PAGE_TABLE_U32_PER_ENTRY (IOMMU_PAGE_TABLE_ENTRY_SIZE / 4)
- #define IOMMU_PAGE_TABLE_ALIGNMENT 4096
-
--struct amd_iommu_pte {
-- uint64_t pr:1;
-- uint64_t ignored0:4;
-- uint64_t a:1;
-- uint64_t d:1;
-- uint64_t ignored1:2;
-- uint64_t next_level:3;
-- uint64_t mfn:40;
-- uint64_t reserved:7;
-- uint64_t u:1;
-- uint64_t fc:1;
-- uint64_t ir:1;
-- uint64_t iw:1;
-- uint64_t ignored2:1;
-+union amd_iommu_pte {
-+ uint64_t raw;
-+ struct {
-+ bool pr:1;
-+ unsigned int ign0:4;
-+ bool a:1;
-+ bool d:1;
-+ unsigned int ign1:2;
-+ unsigned int next_level:3;
-+ uint64_t mfn:40;
-+ unsigned int :7;
-+ bool u:1;
-+ bool fc:1;
-+ bool ir:1;
-+ bool iw:1;
-+ unsigned int ign2:1;
-+ };
- };
-
- /* Paging modes */
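The struct-to-union conversion is what enables the next patch: a
uint64_t raw member overlaid on the bitfield lets a whole entry be read,
compared and written as one value, while the named fields remain
available for constructing it. A reduced sketch of the shape (field
layout abbreviated, offsets not those of real AMD IOMMU PTEs):

    #include <stdint.h>
    #include <stdbool.h>

    union pte {
        uint64_t raw;              /* whole-entry access in one qword */
        struct {
            bool pr:1;             /* present */
            unsigned int ign0:4;   /* ignored by hardware */
            unsigned int next_level:3;
            uint64_t mfn:40;
            bool ir:1;             /* read allowed */
            bool iw:1;             /* write allowed */
        };
    };

    /* The overlay only works if the bitfields stay within one qword. */
    _Static_assert(sizeof(union pte) == 8, "PTE must remain 64 bits");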
diff --git a/system/xen/xsa/xsa347-4.13-2.patch b/system/xen/xsa/xsa347-4.13-2.patch
deleted file mode 100644
index fbe7461636..0000000000
--- a/system/xen/xsa/xsa347-4.13-2.patch
+++ /dev/null
@@ -1,72 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: AMD/IOMMU: update live PTEs atomically
-
-Updating a live PTE bitfield by bitfield risks the compiler re-ordering
-the individual updates as well as splitting individual updates into
-multiple memory writes. Construct the new entry fully in a local
-variable, determine the flushing needs from the thus-established new
-entry, and then write the new entry with a single insn.
-
-Similarly using memset() to clear a PTE is unsafe, as the order of
-writes the function does is, at least in principle, undefined.
-
-This is part of XSA-347.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-
---- a/xen/drivers/passthrough/amd/iommu_map.c
-+++ b/xen/drivers/passthrough/amd/iommu_map.c
-@@ -45,7 +45,7 @@ static unsigned int clear_iommu_pte_pres
- pte = &table[pfn_to_pde_idx(dfn, 1)];
-
- flush_flags = pte->pr ? IOMMU_FLUSHF_modified : 0;
-- memset(pte, 0, sizeof(*pte));
-+ write_atomic(&pte->raw, 0);
-
- unmap_domain_page(table);
-
-@@ -57,26 +57,30 @@ static unsigned int set_iommu_pde_presen
- unsigned int next_level, bool iw,
- bool ir)
- {
-+ union amd_iommu_pte new = {}, old;
- unsigned int flush_flags = IOMMU_FLUSHF_added;
-
-- if ( pte->pr &&
-- (pte->mfn != next_mfn ||
-- pte->iw != iw ||
-- pte->ir != ir ||
-- pte->next_level != next_level) )
-- flush_flags |= IOMMU_FLUSHF_modified;
--
- /*
- * FC bit should be enabled in PTE, this helps to solve potential
- * issues with ATS devices
- */
-- pte->fc = !next_level;
-+ new.fc = !next_level;
-+
-+ new.mfn = next_mfn;
-+ new.iw = iw;
-+ new.ir = ir;
-+ new.next_level = next_level;
-+ new.pr = true;
-+
-+ old.raw = read_atomic(&pte->raw);
-+ old.ign0 = 0;
-+ old.ign1 = 0;
-+ old.ign2 = 0;
-+
-+ if ( old.pr && old.raw != new.raw )
-+ flush_flags |= IOMMU_FLUSHF_modified;
-
-- pte->mfn = next_mfn;
-- pte->iw = iw;
-- pte->ir = ir;
-- pte->next_level = next_level;
-- pte->pr = 1;
-+ write_atomic(&pte->raw, new.raw);
-
- return flush_flags;
- }
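With the union in place, updating a live entry becomes: build the
replacement fully in a local, compare raw values (after zeroing the
ignored fields of the old entry) to decide whether a "modified" flush is
needed, then publish with one 64-bit store. A sketch with Xen's
read_atomic()/write_atomic() modelled as single volatile qword accesses
and a deliberately abbreviated field layout:

    #include <stdint.h>
    #include <stdbool.h>

    union pte {
        uint64_t raw;
        struct { bool pr:1; unsigned int ign0:4; uint64_t mfn:40; };
    };

    #define read_qword(p)     (*(volatile const uint64_t *)(p))
    #define write_qword(p, v) (*(volatile uint64_t *)(p) = (v))

    /* Returns true if the caller must issue a "modified" flush. */
    static bool update_pte(union pte *live, uint64_t mfn)
    {
        union pte new = { .raw = 0 }, old;

        new.mfn = mfn;        /* construct the entry fully in a local */
        new.pr = true;

        old.raw = read_qword(&live->raw);
        old.ign0 = 0;         /* mask don't-care bits out of the compare */

        write_qword(&live->raw, new.raw); /* one store, never torn */

        return old.pr && old.raw != new.raw;
    }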
diff --git a/system/xen/xsa/xsa347-4.13-3.patch b/system/xen/xsa/xsa347-4.13-3.patch
deleted file mode 100644
index 90c8e66020..0000000000
--- a/system/xen/xsa/xsa347-4.13-3.patch
+++ /dev/null
@@ -1,59 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: AMD/IOMMU: ensure suitable ordering of DTE modifications
-
-DMA and interrupt translation should be enabled only after other
-applicable DTE fields have been written. Similarly when disabling
-translation or when moving a device between domains, translation should
-first be disabled, before other entry fields get modified. Note however
-that the "moving" aspect doesn't apply to the interrupt remapping side,
-as domain specifics are maintained in the IRTEs here, not the DTE. We
-also never disable interrupt remapping once it got enabled for a device
-(the respective argument passed is always the immutable iommu_intremap).
-
-This is part of XSA-347.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-
---- a/xen/drivers/passthrough/amd/iommu_map.c
-+++ b/xen/drivers/passthrough/amd/iommu_map.c
-@@ -107,11 +107,18 @@ void amd_iommu_set_root_page_table(struc
- uint64_t root_ptr, uint16_t domain_id,
- uint8_t paging_mode, bool valid)
- {
-+ if ( valid || dte->v )
-+ {
-+ dte->tv = false;
-+ dte->v = true;
-+ smp_wmb();
-+ }
- dte->domain_id = domain_id;
- dte->pt_root = paddr_to_pfn(root_ptr);
- dte->iw = true;
- dte->ir = true;
- dte->paging_mode = paging_mode;
-+ smp_wmb();
- dte->tv = true;
- dte->v = valid;
- }
-@@ -134,6 +141,7 @@ void amd_iommu_set_intremap_table(
- }
-
- dte->ig = false; /* unmapped interrupts result in i/o page faults */
-+ smp_wmb();
- dte->iv = valid;
- }
-
---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
-+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
-@@ -120,7 +120,10 @@ static void amd_iommu_setup_domain_devic
- /* Undo what amd_iommu_disable_domain_device() may have done. */
- ivrs_dev = &get_ivrs_mappings(iommu->seg)[req_id];
- if ( dte->it_root )
-+ {
- dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED;
-+ smp_wmb();
-+ }
- dte->iv = iommu_intremap;
- dte->ex = ivrs_dev->dte_allow_exclusion;
- dte->sys_mgt = MASK_EXTR(ivrs_dev->device_flags, ACPI_IVHD_SYSTEM_MGMT);
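The ordering rule being enforced generalizes to any descriptor consumed
asynchronously by hardware or another CPU: take the enable bit(s) out of
service first, rewrite the payload, issue a write barrier, and only then
re-enable, so a consumer can never see translation live on a
half-updated entry. A schematic C11 rendering - relaxed atomic stores
with release fences playing the role of smp_wmb(), on a toy two-field
entry rather than a real DTE:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    struct dte {                    /* toy device table entry */
        _Atomic bool v;             /* translation valid */
        _Atomic uint64_t pt_root;   /* page table root */
        _Atomic uint16_t domain_id;
    };

    static void retarget(struct dte *dte, uint64_t root, uint16_t domid)
    {
        /* 1. Disable translation before touching the payload. */
        atomic_store_explicit(&dte->v, false, memory_order_relaxed);
        atomic_thread_fence(memory_order_release); /* smp_wmb() analogue */

        /* 2. Rewrite the payload while no consumer trusts it. */
        atomic_store_explicit(&dte->pt_root, root, memory_order_relaxed);
        atomic_store_explicit(&dte->domain_id, domid, memory_order_relaxed);

        /* 3. Barrier again, then re-enable: the payload becomes visible
         * strictly before the valid bit does. */
        atomic_thread_fence(memory_order_release);
        atomic_store_explicit(&dte->v, true, memory_order_relaxed);
    }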