author     omni <omni+alpine@hack.org>  2024-03-18 23:54:19 +0000
committer  omni <omni+alpine@hack.org>  2024-03-19 00:00:06 +0000
commit     7bf0401d2127ae56b44e9a667fcc60191f37af7c (patch)
tree       5a67e444f491dd637b6cbcb8fbfdd5ebbc928b7e
parent     d453d8a898509dbc616c4d467a9bf537b785ca13 (diff)
main/xen: add mitigations for XSA-451, XSA-452 & XSA-453 (3.16-stable)
-rw-r--r--  main/xen/APKBUILD            |  39
-rw-r--r--  main/xen/xsa451-4.16.patch   | 193
-rw-r--r--  main/xen/xsa452-4.16-1.patch | 304
-rw-r--r--  main/xen/xsa452-4.16-2.patch |  87
-rw-r--r--  main/xen/xsa452-4.16-3.patch | 135
-rw-r--r--  main/xen/xsa452-4.16-4.patch | 197
-rw-r--r--  main/xen/xsa452-4.16-5.patch | 237
-rw-r--r--  main/xen/xsa452-4.16-6.patch | 163
-rw-r--r--  main/xen/xsa452-4.16-7.patch | 299
-rw-r--r--  main/xen/xsa453-4.16-1.patch | 148
-rw-r--r--  main/xen/xsa453-4.16-2.patch |  49
-rw-r--r--  main/xen/xsa453-4.16-3.patch | 313
-rw-r--r--  main/xen/xsa453-4.16-4.patch | 113
-rw-r--r--  main/xen/xsa453-4.16-5.patch |  75
-rw-r--r--  main/xen/xsa453-4.16-6.patch | 382
-rw-r--r--  main/xen/xsa453-4.16-7.patch |  61
-rw-r--r--  main/xen/xsa453-4.16-8.patch | 201
17 files changed, 2995 insertions(+), 1 deletion(-)
diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD
index bbf9359e7ed..6f385ecada9 100644
--- a/main/xen/APKBUILD
+++ b/main/xen/APKBUILD
@@ -2,7 +2,7 @@
# Maintainer: Natanael Copa <ncopa@alpinelinux.org>
pkgname=xen
pkgver=4.16.5
-pkgrel=6
+pkgrel=7
pkgdesc="Xen hypervisor"
url="https://www.xenproject.org/"
arch="x86_64 armv7 aarch64" # enable armv7 when builds with gcc8
@@ -45,6 +45,7 @@ options="!strip"
# 0:
# - CVE-2020-29568 XSA-349
# - CVE-2020-29569 XSA-350
+# - CVE-2023-46840 XSA-450
# 4.7.0-r0:
# - CVE-2016-6258 XSA-182
# - CVE-2016-6259 XSA-183
@@ -354,6 +355,10 @@ options="!strip"
# - CVE-2023-46837 XSA-447
# 4.16.5-r6:
# - CVE-2023-46839 XSA-449
+# 4.16.5-r7:
+# - CVE-2023-46841 XSA-451
+# - CVE-2023-28746 XSA-452
+# - CVE-2024-2193 XSA-453
case "$CARCH" in
x86*)
@@ -419,6 +424,22 @@ source="https://downloads.xenproject.org/release/xen/$pkgver/xen-$pkgver.tar.gz
xsa446.patch
xsa447-4.16.patch
xsa449-4.16.patch
+ xsa451-4.16.patch
+ xsa452-4.16-1.patch
+ xsa452-4.16-2.patch
+ xsa452-4.16-3.patch
+ xsa452-4.16-4.patch
+ xsa452-4.16-5.patch
+ xsa452-4.16-6.patch
+ xsa452-4.16-7.patch
+ xsa453-4.16-1.patch
+ xsa453-4.16-2.patch
+ xsa453-4.16-3.patch
+ xsa453-4.16-4.patch
+ xsa453-4.16-5.patch
+ xsa453-4.16-6.patch
+ xsa453-4.16-7.patch
+ xsa453-4.16-8.patch
mini-os-__divmoddi4.patch
qemu-xen_paths.patch
@@ -725,6 +746,22 @@ f3694e355ae921528591dfec79c187eb22890f5a294ad2f4d4e96ed0476aa290c9a993f30a51e2ca
229319de61f83d98b41ff7bf8ac944f7d5283f190ae54ed01087409b2cf42c141455b2a56c28898288db85780587803670671c1f5f446359a1d9767259f975d5 xsa446.patch
98ac1fd6b2755e4034d70f283253eb18011b81ecb78f6507629ff8144faf422008ec6c603b6f9727bc752f57f7d09f9fce3cde3127b006c6e4ef0aeab319647f xsa447-4.16.patch
4baf6b93eaa46b90a5784502857fcbe271a06bf433d2b58b47c6777e3bc0860d94bb4c4e8e93b9cb9295c475e6d030fd59b4d9b7efd57ad087108650c5022656 xsa449-4.16.patch
+8a8228b9da87b4d10217c3d7a8091655feee5d43c7a370ea869e4cf9f7b5c679ef56ec530fa49dbac5b607c24687bd8810b33a040734f5750aa04917f6a8e250 xsa451-4.16.patch
+64c8bf4350955dc2ef052713415d7dace6c7afcc35d18b9a0d648e25de4cb02783fcec3828fad5c33d2e482ca42e13936c14a8274ac34f0f9130905c8f010b3f xsa452-4.16-1.patch
+16aaabfec8abf90c416c86c123c8f657c6d9e3f5a40cf558fe4b207f9fc6e95253697c0a9f4a8c9332782bd627de10bd8c930b2c31a93d02717795c36e36e4b1 xsa452-4.16-2.patch
+a81677105f358974f8709fbee70d0ba16eee64d4261de69f012bc0ec33955cf2f76f4fc11a4a027ac6552d7b448fc18a5fd4f9bb58bcab819b9492376254a3c1 xsa452-4.16-3.patch
+8344d55fea823ec8711d27fb6188ee0e8a613383f814a0f83046c7e822a72e6583cb94c0e7536c47d9a9209805492fa6a99505ae925f60f0ac68911a4fd06e54 xsa452-4.16-4.patch
+dfe195a53d90ee4fd1e44ba5ab270ac01d8a241f279fe9c1c0977a0e5c3d4c1792ab9fa20bd37ece7fd494d00d9009f8a1922c3438b08eed852982e88ed85638 xsa452-4.16-5.patch
+6c045a64ff119ec5e90cad0974230aca7cc94e7990a85136ec0fd38a051f6179139d75de95766d36e6d6330f48999721d37ba7102a2eab2850fff9c56b236abb xsa452-4.16-6.patch
+f196f4e3877093d2892106778aaa61d318a65392adde7d38a8256ccf5e76eab4f8d833e3f7f119611598b26c190f197e85bd97ecf7ad73d362ec4e2b8302e37a xsa452-4.16-7.patch
+23abb30d679285fff2687a0183425c30aa5214026b0d061d079911a471ffa823d48d6e3929e9180c583db8eab03bf871236b94c5ebbe9b94f016896474734abc xsa453-4.16-1.patch
+2bf1ec8e49347f2fb305e1bb8d6b6be98b409409c93ce0d6a83751aea681be1548200acb111c242767033489595c040ffff32b31ae5e03dce21e5916cc59b470 xsa453-4.16-2.patch
+a8be0ffce584d408ae865679ea131e76157ee73e83c878b3a36aa795cb75ef63b6c6ab74fd54f4b0f283f557299d4eb9d2dae4f526c3dd8b9b0c0ea4e4fd369e xsa453-4.16-3.patch
+3c0cc89564ae76714304041a67612fe035617f7709c62a3d069c66b09228fa611881cedf81b7d9a405c9410477796cdfe62399b7f95790789b708b05311df912 xsa453-4.16-4.patch
+5004450f8446c4e7dc9b3c40a5140aae9779a1cf36be1465b6e46a3b6b433acecc248b557713771129c6ec7a06080330bb9e9f4055f9787105ec2e118efbe893 xsa453-4.16-5.patch
+ed7c23e0ae1ab9df5c6ddc581110345208f924071fa012f5471991fa26f35245c7c8cbb6d822d5c98a8f5a8bcf0b267243429b9b6e416201898a78a152de1d23 xsa453-4.16-6.patch
+3559110c1f4b26b0714cdd0d33f644262e331d3fc6bf29377ad85194a74e2efdb58841cfe9f6b2932a853c3ff702d0e38e0bc9c6e25aa65c7236b236f17ebdd1 xsa453-4.16-7.patch
+cf01db150adc7d737161cd9c2c607e85e05b0d5d74371b1bcbc2e87d52f38adaa17bb798dacfd690e63de2cf165421847c722eed0eef5474565b3fdd1c6b25e6 xsa453-4.16-8.patch
2e0b0fd23e6f10742a5517981e5171c6e88b0a93c83da701b296f5c0861d72c19782daab589a7eac3f9032152a0fc7eff7f5362db8fccc4859564a9aa82329cf gmp-4.3.2.tar.bz2
c2bc9ffc8583aeae71cee9ddcc4418969768d4e3764d47307da54f93981c0109fb07d84b061b3a3628bd00ba4d14a54742bc04848110eb3ae8ca25dbfbaabadb grub-0.97.tar.gz
1465b58279af1647f909450e394fe002ca165f0ff4a0254bfa9fe0e64316f50facdde2729d79a4e632565b4500cf4d6c74192ac0dd3bc9fe09129bbd67ba089d lwip-1.3.0.tar.gz
diff --git a/main/xen/xsa451-4.16.patch b/main/xen/xsa451-4.16.patch
new file mode 100644
index 00000000000..540ae0e0e5c
--- /dev/null
+++ b/main/xen/xsa451-4.16.patch
@@ -0,0 +1,193 @@
+From: Jan Beulich <jbeulich@suse.com>
+Subject: x86: account for shadow stack in exception-from-stub recovery
+
+Dealing with exceptions raised from within emulation stubs involves
+discarding the return address (replaced by exception-related information).
+Such discarding of course also requires removing the corresponding entry
+from the shadow stack.
+
+Also amend the comment in fixup_exception_return(), to further clarify
+why use of ptr[1] can't be an out-of-bounds access.
+
+This is CVE-2023-46841 / XSA-451.
+
+Fixes: 209fb9919b50 ("x86/extable: Adjust extable handling to be shadow stack compatible")
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
+
+--- a/xen/arch/x86/extable.c
++++ b/xen/arch/x86/extable.c
+@@ -86,26 +86,29 @@ search_one_extable(const struct exceptio
+ }
+
+ unsigned long
+-search_exception_table(const struct cpu_user_regs *regs)
++search_exception_table(const struct cpu_user_regs *regs, unsigned long *stub_ra)
+ {
+ const struct virtual_region *region = find_text_region(regs->rip);
+ unsigned long stub = this_cpu(stubs.addr);
+
+ if ( region && region->ex )
++ {
++ *stub_ra = 0;
+ return search_one_extable(region->ex, region->ex_end, regs->rip);
++ }
+
+ if ( regs->rip >= stub + STUB_BUF_SIZE / 2 &&
+ regs->rip < stub + STUB_BUF_SIZE &&
+ regs->rsp > (unsigned long)regs &&
+ regs->rsp < (unsigned long)get_cpu_info() )
+ {
+- unsigned long retptr = *(unsigned long *)regs->rsp;
++ unsigned long retaddr = *(unsigned long *)regs->rsp, fixup;
+
+- region = find_text_region(retptr);
+- retptr = region && region->ex
+- ? search_one_extable(region->ex, region->ex_end, retptr)
+- : 0;
+- if ( retptr )
++ region = find_text_region(retaddr);
++ fixup = region && region->ex
++ ? search_one_extable(region->ex, region->ex_end, retaddr)
++ : 0;
++ if ( fixup )
+ {
+ /*
+ * Put trap number and error code on the stack (in place of the
+@@ -117,7 +120,8 @@ search_exception_table(const struct cpu_
+ };
+
+ *(unsigned long *)regs->rsp = token.raw;
+- return retptr;
++ *stub_ra = retaddr;
++ return fixup;
+ }
+ }
+
+--- a/xen/arch/x86/traps.c
++++ b/xen/arch/x86/traps.c
+@@ -895,7 +895,7 @@ static void do_reserved_trap(struct cpu_
+ }
+
+ static void fixup_exception_return(struct cpu_user_regs *regs,
+- unsigned long fixup)
++ unsigned long fixup, unsigned long stub_ra)
+ {
+ if ( IS_ENABLED(CONFIG_XEN_SHSTK) )
+ {
+@@ -912,7 +912,8 @@ static void fixup_exception_return(struc
+ /*
+ * Search for %rip. The shstk currently looks like this:
+ *
+- * ... [Likely pointed to by SSP]
++ * tok [Supervisor token, == &tok | BUSY, only with FRED inactive]
++ * ... [Pointed to by SSP for most exceptions, empty in IST cases]
+ * %cs [== regs->cs]
+ * %rip [== regs->rip]
+ * SSP [Likely points to 3 slots higher, above %cs]
+@@ -930,7 +931,56 @@ static void fixup_exception_return(struc
+ */
+ if ( ptr[0] == regs->rip && ptr[1] == regs->cs )
+ {
++ unsigned long primary_shstk =
++ (ssp & ~(STACK_SIZE - 1)) +
++ (PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8;
++
+ wrss(fixup, ptr);
++
++ if ( !stub_ra )
++ goto shstk_done;
++
++ /*
++ * Stub recovery ought to happen only when the outer context
++ * was on the main shadow stack. We need to also "pop" the
++ * stub's return address from the interrupted context's shadow
++ * stack. That is,
++ * - if we're still on the main stack, we need to move the
++ * entire stack (up to and including the exception frame)
++ * up by one slot, incrementing the original SSP in the
++ * exception frame,
++ * - if we're on an IST stack, we need to increment the
++ * original SSP.
++ */
++ BUG_ON((ptr[-1] ^ primary_shstk) >> PAGE_SHIFT);
++
++ if ( (ssp ^ primary_shstk) >> PAGE_SHIFT )
++ {
++ /*
++ * We're on an IST stack. First make sure the two return
++ * addresses actually match. Then increment the interrupted
++ * context's SSP.
++ */
++ BUG_ON(stub_ra != *(unsigned long*)ptr[-1]);
++ wrss(ptr[-1] + 8, &ptr[-1]);
++ goto shstk_done;
++ }
++
++ /* Make sure the two return addresses actually match. */
++ BUG_ON(stub_ra != ptr[2]);
++
++ /* Move exception frame, updating SSP there. */
++ wrss(ptr[1], &ptr[2]); /* %cs */
++ wrss(ptr[0], &ptr[1]); /* %rip */
++ wrss(ptr[-1] + 8, &ptr[0]); /* SSP */
++
++ /* Move all newer entries. */
++ while ( --ptr != _p(ssp) )
++ wrss(ptr[-1], &ptr[0]);
++
++ /* Finally account for our own stack having shifted up. */
++ asm volatile ( "incsspd %0" :: "r" (2) );
++
+ goto shstk_done;
+ }
+ }
+@@ -951,7 +1001,8 @@ static void fixup_exception_return(struc
+
+ static bool extable_fixup(struct cpu_user_regs *regs, bool print)
+ {
+- unsigned long fixup = search_exception_table(regs);
++ unsigned long stub_ra = 0;
++ unsigned long fixup = search_exception_table(regs, &stub_ra);
+
+ if ( unlikely(fixup == 0) )
+ return false;
+@@ -965,7 +1016,7 @@ static bool extable_fixup(struct cpu_use
+ vec_name(regs->entry_vector), regs->error_code,
+ _p(regs->rip), _p(regs->rip), _p(fixup));
+
+- fixup_exception_return(regs, fixup);
++ fixup_exception_return(regs, fixup, stub_ra);
+ this_cpu(last_extable_addr) = regs->rip;
+
+ return true;
+@@ -1256,7 +1307,7 @@ void do_invalid_op(struct cpu_user_regs
+ void (*fn)(struct cpu_user_regs *) = bug_ptr(bug);
+
+ fn(regs);
+- fixup_exception_return(regs, (unsigned long)eip);
++ fixup_exception_return(regs, (unsigned long)eip, 0);
+ return;
+ }
+
+@@ -1277,7 +1328,7 @@ void do_invalid_op(struct cpu_user_regs
+ case BUGFRAME_warn:
+ printk("Xen WARN at %s%s:%d\n", prefix, filename, lineno);
+ show_execution_state(regs);
+- fixup_exception_return(regs, (unsigned long)eip);
++ fixup_exception_return(regs, (unsigned long)eip, 0);
+ return;
+
+ case BUGFRAME_bug:
+--- a/xen/include/asm-x86/uaccess.h
++++ b/xen/include/asm-x86/uaccess.h
+@@ -421,7 +421,8 @@ union stub_exception_token {
+ unsigned long raw;
+ };
+
+-extern unsigned long search_exception_table(const struct cpu_user_regs *regs);
++extern unsigned long search_exception_table(const struct cpu_user_regs *regs,
++ unsigned long *stub_ra);
+ extern void sort_exception_tables(void);
+ extern void sort_exception_table(struct exception_table_entry *start,
+ const struct exception_table_entry *stop);
diff --git a/main/xen/xsa452-4.16-1.patch b/main/xen/xsa452-4.16-1.patch
new file mode 100644
index 00000000000..d86b5eff8e1
--- /dev/null
+++ b/main/xen/xsa452-4.16-1.patch
@@ -0,0 +1,304 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/entry: Introduce EFRAME_* constants
+
+restore_all_guest() does a lot of manipulation of the stack after popping the
+GPRs, and uses raw %rsp displacements to do so. Also, almost all entry paths
+use raw %rsp displacements prior to pushing GPRs.
+
+Provide better mnemonics, to aid readability and reduce the chance of errors
+when editing.
+
+No functional change. The resulting binary is identical.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 37541208f119a9c552c6c6c3246ea61be0d44035)
+
+diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
+index 287dac101ad4..31fa63b77fd1 100644
+--- a/xen/arch/x86/x86_64/asm-offsets.c
++++ b/xen/arch/x86/x86_64/asm-offsets.c
+@@ -51,6 +51,23 @@ void __dummy__(void)
+ OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, es);
+ BLANK();
+
++ /*
++ * EFRAME_* is for the entry/exit logic where %rsp is pointing at
++ * UREGS_error_code and GPRs are still/already guest values.
++ */
++#define OFFSET_EF(sym, mem) \
++ DEFINE(sym, offsetof(struct cpu_user_regs, mem) - \
++ offsetof(struct cpu_user_regs, error_code))
++
++ OFFSET_EF(EFRAME_entry_vector, entry_vector);
++ OFFSET_EF(EFRAME_rip, rip);
++ OFFSET_EF(EFRAME_cs, cs);
++ OFFSET_EF(EFRAME_eflags, eflags);
++ OFFSET_EF(EFRAME_rsp, rsp);
++ BLANK();
++
++#undef OFFSET_EF
++
+ OFFSET(VCPU_processor, struct vcpu, processor);
+ OFFSET(VCPU_domain, struct vcpu, domain);
+ OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info);
+diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
+index 253bb1688c4f..7c211314d885 100644
+--- a/xen/arch/x86/x86_64/compat/entry.S
++++ b/xen/arch/x86/x86_64/compat/entry.S
+@@ -15,7 +15,7 @@ ENTRY(entry_int82)
+ ENDBR64
+ ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+ pushq $0
+- movl $HYPERCALL_VECTOR, 4(%rsp)
++ movl $HYPERCALL_VECTOR, EFRAME_entry_vector(%rsp)
+ SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */
+
+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index 837a31b40524..10f11986d8b9 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -190,15 +190,15 @@ restore_all_guest:
+ SPEC_CTRL_EXIT_TO_PV /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
+
+ RESTORE_ALL
+- testw $TRAP_syscall,4(%rsp)
++ testw $TRAP_syscall, EFRAME_entry_vector(%rsp)
+ jz iret_exit_to_guest
+
+- movq 24(%rsp),%r11 # RFLAGS
++ mov EFRAME_eflags(%rsp), %r11
+ andq $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), %r11
+ orq $X86_EFLAGS_IF,%r11
+
+ /* Don't use SYSRET path if the return address is not canonical. */
+- movq 8(%rsp),%rcx
++ mov EFRAME_rip(%rsp), %rcx
+ sarq $47,%rcx
+ incl %ecx
+ cmpl $1,%ecx
+@@ -213,20 +213,20 @@ restore_all_guest:
+ ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK
+ #endif
+
+- movq 8(%rsp), %rcx # RIP
+- cmpw $FLAT_USER_CS32,16(%rsp)# CS
+- movq 32(%rsp),%rsp # RSP
++ mov EFRAME_rip(%rsp), %rcx
++ cmpw $FLAT_USER_CS32, EFRAME_cs(%rsp)
++ mov EFRAME_rsp(%rsp), %rsp
+ je 1f
+ sysretq
+ 1: sysretl
+
+ ALIGN
+ .Lrestore_rcx_iret_exit_to_guest:
+- movq 8(%rsp), %rcx # RIP
++ mov EFRAME_rip(%rsp), %rcx
+ /* No special register assumptions. */
+ iret_exit_to_guest:
+- andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), 24(%rsp)
+- orl $X86_EFLAGS_IF,24(%rsp)
++ andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp)
++ orl $X86_EFLAGS_IF, EFRAME_eflags(%rsp)
+ addq $8,%rsp
+ .Lft0: iretq
+ _ASM_PRE_EXTABLE(.Lft0, handle_exception)
+@@ -257,7 +257,7 @@ ENTRY(lstar_enter)
+ pushq $FLAT_KERNEL_CS64
+ pushq %rcx
+ pushq $0
+- movl $TRAP_syscall, 4(%rsp)
++ movl $TRAP_syscall, EFRAME_entry_vector(%rsp)
+ SAVE_ALL
+
+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
+@@ -294,7 +294,7 @@ ENTRY(cstar_enter)
+ pushq $FLAT_USER_CS32
+ pushq %rcx
+ pushq $0
+- movl $TRAP_syscall, 4(%rsp)
++ movl $TRAP_syscall, EFRAME_entry_vector(%rsp)
+ SAVE_ALL
+
+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
+@@ -335,7 +335,7 @@ GLOBAL(sysenter_eflags_saved)
+ pushq $3 /* ring 3 null cs */
+ pushq $0 /* null rip */
+ pushq $0
+- movl $TRAP_syscall, 4(%rsp)
++ movl $TRAP_syscall, EFRAME_entry_vector(%rsp)
+ SAVE_ALL
+
+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
+@@ -389,7 +389,7 @@ ENTRY(int80_direct_trap)
+ ENDBR64
+ ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+ pushq $0
+- movl $0x80, 4(%rsp)
++ movl $0x80, EFRAME_entry_vector(%rsp)
+ SAVE_ALL
+
+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */
+@@ -707,7 +707,7 @@ ENTRY(common_interrupt)
+
+ ENTRY(page_fault)
+ ENDBR64
+- movl $TRAP_page_fault,4(%rsp)
++ movl $TRAP_page_fault, EFRAME_entry_vector(%rsp)
+ /* No special register assumptions. */
+ GLOBAL(handle_exception)
+ ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+@@ -849,90 +849,90 @@ FATAL_exception_with_ints_disabled:
+ ENTRY(divide_error)
+ ENDBR64
+ pushq $0
+- movl $TRAP_divide_error,4(%rsp)
++ movl $TRAP_divide_error, EFRAME_entry_vector(%rsp)
+ jmp handle_exception
+
+ ENTRY(coprocessor_error)
+ ENDBR64
+ pushq $0
+- movl $TRAP_copro_error,4(%rsp)
++ movl $TRAP_copro_error, EFRAME_entry_vector(%rsp)
+ jmp handle_exception
+
+ ENTRY(simd_coprocessor_error)
+ ENDBR64
+ pushq $0
+- movl $TRAP_simd_error,4(%rsp)
++ movl $TRAP_simd_error, EFRAME_entry_vector(%rsp)
+ jmp handle_exception
+
+ ENTRY(device_not_available)
+ ENDBR64
+ pushq $0
+- movl $TRAP_no_device,4(%rsp)
++ movl $TRAP_no_device, EFRAME_entry_vector(%rsp)
+ jmp handle_exception
+
+ ENTRY(debug)
+ ENDBR64
+ pushq $0
+- movl $TRAP_debug,4(%rsp)
++ movl $TRAP_debug, EFRAME_entry_vector(%rsp)
+ jmp handle_ist_exception
+
+ ENTRY(int3)
+ ENDBR64
+ pushq $0
+- movl $TRAP_int3,4(%rsp)
++ movl $TRAP_int3, EFRAME_entry_vector(%rsp)
+ jmp handle_exception
+
+ ENTRY(overflow)
+ ENDBR64
+ pushq $0
+- movl $TRAP_overflow,4(%rsp)
++ movl $TRAP_overflow, EFRAME_entry_vector(%rsp)
+ jmp handle_exception
+
+ ENTRY(bounds)
+ ENDBR64
+ pushq $0
+- movl $TRAP_bounds,4(%rsp)
++ movl $TRAP_bounds, EFRAME_entry_vector(%rsp)
+ jmp handle_exception
+
+ ENTRY(invalid_op)
+ ENDBR64
+ pushq $0
+- movl $TRAP_invalid_op,4(%rsp)
++ movl $TRAP_invalid_op, EFRAME_entry_vector(%rsp)
+ jmp handle_exception
+
+ ENTRY(invalid_TSS)
+ ENDBR64
+- movl $TRAP_invalid_tss,4(%rsp)
++ movl $TRAP_invalid_tss, EFRAME_entry_vector(%rsp)
+ jmp handle_exception
+
+ ENTRY(segment_not_present)
+ ENDBR64
+- movl $TRAP_no_segment,4(%rsp)
++ movl $TRAP_no_segment, EFRAME_entry_vector(%rsp)
+ jmp handle_exception
+
+ ENTRY(stack_segment)
+ ENDBR64
+- movl $TRAP_stack_error,4(%rsp)
++ movl $TRAP_stack_error, EFRAME_entry_vector(%rsp)
+ jmp handle_exception
+
+ ENTRY(general_protection)
+ ENDBR64
+- movl $TRAP_gp_fault,4(%rsp)
++ movl $TRAP_gp_fault, EFRAME_entry_vector(%rsp)
+ jmp handle_exception
+
+ ENTRY(alignment_check)
+ ENDBR64
+- movl $TRAP_alignment_check,4(%rsp)
++ movl $TRAP_alignment_check, EFRAME_entry_vector(%rsp)
+ jmp handle_exception
+
+ ENTRY(entry_CP)
+ ENDBR64
+- movl $X86_EXC_CP, 4(%rsp)
++ movl $X86_EXC_CP, EFRAME_entry_vector(%rsp)
+ jmp handle_exception
+
+ ENTRY(double_fault)
+ ENDBR64
+- movl $TRAP_double_fault,4(%rsp)
++ movl $TRAP_double_fault, EFRAME_entry_vector(%rsp)
+ /* Set AC to reduce chance of further SMAP faults */
+ ALTERNATIVE "", stac, X86_FEATURE_XEN_SMAP
+ SAVE_ALL
+@@ -958,7 +958,7 @@ ENTRY(double_fault)
+ .pushsection .init.text, "ax", @progbits
+ ENTRY(early_page_fault)
+ ENDBR64
+- movl $TRAP_page_fault,4(%rsp)
++ movl $TRAP_page_fault, EFRAME_entry_vector(%rsp)
+ SAVE_ALL
+ movq %rsp,%rdi
+ call do_early_page_fault
+@@ -968,7 +968,7 @@ ENTRY(early_page_fault)
+ ENTRY(nmi)
+ ENDBR64
+ pushq $0
+- movl $TRAP_nmi,4(%rsp)
++ movl $TRAP_nmi, EFRAME_entry_vector(%rsp)
+ handle_ist_exception:
+ ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP
+ SAVE_ALL
+@@ -1075,7 +1075,7 @@ handle_ist_exception:
+ ENTRY(machine_check)
+ ENDBR64
+ pushq $0
+- movl $TRAP_machine_check,4(%rsp)
++ movl $TRAP_machine_check, EFRAME_entry_vector(%rsp)
+ jmp handle_ist_exception
+
+ /* No op trap handler. Required for kexec crash path. */
+@@ -1112,7 +1112,7 @@ autogen_stubs: /* Automatically generated stubs. */
+ 1:
+ ENDBR64
+ pushq $0
+- movb $vec,4(%rsp)
++ movb $vec, EFRAME_entry_vector(%rsp)
+ jmp common_interrupt
+
+ entrypoint 1b
+@@ -1126,7 +1126,7 @@ autogen_stubs: /* Automatically generated stubs. */
+ test $8,%spl /* 64bit exception frames are 16 byte aligned, but the word */
+ jz 2f /* size is 8 bytes. Check whether the processor gave us an */
+ pushq $0 /* error code, and insert an empty one if not. */
+-2: movb $vec,4(%rsp)
++2: movb $vec, EFRAME_entry_vector(%rsp)
+ jmp handle_exception
+
+ entrypoint 1b
diff --git a/main/xen/xsa452-4.16-2.patch b/main/xen/xsa452-4.16-2.patch
new file mode 100644
index 00000000000..5df731528a4
--- /dev/null
+++ b/main/xen/xsa452-4.16-2.patch
@@ -0,0 +1,87 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/cpu-policy: Allow for levelling of VERW side effects
+
+MD_CLEAR and FB_CLEAR need OR-ing across a migrate pool. Allow this, by
+having them unconditionally set in max, with the host values reflected in
+default. Annotate the bits as having special properties.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+(cherry picked from commit de17162cafd27f2865a3102a2ec0f386a02ed03d)
+
+diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
+index f38063b667b0..34f778dbafbb 100644
+--- a/xen/arch/x86/cpu-policy.c
++++ b/xen/arch/x86/cpu-policy.c
+@@ -434,6 +434,16 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs)
+ __set_bit(X86_FEATURE_RSBA, fs);
+ __set_bit(X86_FEATURE_RRSBA, fs);
+
++ /*
++ * These bits indicate that the VERW instruction may have gained
++ * scrubbing side effects. With pooling, they mean "you might migrate
++ * somewhere where scrubbing is necessary", and may need exposing on
++ * unaffected hardware. This is fine, because the VERW instruction
++ * has been around since the 286.
++ */
++ __set_bit(X86_FEATURE_MD_CLEAR, fs);
++ __set_bit(X86_FEATURE_FB_CLEAR, fs);
++
+ /*
+ * The Gather Data Sampling microcode mitigation (August 2023) has an
+ * adverse performance impact on the CLWB instruction on SKX/CLX/CPX.
+@@ -468,6 +478,20 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs)
+ cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) )
+ __clear_bit(X86_FEATURE_RDRAND, fs);
+
++ /*
++ * These bits indicate that the VERW instruction may have gained
++ * scrubbing side effects. The max policy has them set for migration
++ * reasons, so reset the default policy back to the host values in
++ * case we're unaffected.
++ */
++ __clear_bit(X86_FEATURE_MD_CLEAR, fs);
++ if ( cpu_has_md_clear )
++ __set_bit(X86_FEATURE_MD_CLEAR, fs);
++
++ __clear_bit(X86_FEATURE_FB_CLEAR, fs);
++ if ( cpu_has_fb_clear )
++ __set_bit(X86_FEATURE_FB_CLEAR, fs);
++
+ /*
+ * The Gather Data Sampling microcode mitigation (August 2023) has an
+ * adverse performance impact on the CLWB instruction on SKX/CLX/CPX.
+diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
+index 1ac3d3a1f946..81ac4d76eea6 100644
+--- a/xen/include/asm-x86/cpufeature.h
++++ b/xen/include/asm-x86/cpufeature.h
+@@ -134,6 +134,7 @@
+ #define cpu_has_avx512_4fmaps boot_cpu_has(X86_FEATURE_AVX512_4FMAPS)
+ #define cpu_has_avx512_vp2intersect boot_cpu_has(X86_FEATURE_AVX512_VP2INTERSECT)
+ #define cpu_has_srbds_ctrl boot_cpu_has(X86_FEATURE_SRBDS_CTRL)
++#define cpu_has_md_clear boot_cpu_has(X86_FEATURE_MD_CLEAR)
+ #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)
+ #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)
+ #define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE)
+diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
+index 0ee1d1d90330..2906eaa6c290 100644
+--- a/xen/include/public/arch-x86/cpufeatureset.h
++++ b/xen/include/public/arch-x86/cpufeatureset.h
+@@ -275,7 +275,7 @@ XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single
+ XEN_CPUFEATURE(FSRM, 9*32+ 4) /*A Fast Short REP MOVS */
+ XEN_CPUFEATURE(AVX512_VP2INTERSECT, 9*32+8) /*a VP2INTERSECT{D,Q} insns */
+ XEN_CPUFEATURE(SRBDS_CTRL, 9*32+ 9) /* MSR_MCU_OPT_CTRL and RNGDS_MITG_DIS. */
+-XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*A VERW clears microarchitectural buffers */
++XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*!A VERW clears microarchitectural buffers */
+ XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. */
+ XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */
+ XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*a SERIALIZE insn */
+@@ -329,7 +329,7 @@ XEN_CPUFEATURE(DOITM, 16*32+12) /* Data Operand Invariant Timing
+ XEN_CPUFEATURE(SBDR_SSDP_NO, 16*32+13) /*A No Shared Buffer Data Read or Sideband Stale Data Propagation */
+ XEN_CPUFEATURE(FBSDP_NO, 16*32+14) /*A No Fill Buffer Stale Data Propagation */
+ XEN_CPUFEATURE(PSDP_NO, 16*32+15) /*A No Primary Stale Data Propagation */
+-XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*A Fill Buffers cleared by VERW */
++XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*!A Fill Buffers cleared by VERW */
+ XEN_CPUFEATURE(FB_CLEAR_CTRL, 16*32+18) /* MSR_OPT_CPU_CTRL.FB_CLEAR_DIS */
+ XEN_CPUFEATURE(RRSBA, 16*32+19) /*! Restricted RSB Alternative */
+ XEN_CPUFEATURE(BHI_NO, 16*32+20) /*A No Branch History Injection */
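
The xsa452-4.16-2 patch above levels MD_CLEAR and FB_CLEAR by setting them unconditionally in the max policy, while the default policy keeps the host's own values. A minimal, self-contained C sketch of that levelling idea is below; the bit values and function names are invented for illustration and are not Xen or toolstack code.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define F_MD_CLEAR  (1u << 0)   /* invented bit positions, example only */
#define F_FB_CLEAR  (1u << 1)

/*
 * "max" is what may be offered to a guest that can migrate anywhere in the
 * pool: OR the hosts together so the scrubbing hint is always available.
 */
static uint32_t pool_max_policy(const uint32_t *host_feats, size_t n)
{
    uint32_t max = 0;

    for ( size_t i = 0; i < n; i++ )
        max |= host_feats[i];

    return max;
}

int main(void)
{
    uint32_t hosts[] = { F_MD_CLEAR, 0, F_FB_CLEAR };

    /* The default policy stays at the local host's values (host 1 has none). */
    printf("max     = %#x\n", pool_max_policy(hosts, 3));  /* 0x3 */
    printf("default = %#x\n", hosts[1]);                   /* 0   */
    return 0;
}
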
diff --git a/main/xen/xsa452-4.16-3.patch b/main/xen/xsa452-4.16-3.patch
new file mode 100644
index 00000000000..bd15964146a
--- /dev/null
+++ b/main/xen/xsa452-4.16-3.patch
@@ -0,0 +1,135 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/vmx: Perform VERW flushing later in the VMExit path
+
+Broken out of the following patch because this change is subtle enough on its
+own. See it for the rationale of why we're moving VERW.
+
+As for how, extend the trick already used to hold one condition in
+flags (RESUME vs LAUNCH) through the POPing of GPRs.
+
+Move the MOV CR earlier. Intel specify flags to be undefined across it.
+
+Encode the two conditions we want using SF and PF. See the code comment for
+exactly how.
+
+Leave a comment to explain the lack of any content around
+SPEC_CTRL_EXIT_TO_VMX, but leave the block in place. Sod's law says if we
+delete it, we'll need to reintroduce it.
+
+This is part of XSA-452 / CVE-2023-28746.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 475fa20b7384464210f42bad7195f87bd6f1c63f)
+
+diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
+index 5f5de45a1309..cdde76e13892 100644
+--- a/xen/arch/x86/hvm/vmx/entry.S
++++ b/xen/arch/x86/hvm/vmx/entry.S
+@@ -87,17 +87,39 @@ UNLIKELY_END(realmode)
+
+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+ /* SPEC_CTRL_EXIT_TO_VMX Req: %rsp=regs/cpuinfo Clob: */
+- DO_SPEC_CTRL_COND_VERW
++ /*
++ * All speculation safety work happens to be elsewhere. VERW is after
++ * popping the GPRs, while restoring the guest MSR_SPEC_CTRL is left
++ * to the MSR load list.
++ */
+
+ mov VCPU_hvm_guest_cr2(%rbx),%rax
++ mov %rax, %cr2
++
++ /*
++ * We need to perform two conditional actions (VERW, and Resume vs
++ * Launch) after popping GPRs. With some cunning, we can encode both
++ * of these in eflags together.
++ *
++ * Parity is only calculated over the bottom byte of the answer, while
++ * Sign is simply the top bit.
++ *
++ * Therefore, the final OR instruction ends up producing:
++ * SF = VCPU_vmx_launched
++ * PF = !SCF_verw
++ */
++ BUILD_BUG_ON(SCF_verw & ~0xff)
++ movzbl VCPU_vmx_launched(%rbx), %ecx
++ shl $31, %ecx
++ movzbl CPUINFO_spec_ctrl_flags(%rsp), %eax
++ and $SCF_verw, %eax
++ or %eax, %ecx
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+- mov %rax,%cr2
+- cmpb $0,VCPU_vmx_launched(%rbx)
+ pop %rbx
+ pop %r11
+ pop %r10
+@@ -108,7 +130,13 @@ UNLIKELY_END(realmode)
+ pop %rdx
+ pop %rsi
+ pop %rdi
+- je .Lvmx_launch
++
++ jpe .L_skip_verw
++ /* VERW clobbers ZF, but preserves all others, including SF. */
++ verw STK_REL(CPUINFO_verw_sel, CPUINFO_error_code)(%rsp)
++.L_skip_verw:
++
++ jns .Lvmx_launch
+
+ /*.Lvmx_resume:*/
+ VMRESUME
+diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
+index 31fa63b77fd1..a4e94d693024 100644
+--- a/xen/arch/x86/x86_64/asm-offsets.c
++++ b/xen/arch/x86/x86_64/asm-offsets.c
+@@ -135,6 +135,7 @@ void __dummy__(void)
+ #endif
+
+ OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs);
++ OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code);
+ OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel);
+ OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
+ OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset);
+diff --git a/xen/include/asm-x86/asm_defns.h b/xen/include/asm-x86/asm_defns.h
+index d9431180cfba..abc6822b08c8 100644
+--- a/xen/include/asm-x86/asm_defns.h
++++ b/xen/include/asm-x86/asm_defns.h
+@@ -81,6 +81,14 @@ register unsigned long current_stack_pointer asm("rsp");
+
+ #ifdef __ASSEMBLY__
+
++.macro BUILD_BUG_ON condstr, cond:vararg
++ .if \cond
++ .error "Condition \"\condstr\" not satisfied"
++ .endif
++.endm
++/* preprocessor macro to make error message more user friendly */
++#define BUILD_BUG_ON(cond) BUILD_BUG_ON #cond, cond
++
+ #ifdef HAVE_AS_QUOTED_SYM
+ #define SUBSECTION_LBL(tag) \
+ .ifndef .L.tag; \
+diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
+index 0e69971f663f..e807ff6d1db2 100644
+--- a/xen/include/asm-x86/spec_ctrl_asm.h
++++ b/xen/include/asm-x86/spec_ctrl_asm.h
+@@ -169,6 +169,13 @@
+ #endif
+ .endm
+
++/*
++ * Helper to improve the readability of stack displacements with %rsp in
++ * unusual positions. Both @field and @top_of_stack should be constants from
++ * the same object. @top_of_stack should be where %rsp is currently pointing.
++ */
++#define STK_REL(field, top_of_stk) ((field) - (top_of_stk))
++
+ .macro DO_SPEC_CTRL_COND_VERW
+ /*
+ * Requires %rsp=cpuinfo
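
The xsa452-4.16-3 patch above encodes two booleans into eflags with a single OR: the sign flag ends up carrying VCPU_vmx_launched, and the parity flag carries !SCF_verw, because parity is computed only over the low byte of the result. A standalone C model of that encoding follows; the SCF_verw value and the helper names are placeholders, and this is illustrative only, not the hypervisor code itself.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define SCF_verw (1u << 3)   /* placeholder: any single bit in the low byte */

struct flags { bool sf, pf; };

/* XOR-fold the low byte: returns true when it has an even number of set bits. */
static bool even_parity8(uint8_t b)
{
    b ^= b >> 4;
    b ^= b >> 2;
    b ^= b >> 1;
    return !(b & 1);
}

static struct flags encode(bool launched, uint8_t spec_ctrl_flags)
{
    uint32_t ecx = (uint32_t)launched << 31;       /* movzbl + shl $31, %ecx   */
    uint32_t eax = spec_ctrl_flags & SCF_verw;     /* movzbl + and $SCF_verw   */
    uint32_t res = ecx | eax;                      /* or %eax, %ecx sets SF/PF */

    return (struct flags){
        .sf = !!(res & (1u << 31)),                /* SF == VCPU_vmx_launched  */
        .pf = even_parity8(res & 0xff),            /* PF == !SCF_verw          */
    };
}

int main(void)
{
    for ( int launched = 0; launched <= 1; launched++ )
        for ( int verw = 0; verw <= 1; verw++ )
        {
            struct flags f = encode(launched, verw ? SCF_verw : 0);

            assert(f.sf == launched);   /* jns taken -> VMLAUNCH when clear */
            assert(f.pf == !verw);      /* jpe taken -> skip the VERW       */
        }
    return 0;
}
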
diff --git a/main/xen/xsa452-4.16-4.patch b/main/xen/xsa452-4.16-4.patch
new file mode 100644
index 00000000000..59a124067df
--- /dev/null
+++ b/main/xen/xsa452-4.16-4.patch
@@ -0,0 +1,197 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/spec-ctrl: Perform VERW flushing later in exit paths
+
+On parts vulnerable to RFDS, VERW's side effects are extended to scrub all
+non-architectural entries in various Physical Register Files. To remove all
+of Xen's values, the VERW must be after popping the GPRs.
+
+Rework SPEC_CTRL_COND_VERW to default to a CPUINFO_error_code %rsp position,
+but with overrides for other contexts. Identify that it clobbers eflags; this
+is particularly relevant for the SYSRET path.
+
+For the IST exit return to Xen, have the main SPEC_CTRL_EXIT_TO_XEN put a
+shadow copy of spec_ctrl_flags, as GPRs can't be used at the point we want to
+issue the VERW.
+
+This is part of XSA-452 / CVE-2023-28746.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 0a666cf2cd99df6faf3eebc81a1fc286e4eca4c7)
+
+diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
+index a4e94d693024..4cd5938d7b9d 100644
+--- a/xen/arch/x86/x86_64/asm-offsets.c
++++ b/xen/arch/x86/x86_64/asm-offsets.c
+@@ -55,14 +55,22 @@ void __dummy__(void)
+ * EFRAME_* is for the entry/exit logic where %rsp is pointing at
+ * UREGS_error_code and GPRs are still/already guest values.
+ */
+-#define OFFSET_EF(sym, mem) \
++#define OFFSET_EF(sym, mem, ...) \
+ DEFINE(sym, offsetof(struct cpu_user_regs, mem) - \
+- offsetof(struct cpu_user_regs, error_code))
++ offsetof(struct cpu_user_regs, error_code) __VA_ARGS__)
+
+ OFFSET_EF(EFRAME_entry_vector, entry_vector);
+ OFFSET_EF(EFRAME_rip, rip);
+ OFFSET_EF(EFRAME_cs, cs);
+ OFFSET_EF(EFRAME_eflags, eflags);
++
++ /*
++ * These aren't real fields. They're spare space, used by the IST
++ * exit-to-xen path.
++ */
++ OFFSET_EF(EFRAME_shadow_scf, eflags, +4);
++ OFFSET_EF(EFRAME_shadow_sel, eflags, +6);
++
+ OFFSET_EF(EFRAME_rsp, rsp);
+ BLANK();
+
+@@ -136,6 +144,7 @@ void __dummy__(void)
+
+ OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs);
+ OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code);
++ OFFSET(CPUINFO_rip, struct cpu_info, guest_cpu_user_regs.rip);
+ OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel);
+ OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
+ OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset);
+diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
+index 7c211314d885..3b2fbcd8733a 100644
+--- a/xen/arch/x86/x86_64/compat/entry.S
++++ b/xen/arch/x86/x86_64/compat/entry.S
+@@ -161,6 +161,12 @@ ENTRY(compat_restore_all_guest)
+ SPEC_CTRL_EXIT_TO_PV /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
+
+ RESTORE_ALL adj=8 compat=1
++
++ /* Account for ev/ec having already been popped off the stack. */
++ SPEC_CTRL_COND_VERW \
++ scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_rip), \
++ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_rip)
++
+ .Lft0: iretq
+ _ASM_PRE_EXTABLE(.Lft0, handle_exception)
+
+diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
+index 10f11986d8b9..9b1fa9ed192f 100644
+--- a/xen/arch/x86/x86_64/entry.S
++++ b/xen/arch/x86/x86_64/entry.S
+@@ -214,6 +214,9 @@ restore_all_guest:
+ #endif
+
+ mov EFRAME_rip(%rsp), %rcx
++
++ SPEC_CTRL_COND_VERW /* Req: %rsp=eframe Clob: efl */
++
+ cmpw $FLAT_USER_CS32, EFRAME_cs(%rsp)
+ mov EFRAME_rsp(%rsp), %rsp
+ je 1f
+@@ -227,6 +230,9 @@ restore_all_guest:
+ iret_exit_to_guest:
+ andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp)
+ orl $X86_EFLAGS_IF, EFRAME_eflags(%rsp)
++
++ SPEC_CTRL_COND_VERW /* Req: %rsp=eframe Clob: efl */
++
+ addq $8,%rsp
+ .Lft0: iretq
+ _ASM_PRE_EXTABLE(.Lft0, handle_exception)
+@@ -670,9 +676,22 @@ UNLIKELY_START(ne, exit_cr3)
+ UNLIKELY_END(exit_cr3)
+
+ /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
+- SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end, Clob: abcd */
++ SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end %rsp=regs, Clob: abcd */
+
+ RESTORE_ALL adj=8
++
++ /*
++ * When the CPU pushed this exception frame, it zero-extended eflags.
++ * For an IST exit, SPEC_CTRL_EXIT_TO_XEN stashed shadow copies of
++ * spec_ctrl_flags and verw_sel above eflags, as we can't use any GPRs,
++ * and we're at a random place on the stack, not in a CPUINFO block.
++ *
++ * Account for ev/ec having already been popped off the stack.
++ */
++ SPEC_CTRL_COND_VERW \
++ scf=STK_REL(EFRAME_shadow_scf, EFRAME_rip), \
++ sel=STK_REL(EFRAME_shadow_sel, EFRAME_rip)
++
+ iretq
+
+ ENTRY(common_interrupt)
+diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
+index e807ff6d1db2..6e7725c11f3a 100644
+--- a/xen/include/asm-x86/spec_ctrl_asm.h
++++ b/xen/include/asm-x86/spec_ctrl_asm.h
+@@ -176,16 +176,23 @@
+ */
+ #define STK_REL(field, top_of_stk) ((field) - (top_of_stk))
+
+-.macro DO_SPEC_CTRL_COND_VERW
++.macro SPEC_CTRL_COND_VERW \
++ scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_error_code), \
++ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_error_code)
+ /*
+- * Requires %rsp=cpuinfo
++ * Requires \scf and \sel as %rsp-relative expressions
++ * Clobbers eflags
++ *
++ * VERW needs to run after guest GPRs have been restored, where only %rsp is
++ * good to use. Default to expecting %rsp pointing at CPUINFO_error_code.
++ * Contexts where this is not true must provide an alternative \scf and \sel.
+ *
+ * Issue a VERW for its flushing side effect, if indicated. This is a Spectre
+ * v1 gadget, but the IRET/VMEntry is serialising.
+ */
+- testb $SCF_verw, CPUINFO_spec_ctrl_flags(%rsp)
++ testb $SCF_verw, \scf(%rsp)
+ jz .L\@_verw_skip
+- verw CPUINFO_verw_sel(%rsp)
++ verw \sel(%rsp)
+ .L\@_verw_skip:
+ .endm
+
+@@ -303,8 +310,6 @@
+ */
+ ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV
+
+- DO_SPEC_CTRL_COND_VERW
+-
+ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
+ .endm
+
+@@ -384,7 +389,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ */
+ .macro SPEC_CTRL_EXIT_TO_XEN
+ /*
+- * Requires %r12=ist_exit, %r14=stack_end
++ * Requires %r12=ist_exit, %r14=stack_end, %rsp=regs
+ * Clobbers %rax, %rbx, %rcx, %rdx
+ */
+ movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx
+@@ -412,11 +417,18 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
+ test %r12, %r12
+ jz .L\@_skip_ist_exit
+
+- /* Logically DO_SPEC_CTRL_COND_VERW but without the %rsp=cpuinfo dependency */
+- testb $SCF_verw, %bl
+- jz .L\@_skip_verw
+- verw STACK_CPUINFO_FIELD(verw_sel)(%r14)
+-.L\@_skip_verw:
++ /*
++ * Stash SCF and verw_sel above eflags in the case of an IST_exit. The
++ * VERW logic needs to run after guest GPRs have been restored; i.e. where
++ * we cannot use %r12 or %r14 for the purposes they have here.
++ *
++ * When the CPU pushed this exception frame, it zero-extended eflags.
++ * Therefore it is safe for the VERW logic to look at the stashed SCF
++ * outside of the ist_exit condition. Also, this stashing won't influence
++ * any other restore_all_guest() paths.
++ */
++ or $(__HYPERVISOR_DS32 << 16), %ebx
++ mov %ebx, UREGS_eflags + 4(%rsp) /* EFRAME_shadow_scf/sel */
+
+ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
+
diff --git a/main/xen/xsa452-4.16-5.patch b/main/xen/xsa452-4.16-5.patch
new file mode 100644
index 00000000000..48ce9b02cf3
--- /dev/null
+++ b/main/xen/xsa452-4.16-5.patch
@@ -0,0 +1,237 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/spec-ctrl: Rename VERW related options
+
+VERW is going to be used for a 3rd purpose, and the existing nomenclature
+didn't survive the Stale MMIO issues terribly well.
+
+Rename the command line option from `md-clear=` to `verw=`. This is more
+consistent with other options which tend to be named based on what they're
+doing, not which feature enumeration they use behind the scenes. Retain
+`md-clear=` as a deprecated alias.
+
+Rename opt_md_clear_{pv,hvm} and opt_fb_clear_mmio to opt_verw_{pv,hvm,mmio},
+which has a side effect of making spec_ctrl_init_domain() rather clearer to
+follow.
+
+No functional change.
+
+This is part of XSA-452 / CVE-2023-28746.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit f7603ca252e4226739eb3129a5290ee3da3f8ea4)
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index a7a1362bac28..029002fa82d6 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -2260,7 +2260,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
+
+ ### spec-ctrl (x86)
+ > `= List of [ <bool>, xen=<bool>, {pv,hvm}=<bool>,
+-> {msr-sc,rsb,md-clear,ibpb-entry}=<bool>|{pv,hvm}=<bool>,
++> {msr-sc,rsb,verw,ibpb-entry}=<bool>|{pv,hvm}=<bool>,
+ > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
+ > eager-fpu,l1d-flush,branch-harden,srb-lock,
+ > unpriv-mmio,gds-mit,div-scrub}=<bool> ]`
+@@ -2285,7 +2285,7 @@ in place for guests to use.
+
+ Use of a positive boolean value for either of these options is invalid.
+
+-The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `md-clear=` and `ibpb-entry=` options
++The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `verw=` and `ibpb-entry=` options
+ offer fine grained control over the primitives by Xen. These impact Xen's
+ ability to protect itself, and/or Xen's ability to virtualise support for
+ guests to use.
+@@ -2302,11 +2302,12 @@ guests to use.
+ guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc.
+ * `rsb=` offers control over whether to overwrite the Return Stack Buffer /
+ Return Address Stack on entry to Xen and on idle.
+-* `md-clear=` offers control over whether to use VERW to flush
+- microarchitectural buffers on idle and exit from Xen. *Note: For
+- compatibility with development versions of this fix, `mds=` is also accepted
+- on Xen 4.12 and earlier as an alias. Consult vendor documentation in
+- preference to here.*
++* `verw=` offers control over whether to use VERW for its scrubbing side
++ effects at appropriate privilege transitions. The exact side effects are
++ microarchitecture and microcode specific. *Note: `md-clear=` is accepted as
++ a deprecated alias. For compatibility with development versions of XSA-297,
++ `mds=` is also accepted on Xen 4.12 and earlier as an alias. Consult vendor
++ documentation in preference to here.*
+ * `ibpb-entry=` offers control over whether IBPB (Indirect Branch Prediction
+ Barrier) is used on entry to Xen. This is used by default on hardware
+ vulnerable to Branch Type Confusion, and hardware vulnerable to Speculative
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 6e82a126a3e2..292b5b1c7ba1 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -37,8 +37,8 @@ static bool __initdata opt_msr_sc_pv = true;
+ static bool __initdata opt_msr_sc_hvm = true;
+ static int8_t __initdata opt_rsb_pv = -1;
+ static bool __initdata opt_rsb_hvm = true;
+-static int8_t __read_mostly opt_md_clear_pv = -1;
+-static int8_t __read_mostly opt_md_clear_hvm = -1;
++static int8_t __read_mostly opt_verw_pv = -1;
++static int8_t __read_mostly opt_verw_hvm = -1;
+
+ static int8_t __read_mostly opt_ibpb_entry_pv = -1;
+ static int8_t __read_mostly opt_ibpb_entry_hvm = -1;
+@@ -77,7 +77,7 @@ static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination.
+
+ static int8_t __initdata opt_srb_lock = -1;
+ static bool __initdata opt_unpriv_mmio;
+-static bool __read_mostly opt_fb_clear_mmio;
++static bool __read_mostly opt_verw_mmio;
+ static int8_t __initdata opt_gds_mit = -1;
+ static int8_t __initdata opt_div_scrub = -1;
+
+@@ -119,8 +119,8 @@ static int __init parse_spec_ctrl(const char *s)
+ disable_common:
+ opt_rsb_pv = false;
+ opt_rsb_hvm = false;
+- opt_md_clear_pv = 0;
+- opt_md_clear_hvm = 0;
++ opt_verw_pv = 0;
++ opt_verw_hvm = 0;
+ opt_ibpb_entry_pv = 0;
+ opt_ibpb_entry_hvm = 0;
+ opt_ibpb_entry_dom0 = false;
+@@ -151,14 +151,14 @@ static int __init parse_spec_ctrl(const char *s)
+ {
+ opt_msr_sc_pv = val;
+ opt_rsb_pv = val;
+- opt_md_clear_pv = val;
++ opt_verw_pv = val;
+ opt_ibpb_entry_pv = val;
+ }
+ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 )
+ {
+ opt_msr_sc_hvm = val;
+ opt_rsb_hvm = val;
+- opt_md_clear_hvm = val;
++ opt_verw_hvm = val;
+ opt_ibpb_entry_hvm = val;
+ }
+ else if ( (val = parse_boolean("msr-sc", s, ss)) != -1 )
+@@ -203,21 +203,22 @@ static int __init parse_spec_ctrl(const char *s)
+ break;
+ }
+ }
+- else if ( (val = parse_boolean("md-clear", s, ss)) != -1 )
++ else if ( (val = parse_boolean("verw", s, ss)) != -1 ||
++ (val = parse_boolean("md-clear", s, ss)) != -1 )
+ {
+ switch ( val )
+ {
+ case 0:
+ case 1:
+- opt_md_clear_pv = opt_md_clear_hvm = val;
++ opt_verw_pv = opt_verw_hvm = val;
+ break;
+
+ case -2:
+- s += strlen("md-clear=");
++ s += (*s == 'v') ? strlen("verw=") : strlen("md-clear=");
+ if ( (val = parse_boolean("pv", s, ss)) >= 0 )
+- opt_md_clear_pv = val;
++ opt_verw_pv = val;
+ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 )
+- opt_md_clear_hvm = val;
++ opt_verw_hvm = val;
+ else
+ default:
+ rc = -EINVAL;
+@@ -512,8 +513,8 @@ static void __init print_details(enum ind_thunk thunk)
+ opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-",
+ opt_ibpb_ctxt_switch ? " IBPB-ctxt" : "",
+ opt_l1d_flush ? " L1D_FLUSH" : "",
+- opt_md_clear_pv || opt_md_clear_hvm ||
+- opt_fb_clear_mmio ? " VERW" : "",
++ opt_verw_pv || opt_verw_hvm ||
++ opt_verw_mmio ? " VERW" : "",
+ opt_div_scrub ? " DIV" : "",
+ opt_branch_harden ? " BRANCH_HARDEN" : "");
+
+@@ -533,11 +534,11 @@ static void __init print_details(enum ind_thunk thunk)
+ (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ||
+ boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ||
+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ||
+- opt_eager_fpu || opt_md_clear_hvm) ? "" : " None",
++ opt_eager_fpu || opt_verw_hvm) ? "" : " None",
+ boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "",
+ boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "",
+ opt_eager_fpu ? " EAGER_FPU" : "",
+- opt_md_clear_hvm ? " MD_CLEAR" : "",
++ opt_verw_hvm ? " VERW" : "",
+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : "");
+
+ #endif
+@@ -546,11 +547,11 @@ static void __init print_details(enum ind_thunk thunk)
+ (boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
+ boot_cpu_has(X86_FEATURE_SC_RSB_PV) ||
+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ||
+- opt_eager_fpu || opt_md_clear_pv) ? "" : " None",
++ opt_eager_fpu || opt_verw_pv) ? "" : " None",
+ boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "",
+ boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "",
+ opt_eager_fpu ? " EAGER_FPU" : "",
+- opt_md_clear_pv ? " MD_CLEAR" : "",
++ opt_verw_pv ? " VERW" : "",
+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : "");
+
+ printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n",
+@@ -1479,8 +1480,8 @@ void spec_ctrl_init_domain(struct domain *d)
+ {
+ bool pv = is_pv_domain(d);
+
+- bool verw = ((pv ? opt_md_clear_pv : opt_md_clear_hvm) ||
+- (opt_fb_clear_mmio && is_iommu_enabled(d)));
++ bool verw = ((pv ? opt_verw_pv : opt_verw_hvm) ||
++ (opt_verw_mmio && is_iommu_enabled(d)));
+
+ bool ibpb = ((pv ? opt_ibpb_entry_pv : opt_ibpb_entry_hvm) &&
+ (d->domain_id != 0 || opt_ibpb_entry_dom0));
+@@ -1838,19 +1839,20 @@ void __init init_speculation_mitigations(void)
+ * the return-to-guest path.
+ */
+ if ( opt_unpriv_mmio )
+- opt_fb_clear_mmio = cpu_has_fb_clear;
++ opt_verw_mmio = cpu_has_fb_clear;
+
+ /*
+ * By default, enable PV and HVM mitigations on MDS-vulnerable hardware.
+ * This will only be a token effort for MLPDS/MFBDS when HT is enabled,
+ * but it is somewhat better than nothing.
+ */
+- if ( opt_md_clear_pv == -1 )
+- opt_md_clear_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
+- boot_cpu_has(X86_FEATURE_MD_CLEAR));
+- if ( opt_md_clear_hvm == -1 )
+- opt_md_clear_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
+- boot_cpu_has(X86_FEATURE_MD_CLEAR));
++ if ( opt_verw_pv == -1 )
++ opt_verw_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
++ cpu_has_md_clear);
++
++ if ( opt_verw_hvm == -1 )
++ opt_verw_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
++ cpu_has_md_clear);
+
+ /*
+ * Enable MDS/MMIO defences as applicable. The Idle blocks need using if
+@@ -1863,12 +1865,12 @@ void __init init_speculation_mitigations(void)
+ * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.)
+ *
+ * After calculating the appropriate idle setting, simplify
+- * opt_md_clear_hvm to mean just "should we VERW on the way into HVM
++ * opt_verw_hvm to mean just "should we VERW on the way into HVM
+ * guests", so spec_ctrl_init_domain() can calculate suitable settings.
+ */
+- if ( opt_md_clear_pv || opt_md_clear_hvm || opt_fb_clear_mmio )
++ if ( opt_verw_pv || opt_verw_hvm || opt_verw_mmio )
+ setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE);
+- opt_md_clear_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush;
++ opt_verw_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush;
+
+ /*
+ * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT
diff --git a/main/xen/xsa452-4.16-6.patch b/main/xen/xsa452-4.16-6.patch
new file mode 100644
index 00000000000..e7e9ff09589
--- /dev/null
+++ b/main/xen/xsa452-4.16-6.patch
@@ -0,0 +1,163 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/spec-ctrl: VERW-handling adjustments
+
+... before we add yet more complexity to this logic. Mostly expanded
+comments, but with three minor changes.
+
+1) Introduce cpu_has_useful_md_clear to simplify later logic in this patch and
+ future ones.
+
+2) We only ever need SC_VERW_IDLE when SMT is active. If SMT isn't active,
+ then there's no re-partition of pipeline resources based on thread-idleness
+ to worry about.
+
+3) The logic to adjust HVM VERW based on L1D_FLUSH is unmaintainable and, as
+ it turns out, wrong. SKIP_L1DFL is just a hint bit, whereas opt_l1d_flush
+ is the relevant decision of whether to use L1D_FLUSH based on
+ susceptibility and user preference.
+
+ Rewrite the logic so it can be followed, and incorporate the fact that when
+ FB_CLEAR is visible, L1D_FLUSH isn't a safe substitution.
+
+This is part of XSA-452 / CVE-2023-28746.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 1eb91a8a06230b4b64228c9a380194f8cfe6c5e2)
+
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 292b5b1c7ba1..2e80e0871642 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -1496,7 +1496,7 @@ void __init init_speculation_mitigations(void)
+ {
+ enum ind_thunk thunk = THUNK_DEFAULT;
+ bool has_spec_ctrl, ibrs = false, hw_smt_enabled;
+- bool cpu_has_bug_taa, retpoline_safe;
++ bool cpu_has_bug_taa, cpu_has_useful_md_clear, retpoline_safe;
+
+ hw_smt_enabled = check_smt_enabled();
+
+@@ -1827,50 +1827,97 @@ void __init init_speculation_mitigations(void)
+ "enabled. Please assess your configuration and choose an\n"
+ "explicit 'smt=<bool>' setting. See XSA-273.\n");
+
++ /*
++ * A brief summary of VERW-related changes.
++ *
++ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html
++ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html
++ *
++ * Relevant ucodes:
++ *
++ * - May 2019, for MDS. Introduces the MD_CLEAR CPUID bit and VERW side
++ * effects to scrub Store/Load/Fill buffers as applicable. MD_CLEAR
++ * exists architecturally, even when the side effects have been removed.
++ *
++ * Use VERW to scrub on return-to-guest. Parts with L1D_FLUSH to
++ * mitigate L1TF have the same side effect, so no need to do both.
++ *
++ * Various Atoms suffer from Store-buffer sampling only. Store buffers
++ * are statically partitioned between non-idle threads, so scrubbing is
++ * wanted when going idle too.
++ *
++ * Load ports and Fill buffers are competitively shared between threads.
++ * SMT must be disabled for VERW scrubbing to be fully effective.
++ *
++ * - November 2019, for TAA. Extended VERW side effects to TSX-enabled
++ * MDS_NO parts.
++ *
++ * - February 2022, for Client TSX de-feature. Removed VERW side effects
++ * from Client CPUs only.
++ *
++ * - May 2022, for MMIO Stale Data. (Re)introduced Fill Buffer scrubbing
++ * on all MMIO-affected parts which didn't already have it for MDS
++ * reasons, enumerating FB_CLEAR on those parts only.
++ *
++ * If FB_CLEAR is enumerated, L1D_FLUSH does not have the same scrubbing
++ * side effects as VERW and cannot be used in its place.
++ */
+ mds_calculations();
+
+ /*
+- * Parts which enumerate FB_CLEAR are those which are post-MDS_NO and have
+- * reintroduced the VERW fill buffer flushing side effect because of a
+- * susceptibility to FBSDP.
++ * Parts which enumerate FB_CLEAR are those with now-updated microcode
++ * which weren't susceptible to the original MFBDS (and therefore didn't
++ * have Fill Buffer scrubbing side effects to begin with, or were Client
++ * MDS_NO non-TAA_NO parts where the scrubbing was removed), but have had
++ * the scrubbing reintroduced because of a susceptibility to FBSDP.
+ *
+ * If unprivileged guests have (or will have) MMIO mappings, we can
+ * mitigate cross-domain leakage of fill buffer data by issuing VERW on
+- * the return-to-guest path.
++ * the return-to-guest path. This is only a token effort if SMT is
++ * active.
+ */
+ if ( opt_unpriv_mmio )
+ opt_verw_mmio = cpu_has_fb_clear;
+
+ /*
+- * By default, enable PV and HVM mitigations on MDS-vulnerable hardware.
+- * This will only be a token effort for MLPDS/MFBDS when HT is enabled,
+- * but it is somewhat better than nothing.
++ * MD_CLEAR is enumerated architecturally forevermore, even after the
++ * scrubbing side effects have been removed. Create ourselves a version
++ * which expresses whether we think MD_CLEAR is having any useful side
++ * effect.
++ */
++ cpu_has_useful_md_clear = (cpu_has_md_clear &&
++ (cpu_has_bug_mds || cpu_has_bug_msbds_only));
++
++ /*
++ * By default, use VERW scrubbing on applicable hardware, if we think it's
++ * going to have an effect. This will only be a token effort for
++ * MLPDS/MFBDS when SMT is enabled.
+ */
+ if ( opt_verw_pv == -1 )
+- opt_verw_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
+- cpu_has_md_clear);
++ opt_verw_pv = cpu_has_useful_md_clear;
+
+ if ( opt_verw_hvm == -1 )
+- opt_verw_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) &&
+- cpu_has_md_clear);
++ opt_verw_hvm = cpu_has_useful_md_clear;
+
+ /*
+- * Enable MDS/MMIO defences as applicable. The Idle blocks need using if
+- * either the PV or HVM MDS defences are used, or if we may give MMIO
+- * access to untrusted guests.
+- *
+- * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with
+- * equivalent semantics to avoid needing to perform both flushes on the
+- * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH (for
+- * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.)
+- *
+- * After calculating the appropriate idle setting, simplify
+- * opt_verw_hvm to mean just "should we VERW on the way into HVM
+- * guests", so spec_ctrl_init_domain() can calculate suitable settings.
++ * If SMT is active, and we're protecting against MDS or MMIO stale data,
++ * we need to scrub before going idle as well as on return to guest.
++ * Various pipeline resources are repartitioned amongst non-idle threads.
+ */
+- if ( opt_verw_pv || opt_verw_hvm || opt_verw_mmio )
++ if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) ||
++ opt_verw_mmio) && hw_smt_enabled )
+ setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE);
+- opt_verw_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush;
++
++ /*
++ * After calculating the appropriate idle setting, simplify opt_verw_hvm
++ * to mean just "should we VERW on the way into HVM guests", so
++ * spec_ctrl_init_domain() can calculate suitable settings.
++ *
++ * It is only safe to use L1D_FLUSH in place of VERW when MD_CLEAR is the
++ * only *_CLEAR we can see.
++ */
++ if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear )
++ opt_verw_hvm = false;
+
+ /*
+ * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT
diff --git a/main/xen/xsa452-4.16-7.patch b/main/xen/xsa452-4.16-7.patch
new file mode 100644
index 00000000000..9862522917d
--- /dev/null
+++ b/main/xen/xsa452-4.16-7.patch
@@ -0,0 +1,299 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/spec-ctrl: Mitigation Register File Data Sampling
+
+RFDS affects Atom cores, also branded E-cores, between the Goldmont and
+Gracemont microarchitectures. This includes Alder Lake and Raptor Lake hybrid
+client systems which have a mix of Gracemont and other types of cores.
+
+Two new bits have been defined: RFDS_CLEAR to indicate VERW has more side
+effects, and RFDS_NO to indicate that the system is unaffected. Plenty of
+unaffected CPUs won't be getting RFDS_NO retrofitted in microcode, so we
+synthesise it. Alder Lake and Raptor Lake Xeon-E's are unaffected due to
+their platform configuration, and we must use the Hybrid CPUID bit to
+distinguish them from their non-Xeon counterparts.
+
+Like MD_CLEAR and FB_CLEAR, RFDS_CLEAR needs OR-ing across a resource pool, so
+set it in the max policies and reflect the host setting in default.
+
+This is part of XSA-452 / CVE-2023-28746.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit fb5b6f6744713410c74cfc12b7176c108e3c9a31)
+
+diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
+index c55a6e767809..0c792679e594 100644
+--- a/tools/misc/xen-cpuid.c
++++ b/tools/misc/xen-cpuid.c
+@@ -170,7 +170,7 @@ static const char *const str_7d0[32] =
+ [ 8] = "avx512-vp2intersect", [ 9] = "srbds-ctrl",
+ [10] = "md-clear", [11] = "rtm-always-abort",
+ /* 12 */ [13] = "tsx-force-abort",
+- [14] = "serialize",
++ [14] = "serialize", [15] = "hybrid",
+ [16] = "tsxldtrk",
+ [18] = "pconfig",
+ [20] = "cet-ibt",
+@@ -230,7 +230,8 @@ static const char *const str_m10Al[32] =
+ [20] = "bhi-no", [21] = "xapic-status",
+ /* 22 */ [23] = "ovrclk-status",
+ [24] = "pbrsb-no", [25] = "gds-ctrl",
+- [26] = "gds-no",
++ [26] = "gds-no", [27] = "rfds-no",
++ [28] = "rfds-clear",
+ };
+
+ static const char *const str_m10Ah[32] =
+diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
+index 34f778dbafbb..c872afda3e2b 100644
+--- a/xen/arch/x86/cpu-policy.c
++++ b/xen/arch/x86/cpu-policy.c
+@@ -443,6 +443,7 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs)
+ */
+ __set_bit(X86_FEATURE_MD_CLEAR, fs);
+ __set_bit(X86_FEATURE_FB_CLEAR, fs);
++ __set_bit(X86_FEATURE_RFDS_CLEAR, fs);
+
+ /*
+ * The Gather Data Sampling microcode mitigation (August 2023) has an
+@@ -492,6 +493,10 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs)
+ if ( cpu_has_fb_clear )
+ __set_bit(X86_FEATURE_FB_CLEAR, fs);
+
++ __clear_bit(X86_FEATURE_RFDS_CLEAR, fs);
++ if ( cpu_has_rfds_clear )
++ __set_bit(X86_FEATURE_RFDS_CLEAR, fs);
++
+ /*
+ * The Gather Data Sampling microcode mitigation (August 2023) has an
+ * adverse performance impact on the CLWB instruction on SKX/CLX/CPX.
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 2e80e0871642..24bf98a018a0 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -432,7 +432,7 @@ static void __init print_details(enum ind_thunk thunk)
+ * Hardware read-only information, stating immunity to certain issues, or
+ * suggestions of which mitigation to use.
+ */
+- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
++ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+ (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "",
+ (caps & ARCH_CAPS_EIBRS) ? " EIBRS" : "",
+ (caps & ARCH_CAPS_RSBA) ? " RSBA" : "",
+@@ -448,6 +448,7 @@ static void __init print_details(enum ind_thunk thunk)
+ (caps & ARCH_CAPS_FB_CLEAR) ? " FB_CLEAR" : "",
+ (caps & ARCH_CAPS_PBRSB_NO) ? " PBRSB_NO" : "",
+ (caps & ARCH_CAPS_GDS_NO) ? " GDS_NO" : "",
++ (caps & ARCH_CAPS_RFDS_NO) ? " RFDS_NO" : "",
+ (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "",
+ (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "",
+ (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? " IBRS_FAST" : "",
+@@ -458,7 +459,7 @@ static void __init print_details(enum ind_thunk thunk)
+ (e21a & cpufeat_mask(X86_FEATURE_SRSO_NO)) ? " SRSO_NO" : "");
+
+ /* Hardware features which need driving to mitigate issues. */
+- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
++ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+ (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ||
+ (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBPB" : "",
+ (e8b & cpufeat_mask(X86_FEATURE_IBRS)) ||
+@@ -476,6 +477,7 @@ static void __init print_details(enum ind_thunk thunk)
+ (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : "",
+ (caps & ARCH_CAPS_FB_CLEAR_CTRL) ? " FB_CLEAR_CTRL" : "",
+ (caps & ARCH_CAPS_GDS_CTRL) ? " GDS_CTRL" : "",
++ (caps & ARCH_CAPS_RFDS_CLEAR) ? " RFDS_CLEAR" : "",
+ (e21a & cpufeat_mask(X86_FEATURE_SBPB)) ? " SBPB" : "");
+
+ /* Compiled-in support which pertains to mitigations. */
+@@ -1324,6 +1326,83 @@ static __init void mds_calculations(void)
+ }
+ }
+
++/*
++ * Register File Data Sampling affects Atom cores from the Goldmont to
++ * Gracemont microarchitectures. The March 2024 microcode adds RFDS_NO to
++ * some but not all unaffected parts, and RFDS_CLEAR to affected parts still
++ * in support.
++ *
++ * Alder Lake and Raptor Lake client CPUs have a mix of P cores
++ * (Golden/Raptor Cove, not vulnerable) and E cores (Gracemont,
++ * vulnerable), and both enumerate RFDS_CLEAR.
++ *
++ * Both exist in a Xeon SKU, which has the E cores (Gracemont) disabled by
++ * platform configuration, and enumerate RFDS_NO.
++ *
++ * With older parts, or with out-of-date microcode, synthesise RFDS_NO when
++ * safe to do so.
++ *
++ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html
++ */
++static void __init rfds_calculations(void)
++{
++ /* RFDS is only known to affect Intel Family 6 processors at this time. */
++ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
++ boot_cpu_data.x86 != 6 )
++ return;
++
++ /*
++ * If RFDS_NO or RFDS_CLEAR are visible, we've either got suitable
++ * microcode, or an RFDS-aware hypervisor is levelling us in a pool.
++ */
++ if ( cpu_has_rfds_no || cpu_has_rfds_clear )
++ return;
++
++ /* If we're virtualised, don't attempt to synthesise RFDS_NO. */
++ if ( cpu_has_hypervisor )
++ return;
++
++ /*
++ * Not all CPUs are expected to get a microcode update enumerating one of
++ * RFDS_{NO,CLEAR}, or we might have out-of-date microcode.
++ */
++ switch ( boot_cpu_data.x86_model )
++ {
++ case 0x97: /* INTEL_FAM6_ALDERLAKE */
++ case 0xB7: /* INTEL_FAM6_RAPTORLAKE */
++ /*
++ * Alder Lake and Raptor Lake might be a client SKU (with the
++ * Gracemont cores active, and therefore vulnerable) or might be a
++ * server SKU (with the Gracemont cores disabled, and therefore not
++ * vulnerable).
++ *
++ * See if the CPU identifies as hybrid to distinguish the two cases.
++ */
++ if ( !cpu_has_hybrid )
++ break;
++ /* fallthrough */
++ case 0x9A: /* INTEL_FAM6_ALDERLAKE_L */
++ case 0xBA: /* INTEL_FAM6_RAPTORLAKE_P */
++ case 0xBF: /* INTEL_FAM6_RAPTORLAKE_S */
++
++ case 0x5C: /* INTEL_FAM6_ATOM_GOLDMONT */ /* Apollo Lake */
++ case 0x5F: /* INTEL_FAM6_ATOM_GOLDMONT_D */ /* Denverton */
++ case 0x7A: /* INTEL_FAM6_ATOM_GOLDMONT_PLUS */ /* Gemini Lake */
++ case 0x86: /* INTEL_FAM6_ATOM_TREMONT_D */ /* Snow Ridge / Parker Ridge */
++ case 0x96: /* INTEL_FAM6_ATOM_TREMONT */ /* Elkhart Lake */
++ case 0x9C: /* INTEL_FAM6_ATOM_TREMONT_L */ /* Jasper Lake */
++ case 0xBE: /* INTEL_FAM6_ATOM_GRACEMONT */ /* Alder Lake N */
++ return;
++ }
++
++ /*
++ * We appear to be on an unaffected CPU which didn't enumerate RFDS_NO,
++ * perhaps because of its age or because of out-of-date microcode.
++ * Synthesise it.
++ */
++ setup_force_cpu_cap(X86_FEATURE_RFDS_NO);
++}
++
+ static bool __init cpu_has_gds(void)
+ {
+ /*
+@@ -1832,6 +1911,7 @@ void __init init_speculation_mitigations(void)
+ *
+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html
+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html
++ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html
+ *
+ * Relevant ucodes:
+ *
+@@ -1861,8 +1941,12 @@ void __init init_speculation_mitigations(void)
+ *
+ * If FB_CLEAR is enumerated, L1D_FLUSH does not have the same scrubbing
+ * side effects as VERW and cannot be used in its place.
++ *
++ * - March 2023, for RFDS. Enumerate RFDS_CLEAR to mean that VERW now
++ * scrubs non-architectural entries from certain register files.
+ */
+ mds_calculations();
++ rfds_calculations();
+
+ /*
+ * Parts which enumerate FB_CLEAR are those with now-updated microcode
+@@ -1894,15 +1978,19 @@ void __init init_speculation_mitigations(void)
+ * MLPDS/MFBDS when SMT is enabled.
+ */
+ if ( opt_verw_pv == -1 )
+- opt_verw_pv = cpu_has_useful_md_clear;
++ opt_verw_pv = cpu_has_useful_md_clear || cpu_has_rfds_clear;
+
+ if ( opt_verw_hvm == -1 )
+- opt_verw_hvm = cpu_has_useful_md_clear;
++ opt_verw_hvm = cpu_has_useful_md_clear || cpu_has_rfds_clear;
+
+ /*
+ * If SMT is active, and we're protecting against MDS or MMIO stale data,
+ * we need to scrub before going idle as well as on return to guest.
+ * Various pipeline resources are repartitioned amongst non-idle threads.
++ *
++ * We don't need to scrub on idle for RFDS. There are no affected cores
++ * which support SMT, despite there being affected cores in hybrid systems
++ * which have SMT elsewhere in the platform.
+ */
+ if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) ||
+ opt_verw_mmio) && hw_smt_enabled )
+@@ -1916,7 +2004,8 @@ void __init init_speculation_mitigations(void)
+ * It is only safe to use L1D_FLUSH in place of VERW when MD_CLEAR is the
+ * only *_CLEAR we can see.
+ */
+- if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear )
++ if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear &&
++ !cpu_has_rfds_clear )
+ opt_verw_hvm = false;
+
+ /*
+diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
+index 81ac4d76eea6..1869732bcb9b 100644
+--- a/xen/include/asm-x86/cpufeature.h
++++ b/xen/include/asm-x86/cpufeature.h
+@@ -138,6 +138,7 @@
+ #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)
+ #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)
+ #define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE)
++#define cpu_has_hybrid boot_cpu_has(X86_FEATURE_HYBRID)
+ #define cpu_has_arch_caps boot_cpu_has(X86_FEATURE_ARCH_CAPS)
+
+ /* CPUID level 0x00000007:1.eax */
+@@ -157,6 +158,8 @@
+ #define cpu_has_rrsba boot_cpu_has(X86_FEATURE_RRSBA)
+ #define cpu_has_gds_ctrl boot_cpu_has(X86_FEATURE_GDS_CTRL)
+ #define cpu_has_gds_no boot_cpu_has(X86_FEATURE_GDS_NO)
++#define cpu_has_rfds_no boot_cpu_has(X86_FEATURE_RFDS_NO)
++#define cpu_has_rfds_clear boot_cpu_has(X86_FEATURE_RFDS_CLEAR)
+
+ /* Synthesized. */
+ #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
+diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
+index 8251b8258b79..eb6295d8a7a4 100644
+--- a/xen/include/asm-x86/msr-index.h
++++ b/xen/include/asm-x86/msr-index.h
+@@ -77,6 +77,8 @@
+ #define ARCH_CAPS_PBRSB_NO (_AC(1, ULL) << 24)
+ #define ARCH_CAPS_GDS_CTRL (_AC(1, ULL) << 25)
+ #define ARCH_CAPS_GDS_NO (_AC(1, ULL) << 26)
++#define ARCH_CAPS_RFDS_NO (_AC(1, ULL) << 27)
++#define ARCH_CAPS_RFDS_CLEAR (_AC(1, ULL) << 28)
+
+ #define MSR_FLUSH_CMD 0x0000010b
+ #define FLUSH_CMD_L1D (_AC(1, ULL) << 0)
+diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
+index 2906eaa6c290..7a9d8d05d3fb 100644
+--- a/xen/include/public/arch-x86/cpufeatureset.h
++++ b/xen/include/public/arch-x86/cpufeatureset.h
+@@ -279,6 +279,7 @@ XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*!A VERW clears microarchitectural buffe
+ XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. */
+ XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */
+ XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*a SERIALIZE insn */
++XEN_CPUFEATURE(HYBRID, 9*32+15) /* Heterogeneous platform */
+ XEN_CPUFEATURE(TSXLDTRK, 9*32+16) /*a TSX load tracking suspend/resume insns */
+ XEN_CPUFEATURE(CET_IBT, 9*32+20) /* CET - Indirect Branch Tracking */
+ XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */
+@@ -338,6 +339,8 @@ XEN_CPUFEATURE(OVRCLK_STATUS, 16*32+23) /* MSR_OVERCLOCKING_STATUS */
+ XEN_CPUFEATURE(PBRSB_NO, 16*32+24) /*A No Post-Barrier RSB predictions */
+ XEN_CPUFEATURE(GDS_CTRL, 16*32+25) /* MCU_OPT_CTRL.GDS_MIT_{DIS,LOCK} */
+ XEN_CPUFEATURE(GDS_NO, 16*32+26) /*A No Gather Data Sampling */
++XEN_CPUFEATURE(RFDS_NO, 16*32+27) /*A No Register File Data Sampling */
++XEN_CPUFEATURE(RFDS_CLEAR, 16*32+28) /*!A Register File(s) cleared by VERW */
+
+ /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */
+
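For context, the two ARCH_CAPS bits introduced by the patch above (RFDS_NO at bit 27, RFDS_CLEAR at bit 28 of MSR 0x10a) can also be inspected from a Linux host as a quick sanity check. The sketch below is illustrative only and not part of the patch set; it assumes the Linux msr driver is loaded and root privileges, and runs outside Xen.

/* Hedged sketch: read MSR_ARCH_CAPABILITIES via the Linux msr driver and
 * report the RFDS bits added by the patch above.  Bit positions match the
 * patch; everything else here is illustrative and not part of Xen. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_ARCH_CAPABILITIES 0x10a
#define ARCH_CAPS_RFDS_NO     (1ULL << 27)
#define ARCH_CAPS_RFDS_CLEAR  (1ULL << 28)

int main(void)
{
    uint64_t caps = 0;
    int fd = open("/dev/cpu/0/msr", O_RDONLY);

    if ( fd < 0 ||
         pread(fd, &caps, sizeof(caps), MSR_ARCH_CAPABILITIES) != sizeof(caps) )
    {
        perror("rdmsr");
        return 1;
    }
    close(fd);

    printf("RFDS_NO:    %s\n", (caps & ARCH_CAPS_RFDS_NO)    ? "yes" : "no");
    printf("RFDS_CLEAR: %s\n", (caps & ARCH_CAPS_RFDS_CLEAR) ? "yes" : "no");
    return 0;
}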
diff --git a/main/xen/xsa453-4.16-1.patch b/main/xen/xsa453-4.16-1.patch
new file mode 100644
index 00000000000..c9fd8c21bb8
--- /dev/null
+++ b/main/xen/xsa453-4.16-1.patch
@@ -0,0 +1,148 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: x86/paging: Delete update_cr3()'s do_locking parameter
+
+Nicola reports that the XSA-438 fix introduced new MISRA violations because of
+some incidental tidying it tried to do. The parameter is useless, so resolve
+the MISRA regression by removing it.
+
+hap_update_cr3() discards the parameter entirely, while sh_update_cr3() uses
+it to distinguish internal and external callers and therefore whether the
+paging lock should be taken.
+
+However, we have paging_lock_recursive() for this purpose, which also avoids
+the ability for the shadow internal callers to accidentally not hold the lock.
+
+Fixes: fb0ff49fe9f7 ("x86/shadow: defer releasing of PV's top-level shadow reference")
+Reported-by: Nicola Vetrini <nicola.vetrini@bugseng.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Release-acked-by: Henry Wang <Henry.Wang@arm.com>
+(cherry picked from commit e71157d1ac2a7fbf413130663cf0a93ff9fbcf7e)
+
+diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
+index fa479d3d97b3..63c29da696dd 100644
+--- a/xen/arch/x86/mm/hap/hap.c
++++ b/xen/arch/x86/mm/hap/hap.c
+@@ -728,7 +728,7 @@ static bool_t hap_invlpg(struct vcpu *v, unsigned long linear)
+ return 1;
+ }
+
+-static pagetable_t hap_update_cr3(struct vcpu *v, bool do_locking, bool noflush)
++static pagetable_t hap_update_cr3(struct vcpu *v, bool noflush)
+ {
+ v->arch.hvm.hw_cr[3] = v->arch.hvm.guest_cr[3];
+ hvm_update_guest_cr3(v, noflush);
+@@ -818,7 +818,7 @@ static void hap_update_paging_modes(struct vcpu *v)
+ }
+
+ /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
+- hap_update_cr3(v, 0, false);
++ hap_update_cr3(v, false);
+
+ unlock:
+ paging_unlock(d);
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index 242b93537f9a..a8869a3fb7eb 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -2563,7 +2563,7 @@ static void sh_update_paging_modes(struct vcpu *v)
+ }
+ #endif /* OOS */
+
+- v->arch.paging.mode->update_cr3(v, 0, false);
++ v->arch.paging.mode->update_cr3(v, false);
+ }
+
+ void shadow_update_paging_modes(struct vcpu *v)
+diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
+index cf3ded70e75e..78bb89f1ee04 100644
+--- a/xen/arch/x86/mm/shadow/multi.c
++++ b/xen/arch/x86/mm/shadow/multi.c
+@@ -2499,7 +2499,7 @@ static int sh_page_fault(struct vcpu *v,
+ * In any case, in the PAE case, the ASSERT is not true; it can
+ * happen because of actions the guest is taking. */
+ #if GUEST_PAGING_LEVELS == 3
+- v->arch.paging.mode->update_cr3(v, 0, false);
++ v->arch.paging.mode->update_cr3(v, false);
+ #else
+ ASSERT(d->is_shutting_down);
+ #endif
+@@ -3219,17 +3219,13 @@ sh_detach_old_tables(struct vcpu *v)
+ }
+ }
+
+-static pagetable_t
+-sh_update_cr3(struct vcpu *v, bool do_locking, bool noflush)
++static pagetable_t sh_update_cr3(struct vcpu *v, bool noflush)
+ /* Updates vcpu->arch.cr3 after the guest has changed CR3.
+ * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
+ * if appropriate).
+ * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works;
+ * this function will call hvm_update_guest_cr(v, 3) to tell them where the
+ * shadow tables are.
+- * If do_locking != 0, assume we are being called from outside the
+- * shadow code, and must take and release the paging lock; otherwise
+- * that is the caller's responsibility.
+ */
+ {
+ struct domain *d = v->domain;
+@@ -3247,7 +3243,11 @@ sh_update_cr3(struct vcpu *v, bool do_locking, bool noflush)
+ return old_entry;
+ }
+
+- if ( do_locking ) paging_lock(v->domain);
++ /*
++ * This is used externally (with the paging lock not taken) and internally
++ * by the shadow code (with the lock already taken).
++ */
++ paging_lock_recursive(v->domain);
+
+ #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+ /* Need to resync all the shadow entries on a TLB flush. Resync
+@@ -3483,8 +3483,7 @@ sh_update_cr3(struct vcpu *v, bool do_locking, bool noflush)
+ shadow_sync_other_vcpus(v);
+ #endif
+
+- /* Release the lock, if we took it (otherwise it's the caller's problem) */
+- if ( do_locking ) paging_unlock(v->domain);
++ paging_unlock(v->domain);
+
+ return old_entry;
+ }
+diff --git a/xen/arch/x86/mm/shadow/none.c b/xen/arch/x86/mm/shadow/none.c
+index 2a5fd409b2d8..003536980803 100644
+--- a/xen/arch/x86/mm/shadow/none.c
++++ b/xen/arch/x86/mm/shadow/none.c
+@@ -52,7 +52,7 @@ static unsigned long _gva_to_gfn(struct vcpu *v, struct p2m_domain *p2m,
+ }
+ #endif
+
+-static pagetable_t _update_cr3(struct vcpu *v, bool do_locking, bool noflush)
++static pagetable_t _update_cr3(struct vcpu *v, bool noflush)
+ {
+ ASSERT_UNREACHABLE();
+ return pagetable_null();
+diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
+index fceb208d3671..bd7c7008ae79 100644
+--- a/xen/include/asm-x86/paging.h
++++ b/xen/include/asm-x86/paging.h
+@@ -138,8 +138,7 @@ struct paging_mode {
+ paddr_t ga, uint32_t *pfec,
+ unsigned int *page_order);
+ #endif
+- pagetable_t (*update_cr3 )(struct vcpu *v, bool do_locking,
+- bool noflush);
++ pagetable_t (*update_cr3 )(struct vcpu *v, bool noflush);
+ void (*update_paging_modes )(struct vcpu *v);
+ bool (*flush_tlb )(bool (*flush_vcpu)(void *ctxt,
+ struct vcpu *v),
+@@ -317,7 +316,7 @@ static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
+ * as the value to load into the host CR3 to schedule this vcpu */
+ static inline pagetable_t paging_update_cr3(struct vcpu *v, bool noflush)
+ {
+- return paging_get_hostmode(v)->update_cr3(v, 1, noflush);
++ return paging_get_hostmode(v)->update_cr3(v, noflush);
+ }
+
+ /* Update all the things that are derived from the guest's CR0/CR3/CR4.
+
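The patch above relies on paging_lock_recursive() so that sh_update_cr3() works both for external callers (paging lock not held) and for internal shadow-code callers (lock already held). As a rough analogy only, and not Xen's implementation, the same calling pattern can be sketched with a POSIX recursive mutex:

#include <pthread.h>

/* Illustrative only: the effect of switching sh_update_cr3() to
 * paging_lock_recursive() can be mimicked with a recursive mutex, so the
 * same function works whether or not the caller already holds the lock. */
static pthread_mutex_t paging_lock_demo;

static void demo_init(void)
{
    pthread_mutexattr_t attr;

    pthread_mutexattr_init(&attr);
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
    pthread_mutex_init(&paging_lock_demo, &attr);
}

static void demo_update_cr3(void)
{
    pthread_mutex_lock(&paging_lock_demo);   /* safe whether or not held */
    /* ... update the shadow tables ... */
    pthread_mutex_unlock(&paging_lock_demo);
}

static void demo_internal_caller(void)
{
    pthread_mutex_lock(&paging_lock_demo);
    demo_update_cr3();                       /* re-entry is fine */
    pthread_mutex_unlock(&paging_lock_demo);
}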
diff --git a/main/xen/xsa453-4.16-2.patch b/main/xen/xsa453-4.16-2.patch
new file mode 100644
index 00000000000..2f0c1b2084b
--- /dev/null
+++ b/main/xen/xsa453-4.16-2.patch
@@ -0,0 +1,49 @@
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Subject: xen: Swap order of actions in the FREE*() macros
+
+Wherever possible, it is a good idea to NULL out the visible reference to an
+object prior to freeing it. The FREE*() macros already collect together both
+parts, making it easy to adjust.
+
+This has a marginal code generation improvement, as some of the calls to the
+free() function can be tailcall optimised.
+
+No functional change.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Acked-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit c4f427ec879e7c0df6d44d02561e8bee838a293e)
+
+diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
+index 3f5c296138cf..c0b77d563d80 100644
+--- a/xen/include/xen/mm.h
++++ b/xen/include/xen/mm.h
+@@ -80,8 +80,9 @@ bool scrub_free_pages(void);
+
+ /* Free an allocation, and zero the pointer to it. */
+ #define FREE_XENHEAP_PAGES(p, o) do { \
+- free_xenheap_pages(p, o); \
++ void *_ptr_ = (p); \
+ (p) = NULL; \
++ free_xenheap_pages(_ptr_, o); \
+ } while ( false )
+ #define FREE_XENHEAP_PAGE(p) FREE_XENHEAP_PAGES(p, 0)
+
+diff --git a/xen/include/xen/xmalloc.h b/xen/include/xen/xmalloc.h
+index 16979a117c6a..d857298011c1 100644
+--- a/xen/include/xen/xmalloc.h
++++ b/xen/include/xen/xmalloc.h
+@@ -66,9 +66,10 @@
+ extern void xfree(void *);
+
+ /* Free an allocation, and zero the pointer to it. */
+-#define XFREE(p) do { \
+- xfree(p); \
+- (p) = NULL; \
++#define XFREE(p) do { \
++ void *_ptr_ = (p); \
++ (p) = NULL; \
++ xfree(_ptr_); \
+ } while ( false )
+
+ /* Underlying functions */
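A minimal stand-alone sketch of the reordering above, using plain C and the standard free(); the XFREE_DEMO name and the ctx structure are invented for illustration:

#include <stdlib.h>

/* Sketch of the reordering done by XFREE()/FREE_XENHEAP_PAGES(): capture the
 * pointer, clear the visible reference first, then free the captured copy. */
#define XFREE_DEMO(p) do {          \
    void *ptr_ = (p);               \
    (p) = NULL;                     \
    free(ptr_);                     \
} while ( 0 )

struct ctx {
    char *buf;
};

static void ctx_teardown(struct ctx *c)
{
    /* c->buf is already NULL by the time free() runs, so the structure never
     * holds a dangling pointer; the trailing free() can also be emitted as a
     * tail call. */
    XFREE_DEMO(c->buf);
}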
diff --git a/main/xen/xsa453-4.16-3.patch b/main/xen/xsa453-4.16-3.patch
new file mode 100644
index 00000000000..07ce4e78ac6
--- /dev/null
+++ b/main/xen/xsa453-4.16-3.patch
@@ -0,0 +1,313 @@
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Subject: x86/spinlock: introduce support for blocking speculation into
+ critical regions
+
+Introduce a new Kconfig option to block speculation into lock protected
+critical regions. The Kconfig option is enabled by default, but the mitigation
+won't be engaged unless it's explicitly enabled in the command line using
+`spec-ctrl=lock-harden`.
+
+Convert the spinlock acquire macros into always-inline functions, and introduce
+a speculation barrier after the lock has been taken. Note the speculation
+barrier is not placed inside the implementation of the spin lock functions, as
+to prevent speculation from falling through the call to the lock functions
+resulting in the barrier also being skipped.
+
+trylock variants are protected using a construct akin to the existing
+evaluate_nospec().
+
+This patch only implements the speculation barrier for x86.
+
+Note spin locks are the only locking primitive taken care of in this change;
+further locking primitives will be adjusted by separate changes.
+
+This is part of XSA-453 / CVE-2024-2193
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 7ef0084418e188d05f338c3e028fbbe8b6924afa)
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index 029002fa82d6..33c32cfc1cbc 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -2263,7 +2263,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
+ > {msr-sc,rsb,verw,ibpb-entry}=<bool>|{pv,hvm}=<bool>,
+ > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
+ > eager-fpu,l1d-flush,branch-harden,srb-lock,
+-> unpriv-mmio,gds-mit,div-scrub}=<bool> ]`
++> unpriv-mmio,gds-mit,div-scrub,lock-harden}=<bool> ]`
+
+ Controls for speculative execution sidechannel mitigations. By default, Xen
+ will pick the most appropriate mitigations based on compiled in support,
+@@ -2388,6 +2388,11 @@ On all hardware, the `div-scrub=` option can be used to force or prevent Xen
+ from mitigating the DIV-leakage vulnerability. By default, Xen will mitigate
+ DIV-leakage on hardware believed to be vulnerable.
+
++If Xen is compiled with `CONFIG_SPECULATIVE_HARDEN_LOCK`, the `lock-harden=`
++boolean can be used to force or prevent Xen from using speculation barriers to
++protect lock critical regions. This mitigation won't be engaged by default,
++and needs to be explicitly enabled on the command line.
++
+ ### sync_console
+ > `= <boolean>`
+
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 24bf98a018a0..0a7af22a9b3c 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -63,6 +63,7 @@ int8_t __read_mostly opt_ibpb_ctxt_switch = -1;
+ int8_t __read_mostly opt_eager_fpu = -1;
+ int8_t __read_mostly opt_l1d_flush = -1;
+ static bool __initdata opt_branch_harden = true;
++static bool __initdata opt_lock_harden;
+
+ bool __initdata bsp_delay_spec_ctrl;
+ uint8_t __read_mostly default_xen_spec_ctrl;
+@@ -131,6 +132,7 @@ static int __init parse_spec_ctrl(const char *s)
+ opt_ssbd = false;
+ opt_l1d_flush = 0;
+ opt_branch_harden = false;
++ opt_lock_harden = false;
+ opt_srb_lock = 0;
+ opt_unpriv_mmio = false;
+ opt_gds_mit = 0;
+@@ -282,6 +284,16 @@ static int __init parse_spec_ctrl(const char *s)
+ opt_l1d_flush = val;
+ else if ( (val = parse_boolean("branch-harden", s, ss)) >= 0 )
+ opt_branch_harden = val;
++ else if ( (val = parse_boolean("lock-harden", s, ss)) >= 0 )
++ {
++ if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_LOCK) )
++ opt_lock_harden = val;
++ else
++ {
++ no_config_param("SPECULATIVE_HARDEN_LOCK", "spec-ctrl", s, ss);
++ rc = -EINVAL;
++ }
++ }
+ else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 )
+ opt_srb_lock = val;
+ else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 )
+@@ -481,18 +493,22 @@ static void __init print_details(enum ind_thunk thunk)
+ (e21a & cpufeat_mask(X86_FEATURE_SBPB)) ? " SBPB" : "");
+
+ /* Compiled-in support which pertains to mitigations. */
+- if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) )
++ if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) ||
++ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_LOCK) )
+ printk(" Compiled-in support:"
+ #ifdef CONFIG_INDIRECT_THUNK
+ " INDIRECT_THUNK"
+ #endif
+ #ifdef CONFIG_SHADOW_PAGING
+ " SHADOW_PAGING"
++#endif
++#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK
++ " HARDEN_LOCK"
+ #endif
+ "\n");
+
+ /* Settings for Xen's protection, irrespective of guests. */
+- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n",
++ printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s%s\n",
+ thunk == THUNK_NONE ? "N/A" :
+ thunk == THUNK_RETPOLINE ? "RETPOLINE" :
+ thunk == THUNK_LFENCE ? "LFENCE" :
+@@ -518,7 +534,8 @@ static void __init print_details(enum ind_thunk thunk)
+ opt_verw_pv || opt_verw_hvm ||
+ opt_verw_mmio ? " VERW" : "",
+ opt_div_scrub ? " DIV" : "",
+- opt_branch_harden ? " BRANCH_HARDEN" : "");
++ opt_branch_harden ? " BRANCH_HARDEN" : "",
++ opt_lock_harden ? " LOCK_HARDEN" : "");
+
+ /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */
+ if ( cpu_has_bug_l1tf || opt_pv_l1tf_hwdom || opt_pv_l1tf_domu )
+@@ -1889,6 +1906,9 @@ void __init init_speculation_mitigations(void)
+ if ( !opt_branch_harden )
+ setup_force_cpu_cap(X86_FEATURE_SC_NO_BRANCH_HARDEN);
+
++ if ( !opt_lock_harden )
++ setup_force_cpu_cap(X86_FEATURE_SC_NO_LOCK_HARDEN);
++
+ /*
+ * We do not disable HT by default on affected hardware.
+ *
+diff --git a/xen/common/Kconfig b/xen/common/Kconfig
+index c9f4b7f49240..01c70109f539 100644
+--- a/xen/common/Kconfig
++++ b/xen/common/Kconfig
+@@ -161,6 +161,23 @@ config SPECULATIVE_HARDEN_GUEST_ACCESS
+
+ If unsure, say Y.
+
++config SPECULATIVE_HARDEN_LOCK
++ bool "Speculative lock context hardening"
++ default y
++ depends on X86
++ help
++ Contemporary processors may use speculative execution as a
++ performance optimisation, but this can potentially be abused by an
++ attacker to leak data via speculative sidechannels.
++
++ One source of data leakage is via speculative accesses to lock
++ critical regions.
++
++ This option is disabled by default at run time, and needs to be
++ enabled on the command line.
++
++ If unsure, say Y.
++
+ endmenu
+
+ config HYPFS
+diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h
+index 70b93b6b443f..7e8221fd85dd 100644
+--- a/xen/include/asm-x86/cpufeatures.h
++++ b/xen/include/asm-x86/cpufeatures.h
+@@ -24,7 +24,7 @@ XEN_CPUFEATURE(APERFMPERF, X86_SYNTH( 8)) /* APERFMPERF */
+ XEN_CPUFEATURE(MFENCE_RDTSC, X86_SYNTH( 9)) /* MFENCE synchronizes RDTSC */
+ XEN_CPUFEATURE(XEN_SMEP, X86_SYNTH(10)) /* SMEP gets used by Xen itself */
+ XEN_CPUFEATURE(XEN_SMAP, X86_SYNTH(11)) /* SMAP gets used by Xen itself */
+-/* Bit 12 - unused. */
++XEN_CPUFEATURE(SC_NO_LOCK_HARDEN, X86_SYNTH(12)) /* (Disable) Lock critical region hardening */
+ XEN_CPUFEATURE(IND_THUNK_LFENCE, X86_SYNTH(13)) /* Use IND_THUNK_LFENCE */
+ XEN_CPUFEATURE(IND_THUNK_JMP, X86_SYNTH(14)) /* Use IND_THUNK_JMP */
+ XEN_CPUFEATURE(SC_NO_BRANCH_HARDEN, X86_SYNTH(15)) /* (Disable) Conditional branch hardening */
+diff --git a/xen/include/asm-x86/nospec.h b/xen/include/asm-x86/nospec.h
+index 7150e76b87fb..0725839e1982 100644
+--- a/xen/include/asm-x86/nospec.h
++++ b/xen/include/asm-x86/nospec.h
+@@ -38,6 +38,32 @@ static always_inline void block_speculation(void)
+ barrier_nospec_true();
+ }
+
++static always_inline void arch_block_lock_speculation(void)
++{
++ alternative("lfence", "", X86_FEATURE_SC_NO_LOCK_HARDEN);
++}
++
++/* Allow to insert a read memory barrier into conditionals */
++static always_inline bool barrier_lock_true(void)
++{
++ alternative("lfence #nospec-true", "", X86_FEATURE_SC_NO_LOCK_HARDEN);
++ return true;
++}
++
++static always_inline bool barrier_lock_false(void)
++{
++ alternative("lfence #nospec-false", "", X86_FEATURE_SC_NO_LOCK_HARDEN);
++ return false;
++}
++
++static always_inline bool arch_lock_evaluate_nospec(bool condition)
++{
++ if ( condition )
++ return barrier_lock_true();
++ else
++ return barrier_lock_false();
++}
++
+ #endif /* _ASM_X86_NOSPEC_H */
+
+ /*
+diff --git a/xen/include/xen/nospec.h b/xen/include/xen/nospec.h
+index 76255bc46efe..455284640396 100644
+--- a/xen/include/xen/nospec.h
++++ b/xen/include/xen/nospec.h
+@@ -70,6 +70,21 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
+ #define array_access_nospec(array, index) \
+ (array)[array_index_nospec(index, ARRAY_SIZE(array))]
+
++static always_inline void block_lock_speculation(void)
++{
++#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK
++ arch_block_lock_speculation();
++#endif
++}
++
++static always_inline bool lock_evaluate_nospec(bool condition)
++{
++#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK
++ return arch_lock_evaluate_nospec(condition);
++#endif
++ return condition;
++}
++
+ #endif /* XEN_NOSPEC_H */
+
+ /*
+diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
+index 9fa4e600c1f7..efdb21ea9072 100644
+--- a/xen/include/xen/spinlock.h
++++ b/xen/include/xen/spinlock.h
+@@ -1,6 +1,7 @@
+ #ifndef __SPINLOCK_H__
+ #define __SPINLOCK_H__
+
++#include <xen/nospec.h>
+ #include <xen/time.h>
+ #include <asm/system.h>
+ #include <asm/spinlock.h>
+@@ -189,13 +190,30 @@ int _spin_trylock_recursive(spinlock_t *lock);
+ void _spin_lock_recursive(spinlock_t *lock);
+ void _spin_unlock_recursive(spinlock_t *lock);
+
+-#define spin_lock(l) _spin_lock(l)
+-#define spin_lock_cb(l, c, d) _spin_lock_cb(l, c, d)
+-#define spin_lock_irq(l) _spin_lock_irq(l)
++static always_inline void spin_lock(spinlock_t *l)
++{
++ _spin_lock(l);
++ block_lock_speculation();
++}
++
++static always_inline void spin_lock_cb(spinlock_t *l, void (*c)(void *data),
++ void *d)
++{
++ _spin_lock_cb(l, c, d);
++ block_lock_speculation();
++}
++
++static always_inline void spin_lock_irq(spinlock_t *l)
++{
++ _spin_lock_irq(l);
++ block_lock_speculation();
++}
++
+ #define spin_lock_irqsave(l, f) \
+ ({ \
+ BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long)); \
+ ((f) = _spin_lock_irqsave(l)); \
++ block_lock_speculation(); \
+ })
+
+ #define spin_unlock(l) _spin_unlock(l)
+@@ -203,7 +221,7 @@ void _spin_unlock_recursive(spinlock_t *lock);
+ #define spin_unlock_irqrestore(l, f) _spin_unlock_irqrestore(l, f)
+
+ #define spin_is_locked(l) _spin_is_locked(l)
+-#define spin_trylock(l) _spin_trylock(l)
++#define spin_trylock(l) lock_evaluate_nospec(_spin_trylock(l))
+
+ #define spin_trylock_irqsave(lock, flags) \
+ ({ \
+@@ -224,8 +242,15 @@ void _spin_unlock_recursive(spinlock_t *lock);
+ * are any critical regions that cannot form part of such a set, they can use
+ * standard spin_[un]lock().
+ */
+-#define spin_trylock_recursive(l) _spin_trylock_recursive(l)
+-#define spin_lock_recursive(l) _spin_lock_recursive(l)
++#define spin_trylock_recursive(l) \
++ lock_evaluate_nospec(_spin_trylock_recursive(l))
++
++static always_inline void spin_lock_recursive(spinlock_t *l)
++{
++ _spin_lock_recursive(l);
++ block_lock_speculation();
++}
++
+ #define spin_unlock_recursive(l) _spin_unlock_recursive(l)
+
+ #endif /* __SPINLOCK_H__ */
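The essence of the change above is that the speculation barrier is emitted inline at the call site, after the lock is acquired, rather than inside a function that could be `call`ed and speculatively bypassed. A rough user-space sketch of that shape (x86 with GCC/Clang assumed; pthread mutexes and the demo_* names are stand-ins, not Xen's primitives):

#include <pthread.h>

static pthread_mutex_t demo_lock_obj = PTHREAD_MUTEX_INITIALIZER;
static int demo_counter;

static inline __attribute__((always_inline)) void lock_barrier(void)
{
    /* analogue of arch_block_lock_speculation() */
    __asm__ volatile ( "lfence" ::: "memory" );
}

static inline __attribute__((always_inline)) void demo_spin_lock(pthread_mutex_t *l)
{
    pthread_mutex_lock(l);   /* may be an out-of-line call */
    lock_barrier();          /* barrier emitted inline, after the lock is held */
}

static inline __attribute__((always_inline)) int demo_spin_trylock(pthread_mutex_t *l)
{
    int ok = ( pthread_mutex_trylock(l) == 0 );

    lock_barrier();          /* fence both outcomes, like lock_evaluate_nospec() */
    return ok;
}

static void demo_update(void)
{
    demo_spin_lock(&demo_lock_obj);
    demo_counter++;          /* critical region: no speculative entry before the fence */
    pthread_mutex_unlock(&demo_lock_obj);
}

The rwlock and percpu-rwlock patches that follow apply the same shape to the read/write lock wrappers.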
diff --git a/main/xen/xsa453-4.16-4.patch b/main/xen/xsa453-4.16-4.patch
new file mode 100644
index 00000000000..f53cc4703c1
--- /dev/null
+++ b/main/xen/xsa453-4.16-4.patch
@@ -0,0 +1,113 @@
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Subject: rwlock: introduce support for blocking speculation into critical
+ regions
+
+Introduce inline wrappers as required and add direct calls to
+block_lock_speculation() in order to prevent speculation into the rwlock
+protected critical regions.
+
+Note the rwlock primitives are adjusted to use the non speculation safe variants
+of the spinlock handlers, as a speculation barrier is added in the rwlock
+calling wrappers.
+
+trylock variants are protected by using lock_evaluate_nospec().
+
+This is part of XSA-453 / CVE-2024-2193
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit a1fb15f61692b1fa9945fc51f55471ace49cdd59)
+
+diff --git a/xen/common/rwlock.c b/xen/common/rwlock.c
+index dadab372b5e1..2464f745485d 100644
+--- a/xen/common/rwlock.c
++++ b/xen/common/rwlock.c
+@@ -34,8 +34,11 @@ void queue_read_lock_slowpath(rwlock_t *lock)
+
+ /*
+ * Put the reader into the wait queue.
++ *
++ * Use the speculation unsafe helper, as it's the caller's responsibility to
++ * issue a speculation barrier if required.
+ */
+- spin_lock(&lock->lock);
++ _spin_lock(&lock->lock);
+
+ /*
+ * At the head of the wait queue now, wait until the writer state
+@@ -64,8 +67,13 @@ void queue_write_lock_slowpath(rwlock_t *lock)
+ {
+ u32 cnts;
+
+- /* Put the writer into the wait queue. */
+- spin_lock(&lock->lock);
++ /*
++ * Put the writer into the wait queue.
++ *
++ * Use the speculation unsafe helper, as it's the caller's responsibility to
++ * issue a speculation barrier if required.
++ */
++ _spin_lock(&lock->lock);
+
+ /* Try to acquire the lock directly if no reader is present. */
+ if ( !atomic_read(&lock->cnts) &&
+diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h
+index 0cc9167715b3..fd0458be94ae 100644
+--- a/xen/include/xen/rwlock.h
++++ b/xen/include/xen/rwlock.h
+@@ -247,27 +247,49 @@ static inline int _rw_is_write_locked(rwlock_t *lock)
+ return (atomic_read(&lock->cnts) & _QW_WMASK) == _QW_LOCKED;
+ }
+
+-#define read_lock(l) _read_lock(l)
+-#define read_lock_irq(l) _read_lock_irq(l)
++static always_inline void read_lock(rwlock_t *l)
++{
++ _read_lock(l);
++ block_lock_speculation();
++}
++
++static always_inline void read_lock_irq(rwlock_t *l)
++{
++ _read_lock_irq(l);
++ block_lock_speculation();
++}
++
+ #define read_lock_irqsave(l, f) \
+ ({ \
+ BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long)); \
+ ((f) = _read_lock_irqsave(l)); \
++ block_lock_speculation(); \
+ })
+
+ #define read_unlock(l) _read_unlock(l)
+ #define read_unlock_irq(l) _read_unlock_irq(l)
+ #define read_unlock_irqrestore(l, f) _read_unlock_irqrestore(l, f)
+-#define read_trylock(l) _read_trylock(l)
++#define read_trylock(l) lock_evaluate_nospec(_read_trylock(l))
++
++static always_inline void write_lock(rwlock_t *l)
++{
++ _write_lock(l);
++ block_lock_speculation();
++}
++
++static always_inline void write_lock_irq(rwlock_t *l)
++{
++ _write_lock_irq(l);
++ block_lock_speculation();
++}
+
+-#define write_lock(l) _write_lock(l)
+-#define write_lock_irq(l) _write_lock_irq(l)
+ #define write_lock_irqsave(l, f) \
+ ({ \
+ BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long)); \
+ ((f) = _write_lock_irqsave(l)); \
++ block_lock_speculation(); \
+ })
+-#define write_trylock(l) _write_trylock(l)
++#define write_trylock(l) lock_evaluate_nospec(_write_trylock(l))
+
+ #define write_unlock(l) _write_unlock(l)
+ #define write_unlock_irq(l) _write_unlock_irq(l)
diff --git a/main/xen/xsa453-4.16-5.patch b/main/xen/xsa453-4.16-5.patch
new file mode 100644
index 00000000000..94b78eea116
--- /dev/null
+++ b/main/xen/xsa453-4.16-5.patch
@@ -0,0 +1,75 @@
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Subject: percpu-rwlock: introduce support for blocking speculation into
+ critical regions
+
+Add direct calls to block_lock_speculation() where required in order to prevent
+speculation into the lock protected critical regions. Also convert
+_percpu_read_lock() from inline to always_inline.
+
+Note that _percpu_write_lock() has been modified to use the non speculation
+safe variant of the locking primitives, as a speculation barrier is added
+unconditionally by the calling wrapper.
+
+This is part of XSA-453 / CVE-2024-2193
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit f218daf6d3a3b847736d37c6a6b76031a0d08441)
+
+diff --git a/xen/common/rwlock.c b/xen/common/rwlock.c
+index 2464f745485d..703276f4aa63 100644
+--- a/xen/common/rwlock.c
++++ b/xen/common/rwlock.c
+@@ -125,8 +125,12 @@ void _percpu_write_lock(percpu_rwlock_t **per_cpudata,
+ /*
+ * First take the write lock to protect against other writers or slow
+ * path readers.
++ *
++ * Note we use the speculation unsafe variant of write_lock(), as the
++ * calling wrapper already adds a speculation barrier after the lock has
++ * been taken.
+ */
+- write_lock(&percpu_rwlock->rwlock);
++ _write_lock(&percpu_rwlock->rwlock);
+
+ /* Now set the global variable so that readers start using read_lock. */
+ percpu_rwlock->writer_activating = 1;
+diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h
+index fd0458be94ae..abe0804bf7d5 100644
+--- a/xen/include/xen/rwlock.h
++++ b/xen/include/xen/rwlock.h
+@@ -326,8 +326,8 @@ static inline void _percpu_rwlock_owner_check(percpu_rwlock_t **per_cpudata,
+ #define percpu_rwlock_resource_init(l, owner) \
+ (*(l) = (percpu_rwlock_t)PERCPU_RW_LOCK_UNLOCKED(&get_per_cpu_var(owner)))
+
+-static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
+- percpu_rwlock_t *percpu_rwlock)
++static always_inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
++ percpu_rwlock_t *percpu_rwlock)
+ {
+ /* Validate the correct per_cpudata variable has been provided. */
+ _percpu_rwlock_owner_check(per_cpudata, percpu_rwlock);
+@@ -362,6 +362,8 @@ static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
+ }
+ else
+ {
++ /* Other branch already has a speculation barrier in read_lock(). */
++ block_lock_speculation();
+ /* All other paths have implicit check_lock() calls via read_lock(). */
+ check_lock(&percpu_rwlock->rwlock.lock.debug, false);
+ }
+@@ -410,8 +412,12 @@ static inline void _percpu_write_unlock(percpu_rwlock_t **per_cpudata,
+ _percpu_read_lock(&get_per_cpu_var(percpu), lock)
+ #define percpu_read_unlock(percpu, lock) \
+ _percpu_read_unlock(&get_per_cpu_var(percpu), lock)
+-#define percpu_write_lock(percpu, lock) \
+- _percpu_write_lock(&get_per_cpu_var(percpu), lock)
++
++#define percpu_write_lock(percpu, lock) \
++({ \
++ _percpu_write_lock(&get_per_cpu_var(percpu), lock); \
++ block_lock_speculation(); \
++})
+ #define percpu_write_unlock(percpu, lock) \
+ _percpu_write_unlock(&get_per_cpu_var(percpu), lock)
+
diff --git a/main/xen/xsa453-4.16-6.patch b/main/xen/xsa453-4.16-6.patch
new file mode 100644
index 00000000000..317f61823c7
--- /dev/null
+++ b/main/xen/xsa453-4.16-6.patch
@@ -0,0 +1,382 @@
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Subject: locking: attempt to ensure lock wrappers are always inline
+
+In order to prevent the locking speculation barriers from being inside of
+`call`ed functions that could be speculatively bypassed.
+
+While there also add an extra locking barrier to _mm_write_lock() in the branch
+taken when the lock is already held.
+
+Note some functions are switched to use the unsafe variants (without speculation
+barrier) of the locking primitives, but a speculation barrier is always added
+to the exposed public lock wrapping helper. That's the case with
+sched_spin_lock_double() or pcidevs_lock() for example.
+
+This is part of XSA-453 / CVE-2024-2193
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 197ecd838a2aaf959a469df3696d4559c4f8b762)
+
+diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c
+index 6fdc3e19fe8c..dd2de574cf18 100644
+--- a/xen/arch/x86/hvm/vpt.c
++++ b/xen/arch/x86/hvm/vpt.c
+@@ -161,7 +161,7 @@ static int pt_irq_masked(struct periodic_time *pt)
+ * pt->vcpu field, because another thread holding the pt_migrate lock
+ * may already be spinning waiting for your vcpu lock.
+ */
+-static void pt_vcpu_lock(struct vcpu *v)
++static always_inline void pt_vcpu_lock(struct vcpu *v)
+ {
+ spin_lock(&v->arch.hvm.tm_lock);
+ }
+@@ -180,9 +180,13 @@ static void pt_vcpu_unlock(struct vcpu *v)
+ * need to take an additional lock that protects against pt->vcpu
+ * changing.
+ */
+-static void pt_lock(struct periodic_time *pt)
++static always_inline void pt_lock(struct periodic_time *pt)
+ {
+- read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
++ /*
++ * Use the speculation unsafe variant for the first lock, as the following
++ * lock taking helper already includes a speculation barrier.
++ */
++ _read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
+ spin_lock(&pt->vcpu->arch.hvm.tm_lock);
+ }
+
+diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
+index d6c073dc5cf5..cc635a440571 100644
+--- a/xen/arch/x86/mm/mm-locks.h
++++ b/xen/arch/x86/mm/mm-locks.h
+@@ -88,8 +88,8 @@ static inline void _set_lock_level(int l)
+ this_cpu(mm_lock_level) = l;
+ }
+
+-static inline void _mm_lock(const struct domain *d, mm_lock_t *l,
+- const char *func, int level, int rec)
++static always_inline void _mm_lock(const struct domain *d, mm_lock_t *l,
++ const char *func, int level, int rec)
+ {
+ if ( !((mm_locked_by_me(l)) && rec) )
+ _check_lock_level(d, level);
+@@ -139,8 +139,8 @@ static inline int mm_write_locked_by_me(mm_rwlock_t *l)
+ return (l->locker == get_processor_id());
+ }
+
+-static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
+- const char *func, int level)
++static always_inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
++ const char *func, int level)
+ {
+ if ( !mm_write_locked_by_me(l) )
+ {
+@@ -151,6 +151,8 @@ static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
+ l->unlock_level = _get_lock_level();
+ _set_lock_level(_lock_level(d, level));
+ }
++ else
++ block_speculation();
+ l->recurse_count++;
+ }
+
+@@ -164,8 +166,8 @@ static inline void mm_write_unlock(mm_rwlock_t *l)
+ percpu_write_unlock(p2m_percpu_rwlock, &l->lock);
+ }
+
+-static inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
+- int level)
++static always_inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
++ int level)
+ {
+ _check_lock_level(d, level);
+ percpu_read_lock(p2m_percpu_rwlock, &l->lock);
+@@ -180,15 +182,15 @@ static inline void mm_read_unlock(mm_rwlock_t *l)
+
+ /* This wrapper uses the line number to express the locking order below */
+ #define declare_mm_lock(name) \
+- static inline void mm_lock_##name(const struct domain *d, mm_lock_t *l, \
+- const char *func, int rec) \
++ static always_inline void mm_lock_##name( \
++ const struct domain *d, mm_lock_t *l, const char *func, int rec) \
+ { _mm_lock(d, l, func, MM_LOCK_ORDER_##name, rec); }
+ #define declare_mm_rwlock(name) \
+- static inline void mm_write_lock_##name(const struct domain *d, \
+- mm_rwlock_t *l, const char *func) \
++ static always_inline void mm_write_lock_##name( \
++ const struct domain *d, mm_rwlock_t *l, const char *func) \
+ { _mm_write_lock(d, l, func, MM_LOCK_ORDER_##name); } \
+- static inline void mm_read_lock_##name(const struct domain *d, \
+- mm_rwlock_t *l) \
++ static always_inline void mm_read_lock_##name(const struct domain *d, \
++ mm_rwlock_t *l) \
+ { _mm_read_lock(d, l, MM_LOCK_ORDER_##name); }
+ /* These capture the name of the calling function */
+ #define mm_lock(name, d, l) mm_lock_##name(d, l, __func__, 0)
+@@ -321,7 +323,7 @@ declare_mm_lock(altp2mlist)
+ #define MM_LOCK_ORDER_altp2m 40
+ declare_mm_rwlock(altp2m);
+
+-static inline void p2m_lock(struct p2m_domain *p)
++static always_inline void p2m_lock(struct p2m_domain *p)
+ {
+ if ( p2m_is_altp2m(p) )
+ mm_write_lock(altp2m, p->domain, &p->lock);
+diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
+index a3c9d8a97423..c82628840864 100644
+--- a/xen/arch/x86/mm/p2m-pod.c
++++ b/xen/arch/x86/mm/p2m-pod.c
+@@ -35,7 +35,7 @@
+ #define superpage_aligned(_x) (((_x)&(SUPERPAGE_PAGES-1))==0)
+
+ /* Enforce lock ordering when grabbing the "external" page_alloc lock */
+-static inline void lock_page_alloc(struct p2m_domain *p2m)
++static always_inline void lock_page_alloc(struct p2m_domain *p2m)
+ {
+ page_alloc_mm_pre_lock(p2m->domain);
+ spin_lock(&(p2m->domain->page_alloc_lock));
+diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
+index da88ad141a69..e5f4e68b8819 100644
+--- a/xen/common/event_channel.c
++++ b/xen/common/event_channel.c
+@@ -57,7 +57,7 @@
+ * just assume the event channel is free or unbound at the moment when the
+ * evtchn_read_trylock() returns false.
+ */
+-static inline void evtchn_write_lock(struct evtchn *evtchn)
++static always_inline void evtchn_write_lock(struct evtchn *evtchn)
+ {
+ write_lock(&evtchn->lock);
+
+@@ -324,7 +324,8 @@ static int evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc)
+ return rc;
+ }
+
+-static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn)
++static always_inline void double_evtchn_lock(struct evtchn *lchn,
++ struct evtchn *rchn)
+ {
+ ASSERT(lchn != rchn);
+
+diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
+index 76272b3c8add..9464cebdd6e4 100644
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
+@@ -398,7 +398,7 @@ static inline void act_set_gfn(struct active_grant_entry *act, gfn_t gfn)
+
+ static DEFINE_PERCPU_RWLOCK_GLOBAL(grant_rwlock);
+
+-static inline void grant_read_lock(struct grant_table *gt)
++static always_inline void grant_read_lock(struct grant_table *gt)
+ {
+ percpu_read_lock(grant_rwlock, &gt->lock);
+ }
+@@ -408,7 +408,7 @@ static inline void grant_read_unlock(struct grant_table *gt)
+ percpu_read_unlock(grant_rwlock, &gt->lock);
+ }
+
+-static inline void grant_write_lock(struct grant_table *gt)
++static always_inline void grant_write_lock(struct grant_table *gt)
+ {
+ percpu_write_lock(grant_rwlock, &gt->lock);
+ }
+@@ -445,7 +445,7 @@ nr_active_grant_frames(struct grant_table *gt)
+ return num_act_frames_from_sha_frames(nr_grant_frames(gt));
+ }
+
+-static inline struct active_grant_entry *
++static always_inline struct active_grant_entry *
+ active_entry_acquire(struct grant_table *t, grant_ref_t e)
+ {
+ struct active_grant_entry *act;
+diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
+index 03ace41540d6..9e80ad4c7463 100644
+--- a/xen/common/sched/core.c
++++ b/xen/common/sched/core.c
+@@ -348,23 +348,28 @@ uint64_t get_cpu_idle_time(unsigned int cpu)
+ * This avoids dead- or live-locks when this code is running on both
+ * cpus at the same time.
+ */
+-static void sched_spin_lock_double(spinlock_t *lock1, spinlock_t *lock2,
+- unsigned long *flags)
++static always_inline void sched_spin_lock_double(
++ spinlock_t *lock1, spinlock_t *lock2, unsigned long *flags)
+ {
++ /*
++ * In order to avoid extra overhead, use the locking primitives without the
++ * speculation barrier, and introduce a single barrier here.
++ */
+ if ( lock1 == lock2 )
+ {
+- spin_lock_irqsave(lock1, *flags);
++ *flags = _spin_lock_irqsave(lock1);
+ }
+ else if ( lock1 < lock2 )
+ {
+- spin_lock_irqsave(lock1, *flags);
+- spin_lock(lock2);
++ *flags = _spin_lock_irqsave(lock1);
++ _spin_lock(lock2);
+ }
+ else
+ {
+- spin_lock_irqsave(lock2, *flags);
+- spin_lock(lock1);
++ *flags = _spin_lock_irqsave(lock2);
++ _spin_lock(lock1);
+ }
++ block_lock_speculation();
+ }
+
+ static void sched_spin_unlock_double(spinlock_t *lock1, spinlock_t *lock2,
+diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
+index 0527a8c70d1c..24a93dd0c123 100644
+--- a/xen/common/sched/private.h
++++ b/xen/common/sched/private.h
+@@ -207,8 +207,24 @@ DECLARE_PER_CPU(cpumask_t, cpumask_scratch);
+ #define cpumask_scratch (&this_cpu(cpumask_scratch))
+ #define cpumask_scratch_cpu(c) (&per_cpu(cpumask_scratch, c))
+
++/*
++ * Deal with _spin_lock_irqsave() returning the flags value instead of storing
++ * it in a passed parameter.
++ */
++#define _sched_spinlock0(lock, irq) _spin_lock##irq(lock)
++#define _sched_spinlock1(lock, irq, arg) ({ \
++ BUILD_BUG_ON(sizeof(arg) != sizeof(unsigned long)); \
++ (arg) = _spin_lock##irq(lock); \
++})
++
++#define _sched_spinlock__(nr) _sched_spinlock ## nr
++#define _sched_spinlock_(nr) _sched_spinlock__(nr)
++#define _sched_spinlock(lock, irq, args...) \
++ _sched_spinlock_(count_args(args))(lock, irq, ## args)
++
+ #define sched_lock(kind, param, cpu, irq, arg...) \
+-static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
++static always_inline spinlock_t \
++*kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
+ { \
+ for ( ; ; ) \
+ { \
+@@ -220,10 +236,16 @@ static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
+ * \
+ * It may also be the case that v->processor may change but the \
+ * lock may be the same; this will succeed in that case. \
++ * \
++ * Use the speculation unsafe locking helper, there's a speculation \
++ * barrier before returning to the caller. \
+ */ \
+- spin_lock##irq(lock, ## arg); \
++ _sched_spinlock(lock, irq, ## arg); \
+ if ( likely(lock == get_sched_res(cpu)->schedule_lock) ) \
++ { \
++ block_lock_speculation(); \
+ return lock; \
++ } \
+ spin_unlock##irq(lock, ## arg); \
+ } \
+ }
+diff --git a/xen/common/timer.c b/xen/common/timer.c
+index 1bb265ceea0e..dc831efc79e5 100644
+--- a/xen/common/timer.c
++++ b/xen/common/timer.c
+@@ -240,7 +240,7 @@ static inline void deactivate_timer(struct timer *timer)
+ list_add(&timer->inactive, &per_cpu(timers, timer->cpu).inactive);
+ }
+
+-static inline bool_t timer_lock(struct timer *timer)
++static inline bool_t timer_lock_unsafe(struct timer *timer)
+ {
+ unsigned int cpu;
+
+@@ -254,7 +254,8 @@ static inline bool_t timer_lock(struct timer *timer)
+ rcu_read_unlock(&timer_cpu_read_lock);
+ return 0;
+ }
+- spin_lock(&per_cpu(timers, cpu).lock);
++ /* Use the speculation unsafe variant, the wrapper has the barrier. */
++ _spin_lock(&per_cpu(timers, cpu).lock);
+ if ( likely(timer->cpu == cpu) )
+ break;
+ spin_unlock(&per_cpu(timers, cpu).lock);
+@@ -267,8 +268,9 @@ static inline bool_t timer_lock(struct timer *timer)
+ #define timer_lock_irqsave(t, flags) ({ \
+ bool_t __x; \
+ local_irq_save(flags); \
+- if ( !(__x = timer_lock(t)) ) \
++ if ( !(__x = timer_lock_unsafe(t)) ) \
+ local_irq_restore(flags); \
++ block_lock_speculation(); \
+ __x; \
+ })
+
+diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
+index 6fc27e7ede40..2fd663062ad5 100644
+--- a/xen/drivers/passthrough/pci.c
++++ b/xen/drivers/passthrough/pci.c
+@@ -52,9 +52,10 @@ struct pci_seg {
+
+ static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED;
+
+-void pcidevs_lock(void)
++/* Do not use, as it has no speculation barrier, use pcidevs_lock() instead. */
++void pcidevs_lock_unsafe(void)
+ {
+- spin_lock_recursive(&_pcidevs_lock);
++ _spin_lock_recursive(&_pcidevs_lock);
+ }
+
+ void pcidevs_unlock(void)
+diff --git a/xen/include/asm-x86/irq.h b/xen/include/asm-x86/irq.h
+index 7c825e9d9c0a..d4b2beda798d 100644
+--- a/xen/include/asm-x86/irq.h
++++ b/xen/include/asm-x86/irq.h
+@@ -177,6 +177,7 @@ extern void irq_complete_move(struct irq_desc *);
+
+ extern struct irq_desc *irq_desc;
+
++/* Not speculation safe, only used for AP bringup. */
+ void lock_vector_lock(void);
+ void unlock_vector_lock(void);
+
+diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
+index 21c95e14fd6a..18924e69e7d0 100644
+--- a/xen/include/xen/event.h
++++ b/xen/include/xen/event.h
+@@ -105,12 +105,12 @@ void notify_via_xen_event_channel(struct domain *ld, int lport);
+ #define bucket_from_port(d, p) \
+ ((group_from_port(d, p))[((p) % EVTCHNS_PER_GROUP) / EVTCHNS_PER_BUCKET])
+
+-static inline void evtchn_read_lock(struct evtchn *evtchn)
++static always_inline void evtchn_read_lock(struct evtchn *evtchn)
+ {
+ read_lock(&evtchn->lock);
+ }
+
+-static inline bool evtchn_read_trylock(struct evtchn *evtchn)
++static always_inline bool evtchn_read_trylock(struct evtchn *evtchn)
+ {
+ return read_trylock(&evtchn->lock);
+ }
+diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
+index ac3880e686f8..3f1324e5de92 100644
+--- a/xen/include/xen/pci.h
++++ b/xen/include/xen/pci.h
+@@ -147,8 +147,12 @@ struct pci_dev {
+ * devices, it also sync the access to the msi capability that is not
+ * interrupt handling related (the mask bit register).
+ */
+-
+-void pcidevs_lock(void);
++void pcidevs_lock_unsafe(void);
++static always_inline void pcidevs_lock(void)
++{
++ pcidevs_lock_unsafe();
++ block_lock_speculation();
++}
+ void pcidevs_unlock(void);
+ bool_t __must_check pcidevs_locked(void);
+
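One detail worth illustrating from the patch above is sched_spin_lock_double(): when two locks are taken together, the speculation-unsafe primitives are used for the individual acquisitions and a single barrier is issued once both are held. A hedged sketch, again with pthread mutexes standing in for Xen spinlocks:

#include <pthread.h>
#include <stdint.h>

static inline __attribute__((always_inline)) void lock_barrier(void)
{
    __asm__ volatile ( "lfence" ::: "memory" );
}

static inline __attribute__((always_inline)) void
demo_lock_double(pthread_mutex_t *l1, pthread_mutex_t *l2)
{
    if ( l1 == l2 )
        pthread_mutex_lock(l1);
    else if ( (uintptr_t)l1 < (uintptr_t)l2 )
    {
        pthread_mutex_lock(l1);       /* fixed order avoids deadlock */
        pthread_mutex_lock(l2);
    }
    else
    {
        pthread_mutex_lock(l2);
        pthread_mutex_lock(l1);
    }
    lock_barrier();                   /* one barrier covers both acquisitions */
}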
diff --git a/main/xen/xsa453-4.16-7.patch b/main/xen/xsa453-4.16-7.patch
new file mode 100644
index 00000000000..8a32529d99e
--- /dev/null
+++ b/main/xen/xsa453-4.16-7.patch
@@ -0,0 +1,61 @@
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Subject: x86/mm: add speculation barriers to open coded locks
+
+Add a speculation barrier to the clearly identified open-coded lock taking
+functions.
+
+Note that the memory sharing page_lock() replacement (_page_lock()) is left
+as-is, as the code is experimental and not security supported.
+
+This is part of XSA-453 / CVE-2024-2193
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 42a572a38e22a97d86a4b648a22597628d5b42e4)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index ea024c145034..2bf1b709851a 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -2005,7 +2005,7 @@ static inline bool current_locked_page_ne_check(struct page_info *page) {
+ #define current_locked_page_ne_check(x) true
+ #endif
+
+-int page_lock(struct page_info *page)
++int page_lock_unsafe(struct page_info *page)
+ {
+ unsigned long x, nx;
+
+@@ -2066,7 +2066,7 @@ void page_unlock(struct page_info *page)
+ * l3t_lock(), so to avoid deadlock we must avoid grabbing them in
+ * reverse order.
+ */
+-static void l3t_lock(struct page_info *page)
++static always_inline void l3t_lock(struct page_info *page)
+ {
+ unsigned long x, nx;
+
+@@ -2075,6 +2075,8 @@ static void l3t_lock(struct page_info *page)
+ cpu_relax();
+ nx = x | PGT_locked;
+ } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x );
++
++ block_lock_speculation();
+ }
+
+ static void l3t_unlock(struct page_info *page)
+diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
+index cccef852b4de..73d5a98bec7e 100644
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -393,7 +393,9 @@ const struct platform_bad_page *get_platform_badpages(unsigned int *array_size);
+ * The use of PGT_locked in mem_sharing does not collide, since mem_sharing is
+ * only supported for hvm guests, which do not have PV PTEs updated.
+ */
+-int page_lock(struct page_info *page);
++int page_lock_unsafe(struct page_info *page);
++#define page_lock(pg) lock_evaluate_nospec(page_lock_unsafe(pg))
++
+ void page_unlock(struct page_info *page);
+
+ void put_page_type(struct page_info *page);
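For the open-coded l3t_lock() case above, the pattern is a compare-and-swap acquisition loop followed by an explicit barrier once the lock is held. A self-contained sketch using C11 atomics (illustrative only; not Xen's type_info machinery):

#include <stdatomic.h>

/* Open-coded lock built from a CAS loop, gaining an explicit speculation
 * barrier once the lock has actually been taken. */
static inline __attribute__((always_inline)) void open_coded_lock(atomic_uint *word)
{
    unsigned int expected;

    do {
        expected = 0;                 /* 0 == free, 1 == held */
    } while ( !atomic_compare_exchange_weak(word, &expected, 1) );

    /* Lock held: stop speculative execution from entering the critical
     * region before the CAS above has architecturally succeeded. */
    __asm__ volatile ( "lfence" ::: "memory" );
}

static inline void open_coded_unlock(atomic_uint *word)
{
    atomic_store(word, 0);
}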
diff --git a/main/xen/xsa453-4.16-8.patch b/main/xen/xsa453-4.16-8.patch
new file mode 100644
index 00000000000..9a134fbcab5
--- /dev/null
+++ b/main/xen/xsa453-4.16-8.patch
@@ -0,0 +1,201 @@
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Subject: x86: protect conditional lock taking from speculative execution
+
+Conditionally taken locks that use the pattern:
+
+if ( lock )
+ spin_lock(...);
+
+Need an else branch in order to issue a speculation barrier in the else case,
+just like it's done in case the lock needs to be acquired.
+
+eval_nospec() could be used on the condition itself, but that would result in a
+double barrier on the branch where the lock is taken.
+
+Introduce a new pair of helpers, {gfn,spin}_lock_if() that can be used to
+conditionally take a lock in a speculation safe way.
+
+This is part of XSA-453 / CVE-2024-2193
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+(cherry picked from commit 03cf7ca23e0e876075954c558485b267b7d02406)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 2bf1b709851a..16287e62af23 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -5000,8 +5000,7 @@ static l3_pgentry_t *virt_to_xen_l3e(unsigned long v)
+ if ( !l3t )
+ return NULL;
+ UNMAP_DOMAIN_PAGE(l3t);
+- if ( locking )
+- spin_lock(&map_pgdir_lock);
++ spin_lock_if(locking, &map_pgdir_lock);
+ if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
+ {
+ l4_pgentry_t l4e = l4e_from_mfn(l3mfn, __PAGE_HYPERVISOR);
+@@ -5038,8 +5037,7 @@ static l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
+ return NULL;
+ }
+ UNMAP_DOMAIN_PAGE(l2t);
+- if ( locking )
+- spin_lock(&map_pgdir_lock);
++ spin_lock_if(locking, &map_pgdir_lock);
+ if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
+ {
+ l3e_write(pl3e, l3e_from_mfn(l2mfn, __PAGE_HYPERVISOR));
+@@ -5077,8 +5075,7 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v)
+ return NULL;
+ }
+ UNMAP_DOMAIN_PAGE(l1t);
+- if ( locking )
+- spin_lock(&map_pgdir_lock);
++ spin_lock_if(locking, &map_pgdir_lock);
+ if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
+ {
+ l2e_write(pl2e, l2e_from_mfn(l1mfn, __PAGE_HYPERVISOR));
+@@ -5109,6 +5106,8 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v)
+ do { \
+ if ( locking ) \
+ l3t_lock(page); \
++ else \
++ block_lock_speculation(); \
+ } while ( false )
+
+ #define L3T_UNLOCK(page) \
+@@ -5324,8 +5323,7 @@ int map_pages_to_xen(
+ if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL )
+ flush_flags |= FLUSH_TLB_GLOBAL;
+
+- if ( locking )
+- spin_lock(&map_pgdir_lock);
++ spin_lock_if(locking, &map_pgdir_lock);
+ if ( (l3e_get_flags(*pl3e) & _PAGE_PRESENT) &&
+ (l3e_get_flags(*pl3e) & _PAGE_PSE) )
+ {
+@@ -5429,8 +5427,7 @@ int map_pages_to_xen(
+ if ( l2e_get_flags(*pl2e) & _PAGE_GLOBAL )
+ flush_flags |= FLUSH_TLB_GLOBAL;
+
+- if ( locking )
+- spin_lock(&map_pgdir_lock);
++ spin_lock_if(locking, &map_pgdir_lock);
+ if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) &&
+ (l2e_get_flags(*pl2e) & _PAGE_PSE) )
+ {
+@@ -5471,8 +5468,7 @@ int map_pages_to_xen(
+ unsigned long base_mfn;
+ const l1_pgentry_t *l1t;
+
+- if ( locking )
+- spin_lock(&map_pgdir_lock);
++ spin_lock_if(locking, &map_pgdir_lock);
+
+ ol2e = *pl2e;
+ /*
+@@ -5526,8 +5522,7 @@ int map_pages_to_xen(
+ unsigned long base_mfn;
+ const l2_pgentry_t *l2t;
+
+- if ( locking )
+- spin_lock(&map_pgdir_lock);
++ spin_lock_if(locking, &map_pgdir_lock);
+
+ ol3e = *pl3e;
+ /*
+@@ -5671,8 +5666,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
+ l3e_get_flags(*pl3e)));
+ UNMAP_DOMAIN_PAGE(l2t);
+
+- if ( locking )
+- spin_lock(&map_pgdir_lock);
++ spin_lock_if(locking, &map_pgdir_lock);
+ if ( (l3e_get_flags(*pl3e) & _PAGE_PRESENT) &&
+ (l3e_get_flags(*pl3e) & _PAGE_PSE) )
+ {
+@@ -5731,8 +5725,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
+ l2e_get_flags(*pl2e) & ~_PAGE_PSE));
+ UNMAP_DOMAIN_PAGE(l1t);
+
+- if ( locking )
+- spin_lock(&map_pgdir_lock);
++ spin_lock_if(locking, &map_pgdir_lock);
+ if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) &&
+ (l2e_get_flags(*pl2e) & _PAGE_PSE) )
+ {
+@@ -5776,8 +5769,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
+ */
+ if ( (nf & _PAGE_PRESENT) || ((v != e) && (l1_table_offset(v) != 0)) )
+ continue;
+- if ( locking )
+- spin_lock(&map_pgdir_lock);
++ spin_lock_if(locking, &map_pgdir_lock);
+
+ /*
+ * L2E may be already cleared, or set to a superpage, by
+@@ -5824,8 +5816,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf)
+ if ( (nf & _PAGE_PRESENT) ||
+ ((v != e) && (l2_table_offset(v) + l1_table_offset(v) != 0)) )
+ continue;
+- if ( locking )
+- spin_lock(&map_pgdir_lock);
++ spin_lock_if(locking, &map_pgdir_lock);
+
+ /*
+ * L3E may be already cleared, or set to a superpage, by
+diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
+index cc635a440571..7eee233b4cef 100644
+--- a/xen/arch/x86/mm/mm-locks.h
++++ b/xen/arch/x86/mm/mm-locks.h
+@@ -347,6 +347,15 @@ static inline void p2m_unlock(struct p2m_domain *p)
+ #define p2m_locked_by_me(p) mm_write_locked_by_me(&(p)->lock)
+ #define gfn_locked_by_me(p,g) p2m_locked_by_me(p)
+
++static always_inline void gfn_lock_if(bool condition, struct p2m_domain *p2m,
++ gfn_t gfn, unsigned int order)
++{
++ if ( condition )
++ gfn_lock(p2m, gfn, order);
++ else
++ block_lock_speculation();
++}
++
+ /* PoD lock (per-p2m-table)
+ *
+ * Protects private PoD data structs: entry and cache
+diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
+index 2d41446a6902..ddd2f861c3c7 100644
+--- a/xen/arch/x86/mm/p2m.c
++++ b/xen/arch/x86/mm/p2m.c
+@@ -514,9 +514,8 @@ mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn_l,
+ if ( q & P2M_UNSHARE )
+ q |= P2M_ALLOC;
+
+- if ( locked )
+- /* Grab the lock here, don't release until put_gfn */
+- gfn_lock(p2m, gfn, 0);
++ /* Grab the lock here, don't release until put_gfn */
++ gfn_lock_if(locked, p2m, gfn, 0);
+
+ mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
+
+diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
+index efdb21ea9072..8bffb3f4b610 100644
+--- a/xen/include/xen/spinlock.h
++++ b/xen/include/xen/spinlock.h
+@@ -216,6 +216,14 @@ static always_inline void spin_lock_irq(spinlock_t *l)
+ block_lock_speculation(); \
+ })
+
++/* Conditionally take a spinlock in a speculation safe way. */
++static always_inline void spin_lock_if(bool condition, spinlock_t *l)
++{
++ if ( condition )
++ _spin_lock(l);
++ block_lock_speculation();
++}
++
+ #define spin_unlock(l) _spin_unlock(l)
+ #define spin_unlock_irq(l) _spin_unlock_irq(l)
+ #define spin_unlock_irqrestore(l, f) _spin_unlock_irqrestore(l, f)
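
As a companion to the patch above, the following minimal standalone sketch
shows the conditional-lock idea: the speculation barrier is issued whether or
not the lock is taken, so code guarded by "if ( locking ) spin_lock(...);"
cannot be reached speculatively on the untaken path. A pthread spinlock and an
lfence-based barrier stand in for Xen's spinlock_t and
block_lock_speculation(); the function and variable names in the caller are
illustrative only. In the real helper the raw _spin_lock() is used precisely
so the barrier is not issued twice when the lock is taken:

/* Illustrative sketch only -- not the hypervisor's definitions. */
#include <pthread.h>
#include <stdbool.h>

static inline void block_lock_speculation(void)
{
#if defined(__x86_64__) || defined(__i386__)
    asm volatile ( "lfence" ::: "memory" );  /* serialising barrier */
#else
    __atomic_thread_fence(__ATOMIC_SEQ_CST);
#endif
}

/* Conditionally take a spinlock in a speculation-safe way: the barrier is
 * reached on both paths, so the guarded section cannot be entered
 * speculatively when the lock is skipped. */
static inline void spin_lock_if(bool condition, pthread_spinlock_t *l)
{
    if ( condition )
        pthread_spin_lock(l);
    block_lock_speculation();
}

/* Example caller, loosely mirroring the map_pgdir_lock sites converted
 * above (names here are placeholders). */
static pthread_spinlock_t map_pgdir_lock;

static void touch_mapping(bool locking)
{
    spin_lock_if(locking, &map_pgdir_lock);
    /* ... state that must not be reached speculatively ... */
    if ( locking )
        pthread_spin_unlock(&map_pgdir_lock);
}

int main(void)
{
    pthread_spin_init(&map_pgdir_lock, PTHREAD_PROCESS_PRIVATE);
    touch_mapping(true);
    touch_mapping(false);
    pthread_spin_destroy(&map_pgdir_lock);
    return 0;
}
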