author     Daniel Néri <dne+alpine@mayonnaise.net>  2021-09-11 01:45:41 +0200
committer  Daniel Néri <dne+alpine@mayonnaise.net>  2021-09-11 01:45:41 +0200
commit     dc645aa6c8102d8b2ea5b21290e837cd91031e8e (patch)
tree       4df8c040ca520a6133ad82879a7c745804c5a3cf
parent     afe5463476592d299dedff91476c87c85a110bf4 (diff)
main/xen: upgrade to 4.13.4
-rw-r--r--  main/xen/0001-xen-arm-Create-dom0less-domUs-earlier.patch  |  83
-rw-r--r--  main/xen/0002-xen-arm-Boot-modules-should-always-be-scrubbed-if-bo.patch  |  58
-rw-r--r--  main/xen/APKBUILD  |  69
-rw-r--r--  main/xen/xen.git-e06d0c113e0067b86186db94aabae9c91aa09f35.patch  |  117
-rw-r--r--  main/xen/xsa373-4.13-1.patch  |  120
-rw-r--r--  main/xen/xsa373-4.13-2.patch  |  95
-rw-r--r--  main/xen/xsa373-4.13-3.patch  |  163
-rw-r--r--  main/xen/xsa373-4.13-4.patch  |  86
-rw-r--r--  main/xen/xsa373-4.13-5.patch  |  145
-rw-r--r--  main/xen/xsa375-4.13.patch  |  50
-rw-r--r--  main/xen/xsa377.patch  |  27
-rw-r--r--  main/xen/xsa378-4.13-0a.patch  |  77
-rw-r--r--  main/xen/xsa378-4.13-0b.patch  |  62
-rw-r--r--  main/xen/xsa378-4.13-0c.patch  |  59
-rw-r--r--  main/xen/xsa378-4.13-1.patch  |  142
-rw-r--r--  main/xen/xsa378-4.13-2.patch  |  218
-rw-r--r--  main/xen/xsa378-4.13-3.patch  |  102
-rw-r--r--  main/xen/xsa378-4.13-4.patch  |  385
-rw-r--r--  main/xen/xsa378-4.13-5.patch  |  208
-rw-r--r--  main/xen/xsa378-4.13-6.patch  |  411
-rw-r--r--  main/xen/xsa378-4.13-7.patch  |  88
-rw-r--r--  main/xen/xsa378-4.13-8.patch  |  157
-rw-r--r--  main/xen/xsa379-4.14.patch  |  77
-rw-r--r--  main/xen/xsa380-4.13-1.patch  |  148
-rw-r--r--  main/xen/xsa380-4.13-2.patch  |  383
-rw-r--r--  main/xen/xsa382.patch  |  34
-rw-r--r--  main/xen/xsa383.patch  |  55
-rw-r--r--  main/xen/xsa384-4.14.patch  |  79
28 files changed, 3 insertions, 3695 deletions
diff --git a/main/xen/0001-xen-arm-Create-dom0less-domUs-earlier.patch b/main/xen/0001-xen-arm-Create-dom0less-domUs-earlier.patch
deleted file mode 100644
index a5289a821a..0000000000
--- a/main/xen/0001-xen-arm-Create-dom0less-domUs-earlier.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From f98c20aaaf909be04ada5cb6cb88c14b9bc75e15 Mon Sep 17 00:00:00 2001
-From: Julien Grall <jgrall@amazon.com>
-Date: Mon, 17 May 2021 17:47:13 +0100
-Subject: [PATCH 1/2] xen/arm: Create dom0less domUs earlier
-
-In a follow-up patch we will need to unallocate the boot modules
-before heap_init_late() is called.
-
-The modules will contain the domUs kernel and initramfs. Therefore Xen
-will need to create extra domUs (used by dom0less) before heap_init_late().
-
-This has two consequences on dom0less:
- 1) Domains will not be unpaused as soon as they are created but
- once all have been created. However, Xen doesn't guarantee an order
- to unpause, so this is not something one could rely on.
-
- 2) The memory allocated for a domU will not be scrubbed anymore when an
- admin select bootscrub=on. This is not something we advertised, but if
- this is a concern we can introduce either force scrub for all domUs or
- a per-domain flag in the DT. The behavior for bootscrub=off and
- bootscrub=idle (default) has not changed.
-
-This is part of XSA-372 / CVE-2021-28693.
-
-Signed-off-by: Julien Grall <jgrall@amazon.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-Tested-by: Stefano Stabellini <sstabellini@kernel.org>
----
- xen/arch/arm/domain_build.c | 2 --
- xen/arch/arm/setup.c | 9 +++++----
- 2 files changed, 5 insertions(+), 6 deletions(-)
-
-diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
-index e824ba34b012..b07461f5d376 100644
---- a/xen/arch/arm/domain_build.c
-+++ b/xen/arch/arm/domain_build.c
-@@ -2515,8 +2515,6 @@ void __init create_domUs(void)
-
- if ( construct_domU(d, node) != 0 )
- panic("Could not set up domain %s\n", dt_node_name(node));
--
-- domain_unpause_by_systemcontroller(d);
- }
- }
-
-diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
-index 7968cee47d05..1f26080b30bf 100644
---- a/xen/arch/arm/setup.c
-+++ b/xen/arch/arm/setup.c
-@@ -779,7 +779,7 @@ void __init start_xen(unsigned long boot_phys_offset,
- int cpus, i;
- const char *cmdline;
- struct bootmodule *xen_bootmodule;
-- struct domain *dom0;
-+ struct domain *dom0, *d;
- struct xen_domctl_createdomain dom0_cfg = {
- .flags = XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap,
- .max_evtchn_port = -1,
-@@ -962,6 +962,8 @@ void __init start_xen(unsigned long boot_phys_offset,
- if ( construct_dom0(dom0) != 0)
- panic("Could not set up DOM0 guest OS\n");
-
-+ create_domUs();
-+
- heap_init_late();
-
- init_trace_bufs();
-@@ -975,9 +977,8 @@ void __init start_xen(unsigned long boot_phys_offset,
-
- system_state = SYS_STATE_active;
-
-- create_domUs();
--
-- domain_unpause_by_systemcontroller(dom0);
-+ for_each_domain( d )
-+ domain_unpause_by_systemcontroller(d);
-
- /* Switch on to the dynamically allocated stack for the idle vcpu
- * since the static one we're running on is about to be freed. */
---
-2.17.1
-
diff --git a/main/xen/0002-xen-arm-Boot-modules-should-always-be-scrubbed-if-bo.patch b/main/xen/0002-xen-arm-Boot-modules-should-always-be-scrubbed-if-bo.patch
deleted file mode 100644
index 3ed62f360e..0000000000
--- a/main/xen/0002-xen-arm-Boot-modules-should-always-be-scrubbed-if-bo.patch
+++ /dev/null
@@ -1,58 +0,0 @@
-From e7e475c1a3dc6b149252413589eebaa4ae138824 Mon Sep 17 00:00:00 2001
-From: Julien Grall <jgrall@amazon.com>
-Date: Sat, 17 Apr 2021 17:38:28 +0100
-Subject: [PATCH 2/2] xen/arm: Boot modules should always be scrubbed if
- bootscrub={on, idle}
-
-The function to initialize the pages (see init_heap_pages()) will request
-scrub when the admin request idle bootscrub (default) and state ==
-SYS_STATE_active. When bootscrub=on, Xen will scrub any free pages in
-heap_init_late().
-
-Currently, the boot modules (e.g. kernels, initramfs) will be discarded/
-freed after heap_init_late() is called and system_state switched to
-SYS_STATE_active. This means the pages associated with the boot modules
-will not get scrubbed before getting re-purposed.
-
-If the memory is assigned to an untrusted domU, it may be able to
-retrieve secrets from the modules.
-
-This is part of XSA-372 / CVE-2021-28693.
-
-Fixes: 1774e9b1df27 ("xen/arm: introduce create_domUs")
-Signed-off-by: Julien Grall <jgrall@amazon.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-Tested-by: Stefano Stabellini <sstabellini@kernel.org>
----
- xen/arch/arm/setup.c | 7 ++++++-
- 1 file changed, 6 insertions(+), 1 deletion(-)
-
-diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
-index 1f26080b30bf..34b1c1a11ef6 100644
---- a/xen/arch/arm/setup.c
-+++ b/xen/arch/arm/setup.c
-@@ -75,7 +75,6 @@ static __used void init_done(void)
- /* Must be done past setting system_state. */
- unregister_init_virtual_region();
-
-- discard_initial_modules();
- free_init_memory();
- startup_cpu_idle_loop();
- }
-@@ -964,6 +963,12 @@ void __init start_xen(unsigned long boot_phys_offset,
-
- create_domUs();
-
-+ /*
-+ * This needs to be called **before** heap_init_late() so modules
-+ * will be scrubbed (unless suppressed).
-+ */
-+ discard_initial_modules();
-+
- heap_init_late();
-
- init_trace_bufs();
---
-2.17.1
-
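[Editor's note: the two XSA-372 patches removed above revolve around one idea: memory that held boot modules (guest kernels, initramfs images) must be scrubbed before it can be reused by an untrusted domain. The following standalone C sketch illustrates the general scrub-before-reuse pattern; it is illustrative userspace code, not part of the patches, and the helper name is hypothetical.]

    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical helper: wipe a buffer that held secrets before releasing
     * it, making sure the compiler cannot drop the wipe as a dead store. */
    static void release_secret_buffer(void *buf, size_t len)
    {
    #if defined(__GLIBC__) || defined(__OpenBSD__)
        explicit_bzero(buf, len);       /* guaranteed not to be optimised away */
    #else
        volatile unsigned char *p = buf;
        while (len--)
            *p++ = 0;                   /* volatile stores keep the scrub in place */
    #endif
        free(buf);
    }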
diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD
index 6b6cb3e732..5054641f7f 100644
--- a/main/xen/APKBUILD
+++ b/main/xen/APKBUILD
@@ -1,8 +1,8 @@
# Contributor: Roger Pau Monne <roger.pau@entel.upc.edu>
# Maintainer: Natanael Copa <ncopa@alpinelinux.org>
pkgname=xen
-pkgver=4.13.3
-pkgrel=3
+pkgver=4.13.4
+pkgrel=0
pkgdesc="Xen hypervisor"
url="https://www.xenproject.org/"
arch="x86_64 armhf aarch64" # enable armv7 when builds with gcc8
@@ -292,42 +292,6 @@ source="https://downloads.xenproject.org/release/xen/$pkgver/xen-$pkgver.tar.gz
stubdom-hack.patch
- 0001-xen-arm-Create-dom0less-domUs-earlier.patch
- 0002-xen-arm-Boot-modules-should-always-be-scrubbed-if-bo.patch
-
- xsa373-4.13-1.patch
- xsa373-4.13-2.patch
- xsa373-4.13-3.patch
- xsa373-4.13-4.patch
- xsa373-4.13-5.patch
-
- xsa375-4.13.patch
-
- xsa377.patch
-
- xen.git-e06d0c113e0067b86186db94aabae9c91aa09f35.patch
-
- xsa378-4.13-0a.patch
- xsa378-4.13-0b.patch
- xsa378-4.13-0c.patch
- xsa378-4.13-1.patch
- xsa378-4.13-2.patch
- xsa378-4.13-3.patch
- xsa378-4.13-4.patch
- xsa378-4.13-5.patch
- xsa378-4.13-6.patch
- xsa378-4.13-7.patch
- xsa378-4.13-8.patch
-
- xsa379-4.14.patch
-
- xsa380-4.13-1.patch
- xsa380-4.13-2.patch
-
- xsa382.patch
- xsa383.patch
- xsa384-4.14.patch
-
hotplug-Linux-iscsi-block-handle-lun-1.patch
drop-test.py.patch
@@ -557,7 +521,7 @@ EOF
}
sha512sums="
-622127d824b9c49b57282a887fb404e0bad05ff60bccade82e4e0e9b5ad975ff9aa1fba83392e6d8379e9a15340e8ae9785c0913eb11027816e4600432eea6b6 xen-4.13.3.tar.gz
+1f6d67e0270b10be45b6444322ced791b44df09a3a51e0fe690f5ad76cd80d35115efc93056e99f73b4e550178e0e780c9ee827ced04b09caf12fdf34d9a9b71 xen-4.13.4.tar.gz
2e0b0fd23e6f10742a5517981e5171c6e88b0a93c83da701b296f5c0861d72c19782daab589a7eac3f9032152a0fc7eff7f5362db8fccc4859564a9aa82329cf gmp-4.3.2.tar.bz2
c2bc9ffc8583aeae71cee9ddcc4418969768d4e3764d47307da54f93981c0109fb07d84b061b3a3628bd00ba4d14a54742bc04848110eb3ae8ca25dbfbaabadb grub-0.97.tar.gz
1465b58279af1647f909450e394fe002ca165f0ff4a0254bfa9fe0e64316f50facdde2729d79a4e632565b4500cf4d6c74192ac0dd3bc9fe09129bbd67ba089d lwip-1.3.0.tar.gz
@@ -577,33 +541,6 @@ f095ea373f36381491ad36f0662fb4f53665031973721256b23166e596318581da7cbb0146d0beb2
e76816c6ad0e91dc5f81947f266da3429b20e6d976c3e8c41202c6179532eec878a3f0913921ef3ac853c5dbad8082da3c9cd53b65081910516feb492577b7fc xen-fd-is-file.c
2094ea964fa610b2bf72fd2c7ede7e954899a75c0f5b08030cf1d74460fb759ade84866176e32f8fe29c921dfdc6dafd2b31e23ab9b0a3874d3dceeabdd1913b xenqemu-xattr-size-max.patch
6c28470dab368ce94d94db9e66954e4d915394ea730f6d4abb198ae122dbd7412453d6d8054f0a348d43d7f807fb13294363162f8b19f47311e802ffa9a40a90 stubdom-hack.patch
-57bae240ac94fd35e8a2a39a06fdc4178a1cf0782832a77fd768ca3c773d8b27d76692703ac481733874e5a0198ef20d7319ea504c6b7836d4edd0a198adede1 0001-xen-arm-Create-dom0less-domUs-earlier.patch
-2b47e612c23c8bb65a2432f93a877f592b75b8de2ae97d5a22ed37588594a38b740f5c3e0694dd7ceff5f949e24ff38113e543038d5ae22e8c1dc142c3e8d1b3 0002-xen-arm-Boot-modules-should-always-be-scrubbed-if-bo.patch
-7010225962e7c22d6aa2e14d10e5091b3876a76f195e9725e7f175b108f933ea9ad5a080663d27279ccd20e2d4e344620ec414e17437d971a8f3cb9420520696 xsa373-4.13-1.patch
-682476c1e44590268c5f84b96a15a44942ec73a54748264b2879ac7ffdd36336db0fa5b51659de3368c9bc6d12e8ecc551761d04f08301e5055d117ae7430475 xsa373-4.13-2.patch
-bb04c86c57058b674237d6d81b8a5a600e39e6c2144ae72b7312ee7e72d4305c5fa4b8d5194a0aecd5631e66fcd2165208a821a1fb7034c0c413ae1b1a5525d4 xsa373-4.13-3.patch
-1c93e62bfeb8ed0d5fe6db10baebc00cf54f7a6e2255f53e2770220db86c69fe46dd2fac17502d9da2109a60c93d8703b9bb618977cfe0e9919659f133f87c8d xsa373-4.13-4.patch
-8fb77d16b60efa4307c0008c8773a9d5341f1b0577c6de46fe6e5630a7243c7b2eb55089a1ce778e4ed03ebf29fad69042746121b50cb953016e95a60549a728 xsa373-4.13-5.patch
-9e354ab79cc182ca71c1d60be18b207c0254f35cf89f5020791d98a081bafc0a84ae7320ceb9c6215ccc4846e2daa258f72f577268bda84f5c7153e0bc03cabb xsa375-4.13.patch
-9c104793facd9d595a1cbca21034d700e7e25398cad1440131258a349cd60d6145e5847e9c4bd066a5d63a63aceb8995456126a51b6d3ca872cd90717ebc2dbe xsa377.patch
-6640e6c1373b1a90a5d0ff8a7f21bf98afa35cfb668b68ce53d83f4b6f4e912c7dec818025c753204301b3e2113fa69d17be0b785ed4cd458fe890c121d6ae2f xen.git-e06d0c113e0067b86186db94aabae9c91aa09f35.patch
-b8feb4579e422c6ebe16e9f66bedf710151e745c84c75574f985f9a4ca655510955b05da195b584dff4af320e65093d75ac4e5a300989cd225037bf986cf0f73 xsa378-4.13-0a.patch
-4f80e767621cd2b0d2c5d1b650e5d85753bfd97aba972398ca5991e6da093e2f7c1fa3eae8b8dec27826b2a90a4970b4eb3926cf76aa88d1d13a34041698a322 xsa378-4.13-0b.patch
-d59ad3bf524261e58a7abb495e9723142e5c1ece0d2d0022167abec182dfc3cd77d3572ca29381c2f6eaa21ebfbde603daf4cbd2d6e33a3c5d6eaafa46353f5f xsa378-4.13-0c.patch
-d143906731257c5cb551daf73dfe3b711d6f36eeaf2078c3ea95f438d750248e36fcd015a53e02a426ce850fbe933ee5844301dc405a2b2a36f341cbc7a1da75 xsa378-4.13-1.patch
-8429fac01762219afa06b7b3cb5e53e9363f28f4e9cdabeed7523161a47188b8a86f8d80d1a9d78c5e5677bec6685ce436cc47d06b67e0cd4e816e726618dc0f xsa378-4.13-2.patch
-69281c4f5d06cc085f06d107dd430b61506c7a3fb03025604b716388f1c944427736c2adabe10d6cdb7f40d53df1f0722ece2f4a7666f898bbcd362cdee25b79 xsa378-4.13-3.patch
-c35b4397fd4999331262a2f2439f3d4ff80d3efbd479f192fc644cff31b5f1e968cfd785191158262db4cfcbaf7c862c5d26b78ec5e53b95cc3bad48d21d3a32 xsa378-4.13-4.patch
-2ab2a8e934d0a40484350da17d7f65f0ae34b80574af48ebb23e66d386f51a8e8e66f3b9b1a2093bc8cf96f379b54c8a7cd874b4b3a5efcfcc0b43ecc3546a75 xsa378-4.13-5.patch
-c27c9bead789347824b41450c78bd1287f04996cdc0342ae7b057b20d1ee98d10443a08fddaa8102dffad30900cebbd0927311298fbecfee2d6689908c49f7af xsa378-4.13-6.patch
-9632bda9f463f0fb7a0470a01aca67c8b622e7820e9bd905d5cd15eb57d4ad075be1f6a4ccac4fe6de3a81591871a899a10b7535fafcc4245c9f5647ac905924 xsa378-4.13-7.patch
-0b880a5cee5e50563252448ec5f2e4da0c70bd9d710b871d5319787ef37121521d66e32987cf24a52a2f78785f8b92367d929c3752ffff4ca06448a99d611925 xsa378-4.13-8.patch
-3305caa9bde065962203b8c6b766d67c0fb19048116d5785a7734fad8a9cab87fde71b31192bdcc5e13277d3530c2789f52c88d1d7491638a6b8edcb5241839d xsa379-4.14.patch
-e8284587f60e15cb34db381924e7833e634e9a9320fc206f6ecf411ee5c586a2f1efba0972dc4ba089b888402f9ff919c9b0493ddd8f996f226ba0d063d5efc0 xsa380-4.13-1.patch
-277952033c67001883ed88fb8440c2b44c521e5b869bf6efa171f568ccfc13400ef341780bd6eba587ec484e455cbc57ba5e1186faa48691942a70790ee4f7cb xsa380-4.13-2.patch
-6c5e3388fcfb0dcae30d5f315bf95d263c82519d2cbf2a8a88d280b5b0b1c1ed4cce7a1a85fabbf57c785ad9dc23e8e5e4773c631c00e036aada604ff8e7fa03 xsa382.patch
-d5106df26e6c4512d88ea6748c403117a2b61cb40f6d6c08a76f160352b79f94dd67cbb3419a33f2c6cfc7bbd644baed0498e366a6bf00d8031df728a47f36ea xsa383.patch
-c07bcd765a9a89905ba9c01b063e33ed14376e9a3c1a7d596d5426fce567719d66ecf80cad16e434f91b9dfc3dccb705e6b265f8da80729b10310308a0d19a37 xsa384-4.14.patch
8c9cfc6afca325df1d8026e21ed03fa8cd2c7e1a21a56cc1968301c5ab634bfe849951899e75d328951d7a41273d1e49a2448edbadec0029ed410c43c0549812 hotplug-Linux-iscsi-block-handle-lun-1.patch
61f66bab603778fb41bfe8e85320c15f2bf3e5d8583e077b56a93784dbdb9b2c7c5e55ce18f06b87501429086f8410d102d3ed5f2a77d54bcfa328bc07681f4d drop-test.py.patch
8cb12dbfc05a53898a97d47d71ab6b8a6f81c5e5579fd765b37303faea95c645cb8dedc05e3d064bdf070e93814e00bf8939767acc1127513375bab0fe2f4436 py3-compat.patch
diff --git a/main/xen/xen.git-e06d0c113e0067b86186db94aabae9c91aa09f35.patch b/main/xen/xen.git-e06d0c113e0067b86186db94aabae9c91aa09f35.patch
deleted file mode 100644
index e36de31acf..0000000000
--- a/main/xen/xen.git-e06d0c113e0067b86186db94aabae9c91aa09f35.patch
+++ /dev/null
@@ -1,117 +0,0 @@
-From e06d0c113e0067b86186db94aabae9c91aa09f35 Mon Sep 17 00:00:00 2001
-From: Jan Beulich <jbeulich@suse.com>
-Date: Thu, 15 Jul 2021 09:32:21 +0200
-Subject: [PATCH] VT-d: adjust domid map updating when unmapping context
-
-When an earlier error occurred, cleaning up the domid mapping data is
-wrong, as references likely still exist. The only exception to this is
-when the actual unmapping worked, but some flush failed (supposedly
-impossible after XSA-373). The guest will get crashed in such a case
-though, so add fallback cleanup to domain destruction to cover this
-case. This in turn makes it desirable to silence the dprintk() in
-domain_iommu_domid().
-
-Note that no error will be returned anymore when the lookup fails - in
-the common case lookup failure would already have caused
-domain_context_unmap_one() to fail, yet even from a more general
-perspective it doesn't look right to fail domain_context_unmap() in such
-a case when this was the last device, but not when any earlier unmap was
-otherwise successful.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Kevin Tian <kevin.tian@intel.com>
-master commit: 32655880057ce2829f962d46916ea6cec60f98d3
-master date: 2021-06-24 16:29:13 +0200
----
- xen/drivers/passthrough/vtd/iommu.c | 39 ++++++++++++++++++-----------
- 1 file changed, 24 insertions(+), 15 deletions(-)
-
-diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
-index 7d1813a615..e4c0e4368e 100644
---- a/xen/drivers/passthrough/vtd/iommu.c
-+++ b/xen/drivers/passthrough/vtd/iommu.c
-@@ -79,9 +79,11 @@ static int domain_iommu_domid(struct domain *d,
- i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1);
- }
-
-- dprintk(XENLOG_ERR VTDPREFIX,
-- "Cannot get valid iommu domid: domid=%d iommu->index=%d\n",
-- d->domain_id, iommu->index);
-+ if ( !d->is_dying )
-+ dprintk(XENLOG_ERR VTDPREFIX,
-+ "Cannot get valid iommu %u domid: %pd\n",
-+ iommu->index, d);
-+
- return -1;
- }
-
-@@ -146,6 +148,17 @@ static int context_get_domain_id(struct context_entry *context,
- return domid;
- }
-
-+static void cleanup_domid_map(struct domain *domain, struct vtd_iommu *iommu)
-+{
-+ int iommu_domid = domain_iommu_domid(domain, iommu);
-+
-+ if ( iommu_domid >= 0 )
-+ {
-+ clear_bit(iommu_domid, iommu->domid_bitmap);
-+ iommu->domid_map[iommu_domid] = 0;
-+ }
-+}
-+
- static int iommus_incoherent;
-
- static void sync_cache(const void *addr, unsigned int size)
-@@ -1753,6 +1766,9 @@ static int domain_context_unmap(struct domain *domain, u8 devfn,
- goto out;
- }
-
-+ if ( ret )
-+ goto out;
-+
- /*
- * if no other devices under the same iommu owned by this domain,
- * clear iommu in iommu_bitmap and clear domain_id in domid_bitmp
-@@ -1772,19 +1788,8 @@ static int domain_context_unmap(struct domain *domain, u8 devfn,
-
- if ( found == 0 )
- {
-- int iommu_domid;
--
- clear_bit(iommu->index, &dom_iommu(domain)->arch.iommu_bitmap);
--
-- iommu_domid = domain_iommu_domid(domain, iommu);
-- if ( iommu_domid == -1 )
-- {
-- ret = -EINVAL;
-- goto out;
-- }
--
-- clear_bit(iommu_domid, iommu->domid_bitmap);
-- iommu->domid_map[iommu_domid] = 0;
-+ cleanup_domid_map(domain, iommu);
- }
-
- out:
-@@ -1795,6 +1800,7 @@ static void iommu_domain_teardown(struct domain *d)
- {
- struct domain_iommu *hd = dom_iommu(d);
- struct mapped_rmrr *mrmrr, *tmp;
-+ const struct acpi_drhd_unit *drhd;
-
- if ( list_empty(&acpi_drhd_units) )
- return;
-@@ -1814,6 +1820,9 @@ static void iommu_domain_teardown(struct domain *d)
- iommu_free_pagetable(hd->arch.pgd_maddr, agaw_to_level(hd->arch.agaw));
- hd->arch.pgd_maddr = 0;
- spin_unlock(&hd->arch.mapping_lock);
-+
-+ for_each_drhd_unit ( drhd )
-+ cleanup_domid_map(d, drhd->iommu);
- }
-
- static int __must_check intel_iommu_map_page(struct domain *d, dfn_t dfn,
---
-2.30.2
-
diff --git a/main/xen/xsa373-4.13-1.patch b/main/xen/xsa373-4.13-1.patch
deleted file mode 100644
index ee5229a11c..0000000000
--- a/main/xen/xsa373-4.13-1.patch
+++ /dev/null
@@ -1,120 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: VT-d: size qinval queue dynamically
-
-With the present synchronous model, we need two slots for every
-operation (the operation itself and a wait descriptor). There can be
-one such pair of requests pending per CPU. To ensure that under all
-normal circumstances a slot is always available when one is requested,
-size the queue ring according to the number of present CPUs.
-
-This is part of XSA-373 / CVE-2021-28692.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-
---- a/xen/drivers/passthrough/vtd/iommu.h
-+++ b/xen/drivers/passthrough/vtd/iommu.h
-@@ -450,17 +450,9 @@ struct qinval_entry {
- }q;
- };
-
--/* Order of queue invalidation pages(max is 8) */
--#define QINVAL_PAGE_ORDER 2
--
--#define QINVAL_ARCH_PAGE_ORDER (QINVAL_PAGE_ORDER + PAGE_SHIFT_4K - PAGE_SHIFT)
--#define QINVAL_ARCH_PAGE_NR ( QINVAL_ARCH_PAGE_ORDER < 0 ? \
-- 1 : \
-- 1 << QINVAL_ARCH_PAGE_ORDER )
--
- /* Each entry is 16 bytes, so 2^8 entries per page */
- #define QINVAL_ENTRY_ORDER ( PAGE_SHIFT - 4 )
--#define QINVAL_ENTRY_NR (1 << (QINVAL_PAGE_ORDER + 8))
-+#define QINVAL_MAX_ENTRY_NR (1u << (7 + QINVAL_ENTRY_ORDER))
-
- /* Status data flag */
- #define QINVAL_STAT_INIT 0
---- a/xen/drivers/passthrough/vtd/qinval.c
-+++ b/xen/drivers/passthrough/vtd/qinval.c
-@@ -31,6 +31,9 @@
-
- #define VTD_QI_TIMEOUT 1
-
-+static unsigned int __read_mostly qi_pg_order;
-+static unsigned int __read_mostly qi_entry_nr;
-+
- static int __must_check invalidate_sync(struct vtd_iommu *iommu);
-
- static void print_qi_regs(struct vtd_iommu *iommu)
-@@ -55,7 +58,7 @@ static unsigned int qinval_next_index(st
- tail >>= QINVAL_INDEX_SHIFT;
-
- /* (tail+1 == head) indicates a full queue, wait for HW */
-- while ( ( tail + 1 ) % QINVAL_ENTRY_NR ==
-+ while ( ((tail + 1) & (qi_entry_nr - 1)) ==
- ( dmar_readq(iommu->reg, DMAR_IQH_REG) >> QINVAL_INDEX_SHIFT ) )
- cpu_relax();
-
-@@ -68,7 +71,7 @@ static void qinval_update_qtail(struct v
-
- /* Need hold register lock when update tail */
- ASSERT( spin_is_locked(&iommu->register_lock) );
-- val = (index + 1) % QINVAL_ENTRY_NR;
-+ val = (index + 1) & (qi_entry_nr - 1);
- dmar_writeq(iommu->reg, DMAR_IQT_REG, (val << QINVAL_INDEX_SHIFT));
- }
-
-@@ -403,8 +406,28 @@ int enable_qinval(struct vtd_iommu *iomm
-
- if ( iommu->qinval_maddr == 0 )
- {
-- iommu->qinval_maddr = alloc_pgtable_maddr(QINVAL_ARCH_PAGE_NR,
-- iommu->node);
-+ if ( !qi_entry_nr )
-+ {
-+ /*
-+ * With the present synchronous model, we need two slots for every
-+ * operation (the operation itself and a wait descriptor). There
-+ * can be one such pair of requests pending per CPU. One extra
-+ * entry is needed as the ring is considered full when there's
-+ * only one entry left.
-+ */
-+ BUILD_BUG_ON(CONFIG_NR_CPUS * 2 >= QINVAL_MAX_ENTRY_NR);
-+ qi_pg_order = get_order_from_bytes((num_present_cpus() * 2 + 1) <<
-+ (PAGE_SHIFT -
-+ QINVAL_ENTRY_ORDER));
-+ qi_entry_nr = 1u << (qi_pg_order + QINVAL_ENTRY_ORDER);
-+
-+ dprintk(XENLOG_INFO VTDPREFIX,
-+ "QI: using %u-entry ring(s)\n", qi_entry_nr);
-+ }
-+
-+ iommu->qinval_maddr =
-+ alloc_pgtable_maddr(qi_entry_nr >> QINVAL_ENTRY_ORDER,
-+ iommu->node);
- if ( iommu->qinval_maddr == 0 )
- {
- dprintk(XENLOG_WARNING VTDPREFIX,
-@@ -418,15 +441,16 @@ int enable_qinval(struct vtd_iommu *iomm
-
- spin_lock_irqsave(&iommu->register_lock, flags);
-
-- /* Setup Invalidation Queue Address(IQA) register with the
-- * address of the page we just allocated. QS field at
-- * bits[2:0] to indicate size of queue is one 4KB page.
-- * That's 256 entries. Queued Head (IQH) and Queue Tail (IQT)
-- * registers are automatically reset to 0 with write
-- * to IQA register.
-+ /*
-+ * Setup Invalidation Queue Address (IQA) register with the address of the
-+ * pages we just allocated. The QS field at bits[2:0] indicates the size
-+ * (page order) of the queue.
-+ *
-+ * Queued Head (IQH) and Queue Tail (IQT) registers are automatically
-+ * reset to 0 with write to IQA register.
- */
- dmar_writeq(iommu->reg, DMAR_IQA_REG,
-- iommu->qinval_maddr | QINVAL_PAGE_ORDER);
-+ iommu->qinval_maddr | qi_pg_order);
-
- dmar_writeq(iommu->reg, DMAR_IQT_REG, 0);
-
diff --git a/main/xen/xsa373-4.13-2.patch b/main/xen/xsa373-4.13-2.patch
deleted file mode 100644
index ceb5bea6c3..0000000000
--- a/main/xen/xsa373-4.13-2.patch
+++ /dev/null
@@ -1,95 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: AMD/IOMMU: size command buffer dynamically
-
-With the present synchronous model, we need two slots for every
-operation (the operation itself and a wait command). There can be one
-such pair of commands pending per CPU. To ensure that under all normal
-circumstances a slot is always available when one is requested, size the
-command ring according to the number of present CPUs.
-
-This is part of XSA-373 / CVE-2021-28692.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-
---- a/xen/drivers/passthrough/amd/iommu_cmd.c
-+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
-@@ -35,8 +35,8 @@ static int queue_iommu_command(struct am
- if ( head != tail )
- {
- memcpy(iommu->cmd_buffer.buffer +
-- (iommu->cmd_buffer.tail * IOMMU_CMD_BUFFER_ENTRY_SIZE),
-- cmd, IOMMU_CMD_BUFFER_ENTRY_SIZE);
-+ (iommu->cmd_buffer.tail * sizeof(cmd_entry_t)),
-+ cmd, sizeof(cmd_entry_t));
-
- iommu->cmd_buffer.tail = tail;
- return 1;
---- a/xen/drivers/passthrough/amd/iommu_init.c
-+++ b/xen/drivers/passthrough/amd/iommu_init.c
-@@ -125,7 +125,7 @@ static void register_iommu_cmd_buffer_in
- writel(entry, iommu->mmio_base + IOMMU_CMD_BUFFER_BASE_LOW_OFFSET);
-
- power_of2_entries = get_order_from_bytes(iommu->cmd_buffer.alloc_size) +
-- IOMMU_CMD_BUFFER_POWER_OF2_ENTRIES_PER_PAGE;
-+ PAGE_SHIFT - IOMMU_CMD_BUFFER_ENTRY_ORDER;
-
- entry = 0;
- iommu_set_addr_hi_to_reg(&entry, addr_hi);
-@@ -1050,9 +1050,31 @@ static void *__init allocate_ring_buffer
- static void * __init allocate_cmd_buffer(struct amd_iommu *iommu)
- {
- /* allocate 'command buffer' in power of 2 increments of 4K */
-+ static unsigned int __read_mostly nr_ents;
-+
-+ if ( !nr_ents )
-+ {
-+ unsigned int order;
-+
-+ /*
-+ * With the present synchronous model, we need two slots for every
-+ * operation (the operation itself and a wait command). There can be
-+ * one such pair of requests pending per CPU. One extra entry is
-+ * needed as the ring is considered full when there's only one entry
-+ * left.
-+ */
-+ BUILD_BUG_ON(CONFIG_NR_CPUS * 2 >= IOMMU_CMD_BUFFER_MAX_ENTRIES);
-+ order = get_order_from_bytes((num_present_cpus() * 2 + 1) <<
-+ IOMMU_CMD_BUFFER_ENTRY_ORDER);
-+ nr_ents = 1u << (order + PAGE_SHIFT - IOMMU_CMD_BUFFER_ENTRY_ORDER);
-+
-+ AMD_IOMMU_DEBUG("using %u-entry cmd ring(s)\n", nr_ents);
-+ }
-+
-+ BUILD_BUG_ON(sizeof(cmd_entry_t) != (1u << IOMMU_CMD_BUFFER_ENTRY_ORDER));
-+
- return allocate_ring_buffer(&iommu->cmd_buffer, sizeof(cmd_entry_t),
-- IOMMU_CMD_BUFFER_DEFAULT_ENTRIES,
-- "Command Buffer", false);
-+ nr_ents, "Command Buffer", false);
- }
-
- static void * __init allocate_event_log(struct amd_iommu *iommu)
---- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
-+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
-@@ -20,9 +20,6 @@
- #ifndef _ASM_X86_64_AMD_IOMMU_DEFS_H
- #define _ASM_X86_64_AMD_IOMMU_DEFS_H
-
--/* IOMMU Command Buffer entries: in power of 2 increments, minimum of 256 */
--#define IOMMU_CMD_BUFFER_DEFAULT_ENTRIES 512
--
- /* IOMMU Event Log entries: in power of 2 increments, minimum of 256 */
- #define IOMMU_EVENT_LOG_DEFAULT_ENTRIES 512
-
-@@ -168,8 +165,8 @@ struct amd_iommu_dte {
- #define IOMMU_CMD_BUFFER_LENGTH_MASK 0x0F000000
- #define IOMMU_CMD_BUFFER_LENGTH_SHIFT 24
-
--#define IOMMU_CMD_BUFFER_ENTRY_SIZE 16
--#define IOMMU_CMD_BUFFER_POWER_OF2_ENTRIES_PER_PAGE 8
-+#define IOMMU_CMD_BUFFER_ENTRY_ORDER 4
-+#define IOMMU_CMD_BUFFER_MAX_ENTRIES (1u << 15)
-
- #define IOMMU_CMD_OPCODE_MASK 0xF0000000
- #define IOMMU_CMD_OPCODE_SHIFT 28
diff --git a/main/xen/xsa373-4.13-3.patch b/main/xen/xsa373-4.13-3.patch
deleted file mode 100644
index f2a24ea416..0000000000
--- a/main/xen/xsa373-4.13-3.patch
+++ /dev/null
@@ -1,163 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: VT-d: eliminate flush related timeouts
-
-Leaving an in-progress operation pending when it appears to take too
-long is problematic: If e.g. a QI command completed later, the write to
-the "poll slot" may instead be understood to signal a subsequently
-started command's completion. Also our accounting of the timeout period
-was actually wrong: We included the time it took for the command to
-actually make it to the front of the queue, which could be heavily
-affected by guests other than the one for which the flush is being
-performed.
-
-Do away with all timeout detection on all flush related code paths.
-Log excessively long processing times (with a progressive threshold) to
-have some indication of problems in this area.
-
-Additionally log (once) if qinval_next_index() didn't immediately find
-an available slot. Together with the earlier change sizing the queue(s)
-dynamically, we should now have a guarantee that with our fully
-synchronous model any demand for slots can actually be satisfied.
-
-This is part of XSA-373 / CVE-2021-28692.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-
---- a/xen/drivers/passthrough/vtd/dmar.h
-+++ b/xen/drivers/passthrough/vtd/dmar.h
-@@ -127,6 +127,34 @@ do {
- } \
- } while (0)
-
-+#define IOMMU_FLUSH_WAIT(what, iommu, offset, op, cond, sts) \
-+do { \
-+ static unsigned int __read_mostly threshold = 1; \
-+ s_time_t start = NOW(); \
-+ s_time_t timeout = start + DMAR_OPERATION_TIMEOUT * threshold; \
-+ \
-+ for ( ; ; ) \
-+ { \
-+ sts = op(iommu->reg, offset); \
-+ if ( cond ) \
-+ break; \
-+ if ( timeout && NOW() > timeout ) \
-+ { \
-+ threshold |= threshold << 1; \
-+ printk(XENLOG_WARNING VTDPREFIX \
-+ " IOMMU#%u: %s flush taking too long\n", \
-+ iommu->index, what); \
-+ timeout = 0; \
-+ } \
-+ cpu_relax(); \
-+ } \
-+ \
-+ if ( !timeout ) \
-+ printk(XENLOG_WARNING VTDPREFIX \
-+ " IOMMU#%u: %s flush took %lums\n", \
-+ iommu->index, what, (NOW() - start) / 10000000); \
-+} while ( false )
-+
- int vtd_hw_check(void);
- void disable_pmr(struct vtd_iommu *iommu);
- int is_igd_drhd(struct acpi_drhd_unit *drhd);
---- a/xen/drivers/passthrough/vtd/iommu.c
-+++ b/xen/drivers/passthrough/vtd/iommu.c
-@@ -320,8 +320,8 @@ static void iommu_flush_write_buffer(str
- dmar_writel(iommu->reg, DMAR_GCMD_REG, val | DMA_GCMD_WBF);
-
- /* Make sure hardware complete it */
-- IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
-- !(val & DMA_GSTS_WBFS), val);
-+ IOMMU_FLUSH_WAIT("write buffer", iommu, DMAR_GSTS_REG, dmar_readl,
-+ !(val & DMA_GSTS_WBFS), val);
-
- spin_unlock_irqrestore(&iommu->register_lock, flags);
- }
-@@ -370,8 +370,8 @@ int vtd_flush_context_reg(struct vtd_iom
- dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
-
- /* Make sure hardware complete it */
-- IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG, dmar_readq,
-- !(val & DMA_CCMD_ICC), val);
-+ IOMMU_FLUSH_WAIT("context", iommu, DMAR_CCMD_REG, dmar_readq,
-+ !(val & DMA_CCMD_ICC), val);
-
- spin_unlock_irqrestore(&iommu->register_lock, flags);
- /* flush context entry will implicitly flush write buffer */
-@@ -448,8 +448,8 @@ int vtd_flush_iotlb_reg(struct vtd_iommu
- dmar_writeq(iommu->reg, tlb_offset + 8, val);
-
- /* Make sure hardware complete it */
-- IOMMU_WAIT_OP(iommu, (tlb_offset + 8), dmar_readq,
-- !(val & DMA_TLB_IVT), val);
-+ IOMMU_FLUSH_WAIT("iotlb", iommu, (tlb_offset + 8), dmar_readq,
-+ !(val & DMA_TLB_IVT), val);
- spin_unlock_irqrestore(&iommu->register_lock, flags);
-
- /* check IOTLB invalidation granularity */
---- a/xen/drivers/passthrough/vtd/qinval.c
-+++ b/xen/drivers/passthrough/vtd/qinval.c
-@@ -29,8 +29,6 @@
- #include "extern.h"
- #include "../ats.h"
-
--#define VTD_QI_TIMEOUT 1
--
- static unsigned int __read_mostly qi_pg_order;
- static unsigned int __read_mostly qi_entry_nr;
-
-@@ -60,7 +58,11 @@ static unsigned int qinval_next_index(st
- /* (tail+1 == head) indicates a full queue, wait for HW */
- while ( ((tail + 1) & (qi_entry_nr - 1)) ==
- ( dmar_readq(iommu->reg, DMAR_IQH_REG) >> QINVAL_INDEX_SHIFT ) )
-+ {
-+ printk_once(XENLOG_ERR VTDPREFIX " IOMMU#%u: no QI slot available\n",
-+ iommu->index);
- cpu_relax();
-+ }
-
- return tail;
- }
-@@ -180,23 +182,32 @@ static int __must_check queue_invalidate
- /* Now we don't support interrupt method */
- if ( sw )
- {
-- s_time_t timeout;
--
-- /* In case all wait descriptor writes to same addr with same data */
-- timeout = NOW() + MILLISECS(flush_dev_iotlb ?
-- iommu_dev_iotlb_timeout : VTD_QI_TIMEOUT);
-+ static unsigned int __read_mostly threshold = 1;
-+ s_time_t start = NOW();
-+ s_time_t timeout = start + (flush_dev_iotlb
-+ ? iommu_dev_iotlb_timeout
-+ : 100) * MILLISECS(threshold);
-
- while ( ACCESS_ONCE(*this_poll_slot) != QINVAL_STAT_DONE )
- {
-- if ( NOW() > timeout )
-+ if ( timeout && NOW() > timeout )
- {
-- print_qi_regs(iommu);
-+ threshold |= threshold << 1;
- printk(XENLOG_WARNING VTDPREFIX
-- " Queue invalidate wait descriptor timed out\n");
-- return -ETIMEDOUT;
-+ " IOMMU#%u: QI%s wait descriptor taking too long\n",
-+ iommu->index, flush_dev_iotlb ? " dev" : "");
-+ print_qi_regs(iommu);
-+ timeout = 0;
- }
- cpu_relax();
- }
-+
-+ if ( !timeout )
-+ printk(XENLOG_WARNING VTDPREFIX
-+ " IOMMU#%u: QI%s wait descriptor took %lums\n",
-+ iommu->index, flush_dev_iotlb ? " dev" : "",
-+ (NOW() - start) / 10000000);
-+
- return 0;
- }
-
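[Editor's note: the patch above drops hard -ETIMEDOUT handling in favour of waiting indefinitely while logging, with a warning threshold that is raised each time it is hit (threshold |= threshold << 1 grows it 1, 3, 7, 15, ...). Below is a standalone C sketch of that pattern using POSIX clock_gettime() instead of Xen's NOW(); it is illustrative only, not the hypervisor code.]

    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>

    static long long now_ns(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000000000LL + ts.tv_nsec;
    }

    /* Spin until done() reports completion; never give up, but complain
     * (once per call) when the wait exceeds a slowly rising threshold. */
    static void wait_for_flush(bool (*done)(void), long long base_timeout_ns)
    {
        static unsigned int threshold = 1;      /* shared across calls: 1, 3, 7, ... */
        long long start = now_ns();
        long long deadline = start + base_timeout_ns * threshold;

        while (!done())
        {
            if (deadline && now_ns() > deadline)
            {
                threshold |= threshold << 1;    /* raise the bar for future waits */
                fprintf(stderr, "flush taking too long\n");
                deadline = 0;                   /* warn only once, keep spinning */
            }
            /* a cpu_relax()-style pause would go here */
        }

        if (!deadline)
            fprintf(stderr, "flush took %lld ms\n", (now_ns() - start) / 1000000);
    }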
diff --git a/main/xen/xsa373-4.13-4.patch b/main/xen/xsa373-4.13-4.patch
deleted file mode 100644
index 7f0370b15a..0000000000
--- a/main/xen/xsa373-4.13-4.patch
+++ /dev/null
@@ -1,86 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: AMD/IOMMU: wait for command slot to be available
-
-No caller cared about send_iommu_command() indicating unavailability of
-a slot. Hence if a sufficient number prior commands timed out, we did
-blindly assume that the requested command was submitted to the IOMMU
-when really it wasn't. This could mean both a hanging system (waiting
-for a command to complete that was never seen by the IOMMU) or blindly
-propagating success back to callers, making them believe they're fine
-to e.g. free previously unmapped pages.
-
-Fold the three involved functions into one, add spin waiting for an
-available slot along the lines of VT-d's qinval_next_index(), and as a
-consequence drop all error indicator return types/values.
-
-This is part of XSA-373 / CVE-2021-28692.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-
---- a/xen/drivers/passthrough/amd/iommu_cmd.c
-+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
-@@ -22,48 +22,36 @@
- #include <asm/hvm/svm/amd-iommu-proto.h>
- #include "../ats.h"
-
--static int queue_iommu_command(struct amd_iommu *iommu, u32 cmd[])
-+static void send_iommu_command(struct amd_iommu *iommu,
-+ const uint32_t cmd[4])
- {
-- uint32_t tail, head;
-+ uint32_t tail;
-
- tail = iommu->cmd_buffer.tail;
- if ( ++tail == iommu->cmd_buffer.entries )
- tail = 0;
-
-- head = iommu_get_rb_pointer(readl(iommu->mmio_base +
-- IOMMU_CMD_BUFFER_HEAD_OFFSET));
-- if ( head != tail )
-+ while ( tail == iommu_get_rb_pointer(readl(iommu->mmio_base +
-+ IOMMU_CMD_BUFFER_HEAD_OFFSET)) )
- {
-- memcpy(iommu->cmd_buffer.buffer +
-- (iommu->cmd_buffer.tail * sizeof(cmd_entry_t)),
-- cmd, sizeof(cmd_entry_t));
--
-- iommu->cmd_buffer.tail = tail;
-- return 1;
-+ printk_once(XENLOG_ERR
-+ "AMD IOMMU %04x:%02x:%02x.%u: no cmd slot available\n",
-+ iommu->seg, PCI_BUS(iommu->bdf),
-+ PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf));
-+ cpu_relax();
- }
-
-- return 0;
--}
-+ memcpy(iommu->cmd_buffer.buffer +
-+ (iommu->cmd_buffer.tail * sizeof(cmd_entry_t)),
-+ cmd, sizeof(cmd_entry_t));
-
--static void commit_iommu_command_buffer(struct amd_iommu *iommu)
--{
-- u32 tail = 0;
-+ iommu->cmd_buffer.tail = tail;
-
-+ tail = 0;
- iommu_set_rb_pointer(&tail, iommu->cmd_buffer.tail);
- writel(tail, iommu->mmio_base+IOMMU_CMD_BUFFER_TAIL_OFFSET);
- }
-
--int send_iommu_command(struct amd_iommu *iommu, u32 cmd[])
--{
-- if ( queue_iommu_command(iommu, cmd) )
-- {
-- commit_iommu_command_buffer(iommu);
-- return 1;
-- }
--
-- return 0;
--}
--
- static void flush_command_buffer(struct amd_iommu *iommu)
- {
- u32 cmd[4], status;
diff --git a/main/xen/xsa373-4.13-5.patch b/main/xen/xsa373-4.13-5.patch
deleted file mode 100644
index 984536760d..0000000000
--- a/main/xen/xsa373-4.13-5.patch
+++ /dev/null
@@ -1,145 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: AMD/IOMMU: drop command completion timeout
-
-First and foremost - such timeouts were not signaled to callers, making
-them believe they're fine to e.g. free previously unmapped pages.
-
-Mirror VT-d's behavior: A fixed number of loop iterations is not a
-suitable way to detect timeouts in an environment (CPU and bus speeds)
-independent manner anyway. Furthermore, leaving an in-progress operation
-pending when it appears to take too long is problematic: If a command
-completed later, the signaling of its completion may instead be
-understood to signal a subsequently started command's completion.
-
-Log excessively long processing times (with a progressive threshold) to
-have some indication of problems in this area. Allow callers to specify
-a non-default timeout bias for this logging, using the same values as
-VT-d does, which in particular means a (by default) much larger value
-for device IO TLB invalidation.
-
-This is part of XSA-373 / CVE-2021-28692.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-
---- a/xen/drivers/passthrough/amd/iommu_cmd.c
-+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
-@@ -52,10 +52,12 @@ static void send_iommu_command(struct am
- writel(tail, iommu->mmio_base+IOMMU_CMD_BUFFER_TAIL_OFFSET);
- }
-
--static void flush_command_buffer(struct amd_iommu *iommu)
-+static void flush_command_buffer(struct amd_iommu *iommu,
-+ unsigned int timeout_base)
- {
-- u32 cmd[4], status;
-- int loop_count, comp_wait;
-+ uint32_t cmd[4];
-+ s_time_t start, timeout;
-+ static unsigned int __read_mostly threshold = 1;
-
- /* RW1C 'ComWaitInt' in status register */
- writel(IOMMU_STATUS_COMP_WAIT_INT_MASK,
-@@ -71,24 +73,31 @@ static void flush_command_buffer(struct
- IOMMU_COMP_WAIT_I_FLAG_SHIFT, &cmd[0]);
- send_iommu_command(iommu, cmd);
-
-- /* Make loop_count long enough for polling completion wait bit */
-- loop_count = 1000;
-- do {
-- status = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-- comp_wait = get_field_from_reg_u32(status,
-- IOMMU_STATUS_COMP_WAIT_INT_MASK,
-- IOMMU_STATUS_COMP_WAIT_INT_SHIFT);
-- --loop_count;
-- } while ( !comp_wait && loop_count );
--
-- if ( comp_wait )
-+ start = NOW();
-+ timeout = start + (timeout_base ?: 100) * MILLISECS(threshold);
-+ while ( !(readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET) &
-+ IOMMU_STATUS_COMP_WAIT_INT_MASK) )
- {
-- /* RW1C 'ComWaitInt' in status register */
-- writel(IOMMU_STATUS_COMP_WAIT_INT_MASK,
-- iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
-- return;
-+ if ( timeout && NOW() > timeout )
-+ {
-+ threshold |= threshold << 1;
-+ printk(XENLOG_WARNING
-+ "AMD IOMMU %04x:%02x:%02x.%u: %scompletion wait taking too long\n",
-+ iommu->seg, PCI_BUS(iommu->bdf),
-+ PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf),
-+ timeout_base ? "iotlb " : "");
-+ timeout = 0;
-+ }
-+ cpu_relax();
- }
-- AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n");
-+
-+ if ( !timeout )
-+ printk(XENLOG_WARNING
-+ "AMD IOMMU %04x:%02x:%02x.%u: %scompletion wait took %lums\n",
-+ iommu->seg, PCI_BUS(iommu->bdf),
-+ PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf),
-+ timeout_base ? "iotlb " : "",
-+ (NOW() - start) / 10000000);
- }
-
- /* Build low level iommu command messages */
-@@ -300,7 +309,7 @@ void amd_iommu_flush_iotlb(u8 devfn, con
- /* send INVALIDATE_IOTLB_PAGES command */
- spin_lock_irqsave(&iommu->lock, flags);
- invalidate_iotlb_pages(iommu, maxpend, 0, queueid, daddr, req_id, order);
-- flush_command_buffer(iommu);
-+ flush_command_buffer(iommu, iommu_dev_iotlb_timeout);
- spin_unlock_irqrestore(&iommu->lock, flags);
- }
-
-@@ -337,7 +346,7 @@ static void _amd_iommu_flush_pages(struc
- {
- spin_lock_irqsave(&iommu->lock, flags);
- invalidate_iommu_pages(iommu, daddr, dom_id, order);
-- flush_command_buffer(iommu);
-+ flush_command_buffer(iommu, 0);
- spin_unlock_irqrestore(&iommu->lock, flags);
- }
-
-@@ -361,7 +370,7 @@ void amd_iommu_flush_device(struct amd_i
- ASSERT( spin_is_locked(&iommu->lock) );
-
- invalidate_dev_table_entry(iommu, bdf);
-- flush_command_buffer(iommu);
-+ flush_command_buffer(iommu, 0);
- }
-
- void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf)
-@@ -369,7 +378,7 @@ void amd_iommu_flush_intremap(struct amd
- ASSERT( spin_is_locked(&iommu->lock) );
-
- invalidate_interrupt_table(iommu, bdf);
-- flush_command_buffer(iommu);
-+ flush_command_buffer(iommu, 0);
- }
-
- void amd_iommu_flush_all_caches(struct amd_iommu *iommu)
-@@ -377,7 +386,7 @@ void amd_iommu_flush_all_caches(struct a
- ASSERT( spin_is_locked(&iommu->lock) );
-
- invalidate_iommu_all(iommu);
-- flush_command_buffer(iommu);
-+ flush_command_buffer(iommu, 0);
- }
-
- void amd_iommu_send_guest_cmd(struct amd_iommu *iommu, u32 cmd[])
-@@ -387,7 +396,8 @@ void amd_iommu_send_guest_cmd(struct amd
- spin_lock_irqsave(&iommu->lock, flags);
-
- send_iommu_command(iommu, cmd);
-- flush_command_buffer(iommu);
-+ /* TBD: Timeout selection may require peeking into cmd[]. */
-+ flush_command_buffer(iommu, 0);
-
- spin_unlock_irqrestore(&iommu->lock, flags);
- }
diff --git a/main/xen/xsa375-4.13.patch b/main/xen/xsa375-4.13.patch
deleted file mode 100644
index 6fab954418..0000000000
--- a/main/xen/xsa375-4.13.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/spec-ctrl: Protect against Speculative Code Store Bypass
-
-Modern x86 processors have far-better-than-architecturally-guaranteed self
-modifying code detection. Typically, when a write hits an instruction in
-flight, a Machine Clear occurs to flush stale content in the frontend and
-backend.
-
-For self modifying code, before a write which hits an instruction in flight
-retires, the frontend can speculatively decode and execute the old instruction
-stream. Speculation of this form can suffer from type confusion in registers,
-and potentially leak data.
-
-Furthermore, updates are typically byte-wise, rather than atomic. Depending
-on timing, speculation can race ahead multiple times between individual
-writes, and execute the transiently-malformed instruction stream.
-
-Xen has stubs which are used in certain cases for emulation purposes. Inhibit
-speculation between updating the stub and executing it.
-
-This is XSA-375 / CVE-2021-0089.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
-diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
-index 6dc4f92a84..59c15ca0e7 100644
---- a/xen/arch/x86/pv/emul-priv-op.c
-+++ b/xen/arch/x86/pv/emul-priv-op.c
-@@ -97,6 +97,8 @@ static io_emul_stub_t *io_emul_stub_setup(struct priv_op_ctxt *ctxt, u8 opcode,
- BUILD_BUG_ON(STUB_BUF_SIZE / 2 < MAX(9, /* Default emul stub */
- 5 + IOEMUL_QUIRK_STUB_BYTES));
-
-+ block_speculation(); /* SCSB */
-+
- /* Handy function-typed pointer to the stub. */
- return (void *)stub_va;
- }
-diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
-index bba6dd0187..cd123492a6 100644
---- a/xen/arch/x86/x86_emulate/x86_emulate.c
-+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
-@@ -1172,6 +1172,7 @@ static inline int mkec(uint8_t e, int32_t ec, ...)
- # define invoke_stub(pre, post, constraints...) do { \
- stub_exn.info = (union stub_exception_token) { .raw = ~0 }; \
- stub_exn.line = __LINE__; /* Utility outweighs livepatching cost */ \
-+ block_speculation(); /* SCSB */ \
- asm volatile ( pre "\n\tINDIRECT_CALL %[stub]\n\t" post "\n" \
- ".Lret%=:\n\t" \
- ".pushsection .fixup,\"ax\"\n" \
diff --git a/main/xen/xsa377.patch b/main/xen/xsa377.patch
deleted file mode 100644
index 1a1887b60e..0000000000
--- a/main/xen/xsa377.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Subject: x86/spec-ctrl: Mitigate TAA after S3 resume
-
-The user chosen setting for MSR_TSX_CTRL needs restoring after S3.
-
-All APs get the correct setting via start_secondary(), but the BSP was missed
-out.
-
-This is XSA-377 / CVE-2021-28690.
-
-Fixes: 8c4330818f6 ("x86/spec-ctrl: Mitigate the TSX Asynchronous Abort sidechannel")
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-
-diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
-index 91a8c4d0bd..31a56f02d0 100644
---- a/xen/arch/x86/acpi/power.c
-+++ b/xen/arch/x86/acpi/power.c
-@@ -288,6 +288,8 @@ static int enter_state(u32 state)
-
- microcode_update_one();
-
-+ tsx_init(); /* Needs microcode. May change HLE/RTM feature bits. */
-+
- if ( !recheck_cpu_features(0) )
- panic("Missing previously available feature(s)\n");
-
diff --git a/main/xen/xsa378-4.13-0a.patch b/main/xen/xsa378-4.13-0a.patch
deleted file mode 100644
index 61a3341b11..0000000000
--- a/main/xen/xsa378-4.13-0a.patch
+++ /dev/null
@@ -1,77 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: x86/p2m: fix PoD accounting in guest_physmap_add_entry()
-
-The initial observation was that the mfn_valid() check comes too late:
-Neither mfn_add() nor mfn_to_page() (let alone de-referencing the
-result of the latter) are valid for MFNs failing this check. Move it up
-and - noticing that there's no caller doing so - also add an assertion
-that this should never produce "false" here.
-
-In turn this would have meant that the "else" to that if() could now go
-away, which didn't seem right at all. And indeed, considering callers
-like memory_exchange() or various grant table functions, the PoD
-accounting should have been outside of that if() from the very
-beginning.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: aea270e3f7c0db696c88a0e94b1ece7abd339c84
-master date: 2020-02-21 17:14:38 +0100
-
---- a/xen/arch/x86/mm/p2m.c
-+++ b/xen/arch/x86/mm/p2m.c
-@@ -881,6 +881,12 @@ guest_physmap_add_entry(struct domain *d
- if ( p2m_is_foreign(t) )
- return -EINVAL;
-
-+ if ( !mfn_valid(mfn) )
-+ {
-+ ASSERT_UNREACHABLE();
-+ return -EINVAL;
-+ }
-+
- p2m_lock(p2m);
-
- P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn_x(gfn), mfn_x(mfn));
-@@ -981,12 +987,13 @@ guest_physmap_add_entry(struct domain *d
- }
-
- /* Now, actually do the two-way mapping */
-- if ( mfn_valid(mfn) )
-+ rc = p2m_set_entry(p2m, gfn, mfn, page_order, t, p2m->default_access);
-+ if ( rc == 0 )
- {
-- rc = p2m_set_entry(p2m, gfn, mfn, page_order, t,
-- p2m->default_access);
-- if ( rc )
-- goto out; /* Failed to update p2m, bail without updating m2p. */
-+ pod_lock(p2m);
-+ p2m->pod.entry_count -= pod_count;
-+ BUG_ON(p2m->pod.entry_count < 0);
-+ pod_unlock(p2m);
-
- if ( !p2m_is_grant(t) )
- {
-@@ -995,22 +1002,7 @@ guest_physmap_add_entry(struct domain *d
- gfn_x(gfn_add(gfn, i)));
- }
- }
-- else
-- {
-- gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
-- gfn_x(gfn), mfn_x(mfn));
-- rc = p2m_set_entry(p2m, gfn, INVALID_MFN, page_order,
-- p2m_invalid, p2m->default_access);
-- if ( rc == 0 )
-- {
-- pod_lock(p2m);
-- p2m->pod.entry_count -= pod_count;
-- BUG_ON(p2m->pod.entry_count < 0);
-- pod_unlock(p2m);
-- }
-- }
-
--out:
- p2m_unlock(p2m);
-
- return rc;
diff --git a/main/xen/xsa378-4.13-0b.patch b/main/xen/xsa378-4.13-0b.patch
deleted file mode 100644
index ae5e812142..0000000000
--- a/main/xen/xsa378-4.13-0b.patch
+++ /dev/null
@@ -1,62 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: x86/p2m: don't ignore p2m_remove_page()'s return value
-
-It's not very nice to return from guest_physmap_add_entry() after
-perhaps already having made some changes to the P2M, but this is pre-
-existing practice in the function, and imo better than ignoring errors.
-
-Take the liberty and replace an mfn_add() instance with a local variable
-already holding the result (as proven by the check immediately ahead).
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: a6b051a87a586347969bfbaa6925ac0f0c845413
-master date: 2020-04-03 10:56:10 +0200
-
---- a/xen/arch/x86/mm/p2m.c
-+++ b/xen/arch/x86/mm/p2m.c
-@@ -773,8 +773,7 @@ void p2m_final_teardown(struct domain *d
- p2m_teardown_hostp2m(d);
- }
-
--
--static int
-+static int __must_check
- p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn_l, unsigned long mfn,
- unsigned int page_order)
- {
-@@ -979,9 +978,9 @@ guest_physmap_add_entry(struct domain *d
- ASSERT(mfn_valid(omfn));
- P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
- gfn_x(ogfn) , mfn_x(omfn));
-- if ( mfn_eq(omfn, mfn_add(mfn, i)) )
-- p2m_remove_page(p2m, gfn_x(ogfn), mfn_x(mfn_add(mfn, i)),
-- 0);
-+ if ( mfn_eq(omfn, mfn_add(mfn, i)) &&
-+ (rc = p2m_remove_page(p2m, gfn_x(ogfn), mfn_x(omfn), 0)) )
-+ goto out;
- }
- }
- }
-@@ -1003,6 +1002,7 @@ guest_physmap_add_entry(struct domain *d
- }
- }
-
-+ out:
- p2m_unlock(p2m);
-
- return rc;
-@@ -2690,9 +2690,9 @@ int p2m_change_altp2m_gfn(struct domain
- if ( gfn_eq(new_gfn, INVALID_GFN) )
- {
- mfn = ap2m->get_entry(ap2m, old_gfn, &t, &a, 0, NULL, NULL);
-- if ( mfn_valid(mfn) )
-- p2m_remove_page(ap2m, gfn_x(old_gfn), mfn_x(mfn), PAGE_ORDER_4K);
-- rc = 0;
-+ rc = mfn_valid(mfn)
-+ ? p2m_remove_page(ap2m, gfn_x(old_gfn), mfn_x(mfn), PAGE_ORDER_4K)
-+ : 0;
- goto out;
- }
-
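[Editor's note: the patch above adds __must_check to p2m_remove_page() so its error code can no longer be silently dropped; in Xen, as in Linux, __must_check maps to the compiler attribute shown below. The standalone example that follows is illustrative only, and remove_page() is a hypothetical stand-in, not the Xen function.]

    #include <stdio.h>

    #define must_check __attribute__((warn_unused_result))

    /* Hypothetical stand-in for p2m_remove_page(): returns 0 or an error. */
    static must_check int remove_page(unsigned long gfn)
    {
        return gfn ? 0 : -1;
    }

    int main(void)
    {
        int rc = remove_page(1);        /* fine: the result is consumed */

        if (rc)
            fprintf(stderr, "remove failed: %d\n", rc);

        /* remove_page(0);                 <- would trigger -Wunused-result */
        return rc;
    }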
diff --git a/main/xen/xsa378-4.13-0c.patch b/main/xen/xsa378-4.13-0c.patch
deleted file mode 100644
index eec5239a9e..0000000000
--- a/main/xen/xsa378-4.13-0c.patch
+++ /dev/null
@@ -1,59 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: x86/p2m: don't assert that the passed in MFN matches for a remove
-
-guest_physmap_remove_page() gets handed an MFN from the outside, yet
-takes the necessary lock to prevent further changes to the GFN <-> MFN
-mapping itself. While some callers, in particular guest_remove_page()
-(by way of having called get_gfn_query()), hold the GFN lock already,
-various others (most notably perhaps the 2nd instance in
-xenmem_add_to_physmap_one()) don't. While it also is an option to fix
-all the callers, deal with the issue in p2m_remove_page() instead:
-Replace the ASSERT() by a conditional and split the loop into two, such
-that all checking gets done before any modification would occur.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul.durrant@citrix.com>
-Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
-master commit: c65ea16dbcafbe4fe21693b18f8c2a3c5d14600e
-master date: 2020-04-03 10:56:55 +0200
-
---- a/xen/arch/x86/mm/p2m.c
-+++ b/xen/arch/x86/mm/p2m.c
-@@ -779,7 +779,6 @@ p2m_remove_page(struct p2m_domain *p2m,
- {
- unsigned long i;
- gfn_t gfn = _gfn(gfn_l);
-- mfn_t mfn_return;
- p2m_type_t t;
- p2m_access_t a;
-
-@@ -790,15 +789,26 @@ p2m_remove_page(struct p2m_domain *p2m,
- ASSERT(gfn_locked_by_me(p2m, gfn));
- P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn_l, mfn);
-
-+ for ( i = 0; i < (1UL << page_order); )
-+ {
-+ unsigned int cur_order;
-+ mfn_t mfn_return = p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0,
-+ &cur_order, NULL);
-+
-+ if ( p2m_is_valid(t) &&
-+ (!mfn_valid(_mfn(mfn)) || mfn + i != mfn_x(mfn_return)) )
-+ return -EILSEQ;
-+
-+ i += (1UL << cur_order) - ((gfn_l + i) & ((1UL << cur_order) - 1));
-+ }
-+
- if ( mfn_valid(_mfn(mfn)) )
- {
- for ( i = 0; i < (1UL << page_order); i++ )
- {
-- mfn_return = p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0,
-- NULL, NULL);
-+ p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0, NULL, NULL);
- if ( !p2m_is_grant(t) && !p2m_is_shared(t) && !p2m_is_foreign(t) )
- set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
-- ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) );
- }
- }
- return p2m_set_entry(p2m, gfn, INVALID_MFN, page_order, p2m_invalid,
diff --git a/main/xen/xsa378-4.13-1.patch b/main/xen/xsa378-4.13-1.patch
deleted file mode 100644
index 73956ea2d4..0000000000
--- a/main/xen/xsa378-4.13-1.patch
+++ /dev/null
@@ -1,142 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: AMD/IOMMU: correct global exclusion range extending
-
-Besides unity mapping regions, the AMD IOMMU spec also provides for
-exclusion ranges (areas of memory not to be subject to DMA translation)
-to be specified by firmware in the ACPI tables. The spec does not put
-any constraints on the number of such regions.
-
-Blindly assuming all addresses between any two such ranges should also
-be excluded can't be right. Since hardware has room for just a single
-such range (comprised of the Exclusion Base Register and the Exclusion
-Range Limit Register), combine only adjacent or overlapping regions (for
-now; this may require further adjustment in case table entries aren't
-sorted by address) with matching exclusion_allow_all settings. This
-requires bubbling up error indicators, such that IOMMU init can be
-failed when concatenation wasn't possible.
-
-Furthermore, since the exclusion range specified in IOMMU registers
-implies R/W access, reject requests asking for less permissions (this
-will be brought closer to the spec by a subsequent change).
-
-This is part of XSA-378 / CVE-2021-28695.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-
---- a/xen/drivers/passthrough/amd/iommu_acpi.c
-+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
-@@ -117,12 +117,21 @@ static struct amd_iommu * __init find_io
- return NULL;
- }
-
--static void __init reserve_iommu_exclusion_range(
-- struct amd_iommu *iommu, uint64_t base, uint64_t limit)
-+static int __init reserve_iommu_exclusion_range(
-+ struct amd_iommu *iommu, uint64_t base, uint64_t limit,
-+ bool all, bool iw, bool ir)
- {
-+ if ( !ir || !iw )
-+ return -EPERM;
-+
- /* need to extend exclusion range? */
- if ( iommu->exclusion_enable )
- {
-+ if ( iommu->exclusion_limit + PAGE_SIZE < base ||
-+ limit + PAGE_SIZE < iommu->exclusion_base ||
-+ iommu->exclusion_allow_all != all )
-+ return -EBUSY;
-+
- if ( iommu->exclusion_base < base )
- base = iommu->exclusion_base;
- if ( iommu->exclusion_limit > limit )
-@@ -130,16 +139,11 @@ static void __init reserve_iommu_exclusi
- }
-
- iommu->exclusion_enable = IOMMU_CONTROL_ENABLED;
-+ iommu->exclusion_allow_all = all;
- iommu->exclusion_base = base;
- iommu->exclusion_limit = limit;
--}
-
--static void __init reserve_iommu_exclusion_range_all(
-- struct amd_iommu *iommu,
-- unsigned long base, unsigned long limit)
--{
-- reserve_iommu_exclusion_range(iommu, base, limit);
-- iommu->exclusion_allow_all = IOMMU_CONTROL_ENABLED;
-+ return 0;
- }
-
- static void __init reserve_unity_map_for_device(
-@@ -177,6 +181,7 @@ static int __init register_exclusion_ran
- unsigned long range_top, iommu_top, length;
- struct amd_iommu *iommu;
- unsigned int bdf;
-+ int rc = 0;
-
- /* is part of exclusion range inside of IOMMU virtual address space? */
- /* note: 'limit' parameter is assumed to be page-aligned */
-@@ -198,10 +203,15 @@ static int __init register_exclusion_ran
- if ( limit >= iommu_top )
- {
- for_each_amd_iommu( iommu )
-- reserve_iommu_exclusion_range_all(iommu, base, limit);
-+ {
-+ rc = reserve_iommu_exclusion_range(iommu, base, limit,
-+ true /* all */, iw, ir);
-+ if ( rc )
-+ break;
-+ }
- }
-
-- return 0;
-+ return rc;
- }
-
- static int __init register_exclusion_range_for_device(
-@@ -212,6 +222,7 @@ static int __init register_exclusion_ran
- unsigned long range_top, iommu_top, length;
- struct amd_iommu *iommu;
- u16 req;
-+ int rc = 0;
-
- iommu = find_iommu_for_device(seg, bdf);
- if ( !iommu )
-@@ -241,12 +252,13 @@ static int __init register_exclusion_ran
- /* register IOMMU exclusion range settings for device */
- if ( limit >= iommu_top )
- {
-- reserve_iommu_exclusion_range(iommu, base, limit);
-+ rc = reserve_iommu_exclusion_range(iommu, base, limit,
-+ false /* all */, iw, ir);
- ivrs_mappings[bdf].dte_allow_exclusion = true;
- ivrs_mappings[req].dte_allow_exclusion = true;
- }
-
-- return 0;
-+ return rc;
- }
-
- static int __init register_exclusion_range_for_iommu_devices(
-@@ -256,6 +268,7 @@ static int __init register_exclusion_ran
- unsigned long range_top, iommu_top, length;
- unsigned int bdf;
- u16 req;
-+ int rc = 0;
-
- /* is part of exclusion range inside of IOMMU virtual address space? */
- /* note: 'limit' parameter is assumed to be page-aligned */
-@@ -286,8 +299,10 @@ static int __init register_exclusion_ran
-
- /* register IOMMU exclusion range settings */
- if ( limit >= iommu_top )
-- reserve_iommu_exclusion_range_all(iommu, base, limit);
-- return 0;
-+ rc = reserve_iommu_exclusion_range(iommu, base, limit,
-+ true /* all */, iw, ir);
-+
-+ return rc;
- }
-
- static int __init parse_ivmd_device_select(
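[Editor's note: the XSA-378 patch above and the (truncated) follow-up below hinge on range arithmetic: exclusion ranges may only be combined when they overlap or are directly adjacent, while unity maps must be rejected on any overlap. A generic standalone C distillation of those two tests follows; these are illustrative helpers, not the Xen functions.]

    #include <stdbool.h>
    #include <stdint.h>

    /* Inclusive [base, limit] ranges: mergeable when overlapping or when the
     * gap between them is at most 'gap' (the patch uses PAGE_SIZE). */
    static bool ranges_mergeable(uint64_t base1, uint64_t limit1,
                                 uint64_t base2, uint64_t limit2,
                                 uint64_t gap)
    {
        return !(limit1 + gap < base2 || limit2 + gap < base1);
    }

    /* Half-open [addr, addr + len) ranges: true on any overlap. */
    static bool ranges_overlap(uint64_t addr1, uint64_t len1,
                               uint64_t addr2, uint64_t len2)
    {
        return addr1 + len1 > addr2 && addr2 + len2 > addr1;
    }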
diff --git a/main/xen/xsa378-4.13-2.patch b/main/xen/xsa378-4.13-2.patch
deleted file mode 100644
index 755ecdee7e..0000000000
--- a/main/xen/xsa378-4.13-2.patch
+++ /dev/null
@@ -1,218 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: AMD/IOMMU: correct device unity map handling
-
-Blindly assuming all addresses between any two such ranges, specified by
-firmware in the ACPI tables, should also be unity-mapped can't be right.
-Nor can it be correct to merge ranges with differing permissions. Track
-ranges individually; don't merge at all, but check for overlaps instead.
-This requires bubbling up error indicators, such that IOMMU init can be
-failed when allocation of a new tracking struct wasn't possible, or an
-overlap was detected.
-
-At this occasion also stop ignoring
-amd_iommu_reserve_domain_unity_map()'s return value.
-
-This is part of XSA-378 / CVE-2021-28695.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: George Dunlap <george.dunlap@citrix.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-
---- a/xen/drivers/passthrough/amd/iommu_acpi.c
-+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
-@@ -146,32 +146,48 @@ static int __init reserve_iommu_exclusio
- return 0;
- }
-
--static void __init reserve_unity_map_for_device(
-- u16 seg, u16 bdf, unsigned long base,
-- unsigned long length, u8 iw, u8 ir)
-+static int __init reserve_unity_map_for_device(
-+ uint16_t seg, uint16_t bdf, unsigned long base,
-+ unsigned long length, bool iw, bool ir)
- {
- struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg);
-- unsigned long old_top, new_top;
-+ struct ivrs_unity_map *unity_map = ivrs_mappings[bdf].unity_map;
-
-- /* need to extend unity-mapped range? */
-- if ( ivrs_mappings[bdf].unity_map_enable )
-+ /* Check for overlaps. */
-+ for ( ; unity_map; unity_map = unity_map->next )
- {
-- old_top = ivrs_mappings[bdf].addr_range_start +
-- ivrs_mappings[bdf].addr_range_length;
-- new_top = base + length;
-- if ( old_top > new_top )
-- new_top = old_top;
-- if ( ivrs_mappings[bdf].addr_range_start < base )
-- base = ivrs_mappings[bdf].addr_range_start;
-- length = new_top - base;
-- }
--
-- /* extend r/w permissioms and keep aggregate */
-- ivrs_mappings[bdf].write_permission = iw;
-- ivrs_mappings[bdf].read_permission = ir;
-- ivrs_mappings[bdf].unity_map_enable = true;
-- ivrs_mappings[bdf].addr_range_start = base;
-- ivrs_mappings[bdf].addr_range_length = length;
-+ /*
-+ * Exact matches are okay. This can in particular happen when
-+ * register_exclusion_range_for_device() calls here twice for the
-+ * same (s,b,d,f).
-+ */
-+ if ( base == unity_map->addr && length == unity_map->length &&
-+ ir == unity_map->read && iw == unity_map->write )
-+ return 0;
-+
-+ if ( unity_map->addr + unity_map->length > base &&
-+ base + length > unity_map->addr )
-+ {
-+ AMD_IOMMU_DEBUG("IVMD Error: overlap [%lx,%lx) vs [%lx,%lx)\n",
-+ base, base + length, unity_map->addr,
-+ unity_map->addr + unity_map->length);
-+ return -EPERM;
-+ }
-+ }
-+
-+ /* Populate and insert a new unity map. */
-+ unity_map = xmalloc(struct ivrs_unity_map);
-+ if ( !unity_map )
-+ return -ENOMEM;
-+
-+ unity_map->read = ir;
-+ unity_map->write = iw;
-+ unity_map->addr = base;
-+ unity_map->length = length;
-+ unity_map->next = ivrs_mappings[bdf].unity_map;
-+ ivrs_mappings[bdf].unity_map = unity_map;
-+
-+ return 0;
- }
-
- static int __init register_exclusion_range_for_all_devices(
-@@ -194,13 +210,13 @@ static int __init register_exclusion_ran
- length = range_top - base;
- /* reserve r/w unity-mapped page entries for devices */
- /* note: these entries are part of the exclusion range */
-- for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
-- reserve_unity_map_for_device(seg, bdf, base, length, iw, ir);
-+ for ( bdf = 0; !rc && bdf < ivrs_bdf_entries; bdf++ )
-+ rc = reserve_unity_map_for_device(seg, bdf, base, length, iw, ir);
- /* push 'base' just outside of virtual address space */
- base = iommu_top;
- }
- /* register IOMMU exclusion range settings */
-- if ( limit >= iommu_top )
-+ if ( !rc && limit >= iommu_top )
- {
- for_each_amd_iommu( iommu )
- {
-@@ -242,15 +258,15 @@ static int __init register_exclusion_ran
- length = range_top - base;
- /* reserve unity-mapped page entries for device */
- /* note: these entries are part of the exclusion range */
-- reserve_unity_map_for_device(seg, bdf, base, length, iw, ir);
-- reserve_unity_map_for_device(seg, req, base, length, iw, ir);
-+ rc = reserve_unity_map_for_device(seg, bdf, base, length, iw, ir) ?:
-+ reserve_unity_map_for_device(seg, req, base, length, iw, ir);
-
- /* push 'base' just outside of virtual address space */
- base = iommu_top;
- }
-
- /* register IOMMU exclusion range settings for device */
-- if ( limit >= iommu_top )
-+ if ( !rc && limit >= iommu_top )
- {
- rc = reserve_iommu_exclusion_range(iommu, base, limit,
- false /* all */, iw, ir);
-@@ -281,15 +297,15 @@ static int __init register_exclusion_ran
- length = range_top - base;
- /* reserve r/w unity-mapped page entries for devices */
- /* note: these entries are part of the exclusion range */
-- for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
-+ for ( bdf = 0; !rc && bdf < ivrs_bdf_entries; bdf++ )
- {
- if ( iommu == find_iommu_for_device(iommu->seg, bdf) )
- {
-- reserve_unity_map_for_device(iommu->seg, bdf, base, length,
-- iw, ir);
- req = get_ivrs_mappings(iommu->seg)[bdf].dte_requestor_id;
-- reserve_unity_map_for_device(iommu->seg, req, base, length,
-- iw, ir);
-+ rc = reserve_unity_map_for_device(iommu->seg, bdf, base, length,
-+ iw, ir) ?:
-+ reserve_unity_map_for_device(iommu->seg, req, base, length,
-+ iw, ir);
- }
- }
-
-@@ -298,7 +314,7 @@ static int __init register_exclusion_ran
- }
-
- /* register IOMMU exclusion range settings */
-- if ( limit >= iommu_top )
-+ if ( !rc && limit >= iommu_top )
- rc = reserve_iommu_exclusion_range(iommu, base, limit,
- true /* all */, iw, ir);
-
---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
-+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
-@@ -369,15 +369,17 @@ static int amd_iommu_assign_device(struc
- struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(pdev->seg);
- int bdf = PCI_BDF2(pdev->bus, devfn);
- int req_id = get_dma_requestor_id(pdev->seg, bdf);
-+ const struct ivrs_unity_map *unity_map;
-
-- if ( ivrs_mappings[req_id].unity_map_enable )
-+ for ( unity_map = ivrs_mappings[req_id].unity_map; unity_map;
-+ unity_map = unity_map->next )
- {
-- amd_iommu_reserve_domain_unity_map(
-- d,
-- ivrs_mappings[req_id].addr_range_start,
-- ivrs_mappings[req_id].addr_range_length,
-- ivrs_mappings[req_id].write_permission,
-- ivrs_mappings[req_id].read_permission);
-+ int rc = amd_iommu_reserve_domain_unity_map(
-+ d, unity_map->addr, unity_map->length,
-+ unity_map->write, unity_map->read);
-+
-+ if ( rc )
-+ return rc;
- }
-
- return reassign_device(pdev->domain, d, devfn, pdev);
---- a/xen/include/asm-x86/amd-iommu.h
-+++ b/xen/include/asm-x86/amd-iommu.h
-@@ -105,20 +105,24 @@ struct amd_iommu {
- struct list_head ats_devices;
- };
-
-+struct ivrs_unity_map {
-+ bool read:1;
-+ bool write:1;
-+ paddr_t addr;
-+ unsigned long length;
-+ struct ivrs_unity_map *next;
-+};
-+
- struct ivrs_mappings {
- uint16_t dte_requestor_id;
- bool valid:1;
- bool dte_allow_exclusion:1;
-- bool unity_map_enable:1;
-- bool write_permission:1;
-- bool read_permission:1;
-
- /* ivhd device data settings */
- uint8_t device_flags;
-
-- unsigned long addr_range_start;
-- unsigned long addr_range_length;
- struct amd_iommu *iommu;
-+ struct ivrs_unity_map *unity_map;
-
- /* per device interrupt remapping table */
- void *intremap_table;
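
The core of the change above is the per-range overlap check on half-open intervals: [base, base+length) collides with an existing unity map exactly when each range starts below the other's end, while an exact duplicate with identical permissions is tolerated because the same (seg,bdf) can legitimately be registered twice. A small stand-alone model of those two tests (illustrative names, not the patch's own types):

#include <stdbool.h>

struct range {
    unsigned long addr, length;
    bool read, write;
};

/* Do the half-open intervals [a, a+alen) and [b, b+blen) overlap? */
static bool ranges_overlap(const struct range *a, const struct range *b)
{
    return a->addr + a->length > b->addr &&
           b->addr + b->length > a->addr;
}

/* Same bounds and same permissions: allowed as a harmless re-registration. */
static bool ranges_identical(const struct range *a, const struct range *b)
{
    return a->addr == b->addr && a->length == b->length &&
           a->read == b->read && a->write == b->write;
}
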
diff --git a/main/xen/xsa378-4.13-3.patch b/main/xen/xsa378-4.13-3.patch
deleted file mode 100644
index 0e22c73cfc..0000000000
--- a/main/xen/xsa378-4.13-3.patch
+++ /dev/null
@@ -1,102 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: IOMMU: also pass p2m_access_t to p2m_get_iommu_flags()
-
-A subsequent change will want to customize the IOMMU permissions based
-on this.
-
-This is part of XSA-378.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-
---- a/xen/arch/x86/mm/p2m-ept.c
-+++ b/xen/arch/x86/mm/p2m-ept.c
-@@ -678,7 +678,7 @@ ept_set_entry(struct p2m_domain *p2m, gf
- uint8_t ipat = 0;
- bool_t need_modify_vtd_table = 1;
- bool_t vtd_pte_present = 0;
-- unsigned int iommu_flags = p2m_get_iommu_flags(p2mt, mfn);
-+ unsigned int iommu_flags = p2m_get_iommu_flags(p2mt, p2ma, mfn);
- bool_t needs_sync = 1;
- ept_entry_t old_entry = { .epte = 0 };
- ept_entry_t new_entry = { .epte = 0 };
-@@ -805,8 +805,8 @@ ept_set_entry(struct p2m_domain *p2m, gf
-
- /* Safe to read-then-write because we hold the p2m lock */
- if ( ept_entry->mfn == new_entry.mfn &&
-- p2m_get_iommu_flags(ept_entry->sa_p2mt, _mfn(ept_entry->mfn)) ==
-- iommu_flags )
-+ p2m_get_iommu_flags(ept_entry->sa_p2mt, ept_entry->access,
-+ _mfn(ept_entry->mfn)) == iommu_flags )
- need_modify_vtd_table = 0;
-
- ept_p2m_type_to_flags(p2m, &new_entry, p2mt, p2ma);
---- a/xen/arch/x86/mm/p2m-pt.c
-+++ b/xen/arch/x86/mm/p2m-pt.c
-@@ -480,6 +480,16 @@ int p2m_pt_handle_deferred_changes(uint6
- return rc;
- }
-
-+/* Reconstruct a fake p2m_access_t from stored PTE flags. */
-+static p2m_access_t p2m_flags_to_access(unsigned int flags)
-+{
-+ if ( !(flags & _PAGE_PRESENT) )
-+ return p2m_access_n;
-+
-+ /* No need to look at _PAGE_NX for now. */
-+ return flags & _PAGE_RW ? p2m_access_rw : p2m_access_r;
-+}
-+
- /* Checks only applicable to entries with order > PAGE_ORDER_4K */
- static void check_entry(mfn_t mfn, p2m_type_t new, p2m_type_t old,
- unsigned int order)
-@@ -514,7 +524,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m,
- l2_pgentry_t l2e_content;
- l3_pgentry_t l3e_content;
- int rc;
-- unsigned int iommu_pte_flags = p2m_get_iommu_flags(p2mt, mfn);
-+ unsigned int iommu_pte_flags = p2m_get_iommu_flags(p2mt, p2ma, mfn);
- /*
- * old_mfn and iommu_old_flags control possible flush/update needs on the
- * IOMMU: We need to flush when MFN or flags (i.e. permissions) change.
-@@ -577,6 +587,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m,
- old_mfn = l1e_get_pfn(*p2m_entry);
- iommu_old_flags =
- p2m_get_iommu_flags(p2m_flags_to_type(flags),
-+ p2m_flags_to_access(flags),
- _mfn(old_mfn));
- }
- else
-@@ -619,9 +630,10 @@ p2m_pt_set_entry(struct p2m_domain *p2m,
- 0, L1_PAGETABLE_ENTRIES);
- ASSERT(p2m_entry);
- old_mfn = l1e_get_pfn(*p2m_entry);
-+ flags = l1e_get_flags(*p2m_entry);
- iommu_old_flags =
-- p2m_get_iommu_flags(p2m_flags_to_type(l1e_get_flags(*p2m_entry)),
-- _mfn(old_mfn));
-+ p2m_get_iommu_flags(p2m_flags_to_type(flags),
-+ p2m_flags_to_access(flags), _mfn(old_mfn));
-
- if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
- entry_content = p2m_l1e_from_pfn(mfn_x(mfn),
-@@ -649,6 +661,7 @@ p2m_pt_set_entry(struct p2m_domain *p2m,
- old_mfn = l1e_get_pfn(*p2m_entry);
- iommu_old_flags =
- p2m_get_iommu_flags(p2m_flags_to_type(flags),
-+ p2m_flags_to_access(flags),
- _mfn(old_mfn));
- }
- else
---- a/xen/include/asm-x86/p2m.h
-+++ b/xen/include/asm-x86/p2m.h
-@@ -908,7 +908,8 @@ static inline void p2m_altp2m_check(stru
- /*
- * p2m type to IOMMU flags
- */
--static inline unsigned int p2m_get_iommu_flags(p2m_type_t p2mt, mfn_t mfn)
-+static inline unsigned int p2m_get_iommu_flags(p2m_type_t p2mt,
-+ p2m_access_t p2ma, mfn_t mfn)
- {
- unsigned int flags;
-
diff --git a/main/xen/xsa378-4.13-4.patch b/main/xen/xsa378-4.13-4.patch
deleted file mode 100644
index 39b05ff613..0000000000
--- a/main/xen/xsa378-4.13-4.patch
+++ /dev/null
@@ -1,385 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: IOMMU: generalize VT-d's tracking of mapped RMRR regions
-
-In order to re-use it elsewhere, move the logic to vendor independent
-code and strip it of RMRR specifics.
-
-Note that the prior "map" parameter gets folded into the new "p2ma" one
-(which AMD IOMMU code will want to make use of), assigning alternative
-meaning ("unmap") to p2m_access_x. Prepare set_identity_p2m_entry() and
-p2m_get_iommu_flags() for getting passed access types other than
-p2m_access_rw (in the latter case just for p2m_mmio_direct requests).
-
-Note also that, to be on the safe side, an overlap check gets added to
-the main loop of iommu_identity_mapping().
-
-This is part of XSA-378.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-
---- a/xen/arch/x86/mm/p2m.c
-+++ b/xen/arch/x86/mm/p2m.c
-@@ -1351,7 +1351,7 @@ int set_identity_p2m_entry(struct domain
- if ( !is_iommu_enabled(d) )
- return 0;
- return iommu_legacy_map(d, _dfn(gfn_l), _mfn(gfn_l), PAGE_ORDER_4K,
-- IOMMUF_readable | IOMMUF_writable);
-+ p2m_access_to_iommu_flags(p2ma));
- }
-
- gfn_lock(p2m, gfn, 0);
---- a/xen/drivers/passthrough/vtd/iommu.c
-+++ b/xen/drivers/passthrough/vtd/iommu.c
-@@ -42,12 +42,6 @@
- #include "vtd.h"
- #include "../ats.h"
-
--struct mapped_rmrr {
-- struct list_head list;
-- u64 base, end;
-- unsigned int count;
--};
--
- /* Possible unfiltered LAPIC/MSI messages from untrusted sources? */
- bool __read_mostly untrusted_msi;
-
-@@ -1799,17 +1793,12 @@ out:
- static void iommu_domain_teardown(struct domain *d)
- {
- struct domain_iommu *hd = dom_iommu(d);
-- struct mapped_rmrr *mrmrr, *tmp;
- const struct acpi_drhd_unit *drhd;
-
- if ( list_empty(&acpi_drhd_units) )
- return;
-
-- list_for_each_entry_safe ( mrmrr, tmp, &hd->arch.mapped_rmrrs, list )
-- {
-- list_del(&mrmrr->list);
-- xfree(mrmrr);
-- }
-+ iommu_identity_map_teardown(d);
-
- ASSERT(is_iommu_enabled(d));
-
-@@ -1963,74 +1952,6 @@ static void iommu_set_pgd(struct domain
- pagetable_get_paddr(pagetable_from_mfn(pgd_mfn));
- }
-
--static int rmrr_identity_mapping(struct domain *d, bool_t map,
-- const struct acpi_rmrr_unit *rmrr,
-- u32 flag)
--{
-- unsigned long base_pfn = rmrr->base_address >> PAGE_SHIFT_4K;
-- unsigned long end_pfn = PAGE_ALIGN_4K(rmrr->end_address) >> PAGE_SHIFT_4K;
-- struct mapped_rmrr *mrmrr;
-- struct domain_iommu *hd = dom_iommu(d);
--
-- ASSERT(pcidevs_locked());
-- ASSERT(rmrr->base_address < rmrr->end_address);
--
-- /*
-- * No need to acquire hd->arch.mapping_lock: Both insertion and removal
-- * get done while holding pcidevs_lock.
-- */
-- list_for_each_entry( mrmrr, &hd->arch.mapped_rmrrs, list )
-- {
-- if ( mrmrr->base == rmrr->base_address &&
-- mrmrr->end == rmrr->end_address )
-- {
-- int ret = 0;
--
-- if ( map )
-- {
-- ++mrmrr->count;
-- return 0;
-- }
--
-- if ( --mrmrr->count )
-- return 0;
--
-- while ( base_pfn < end_pfn )
-- {
-- if ( clear_identity_p2m_entry(d, base_pfn) )
-- ret = -ENXIO;
-- base_pfn++;
-- }
--
-- list_del(&mrmrr->list);
-- xfree(mrmrr);
-- return ret;
-- }
-- }
--
-- if ( !map )
-- return -ENOENT;
--
-- while ( base_pfn < end_pfn )
-- {
-- int err = set_identity_p2m_entry(d, base_pfn, p2m_access_rw, flag);
--
-- if ( err )
-- return err;
-- base_pfn++;
-- }
--
-- mrmrr = xmalloc(struct mapped_rmrr);
-- if ( !mrmrr )
-- return -ENOMEM;
-- mrmrr->base = rmrr->base_address;
-- mrmrr->end = rmrr->end_address;
-- mrmrr->count = 1;
-- list_add_tail(&mrmrr->list, &hd->arch.mapped_rmrrs);
--
-- return 0;
--}
--
- static int intel_iommu_add_device(u8 devfn, struct pci_dev *pdev)
- {
- struct acpi_rmrr_unit *rmrr;
-@@ -2062,7 +1983,9 @@ static int intel_iommu_add_device(u8 dev
- * Since RMRRs are always reserved in the e820 map for the hardware
- * domain, there shouldn't be a conflict.
- */
-- ret = rmrr_identity_mapping(pdev->domain, 1, rmrr, 0);
-+ ret = iommu_identity_mapping(pdev->domain, p2m_access_rw,
-+ rmrr->base_address, rmrr->end_address,
-+ 0);
- if ( ret )
- dprintk(XENLOG_ERR VTDPREFIX, "d%d: RMRR mapping failed\n",
- pdev->domain->domain_id);
-@@ -2107,7 +2030,8 @@ static int intel_iommu_remove_device(u8
- * Any flag is nothing to clear these mappings but here
- * its always safe and strict to set 0.
- */
-- rmrr_identity_mapping(pdev->domain, 0, rmrr, 0);
-+ iommu_identity_mapping(pdev->domain, p2m_access_x, rmrr->base_address,
-+ rmrr->end_address, 0);
- }
-
- return domain_context_unmap(pdev->domain, devfn, pdev);
-@@ -2306,7 +2230,8 @@ static void __hwdom_init setup_hwdom_rmr
- * domain, there shouldn't be a conflict. So its always safe and
- * strict to set 0.
- */
-- ret = rmrr_identity_mapping(d, 1, rmrr, 0);
-+ ret = iommu_identity_mapping(d, p2m_access_rw, rmrr->base_address,
-+ rmrr->end_address, 0);
- if ( ret )
- dprintk(XENLOG_ERR VTDPREFIX,
- "IOMMU: mapping reserved region failed\n");
-@@ -2465,7 +2390,9 @@ static int reassign_device_ownership(
- * Any RMRR flag is always ignored when remove a device,
- * but its always safe and strict to set 0.
- */
-- ret = rmrr_identity_mapping(source, 0, rmrr, 0);
-+ ret = iommu_identity_mapping(source, p2m_access_x,
-+ rmrr->base_address,
-+ rmrr->end_address, 0);
- if ( ret != -ENOENT )
- return ret;
- }
-@@ -2562,7 +2489,8 @@ static int intel_iommu_assign_device(
- PCI_BUS(bdf) == bus &&
- PCI_DEVFN2(bdf) == devfn )
- {
-- ret = rmrr_identity_mapping(d, 1, rmrr, flag);
-+ ret = iommu_identity_mapping(d, p2m_access_rw, rmrr->base_address,
-+ rmrr->end_address, flag);
- if ( ret )
- {
- int rc;
---- a/xen/drivers/passthrough/x86/iommu.c
-+++ b/xen/drivers/passthrough/x86/iommu.c
-@@ -127,7 +127,7 @@ int arch_iommu_domain_init(struct domain
- struct domain_iommu *hd = dom_iommu(d);
-
- spin_lock_init(&hd->arch.mapping_lock);
-- INIT_LIST_HEAD(&hd->arch.mapped_rmrrs);
-+ INIT_LIST_HEAD(&hd->arch.identity_maps);
-
- return 0;
- }
-@@ -136,6 +136,99 @@ void arch_iommu_domain_destroy(struct do
- {
- }
-
-+struct identity_map {
-+ struct list_head list;
-+ paddr_t base, end;
-+ p2m_access_t access;
-+ unsigned int count;
-+};
-+
-+int iommu_identity_mapping(struct domain *d, p2m_access_t p2ma,
-+ paddr_t base, paddr_t end,
-+ unsigned int flag)
-+{
-+ unsigned long base_pfn = base >> PAGE_SHIFT_4K;
-+ unsigned long end_pfn = PAGE_ALIGN_4K(end) >> PAGE_SHIFT_4K;
-+ struct identity_map *map;
-+ struct domain_iommu *hd = dom_iommu(d);
-+
-+ ASSERT(pcidevs_locked());
-+ ASSERT(base < end);
-+
-+ /*
-+ * No need to acquire hd->arch.mapping_lock: Both insertion and removal
-+ * get done while holding pcidevs_lock.
-+ */
-+ list_for_each_entry( map, &hd->arch.identity_maps, list )
-+ {
-+ if ( map->base == base && map->end == end )
-+ {
-+ int ret = 0;
-+
-+ if ( p2ma != p2m_access_x )
-+ {
-+ if ( map->access != p2ma )
-+ return -EADDRINUSE;
-+ ++map->count;
-+ return 0;
-+ }
-+
-+ if ( --map->count )
-+ return 0;
-+
-+ while ( base_pfn < end_pfn )
-+ {
-+ if ( clear_identity_p2m_entry(d, base_pfn) )
-+ ret = -ENXIO;
-+ base_pfn++;
-+ }
-+
-+ list_del(&map->list);
-+ xfree(map);
-+
-+ return ret;
-+ }
-+
-+ if ( end >= map->base && map->end >= base )
-+ return -EADDRINUSE;
-+ }
-+
-+ if ( p2ma == p2m_access_x )
-+ return -ENOENT;
-+
-+ while ( base_pfn < end_pfn )
-+ {
-+ int err = set_identity_p2m_entry(d, base_pfn, p2ma, flag);
-+
-+ if ( err )
-+ return err;
-+ base_pfn++;
-+ }
-+
-+ map = xmalloc(struct identity_map);
-+ if ( !map )
-+ return -ENOMEM;
-+ map->base = base;
-+ map->end = end;
-+ map->access = p2ma;
-+ map->count = 1;
-+ list_add_tail(&map->list, &hd->arch.identity_maps);
-+
-+ return 0;
-+}
-+
-+void iommu_identity_map_teardown(struct domain *d)
-+{
-+ struct domain_iommu *hd = dom_iommu(d);
-+ struct identity_map *map, *tmp;
-+
-+ list_for_each_entry_safe ( map, tmp, &hd->arch.identity_maps, list )
-+ {
-+ list_del(&map->list);
-+ xfree(map);
-+ }
-+}
-+
- static bool __hwdom_init hwdom_iommu_map(const struct domain *d,
- unsigned long pfn,
- unsigned long max_pfn)
---- a/xen/include/asm-x86/iommu.h
-+++ b/xen/include/asm-x86/iommu.h
-@@ -16,6 +16,7 @@
-
- #include <xen/errno.h>
- #include <xen/list.h>
-+#include <xen/mem_access.h>
- #include <xen/spinlock.h>
- #include <asm/apicdef.h>
- #include <asm/processor.h>
-@@ -49,7 +50,7 @@ struct arch_iommu
- spinlock_t mapping_lock; /* io page table lock */
- int agaw; /* adjusted guest address width, 0 is level 2 30-bit */
- u64 iommu_bitmap; /* bitmap of iommu(s) that the domain uses */
-- struct list_head mapped_rmrrs;
-+ struct list_head identity_maps;
-
- /* amd iommu support */
- int paging_mode;
-@@ -112,6 +113,11 @@ static inline void iommu_disable_x2apic(
- iommu_ops.disable_x2apic();
- }
-
-+int iommu_identity_mapping(struct domain *d, p2m_access_t p2ma,
-+ paddr_t base, paddr_t end,
-+ unsigned int flag);
-+void iommu_identity_map_teardown(struct domain *d);
-+
- extern bool untrusted_msi;
-
- int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
---- a/xen/include/asm-x86/p2m.h
-+++ b/xen/include/asm-x86/p2m.h
-@@ -905,6 +905,34 @@ struct p2m_domain *p2m_get_altp2m(struct
- static inline void p2m_altp2m_check(struct vcpu *v, uint16_t idx) {}
- #endif
-
-+/* p2m access to IOMMU flags */
-+static inline unsigned int p2m_access_to_iommu_flags(p2m_access_t p2ma)
-+{
-+ switch ( p2ma )
-+ {
-+ case p2m_access_rw:
-+ case p2m_access_rwx:
-+ return IOMMUF_readable | IOMMUF_writable;
-+
-+ case p2m_access_r:
-+ case p2m_access_rx:
-+ case p2m_access_rx2rw:
-+ return IOMMUF_readable;
-+
-+ case p2m_access_w:
-+ case p2m_access_wx:
-+ return IOMMUF_writable;
-+
-+ case p2m_access_n:
-+ case p2m_access_x:
-+ case p2m_access_n2rwx:
-+ return 0;
-+ }
-+
-+ ASSERT_UNREACHABLE();
-+ return 0;
-+}
-+
- /*
- * p2m type to IOMMU flags
- */
-@@ -926,9 +954,10 @@ static inline unsigned int p2m_get_iommu
- flags = IOMMUF_readable;
- break;
- case p2m_mmio_direct:
-- flags = IOMMUF_readable;
-- if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) )
-- flags |= IOMMUF_writable;
-+ flags = p2m_access_to_iommu_flags(p2ma);
-+ if ( (flags & IOMMUF_writable) &&
-+ rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) )
-+ flags &= ~IOMMUF_writable;
- break;
- default:
- flags = 0;
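
The new iommu_identity_mapping() above is reference counted: a request with a real access type either creates a tracking entry or, on an exact base/end match with the same access, just bumps its count, while p2m_access_x acts as the unmap request and the p2m entries are only torn down once the count drops to zero. A hedged usage fragment (assuming a struct domain *d and a host-physical range [base, end] are already in scope; the call sequence is illustrative, not taken from the patch):

int rc;

rc = iommu_identity_mapping(d, p2m_access_rw, base, end, 0); /* created, count = 1 */
rc = iommu_identity_mapping(d, p2m_access_rw, base, end, 0); /* exact match, count = 2 */
rc = iommu_identity_mapping(d, p2m_access_r,  base, end, 0); /* -EADDRINUSE: access differs */
rc = iommu_identity_mapping(d, p2m_access_x,  base, end, 0); /* unmap request, count = 1 */
rc = iommu_identity_mapping(d, p2m_access_x,  base, end, 0); /* count = 0, entries removed */
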
diff --git a/main/xen/xsa378-4.13-5.patch b/main/xen/xsa378-4.13-5.patch
deleted file mode 100644
index 86a6f21f42..0000000000
--- a/main/xen/xsa378-4.13-5.patch
+++ /dev/null
@@ -1,208 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: AMD/IOMMU: re-arrange/complete re-assignment handling
-
-Prior to the assignment step having completed successfully, devices
-should not get associated with their new owner. Hand the device to DomIO
-(perhaps temporarily), until after the de-assignment step has completed.
-
-De-assignment of a device (from other than Dom0) as well as failure of
-reassign_device() during assignment should result in unity mappings
-getting torn down. This in turn requires switching to a refcounted
-mapping approach, as was already used by VT-d for its RMRRs, to prevent
-unmapping a region used by multiple devices.
-
-This is CVE-2021-28696 / part of XSA-378.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-
---- a/xen/drivers/passthrough/amd/iommu_map.c
-+++ b/xen/drivers/passthrough/amd/iommu_map.c
-@@ -430,38 +430,49 @@ int amd_iommu_flush_iotlb_all(struct dom
- return 0;
- }
-
--int amd_iommu_reserve_domain_unity_map(struct domain *domain,
-- paddr_t phys_addr,
-- unsigned long size, int iw, int ir)
-+int amd_iommu_reserve_domain_unity_map(struct domain *d,
-+ const struct ivrs_unity_map *map,
-+ unsigned int flag)
- {
-- unsigned long npages, i;
-- unsigned long gfn;
-- unsigned int flags = !!ir;
-- unsigned int flush_flags = 0;
-- int rt = 0;
--
-- if ( iw )
-- flags |= IOMMUF_writable;
--
-- npages = region_to_pages(phys_addr, size);
-- gfn = phys_addr >> PAGE_SHIFT;
-- for ( i = 0; i < npages; i++ )
-+ int rc;
-+
-+ if ( d == dom_io )
-+ return 0;
-+
-+ for ( rc = 0; !rc && map; map = map->next )
- {
-- unsigned long frame = gfn + i;
-+ p2m_access_t p2ma = p2m_access_n;
-+
-+ if ( map->read )
-+ p2ma |= p2m_access_r;
-+ if ( map->write )
-+ p2ma |= p2m_access_w;
-
-- rt = amd_iommu_map_page(domain, _dfn(frame), _mfn(frame), flags,
-- &flush_flags);
-- if ( rt != 0 )
-- break;
-+ rc = iommu_identity_mapping(d, p2ma, map->addr,
-+ map->addr + map->length - 1, flag);
- }
-
-- /* Use while-break to avoid compiler warning */
-- while ( flush_flags &&
-- amd_iommu_flush_iotlb_pages(domain, _dfn(gfn),
-- npages, flush_flags) )
-- break;
-+ return rc;
-+}
-+
-+int amd_iommu_reserve_domain_unity_unmap(struct domain *d,
-+ const struct ivrs_unity_map *map)
-+{
-+ int rc;
-+
-+ if ( d == dom_io )
-+ return 0;
-+
-+ for ( rc = 0; map; map = map->next )
-+ {
-+ int ret = iommu_identity_mapping(d, p2m_access_x, map->addr,
-+ map->addr + map->length - 1, 0);
-+
-+ if ( ret && ret != -ENOENT && !rc )
-+ rc = ret;
-+ }
-
-- return rt;
-+ return rc;
- }
-
- int __init amd_iommu_quarantine_init(struct domain *d)
---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
-+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
-@@ -330,6 +330,7 @@ static int reassign_device(struct domain
- struct amd_iommu *iommu;
- int bdf, rc;
- struct domain_iommu *t = dom_iommu(target);
-+ const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(pdev->seg);
-
- bdf = PCI_BDF2(pdev->bus, pdev->devfn);
- iommu = find_iommu_for_device(pdev->seg, bdf);
-@@ -344,10 +345,24 @@ static int reassign_device(struct domain
-
- amd_iommu_disable_domain_device(source, iommu, devfn, pdev);
-
-- if ( devfn == pdev->devfn )
-+ /*
-+ * If the device belongs to the hardware domain, and it has a unity mapping,
-+ * don't remove it from the hardware domain, because BIOS may reference that
-+ * mapping.
-+ */
-+ if ( !is_hardware_domain(source) )
-+ {
-+ rc = amd_iommu_reserve_domain_unity_unmap(
-+ source,
-+ ivrs_mappings[get_dma_requestor_id(pdev->seg, bdf)].unity_map);
-+ if ( rc )
-+ return rc;
-+ }
-+
-+ if ( devfn == pdev->devfn && pdev->domain != dom_io )
- {
-- list_move(&pdev->domain_list, &target->pdev_list);
-- pdev->domain = target;
-+ list_move(&pdev->domain_list, &dom_io->pdev_list);
-+ pdev->domain = dom_io;
- }
-
- rc = allocate_domain_resources(t);
-@@ -359,6 +374,12 @@ static int reassign_device(struct domain
- pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
- source->domain_id, target->domain_id);
-
-+ if ( devfn == pdev->devfn && pdev->domain != target )
-+ {
-+ list_move(&pdev->domain_list, &target->pdev_list);
-+ pdev->domain = target;
-+ }
-+
- return 0;
- }
-
-@@ -369,20 +390,28 @@ static int amd_iommu_assign_device(struc
- struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(pdev->seg);
- int bdf = PCI_BDF2(pdev->bus, devfn);
- int req_id = get_dma_requestor_id(pdev->seg, bdf);
-- const struct ivrs_unity_map *unity_map;
-+ int rc = amd_iommu_reserve_domain_unity_map(
-+ d, ivrs_mappings[req_id].unity_map, flag);
-+
-+ if ( !rc )
-+ rc = reassign_device(pdev->domain, d, devfn, pdev);
-
-- for ( unity_map = ivrs_mappings[req_id].unity_map; unity_map;
-- unity_map = unity_map->next )
-+ if ( rc && !is_hardware_domain(d) )
- {
-- int rc = amd_iommu_reserve_domain_unity_map(
-- d, unity_map->addr, unity_map->length,
-- unity_map->write, unity_map->read);
-+ int ret = amd_iommu_reserve_domain_unity_unmap(
-+ d, ivrs_mappings[req_id].unity_map);
-
-- if ( rc )
-- return rc;
-+ if ( ret )
-+ {
-+ printk(XENLOG_ERR "AMD-Vi: "
-+ "unity-unmap for %pd/%04x:%02x:%02x.%u failed (%d)\n",
-+ d, pdev->seg, pdev->bus,
-+ PCI_SLOT(devfn), PCI_FUNC(devfn), ret);
-+ domain_crash(d);
-+ }
- }
-
-- return reassign_device(pdev->domain, d, devfn, pdev);
-+ return rc;
- }
-
- static void deallocate_next_page_table(struct page_info *pg, int level)
-@@ -441,6 +470,7 @@ static void deallocate_iommu_page_tables
-
- static void amd_iommu_domain_destroy(struct domain *d)
- {
-+ iommu_identity_map_teardown(d);
- deallocate_iommu_page_tables(d);
- amd_iommu_flush_all_pages(d);
- }
---- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
-+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
-@@ -64,8 +64,10 @@ int __must_check amd_iommu_unmap_page(st
- unsigned int *flush_flags);
- int __must_check amd_iommu_alloc_root(struct domain_iommu *hd);
- int amd_iommu_reserve_domain_unity_map(struct domain *domain,
-- paddr_t phys_addr, unsigned long size,
-- int iw, int ir);
-+ const struct ivrs_unity_map *map,
-+ unsigned int flag);
-+int amd_iommu_reserve_domain_unity_unmap(struct domain *d,
-+ const struct ivrs_unity_map *map);
- int __must_check amd_iommu_flush_iotlb_pages(struct domain *d, dfn_t dfn,
- unsigned int page_count,
- unsigned int flush_flags);
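
One detail worth noting in amd_iommu_reserve_domain_unity_map() above is the OR-composition of the access type, which depends on the read and write accesses being individual bits with "none" as zero and "rw" as their union. A stand-alone model of that layout and of the helper the loop effectively open-codes (the enum values here are illustrative stand-ins, not Xen's p2m_access_t definitions):

#include <stdbool.h>

enum access {
    ACCESS_N  = 0,
    ACCESS_R  = 1 << 0,
    ACCESS_W  = 1 << 1,
    ACCESS_RW = ACCESS_R | ACCESS_W,
};

/* Fold the per-range read/write booleans into a single access value. */
static enum access unity_map_access(bool read, bool write)
{
    enum access a = ACCESS_N;

    if ( read )
        a |= ACCESS_R;
    if ( write )
        a |= ACCESS_W;

    return a;
}
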
diff --git a/main/xen/xsa378-4.13-6.patch b/main/xen/xsa378-4.13-6.patch
deleted file mode 100644
index 069a087f13..0000000000
--- a/main/xen/xsa378-4.13-6.patch
+++ /dev/null
@@ -1,411 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: AMD/IOMMU: re-arrange exclusion range and unity map recording
-
-The spec makes no provisions for OS behavior here to depend on the
-amount of RAM found on the system. While the spec may not sufficiently
-clearly distinguish both kinds of regions, they are surely meant to be
-separate things: Only regions with ACPI_IVMD_EXCLUSION_RANGE set should
-be candidates for putting in the exclusion range registers. (As there's
-only a single such pair of registers per IOMMU, secondary non-adjacent
-regions with the flag set already get converted to unity mapped
-regions.)
-
-First of all, drop the dependency on max_page. With commit b4f042236ae0
-("AMD/IOMMU: Cease using a dynamic height for the IOMMU pagetables") the
-use of it here was stale anyway; it was bogus already before, as it
-didn't account for max_page getting increased later on. Simply try an
-exclusion range registration first, and if it fails (for being
-unsuitable or non-mergeable), register a unity mapping range.
-
-With this various local variables become unnecessary and hence get
-dropped at the same time.
-
-With the max_page boundary dropped for using unity maps, the minimum
-page table tree height now needs both recording and enforcing in
-amd_iommu_domain_init(). Since we can't predict which devices may get
-assigned to a domain, our only option is to uniformly force at least
-that height for all domains, now that the height isn't dynamic anymore.
-
-Further don't make use of the exclusion range unless ACPI data says so.
-
-Note that exclusion range registration in
-register_range_for_all_devices() is on a best effort basis. Hence unity
-map entries also registered are redundant when the former succeeded, but
-they also do no harm. Improvements in this area can be done later imo.
-
-Also adjust types where suitable without touching extra lines.
-
-This is part of XSA-378.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-
---- a/xen/drivers/passthrough/amd/iommu_acpi.c
-+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
-@@ -118,12 +118,8 @@ static struct amd_iommu * __init find_io
- }
-
- static int __init reserve_iommu_exclusion_range(
-- struct amd_iommu *iommu, uint64_t base, uint64_t limit,
-- bool all, bool iw, bool ir)
-+ struct amd_iommu *iommu, paddr_t base, paddr_t limit, bool all)
- {
-- if ( !ir || !iw )
-- return -EPERM;
--
- /* need to extend exclusion range? */
- if ( iommu->exclusion_enable )
- {
-@@ -152,14 +148,18 @@ static int __init reserve_unity_map_for_
- {
- struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg);
- struct ivrs_unity_map *unity_map = ivrs_mappings[bdf].unity_map;
-+ int paging_mode = amd_iommu_get_paging_mode(PFN_UP(base + length));
-+
-+ if ( paging_mode < 0 )
-+ return paging_mode;
-
- /* Check for overlaps. */
- for ( ; unity_map; unity_map = unity_map->next )
- {
- /*
- * Exact matches are okay. This can in particular happen when
-- * register_exclusion_range_for_device() calls here twice for the
-- * same (s,b,d,f).
-+ * register_range_for_device() calls here twice for the same
-+ * (s,b,d,f).
- */
- if ( base == unity_map->addr && length == unity_map->length &&
- ir == unity_map->read && iw == unity_map->write )
-@@ -187,55 +187,52 @@ static int __init reserve_unity_map_for_
- unity_map->next = ivrs_mappings[bdf].unity_map;
- ivrs_mappings[bdf].unity_map = unity_map;
-
-+ if ( paging_mode > amd_iommu_min_paging_mode )
-+ amd_iommu_min_paging_mode = paging_mode;
-+
- return 0;
- }
-
--static int __init register_exclusion_range_for_all_devices(
-- unsigned long base, unsigned long limit, u8 iw, u8 ir)
-+static int __init register_range_for_all_devices(
-+ paddr_t base, paddr_t limit, bool iw, bool ir, bool exclusion)
- {
- int seg = 0; /* XXX */
-- unsigned long range_top, iommu_top, length;
- struct amd_iommu *iommu;
-- unsigned int bdf;
- int rc = 0;
-
- /* is part of exclusion range inside of IOMMU virtual address space? */
- /* note: 'limit' parameter is assumed to be page-aligned */
-- range_top = limit + PAGE_SIZE;
-- iommu_top = max_page * PAGE_SIZE;
-- if ( base < iommu_top )
-- {
-- if ( range_top > iommu_top )
-- range_top = iommu_top;
-- length = range_top - base;
-- /* reserve r/w unity-mapped page entries for devices */
-- /* note: these entries are part of the exclusion range */
-- for ( bdf = 0; !rc && bdf < ivrs_bdf_entries; bdf++ )
-- rc = reserve_unity_map_for_device(seg, bdf, base, length, iw, ir);
-- /* push 'base' just outside of virtual address space */
-- base = iommu_top;
-- }
-- /* register IOMMU exclusion range settings */
-- if ( !rc && limit >= iommu_top )
-+ if ( exclusion )
- {
- for_each_amd_iommu( iommu )
- {
-- rc = reserve_iommu_exclusion_range(iommu, base, limit,
-- true /* all */, iw, ir);
-- if ( rc )
-- break;
-+ int ret = reserve_iommu_exclusion_range(iommu, base, limit,
-+ true /* all */);
-+
-+ if ( ret && !rc )
-+ rc = ret;
- }
- }
-
-+ if ( !exclusion || rc )
-+ {
-+ paddr_t length = limit + PAGE_SIZE - base;
-+ unsigned int bdf;
-+
-+ /* reserve r/w unity-mapped page entries for devices */
-+ for ( bdf = rc = 0; !rc && bdf < ivrs_bdf_entries; bdf++ )
-+ rc = reserve_unity_map_for_device(seg, bdf, base, length, iw, ir);
-+ }
-+
- return rc;
- }
-
--static int __init register_exclusion_range_for_device(
-- u16 bdf, unsigned long base, unsigned long limit, u8 iw, u8 ir)
-+static int __init register_range_for_device(
-+ unsigned int bdf, paddr_t base, paddr_t limit,
-+ bool iw, bool ir, bool exclusion)
- {
- int seg = 0; /* XXX */
- struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg);
-- unsigned long range_top, iommu_top, length;
- struct amd_iommu *iommu;
- u16 req;
- int rc = 0;
-@@ -249,27 +246,19 @@ static int __init register_exclusion_ran
- req = ivrs_mappings[bdf].dte_requestor_id;
-
- /* note: 'limit' parameter is assumed to be page-aligned */
-- range_top = limit + PAGE_SIZE;
-- iommu_top = max_page * PAGE_SIZE;
-- if ( base < iommu_top )
-- {
-- if ( range_top > iommu_top )
-- range_top = iommu_top;
-- length = range_top - base;
-+ if ( exclusion )
-+ rc = reserve_iommu_exclusion_range(iommu, base, limit,
-+ false /* all */);
-+ if ( !exclusion || rc )
-+ {
-+ paddr_t length = limit + PAGE_SIZE - base;
-+
- /* reserve unity-mapped page entries for device */
-- /* note: these entries are part of the exclusion range */
- rc = reserve_unity_map_for_device(seg, bdf, base, length, iw, ir) ?:
- reserve_unity_map_for_device(seg, req, base, length, iw, ir);
--
-- /* push 'base' just outside of virtual address space */
-- base = iommu_top;
- }
--
-- /* register IOMMU exclusion range settings for device */
-- if ( !rc && limit >= iommu_top )
-+ else
- {
-- rc = reserve_iommu_exclusion_range(iommu, base, limit,
-- false /* all */, iw, ir);
- ivrs_mappings[bdf].dte_allow_exclusion = true;
- ivrs_mappings[req].dte_allow_exclusion = true;
- }
-@@ -277,53 +266,42 @@ static int __init register_exclusion_ran
- return rc;
- }
-
--static int __init register_exclusion_range_for_iommu_devices(
-- struct amd_iommu *iommu,
-- unsigned long base, unsigned long limit, u8 iw, u8 ir)
-+static int __init register_range_for_iommu_devices(
-+ struct amd_iommu *iommu, paddr_t base, paddr_t limit,
-+ bool iw, bool ir, bool exclusion)
- {
-- unsigned long range_top, iommu_top, length;
-+ /* note: 'limit' parameter is assumed to be page-aligned */
-+ paddr_t length = limit + PAGE_SIZE - base;
- unsigned int bdf;
- u16 req;
-- int rc = 0;
-+ int rc;
-
-- /* is part of exclusion range inside of IOMMU virtual address space? */
-- /* note: 'limit' parameter is assumed to be page-aligned */
-- range_top = limit + PAGE_SIZE;
-- iommu_top = max_page * PAGE_SIZE;
-- if ( base < iommu_top )
-- {
-- if ( range_top > iommu_top )
-- range_top = iommu_top;
-- length = range_top - base;
-- /* reserve r/w unity-mapped page entries for devices */
-- /* note: these entries are part of the exclusion range */
-- for ( bdf = 0; !rc && bdf < ivrs_bdf_entries; bdf++ )
-- {
-- if ( iommu == find_iommu_for_device(iommu->seg, bdf) )
-- {
-- req = get_ivrs_mappings(iommu->seg)[bdf].dte_requestor_id;
-- rc = reserve_unity_map_for_device(iommu->seg, bdf, base, length,
-- iw, ir) ?:
-- reserve_unity_map_for_device(iommu->seg, req, base, length,
-- iw, ir);
-- }
-- }
--
-- /* push 'base' just outside of virtual address space */
-- base = iommu_top;
-+ if ( exclusion )
-+ {
-+ rc = reserve_iommu_exclusion_range(iommu, base, limit, true /* all */);
-+ if ( !rc )
-+ return 0;
- }
-
-- /* register IOMMU exclusion range settings */
-- if ( !rc && limit >= iommu_top )
-- rc = reserve_iommu_exclusion_range(iommu, base, limit,
-- true /* all */, iw, ir);
-+ /* reserve unity-mapped page entries for devices */
-+ for ( bdf = rc = 0; !rc && bdf < ivrs_bdf_entries; bdf++ )
-+ {
-+ if ( iommu != find_iommu_for_device(iommu->seg, bdf) )
-+ continue;
-+
-+ req = get_ivrs_mappings(iommu->seg)[bdf].dte_requestor_id;
-+ rc = reserve_unity_map_for_device(iommu->seg, bdf, base, length,
-+ iw, ir) ?:
-+ reserve_unity_map_for_device(iommu->seg, req, base, length,
-+ iw, ir);
-+ }
-
- return rc;
- }
-
- static int __init parse_ivmd_device_select(
- const struct acpi_ivrs_memory *ivmd_block,
-- unsigned long base, unsigned long limit, u8 iw, u8 ir)
-+ paddr_t base, paddr_t limit, bool iw, bool ir, bool exclusion)
- {
- u16 bdf;
-
-@@ -334,12 +312,12 @@ static int __init parse_ivmd_device_sele
- return -ENODEV;
- }
-
-- return register_exclusion_range_for_device(bdf, base, limit, iw, ir);
-+ return register_range_for_device(bdf, base, limit, iw, ir, exclusion);
- }
-
- static int __init parse_ivmd_device_range(
- const struct acpi_ivrs_memory *ivmd_block,
-- unsigned long base, unsigned long limit, u8 iw, u8 ir)
-+ paddr_t base, paddr_t limit, bool iw, bool ir, bool exclusion)
- {
- unsigned int first_bdf, last_bdf, bdf;
- int error;
-@@ -361,15 +339,15 @@ static int __init parse_ivmd_device_rang
- }
-
- for ( bdf = first_bdf, error = 0; (bdf <= last_bdf) && !error; bdf++ )
-- error = register_exclusion_range_for_device(
-- bdf, base, limit, iw, ir);
-+ error = register_range_for_device(
-+ bdf, base, limit, iw, ir, exclusion);
-
- return error;
- }
-
- static int __init parse_ivmd_device_iommu(
- const struct acpi_ivrs_memory *ivmd_block,
-- unsigned long base, unsigned long limit, u8 iw, u8 ir)
-+ paddr_t base, paddr_t limit, bool iw, bool ir, bool exclusion)
- {
- int seg = 0; /* XXX */
- struct amd_iommu *iommu;
-@@ -384,14 +362,14 @@ static int __init parse_ivmd_device_iomm
- return -ENODEV;
- }
-
-- return register_exclusion_range_for_iommu_devices(
-- iommu, base, limit, iw, ir);
-+ return register_range_for_iommu_devices(
-+ iommu, base, limit, iw, ir, exclusion);
- }
-
- static int __init parse_ivmd_block(const struct acpi_ivrs_memory *ivmd_block)
- {
- unsigned long start_addr, mem_length, base, limit;
-- u8 iw, ir;
-+ bool iw = true, ir = true, exclusion = false;
-
- if ( ivmd_block->header.length < sizeof(*ivmd_block) )
- {
-@@ -408,13 +386,11 @@ static int __init parse_ivmd_block(const
- ivmd_block->header.type, start_addr, mem_length);
-
- if ( ivmd_block->header.flags & ACPI_IVMD_EXCLUSION_RANGE )
-- iw = ir = IOMMU_CONTROL_ENABLED;
-+ exclusion = true;
- else if ( ivmd_block->header.flags & ACPI_IVMD_UNITY )
- {
-- iw = ivmd_block->header.flags & ACPI_IVMD_READ ?
-- IOMMU_CONTROL_ENABLED : IOMMU_CONTROL_DISABLED;
-- ir = ivmd_block->header.flags & ACPI_IVMD_WRITE ?
-- IOMMU_CONTROL_ENABLED : IOMMU_CONTROL_DISABLED;
-+ iw = ivmd_block->header.flags & ACPI_IVMD_READ;
-+ ir = ivmd_block->header.flags & ACPI_IVMD_WRITE;
- }
- else
- {
-@@ -425,20 +401,20 @@ static int __init parse_ivmd_block(const
- switch( ivmd_block->header.type )
- {
- case ACPI_IVRS_TYPE_MEMORY_ALL:
-- return register_exclusion_range_for_all_devices(
-- base, limit, iw, ir);
-+ return register_range_for_all_devices(
-+ base, limit, iw, ir, exclusion);
-
- case ACPI_IVRS_TYPE_MEMORY_ONE:
-- return parse_ivmd_device_select(ivmd_block,
-- base, limit, iw, ir);
-+ return parse_ivmd_device_select(ivmd_block, base, limit,
-+ iw, ir, exclusion);
-
- case ACPI_IVRS_TYPE_MEMORY_RANGE:
-- return parse_ivmd_device_range(ivmd_block,
-- base, limit, iw, ir);
-+ return parse_ivmd_device_range(ivmd_block, base, limit,
-+ iw, ir, exclusion);
-
- case ACPI_IVRS_TYPE_MEMORY_IOMMU:
-- return parse_ivmd_device_iommu(ivmd_block,
-- base, limit, iw, ir);
-+ return parse_ivmd_device_iommu(ivmd_block, base, limit,
-+ iw, ir, exclusion);
-
- default:
- AMD_IOMMU_DEBUG("IVMD Error: Invalid Block Type!\n");
---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
-+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
-@@ -234,6 +234,8 @@ static int __must_check allocate_domain_
- return rc;
- }
-
-+int __read_mostly amd_iommu_min_paging_mode = 1;
-+
- static int amd_iommu_domain_init(struct domain *d)
- {
- struct domain_iommu *hd = dom_iommu(d);
-@@ -245,11 +247,13 @@ static int amd_iommu_domain_init(struct
- * - HVM could in principle use 3 or 4 depending on how much guest
- * physical address space we give it, but this isn't known yet so use 4
- * unilaterally.
-+ * - Unity maps may require an even higher number.
- */
-- hd->arch.paging_mode = amd_iommu_get_paging_mode(
-- is_hvm_domain(d)
-- ? 1ul << (DEFAULT_DOMAIN_ADDRESS_WIDTH - PAGE_SHIFT)
-- : get_upper_mfn_bound() + 1);
-+ hd->arch.paging_mode = max(amd_iommu_get_paging_mode(
-+ is_hvm_domain(d)
-+ ? 1ul << (DEFAULT_DOMAIN_ADDRESS_WIDTH - PAGE_SHIFT)
-+ : get_upper_mfn_bound() + 1),
-+ amd_iommu_min_paging_mode);
-
- return 0;
- }
---- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
-+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
-@@ -140,6 +140,8 @@ extern struct hpet_sbdf {
- } init;
- } hpet_sbdf;
-
-+extern int amd_iommu_min_paging_mode;
-+
- extern void *shared_intremap_table;
- extern unsigned long *shared_intremap_inuse;
-
diff --git a/main/xen/xsa378-4.13-7.patch b/main/xen/xsa378-4.13-7.patch
deleted file mode 100644
index 57f466b21c..0000000000
--- a/main/xen/xsa378-4.13-7.patch
+++ /dev/null
@@ -1,88 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: x86/p2m: introduce p2m_is_special()
-
-Seeing the similarity of grant, foreign, and (subsequently) direct-MMIO
-handling, introduce a new P2M type group named "special" (as in "needing
-special accessors to create/destroy").
-
-Also use -EPERM instead of other error codes on the two domain_crash()
-paths touched.
-
-This is part of XSA-378.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-
---- a/xen/arch/x86/mm/p2m.c
-+++ b/xen/arch/x86/mm/p2m.c
-@@ -807,7 +807,7 @@ p2m_remove_page(struct p2m_domain *p2m,
- for ( i = 0; i < (1UL << page_order); i++ )
- {
- p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0, NULL, NULL);
-- if ( !p2m_is_grant(t) && !p2m_is_shared(t) && !p2m_is_foreign(t) )
-+ if ( !p2m_is_special(t) && !p2m_is_shared(t) )
- set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
- }
- }
-@@ -934,13 +934,13 @@ guest_physmap_add_entry(struct domain *d
- &ot, &a, 0, NULL, NULL);
- ASSERT(!p2m_is_shared(ot));
- }
-- if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
-+ if ( p2m_is_special(ot) )
- {
-- /* Really shouldn't be unmapping grant/foreign maps this way */
-+ /* Don't permit unmapping grant/foreign this way. */
- domain_crash(d);
- p2m_unlock(p2m);
-
-- return -EINVAL;
-+ return -EPERM;
- }
- else if ( p2m_is_ram(ot) && !p2m_is_paged(ot) )
- {
-@@ -1034,8 +1034,7 @@ int p2m_change_type_one(struct domain *d
- struct p2m_domain *p2m = p2m_get_hostp2m(d);
- int rc;
-
-- BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
-- BUG_ON(p2m_is_foreign(ot) || p2m_is_foreign(nt));
-+ BUG_ON(p2m_is_special(ot) || p2m_is_special(nt));
-
- gfn_lock(p2m, gfn, 0);
-
-@@ -1282,11 +1281,11 @@ static int set_typed_p2m_entry(struct do
- gfn_unlock(p2m, gfn, order);
- return cur_order + 1;
- }
-- if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
-+ if ( p2m_is_special(ot) )
- {
- gfn_unlock(p2m, gfn, order);
- domain_crash(d);
-- return -ENOENT;
-+ return -EPERM;
- }
- else if ( p2m_is_ram(ot) )
- {
---- a/xen/include/asm-x86/p2m.h
-+++ b/xen/include/asm-x86/p2m.h
-@@ -141,6 +141,10 @@ typedef unsigned int p2m_query_t;
- | p2m_to_mask(p2m_ram_logdirty) )
- #define P2M_SHARED_TYPES (p2m_to_mask(p2m_ram_shared))
-
-+/* Types established/cleaned up via special accessors. */
-+#define P2M_SPECIAL_TYPES (P2M_GRANT_TYPES | \
-+ p2m_to_mask(p2m_map_foreign))
-+
- /* Valid types not necessarily associated with a (valid) MFN. */
- #define P2M_INVALID_MFN_TYPES (P2M_POD_TYPES \
- | p2m_to_mask(p2m_mmio_direct) \
-@@ -169,6 +173,7 @@ typedef unsigned int p2m_query_t;
- #define p2m_is_paged(_t) (p2m_to_mask(_t) & P2M_PAGED_TYPES)
- #define p2m_is_sharable(_t) (p2m_to_mask(_t) & P2M_SHARABLE_TYPES)
- #define p2m_is_shared(_t) (p2m_to_mask(_t) & P2M_SHARED_TYPES)
-+#define p2m_is_special(_t) (p2m_to_mask(_t) & P2M_SPECIAL_TYPES)
- #define p2m_is_broken(_t) (p2m_to_mask(_t) & P2M_BROKEN_TYPES)
- #define p2m_is_foreign(_t) (p2m_to_mask(_t) & p2m_to_mask(p2m_map_foreign))
-
diff --git a/main/xen/xsa378-4.13-8.patch b/main/xen/xsa378-4.13-8.patch
deleted file mode 100644
index d91dbc3b65..0000000000
--- a/main/xen/xsa378-4.13-8.patch
+++ /dev/null
@@ -1,157 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: x86/p2m: guard (in particular) identity mapping entries
-
-Such entries, created by set_identity_p2m_entry(), should only be
-destroyed by clear_identity_p2m_entry(). However, similarly, entries
-created by set_mmio_p2m_entry() should only be torn down by
-clear_mmio_p2m_entry(), so the logic gets based upon p2m_mmio_direct as
-the entry type (separation between "ordinary" and 1:1 mappings would
-require a further indicator to tell apart the two).
-
-As to the guest_remove_page() change, commit 48dfb297a20a ("x86/PVH:
-allow guest_remove_page to remove p2m_mmio_direct pages"), which
-introduced the call to clear_mmio_p2m_entry(), claimed this was done for
-hwdom only without this actually having been the case. However, this
-code shouldn't be there in the first place, as MMIO entries shouldn't be
-dropped this way. Avoid triggering the warning again that 48dfb297a20a
-silenced by an adjustment to xenmem_add_to_physmap_one() instead.
-
-Note that guest_physmap_mark_populate_on_demand() gets tightened beyond
-the immediate purpose of this change.
-
-Note also that I didn't inspect code which isn't security supported,
-e.g. sharing, paging, or altp2m.
-
-This is CVE-2021-28694 / part of XSA-378.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Paul Durrant <paul@xen.org>
-
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -4770,7 +4770,9 @@ int xenmem_add_to_physmap_one(
-
- /* Remove previously mapped page if it was present. */
- prev_mfn = get_gfn(d, gfn_x(gpfn), &p2mt);
-- if ( mfn_valid(prev_mfn) )
-+ if ( p2mt == p2m_mmio_direct )
-+ rc = -EPERM;
-+ else if ( mfn_valid(prev_mfn) )
- {
- if ( is_xen_heap_mfn(prev_mfn) )
- /* Xen heap frames are simply unhooked from this phys slot. */
---- a/xen/arch/x86/mm/p2m.c
-+++ b/xen/arch/x86/mm/p2m.c
-@@ -796,7 +796,8 @@ p2m_remove_page(struct p2m_domain *p2m,
- &cur_order, NULL);
-
- if ( p2m_is_valid(t) &&
-- (!mfn_valid(_mfn(mfn)) || mfn + i != mfn_x(mfn_return)) )
-+ (!mfn_valid(_mfn(mfn)) || t == p2m_mmio_direct ||
-+ mfn + i != mfn_x(mfn_return)) )
- return -EILSEQ;
-
- i += (1UL << cur_order) - ((gfn_l + i) & ((1UL << cur_order) - 1));
-@@ -890,7 +891,7 @@ guest_physmap_add_entry(struct domain *d
- if ( p2m_is_foreign(t) )
- return -EINVAL;
-
-- if ( !mfn_valid(mfn) )
-+ if ( !mfn_valid(mfn) || t == p2m_mmio_direct )
- {
- ASSERT_UNREACHABLE();
- return -EINVAL;
-@@ -936,7 +937,7 @@ guest_physmap_add_entry(struct domain *d
- }
- if ( p2m_is_special(ot) )
- {
-- /* Don't permit unmapping grant/foreign this way. */
-+ /* Don't permit unmapping grant/foreign/direct-MMIO this way. */
- domain_crash(d);
- p2m_unlock(p2m);
-
-@@ -1385,8 +1386,8 @@ int set_identity_p2m_entry(struct domain
- * order+1 for caller to retry with order (guaranteed smaller than
- * the order value passed in)
- */
--int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn_l, mfn_t mfn,
-- unsigned int order)
-+static int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn_l,
-+ mfn_t mfn, unsigned int order)
- {
- int rc = -EINVAL;
- gfn_t gfn = _gfn(gfn_l);
---- a/xen/arch/x86/mm/p2m-pod.c
-+++ b/xen/arch/x86/mm/p2m-pod.c
-@@ -1295,17 +1295,17 @@ guest_physmap_mark_populate_on_demand(st
-
- p2m->get_entry(p2m, gfn_add(gfn, i), &ot, &a, 0, &cur_order, NULL);
- n = 1UL << min(order, cur_order);
-- if ( p2m_is_ram(ot) )
-+ if ( ot == p2m_populate_on_demand )
-+ {
-+ /* Count how many PoD entries we'll be replacing if successful */
-+ pod_count += n;
-+ }
-+ else if ( ot != p2m_invalid && ot != p2m_mmio_dm )
- {
- P2M_DEBUG("gfn_to_mfn returned type %d!\n", ot);
- rc = -EBUSY;
- goto out;
- }
-- else if ( ot == p2m_populate_on_demand )
-- {
-- /* Count how man PoD entries we'll be replacing if successful */
-- pod_count += n;
-- }
- }
-
- /* Now, actually do the two-way mapping */
---- a/xen/common/memory.c
-+++ b/xen/common/memory.c
-@@ -328,7 +328,7 @@ int guest_remove_page(struct domain *d,
- }
- if ( p2mt == p2m_mmio_direct )
- {
-- rc = clear_mmio_p2m_entry(d, gmfn, mfn, PAGE_ORDER_4K);
-+ rc = -EPERM;
- goto out_put_gfn;
- }
- #else
-@@ -1720,6 +1720,15 @@ int check_get_page_from_gfn(struct domai
- return -EAGAIN;
- }
- #endif
-+#ifdef CONFIG_X86
-+ if ( p2mt == p2m_mmio_direct )
-+ {
-+ if ( page )
-+ put_page(page);
-+
-+ return -EPERM;
-+ }
-+#endif
-
- if ( !page )
- return -EINVAL;
---- a/xen/include/asm-x86/p2m.h
-+++ b/xen/include/asm-x86/p2m.h
-@@ -143,7 +143,8 @@ typedef unsigned int p2m_query_t;
-
- /* Types established/cleaned up via special accessors. */
- #define P2M_SPECIAL_TYPES (P2M_GRANT_TYPES | \
-- p2m_to_mask(p2m_map_foreign))
-+ p2m_to_mask(p2m_map_foreign) | \
-+ p2m_to_mask(p2m_mmio_direct))
-
- /* Valid types not necessarily associated with a (valid) MFN. */
- #define P2M_INVALID_MFN_TYPES (P2M_POD_TYPES \
-@@ -649,8 +650,6 @@ int set_foreign_p2m_entry(struct domain
- /* Set mmio addresses in the p2m table (for pass-through) */
- int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
- unsigned int order, p2m_access_t access);
--int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
-- unsigned int order);
-
- /* Set identity addresses in the p2m table (for pass-through) */
- int set_identity_p2m_entry(struct domain *d, unsigned long gfn,
diff --git a/main/xen/xsa379-4.14.patch b/main/xen/xsa379-4.14.patch
deleted file mode 100644
index 25894b3ba7..0000000000
--- a/main/xen/xsa379-4.14.patch
+++ /dev/null
@@ -1,77 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: x86/mm: widen locked region in xenmem_add_to_physmap_one()
-
-For pages which can be made part of the P2M by the guest, but which can
-also later be de-allocated (grant table v2 status pages being the
-present example), it is imperative that they be mapped at no more than a
-single GFN. We therefore need to make sure that of two parallel
-XENMAPSPACE_grant_table requests for the same status page one completes
-before the second checks at which other GFN the underlying MFN is
-presently mapped.
-
-Push down the respective put_gfn(). This leverages that gfn_lock()
-really aliases p2m_lock(), but the function makes this assumption
-already anyway: In the XENMAPSPACE_gmfn case lock nesting constraints
-for both involved GFNs would otherwise need to be enforced to avoid ABBA
-deadlocks.
-
-This is CVE-2021-28697 / XSA-379.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Julien Grall <jgrall@amazon.com>
-
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -4650,8 +4650,20 @@ int xenmem_add_to_physmap_one(
- goto put_both;
- }
-
-- /* Remove previously mapped page if it was present. */
-+ /*
-+ * Note that we're (ab)using GFN locking (to really be locking of the
-+ * entire P2M) here in (at least) two ways: Finer grained locking would
-+ * expose lock order violations in the XENMAPSPACE_gmfn case (due to the
-+ * earlier get_gfn_unshare() above). Plus at the very least for the grant
-+ * table v2 status page case we need to guarantee that the same page can
-+ * only appear at a single GFN. While this is a property we want in
-+ * general, for pages which can subsequently be freed this is imperative:
-+ * Upon freeing we wouldn't be able to find other mappings in the P2M
-+ * (unless we did a brute force search).
-+ */
- prev_mfn = get_gfn(d, gfn_x(gpfn), &p2mt);
-+
-+ /* Remove previously mapped page if it was present. */
- if ( p2mt == p2m_mmio_direct )
- rc = -EPERM;
- else if ( mfn_valid(prev_mfn) )
-@@ -4663,27 +4675,21 @@ int xenmem_add_to_physmap_one(
- /* Normal domain memory is freed, to avoid leaking memory. */
- rc = guest_remove_page(d, gfn_x(gpfn));
- }
-- /* In the XENMAPSPACE_gmfn case we still hold a ref on the old page. */
-- put_gfn(d, gfn_x(gpfn));
--
-- if ( rc )
-- goto put_both;
-
- /* Unmap from old location, if any. */
- old_gpfn = get_gpfn_from_mfn(mfn_x(mfn));
- ASSERT(!SHARED_M2P(old_gpfn));
- if ( space == XENMAPSPACE_gmfn && old_gpfn != gfn )
-- {
- rc = -EXDEV;
-- goto put_both;
-- }
-- if ( old_gpfn != INVALID_M2P_ENTRY )
-+ else if ( !rc && old_gpfn != INVALID_M2P_ENTRY )
- rc = guest_physmap_remove_page(d, _gfn(old_gpfn), mfn, PAGE_ORDER_4K);
-
- /* Map at new location. */
- if ( !rc )
- rc = guest_physmap_add_page(d, gpfn, mfn, PAGE_ORDER_4K);
-
-+ put_gfn(d, gfn_x(gpfn));
-+
- put_both:
- /*
- * In the XENMAPSPACE_gmfn case, we took a ref of the gfn at the top.
diff --git a/main/xen/xsa380-4.13-1.patch b/main/xen/xsa380-4.13-1.patch
deleted file mode 100644
index eb2f9e7dcf..0000000000
--- a/main/xen/xsa380-4.13-1.patch
+++ /dev/null
@@ -1,148 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: gnttab: add preemption check to gnttab_release_mappings()
-
-A guest may die with many grant mappings still in place, or simply with
-a large maptrack table. Iterating through this may take more time than
-is reasonable without intermediate preemption (to run softirqs and
-perhaps the scheduler).
-
-Move the invocation of the function to the section where other
-restartable functions get invoked, and have the function itself check
-for preemption every once in a while. Have it iterate the table
-backwards, such that decreasing the maptrack limit is all it takes to
-convey restart information.
-
-In domain_teardown() introduce PROG_none such that inserting at the
-front will be easier going forward.
-
-This is part of CVE-2021-28698 / XSA-380.
-
-Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Julien Grall <jgrall@amazon.com>
-
---- a/xen/common/domain.c
-+++ b/xen/common/domain.c
-@@ -776,11 +776,13 @@ int domain_kill(struct domain *d)
- return domain_kill(d);
- d->is_dying = DOMDYING_dying;
- argo_destroy(d);
-- gnttab_release_mappings(d);
- vnuma_destroy(d->vnuma);
- domain_set_outstanding_pages(d, 0);
- /* fallthrough */
- case DOMDYING_dying:
-+ rc = gnttab_release_mappings(d);
-+ if ( rc )
-+ break;
- rc = evtchn_destroy(d);
- if ( rc )
- break;
---- a/xen/common/grant_table.c
-+++ b/xen/common/grant_table.c
-@@ -63,7 +63,13 @@ struct grant_table {
- unsigned int nr_grant_frames;
- /* Number of grant status frames shared with guest (for version 2) */
- unsigned int nr_status_frames;
-- /* Number of available maptrack entries. */
-+ /*
-+ * Number of available maptrack entries. For cleanup purposes it is
-+ * important to realize that this field and @maptrack further down will
-+ * only ever be accessed by the local domain. Thus it is okay to clean
-+ * up early, and to shrink the limit for the purpose of tracking cleanup
-+ * progress.
-+ */
- unsigned int maptrack_limit;
- /* Shared grant table (see include/public/grant_table.h). */
- union {
-@@ -3675,9 +3681,7 @@ do_grant_table_op(
- #include "compat/grant_table.c"
- #endif
-
--void
--gnttab_release_mappings(
-- struct domain *d)
-+int gnttab_release_mappings(struct domain *d)
- {
- struct grant_table *gt = d->grant_table, *rgt;
- struct grant_mapping *map;
-@@ -3691,10 +3695,34 @@ gnttab_release_mappings(
-
- BUG_ON(!d->is_dying);
-
-- for ( handle = 0; handle < gt->maptrack_limit; handle++ )
-+ if ( !gt || !gt->maptrack )
-+ return 0;
-+
-+ for ( handle = gt->maptrack_limit; handle; )
- {
- unsigned int clear_flags = 0;
-
-+ /*
-+ * Deal with full pages such that their freeing (in the body of the
-+ * if()) remains simple.
-+ */
-+ if ( handle < gt->maptrack_limit && !(handle % MAPTRACK_PER_PAGE) )
-+ {
-+ /*
-+ * Changing maptrack_limit alters nr_maptrack_frames()'es return
-+ * value. Free the then excess trailing page right here, rather
-+ * than leaving it to grant_table_destroy() (and in turn requiring
-+ * to leave gt->maptrack_limit unaltered).
-+ */
-+ gt->maptrack_limit = handle;
-+ FREE_XENHEAP_PAGE(gt->maptrack[nr_maptrack_frames(gt)]);
-+
-+ if ( hypercall_preempt_check() )
-+ return -ERESTART;
-+ }
-+
-+ --handle;
-+
- map = &maptrack_entry(gt, handle);
- if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) )
- continue;
-@@ -3785,6 +3813,11 @@ gnttab_release_mappings(
-
- map->flags = 0;
- }
-+
-+ gt->maptrack_limit = 0;
-+ FREE_XENHEAP_PAGE(gt->maptrack[0]);
-+
-+ return 0;
- }
-
- void grant_table_warn_active_grants(struct domain *d)
-@@ -3848,8 +3881,7 @@ grant_table_destroy(
- free_xenheap_page(t->shared_raw[i]);
- xfree(t->shared_raw);
-
-- for ( i = 0; i < nr_maptrack_frames(t); i++ )
-- free_xenheap_page(t->maptrack[i]);
-+ ASSERT(!t->maptrack_limit);
- vfree(t->maptrack);
-
- for ( i = 0; i < nr_active_grant_frames(t); i++ )
---- a/xen/include/xen/grant_table.h
-+++ b/xen/include/xen/grant_table.h
-@@ -47,9 +47,7 @@ void grant_table_init_vcpu(struct vcpu *
- void grant_table_warn_active_grants(struct domain *d);
-
- /* Domain death release of granted mappings of other domains' memory. */
--void
--gnttab_release_mappings(
-- struct domain *d);
-+int gnttab_release_mappings(struct domain *d);
-
- int mem_sharing_gref_to_gfn(struct grant_table *gt, grant_ref_t ref,
- gfn_t *gfn, uint16_t *status);
-@@ -78,7 +76,7 @@ static inline void grant_table_init_vcpu
-
- static inline void grant_table_warn_active_grants(struct domain *d) {}
-
--static inline void gnttab_release_mappings(struct domain *d) {}
-+static inline int gnttab_release_mappings(struct domain *d) { return 0; }
-
- static inline int mem_sharing_gref_to_gfn(struct grant_table *gt,
- grant_ref_t ref,
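The patch above turns gnttab_release_mappings() into a restartable operation: it walks the maptrack table backwards and shrinks maptrack_limit as it goes, so a preempted call can simply be re-issued and resume where it stopped. The standalone C sketch below illustrates only that pattern; all names (tracker, release_mappings, preempt_check) are invented for the example, and the per-page freeing done by the real code is reduced to a comment.

/*
 * Standalone illustration of the restartable-teardown pattern from
 * xsa380-4.13-1.patch: iterate backwards and shrink the recorded limit
 * on page boundaries, so a preempted call resumes where it left off.
 * All names are invented for this sketch; it is not Xen code.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define ENTRIES_PER_PAGE 4   /* stands in for MAPTRACK_PER_PAGE */

struct tracker {
    unsigned int limit;      /* valid entries; doubles as restart marker */
    int *entries;            /* stand-in for the maptrack pages */
};

/* Pretend preemption is needed after releasing two pages' worth. */
static bool preempt_check(unsigned int done)
{
    return done >= 2 * ENTRIES_PER_PAGE;
}

/* Returns 0 when finished, -1 (think -ERESTART) if the caller must retry. */
static int release_mappings(struct tracker *t)
{
    unsigned int done = 0;

    for ( unsigned int handle = t->limit; handle; )
    {
        /* Page boundary: record progress (the real code also frees the
         * now-unused trailing maptrack page here) and allow preemption. */
        if ( handle < t->limit && !(handle % ENTRIES_PER_PAGE) )
        {
            t->limit = handle;
            if ( preempt_check(done) )
                return -1;
        }

        --handle;
        t->entries[handle] = 0;          /* "release" this mapping */
        ++done;
    }

    t->limit = 0;
    return 0;
}

int main(void)
{
    struct tracker t = { .limit = 19 };

    t.entries = calloc(t.limit, sizeof(*t.entries));
    if ( !t.entries )
        return 1;

    while ( release_mappings(&t) )
        printf("preempted, will restart from limit=%u\n", t.limit);
    printf("done, limit=%u\n", t.limit);

    free(t.entries);
    return 0;
}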
diff --git a/main/xen/xsa380-4.13-2.patch b/main/xen/xsa380-4.13-2.patch
deleted file mode 100644
index 36f2c6372a..0000000000
--- a/main/xen/xsa380-4.13-2.patch
+++ /dev/null
@@ -1,383 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: gnttab: replace mapkind()
-
-mapkind() doesn't scale very well with larger maptrack entry counts,
-using a brute force linear search through all entries, with the only
-option of an early loop exit if a matching writable entry was found.
-Introduce a radix tree alongside the main maptrack table, thus
-allowing much faster MFN-based lookup. To avoid the need to actually
-allocate space for the individual nodes, encode the two counters in the
-node pointers themselves, thus limiting the number of permitted
-simultaneous r/o and r/w mappings of the same MFN to 2³¹-1 (64-bit) /
-2¹⁵-1 (32-bit) each.
-
-To avoid enforcing an unnecessarily low bound on the number of
-simultaneous mappings of a single MFN, introduce
-radix_tree_{ulong_to_ptr,ptr_to_ulong} paralleling
-radix_tree_{int_to_ptr,ptr_to_int}.
-
-As a consequence, the locking can be relaxed as well: with the remote
-domain's active entries no longer being inspected, there is no need to
-hold the remote domain's grant table lock anymore. And since we no
-longer iterate over the local domain's maptrack table, the lock in
-map_grant_ref() can also be dropped before the
-new maptrack entry actually gets populated.
-
-As a nice side effect this also reduces the number of IOMMU operations
-in unmap_common(): Previously we would have "established" a readable
-mapping whenever we didn't find a writable entry anymore (yet, of
-course, at least one readable one). But we only need to do this if we
-actually dropped the last writable entry, not if there were none to
-begin with.
-
-This is part of CVE-2021-28698 / XSA-380.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Julien Grall <jgrall@amazon.com>
-
---- a/xen/common/grant_table.c
-+++ b/xen/common/grant_table.c
-@@ -36,6 +36,7 @@
- #include <xen/iommu.h>
- #include <xen/paging.h>
- #include <xen/keyhandler.h>
-+#include <xen/radix-tree.h>
- #include <xen/vmap.h>
- #include <xen/nospec.h>
- #include <xsm/xsm.h>
-@@ -81,8 +82,13 @@ struct grant_table {
- grant_status_t **status;
- /* Active grant table. */
- struct active_grant_entry **active;
-- /* Mapping tracking table per vcpu. */
-+ /* Handle-indexed tracking table of mappings. */
- struct grant_mapping **maptrack;
-+ /*
-+ * MFN-indexed tracking tree of mappings, if needed. Note that this is
-+ * protected by @lock, not @maptrack_lock.
-+ */
-+ struct radix_tree_root maptrack_tree;
-
- /* Domain to which this struct grant_table belongs. */
- const struct domain *domain;
-@@ -460,34 +466,6 @@ static int get_paged_frame(unsigned long
- return GNTST_okay;
- }
-
--static inline void
--double_gt_lock(struct grant_table *lgt, struct grant_table *rgt)
--{
-- /*
-- * See mapkind() for why the write lock is also required for the
-- * remote domain.
-- */
-- if ( lgt < rgt )
-- {
-- grant_write_lock(lgt);
-- grant_write_lock(rgt);
-- }
-- else
-- {
-- if ( lgt != rgt )
-- grant_write_lock(rgt);
-- grant_write_lock(lgt);
-- }
--}
--
--static inline void
--double_gt_unlock(struct grant_table *lgt, struct grant_table *rgt)
--{
-- grant_write_unlock(lgt);
-- if ( lgt != rgt )
-- grant_write_unlock(rgt);
--}
--
- #define INVALID_MAPTRACK_HANDLE UINT_MAX
-
- static inline grant_handle_t
-@@ -907,41 +885,17 @@ static struct active_grant_entry *grant_
- return ERR_PTR(-EINVAL);
- }
-
--#define MAPKIND_READ 1
--#define MAPKIND_WRITE 2
--static unsigned int mapkind(
-- struct grant_table *lgt, const struct domain *rd, mfn_t mfn)
--{
-- struct grant_mapping *map;
-- grant_handle_t handle, limit = lgt->maptrack_limit;
-- unsigned int kind = 0;
--
-- /*
-- * Must have the local domain's grant table write lock when
-- * iterating over its maptrack entries.
-- */
-- ASSERT(percpu_rw_is_write_locked(&lgt->lock));
-- /*
-- * Must have the remote domain's grant table write lock while
-- * counting its active entries.
-- */
-- ASSERT(percpu_rw_is_write_locked(&rd->grant_table->lock));
--
-- smp_rmb();
--
-- for ( handle = 0; !(kind & MAPKIND_WRITE) && handle < limit; handle++ )
-- {
-- map = &maptrack_entry(lgt, handle);
-- if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) ||
-- map->domid != rd->domain_id )
-- continue;
-- if ( mfn_eq(_active_entry(rd->grant_table, map->ref).mfn, mfn) )
-- kind |= map->flags & GNTMAP_readonly ?
-- MAPKIND_READ : MAPKIND_WRITE;
-- }
--
-- return kind;
--}
-+union maptrack_node {
-+ struct {
-+ /* Radix tree slot pointers use two of the bits. */
-+#ifdef __BIG_ENDIAN_BITFIELD
-+ unsigned long : 2;
-+#endif
-+ unsigned long rd : BITS_PER_LONG / 2 - 1;
-+ unsigned long wr : BITS_PER_LONG / 2 - 1;
-+ } cnt;
-+ unsigned long raw;
-+};
-
- static void
- map_grant_ref(
-@@ -961,7 +915,6 @@ map_grant_ref(
- struct grant_mapping *mt;
- grant_entry_header_t *shah;
- uint16_t *status;
-- bool_t need_iommu;
-
- led = current;
- ld = led->domain;
-@@ -1181,31 +1134,75 @@ map_grant_ref(
- * as mem-sharing and IOMMU use are incompatible). The dom_io case would
- * need checking separately if we compared against owner here.
- */
-- need_iommu = ld != rd && gnttab_need_iommu_mapping(ld);
-- if ( need_iommu )
-+ if ( ld != rd && gnttab_need_iommu_mapping(ld) )
- {
-+ union maptrack_node node = {
-+ .cnt.rd = !!(op->flags & GNTMAP_readonly),
-+ .cnt.wr = !(op->flags & GNTMAP_readonly),
-+ };
-+ int err;
-+ void **slot = NULL;
- unsigned int kind;
-
-- double_gt_lock(lgt, rgt);
-+ grant_write_lock(lgt);
-+
-+ err = radix_tree_insert(&lgt->maptrack_tree, mfn_x(mfn),
-+ radix_tree_ulong_to_ptr(node.raw));
-+ if ( err == -EEXIST )
-+ {
-+ slot = radix_tree_lookup_slot(&lgt->maptrack_tree, mfn_x(mfn));
-+ if ( likely(slot) )
-+ {
-+ node.raw = radix_tree_ptr_to_ulong(*slot);
-+ err = -EBUSY;
-+
-+ /* Update node only when refcount doesn't overflow. */
-+ if ( op->flags & GNTMAP_readonly ? ++node.cnt.rd
-+ : ++node.cnt.wr )
-+ {
-+ radix_tree_replace_slot(slot,
-+ radix_tree_ulong_to_ptr(node.raw));
-+ err = 0;
-+ }
-+ }
-+ else
-+ ASSERT_UNREACHABLE();
-+ }
-
- /*
- * We're not translated, so we know that dfns and mfns are
- * the same things, so the IOMMU entry is always 1-to-1.
- */
-- kind = mapkind(lgt, rd, mfn);
-- if ( !(op->flags & GNTMAP_readonly) &&
-- !(kind & MAPKIND_WRITE) )
-+ if ( !(op->flags & GNTMAP_readonly) && node.cnt.wr == 1 )
- kind = IOMMUF_readable | IOMMUF_writable;
-- else if ( !kind )
-+ else if ( (op->flags & GNTMAP_readonly) &&
-+ node.cnt.rd == 1 && !node.cnt.wr )
- kind = IOMMUF_readable;
- else
- kind = 0;
-- if ( kind && iommu_legacy_map(ld, _dfn(mfn_x(mfn)), mfn, 0, kind) )
-+ if ( err ||
-+ (kind && iommu_legacy_map(ld, _dfn(mfn_x(mfn)), mfn, 0, kind)) )
- {
-- double_gt_unlock(lgt, rgt);
-+ if ( !err )
-+ {
-+ if ( slot )
-+ {
-+ op->flags & GNTMAP_readonly ? node.cnt.rd--
-+ : node.cnt.wr--;
-+ radix_tree_replace_slot(slot,
-+ radix_tree_ulong_to_ptr(node.raw));
-+ }
-+ else
-+ radix_tree_delete(&lgt->maptrack_tree, mfn_x(mfn));
-+ }
-+
- rc = GNTST_general_error;
-- goto undo_out;
- }
-+
-+ grant_write_unlock(lgt);
-+
-+ if ( rc != GNTST_okay )
-+ goto undo_out;
- }
-
- TRACE_1D(TRC_MEM_PAGE_GRANT_MAP, op->dom);
-@@ -1213,10 +1210,6 @@ map_grant_ref(
- /*
- * All maptrack entry users check mt->flags first before using the
- * other fields so just ensure the flags field is stored last.
-- *
-- * However, if gnttab_need_iommu_mapping() then this would race
-- * with a concurrent mapkind() call (on an unmap, for example)
-- * and a lock is required.
- */
- mt = &maptrack_entry(lgt, handle);
- mt->domid = op->dom;
-@@ -1224,9 +1217,6 @@ map_grant_ref(
- smp_wmb();
- write_atomic(&mt->flags, op->flags);
-
-- if ( need_iommu )
-- double_gt_unlock(lgt, rgt);
--
- op->dev_bus_addr = mfn_to_maddr(mfn);
- op->handle = handle;
- op->status = GNTST_okay;
-@@ -1448,19 +1438,34 @@ unmap_common(
- /* See the respective comment in map_grant_ref(). */
- if ( rc == GNTST_okay && ld != rd && gnttab_need_iommu_mapping(ld) )
- {
-- unsigned int kind;
-+ void **slot;
-+ union maptrack_node node;
- int err = 0;
-
-- double_gt_lock(lgt, rgt);
-+ grant_write_lock(lgt);
-+ slot = radix_tree_lookup_slot(&lgt->maptrack_tree, mfn_x(op->mfn));
-+ node.raw = likely(slot) ? radix_tree_ptr_to_ulong(*slot) : 0;
-+
-+ /* Refcount must not underflow. */
-+ if ( !(flags & GNTMAP_readonly ? node.cnt.rd--
-+ : node.cnt.wr--) )
-+ BUG();
-
-- kind = mapkind(lgt, rd, op->mfn);
-- if ( !kind )
-+ if ( !node.raw )
- err = iommu_legacy_unmap(ld, _dfn(mfn_x(op->mfn)), 0);
-- else if ( !(kind & MAPKIND_WRITE) )
-+ else if ( !(flags & GNTMAP_readonly) && !node.cnt.wr )
- err = iommu_legacy_map(ld, _dfn(mfn_x(op->mfn)), op->mfn, 0,
- IOMMUF_readable);
-
-- double_gt_unlock(lgt, rgt);
-+ if ( err )
-+ ;
-+ else if ( !node.raw )
-+ radix_tree_delete(&lgt->maptrack_tree, mfn_x(op->mfn));
-+ else
-+ radix_tree_replace_slot(slot,
-+ radix_tree_ulong_to_ptr(node.raw));
-+
-+ grant_write_unlock(lgt);
-
- if ( err )
- rc = GNTST_general_error;
-@@ -1918,6 +1923,8 @@ int grant_table_init(struct domain *d, i
- gt->maptrack = vzalloc(gt->max_maptrack_frames * sizeof(*gt->maptrack));
- if ( gt->maptrack == NULL )
- goto out;
-+
-+ radix_tree_init(&gt->maptrack_tree);
- }
-
- /* Shared grant table. */
-@@ -3701,6 +3708,7 @@ int gnttab_release_mappings(struct domai
- for ( handle = gt->maptrack_limit; handle; )
- {
- unsigned int clear_flags = 0;
-+ mfn_t mfn;
-
- /*
- * Deal with full pages such that their freeing (in the body of the
-@@ -3806,17 +3814,31 @@ int gnttab_release_mappings(struct domai
- if ( clear_flags )
- gnttab_clear_flags(rd, clear_flags, status);
-
-+ mfn = act->mfn;
-+
- active_entry_release(act);
- grant_read_unlock(rgt);
-
- rcu_unlock_domain(rd);
-
- map->flags = 0;
-+
-+ /*
-+ * This is excessive in that a single such call would suffice per
-+ * mapped MFN (or none at all, if no entry was ever inserted). But it
-+ * should be the common case for an MFN to be mapped just once, and
-+ * this way we don't need to further maintain the counters. We also
-+ * don't want to leave cleaning up of the tree as a whole to the end
-+ * of the function, as this could take quite some time.
-+ */
-+ radix_tree_delete(&gt->maptrack_tree, mfn_x(mfn));
- }
-
- gt->maptrack_limit = 0;
- FREE_XENHEAP_PAGE(gt->maptrack[0]);
-
-+ radix_tree_destroy(&gt->maptrack_tree, NULL);
-+
- return 0;
- }
-
---- a/xen/include/xen/radix-tree.h
-+++ b/xen/include/xen/radix-tree.h
-@@ -190,6 +190,25 @@ static inline int radix_tree_ptr_to_int(
- return (int)((long)ptr >> 2);
- }
-
-+/**
-+ * radix_tree_{ulong_to_ptr,ptr_to_ulong}:
-+ *
-+ * Same for unsigned long values. Beware though that only BITS_PER_LONG-2
-+ * bits are actually usable for the value.
-+ */
-+static inline void *radix_tree_ulong_to_ptr(unsigned long val)
-+{
-+ unsigned long ptr = (val << 2) | 0x2;
-+ ASSERT((ptr >> 2) == val);
-+ return (void *)ptr;
-+}
-+
-+static inline unsigned long radix_tree_ptr_to_ulong(void *ptr)
-+{
-+ ASSERT(((unsigned long)ptr & 0x3) == 0x2);
-+ return (unsigned long)ptr >> 2;
-+}
-+
- int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
- void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
- void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
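The core trick of this second XSA-380 patch is keeping the per-MFN read-only/writable mapping counts directly inside the radix tree slot value instead of in separately allocated nodes. The sketch below reproduces that encoding as a standalone program; it assumes a little-endian host and the unsigned long bit-field extension that the Xen code itself uses, and it omits the big-endian padding and the tree itself.

/*
 * Standalone illustration of the counter-in-pointer encoding from
 * xsa380-4.13-2.patch: two per-MFN mapping counts packed into one
 * pointer-sized radix tree slot, tagged in the low bits. Assumes a
 * little-endian host; the real code adds 2 bits of leading padding for
 * big-endian bit-field layouts.
 */
#include <assert.h>
#include <limits.h>
#include <stdio.h>

#define BITS_PER_LONG ((int)(CHAR_BIT * sizeof(unsigned long)))

union maptrack_node {
    struct {
        unsigned long rd : BITS_PER_LONG / 2 - 1;   /* read-only mappings */
        unsigned long wr : BITS_PER_LONG / 2 - 1;   /* writable mappings */
    } cnt;
    unsigned long raw;
};

/* Mirrors radix_tree_ulong_to_ptr(): tag the value so it cannot be
 * mistaken for a real node or item pointer. */
static void *ulong_to_ptr(unsigned long val)
{
    unsigned long ptr = (val << 2) | 0x2;

    assert((ptr >> 2) == val);          /* only BITS_PER_LONG - 2 bits usable */
    return (void *)ptr;
}

/* Mirrors radix_tree_ptr_to_ulong(). */
static unsigned long ptr_to_ulong(void *ptr)
{
    assert(((unsigned long)ptr & 0x3) == 0x2);
    return (unsigned long)ptr >> 2;
}

int main(void)
{
    union maptrack_node node = { .raw = 0 };

    /* First (read-only) mapping of some MFN. */
    node.cnt.rd = 1;
    void *slot = ulong_to_ptr(node.raw);    /* value stored in the tree slot */

    /* A later writable mapping of the same MFN bumps the other counter. */
    node.raw = ptr_to_ulong(slot);
    ++node.cnt.wr;
    slot = ulong_to_ptr(node.raw);

    node.raw = ptr_to_ulong(slot);
    printf("ro=%lu rw=%lu (each capped at 2^%d - 1)\n",
           (unsigned long)node.cnt.rd, (unsigned long)node.cnt.wr,
           BITS_PER_LONG / 2 - 1);
    return 0;
}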
diff --git a/main/xen/xsa382.patch b/main/xen/xsa382.patch
deleted file mode 100644
index 936c2ded9b..0000000000
--- a/main/xen/xsa382.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: gnttab: fix array capacity check in gnttab_get_status_frames()
-
-The number of grant frames is of no interest here; converting the passed
-in op.nr_frames this way means we allow for 8 times as many GFNs to be
-written as actually fit in the array. We would corrupt xlat areas of
-higher vCPU-s (after having faulted many times while trying to write to
-the guard pages between any two areas) for 32-bit PV guests. For HVM
-guests we'd simply crash as soon as we hit the first guard page, as
-accesses to the xlat area are plain memcpy()s there.
-
-This is CVE-2021-28699 / XSA-382.
-
-Fixes: 18b1be5e324b ("gnttab: make resource limits per domain")
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-
---- a/xen/common/grant_table.c
-+++ b/xen/common/grant_table.c
-@@ -3243,12 +3243,11 @@ gnttab_get_status_frames(XEN_GUEST_HANDL
- goto unlock;
- }
-
-- if ( unlikely(limit_max < grant_to_status_frames(op.nr_frames)) )
-+ if ( unlikely(limit_max < op.nr_frames) )
- {
- gdprintk(XENLOG_WARNING,
-- "grant_to_status_frames(%u) for d%d is too large (%u,%u)\n",
-- op.nr_frames, d->domain_id,
-- grant_to_status_frames(op.nr_frames), limit_max);
-+ "nr_status_frames for %pd is too large (%u,%u)\n",
-+ d, op.nr_frames, limit_max);
- op.status = GNTST_general_error;
- goto unlock;
- }
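The factor of 8 in the description above falls out of the entry sizes: a status frame holds 2-byte status entries while a grant frame holds 16-byte v2 grant entries, so grant_to_status_frames(n) is roughly n/8. The sketch below just replays that arithmetic to show why the old check lets an oversized request through; the constants and the helper are illustrative approximations, not copies of the Xen definitions.

/*
 * Arithmetic behind the xsa382 fix: converting the requested number of
 * status frames with grant_to_status_frames() divides it by roughly 8
 * (2048 status entries vs. 256 v2 grant entries per 4k page), so the
 * old check accepted requests about 8 times larger than the limit.
 */
#include <stdio.h>

#define PAGE_SIZE              4096u
#define GRANT_PER_PAGE         (PAGE_SIZE / 16u)  /* 16-byte v2 grant entries */
#define GRANT_STATUS_PER_PAGE  (PAGE_SIZE / 2u)   /* 2-byte status entries */

/* Status frames needed to cover the entries of 'grant_frames' grant frames. */
static unsigned int grant_to_status_frames(unsigned int grant_frames)
{
    return (grant_frames * GRANT_PER_PAGE + GRANT_STATUS_PER_PAGE - 1) /
           GRANT_STATUS_PER_PAGE;
}

int main(void)
{
    unsigned int limit_max = 4;   /* status frames the destination can hold */
    unsigned int nr_frames = 32;  /* status frames the guest asks for */

    /* Old check: treats the status-frame count as if it were grant frames. */
    int old_rejects = limit_max < grant_to_status_frames(nr_frames);
    /* Fixed check: compare the request against the limit directly. */
    int new_rejects = limit_max < nr_frames;

    printf("request=%u limit=%u -> old check rejects: %d, fixed check rejects: %d\n",
           nr_frames, limit_max, old_rejects, new_rejects);
    return 0;
}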
diff --git a/main/xen/xsa383.patch b/main/xen/xsa383.patch
deleted file mode 100644
index 9ab5eb32bb..0000000000
--- a/main/xen/xsa383.patch
+++ /dev/null
@@ -1,55 +0,0 @@
-From: Julien Grall <jgrall@amazon.com>
-Date: Sat, 3 Jul 2021 14:03:36 +0100
-Subject: [PATCH] xen/arm: Restrict the amount of memory that dom0less domU and
- dom0 can allocate
-
-Currently, both dom0less domUs and dom0 can allocate an "unlimited"
-amount of memory because d->max_pages is set to ~0U.
-
-In particular, the former are meant to be unprivileged. Therefore the
-memory they could allocate should be bounded. As the domains are not
-yet officially aware of Xen (we don't advertise it in the DT, yet the
-hypercalls are accessible), they should not need to allocate more than
-the initial amount. So cap d->max_pages directly at the amount of
-memory we are meant to allocate.
-
-Take the opportunity to also restrict the memory for dom0 as the
-domain is direct mapped (e.g. MFN == GFN) and therefore cannot
-allocate outside of the pre-allocated region.
-
-This is CVE-2021-28700 / XSA-383.
-
-Reported-by: Julien Grall <jgrall@amazon.com>
-Signed-off-by: Julien Grall <jgrall@amazon.com>
-Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
-Tested-by: Stefano Stabellini <sstabellini@kernel.org>
----
- xen/arch/arm/domain_build.c | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
-index 6c86d527810f..206038d1c022 100644
---- a/xen/arch/arm/domain_build.c
-+++ b/xen/arch/arm/domain_build.c
-@@ -2440,7 +2440,8 @@ static int __init construct_domU(struct domain *d,
-
- if ( vcpu_create(d, 0) == NULL )
- return -ENOMEM;
-- d->max_pages = ~0U;
-+
-+ d->max_pages = ((paddr_t)mem * SZ_1K) >> PAGE_SHIFT;
-
- kinfo.d = d;
-
-@@ -2546,7 +2547,7 @@ static int __init construct_dom0(struct domain *d)
-
- iommu_hwdom_init(d);
-
-- d->max_pages = ~0U;
-+ d->max_pages = dom0_mem >> PAGE_SHIFT;
-
- kinfo.unassigned_mem = dom0_mem;
- kinfo.d = d;
---
-2.17.1
-
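For dom0less domUs the memory size is a kB-granular value from the device tree, which is why the patch computes ((paddr_t)mem * SZ_1K) >> PAGE_SHIFT, while dom0 just shifts the already-byte-sized dom0_mem. A minimal check of that arithmetic, with the usual 4 KiB page size hard-coded for illustration:

/*
 * The xsa383 fix replaces d->max_pages = ~0U with a cap derived from the
 * configured memory size: kilobytes -> bytes -> pages for dom0less domUs,
 * dom0_mem >> PAGE_SHIFT for dom0. This only checks the arithmetic.
 */
#include <inttypes.h>
#include <stdio.h>

#define SZ_1K      1024ULL
#define PAGE_SHIFT 12          /* 4 KiB pages */

static uint64_t kb_to_max_pages(uint64_t mem_kb)
{
    /* The 64-bit type plays the role of the (paddr_t) cast in the patch:
     * it keeps the multiplication from overflowing a 32-bit value. */
    return (mem_kb * SZ_1K) >> PAGE_SHIFT;
}

int main(void)
{
    uint64_t mem_kb = 262144;  /* a 256 MiB domU */

    printf("%" PRIu64 " KiB -> max_pages = %" PRIu64 "\n",
           mem_kb, kb_to_max_pages(mem_kb));   /* expect 65536 */
    return 0;
}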
diff --git a/main/xen/xsa384-4.14.patch b/main/xen/xsa384-4.14.patch
deleted file mode 100644
index 9efe1b063e..0000000000
--- a/main/xen/xsa384-4.14.patch
+++ /dev/null
@@ -1,79 +0,0 @@
-From: Jan Beulich <jbeulich@suse.com>
-Subject: gnttab: deal with status frame mapping race
-
-Once gnttab_map_frame() drops the grant table lock, the MFN it reports
-back to its caller is open to other manipulation. In particular
-gnttab_unpopulate_status_frames() might free it via a racing request on
-another CPU, thus resulting in a reference to a deallocated page getting
-added to a domain's P2M.
-
-Obtain a page reference in gnttab_map_frame() to prevent freeing of the
-page until xenmem_add_to_physmap_one() has actually finished acting on
-the page. Do so uniformly, even though it is only strictly required
-for v2 status pages, to avoid extra conditionals (which would then all
-need to be kept in sync going forward).
-
-This is CVE-2021-28701 / XSA-384.
-
-Reported-by: Julien Grall <jgrall@amazon.com>
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Julien Grall <jgrall@amazon.com>
-
---- a/xen/arch/arm/mm.c
-+++ b/xen/arch/arm/mm.c
-@@ -1423,6 +1423,8 @@ int xenmem_add_to_physmap_one(
- if ( rc )
- return rc;
-
-+ /* Need to take care of the reference obtained in gnttab_map_frame(). */
-+ page = mfn_to_page(mfn);
- t = p2m_ram_rw;
-
- break;
-@@ -1490,9 +1492,12 @@ int xenmem_add_to_physmap_one(
- /* Map at new location. */
- rc = guest_physmap_add_entry(d, gfn, mfn, 0, t);
-
-- /* If we fail to add the mapping, we need to drop the reference we
-- * took earlier on foreign pages */
-- if ( rc && space == XENMAPSPACE_gmfn_foreign )
-+ /*
-+ * For XENMAPSPACE_gmfn_foreign if we failed to add the mapping, we need
-+ * to drop the reference we took earlier. In all other cases we need to
-+ * drop any reference we took earlier (perhaps indirectly).
-+ */
-+ if ( space == XENMAPSPACE_gmfn_foreign ? rc : page != NULL )
- {
- ASSERT(page != NULL);
- put_page(page);
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -4620,6 +4620,8 @@ int xenmem_add_to_physmap_one(
- rc = gnttab_map_frame(d, idx, gpfn, &mfn);
- if ( rc )
- return rc;
-+ /* Need to take care of the ref obtained in gnttab_map_frame(). */
-+ page = mfn_to_page(mfn);
- break;
- case XENMAPSPACE_gmfn:
- {
---- a/xen/common/grant_table.c
-+++ b/xen/common/grant_table.c
-@@ -4102,7 +4102,16 @@ int gnttab_map_frame(struct domain *d, u
- }
-
- if ( !rc )
-- gnttab_set_frame_gfn(gt, status, idx, gfn);
-+ {
-+ /*
-+ * Make sure gnttab_unpopulate_status_frames() won't (successfully)
-+ * free the page until our caller has completed its operation.
-+ */
-+ if ( get_page(mfn_to_page(*mfn), d) )
-+ gnttab_set_frame_gfn(gt, status, idx, gfn);
-+ else
-+ rc = -EBUSY;
-+ }
-
- grant_write_unlock(gt);
-
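The essence of the xsa384 change is a reference held across the window where the grant table lock is dropped: gnttab_map_frame() pins the page before reporting its MFN, and xenmem_add_to_physmap_one() drops that reference once it is done, so a racing gnttab_unpopulate_status_frames() cannot hand the page back to the allocator in between. The single-threaded sketch below models only the refcounting idea; the names and the simplified free-on-last-put behaviour are inventions for the example, not Xen semantics.

/*
 * Simplified model of the reference-across-lock-drop pattern used by the
 * xsa384 fix. A page stays alive as long as its refcount is non-zero, so
 * the extra reference taken by the mapping path keeps a racing release of
 * the status frame from actually freeing it. Invented names; not Xen code.
 */
#include <stdbool.h>
#include <stdio.h>

struct page {
    unsigned int refcount;
    bool freed;
};

/* Succeeds only while the page is still live. */
static bool get_page(struct page *pg)
{
    if ( !pg->refcount )
        return false;
    ++pg->refcount;
    return true;
}

static void put_page(struct page *pg)
{
    if ( --pg->refcount == 0 )
    {
        pg->freed = true;               /* last reference gone: page is freed */
        printf("page freed\n");
    }
}

int main(void)
{
    struct page status_frame = { .refcount = 1, .freed = false };

    /* gnttab_map_frame(): still under the grant table lock, pin the page. */
    if ( !get_page(&status_frame) )
        return 1;                       /* the real code fails with -EBUSY */
    /* ... grant table lock dropped here ... */

    /* A racing release of the status frame drops the table's reference,
     * but the page survives thanks to the reference taken above. */
    put_page(&status_frame);
    printf("after racing release: freed=%d refcount=%u\n",
           status_frame.freed, status_frame.refcount);

    /* xenmem_add_to_physmap_one() is done with the page: drop the pin. */
    put_page(&status_frame);
    return 0;
}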