author    Natanael Copa <ncopa@alpinelinux.org>    2014-10-23 09:28:52 +0000
committer Natanael Copa <ncopa@alpinelinux.org>    2014-10-23 09:48:32 +0000
commit    9cba7900153b15f9070445e546fd8244cb2da8f1 (patch)
tree      cbccb037c3b72a9c8f240294fcc026759c8da37b
parent    256f4e7e9f920e61c9a0f213d108851dd6eee97c (diff)
main/xen: security upgrade to 4.2.5 and patches (2.5-stable)

The 4.2.5 release fixes:

CVE-2014-2599 / XSA-89   HVMOP_set_mem_access is not preemptible
CVE-2014-3124 / XSA-92   HVMOP_set_mem_type allows invalid P2M entries to be created
CVE-2014-3967, CVE-2014-3968 / XSA-96   Vulnerabilities in HVM MSI injection
CVE-2014-4021 / XSA-100  Hypervisor heap contents leaked to guests

In addition we add patches for:

CVE-2014-7154 / XSA-104  Race condition in HVMOP_track_dirty_vram
CVE-2014-7155 / XSA-105  Missing privilege level checks in x86 HLT, LGDT, LIDT, and LMSW emulation
CVE-2014-7156 / XSA-106  Missing privilege level checks in x86 emulation of software interrupts
CVE-2014-7188 / XSA-108  Improper MSR range used for x2APIC emulation

fixes #3412
fixes #3457
-rw-r--r--  main/xen/APKBUILD                    |  17
-rw-r--r--  main/xen/xsa104.patch                |  44
-rw-r--r--  main/xen/xsa105.patch                |  37
-rw-r--r--  main/xen/xsa106.patch                |  23
-rw-r--r--  main/xen/xsa108.patch                |  36
-rw-r--r--  main/xen/xsa97-hap-4_2-prereq.patch  | 466
-rw-r--r--  main/xen/xsa97-hap-4_2.patch         | 485
7 files changed, 151 insertions, 957 deletions
diff --git a/main/xen/APKBUILD b/main/xen/APKBUILD
index 9f3221d8659..169649b1e73 100644
--- a/main/xen/APKBUILD
+++ b/main/xen/APKBUILD
@@ -2,7 +2,7 @@
# Contributor: Roger Pau Monne <roger.pau@entel.upc.edu>
# Maintainer: William Pitcock <nenolod@dereferenced.org>
pkgname=xen
-pkgver=4.2.4
+pkgver=4.2.5
pkgrel=0
pkgdesc="Xen hypervisor"
url="http://www.xen.org/"
@@ -23,8 +23,11 @@ source="http://bits.xensource.com/oss-xen/release/$pkgver/$pkgname-$pkgver.tar.g
xsa41.patch
xsa41b.patch
xsa41c.patch
- xsa97-hap-4_2-prereq.patch
- xsa97-hap-4_2.patch
+
+ xsa104.patch
+ xsa105.patch
+ xsa106.patch
+ xsa108.patch
xenstored.initd
xenstored.confd
@@ -134,7 +137,7 @@ xend() {
-exec mv '{}' "$subpkgdir"/"$sitepackages"/xen \;
}
-md5sums="b32be39471c93249828b117473adca9d xen-4.2.4.tar.gz
+md5sums="d8a589be1558496f8c639f7c890bcffc xen-4.2.5.tar.gz
506e7ab6f9482dc95f230978d340bcd9 qemu_uclibc_configure.patch
2dc5ddf47c53ea168729975046c3c1f9 librt.patch
1ccde6b36a6f9542a16d998204dc9a22 qemu-xen_paths.patch
@@ -142,8 +145,10 @@ md5sums="b32be39471c93249828b117473adca9d xen-4.2.4.tar.gz
8ad8942000b8a4be4917599cad9209cf xsa41.patch
ed7d0399c6ca6aeee479da5d8f807fe0 xsa41b.patch
2f3dd7bdc59d104370066d6582725575 xsa41c.patch
-4778066a3338ca9a2263048e6a22bb6b xsa97-hap-4_2-prereq.patch
-052b4144e2eef36757a28e7011d0ac74 xsa97-hap-4_2.patch
+1cc14dc8cc1a42aa93a6ea1508931d98 xsa104.patch
+cdc40a86a58fc864ebb7b1dbf90d2352 xsa105.patch
+f58b915ad62aef72bde99f8d04f9a7a4 xsa106.patch
+1f66f6c52941309c825f60e1bf144987 xsa108.patch
95d8af17bf844d41a015ff32aae51ba1 xenstored.initd
b017ccdd5e1c27bbf1513e3569d4ff07 xenstored.confd
ed262f15fb880badb53575539468646c xenconsoled.initd
diff --git a/main/xen/xsa104.patch b/main/xen/xsa104.patch
new file mode 100644
index 00000000000..2c5b39ee9b6
--- /dev/null
+++ b/main/xen/xsa104.patch
@@ -0,0 +1,44 @@
+x86/shadow: fix race condition sampling the dirty vram state
+
+d->arch.hvm_domain.dirty_vram must be read with the domain's paging lock held.
+
+If not, two concurrent hypercalls could both end up attempting to free
+dirty_vram (the second of which will free a wild pointer), or both end up
+allocating a new dirty_vram structure (the first of which will be leaked).
+
+This is XSA-104.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Tim Deegan <tim@xen.org>
+
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -3485,7 +3485,7 @@ int shadow_track_dirty_vram(struct domai
+ int flush_tlb = 0;
+ unsigned long i;
+ p2m_type_t t;
+- struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
++ struct sh_dirty_vram *dirty_vram;
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+ if ( end_pfn < begin_pfn || end_pfn > p2m->max_mapped_pfn + 1 )
+@@ -3495,6 +3495,8 @@ int shadow_track_dirty_vram(struct domai
+ p2m_lock(p2m_get_hostp2m(d));
+ paging_lock(d);
+
++ dirty_vram = d->arch.hvm_domain.dirty_vram;
++
+ if ( dirty_vram && (!nr ||
+ ( begin_pfn != dirty_vram->begin_pfn
+ || end_pfn != dirty_vram->end_pfn )) )
+--- a/xen/include/asm-x86/hvm/domain.h
++++ b/xen/include/asm-x86/hvm/domain.h
+@@ -112,7 +112,7 @@ struct hvm_domain {
+ /* Memory ranges with pinned cache attributes. */
+ struct list_head pinned_cacheattr_ranges;
+
+- /* VRAM dirty support. */
++ /* VRAM dirty support. Protect with the domain paging lock. */
+ struct sh_dirty_vram *dirty_vram;
+
+ /* If one of vcpus of this domain is in no_fill_mode or
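
Illustrative aside, not part of the commit: a minimal standalone C sketch of the locking discipline the XSA-104 patch enforces. A pointer that concurrent hypercalls may free or reallocate must only be sampled after its protecting lock is held; the pthread mutex, the plain calloc/free, and the struct below are simplified stand-ins for Xen's paging lock and sh_dirty_vram, not the real implementation.

/* Sketch of the XSA-104 pattern: sample the shared pointer only under the lock. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct dirty_vram { unsigned long begin_pfn, end_pfn; };

static pthread_mutex_t paging_lock = PTHREAD_MUTEX_INITIALIZER;
static struct dirty_vram *dirty_vram;            /* protected by paging_lock */

/* Start, restart or stop tracking of [begin, begin+nr).  Because the pointer
 * is (re)read after the lock is taken, two racing callers can neither both
 * free it nor both allocate a replacement. */
static int track_dirty_vram(unsigned long begin, unsigned long nr)
{
    pthread_mutex_lock(&paging_lock);

    struct dirty_vram *dv = dirty_vram;          /* sampled under the lock */

    if ( dv && (!nr || dv->begin_pfn != begin || dv->end_pfn != begin + nr) )
    {
        free(dv);                                /* only one caller can win */
        dv = dirty_vram = NULL;
    }
    if ( nr && !dv )
    {
        dv = calloc(1, sizeof(*dv));
        if ( !dv )
        {
            pthread_mutex_unlock(&paging_lock);
            return -1;
        }
        dv->begin_pfn = begin;
        dv->end_pfn = begin + nr;
        dirty_vram = dv;
    }

    pthread_mutex_unlock(&paging_lock);
    return 0;
}

static void *caller(void *arg)
{
    (void)arg;
    for ( int i = 0; i < 100000; i++ )
        track_dirty_vram(0xA0, (i & 1) ? 16 : 0);
    return NULL;
}

int main(void)
{
    pthread_t t1, t2;
    pthread_create(&t1, NULL, caller, NULL);
    pthread_create(&t2, NULL, caller, NULL);
    pthread_join(t1, NULL);
    pthread_join(t2, NULL);
    puts("no double free or leak once the pointer is read under the lock");
    return 0;
}
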
diff --git a/main/xen/xsa105.patch b/main/xen/xsa105.patch
new file mode 100644
index 00000000000..cc7cafddd63
--- /dev/null
+++ b/main/xen/xsa105.patch
@@ -0,0 +1,37 @@
+x86/emulate: check cpl for all privileged instructions
+
+Without this, it is possible for userspace to load its own IDT or GDT.
+
+This is XSA-105.
+
+Reported-by: Andrei LUTAS <vlutas@bitdefender.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Tested-by: Andrei LUTAS <vlutas@bitdefender.com>
+
+--- a/xen/arch/x86/x86_emulate/x86_emulate.c
++++ b/xen/arch/x86/x86_emulate/x86_emulate.c
+@@ -3314,6 +3314,7 @@ x86_emulate(
+ goto swint;
+
+ case 0xf4: /* hlt */
++ generate_exception_if(!mode_ring0(), EXC_GP, 0);
+ ctxt->retire.flags.hlt = 1;
+ break;
+
+@@ -3710,6 +3711,7 @@ x86_emulate(
+ break;
+ case 2: /* lgdt */
+ case 3: /* lidt */
++ generate_exception_if(!mode_ring0(), EXC_GP, 0);
+ generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
+ fail_if(ops->write_segment == NULL);
+ memset(&reg, 0, sizeof(reg));
+@@ -3738,6 +3740,7 @@ x86_emulate(
+ case 6: /* lmsw */
+ fail_if(ops->read_cr == NULL);
+ fail_if(ops->write_cr == NULL);
++ generate_exception_if(!mode_ring0(), EXC_GP, 0);
+ if ( (rc = ops->read_cr(0, &cr0, ctxt)) )
+ goto done;
+ if ( ea.type == OP_REG )
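
Illustrative aside, not part of the commit: a standalone C sketch of the privilege check XSA-105 adds. The emulator context, mode_ring0() helper, and exception constant below are simplified stand-ins, not Xen's x86_emulate internals; the point is only that privileged opcodes must raise #GP(0) when the guest is not executing at CPL 0.

#include <stdio.h>

enum { EXC_NONE = -1, EXC_GP = 13 };

struct emul_ctxt {
    unsigned int cpl;        /* guest's current privilege level */
    int exception;           /* exception to inject, or EXC_NONE */
};

static int mode_ring0(const struct emul_ctxt *ctxt)
{
    return ctxt->cpl == 0;
}

/* Emulate a simplified HLT.  Returns 0 on success, -1 if an exception was
 * raised instead of executing the instruction. */
static int emulate_hlt(struct emul_ctxt *ctxt)
{
    if ( !mode_ring0(ctxt) )          /* the check XSA-105 introduces */
    {
        ctxt->exception = EXC_GP;     /* #GP(0), matching real hardware */
        return -1;
    }
    /* ...halt the virtual CPU here... */
    return 0;
}

int main(void)
{
    struct emul_ctxt user = { .cpl = 3, .exception = EXC_NONE };
    struct emul_ctxt kern = { .cpl = 0, .exception = EXC_NONE };

    printf("cpl 3 hlt -> %s\n", emulate_hlt(&user) ? "#GP" : "halted");
    printf("cpl 0 hlt -> %s\n", emulate_hlt(&kern) ? "#GP" : "halted");
    return 0;
}
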
diff --git a/main/xen/xsa106.patch b/main/xen/xsa106.patch
new file mode 100644
index 00000000000..436724dbc15
--- /dev/null
+++ b/main/xen/xsa106.patch
@@ -0,0 +1,23 @@
+x86emul: only emulate software interrupt injection for real mode
+
+Protected mode emulation currently lacks proper privilege checking of
+the referenced IDT entry, and there's currently no legitimate way for
+any of the respective instructions to reach the emulator when the guest
+is in protected mode.
+
+This is XSA-106.
+
+Reported-by: Andrei LUTAS <vlutas@bitdefender.com>
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+Acked-by: Keir Fraser <keir@xen.org>
+
+--- a/xen/arch/x86/x86_emulate/x86_emulate.c
++++ b/xen/arch/x86/x86_emulate/x86_emulate.c
+@@ -2634,6 +2634,7 @@ x86_emulate(
+ case 0xcd: /* int imm8 */
+ src.val = insn_fetch_type(uint8_t);
+ swint:
++ fail_if(!in_realmode(ctxt, ops)); /* XSA-106 */
+ fail_if(ops->inject_sw_interrupt == NULL);
+ rc = ops->inject_sw_interrupt(src.val, _regs.eip - ctxt->regs->eip,
+ ctxt) ? : X86EMUL_EXCEPTION;
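
Illustrative aside, not part of the commit: a short standalone C sketch of the XSA-106 guard. The context struct and helper are simplified stand-ins; the point is that software interrupt injection is only emulated while the guest is in real mode, and the emulation fails otherwise.

#include <stdio.h>

struct emul_ctxt { int realmode; };

static int inject_sw_interrupt(const struct emul_ctxt *ctxt, unsigned int vector)
{
    if ( !ctxt->realmode )
        return -1;                    /* fail the emulation, as XSA-106 does */
    printf("injecting int 0x%02x\n", vector);
    return 0;
}

int main(void)
{
    struct emul_ctxt rm = { .realmode = 1 }, pm = { .realmode = 0 };
    printf("real mode:      rc=%d\n", inject_sw_interrupt(&rm, 0x80));
    printf("protected mode: rc=%d\n", inject_sw_interrupt(&pm, 0x80));
    return 0;
}
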
diff --git a/main/xen/xsa108.patch b/main/xen/xsa108.patch
new file mode 100644
index 00000000000..e162185789f
--- /dev/null
+++ b/main/xen/xsa108.patch
@@ -0,0 +1,36 @@
+x86/HVM: properly bound x2APIC MSR range
+
+While the write path change appears to be purely cosmetic (but still
+gets done here for consistency), the read side mistake permitted
+accesses beyond the virtual APIC page.
+
+Note that while this isn't fully in line with the specification
+(digesting MSRs 0x800-0xBFF for the x2APIC), this is the minimal
+possible fix addressing the security issue and getting x2APIC related
+code into a consistent shape (elsewhere a 256 rather than 1024 wide
+window is being used too). This will be dealt with subsequently.
+
+This is XSA-108.
+
+Signed-off-by: Jan Beulich <jbeulich@suse.com>
+
+--- a/xen/arch/x86/hvm/hvm.c
++++ b/xen/arch/x86/hvm/hvm.c
+@@ -4380,7 +4380,7 @@ int hvm_msr_read_intercept(unsigned int
+ *msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
+ break;
+
+- case MSR_IA32_APICBASE_MSR ... MSR_IA32_APICBASE_MSR + 0x3ff:
++ case MSR_IA32_APICBASE_MSR ... MSR_IA32_APICBASE_MSR + 0xff:
+ if ( hvm_x2apic_msr_read(v, msr, msr_content) )
+ goto gp_fault;
+ break;
+@@ -4506,7 +4506,7 @@ int hvm_msr_write_intercept(unsigned int
+ vlapic_tdt_msr_set(vcpu_vlapic(v), msr_content);
+ break;
+
+- case MSR_IA32_APICBASE_MSR ... MSR_IA32_APICBASE_MSR + 0x3ff:
++ case MSR_IA32_APICBASE_MSR ... MSR_IA32_APICBASE_MSR + 0xff:
+ if ( hvm_x2apic_msr_write(v, msr, msr_content) )
+ goto gp_fault;
+ break;
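
Illustrative aside, not part of the commit: a standalone C sketch of the arithmetic behind XSA-108. Each x2APIC MSR maps to a 16-byte slot in the 4 KiB virtual APIC page, so only MSRs 0x800-0x8FF (a 0x100-wide window) can index inside that page; accepting a 0x400-wide window (offset 0x3ff), as the old code did, lets a read land well beyond it. The helper below is a simplified stand-in, not Xen's hvm_x2apic_msr_read().

#include <stdio.h>

#define MSR_IA32_APICBASE_MSR  0x800u
#define APIC_PAGE_SIZE         0x1000u   /* 4 KiB virtual APIC page */

/* Translate an MSR index into a byte offset within the virtual APIC page,
 * accepting MSRs up to MSR_IA32_APICBASE_MSR + window.  Returns 1 if the MSR
 * was accepted as an x2APIC register. */
static int apic_reg_offset(unsigned int msr, unsigned int window,
                           unsigned int *offset)
{
    if ( msr < MSR_IA32_APICBASE_MSR || msr > MSR_IA32_APICBASE_MSR + window )
        return 0;
    *offset = (msr - MSR_IA32_APICBASE_MSR) * 16;
    return 1;
}

int main(void)
{
    unsigned int off;

    /* 0x8FF is the last MSR that still maps inside the APIC page. */
    if ( apic_reg_offset(0x8FF, 0xff, &off) )
        printf("MSR 0x8FF, window 0xff  -> offset 0x%03x (inside the page)\n", off);

    /* With the old 0x3ff window, MSR 0xBFF was accepted as well... */
    if ( apic_reg_offset(0xBFF, 0x3ff, &off) )
        printf("MSR 0xBFF, window 0x3ff -> offset 0x%04x (%s the page)\n",
               off, off < APIC_PAGE_SIZE ? "inside" : "beyond");
    return 0;
}
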
diff --git a/main/xen/xsa97-hap-4_2-prereq.patch b/main/xen/xsa97-hap-4_2-prereq.patch
deleted file mode 100644
index ce2240aec84..00000000000
--- a/main/xen/xsa97-hap-4_2-prereq.patch
+++ /dev/null
@@ -1,466 +0,0 @@
-x86/mm/hap: Adjust vram tracking to play nicely with log-dirty.
-
-The previous code assumed the guest would be in one of three mutually exclusive
-modes for bookkeeping dirty pages: (1) shadow, (2) hap utilizing the log dirty
-bitmap to support functionality such as live migrate, (3) hap utilizing the
-log dirty bitmap to track dirty vram pages.
-Races arose when a guest attempted to track dirty vram while performing live
-migrate. (The dispatch table managed by paging_log_dirty_init() might change
-in the middle of a log dirty or a vram tracking function.)
-
-This change allows hap log dirty and hap vram tracking to be concurrent.
-Vram tracking no longer uses the log dirty bitmap. Instead it detects
-dirty vram pages by examining their p2m type. The log dirty bitmap is only
-used by the log dirty code. Because the two operations use different
-mechanisms, they are no longer mutually exclusive.
-
-Signed-Off-By: Robert Phillips <robert.phillips@citrix.com>
-Acked-by: Tim Deegan <tim@xen.org>
-
-Minor whitespace changes to conform with coding style
-Signed-off-by: Tim Deegan <tim@xen.org>
-
-Committed-by: Tim Deegan <tim@xen.org>
-master commit: fd91a2a662bc59677e0f217423a7a155d5465886
-master date: 2012-12-13 12:10:14 +0000
-
---- a/xen/arch/x86/mm/hap/hap.c
-+++ b/xen/arch/x86/mm/hap/hap.c
-@@ -56,132 +56,110 @@
- /* HAP VRAM TRACKING SUPPORT */
- /************************************************/
-
--static int hap_enable_vram_tracking(struct domain *d)
--{
-- struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
--
-- if ( !dirty_vram )
-- return -EINVAL;
--
-- /* turn on PG_log_dirty bit in paging mode */
-- paging_lock(d);
-- d->arch.paging.mode |= PG_log_dirty;
-- paging_unlock(d);
--
-- /* set l1e entries of P2M table to be read-only. */
-- p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn,
-- p2m_ram_rw, p2m_ram_logdirty);
--
-- flush_tlb_mask(d->domain_dirty_cpumask);
-- return 0;
--}
--
--static int hap_disable_vram_tracking(struct domain *d)
--{
-- struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
--
-- if ( !dirty_vram )
-- return -EINVAL;
--
-- paging_lock(d);
-- d->arch.paging.mode &= ~PG_log_dirty;
-- paging_unlock(d);
--
-- /* set l1e entries of P2M table with normal mode */
-- p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn,
-- p2m_ram_logdirty, p2m_ram_rw);
--
-- flush_tlb_mask(d->domain_dirty_cpumask);
-- return 0;
--}
--
--static void hap_clean_vram_tracking(struct domain *d)
--{
-- struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
--
-- if ( !dirty_vram )
-- return;
--
-- /* set l1e entries of P2M table to be read-only. */
-- p2m_change_type_range(d, dirty_vram->begin_pfn, dirty_vram->end_pfn,
-- p2m_ram_rw, p2m_ram_logdirty);
--
-- flush_tlb_mask(d->domain_dirty_cpumask);
--}
--
--static void hap_vram_tracking_init(struct domain *d)
--{
-- paging_log_dirty_init(d, hap_enable_vram_tracking,
-- hap_disable_vram_tracking,
-- hap_clean_vram_tracking);
--}
-+/*
-+ * hap_track_dirty_vram()
-+ * Create the domain's dv_dirty_vram struct on demand.
-+ * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
-+ * first encountered.
-+ * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
-+ * calling paging_log_dirty_range(), which interrogates each vram
-+ * page's p2m type looking for pages that have been made writable.
-+ */
-
- int hap_track_dirty_vram(struct domain *d,
- unsigned long begin_pfn,
- unsigned long nr,
-- XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
-+ XEN_GUEST_HANDLE_64(uint8) guest_dirty_bitmap)
- {
- long rc = 0;
-- struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-+ struct sh_dirty_vram *dirty_vram;
-+ uint8_t *dirty_bitmap = NULL;
-
- if ( nr )
- {
-- if ( paging_mode_log_dirty(d) && dirty_vram )
-+ int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
-+
-+ if ( !paging_mode_log_dirty(d) )
- {
-- if ( begin_pfn != dirty_vram->begin_pfn ||
-- begin_pfn + nr != dirty_vram->end_pfn )
-- {
-- paging_log_dirty_disable(d);
-- dirty_vram->begin_pfn = begin_pfn;
-- dirty_vram->end_pfn = begin_pfn + nr;
-- rc = paging_log_dirty_enable(d);
-- if (rc != 0)
-- goto param_fail;
-- }
-+ hap_logdirty_init(d);
-+ rc = paging_log_dirty_enable(d);
-+ if ( rc )
-+ goto out;
- }
-- else if ( !paging_mode_log_dirty(d) && !dirty_vram )
-+
-+ rc = -ENOMEM;
-+ dirty_bitmap = xzalloc_bytes(size);
-+ if ( !dirty_bitmap )
-+ goto out;
-+
-+ paging_lock(d);
-+
-+ dirty_vram = d->arch.hvm_domain.dirty_vram;
-+ if ( !dirty_vram )
- {
- rc = -ENOMEM;
-- if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
-- goto param_fail;
-+ if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL )
-+ {
-+ paging_unlock(d);
-+ goto out;
-+ }
-
-+ d->arch.hvm_domain.dirty_vram = dirty_vram;
-+ }
-+
-+ if ( begin_pfn != dirty_vram->begin_pfn ||
-+ begin_pfn + nr != dirty_vram->end_pfn )
-+ {
- dirty_vram->begin_pfn = begin_pfn;
- dirty_vram->end_pfn = begin_pfn + nr;
-- d->arch.hvm_domain.dirty_vram = dirty_vram;
-- hap_vram_tracking_init(d);
-- rc = paging_log_dirty_enable(d);
-- if (rc != 0)
-- goto param_fail;
-+
-+ paging_unlock(d);
-+
-+ /* set l1e entries of range within P2M table to be read-only. */
-+ p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
-+ p2m_ram_rw, p2m_ram_logdirty);
-+
-+ flush_tlb_mask(d->domain_dirty_cpumask);
-+
-+ memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
- }
- else
- {
-- if ( !paging_mode_log_dirty(d) && dirty_vram )
-- rc = -EINVAL;
-- else
-- rc = -ENODATA;
-- goto param_fail;
-+ paging_unlock(d);
-+
-+ domain_pause(d);
-+
-+ /* get the bitmap */
-+ paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
-+
-+ domain_unpause(d);
- }
-- /* get the bitmap */
-- rc = paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
-+
-+ rc = -EFAULT;
-+ if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 )
-+ rc = 0;
- }
- else
- {
-- if ( paging_mode_log_dirty(d) && dirty_vram ) {
-- rc = paging_log_dirty_disable(d);
-- xfree(dirty_vram);
-- dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-- } else
-- rc = 0;
-- }
-+ paging_lock(d);
-
-- return rc;
-+ dirty_vram = d->arch.hvm_domain.dirty_vram;
-+ if ( dirty_vram )
-+ {
-+ /*
-+ * If zero pages specified while tracking dirty vram
-+ * then stop tracking
-+ */
-+ xfree(dirty_vram);
-+ d->arch.hvm_domain.dirty_vram = NULL;
-+ }
-
--param_fail:
-- if ( dirty_vram )
-- {
-- xfree(dirty_vram);
-- dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-+ paging_unlock(d);
- }
-+out:
-+ if ( dirty_bitmap )
-+ xfree(dirty_bitmap);
-+
- return rc;
- }
-
-@@ -223,13 +201,6 @@ static void hap_clean_dirty_bitmap(struc
-
- void hap_logdirty_init(struct domain *d)
- {
-- struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
-- if ( paging_mode_log_dirty(d) && dirty_vram )
-- {
-- paging_log_dirty_disable(d);
-- xfree(dirty_vram);
-- dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
-- }
-
- /* Reinitialize logdirty mechanism */
- paging_log_dirty_init(d, hap_enable_log_dirty,
---- a/xen/arch/x86/mm/paging.c
-+++ b/xen/arch/x86/mm/paging.c
-@@ -447,157 +447,38 @@ int paging_log_dirty_op(struct domain *d
- return rv;
- }
-
--int paging_log_dirty_range(struct domain *d,
-- unsigned long begin_pfn,
-- unsigned long nr,
-- XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
--{
-- int rv = 0;
-- unsigned long pages = 0;
-- mfn_t *l4, *l3, *l2;
-- unsigned long *l1;
-- int b1, b2, b3, b4;
-- int i2, i3, i4;
--
-- d->arch.paging.log_dirty.clean_dirty_bitmap(d);
-- paging_lock(d);
--
-- PAGING_DEBUG(LOGDIRTY, "log-dirty-range: dom %u faults=%u dirty=%u\n",
-- d->domain_id,
-- d->arch.paging.log_dirty.fault_count,
-- d->arch.paging.log_dirty.dirty_count);
--
-- if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
-- printk("%s: %d failed page allocs while logging dirty pages\n",
-- __FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
-- rv = -ENOMEM;
-- goto out;
-- }
-+void paging_log_dirty_range(struct domain *d,
-+ unsigned long begin_pfn,
-+ unsigned long nr,
-+ uint8_t *dirty_bitmap)
-+{
-+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
-+ int i;
-+ unsigned long pfn;
-+
-+ /*
-+ * Set l1e entries of P2M table to be read-only.
-+ *
-+ * On first write, it page faults, its entry is changed to read-write,
-+ * and on retry the write succeeds.
-+ *
-+ * We populate dirty_bitmap by looking for entries that have been
-+ * switched to read-write.
-+ */
-
-- if ( !d->arch.paging.log_dirty.fault_count &&
-- !d->arch.paging.log_dirty.dirty_count ) {
-- unsigned int size = BITS_TO_LONGS(nr);
--
-- if ( clear_guest(dirty_bitmap, size * BYTES_PER_LONG) != 0 )
-- rv = -EFAULT;
-- goto out;
-- }
-- d->arch.paging.log_dirty.fault_count = 0;
-- d->arch.paging.log_dirty.dirty_count = 0;
-+ p2m_lock(p2m);
-
-- b1 = L1_LOGDIRTY_IDX(begin_pfn);
-- b2 = L2_LOGDIRTY_IDX(begin_pfn);
-- b3 = L3_LOGDIRTY_IDX(begin_pfn);
-- b4 = L4_LOGDIRTY_IDX(begin_pfn);
-- l4 = paging_map_log_dirty_bitmap(d);
--
-- for ( i4 = b4;
-- (pages < nr) && (i4 < LOGDIRTY_NODE_ENTRIES);
-- i4++ )
-+ for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ )
- {
-- l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
-- for ( i3 = b3;
-- (pages < nr) && (i3 < LOGDIRTY_NODE_ENTRIES);
-- i3++ )
-- {
-- l2 = ((l3 && mfn_valid(l3[i3])) ?
-- map_domain_page(mfn_x(l3[i3])) : NULL);
-- for ( i2 = b2;
-- (pages < nr) && (i2 < LOGDIRTY_NODE_ENTRIES);
-- i2++ )
-- {
-- unsigned int bytes = PAGE_SIZE;
-- uint8_t *s;
-- l1 = ((l2 && mfn_valid(l2[i2])) ?
-- map_domain_page(mfn_x(l2[i2])) : NULL);
--
-- s = ((uint8_t*)l1) + (b1 >> 3);
-- bytes -= b1 >> 3;
--
-- if ( likely(((nr - pages + 7) >> 3) < bytes) )
-- bytes = (unsigned int)((nr - pages + 7) >> 3);
--
-- if ( !l1 )
-- {
-- if ( clear_guest_offset(dirty_bitmap, pages >> 3,
-- bytes) != 0 )
-- {
-- rv = -EFAULT;
-- goto out;
-- }
-- }
-- /* begin_pfn is not 32K aligned, hence we have to bit
-- * shift the bitmap */
-- else if ( b1 & 0x7 )
-- {
-- int i, j;
-- uint32_t *l = (uint32_t*) s;
-- int bits = b1 & 0x7;
-- int bitmask = (1 << bits) - 1;
-- int size = (bytes + BYTES_PER_LONG - 1) / BYTES_PER_LONG;
-- unsigned long bitmap[size];
-- static unsigned long printed = 0;
--
-- if ( printed != begin_pfn )
-- {
-- dprintk(XENLOG_DEBUG, "%s: begin_pfn %lx is not 32K aligned!\n",
-- __FUNCTION__, begin_pfn);
-- printed = begin_pfn;
-- }
--
-- for ( i = 0; i < size - 1; i++, l++ ) {
-- bitmap[i] = ((*l) >> bits) |
-- (((*((uint8_t*)(l + 1))) & bitmask) << (sizeof(*l) * 8 - bits));
-- }
-- s = (uint8_t*) l;
-- size = BYTES_PER_LONG - ((b1 >> 3) & 0x3);
-- bitmap[i] = 0;
-- for ( j = 0; j < size; j++, s++ )
-- bitmap[i] |= (*s) << (j * 8);
-- bitmap[i] = (bitmap[i] >> bits) | (bitmask << (size * 8 - bits));
-- if ( copy_to_guest_offset(dirty_bitmap, (pages >> 3),
-- (uint8_t*) bitmap, bytes) != 0 )
-- {
-- rv = -EFAULT;
-- goto out;
-- }
-- }
-- else
-- {
-- if ( copy_to_guest_offset(dirty_bitmap, pages >> 3,
-- s, bytes) != 0 )
-- {
-- rv = -EFAULT;
-- goto out;
-- }
-- }
--
-- pages += bytes << 3;
-- if ( l1 )
-- {
-- clear_page(l1);
-- unmap_domain_page(l1);
-- }
-- b1 = b1 & 0x7;
-- }
-- b2 = 0;
-- if ( l2 )
-- unmap_domain_page(l2);
-- }
-- b3 = 0;
-- if ( l3 )
-- unmap_domain_page(l3);
-+ p2m_type_t pt;
-+ pt = p2m_change_type(d, pfn, p2m_ram_rw, p2m_ram_logdirty);
-+ if ( pt == p2m_ram_rw )
-+ dirty_bitmap[i >> 3] |= (1 << (i & 7));
- }
-- if ( l4 )
-- unmap_domain_page(l4);
--
-- paging_unlock(d);
-
-- return rv;
-+ p2m_unlock(p2m);
-
-- out:
-- paging_unlock(d);
-- return rv;
-+ flush_tlb_mask(d->domain_dirty_cpumask);
- }
-
- /* Note that this function takes three function pointers. Callers must supply
---- a/xen/include/asm-x86/config.h
-+++ b/xen/include/asm-x86/config.h
-@@ -17,6 +17,7 @@
-
- #define BYTES_PER_LONG (1 << LONG_BYTEORDER)
- #define BITS_PER_LONG (BYTES_PER_LONG << 3)
-+#define BITS_PER_BYTE 8
-
- #define CONFIG_X86 1
- #define CONFIG_X86_HT 1
---- a/xen/include/asm-x86/paging.h
-+++ b/xen/include/asm-x86/paging.h
-@@ -145,10 +145,10 @@ struct paging_mode {
- void paging_free_log_dirty_bitmap(struct domain *d);
-
- /* get the dirty bitmap for a specific range of pfns */
--int paging_log_dirty_range(struct domain *d,
-- unsigned long begin_pfn,
-- unsigned long nr,
-- XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
-+void paging_log_dirty_range(struct domain *d,
-+ unsigned long begin_pfn,
-+ unsigned long nr,
-+ uint8_t *dirty_bitmap);
-
- /* enable log dirty */
- int paging_log_dirty_enable(struct domain *d);
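
Illustrative aside, not part of the commit: a standalone C sketch of the dirty-VRAM polling scheme described by the removed xsa97-hap-4_2-prereq.patch above, where a page's dirtiness is derived from its p2m type rather than from the log-dirty bitmap. The array below is a simplified stand-in for the host p2m, and the TLB flush is only noted as a comment.

#include <stdint.h>
#include <stdio.h>

enum p2m_type { p2m_ram_logdirty, p2m_ram_rw };

#define NR_VRAM_PAGES 32
static enum p2m_type p2m[NR_VRAM_PAGES];   /* stand-in for the host p2m */

/* Collect the dirty bitmap for [begin, begin+nr): every page found
 * read-write has been written since the last poll, so mark it dirty and
 * switch it back to read-only to re-arm write detection. */
static void log_dirty_range(unsigned long begin, unsigned long nr,
                            uint8_t *bitmap)
{
    for ( unsigned long i = 0; i < nr; i++ )
    {
        unsigned long pfn = begin + i;
        if ( p2m[pfn] == p2m_ram_rw )
        {
            bitmap[i >> 3] |= 1 << (i & 7);
            p2m[pfn] = p2m_ram_logdirty;
        }
    }
    /* a real hypervisor would flush stale TLB entries here */
}

int main(void)
{
    uint8_t bitmap[NR_VRAM_PAGES / 8] = { 0 };

    p2m[3] = p2m_ram_rw;                   /* simulate guest writes */
    p2m[17] = p2m_ram_rw;

    log_dirty_range(0, NR_VRAM_PAGES, bitmap);
    for ( unsigned int i = 0; i < NR_VRAM_PAGES; i++ )
        if ( bitmap[i >> 3] & (1 << (i & 7)) )
            printf("pfn %u dirty\n", i);
    return 0;
}
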
diff --git a/main/xen/xsa97-hap-4_2.patch b/main/xen/xsa97-hap-4_2.patch
deleted file mode 100644
index 5f89b58b7b0..00000000000
--- a/main/xen/xsa97-hap-4_2.patch
+++ /dev/null
@@ -1,485 +0,0 @@
-x86/paging: make log-dirty operations preemptible
-
-Both the freeing and the inspection of the bitmap get done in (nested)
-loops which - besides having a rather high iteration count in general,
-albeit that would be covered by XSA-77 - have the number of non-trivial
-iterations they need to perform (indirectly) controllable by both the
-guest they are for and any domain controlling the guest (including the
-one running qemu for it).
-
-This is XSA-97.
-
-Signed-off-by: Jan Beulich <jbeulich@suse.com>
-Reviewed-by: Tim Deegan <tim@xen.org>
-
---- a/xen/arch/x86/domain.c
-+++ b/xen/arch/x86/domain.c
-@@ -2136,7 +2136,9 @@ int domain_relinquish_resources(struct d
- pci_release_devices(d);
-
- /* Tear down paging-assistance stuff. */
-- paging_teardown(d);
-+ ret = paging_teardown(d);
-+ if ( ret )
-+ return ret;
-
- /* Drop the in-use references to page-table bases. */
- for_each_vcpu ( d, v )
---- a/xen/arch/x86/domctl.c
-+++ b/xen/arch/x86/domctl.c
-@@ -66,6 +66,9 @@ long arch_do_domctl(
- &domctl->u.shadow_op,
- guest_handle_cast(u_domctl, void));
- rcu_unlock_domain(d);
-+ if ( ret == -EAGAIN )
-+ return hypercall_create_continuation(__HYPERVISOR_domctl,
-+ "h", u_domctl);
- copy_to_guest(u_domctl, domctl, 1);
- }
- }
---- a/xen/arch/x86/mm/hap/hap.c
-+++ b/xen/arch/x86/mm/hap/hap.c
-@@ -678,8 +678,7 @@ int hap_domctl(struct domain *d, xen_dom
- paging_unlock(d);
- if ( preempted )
- /* Not finished. Set up to re-run the call. */
-- rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h",
-- u_domctl);
-+ rc = -EAGAIN;
- else
- /* Finished. Return the new allocation */
- sc->mb = hap_get_allocation(d);
---- a/xen/arch/x86/mm/paging.c
-+++ b/xen/arch/x86/mm/paging.c
-@@ -26,6 +26,7 @@
- #include <asm/shadow.h>
- #include <asm/p2m.h>
- #include <asm/hap.h>
-+#include <asm/event.h>
- #include <asm/hvm/nestedhvm.h>
- #include <xen/numa.h>
- #include <xsm/xsm.h>
-@@ -116,26 +117,46 @@ static void paging_free_log_dirty_page(s
- d->arch.paging.free_page(d, mfn_to_page(mfn));
- }
-
--void paging_free_log_dirty_bitmap(struct domain *d)
-+static int paging_free_log_dirty_bitmap(struct domain *d, int rc)
- {
- mfn_t *l4, *l3, *l2;
- int i4, i3, i2;
-
-+ paging_lock(d);
-+
- if ( !mfn_valid(d->arch.paging.log_dirty.top) )
-- return;
-+ {
-+ paging_unlock(d);
-+ return 0;
-+ }
-
-- paging_lock(d);
-+ if ( !d->arch.paging.preempt.vcpu )
-+ {
-+ memset(&d->arch.paging.preempt.log_dirty, 0,
-+ sizeof(d->arch.paging.preempt.log_dirty));
-+ ASSERT(rc <= 0);
-+ d->arch.paging.preempt.log_dirty.done = -rc;
-+ }
-+ else if ( d->arch.paging.preempt.vcpu != current ||
-+ d->arch.paging.preempt.op != XEN_DOMCTL_SHADOW_OP_OFF )
-+ {
-+ paging_unlock(d);
-+ return -EBUSY;
-+ }
-
- l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
-+ i4 = d->arch.paging.preempt.log_dirty.i4;
-+ i3 = d->arch.paging.preempt.log_dirty.i3;
-+ rc = 0;
-
-- for ( i4 = 0; i4 < LOGDIRTY_NODE_ENTRIES; i4++ )
-+ for ( ; i4 < LOGDIRTY_NODE_ENTRIES; i4++, i3 = 0 )
- {
- if ( !mfn_valid(l4[i4]) )
- continue;
-
- l3 = map_domain_page(mfn_x(l4[i4]));
-
-- for ( i3 = 0; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
-+ for ( ; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
- {
- if ( !mfn_valid(l3[i3]) )
- continue;
-@@ -148,20 +169,54 @@ void paging_free_log_dirty_bitmap(struct
-
- unmap_domain_page(l2);
- paging_free_log_dirty_page(d, l3[i3]);
-+ l3[i3] = _mfn(INVALID_MFN);
-+
-+ if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
-+ {
-+ d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
-+ d->arch.paging.preempt.log_dirty.i4 = i4;
-+ rc = -EAGAIN;
-+ break;
-+ }
- }
-
- unmap_domain_page(l3);
-+ if ( rc )
-+ break;
- paging_free_log_dirty_page(d, l4[i4]);
-+ l4[i4] = _mfn(INVALID_MFN);
-+
-+ if ( i4 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
-+ {
-+ d->arch.paging.preempt.log_dirty.i3 = 0;
-+ d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
-+ rc = -EAGAIN;
-+ break;
-+ }
- }
-
- unmap_domain_page(l4);
-- paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
-- d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
-
-- ASSERT(d->arch.paging.log_dirty.allocs == 0);
-- d->arch.paging.log_dirty.failed_allocs = 0;
-+ if ( !rc )
-+ {
-+ paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
-+ d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
-+
-+ ASSERT(d->arch.paging.log_dirty.allocs == 0);
-+ d->arch.paging.log_dirty.failed_allocs = 0;
-+
-+ rc = -d->arch.paging.preempt.log_dirty.done;
-+ d->arch.paging.preempt.vcpu = NULL;
-+ }
-+ else
-+ {
-+ d->arch.paging.preempt.vcpu = current;
-+ d->arch.paging.preempt.op = XEN_DOMCTL_SHADOW_OP_OFF;
-+ }
-
- paging_unlock(d);
-+
-+ return rc;
- }
-
- int paging_log_dirty_enable(struct domain *d)
-@@ -178,15 +233,25 @@ int paging_log_dirty_enable(struct domai
- return ret;
- }
-
--int paging_log_dirty_disable(struct domain *d)
-+static int paging_log_dirty_disable(struct domain *d, bool_t resuming)
- {
-- int ret;
-+ int ret = 1;
-+
-+ if ( !resuming )
-+ {
-+ domain_pause(d);
-+ /* Safe because the domain is paused. */
-+ ret = d->arch.paging.log_dirty.disable_log_dirty(d);
-+ ASSERT(ret <= 0);
-+ }
-
-- domain_pause(d);
-- /* Safe because the domain is paused. */
-- ret = d->arch.paging.log_dirty.disable_log_dirty(d);
- if ( !paging_mode_log_dirty(d) )
-- paging_free_log_dirty_bitmap(d);
-+ {
-+ ret = paging_free_log_dirty_bitmap(d, ret);
-+ if ( ret == -EAGAIN )
-+ return ret;
-+ }
-+
- domain_unpause(d);
-
- return ret;
-@@ -326,7 +391,9 @@ int paging_mfn_is_dirty(struct domain *d
-
- /* Read a domain's log-dirty bitmap and stats. If the operation is a CLEAN,
- * clear the bitmap and stats as well. */
--int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
-+static int paging_log_dirty_op(struct domain *d,
-+ struct xen_domctl_shadow_op *sc,
-+ bool_t resuming)
- {
- int rv = 0, clean = 0, peek = 1;
- unsigned long pages = 0;
-@@ -334,9 +401,22 @@ int paging_log_dirty_op(struct domain *d
- unsigned long *l1 = NULL;
- int i4, i3, i2;
-
-- domain_pause(d);
-+ if ( !resuming )
-+ domain_pause(d);
- paging_lock(d);
-
-+ if ( !d->arch.paging.preempt.vcpu )
-+ memset(&d->arch.paging.preempt.log_dirty, 0,
-+ sizeof(d->arch.paging.preempt.log_dirty));
-+ else if ( d->arch.paging.preempt.vcpu != current ||
-+ d->arch.paging.preempt.op != sc->op )
-+ {
-+ paging_unlock(d);
-+ ASSERT(!resuming);
-+ domain_unpause(d);
-+ return -EBUSY;
-+ }
-+
- clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
-
- PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
-@@ -365,17 +445,15 @@ int paging_log_dirty_op(struct domain *d
- goto out;
- }
-
-- pages = 0;
- l4 = paging_map_log_dirty_bitmap(d);
-+ i4 = d->arch.paging.preempt.log_dirty.i4;
-+ i3 = d->arch.paging.preempt.log_dirty.i3;
-+ pages = d->arch.paging.preempt.log_dirty.done;
-
-- for ( i4 = 0;
-- (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES);
-- i4++ )
-+ for ( ; (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES); i4++, i3 = 0 )
- {
- l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
-- for ( i3 = 0;
-- (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES);
-- i3++ )
-+ for ( ; (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES); i3++ )
- {
- l2 = ((l3 && mfn_valid(l3[i3])) ?
- map_domain_page(mfn_x(l3[i3])) : NULL);
-@@ -410,18 +488,51 @@ int paging_log_dirty_op(struct domain *d
- }
- if ( l2 )
- unmap_domain_page(l2);
-+
-+ if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
-+ {
-+ d->arch.paging.preempt.log_dirty.i4 = i4;
-+ d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
-+ rv = -EAGAIN;
-+ break;
-+ }
- }
- if ( l3 )
- unmap_domain_page(l3);
-+
-+ if ( !rv && i4 < LOGDIRTY_NODE_ENTRIES - 1 &&
-+ hypercall_preempt_check() )
-+ {
-+ d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
-+ d->arch.paging.preempt.log_dirty.i3 = 0;
-+ rv = -EAGAIN;
-+ }
-+ if ( rv )
-+ break;
- }
- if ( l4 )
- unmap_domain_page(l4);
-
-- if ( pages < sc->pages )
-- sc->pages = pages;
-+ if ( !rv )
-+ d->arch.paging.preempt.vcpu = NULL;
-+ else
-+ {
-+ d->arch.paging.preempt.vcpu = current;
-+ d->arch.paging.preempt.op = sc->op;
-+ d->arch.paging.preempt.log_dirty.done = pages;
-+ }
-
- paging_unlock(d);
-
-+ if ( rv )
-+ {
-+ /* Never leave the domain paused for other errors. */
-+ ASSERT(rv == -EAGAIN);
-+ return rv;
-+ }
-+
-+ if ( pages < sc->pages )
-+ sc->pages = pages;
- if ( clean )
- {
- /* We need to further call clean_dirty_bitmap() functions of specific
-@@ -432,6 +543,7 @@ int paging_log_dirty_op(struct domain *d
- return rv;
-
- out:
-+ d->arch.paging.preempt.vcpu = NULL;
- paging_unlock(d);
- domain_unpause(d);
-
-@@ -498,12 +610,6 @@ void paging_log_dirty_init(struct domain
- d->arch.paging.log_dirty.clean_dirty_bitmap = clean_dirty_bitmap;
- }
-
--/* This function fress log dirty bitmap resources. */
--static void paging_log_dirty_teardown(struct domain*d)
--{
-- paging_free_log_dirty_bitmap(d);
--}
--
- /************************************************/
- /* CODE FOR PAGING SUPPORT */
- /************************************************/
-@@ -547,6 +653,7 @@ void paging_vcpu_init(struct vcpu *v)
- int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
- XEN_GUEST_HANDLE(void) u_domctl)
- {
-+ bool_t resuming = 0;
- int rc;
-
- if ( unlikely(d == current->domain) )
-@@ -569,6 +676,20 @@ int paging_domctl(struct domain *d, xen_
- return -EINVAL;
- }
-
-+ if ( d->arch.paging.preempt.vcpu )
-+ {
-+ if ( d->arch.paging.preempt.vcpu != current ||
-+ d->arch.paging.preempt.op != sc->op )
-+ {
-+ printk(XENLOG_G_DEBUG
-+ "d%d:v%d: Paging op %#x on Dom%u with unfinished prior op %#x\n",
-+ current->domain->domain_id, current->vcpu_id,
-+ sc->op, d->domain_id, d->arch.paging.preempt.op);
-+ return -EBUSY;
-+ }
-+ resuming = 1;
-+ }
-+
- rc = xsm_shadow_control(d, sc->op);
- if ( rc )
- return rc;
-@@ -594,13 +714,13 @@ int paging_domctl(struct domain *d, xen_
-
- case XEN_DOMCTL_SHADOW_OP_OFF:
- if ( paging_mode_log_dirty(d) )
-- if ( (rc = paging_log_dirty_disable(d)) != 0 )
-+ if ( (rc = paging_log_dirty_disable(d, resuming)) != 0 )
- return rc;
- break;
-
- case XEN_DOMCTL_SHADOW_OP_CLEAN:
- case XEN_DOMCTL_SHADOW_OP_PEEK:
-- return paging_log_dirty_op(d, sc);
-+ return paging_log_dirty_op(d, sc, resuming);
- }
-
- /* Here, dispatch domctl to the appropriate paging code */
-@@ -611,18 +731,24 @@ int paging_domctl(struct domain *d, xen_
- }
-
- /* Call when destroying a domain */
--void paging_teardown(struct domain *d)
-+int paging_teardown(struct domain *d)
- {
-+ int rc;
-+
- if ( hap_enabled(d) )
- hap_teardown(d);
- else
- shadow_teardown(d);
-
- /* clean up log dirty resources. */
-- paging_log_dirty_teardown(d);
-+ rc = paging_free_log_dirty_bitmap(d, 0);
-+ if ( rc == -EAGAIN )
-+ return rc;
-
- /* Move populate-on-demand cache back to domain_list for destruction */
- p2m_pod_empty_cache(d);
-+
-+ return rc;
- }
-
- /* Call once all of the references to the domain have gone away */
---- a/xen/arch/x86/mm/shadow/common.c
-+++ b/xen/arch/x86/mm/shadow/common.c
-@@ -3829,8 +3829,7 @@ int shadow_domctl(struct domain *d,
- paging_unlock(d);
- if ( preempted )
- /* Not finished. Set up to re-run the call. */
-- rc = hypercall_create_continuation(
-- __HYPERVISOR_domctl, "h", u_domctl);
-+ rc = -EAGAIN;
- else
- /* Finished. Return the new allocation */
- sc->mb = shadow_get_allocation(d);
---- a/xen/common/domain.c
-+++ b/xen/common/domain.c
-@@ -479,7 +479,6 @@ int domain_kill(struct domain *d)
- rc = domain_relinquish_resources(d);
- if ( rc != 0 )
- {
-- BUG_ON(rc != -EAGAIN);
- break;
- }
- if ( sched_move_domain(d, cpupool0) )
---- a/xen/include/asm-x86/domain.h
-+++ b/xen/include/asm-x86/domain.h
-@@ -193,6 +193,20 @@ struct paging_domain {
- struct hap_domain hap;
- /* log dirty support */
- struct log_dirty_domain log_dirty;
-+
-+ /* preemption handling */
-+ struct {
-+ struct vcpu *vcpu;
-+ unsigned int op;
-+ union {
-+ struct {
-+ unsigned long done:PADDR_BITS - PAGE_SHIFT;
-+ unsigned long i4:PAGETABLE_ORDER;
-+ unsigned long i3:PAGETABLE_ORDER;
-+ } log_dirty;
-+ };
-+ } preempt;
-+
- /* alloc/free pages from the pool for paging-assistance structures
- * (used by p2m and log-dirty code for their tries) */
- struct page_info * (*alloc_page)(struct domain *d);
---- a/xen/include/asm-x86/paging.h
-+++ b/xen/include/asm-x86/paging.h
-@@ -141,9 +141,6 @@ struct paging_mode {
- /*****************************************************************************
- * Log dirty code */
-
--/* free log dirty bitmap resource */
--void paging_free_log_dirty_bitmap(struct domain *d);
--
- /* get the dirty bitmap for a specific range of pfns */
- void paging_log_dirty_range(struct domain *d,
- unsigned long begin_pfn,
-@@ -153,9 +150,6 @@ void paging_log_dirty_range(struct domai
- /* enable log dirty */
- int paging_log_dirty_enable(struct domain *d);
-
--/* disable log dirty */
--int paging_log_dirty_disable(struct domain *d);
--
- /* log dirty initialization */
- void paging_log_dirty_init(struct domain *d,
- int (*enable_log_dirty)(struct domain *d),
-@@ -218,7 +212,7 @@ int paging_domctl(struct domain *d, xen_
- XEN_GUEST_HANDLE(void) u_domctl);
-
- /* Call when destroying a domain */
--void paging_teardown(struct domain *d);
-+int paging_teardown(struct domain *d);
-
- /* Call once all of the references to the domain have gone away */
- void paging_final_teardown(struct domain *d);
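
Illustrative aside, not part of the commit: a standalone C sketch of the continuation pattern described by the removed xsa97-hap-4_2.patch above, where a long-running log-dirty operation records how far it got (the i4/i3 indices), returns -EAGAIN when a preemption check fires, and is re-entered until it completes. The budget counter stands in for hypercall_preempt_check(), and the caller's loop stands in for the domctl hypercall continuation; none of the names below are Xen's.

#include <errno.h>
#include <stdio.h>

#define ENTRIES 8            /* stand-in for LOGDIRTY_NODE_ENTRIES */
#define BUDGET  10           /* leaves we may touch before "preempting" */

struct progress { int i4, i3; };

/* Walk a two-level table, stopping with -EAGAIN (and saved indices) when the
 * per-call budget runs out, so the caller can resume where we left off. */
static int process_bitmap(struct progress *p, int *budget)
{
    for ( int i4 = p->i4; i4 < ENTRIES; i4++, p->i3 = 0 )
        for ( int i3 = p->i3; i3 < ENTRIES; i3++ )
        {
            /* ...free or copy one leaf page here... */
            if ( --*budget == 0 && !(i4 == ENTRIES - 1 && i3 == ENTRIES - 1) )
            {
                p->i4 = i4;           /* resume from the next leaf */
                p->i3 = i3 + 1;
                return -EAGAIN;
            }
        }
    return 0;
}

int main(void)
{
    struct progress p = { 0, 0 };
    int rc, calls = 0;

    do {
        int budget = BUDGET;
        rc = process_bitmap(&p, &budget);
        calls++;                      /* one hypercall (re)invocation in Xen */
    } while ( rc == -EAGAIN );

    printf("completed in %d invocations\n", calls);
    return 0;
}
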