aboutsummaryrefslogtreecommitdiffstats
path: root/main/xen/xsa336.patch
diff options
context:
space:
mode:
Diffstat (limited to 'main/xen/xsa336.patch')
-rw-r--r--main/xen/xsa336.patch283
1 files changed, 283 insertions, 0 deletions
diff --git a/main/xen/xsa336.patch b/main/xen/xsa336.patch
new file mode 100644
index 0000000000..b44c298b70
--- /dev/null
+++ b/main/xen/xsa336.patch
@@ -0,0 +1,283 @@
+From: Roger Pau Monné <roger.pau@citrix.com>
+Subject: x86/vpt: fix race when migrating timers between vCPUs
+
+The current vPT code will migrate the emulated timers between vCPUs
+(change the pt->vcpu field) while just holding the destination lock,
+either from create_periodic_time or pt_adjust_global_vcpu_target if
+the global target is adjusted. Changing the periodic_timer vCPU field
+in this way creates a race where a third party could grab the lock in
+the unlocked region of pt_adjust_global_vcpu_target (or before
+create_periodic_time performs the vcpu change) and then release the
+lock from a different vCPU, creating a locking imbalance.
+
+Introduce a per-domain rwlock in order to protect periodic_time
+migration between vCPU lists. Taking the lock in read mode prevents
+any timer from being migrated to a different vCPU, while taking it in
+write mode allows performing migration of timers across vCPUs. The
+per-vcpu locks are still used to protect all the other fields from the
+periodic_timer struct.
+
+Note that such migration shouldn't happen frequently, and hence
+there's no performance drop as a result of such locking.
+
+This is XSA-336.
+
+Reported-by: Igor Druzhinin <igor.druzhinin@citrix.com>
+Tested-by: Igor Druzhinin <igor.druzhinin@citrix.com>
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+---
+Changes since v2:
+ - Re-order pt_adjust_vcpu to remove one if.
+ - Fix pt_lock to not call pt_vcpu_lock, as we might end up using a
+ stale value of pt->vcpu when taking the per-vcpu lock.
+
+Changes since v1:
+ - Use a per-domain rwlock to protect timer vCPU migration.
+
+--- a/xen/arch/x86/hvm/hvm.c
++++ b/xen/arch/x86/hvm/hvm.c
+@@ -658,6 +658,8 @@ int hvm_domain_initialise(struct domain
+ /* need link to containing domain */
+ d->arch.hvm.pl_time->domain = d;
+
++ rwlock_init(&d->arch.hvm.pl_time->pt_migrate);
++
+ /* Set the default IO Bitmap. */
+ if ( is_hardware_domain(d) )
+ {
+--- a/xen/arch/x86/hvm/vpt.c
++++ b/xen/arch/x86/hvm/vpt.c
+@@ -153,23 +153,32 @@ static int pt_irq_masked(struct periodic
+ return 1;
+ }
+
+-static void pt_lock(struct periodic_time *pt)
++static void pt_vcpu_lock(struct vcpu *v)
+ {
+- struct vcpu *v;
++ read_lock(&v->domain->arch.hvm.pl_time->pt_migrate);
++ spin_lock(&v->arch.hvm.tm_lock);
++}
+
+- for ( ; ; )
+- {
+- v = pt->vcpu;
+- spin_lock(&v->arch.hvm.tm_lock);
+- if ( likely(pt->vcpu == v) )
+- break;
+- spin_unlock(&v->arch.hvm.tm_lock);
+- }
++static void pt_vcpu_unlock(struct vcpu *v)
++{
++ spin_unlock(&v->arch.hvm.tm_lock);
++ read_unlock(&v->domain->arch.hvm.pl_time->pt_migrate);
++}
++
++static void pt_lock(struct periodic_time *pt)
++{
++ /*
++ * We cannot use pt_vcpu_lock here, because we need to acquire the
++ * per-domain lock first and then (re-)fetch the value of pt->vcpu, or
++ * else we might be using a stale value of pt->vcpu.
++ */
++ read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
++ spin_lock(&pt->vcpu->arch.hvm.tm_lock);
+ }
+
+ static void pt_unlock(struct periodic_time *pt)
+ {
+- spin_unlock(&pt->vcpu->arch.hvm.tm_lock);
++ pt_vcpu_unlock(pt->vcpu);
+ }
+
+ static void pt_process_missed_ticks(struct periodic_time *pt)
+@@ -219,7 +228,7 @@ void pt_save_timer(struct vcpu *v)
+ if ( v->pause_flags & VPF_blocked )
+ return;
+
+- spin_lock(&v->arch.hvm.tm_lock);
++ pt_vcpu_lock(v);
+
+ list_for_each_entry ( pt, head, list )
+ if ( !pt->do_not_freeze )
+@@ -227,7 +236,7 @@ void pt_save_timer(struct vcpu *v)
+
+ pt_freeze_time(v);
+
+- spin_unlock(&v->arch.hvm.tm_lock);
++ pt_vcpu_unlock(v);
+ }
+
+ void pt_restore_timer(struct vcpu *v)
+@@ -235,7 +244,7 @@ void pt_restore_timer(struct vcpu *v)
+ struct list_head *head = &v->arch.hvm.tm_list;
+ struct periodic_time *pt;
+
+- spin_lock(&v->arch.hvm.tm_lock);
++ pt_vcpu_lock(v);
+
+ list_for_each_entry ( pt, head, list )
+ {
+@@ -248,7 +257,7 @@ void pt_restore_timer(struct vcpu *v)
+
+ pt_thaw_time(v);
+
+- spin_unlock(&v->arch.hvm.tm_lock);
++ pt_vcpu_unlock(v);
+ }
+
+ static void pt_timer_fn(void *data)
+@@ -309,7 +318,7 @@ int pt_update_irq(struct vcpu *v)
+ int irq, pt_vector = -1;
+ bool level;
+
+- spin_lock(&v->arch.hvm.tm_lock);
++ pt_vcpu_lock(v);
+
+ earliest_pt = NULL;
+ max_lag = -1ULL;
+@@ -339,7 +348,7 @@ int pt_update_irq(struct vcpu *v)
+
+ if ( earliest_pt == NULL )
+ {
+- spin_unlock(&v->arch.hvm.tm_lock);
++ pt_vcpu_unlock(v);
+ return -1;
+ }
+
+@@ -347,7 +356,7 @@ int pt_update_irq(struct vcpu *v)
+ irq = earliest_pt->irq;
+ level = earliest_pt->level;
+
+- spin_unlock(&v->arch.hvm.tm_lock);
++ pt_vcpu_unlock(v);
+
+ switch ( earliest_pt->source )
+ {
+@@ -394,7 +403,7 @@ int pt_update_irq(struct vcpu *v)
+ time_cb *cb = NULL;
+ void *cb_priv;
+
+- spin_lock(&v->arch.hvm.tm_lock);
++ pt_vcpu_lock(v);
+ /* Make sure the timer is still on the list. */
+ list_for_each_entry ( pt, &v->arch.hvm.tm_list, list )
+ if ( pt == earliest_pt )
+@@ -404,7 +413,7 @@ int pt_update_irq(struct vcpu *v)
+ cb_priv = pt->priv;
+ break;
+ }
+- spin_unlock(&v->arch.hvm.tm_lock);
++ pt_vcpu_unlock(v);
+
+ if ( cb != NULL )
+ cb(v, cb_priv);
+@@ -441,12 +450,12 @@ void pt_intr_post(struct vcpu *v, struct
+ if ( intack.source == hvm_intsrc_vector )
+ return;
+
+- spin_lock(&v->arch.hvm.tm_lock);
++ pt_vcpu_lock(v);
+
+ pt = is_pt_irq(v, intack);
+ if ( pt == NULL )
+ {
+- spin_unlock(&v->arch.hvm.tm_lock);
++ pt_vcpu_unlock(v);
+ return;
+ }
+
+@@ -455,7 +464,7 @@ void pt_intr_post(struct vcpu *v, struct
+ cb = pt->cb;
+ cb_priv = pt->priv;
+
+- spin_unlock(&v->arch.hvm.tm_lock);
++ pt_vcpu_unlock(v);
+
+ if ( cb != NULL )
+ cb(v, cb_priv);
+@@ -466,12 +475,12 @@ void pt_migrate(struct vcpu *v)
+ struct list_head *head = &v->arch.hvm.tm_list;
+ struct periodic_time *pt;
+
+- spin_lock(&v->arch.hvm.tm_lock);
++ pt_vcpu_lock(v);
+
+ list_for_each_entry ( pt, head, list )
+ migrate_timer(&pt->timer, v->processor);
+
+- spin_unlock(&v->arch.hvm.tm_lock);
++ pt_vcpu_unlock(v);
+ }
+
+ void create_periodic_time(
+@@ -490,7 +499,7 @@ void create_periodic_time(
+
+ destroy_periodic_time(pt);
+
+- spin_lock(&v->arch.hvm.tm_lock);
++ write_lock(&v->domain->arch.hvm.pl_time->pt_migrate);
+
+ pt->pending_intr_nr = 0;
+ pt->do_not_freeze = 0;
+@@ -540,7 +549,7 @@ void create_periodic_time(
+ init_timer(&pt->timer, pt_timer_fn, pt, v->processor);
+ set_timer(&pt->timer, pt->scheduled);
+
+- spin_unlock(&v->arch.hvm.tm_lock);
++ write_unlock(&v->domain->arch.hvm.pl_time->pt_migrate);
+ }
+
+ void destroy_periodic_time(struct periodic_time *pt)
+@@ -565,30 +574,20 @@ void destroy_periodic_time(struct period
+
+ static void pt_adjust_vcpu(struct periodic_time *pt, struct vcpu *v)
+ {
+- int on_list;
+-
+ ASSERT(pt->source == PTSRC_isa || pt->source == PTSRC_ioapic);
+
+ if ( pt->vcpu == NULL )
+ return;
+
+- pt_lock(pt);
+- on_list = pt->on_list;
+- if ( pt->on_list )
+- list_del(&pt->list);
+- pt->on_list = 0;
+- pt_unlock(pt);
+-
+- spin_lock(&v->arch.hvm.tm_lock);
++ write_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
+ pt->vcpu = v;
+- if ( on_list )
++ if ( pt->on_list )
+ {
+- pt->on_list = 1;
++ list_del(&pt->list);
+ list_add(&pt->list, &v->arch.hvm.tm_list);
+-
+ migrate_timer(&pt->timer, v->processor);
+ }
+- spin_unlock(&v->arch.hvm.tm_lock);
++ write_unlock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
+ }
+
+ void pt_adjust_global_vcpu_target(struct vcpu *v)
+--- a/xen/include/asm-x86/hvm/vpt.h
++++ b/xen/include/asm-x86/hvm/vpt.h
+@@ -128,6 +128,13 @@ struct pl_time { /* platform time */
+ struct RTCState vrtc;
+ struct HPETState vhpet;
+ struct PMTState vpmt;
++ /*
++ * rwlock to prevent periodic_time vCPU migration. Take the lock in read
++ * mode in order to prevent the vcpu field of periodic_time from changing.
++ * Lock must be taken in write mode when changes to the vcpu field are
++ * performed, as it allows exclusive access to all the timers of a domain.
++ */
++ rwlock_t pt_migrate;
+ /* guest_time = Xen sys time + stime_offset */
+ int64_t stime_offset;
+ /* Ensures monotonicity in appropriate timer modes. */