MIPS: KVM: Fix timer race modifying guest CP0_Cause
authorJames Hogan <james.hogan@imgtec.com>
Thu, 29 May 2014 09:16:33 +0000 (10:16 +0100)
committerPaolo Bonzini <pbonzini@redhat.com>
Fri, 30 May 2014 11:01:25 +0000 (13:01 +0200)
The hrtimer callback for guest timer timeouts sets the guest's
CP0_Cause.TI bit to indicate to the guest that a timer interrupt is
pending, however there is no mutual exclusion implemented to prevent
this occurring while the guest's CP0_Cause register is being
read-modify-written elsewhere.

When this occurs the setting of the CP0_Cause.TI bit is undone and the
guest misses the timer interrupt and doesn't reprogram the CP0_Compare
register for the next timeout. Currently another timer interrupt will be
triggered again in another 10ms anyway due to the way timers are
emulated, but after the MIPS timer emulation is fixed this would result
in Linux guest time standing still and the guest scheduler not being
invoked until the guest CP0_Count has looped around again, which at
100MHz takes just under 43 seconds.

Currently this is the only asynchronous modification of guest registers,
therefore it is fixed by adjusting the implementations of the
kvm_set_c0_guest_cause(), kvm_clear_c0_guest_cause(), and
kvm_change_c0_guest_cause() macros which are used for modifying the
guest CP0_Cause register to use ll/sc to ensure atomic modification.
This should work in both UP and SMP cases without requiring interrupts
to be disabled.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: kvm@vger.kernel.org
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: Sanjay Lal <sanjayl@kymasys.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/mips/include/asm/kvm_host.h

index 6f93384..79410f8 100644 (file)
@@ -482,15 +482,74 @@ struct kvm_vcpu_arch {
 #define kvm_read_c0_guest_errorepc(cop0)       (cop0->reg[MIPS_CP0_ERROR_PC][0])
 #define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
 
+/*
+ * Some of the guest registers may be modified asynchronously (e.g. from a
+ * hrtimer callback in hard irq context) and therefore need stronger atomicity
+ * guarantees than other registers.
+ */
+
+static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg,
+                                               unsigned long val)
+{
+       unsigned long temp;
+       do {
+               __asm__ __volatile__(
+               "       .set    mips3                           \n"
+               "       " __LL "%0, %1                          \n"
+               "       or      %0, %2                          \n"
+               "       " __SC  "%0, %1                         \n"
+               "       .set    mips0                           \n"
+               : "=&r" (temp), "+m" (*reg)
+               : "r" (val));
+       } while (unlikely(!temp));
+}
+
+static inline void _kvm_atomic_clear_c0_guest_reg(unsigned long *reg,
+                                                 unsigned long val)
+{
+       unsigned long temp;
+       do {
+               __asm__ __volatile__(
+               "       .set    mips3                           \n"
+               "       " __LL "%0, %1                          \n"
+               "       and     %0, %2                          \n"
+               "       " __SC  "%0, %1                         \n"
+               "       .set    mips0                           \n"
+               : "=&r" (temp), "+m" (*reg)
+               : "r" (~val));
+       } while (unlikely(!temp));
+}
+
+static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg,
+                                                  unsigned long change,
+                                                  unsigned long val)
+{
+       unsigned long temp;
+       do {
+               __asm__ __volatile__(
+               "       .set    mips3                           \n"
+               "       " __LL "%0, %1                          \n"
+               "       and     %0, %2                          \n"
+               "       or      %0, %3                          \n"
+               "       " __SC  "%0, %1                         \n"
+               "       .set    mips0                           \n"
+               : "=&r" (temp), "+m" (*reg)
+               : "r" (~change), "r" (val & change));
+       } while (unlikely(!temp));
+}
+
 #define kvm_set_c0_guest_status(cop0, val)     (cop0->reg[MIPS_CP0_STATUS][0] |= (val))
 #define kvm_clear_c0_guest_status(cop0, val)   (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val))
-#define kvm_set_c0_guest_cause(cop0, val)      (cop0->reg[MIPS_CP0_CAUSE][0] |= (val))
-#define kvm_clear_c0_guest_cause(cop0, val)    (cop0->reg[MIPS_CP0_CAUSE][0] &= ~(val))
+
+/* Cause can be modified asynchronously from hardirq hrtimer callback */
+#define kvm_set_c0_guest_cause(cop0, val)                              \
+       _kvm_atomic_set_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val)
+#define kvm_clear_c0_guest_cause(cop0, val)                            \
+       _kvm_atomic_clear_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val)
 #define kvm_change_c0_guest_cause(cop0, change, val)                   \
-{                                                                      \
-       kvm_clear_c0_guest_cause(cop0, change);                         \
-       kvm_set_c0_guest_cause(cop0, ((val) & (change)));               \
-}
+       _kvm_atomic_change_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0],  \
+                                       change, val)
+
 #define kvm_set_c0_guest_ebase(cop0, val)      (cop0->reg[MIPS_CP0_PRID][1] |= (val))
 #define kvm_clear_c0_guest_ebase(cop0, val)    (cop0->reg[MIPS_CP0_PRID][1] &= ~(val))
 #define kvm_change_c0_guest_ebase(cop0, change, val)                   \