Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 27 Aug 2016 22:51:50 +0000 (15:51 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 27 Aug 2016 22:51:50 +0000 (15:51 -0700)
Pull KVM fixes from Paolo Bonzini:
 "ARM:
   - fixes for ITS init issues, error handling, IRQ leakage, race
     conditions
   - an erratum workaround for timers
   - some removal of misleading use of errors and comments
   - a fix for GICv3 on 32-bit guests

  MIPS:
   - fix for where the guest could wrongly map the first page of
     physical memory

  x86:
   - nested virtualization fixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  MIPS: KVM: Check for pfn noslot case
  kvm: nVMX: fix nested tsc scaling
  KVM: nVMX: postpone VMCS changes on MSR_IA32_APICBASE write
  KVM: nVMX: fix msr bitmaps to prevent L2 from accessing L0 x2APIC
  arm64: KVM: report configured SRE value to 32-bit world
  arm64: KVM: remove misleading comment on pmu status
  KVM: arm/arm64: timer: Workaround misconfigured timer interrupt
  arm64: Document workaround for Cortex-A72 erratum #853709
  KVM: arm/arm64: Change misleading use of is_error_pfn
  KVM: arm64: ITS: avoid re-mapping LPIs
  KVM: arm64: check for ITS device on MSI injection
  KVM: arm64: ITS: move ITS registration into first VCPU run
  KVM: arm64: vgic-its: Make updates to propbaser/pendbaser atomic
  KVM: arm64: vgic-its: Plug race in vgic_put_irq
  KVM: arm64: vgic-its: Handle errors from vgic_add_lpi
  KVM: arm64: ITS: return 1 on successful MSI injection

13 files changed:
Documentation/arm64/silicon-errata.txt
arch/arm/kvm/mmu.c
arch/arm64/kvm/hyp/switch.c
arch/arm64/kvm/sys_regs.c
arch/mips/kvm/mmu.c
arch/x86/kvm/vmx.c
include/linux/irqchip/arm-gic-v3.h
virt/kvm/arm/arch_timer.c
virt/kvm/arm/vgic/vgic-its.c
virt/kvm/arm/vgic/vgic-mmio-v3.c
virt/kvm/arm/vgic/vgic-v3.c
virt/kvm/arm/vgic/vgic.c
virt/kvm/arm/vgic/vgic.h

index 4da60b4..ccc6032 100644 (file)
@@ -53,6 +53,7 @@ stable kernels.
 | ARM            | Cortex-A57      | #832075         | ARM64_ERRATUM_832075    |
 | ARM            | Cortex-A57      | #852523         | N/A                     |
 | ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220    |
+| ARM            | Cortex-A72      | #853709         | N/A                     |
 | ARM            | MMU-500         | #841119,#826419 | N/A                     |
 |                |                 |                 |                         |
 | Cavium         | ThunderX ITS    | #22375, #24313  | CAVIUM_ERRATUM_22375    |
index bda27b6..29d0b23 100644 (file)
@@ -1309,7 +1309,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        smp_rmb();
 
        pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
-       if (is_error_pfn(pfn))
+       if (is_error_noslot_pfn(pfn))
                return -EFAULT;
 
        if (kvm_is_device_pfn(pfn)) {
index ae7855f..5a84b45 100644 (file)
@@ -256,7 +256,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
 
        /*
         * We must restore the 32-bit state before the sysregs, thanks
-        * to Cortex-A57 erratum #852523.
+        * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
         */
        __sysreg32_restore_state(vcpu);
        __sysreg_restore_guest_state(guest_ctxt);
index b0b225c..e51367d 100644 (file)
@@ -823,14 +823,6 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
  * Architected system registers.
  * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
  *
- * We could trap ID_DFR0 and tell the guest we don't support performance
- * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
- * NAKed, so it will read the PMCR anyway.
- *
- * Therefore we tell the guest we have 0 counters.  Unfortunately, we
- * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
- * all PM registers, which doesn't crash the guest kernel at least.
- *
  * Debug handling: We do trap most, if not all debug related system
  * registers. The implementation is good enough to ensure that a guest
  * can use these with minimal performance degradation. The drawback is
@@ -1360,7 +1352,7 @@ static const struct sys_reg_desc cp15_regs[] = {
        { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
 
        /* ICC_SRE */
-       { Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi },
+       { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre },
 
        { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
 
index 6cfdcf5..121008c 100644 (file)
@@ -40,7 +40,7 @@ static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn)
        srcu_idx = srcu_read_lock(&kvm->srcu);
        pfn = gfn_to_pfn(kvm, gfn);
 
-       if (is_error_pfn(pfn)) {
+       if (is_error_noslot_pfn(pfn)) {
                kvm_err("Couldn't get pfn for gfn %#llx!\n", gfn);
                err = -EFAULT;
                goto out;
index a45d858..5cede40 100644 (file)
@@ -422,6 +422,7 @@ struct nested_vmx {
        struct list_head vmcs02_pool;
        int vmcs02_num;
        u64 vmcs01_tsc_offset;
+       bool change_vmcs01_virtual_x2apic_mode;
        /* L2 must run next, and mustn't decide to exit to L1. */
        bool nested_run_pending;
        /*
@@ -435,6 +436,8 @@ struct nested_vmx {
        bool pi_pending;
        u16 posted_intr_nv;
 
+       unsigned long *msr_bitmap;
+
        struct hrtimer preemption_timer;
        bool preemption_timer_expired;
 
@@ -924,7 +927,6 @@ static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 static unsigned long *vmx_msr_bitmap_legacy_x2apic;
 static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_nested;
 static unsigned long *vmx_vmread_bitmap;
 static unsigned long *vmx_vmwrite_bitmap;
 
@@ -2198,6 +2200,12 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
                        new.control) != old.control);
 }
 
+static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
+{
+       vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
+       vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+}
+
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -2256,10 +2264,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        /* Setup TSC multiplier */
        if (kvm_has_tsc_control &&
-           vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) {
-               vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio;
-               vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
-       }
+           vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
+               decache_tsc_multiplier(vmx);
 
        vmx_vcpu_pi_load(vcpu, cpu);
        vmx->host_pkru = read_pkru();
@@ -2508,7 +2514,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
        unsigned long *msr_bitmap;
 
        if (is_guest_mode(vcpu))
-               msr_bitmap = vmx_msr_bitmap_nested;
+               msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
        else if (cpu_has_secondary_exec_ctrls() &&
                 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
                  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
@@ -6363,13 +6369,6 @@ static __init int hardware_setup(void)
        if (!vmx_msr_bitmap_longmode_x2apic)
                goto out4;
 
-       if (nested) {
-               vmx_msr_bitmap_nested =
-                       (unsigned long *)__get_free_page(GFP_KERNEL);
-               if (!vmx_msr_bitmap_nested)
-                       goto out5;
-       }
-
        vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_vmread_bitmap)
                goto out6;
@@ -6392,8 +6391,6 @@ static __init int hardware_setup(void)
 
        memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
        memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-       if (nested)
-               memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
 
        if (setup_vmcs_config(&vmcs_config) < 0) {
                r = -EIO;
@@ -6529,9 +6526,6 @@ out8:
 out7:
        free_page((unsigned long)vmx_vmread_bitmap);
 out6:
-       if (nested)
-               free_page((unsigned long)vmx_msr_bitmap_nested);
-out5:
        free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 out4:
        free_page((unsigned long)vmx_msr_bitmap_longmode);
@@ -6557,8 +6551,6 @@ static __exit void hardware_unsetup(void)
        free_page((unsigned long)vmx_io_bitmap_a);
        free_page((unsigned long)vmx_vmwrite_bitmap);
        free_page((unsigned long)vmx_vmread_bitmap);
-       if (nested)
-               free_page((unsigned long)vmx_msr_bitmap_nested);
 
        free_kvm_area();
 }
@@ -6995,16 +6987,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
                return 1;
        }
 
+       if (cpu_has_vmx_msr_bitmap()) {
+               vmx->nested.msr_bitmap =
+                               (unsigned long *)__get_free_page(GFP_KERNEL);
+               if (!vmx->nested.msr_bitmap)
+                       goto out_msr_bitmap;
+       }
+
        vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
        if (!vmx->nested.cached_vmcs12)
-               return -ENOMEM;
+               goto out_cached_vmcs12;
 
        if (enable_shadow_vmcs) {
                shadow_vmcs = alloc_vmcs();
-               if (!shadow_vmcs) {
-                       kfree(vmx->nested.cached_vmcs12);
-                       return -ENOMEM;
-               }
+               if (!shadow_vmcs)
+                       goto out_shadow_vmcs;
                /* mark vmcs as shadow */
                shadow_vmcs->revision_id |= (1u << 31);
                /* init shadow vmcs */
@@ -7024,6 +7021,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
        skip_emulated_instruction(vcpu);
        nested_vmx_succeed(vcpu);
        return 1;
+
+out_shadow_vmcs:
+       kfree(vmx->nested.cached_vmcs12);
+
+out_cached_vmcs12:
+       free_page((unsigned long)vmx->nested.msr_bitmap);
+
+out_msr_bitmap:
+       return -ENOMEM;
 }
 
 /*
@@ -7098,6 +7104,10 @@ static void free_nested(struct vcpu_vmx *vmx)
        vmx->nested.vmxon = false;
        free_vpid(vmx->nested.vpid02);
        nested_release_vmcs12(vmx);
+       if (vmx->nested.msr_bitmap) {
+               free_page((unsigned long)vmx->nested.msr_bitmap);
+               vmx->nested.msr_bitmap = NULL;
+       }
        if (enable_shadow_vmcs)
                free_vmcs(vmx->nested.current_shadow_vmcs);
        kfree(vmx->nested.cached_vmcs12);
@@ -8419,6 +8429,12 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
 {
        u32 sec_exec_control;
 
+       /* Postpone execution until vmcs01 is the current VMCS. */
+       if (is_guest_mode(vcpu)) {
+               to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
+               return;
+       }
+
        /*
         * There is not point to enable virtualize x2apic without enable
         * apicv
@@ -9472,8 +9488,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
 {
        int msr;
        struct page *page;
-       unsigned long *msr_bitmap;
+       unsigned long *msr_bitmap_l1;
+       unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
 
+       /* This shortcut is ok because we support only x2APIC MSRs so far. */
        if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
                return false;
 
@@ -9482,63 +9500,37 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
                WARN_ON(1);
                return false;
        }
-       msr_bitmap = (unsigned long *)kmap(page);
-       if (!msr_bitmap) {
+       msr_bitmap_l1 = (unsigned long *)kmap(page);
+       if (!msr_bitmap_l1) {
                nested_release_page_clean(page);
                WARN_ON(1);
                return false;
        }
 
+       memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
+
        if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
                if (nested_cpu_has_apic_reg_virt(vmcs12))
                        for (msr = 0x800; msr <= 0x8ff; msr++)
                                nested_vmx_disable_intercept_for_msr(
-                                       msr_bitmap,
-                                       vmx_msr_bitmap_nested,
+                                       msr_bitmap_l1, msr_bitmap_l0,
                                        msr, MSR_TYPE_R);
-               /* TPR is allowed */
-               nested_vmx_disable_intercept_for_msr(msr_bitmap,
-                               vmx_msr_bitmap_nested,
+
+               nested_vmx_disable_intercept_for_msr(
+                               msr_bitmap_l1, msr_bitmap_l0,
                                APIC_BASE_MSR + (APIC_TASKPRI >> 4),
                                MSR_TYPE_R | MSR_TYPE_W);
+
                if (nested_cpu_has_vid(vmcs12)) {
-                       /* EOI and self-IPI are allowed */
                        nested_vmx_disable_intercept_for_msr(
-                               msr_bitmap,
-                               vmx_msr_bitmap_nested,
+                               msr_bitmap_l1, msr_bitmap_l0,
                                APIC_BASE_MSR + (APIC_EOI >> 4),
                                MSR_TYPE_W);
                        nested_vmx_disable_intercept_for_msr(
-                               msr_bitmap,
-                               vmx_msr_bitmap_nested,
+                               msr_bitmap_l1, msr_bitmap_l0,
                                APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
                                MSR_TYPE_W);
                }
-       } else {
-               /*
-                * Enable reading intercept of all the x2apic
-                * MSRs. We should not rely on vmcs12 to do any
-                * optimizations here, it may have been modified
-                * by L1.
-                */
-               for (msr = 0x800; msr <= 0x8ff; msr++)
-                       __vmx_enable_intercept_for_msr(
-                               vmx_msr_bitmap_nested,
-                               msr,
-                               MSR_TYPE_R);
-
-               __vmx_enable_intercept_for_msr(
-                               vmx_msr_bitmap_nested,
-                               APIC_BASE_MSR + (APIC_TASKPRI >> 4),
-                               MSR_TYPE_W);
-               __vmx_enable_intercept_for_msr(
-                               vmx_msr_bitmap_nested,
-                               APIC_BASE_MSR + (APIC_EOI >> 4),
-                               MSR_TYPE_W);
-               __vmx_enable_intercept_for_msr(
-                               vmx_msr_bitmap_nested,
-                               APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
-                               MSR_TYPE_W);
        }
        kunmap(page);
        nested_release_page_clean(page);
@@ -9957,10 +9949,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        }
 
        if (cpu_has_vmx_msr_bitmap() &&
-           exec_control & CPU_BASED_USE_MSR_BITMAPS) {
-               nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
-               /* MSR_BITMAP will be set by following vmx_set_efer. */
-       } else
+           exec_control & CPU_BASED_USE_MSR_BITMAPS &&
+           nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
+               ; /* MSR_BITMAP will be set by following vmx_set_efer. */
+       else
                exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
 
        /*
@@ -10011,6 +10003,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                        vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
        else
                vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+       if (kvm_has_tsc_control)
+               decache_tsc_multiplier(vmx);
 
        if (enable_vpid) {
                /*
@@ -10767,6 +10761,14 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
        else
                vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
                              PIN_BASED_VMX_PREEMPTION_TIMER);
+       if (kvm_has_tsc_control)
+               decache_tsc_multiplier(vmx);
+
+       if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
+               vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
+               vmx_set_virtual_x2apic_mode(vcpu,
+                               vcpu->arch.apic_base & X2APIC_ENABLE);
+       }
 
        /* This is needed for same reason as it was needed in prepare_vmcs02 */
        vmx->host_rsp = 0;
index 56b0b7e..99ac022 100644 (file)
  */
 #define E_ITS_MOVI_UNMAPPED_INTERRUPT          0x010107
 #define E_ITS_MOVI_UNMAPPED_COLLECTION         0x010109
+#define E_ITS_INT_UNMAPPED_INTERRUPT           0x010307
 #define E_ITS_CLEAR_UNMAPPED_INTERRUPT         0x010507
 #define E_ITS_MAPD_DEVICE_OOR                  0x010801
 #define E_ITS_MAPC_PROCNUM_OOR                 0x010902
index 4fde8c7..77e6ccf 100644 (file)
@@ -33,6 +33,7 @@
 static struct timecounter *timecounter;
 static struct workqueue_struct *wqueue;
 static unsigned int host_vtimer_irq;
+static u32 host_vtimer_irq_flags;
 
 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 {
@@ -365,7 +366,7 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 
 static void kvm_timer_init_interrupt(void *info)
 {
-       enable_percpu_irq(host_vtimer_irq, 0);
+       enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
 }
 
 int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
@@ -432,6 +433,14 @@ int kvm_timer_hyp_init(void)
        }
        host_vtimer_irq = info->virtual_irq;
 
+       host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
+       if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
+           host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
+               kvm_err("Invalid trigger for IRQ%d, assuming level low\n",
+                       host_vtimer_irq);
+               host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
+       }
+
        err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
                                 "kvm guest timer", kvm_get_running_vcpus());
        if (err) {
index 07411cf..4660a7d 100644 (file)
@@ -51,7 +51,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid)
 
        irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL);
        if (!irq)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        INIT_LIST_HEAD(&irq->lpi_list);
        INIT_LIST_HEAD(&irq->ap_list);
@@ -441,39 +441,63 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
  * Find the target VCPU and the LPI number for a given devid/eventid pair
  * and make this IRQ pending, possibly injecting it.
  * Must be called with the its_lock mutex held.
+ * Returns 0 on success, a positive error value for any ITS mapping
+ * related errors and negative error values for generic errors.
  */
-static void vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
-                                u32 devid, u32 eventid)
+static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
+                               u32 devid, u32 eventid)
 {
+       struct kvm_vcpu *vcpu;
        struct its_itte *itte;
 
        if (!its->enabled)
-               return;
+               return -EBUSY;
 
        itte = find_itte(its, devid, eventid);
-       /* Triggering an unmapped IRQ gets silently dropped. */
-       if (itte && its_is_collection_mapped(itte->collection)) {
-               struct kvm_vcpu *vcpu;
-
-               vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
-               if (vcpu && vcpu->arch.vgic_cpu.lpis_enabled) {
-                       spin_lock(&itte->irq->irq_lock);
-                       itte->irq->pending = true;
-                       vgic_queue_irq_unlock(kvm, itte->irq);
-               }
-       }
+       if (!itte || !its_is_collection_mapped(itte->collection))
+               return E_ITS_INT_UNMAPPED_INTERRUPT;
+
+       vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
+       if (!vcpu)
+               return E_ITS_INT_UNMAPPED_INTERRUPT;
+
+       if (!vcpu->arch.vgic_cpu.lpis_enabled)
+               return -EBUSY;
+
+       spin_lock(&itte->irq->irq_lock);
+       itte->irq->pending = true;
+       vgic_queue_irq_unlock(kvm, itte->irq);
+
+       return 0;
+}
+
+static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev)
+{
+       struct vgic_io_device *iodev;
+
+       if (dev->ops != &kvm_io_gic_ops)
+               return NULL;
+
+       iodev = container_of(dev, struct vgic_io_device, dev);
+
+       if (iodev->iodev_type != IODEV_ITS)
+               return NULL;
+
+       return iodev;
 }
 
 /*
  * Queries the KVM IO bus framework to get the ITS pointer from the given
  * doorbell address.
  * We then call vgic_its_trigger_msi() with the decoded data.
+ * According to the KVM_SIGNAL_MSI API description returns 1 on success.
  */
 int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
 {
        u64 address;
        struct kvm_io_device *kvm_io_dev;
        struct vgic_io_device *iodev;
+       int ret;
 
        if (!vgic_has_its(kvm))
                return -ENODEV;
@@ -485,15 +509,28 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
 
        kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
        if (!kvm_io_dev)
-               return -ENODEV;
+               return -EINVAL;
 
-       iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
+       iodev = vgic_get_its_iodev(kvm_io_dev);
+       if (!iodev)
+               return -EINVAL;
 
        mutex_lock(&iodev->its->its_lock);
-       vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data);
+       ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data);
        mutex_unlock(&iodev->its->its_lock);
 
-       return 0;
+       if (ret < 0)
+               return ret;
+
+       /*
+        * KVM_SIGNAL_MSI demands a return value > 0 for success and 0
+        * if the guest has blocked the MSI. So we map any LPI mapping
+        * related error to that.
+        */
+       if (ret)
+               return 0;
+       else
+               return 1;
 }
 
 /* Requires the its_lock to be held. */
@@ -502,7 +539,8 @@ static void its_free_itte(struct kvm *kvm, struct its_itte *itte)
        list_del(&itte->itte_list);
 
        /* This put matches the get in vgic_add_lpi. */
-       vgic_put_irq(kvm, itte->irq);
+       if (itte->irq)
+               vgic_put_irq(kvm, itte->irq);
 
        kfree(itte);
 }
@@ -697,6 +735,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
        struct its_device *device;
        struct its_collection *collection, *new_coll = NULL;
        int lpi_nr;
+       struct vgic_irq *irq;
 
        device = find_its_device(its, device_id);
        if (!device)
@@ -710,6 +749,10 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
            lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser))
                return E_ITS_MAPTI_PHYSICALID_OOR;
 
+       /* If there is an existing mapping, behavior is UNPREDICTABLE. */
+       if (find_itte(its, device_id, event_id))
+               return 0;
+
        collection = find_collection(its, coll_id);
        if (!collection) {
                int ret = vgic_its_alloc_collection(its, &collection, coll_id);
@@ -718,22 +761,28 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
                new_coll = collection;
        }
 
-       itte = find_itte(its, device_id, event_id);
+       itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL);
        if (!itte) {
-               itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL);
-               if (!itte) {
-                       if (new_coll)
-                               vgic_its_free_collection(its, coll_id);
-                       return -ENOMEM;
-               }
-
-               itte->event_id  = event_id;
-               list_add_tail(&itte->itte_list, &device->itt_head);
+               if (new_coll)
+                       vgic_its_free_collection(its, coll_id);
+               return -ENOMEM;
        }
 
+       itte->event_id  = event_id;
+       list_add_tail(&itte->itte_list, &device->itt_head);
+
        itte->collection = collection;
        itte->lpi = lpi_nr;
-       itte->irq = vgic_add_lpi(kvm, lpi_nr);
+
+       irq = vgic_add_lpi(kvm, lpi_nr);
+       if (IS_ERR(irq)) {
+               if (new_coll)
+                       vgic_its_free_collection(its, coll_id);
+               its_free_itte(kvm, itte);
+               return PTR_ERR(irq);
+       }
+       itte->irq = irq;
+
        update_affinity_itte(kvm, itte);
 
        /*
@@ -981,9 +1030,7 @@ static int vgic_its_cmd_handle_int(struct kvm *kvm, struct vgic_its *its,
        u32 msi_data = its_cmd_get_id(its_cmd);
        u64 msi_devid = its_cmd_get_deviceid(its_cmd);
 
-       vgic_its_trigger_msi(kvm, its, msi_devid, msi_data);
-
-       return 0;
+       return vgic_its_trigger_msi(kvm, its, msi_devid, msi_data);
 }
 
 /*
@@ -1288,13 +1335,13 @@ void vgic_enable_lpis(struct kvm_vcpu *vcpu)
                its_sync_lpi_pending_table(vcpu);
 }
 
-static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its)
+static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
 {
        struct vgic_io_device *iodev = &its->iodev;
        int ret;
 
-       if (its->initialized)
-               return 0;
+       if (!its->initialized)
+               return -EBUSY;
 
        if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base))
                return -ENXIO;
@@ -1311,9 +1358,6 @@ static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its)
                                      KVM_VGIC_V3_ITS_SIZE, &iodev->dev);
        mutex_unlock(&kvm->slots_lock);
 
-       if (!ret)
-               its->initialized = true;
-
        return ret;
 }
 
@@ -1435,9 +1479,6 @@ static int vgic_its_set_attr(struct kvm_device *dev,
                if (type != KVM_VGIC_ITS_ADDR_TYPE)
                        return -ENODEV;
 
-               if (its->initialized)
-                       return -EBUSY;
-
                if (copy_from_user(&addr, uaddr, sizeof(addr)))
                        return -EFAULT;
 
@@ -1453,7 +1494,9 @@ static int vgic_its_set_attr(struct kvm_device *dev,
        case KVM_DEV_ARM_VGIC_GRP_CTRL:
                switch (attr->attr) {
                case KVM_DEV_ARM_VGIC_CTRL_INIT:
-                       return vgic_its_init_its(dev->kvm, its);
+                       its->initialized = true;
+
+                       return 0;
                }
                break;
        }
@@ -1498,3 +1541,30 @@ int kvm_vgic_register_its_device(void)
        return kvm_register_device_ops(&kvm_arm_vgic_its_ops,
                                       KVM_DEV_TYPE_ARM_VGIC_ITS);
 }
+
+/*
+ * Registers all ITSes with the kvm_io_bus framework.
+ * To follow the existing VGIC initialization sequence, this has to be
+ * done as late as possible, just before the first VCPU runs.
+ */
+int vgic_register_its_iodevs(struct kvm *kvm)
+{
+       struct kvm_device *dev;
+       int ret = 0;
+
+       list_for_each_entry(dev, &kvm->devices, vm_node) {
+               if (dev->ops != &kvm_arm_vgic_its_ops)
+                       continue;
+
+               ret = vgic_register_its_iodev(kvm, dev->private);
+               if (ret)
+                       return ret;
+               /*
+                * We don't need to care about tearing down previously
+                * registered ITSes, as the kvm_io_bus framework removes
+                * them for us if the VM gets destroyed.
+                */
+       }
+
+       return ret;
+}
index ff668e0..90d8181 100644 (file)
@@ -306,16 +306,19 @@ static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu,
 {
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-       u64 propbaser = dist->propbaser;
+       u64 old_propbaser, propbaser;
 
        /* Storing a value with LPIs already enabled is undefined */
        if (vgic_cpu->lpis_enabled)
                return;
 
-       propbaser = update_64bit_reg(propbaser, addr & 4, len, val);
-       propbaser = vgic_sanitise_propbaser(propbaser);
-
-       dist->propbaser = propbaser;
+       do {
+               old_propbaser = dist->propbaser;
+               propbaser = old_propbaser;
+               propbaser = update_64bit_reg(propbaser, addr & 4, len, val);
+               propbaser = vgic_sanitise_propbaser(propbaser);
+       } while (cmpxchg64(&dist->propbaser, old_propbaser,
+                          propbaser) != old_propbaser);
 }
 
 static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu,
@@ -331,16 +334,19 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
                                     unsigned long val)
 {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-       u64 pendbaser = vgic_cpu->pendbaser;
+       u64 old_pendbaser, pendbaser;
 
        /* Storing a value with LPIs already enabled is undefined */
        if (vgic_cpu->lpis_enabled)
                return;
 
-       pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val);
-       pendbaser = vgic_sanitise_pendbaser(pendbaser);
-
-       vgic_cpu->pendbaser = pendbaser;
+       do {
+               old_pendbaser = vgic_cpu->pendbaser;
+               pendbaser = old_pendbaser;
+               pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val);
+               pendbaser = vgic_sanitise_pendbaser(pendbaser);
+       } while (cmpxchg64(&vgic_cpu->pendbaser, old_pendbaser,
+                          pendbaser) != old_pendbaser);
 }
 
 /*
index 0506543..9f0dae3 100644 (file)
@@ -289,6 +289,14 @@ int vgic_v3_map_resources(struct kvm *kvm)
                goto out;
        }
 
+       if (vgic_has_its(kvm)) {
+               ret = vgic_register_its_iodevs(kvm);
+               if (ret) {
+                       kvm_err("Unable to register VGIC ITS MMIO regions\n");
+                       goto out;
+               }
+       }
+
        dist->ready = true;
 
 out:
index e7aeac7..e83b7fe 100644 (file)
@@ -117,17 +117,17 @@ static void vgic_irq_release(struct kref *ref)
 
 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
 {
-       struct vgic_dist *dist;
+       struct vgic_dist *dist = &kvm->arch.vgic;
 
        if (irq->intid < VGIC_MIN_LPI)
                return;
 
-       if (!kref_put(&irq->refcount, vgic_irq_release))
+       spin_lock(&dist->lpi_list_lock);
+       if (!kref_put(&irq->refcount, vgic_irq_release)) {
+               spin_unlock(&dist->lpi_list_lock);
                return;
+       };
 
-       dist = &kvm->arch.vgic;
-
-       spin_lock(&dist->lpi_list_lock);
        list_del(&irq->lpi_list);
        dist->lpi_list_count--;
        spin_unlock(&dist->lpi_list_lock);
index 1d8e21d..6c4625c 100644 (file)
@@ -84,6 +84,7 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu);
 int vgic_v3_probe(const struct gic_kvm_info *info);
 int vgic_v3_map_resources(struct kvm *kvm);
 int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
+int vgic_register_its_iodevs(struct kvm *kvm);
 bool vgic_has_its(struct kvm *kvm);
 int kvm_vgic_register_its_device(void);
 void vgic_enable_lpis(struct kvm_vcpu *vcpu);
@@ -140,6 +141,11 @@ static inline int vgic_register_redist_iodevs(struct kvm *kvm,
        return -ENODEV;
 }
 
+static inline int vgic_register_its_iodevs(struct kvm *kvm)
+{
+       return -ENODEV;
+}
+
 static inline bool vgic_has_its(struct kvm *kvm)
 {
        return false;