Merge tag 'kvm-arm-for-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm...
author Paolo Bonzini <pbonzini@redhat.com>
Tue, 5 Aug 2014 07:47:45 +0000 (09:47 +0200)
committer Paolo Bonzini <pbonzini@redhat.com>
Tue, 5 Aug 2014 07:47:45 +0000 (09:47 +0200)
KVM/ARM New features for 3.17 include:
 - Fixes and code refactoring for stage2 kvm MMU unmap_range
 - Support unmapping IPAs on deleting memslots for arm and arm64
 - Support MMIO mappings in stage2 faults
 - KVM VGIC v2 emulation on GICv3 hardware
 - Big-Endian support for arm/arm64 (guest and host)
 - Debug Architecture support for arm64 (arm32 is on Christoffer's todo list)

Conflicts:
virt/kvm/arm/vgic.c [last minute cherry-pick from 3.17 to 3.16]

Documentation/arm64/booting.txt
arch/arm64/kernel/debug-monitors.c
virt/kvm/arm/vgic-v2.c
virt/kvm/arm/vgic.c

@@@ -72,54 -72,27 +72,54 @@@ The decompressed kernel image contains 
  
    u32 code0;                  /* Executable code */
    u32 code1;                  /* Executable code */
 -  u64 text_offset;            /* Image load offset */
 -  u64 res0    = 0;            /* reserved */
 -  u64 res1    = 0;            /* reserved */
 +  u64 text_offset;            /* Image load offset, little endian */
 +  u64 image_size;             /* Effective Image size, little endian */
 +  u64 flags;                  /* kernel flags, little endian */
    u64 res2    = 0;            /* reserved */
    u64 res3    = 0;            /* reserved */
    u64 res4    = 0;            /* reserved */
    u32 magic   = 0x644d5241;   /* Magic number, little endian, "ARM\x64" */
 -  u32 res5 = 0;               /* reserved */
 +  u32 res5;                   /* reserved (used for PE COFF offset) */
  
  
  Header notes:
  
 +- As of v3.17, all fields are little endian unless stated otherwise.
 +
  - code0/code1 are responsible for branching to stext.
 +
  - when booting through EFI, code0/code1 are initially skipped.
    res5 is an offset to the PE header and the PE header has the EFI
 -  entry point (efi_stub_entry). When the stub has done its work, it
 +  entry point (efi_stub_entry).  When the stub has done its work, it
    jumps to code0 to resume the normal boot process.
  
 -The image must be placed at the specified offset (currently 0x80000)
 -from the start of the system RAM and called there. The start of the
 -system RAM must be aligned to 2MB.
 +- Prior to v3.17, the endianness of text_offset was not specified.  In
 +  these cases image_size is zero and text_offset is 0x80000 in the
 +  endianness of the kernel.  Where image_size is non-zero, image_size is
 +  little-endian and must be respected.  Where image_size is zero,
 +  text_offset can be assumed to be 0x80000.
 +
 +- The flags field (introduced in v3.17) is a little-endian 64-bit field
 +  composed as follows:
 +  Bit 0:      Kernel endianness.  1 if BE, 0 if LE.
 +  Bits 1-63:  Reserved.
 +
 +- When image_size is zero, a bootloader should attempt to keep as much
 +  memory as possible free for use by the kernel immediately after the
 +  end of the kernel image. The amount of space required will vary
 +  depending on selected features, and is effectively unbounded.
 +
 +The Image must be placed text_offset bytes from a 2MB aligned base
 +address near the start of usable system RAM and called there. Memory
 +below that base address is currently unusable by Linux, and therefore it
 +is strongly recommended that this location is the start of system RAM.
 +At least image_size bytes from the start of the image must be free for
 +use by the kernel.
 +
 +Any memory described to the kernel (even that below the 2MB aligned base
 +address) which is not marked as reserved from the kernel (e.g. with a
 +memreserve region in the device tree) will be considered as available to
 +the kernel.
  
  Before jumping into the kernel, the following conditions must be met:
  
  All writable architected system registers at the exception level where
  the kernel image will be entered must be initialised by software at a
    higher exception level to prevent execution in an UNKNOWN state.
  
+   For systems with a GICv3 interrupt controller:
+   - If EL3 is present:
+     ICC_SRE_EL3.Enable (bit 3) must be initialised to 0b1.
+     ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b1.
+   - If the kernel is entered at EL1:
+     ICC_SRE_EL2.Enable (bit 3) must be initialised to 0b1.
+     ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1.
  The requirements described above for CPU mode, caches, MMUs, architected
  timers, coherency and system registers apply to all CPUs.  All CPUs must
  enter the kernel in the same exception level.
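
For illustration, the placement rules above amount to loader logic like the
following sketch (editorial aside, not part of the patch; freestanding C for
a little-endian loader -- a big-endian loader must byte-swap the fields --
and the function and macro names here are made up):

   #include <stdint.h>

   /* Mirrors the 64-byte header shown above. */
   struct arm64_image_header {
           uint32_t code0, code1;          /* executable code */
           uint64_t text_offset;           /* image load offset, LE */
           uint64_t image_size;            /* effective image size, LE */
           uint64_t flags;                 /* kernel flags, LE */
           uint64_t res2, res3, res4;      /* reserved */
           uint32_t magic;                 /* 0x644d5241, "ARM\x64" */
           uint32_t res5;                  /* reserved (PE COFF offset) */
   };

   #define ARM64_IMAGE_MAGIC 0x644d5241u

   /* Returns where to copy the Image, or 0 on a bad header.
    * base must be a 2MB-aligned address near the start of RAM. */
   static uint64_t arm64_place_image(const struct arm64_image_header *h,
                                     uint64_t base)
   {
           uint64_t text_offset = h->text_offset;

           if (h->magic != ARM64_IMAGE_MAGIC)
                   return 0;

           /* Pre-v3.17 images: image_size is zero and text_offset,
            * whose endianness is then unspecified, can be assumed
            * to be 0x80000. */
           if (h->image_size == 0)
                   text_offset = 0x80000;

           /* flags bit 0 is the kernel endianness: 1 = BE, 0 = LE;
            * bits 1-63 are reserved and should be ignored. */

           return base + text_offset;
   }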
diff --combined arch/arm64/kernel/debug-monitors.c
  #include <asm/cputype.h>
  #include <asm/system_misc.h>
  
- /* Low-level stepping controls. */
- #define DBG_MDSCR_SS          (1 << 0)
- #define DBG_SPSR_SS           (1 << 21)
- /* MDSCR_EL1 enabling bits */
- #define DBG_MDSCR_KDE         (1 << 13)
- #define DBG_MDSCR_MDE         (1 << 15)
- #define DBG_MDSCR_MASK                ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
  /* Determine debug architecture. */
  u8 debug_monitors_arch(void)
  {
@@@ -315,20 -306,20 +306,20 @@@ static int brk_handler(unsigned long ad
  {
        siginfo_t info;
  
 -      if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
 -              return 0;
 +      if (user_mode(regs)) {
 +              info = (siginfo_t) {
 +                      .si_signo = SIGTRAP,
 +                      .si_errno = 0,
 +                      .si_code  = TRAP_BRKPT,
 +                      .si_addr  = (void __user *)instruction_pointer(regs),
 +              };
  
 -      if (!user_mode(regs))
 +              force_sig_info(SIGTRAP, &info, current);
 +      } else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) {
 +              pr_warning("Unexpected kernel BRK exception at EL1\n");
                return -EFAULT;
 +      }
  
 -      info = (siginfo_t) {
 -              .si_signo = SIGTRAP,
 -              .si_errno = 0,
 -              .si_code  = TRAP_BRKPT,
 -              .si_addr  = (void __user *)instruction_pointer(regs),
 -      };
 -
 -      force_sig_info(SIGTRAP, &info, current);
        return 0;
  }
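
With this reordering, call_break_hook() is only consulted for BRKs taken from
kernel mode; user BRKs get SIGTRAP directly. For context, a kernel-side user
of BRK would claim its immediate via the break_hook interface, roughly as
below (a sketch against the v3.16-era API in asm/debug-monitors.h; the
immediate value and the my_* names are illustrative):

   #include <linux/init.h>
   #include <asm/debug-monitors.h>

   #define MY_BRK_IMM      0x400   /* illustrative immediate */

   static int my_brk_fn(struct pt_regs *regs, unsigned int esr)
   {
           /* ... handle the trap, e.g. fix up regs and skip the BRK ... */
           return DBG_HOOK_HANDLED;        /* brk_handler then returns 0 */
   }

   static struct break_hook my_brk_hook = {
           .esr_val        = DBG_ESR_VAL_BRK(MY_BRK_IMM),
           .esr_mask       = 0xffffffff,
           .fn             = my_brk_fn,
   };

   static int __init my_brk_init(void)
   {
           register_break_hook(&my_brk_hook);
           return 0;
   }
   arch_initcall(my_brk_init);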
  
diff --combined virt/kvm/arm/vgic-v2.c
index 0000000,d6c9c14..01124ef
mode 000000,100644..100644
--- /dev/null
+++ b/virt/kvm/arm/vgic-v2.c
@@@ -1,0 -1,249 +1,265 @@@
+ /*
+  * Copyright (C) 2012,2013 ARM Limited, All Rights Reserved.
+  * Author: Marc Zyngier <marc.zyngier@arm.com>
+  *
+  * This program is free software; you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License version 2 as
+  * published by the Free Software Foundation.
+  *
+  * This program is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  * GNU General Public License for more details.
+  *
+  * You should have received a copy of the GNU General Public License
+  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+  */
+ #include <linux/cpu.h>
+ #include <linux/kvm.h>
+ #include <linux/kvm_host.h>
+ #include <linux/interrupt.h>
+ #include <linux/io.h>
+ #include <linux/of.h>
+ #include <linux/of_address.h>
+ #include <linux/of_irq.h>
+ #include <linux/irqchip/arm-gic.h>
+ #include <asm/kvm_emulate.h>
+ #include <asm/kvm_arm.h>
+ #include <asm/kvm_mmu.h>
+ static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
+ {
+       struct vgic_lr lr_desc;
+       u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr];
+       lr_desc.irq     = val & GICH_LR_VIRTUALID;
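+       /* Only SGIs (irq < 16) carry a source CPU in the LR's PHYSID field. */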
+       if (lr_desc.irq <= 15)
+               lr_desc.source  = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
+       else
+               lr_desc.source = 0;
+       lr_desc.state   = 0;
+       if (val & GICH_LR_PENDING_BIT)
+               lr_desc.state |= LR_STATE_PENDING;
+       if (val & GICH_LR_ACTIVE_BIT)
+               lr_desc.state |= LR_STATE_ACTIVE;
+       if (val & GICH_LR_EOI)
+               lr_desc.state |= LR_EOI_INT;
+       return lr_desc;
+ }
+ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
+                          struct vgic_lr lr_desc)
+ {
+       u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;
+       if (lr_desc.state & LR_STATE_PENDING)
+               lr_val |= GICH_LR_PENDING_BIT;
+       if (lr_desc.state & LR_STATE_ACTIVE)
+               lr_val |= GICH_LR_ACTIVE_BIT;
+       if (lr_desc.state & LR_EOI_INT)
+               lr_val |= GICH_LR_EOI;
+       vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
+ }
+ static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+                                 struct vgic_lr lr_desc)
+ {
+       if (!(lr_desc.state & LR_STATE_MASK))
+               set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
+ }
+ static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
+ {
+       u64 val;
+ #if BITS_PER_LONG == 64
+       val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1];
+       val <<= 32;
+       val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0];
+ #else
+       val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
+ #endif
+       return val;
+ }
+ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
+ {
+       u64 val;
+ #if BITS_PER_LONG == 64
+       val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1];
+       val <<= 32;
+       val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0];
+ #else
+       val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
+ #endif
+       return val;
+ }
+ static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
+ {
+       u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
+       u32 ret = 0;
+       if (misr & GICH_MISR_EOI)
+               ret |= INT_STATUS_EOI;
+       if (misr & GICH_MISR_U)
+               ret |= INT_STATUS_UNDERFLOW;
+       return ret;
+ }
+ static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu)
+ {
+       vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE;
+ }
+ static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu)
+ {
+       vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
+ }
+ static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+ {
+       u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
+       vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT;
+       vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+       vmcrp->bpr  = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT;
+       vmcrp->pmr  = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT;
+ }
+ static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+ {
+       u32 vmcr;
+       vmcr  = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
+       vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK;
+       vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK;
+       vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
+       vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
+ }
+ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
+ {
+       /*
+        * By forcing VMCR to zero, the GIC will restore the binary
+        * points to their reset values. Anything else resets to zero
+        * anyway.
+        */
+       vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
+       /* Get the show on the road... */
+       vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
+ }
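+ /* GICv2 implementations of the generic vgic_ops hooks dispatched from vgic.c. */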
+ static const struct vgic_ops vgic_v2_ops = {
+       .get_lr                 = vgic_v2_get_lr,
+       .set_lr                 = vgic_v2_set_lr,
+       .sync_lr_elrsr          = vgic_v2_sync_lr_elrsr,
+       .get_elrsr              = vgic_v2_get_elrsr,
+       .get_eisr               = vgic_v2_get_eisr,
+       .get_interrupt_status   = vgic_v2_get_interrupt_status,
+       .enable_underflow       = vgic_v2_enable_underflow,
+       .disable_underflow      = vgic_v2_disable_underflow,
+       .get_vmcr               = vgic_v2_get_vmcr,
+       .set_vmcr               = vgic_v2_set_vmcr,
+       .enable                 = vgic_v2_enable,
+ };
+ static struct vgic_params vgic_v2_params;
+ /**
+  * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
+  * @vgic_node: pointer to the DT node
+  * @ops:      address of a pointer to the GICv2 operations
+  * @params:   address of a pointer to HW-specific parameters
+  *
+  * Returns 0 if a GICv2 has been found, with the low level operations
+  * in *ops and the HW parameters in *params. Returns an error code
+  * otherwise.
+  */
+ int vgic_v2_probe(struct device_node *vgic_node,
+                 const struct vgic_ops **ops,
+                 const struct vgic_params **params)
+ {
+       int ret;
+       struct resource vctrl_res;
+       struct resource vcpu_res;
+       struct vgic_params *vgic = &vgic_v2_params;
+       vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
+       if (!vgic->maint_irq) {
+               kvm_err("error getting vgic maintenance irq from DT\n");
+               ret = -ENXIO;
+               goto out;
+       }
+       ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
+       if (ret) {
+               kvm_err("Cannot obtain GICH resource\n");
+               goto out;
+       }
+       vgic->vctrl_base = of_iomap(vgic_node, 2);
+       if (!vgic->vctrl_base) {
+               kvm_err("Cannot ioremap GICH\n");
+               ret = -ENOMEM;
+               goto out;
+       }
+       vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR);
+       vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
+       ret = create_hyp_io_mappings(vgic->vctrl_base,
+                                    vgic->vctrl_base + resource_size(&vctrl_res),
+                                    vctrl_res.start);
+       if (ret) {
+               kvm_err("Cannot map VCTRL into hyp\n");
+               goto out_unmap;
+       }
+       if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
+               kvm_err("Cannot obtain GICV resource\n");
+               ret = -ENXIO;
+               goto out_unmap;
+       }
++
++      if (!PAGE_ALIGNED(vcpu_res.start)) {
++              kvm_err("GICV physical address 0x%llx not page aligned\n",
++                      (unsigned long long)vcpu_res.start);
++              ret = -ENXIO;
++              goto out_unmap;
++      }
++
++      if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
++              kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
++                      (unsigned long long)resource_size(&vcpu_res),
++                      PAGE_SIZE);
++              ret = -ENXIO;
++              goto out_unmap;
++      }
++
+       vgic->vcpu_base = vcpu_res.start;
+       kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
+                vctrl_res.start, vgic->maint_irq);
+       vgic->type = VGIC_V2;
+       *ops = &vgic_v2_ops;
+       *params = vgic;
+       goto out;
+ out_unmap:
+       iounmap(vgic->vctrl_base);
+ out:
+       of_node_put(vgic_node);
+       return ret;
+ }
diff --combined virt/kvm/arm/vgic.c
  #define IMPLEMENTER_ARM               0x43b
  #define GICC_ARCH_VERSION_V2  0x2
  
- /* Physical address of vgic virtual cpu interface */
- static phys_addr_t vgic_vcpu_base;
- /* Virtual control interface base address */
- static void __iomem *vgic_vctrl_base;
- static struct device_node *vgic_node;
  #define ACCESS_READ_VALUE     (1 << 0)
  #define ACCESS_READ_RAZ               (0 << 0)
  #define ACCESS_READ_MASK(x)   ((x) & (1 << 0))
  #define ACCESS_WRITE_MASK(x)  ((x) & (3 << 1))
  
  static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
+ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
  static void vgic_update_state(struct kvm *kvm);
  static void vgic_kick_vcpus(struct kvm *kvm);
  static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
- static u32 vgic_nr_lr;
+ static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
+ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
+ static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+ static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
  
- static unsigned int vgic_maint_irq;
+ static const struct vgic_ops *vgic_ops;
+ static const struct vgic_params *vgic;
+ /*
+  * struct vgic_bitmap contains unions that provide two views of
+  * the same data. In one case it is an array of registers of
+  * u32's, and in the other case it is a bitmap of unsigned
+  * longs.
+  *
+  * This does not work on 64-bit BE systems, because the bitmap access
+  * will store two consecutive 32-bit words with the higher-addressed
+  * register's bits at the lower index and the lower-addressed register's
+  * bits at the higher index.
+  *
+  * Therefore, swizzle the register index when accessing the 32-bit word
+  * registers to access the right register's value.
+  */
+ #if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64
+ #define REG_OFFSET_SWIZZLE    1
+ #else
+ #define REG_OFFSET_SWIZZLE    0
+ #endif
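
A freestanding demonstration of why the index XOR works (editorial aside,
not part of the patch; compiles on any 64-bit host):

   #include <stdio.h>
   #include <string.h>
   #include <stdint.h>

   int main(void)
   {
           unsigned long word = 1UL;       /* as set_bit(0, ...) would store */
           uint32_t reg[2];

           memcpy(reg, &word, sizeof(reg));
           /* LE64: the bit lands in reg[0]; BE64: in reg[1].
            * Hence indexing reg[i ^ 1] on 64-bit big endian. */
           printf("reg[0]=%08x reg[1]=%08x\n", reg[0], reg[1]);
           return 0;
   }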
  
  static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
                                int cpuid, u32 offset)
  {
        offset >>= 2;
        if (!offset)
-               return x->percpu[cpuid].reg;
+               return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE);
        else
-               return x->shared.reg + offset - 1;
+               return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
  }
  
  static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
@@@ -241,12 -258,12 +258,12 @@@ static void vgic_cpu_irq_clear(struct k
  
  static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
  {
-       return *((u32 *)mmio->data) & mask;
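+       /* mmio->data holds the value as a little-endian byte array. */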
+       return le32_to_cpu(*((u32 *)mmio->data)) & mask;
  }
  
  static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
  {
-       *((u32 *)mmio->data) = value & mask;
+       *((u32 *)mmio->data) = cpu_to_le32(value) & mask;
  }
  
  /**
@@@ -593,18 -610,6 +610,6 @@@ static bool handle_mmio_sgi_reg(struct 
        return false;
  }
  
- #define LR_CPUID(lr)  \
-       (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
- #define LR_IRQID(lr)  \
-       ((lr) & GICH_LR_VIRTUALID)
- static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu)
- {
-       clear_bit(lr_nr, vgic_cpu->lr_used);
-       vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE;
-       vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
- }
  /**
   * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
   * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
@@@ -622,13 -627,10 +627,10 @@@ static void vgic_unqueue_irqs(struct kv
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        int vcpu_id = vcpu->vcpu_id;
-       int i, irq, source_cpu;
-       u32 *lr;
+       int i;
  
        for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
-               lr = &vgic_cpu->vgic_lr[i];
-               irq = LR_IRQID(*lr);
-               source_cpu = LR_CPUID(*lr);
+               struct vgic_lr lr = vgic_get_lr(vcpu, i);
  
                /*
                 * There are three options for the state bits:
                 * If the LR holds only an active interrupt (not pending) then
                 * just leave it alone.
                 */
-               if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT)
+               if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE)
                        continue;
  
                /*
                 * is fine, then we are only setting a few bits that were
                 * already set.
                 */
-               vgic_dist_irq_set(vcpu, irq);
-               if (irq < VGIC_NR_SGIS)
-                       dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu;
-               *lr &= ~GICH_LR_PENDING_BIT;
+               vgic_dist_irq_set(vcpu, lr.irq);
+               if (lr.irq < VGIC_NR_SGIS)
+                       dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source;
+               lr.state &= ~LR_STATE_PENDING;
+               vgic_set_lr(vcpu, i, lr);
  
                /*
                 * If there's no state left on the LR (it could still be
                 * active), then the LR does not hold any useful info and can
                 * be marked as free for other use.
                 */
-               if (!(*lr & GICH_LR_STATE))
-                       vgic_retire_lr(i, irq, vgic_cpu);
+               if (!(lr.state & LR_STATE_MASK))
+                       vgic_retire_lr(i, lr.irq, vcpu);
  
                /* Finally update the VGIC state. */
                vgic_update_state(vcpu->kvm);
@@@ -989,8 -992,73 +992,73 @@@ static void vgic_update_state(struct kv
        }
  }
  
- #define MK_LR_PEND(src, irq)  \
-       (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq))
+ static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
+ {
+       return vgic_ops->get_lr(vcpu, lr);
+ }
+ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
+                              struct vgic_lr vlr)
+ {
+       vgic_ops->set_lr(vcpu, lr, vlr);
+ }
+ static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+                              struct vgic_lr vlr)
+ {
+       vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
+ }
+ static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
+ {
+       return vgic_ops->get_elrsr(vcpu);
+ }
+ static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
+ {
+       return vgic_ops->get_eisr(vcpu);
+ }
+ static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
+ {
+       return vgic_ops->get_interrupt_status(vcpu);
+ }
+ static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu)
+ {
+       vgic_ops->enable_underflow(vcpu);
+ }
+ static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
+ {
+       vgic_ops->disable_underflow(vcpu);
+ }
+ static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+ {
+       vgic_ops->get_vmcr(vcpu, vmcr);
+ }
+ static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+ {
+       vgic_ops->set_vmcr(vcpu, vmcr);
+ }
+ static inline void vgic_enable(struct kvm_vcpu *vcpu)
+ {
+       vgic_ops->enable(vcpu);
+ }
+ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
+ {
+       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+       struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
+       vlr.state = 0;
+       vgic_set_lr(vcpu, lr_nr, vlr);
+       clear_bit(lr_nr, vgic_cpu->lr_used);
+       vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+ }
  
  /*
   * An interrupt may have been disabled after being made pending on the
@@@ -1006,13 -1074,13 +1074,13 @@@ static void vgic_retire_disabled_irqs(s
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        int lr;
  
-       for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
-               int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
+       for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) {
+               struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
  
-               if (!vgic_irq_is_enabled(vcpu, irq)) {
-                       vgic_retire_lr(lr, irq, vgic_cpu);
-                       if (vgic_irq_is_active(vcpu, irq))
-                               vgic_irq_clear_active(vcpu, irq);
+               if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
+                       vgic_retire_lr(lr, vlr.irq, vcpu);
+                       if (vgic_irq_is_active(vcpu, vlr.irq))
+                               vgic_irq_clear_active(vcpu, vlr.irq);
                }
        }
  }
  static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
  {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+       struct vgic_lr vlr;
        int lr;
  
        /* Sanitize the input... */
        lr = vgic_cpu->vgic_irq_lr_map[irq];
  
        /* Do we have an active interrupt for the same CPUID? */
-       if (lr != LR_EMPTY &&
-           (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) {
-               kvm_debug("LR%d piggyback for IRQ%d %x\n",
-                         lr, irq, vgic_cpu->vgic_lr[lr]);
-               BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
-               vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT;
-               return true;
+       if (lr != LR_EMPTY) {
+               vlr = vgic_get_lr(vcpu, lr);
+               if (vlr.source == sgi_source_id) {
+                       kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
+                       BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
+                       vlr.state |= LR_STATE_PENDING;
+                       vgic_set_lr(vcpu, lr, vlr);
+                       return true;
+               }
        }
  
        /* Try to use another LR for this interrupt */
        lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
-                              vgic_cpu->nr_lr);
-       if (lr >= vgic_cpu->nr_lr)
+                              vgic->nr_lr);
+       if (lr >= vgic->nr_lr)
                return false;
  
        kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
-       vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
        vgic_cpu->vgic_irq_lr_map[irq] = lr;
        set_bit(lr, vgic_cpu->lr_used);
  
+       vlr.irq = irq;
+       vlr.source = sgi_source_id;
+       vlr.state = LR_STATE_PENDING;
        if (!vgic_irq_is_edge(vcpu, irq))
-               vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI;
+               vlr.state |= LR_EOI_INT;
+       vgic_set_lr(vcpu, lr, vlr);
  
        return true;
  }
@@@ -1155,9 -1230,9 +1230,9 @@@ static void __kvm_vgic_flush_hwstate(st
  
  epilog:
        if (overflow) {
-               vgic_cpu->vgic_hcr |= GICH_HCR_UIE;
+               vgic_enable_underflow(vcpu);
        } else {
-               vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
+               vgic_disable_underflow(vcpu);
                /*
                 * We're about to run this VCPU, and we've consumed
                 * everything the distributor had in store for
  
  static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
  {
-       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+       u32 status = vgic_get_interrupt_status(vcpu);
        bool level_pending = false;
  
-       kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
+       kvm_debug("STATUS = %08x\n", status);
  
-       if (vgic_cpu->vgic_misr & GICH_MISR_EOI) {
+       if (status & INT_STATUS_EOI) {
                /*
                 * Some level interrupts have been EOIed. Clear their
                 * active bit.
                 */
-               int lr, irq;
+               u64 eisr = vgic_get_eisr(vcpu);
+               unsigned long *eisr_ptr = (unsigned long *)&eisr;
+               int lr;
  
-               for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
-                                vgic_cpu->nr_lr) {
-                       irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
+               for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
+                       struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
  
-                       vgic_irq_clear_active(vcpu, irq);
-                       vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI;
+                       vgic_irq_clear_active(vcpu, vlr.irq);
+                       WARN_ON(vlr.state & LR_STATE_MASK);
+                       vlr.state = 0;
+                       vgic_set_lr(vcpu, lr, vlr);
  
                        /* Any additional pending interrupt? */
-                       if (vgic_dist_irq_is_pending(vcpu, irq)) {
-                               vgic_cpu_irq_set(vcpu, irq);
+                       if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) {
+                               vgic_cpu_irq_set(vcpu, vlr.irq);
                                level_pending = true;
                        } else {
-                               vgic_cpu_irq_clear(vcpu, irq);
+                               vgic_cpu_irq_clear(vcpu, vlr.irq);
                        }
  
                        /*
                         * Despite being EOIed, the LR may not have
                         * been marked as empty.
                         */
-                       set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr);
-                       vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT;
+                       vgic_sync_lr_elrsr(vcpu, lr, vlr);
                }
        }
  
-       if (vgic_cpu->vgic_misr & GICH_MISR_U)
-               vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
+       if (status & INT_STATUS_UNDERFLOW)
+               vgic_disable_underflow(vcpu);
  
        return level_pending;
  }
@@@ -1220,29 -1297,31 +1297,31 @@@ static void __kvm_vgic_sync_hwstate(str
  {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+       u64 elrsr;
+       unsigned long *elrsr_ptr;
        int lr, pending;
        bool level_pending;
  
        level_pending = vgic_process_maintenance(vcpu);
+       elrsr = vgic_get_elrsr(vcpu);
+       elrsr_ptr = (unsigned long *)&elrsr;
  
        /* Clear mappings for empty LRs */
-       for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr,
-                        vgic_cpu->nr_lr) {
-               int irq;
+       for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) {
+               struct vgic_lr vlr;
  
                if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
                        continue;
  
-               irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
+               vlr = vgic_get_lr(vcpu, lr);
  
-               BUG_ON(irq >= VGIC_NR_IRQS);
-               vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+               BUG_ON(vlr.irq >= VGIC_NR_IRQS);
+               vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
        }
  
        /* Check if we still have something up our sleeve... */
-       pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr,
-                                     vgic_cpu->nr_lr);
-       if (level_pending || pending < vgic_cpu->nr_lr)
+       pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
+       if (level_pending || pending < vgic->nr_lr)
                set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
  }
  
@@@ -1432,21 -1511,20 +1511,20 @@@ int kvm_vgic_vcpu_init(struct kvm_vcpu 
        }
  
        /*
-        * By forcing VMCR to zero, the GIC will restore the binary
-        * points to their reset values. Anything else resets to zero
-        * anyway.
+        * Store the number of LRs per vcpu, so we don't have to go
+        * all the way to the distributor structure to find out. Only
+        * assembly code should use this one.
         */
-       vgic_cpu->vgic_vmcr = 0;
+       vgic_cpu->nr_lr = vgic->nr_lr;
  
-       vgic_cpu->nr_lr = vgic_nr_lr;
-       vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */
+       vgic_enable(vcpu);
  
        return 0;
  }
  
  static void vgic_init_maintenance_interrupt(void *info)
  {
-       enable_percpu_irq(vgic_maint_irq, 0);
+       enable_percpu_irq(vgic->maint_irq, 0);
  }
  
  static int vgic_cpu_notify(struct notifier_block *self,
                break;
        case CPU_DYING:
        case CPU_DYING_FROZEN:
-               disable_percpu_irq(vgic_maint_irq);
+               disable_percpu_irq(vgic->maint_irq);
                break;
        }
  
@@@ -1470,30 -1548,37 +1548,37 @@@ static struct notifier_block vgic_cpu_n
        .notifier_call = vgic_cpu_notify,
  };
  
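+ /* Each entry's .data points at the matching HW-specific probe routine. */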
+ static const struct of_device_id vgic_ids[] = {
+       { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
+       { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
+       {},
+ };
  int kvm_vgic_hyp_init(void)
  {
+       const struct of_device_id *matched_id;
+       int (*vgic_probe)(struct device_node *, const struct vgic_ops **,
+                         const struct vgic_params **);
+       struct device_node *vgic_node;
        int ret;
-       struct resource vctrl_res;
-       struct resource vcpu_res;
  
-       vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
+       vgic_node = of_find_matching_node_and_match(NULL,
+                                                   vgic_ids, &matched_id);
        if (!vgic_node) {
-               kvm_err("error: no compatible vgic node in DT\n");
+               kvm_err("error: no compatible GIC node found\n");
                return -ENODEV;
        }
  
-       vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0);
-       if (!vgic_maint_irq) {
-               kvm_err("error getting vgic maintenance irq from DT\n");
-               ret = -ENXIO;
-               goto out;
-       }
+       vgic_probe = matched_id->data;
+       ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
+       if (ret)
+               return ret;
  
-       ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler,
+       ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
                                 "vgic", kvm_get_running_vcpus());
        if (ret) {
-               kvm_err("Cannot register interrupt %d\n", vgic_maint_irq);
-               goto out;
+               kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
+               return ret;
        }
  
        ret = __register_cpu_notifier(&vgic_cpu_nb);
                goto out_free_irq;
        }
  
-       ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
-       if (ret) {
-               kvm_err("Cannot obtain VCTRL resource\n");
-               goto out_free_irq;
-       }
-       vgic_vctrl_base = of_iomap(vgic_node, 2);
-       if (!vgic_vctrl_base) {
-               kvm_err("Cannot ioremap VCTRL\n");
-               ret = -ENOMEM;
-               goto out_free_irq;
-       }
-       vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR);
-       vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1;
-       ret = create_hyp_io_mappings(vgic_vctrl_base,
-                                    vgic_vctrl_base + resource_size(&vctrl_res),
-                                    vctrl_res.start);
-       if (ret) {
-               kvm_err("Cannot map VCTRL into hyp\n");
-               goto out_unmap;
-       }
-       if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
-               kvm_err("Cannot obtain VCPU resource\n");
-               ret = -ENXIO;
-               goto out_unmap;
-       }
 -      on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
--
-       if (!PAGE_ALIGNED(vcpu_res.start)) {
-               kvm_err("GICV physical address 0x%llx not page aligned\n",
-                       (unsigned long long)vcpu_res.start);
-               ret = -ENXIO;
-               goto out_unmap;
-       }
+      /* Callback into arch code for setup */
+       vgic_arch_setup(vgic);
  
-       if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
-               kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
-                       (unsigned long long)resource_size(&vcpu_res),
-                       PAGE_SIZE);
-               ret = -ENXIO;
-               goto out_unmap;
-       }
-       vgic_vcpu_base = vcpu_res.start;
-       kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
-                vctrl_res.start, vgic_maint_irq);
 +      on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
 +
-       goto out;
+       return 0;
  
- out_unmap:
-       iounmap(vgic_vctrl_base);
  out_free_irq:
-       free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus());
- out:
-       of_node_put(vgic_node);
+       free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
        return ret;
  }
  
@@@ -1593,7 -1628,7 +1628,7 @@@ int kvm_vgic_init(struct kvm *kvm
        }
  
        ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
-                                   vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE);
+                                   vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE);
        if (ret) {
                kvm_err("Unable to remap VGIC CPU to VCPU\n");
                goto out;
@@@ -1639,7 -1674,8 +1674,8 @@@ int kvm_vgic_create(struct kvm *kvm
        }
  
        spin_lock_init(&kvm->arch.vgic.lock);
-       kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
+       kvm->arch.vgic.in_kernel = true;
+       kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
        kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
        kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
  
@@@ -1738,39 -1774,40 +1774,40 @@@ int kvm_vgic_addr(struct kvm *kvm, unsi
  static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
                                 struct kvm_exit_mmio *mmio, phys_addr_t offset)
  {
-       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-       u32 reg, mask = 0, shift = 0;
        bool updated = false;
+       struct vgic_vmcr vmcr;
+       u32 *vmcr_field;
+       u32 reg;
+       vgic_get_vmcr(vcpu, &vmcr);
  
        switch (offset & ~0x3) {
        case GIC_CPU_CTRL:
-               mask = GICH_VMCR_CTRL_MASK;
-               shift = GICH_VMCR_CTRL_SHIFT;
+               vmcr_field = &vmcr.ctlr;
                break;
        case GIC_CPU_PRIMASK:
-               mask = GICH_VMCR_PRIMASK_MASK;
-               shift = GICH_VMCR_PRIMASK_SHIFT;
+               vmcr_field = &vmcr.pmr;
                break;
        case GIC_CPU_BINPOINT:
-               mask = GICH_VMCR_BINPOINT_MASK;
-               shift = GICH_VMCR_BINPOINT_SHIFT;
+               vmcr_field = &vmcr.bpr;
                break;
        case GIC_CPU_ALIAS_BINPOINT:
-               mask = GICH_VMCR_ALIAS_BINPOINT_MASK;
-               shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+               vmcr_field = &vmcr.abpr;
                break;
+       default:
+               BUG();
        }
  
        if (!mmio->is_write) {
-               reg = (vgic_cpu->vgic_vmcr & mask) >> shift;
+               reg = *vmcr_field;
                mmio_data_write(mmio, ~0, reg);
        } else {
                reg = mmio_data_read(mmio, ~0);
-               reg = (reg << shift) & mask;
-               if (reg != (vgic_cpu->vgic_vmcr & mask))
+               if (reg != *vmcr_field) {
+                       *vmcr_field = reg;
+                       vgic_set_vmcr(vcpu, &vmcr);
                        updated = true;
-               vgic_cpu->vgic_vmcr &= ~mask;
-               vgic_cpu->vgic_vmcr |= reg;
+               }
        }
        return updated;
  }