Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 19 May 2016 18:27:09 +0000 (11:27 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 19 May 2016 18:27:09 +0000 (11:27 -0700)
Pull KVM updates from Paolo Bonzini:
 "Small release overall.

  x86:
   - miscellaneous fixes
   - AVIC support (local APIC virtualization, AMD version)

  s390:
   - polling for interrupts after a VCPU goes to halted state is now
     enabled for s390
   - use hardware provided information about facility bits that do not
     need any hypervisor activity, and other fixes for cpu models and
     facilities
   - improve perf output
   - floating interrupt controller improvements.

  MIPS:
   - miscellaneous fixes

  PPC:
   - bugfixes only

  ARM:
   - 16K page size support
   - generic firmware probing layer for timer and GIC

  Christoffer Dall (KVM-ARM maintainer) says:
    "There are a few changes in this pull request touching things
     outside KVM, but they should all carry the necessary acks and it
     made the merge process much easier to do it this way."

  though actually the irqchip maintainers' acks didn't make it into the
  patches.  Marc Zyngier, who is both irqchip and KVM-ARM maintainer,
  later acked at http://mid.gmane.org/573351D1.4060303@arm.com ('more
  formally and for documentation purposes')"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (82 commits)
  KVM: MTRR: remove MSR 0x2f8
  KVM: x86: make hwapic_isr_update and hwapic_irr_update look the same
  svm: Manage vcpu load/unload when enable AVIC
  svm: Do not intercept CR8 when enable AVIC
  svm: Do not expose x2APIC when enable AVIC
  KVM: x86: Introducing kvm_x86_ops.apicv_post_state_restore
  svm: Add VMEXIT handlers for AVIC
  svm: Add interrupt injection via AVIC
  KVM: x86: Detect and Initialize AVIC support
  svm: Introduce new AVIC VMCB registers
  KVM: split kvm_vcpu_wake_up from kvm_vcpu_kick
  KVM: x86: Introducing kvm_x86_ops VCPU blocking/unblocking hooks
  KVM: x86: Introducing kvm_x86_ops VM init/destroy hooks
  KVM: x86: Rename kvm_apic_get_reg to kvm_lapic_get_reg
  KVM: x86: Misc LAPIC changes to expose helper functions
  KVM: shrink halt polling even more for invalid wakeups
  KVM: s390: set halt polling to 80 microseconds
  KVM: halt_polling: provide a way to qualify wakeups during poll
  KVM: PPC: Book3S HV: Re-enable XICS fast path for irqfd-generated interrupts
  kvm: Conditionally register IRQ bypass consumer
  ...

22 files changed:
1  2 
arch/arm/include/asm/kvm_host.h
arch/arm/include/asm/kvm_mmu.h
arch/arm/kvm/arm.c
arch/arm/kvm/mmu.c
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/pgtable-hwdef.h
arch/arm64/include/asm/pgtable.h
arch/mips/include/asm/kvm_host.h
arch/mips/kvm/emulate.c
arch/mips/kvm/tlb.c
arch/mips/kvm/trap_emul.c
arch/s390/include/asm/sclp.h
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/trace.h
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
drivers/irqchip/irq-gic-common.c
drivers/irqchip/irq-gic-v3.c
drivers/irqchip/irq-gic.c

Simple merge
Simple merge
Simple merge
Simple merge
  #define HCR_INT_OVERRIDE   (HCR_FMO | HCR_IMO)
  #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
  
 -/* Hyp System Control Register (SCTLR_EL2) bits */
 -#define SCTLR_EL2_EE  (1 << 25)
 -#define SCTLR_EL2_WXN (1 << 19)
 -#define SCTLR_EL2_I   (1 << 12)
 -#define SCTLR_EL2_SA  (1 << 3)
 -#define SCTLR_EL2_C   (1 << 2)
 -#define SCTLR_EL2_A   (1 << 1)
 -#define SCTLR_EL2_M   1
 -#define SCTLR_EL2_FLAGS       (SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C |      \
 -                       SCTLR_EL2_SA | SCTLR_EL2_I)
 -
  /* TCR_EL2 Registers bits */
- #define TCR_EL2_RES1  ((1 << 31) | (1 << 23))
- #define TCR_EL2_TBI   (1 << 20)
- #define TCR_EL2_PS    (7 << 16)
- #define TCR_EL2_PS_40B        (2 << 16)
- #define TCR_EL2_TG0   (1 << 14)
- #define TCR_EL2_SH0   (3 << 12)
- #define TCR_EL2_ORGN0 (3 << 10)
- #define TCR_EL2_IRGN0 (3 << 8)
- #define TCR_EL2_T0SZ  0x3f
- #define TCR_EL2_MASK  (TCR_EL2_TG0 | TCR_EL2_SH0 | \
-                        TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
+ #define TCR_EL2_RES1          ((1 << 31) | (1 << 23))
+ #define TCR_EL2_TBI           (1 << 20)
+ #define TCR_EL2_PS_SHIFT      16
+ #define TCR_EL2_PS_MASK               (7 << TCR_EL2_PS_SHIFT)
+ #define TCR_EL2_PS_40B                (2 << TCR_EL2_PS_SHIFT)
+ #define TCR_EL2_TG0_MASK      TCR_TG0_MASK
+ #define TCR_EL2_SH0_MASK      TCR_SH0_MASK
+ #define TCR_EL2_ORGN0_MASK    TCR_ORGN0_MASK
+ #define TCR_EL2_IRGN0_MASK    TCR_IRGN0_MASK
+ #define TCR_EL2_T0SZ_MASK     0x3f
+ #define TCR_EL2_MASK  (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \
+                        TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
  
  /* VTCR_EL2 Registers bits */
  #define VTCR_EL2_RES1         (1 << 31)
Simple merge
Simple merge
@@@ -295,11 -285,13 +295,13 @@@ static inline int pmd_protnone(pmd_t pm
  #define pmd_wrprotect(pmd)    pte_pmd(pte_wrprotect(pmd_pte(pmd)))
  #define pmd_mkold(pmd)                pte_pmd(pte_mkold(pmd_pte(pmd)))
  #define pmd_mkwrite(pmd)      pte_pmd(pte_mkwrite(pmd_pte(pmd)))
 -#define pmd_mkclean(pmd)       pte_pmd(pte_mkclean(pmd_pte(pmd)))
 +#define pmd_mkclean(pmd)      pte_pmd(pte_mkclean(pmd_pte(pmd)))
  #define pmd_mkdirty(pmd)      pte_pmd(pte_mkdirty(pmd_pte(pmd)))
  #define pmd_mkyoung(pmd)      pte_pmd(pte_mkyoung(pmd_pte(pmd)))
 -#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK))
 +#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID))
  
+ #define pmd_thp_or_huge(pmd)  (pmd_huge(pmd) || pmd_trans_huge(pmd))
  #define __HAVE_ARCH_PMD_WRITE
  #define pmd_write(pmd)                pte_write(pmd_pte(pmd))
  
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -28,8 -30,8 +30,9 @@@
  #include <linux/slab.h>
  
  #include <linux/irqchip.h>
+ #include <linux/irqchip/arm-gic-common.h>
  #include <linux/irqchip/arm-gic-v3.h>
 +#include <linux/irqchip/irq-partition-percpu.h>
  
  #include <asm/cputype.h>
  #include <asm/exception.h>
@@@ -976,119 -906,30 +981,143 @@@ static int __init gic_validate_dist_ver
        return 0;
  }
  
- static void gic_populate_ppi_partitions(struct device_node *gic_node)
 +static int get_cpu_number(struct device_node *dn)
 +{
 +      const __be32 *cell;
 +      u64 hwid;
 +      int i;
 +
 +      cell = of_get_property(dn, "reg", NULL);
 +      if (!cell)
 +              return -1;
 +
 +      hwid = of_read_number(cell, of_n_addr_cells(dn));
 +
 +      /*
 +       * Non affinity bits must be set to 0 in the DT
 +       */
 +      if (hwid & ~MPIDR_HWID_BITMASK)
 +              return -1;
 +
 +      for (i = 0; i < num_possible_cpus(); i++)
 +              if (cpu_logical_map(i) == hwid)
 +                      return i;
 +
 +      return -1;
 +}
 +
 +/* Create all possible partitions at boot time */
++static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
 +{
 +      struct device_node *parts_node, *child_part;
 +      int part_idx = 0, i;
 +      int nr_parts;
 +      struct partition_affinity *parts;
 +
 +      parts_node = of_find_node_by_name(gic_node, "ppi-partitions");
 +      if (!parts_node)
 +              return;
 +
 +      nr_parts = of_get_child_count(parts_node);
 +
 +      if (!nr_parts)
 +              return;
 +
 +      parts = kzalloc(sizeof(*parts) * nr_parts, GFP_KERNEL);
 +      if (WARN_ON(!parts))
 +              return;
 +
 +      for_each_child_of_node(parts_node, child_part) {
 +              struct partition_affinity *part;
 +              int n;
 +
 +              part = &parts[part_idx];
 +
 +              part->partition_id = of_node_to_fwnode(child_part);
 +
 +              pr_info("GIC: PPI partition %s[%d] { ",
 +                      child_part->name, part_idx);
 +
 +              n = of_property_count_elems_of_size(child_part, "affinity",
 +                                                  sizeof(u32));
 +              WARN_ON(n <= 0);
 +
 +              for (i = 0; i < n; i++) {
 +                      int err, cpu;
 +                      u32 cpu_phandle;
 +                      struct device_node *cpu_node;
 +
 +                      err = of_property_read_u32_index(child_part, "affinity",
 +                                                       i, &cpu_phandle);
 +                      if (WARN_ON(err))
 +                              continue;
 +
 +                      cpu_node = of_find_node_by_phandle(cpu_phandle);
 +                      if (WARN_ON(!cpu_node))
 +                              continue;
 +
 +                      cpu = get_cpu_number(cpu_node);
 +                      if (WARN_ON(cpu == -1))
 +                              continue;
 +
 +                      pr_cont("%s[%d] ", cpu_node->full_name, cpu);
 +
 +                      cpumask_set_cpu(cpu, &part->mask);
 +              }
 +
 +              pr_cont("}\n");
 +              part_idx++;
 +      }
 +
 +      for (i = 0; i < 16; i++) {
 +              unsigned int irq;
 +              struct partition_desc *desc;
 +              struct irq_fwspec ppi_fwspec = {
 +                      .fwnode         = gic_data.fwnode,
 +                      .param_count    = 3,
 +                      .param          = {
 +                              [0]     = 1,
 +                              [1]     = i,
 +                              [2]     = IRQ_TYPE_NONE,
 +                      },
 +              };
 +
 +              irq = irq_create_fwspec_mapping(&ppi_fwspec);
 +              if (WARN_ON(!irq))
 +                      continue;
 +              desc = partition_create_desc(gic_data.fwnode, parts, nr_parts,
 +                                           irq, &partition_domain_ops);
 +              if (WARN_ON(!desc))
 +                      continue;
 +
 +              gic_data.ppi_descs[i] = desc;
 +      }
 +}
 +
+ static void __init gic_of_setup_kvm_info(struct device_node *node)
+ {
+       int ret;
+       struct resource r;
+       u32 gicv_idx;
+       gic_v3_kvm_info.type = GIC_V3;
+       gic_v3_kvm_info.maint_irq = irq_of_parse_and_map(node, 0);
+       if (!gic_v3_kvm_info.maint_irq)
+               return;
+       if (of_property_read_u32(node, "#redistributor-regions",
+                                &gicv_idx))
+               gicv_idx = 1;
+       gicv_idx += 3;  /* Also skip GICD, GICC, GICH */
+       ret = of_address_to_resource(node, gicv_idx, &r);
+       if (!ret)
+               gic_v3_kvm_info.vcpu = r;
+       gic_set_kvm_info(&gic_v3_kvm_info);
+ }
  static int __init gic_of_init(struct device_node *node, struct device_node *parent)
  {
        void __iomem *dist_base;
  
        err = gic_init_bases(dist_base, rdist_regs, nr_redist_regions,
                             redist_stride, &node->fwnode);
 -      if (!err) {
 -              gic_of_setup_kvm_info(node);
 -              return 0;
 -      }
 +      if (err)
 +              goto out_unmap_rdist;
 +
 +      gic_populate_ppi_partitions(node);
++      gic_of_setup_kvm_info(node);
 +      return 0;
  
  out_unmap_rdist:
        for (i = 0; i < nr_redist_regions; i++)
@@@ -1248,30 -1191,29 +1250,53 @@@ static bool gic_check_eoimode(struct de
        return true;
  }
  
- static int gic_of_setup(struct gic_chip_data *gic, struct device_node *node)
++static int __init gic_of_setup(struct gic_chip_data *gic, struct device_node *node)
 +{
 +      if (!gic || !node)
 +              return -EINVAL;
 +
 +      gic->raw_dist_base = of_iomap(node, 0);
 +      if (WARN(!gic->raw_dist_base, "unable to map gic dist registers\n"))
 +              goto error;
 +
 +      gic->raw_cpu_base = of_iomap(node, 1);
 +      if (WARN(!gic->raw_cpu_base, "unable to map gic cpu registers\n"))
 +              goto error;
 +
 +      if (of_property_read_u32(node, "cpu-offset", &gic->percpu_offset))
 +              gic->percpu_offset = 0;
 +
 +      return 0;
 +
 +error:
 +      gic_teardown(gic);
 +
 +      return -ENOMEM;
 +}
 +
+ static void __init gic_of_setup_kvm_info(struct device_node *node)
+ {
+       int ret;
+       struct resource *vctrl_res = &gic_v2_kvm_info.vctrl;
+       struct resource *vcpu_res = &gic_v2_kvm_info.vcpu;
+       gic_v2_kvm_info.type = GIC_V2;
+       gic_v2_kvm_info.maint_irq = irq_of_parse_and_map(node, 0);
+       if (!gic_v2_kvm_info.maint_irq)
+               return;
+       ret = of_address_to_resource(node, 2, vctrl_res);
+       if (ret)
+               return;
+       ret = of_address_to_resource(node, 3, vcpu_res);
+       if (ret)
+               return;
+       gic_set_kvm_info(&gic_v2_kvm_info);
+ }
  int __init
  gic_of_init(struct device_node *node, struct device_node *parent)
  {
         * Disable split EOI/Deactivate if either HYP is not available
         * or the CPU interface is too small.
         */
 -      if (gic_cnt == 0 && !gic_check_eoimode(node, &cpu_base))
 +      if (gic_cnt == 0 && !gic_check_eoimode(node, &gic->raw_cpu_base))
                static_key_slow_dec(&supports_deactivate);
  
 -      if (of_property_read_u32(node, "cpu-offset", &percpu_offset))
 -              percpu_offset = 0;
 +      ret = __gic_init_bases(gic, -1, &node->fwnode);
 +      if (ret) {
 +              gic_teardown(gic);
 +              return ret;
 +      }
  
-       if (!gic_cnt)
 -      __gic_init_bases(gic_cnt, -1, dist_base, cpu_base, percpu_offset,
 -                       &node->fwnode);
+       if (!gic_cnt) {
                gic_init_physaddr(node);
+               gic_of_setup_kvm_info(node);
+       }
  
        if (parent) {
                irq = irq_of_parse_and_map(node, 0);
@@@ -1401,8 -1391,8 +1476,8 @@@ static int __init gic_v2_acpi_init(stru
                return -EINVAL;
        }
  
-       gic->raw_cpu_base = ioremap(cpu_phy_base, ACPI_GIC_CPU_IF_MEM_SIZE);
 -      cpu_base = ioremap(acpi_data.cpu_phys_base, ACPI_GIC_CPU_IF_MEM_SIZE);
 -      if (!cpu_base) {
++      gic->raw_cpu_base = ioremap(acpi_data.cpu_phys_base, ACPI_GIC_CPU_IF_MEM_SIZE);
 +      if (!gic->raw_cpu_base) {
                pr_err("Unable to map GICC registers\n");
                return -ENOMEM;
        }