Merge tag 'kvm-4.9-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author    Linus Torvalds <torvalds@linux-foundation.org>    Thu, 6 Oct 2016 17:49:01 +0000 (10:49 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>    Thu, 6 Oct 2016 17:49:01 +0000 (10:49 -0700)
Pull KVM updates from Radim Krčmář:
 "All architectures:
   - move `make kvmconfig` stubs from x86
   - use 64 bits for debugfs stats

  ARM:
   - Important fixes for running without an in-kernel irqchip
   - handle SError exceptions and present them to guests if appropriate
   - proxying of GICV access at EL2 if guest mappings are unsafe
   - GICv3 on AArch32 on ARMv8
   - preparations for GICv3 save/restore, including ABI docs
   - cleanups and a bit of optimization

  MIPS:
   - A couple of fixes in preparation for supporting MIPS EVA host
     kernels
   - MIPS SMP host & TLB invalidation fixes

  PPC:
   - Fix the bug which caused guests to falsely report lockups
   - other minor fixes
   - a small optimization

  s390:
   - Lazy enablement of runtime instrumentation
   - up to 255 CPUs for nested guests
   - rework of machine check delivery
   - cleanups and fixes

  x86:
   - IOMMU part of AMD's AVIC for vmexit-less interrupt delivery
   - Hyper-V TSC page
   - per-vcpu tsc_offset in debugfs
   - accelerated INS/OUTS in nVMX
   - cleanups and fixes"

* tag 'kvm-4.9-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (140 commits)
  KVM: MIPS: Drop dubious EntryHi optimisation
  KVM: MIPS: Invalidate TLB by regenerating ASIDs
  KVM: MIPS: Split kernel/user ASID regeneration
  KVM: MIPS: Drop other CPU ASIDs on guest MMU changes
  KVM: arm/arm64: vgic: Don't flush/sync without a working vgic
  KVM: arm64: Require in-kernel irqchip for PMU support
  KVM: PPC: Book3s PR: Allow access to unprivileged MMCR2 register
  KVM: PPC: Book3S PR: Support 64kB page size on POWER8E and POWER8NVL
  KVM: PPC: Book3S: Remove duplicate setting of the B field in tlbie
  KVM: PPC: BookE: Fix a sanity check
  KVM: PPC: Book3S HV: Take out virtual core piggybacking code
  KVM: PPC: Book3S: Treat VTB as a per-subcore register, not per-thread
  ARM: gic-v3: Work around definition of gic_write_bpr1
  KVM: nVMX: Fix the NMI IDT-vectoring handling
  KVM: VMX: Enable MSR-BASED TPR shadow even if APICv is inactive
  KVM: nVMX: Fix reload apic access page warning
  kvmconfig: add virtio-gpu to config fragment
  config: move x86 kvm_guest.config to a common location
  arm64: KVM: Remove duplicating init code for setting VMID
  ARM: KVM: Support vgic-v3
  ...

14 files changed:
Documentation/kernel-parameters.txt
arch/arm/include/asm/arch_gicv3.h
arch/arm/include/asm/cputype.h
arch/arm/kvm/arm.c
arch/arm/kvm/mmu.c
arch/arm64/include/asm/arch_gicv3.h
arch/arm64/include/asm/kvm_mmu.h
arch/powerpc/platforms/powernv/pci-ioda.c
arch/s390/kvm/kvm-s390.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
drivers/iommu/amd_iommu.c
drivers/iommu/amd_iommu_types.h

@@@ -460,6 -460,15 +460,15 @@@ bytes respectively. Such letter suffixe
                        driver will print ACPI tables for AMD IOMMU during
                        IOMMU initialization.
  
+       amd_iommu_intr= [HW,X86-64]
+                       Specifies one of the following AMD IOMMU interrupt
+                       remapping modes:
+                       legacy     - Use legacy interrupt remapping mode.
+                       vapic      - Use virtual APIC mode, which allows the IOMMU
+                                    to inject interrupts directly into the guest.
+                                    This mode requires kvm-amd.avic=1.
+                                    (Default when IOMMU HW support is present.)
        amijoy.map=     [HW,JOY] Amiga joystick support
                        Map of devices attached to JOY0DAT and JOY1DAT
                        Format: <a>,<b>
                        loops can be debugged more effectively on production
                        systems.
  
 +      clocksource.arm_arch_timer.fsl-a008585=
 +                      [ARM64]
 +                      Format: <bool>
 +                      Enable/disable the workaround of Freescale/NXP
 +                      erratum A-008585.  This can be useful for KVM
 +                      guests, if the guest device tree doesn't show the
 +                      erratum.  If unspecified, the workaround is
 +                      enabled based on the device tree.
 +
        clearcpuid=BITNUM [X86]
                        Disable CPUID feature X for the kernel. See
                        arch/x86/include/asm/cpufeatures.h for the valid bit
                        determined by the stdout-path property in device
                        tree's chosen node.
  
 -              cdns,<addr>
 -                      Start an early, polled-mode console on a cadence serial
 -                      port at the specified address. The cadence serial port
 -                      must already be setup and configured. Options are not
 -                      yet supported.
 +              cdns,<addr>[,options]
 +                      Start an early, polled-mode console on a Cadence
 +                      (xuartps) serial port at the specified address. Only
 +                      supported option is baud rate. If baud rate is not
 +                      specified, the serial port must already be setup and
 +                      configured.
  
                uart[8250],io,<addr>[,options]
                uart[8250],mmio,<addr>[,options]
                        Format: <unsigned int> such that (rxsize & ~0x1fffc0) == 0.
                        Default: 1024
  
 +      gpio-mockup.gpio_mockup_ranges
 +                      [HW] Sets the ranges of the gpiochip for this device.
 +                      Format: <start1>,<end1>,<start2>,<end2>...
 +
        hardlockup_all_cpu_backtrace=
                        [KNL] Should the hard-lockup detector generate
                        backtraces on all cpus.
  
        intel_idle.max_cstate=  [KNL,HW,ACPI,X86]
                        0       disables intel_idle and fall back on acpi_idle.
 -                      1 to 6  specify maximum depth of C-state.
 +                      1 to 9  specify maximum depth of C-state.
  
        intel_pstate=  [X86]
                       disable
                        than or equal to this physical address is ignored.
  
        maxcpus=        [SMP] Maximum number of processors that an SMP kernel
 -                      should make use of.  maxcpus=n : n >= 0 limits the
 -                      kernel to using 'n' processors.  n=0 is a special case,
 -                      it is equivalent to "nosmp", which also disables
 -                      the IO APIC.
 +                      will bring up during bootup.  maxcpus=n : n >= 0 limits
 +                      the kernel to bringing up 'n' processors. After bootup
 +                      you can bring up the remaining plugged CPUs by executing
 +                      "echo 1 > /sys/devices/system/cpu/cpuX/online", so maxcpus
 +                      only takes effect during system bootup.
 +                      n=0 is a special case: it is equivalent to "nosmp",
 +                      which also disables the IO APIC.
  
        max_loop=       [LOOP] The number of loop block devices that get
        (loop.max_loop) unconditionally pre-created at init time. The default
  
        nodelayacct     [KNL] Disable per-task delay accounting
  
 -      nodisconnect    [HW,SCSI,M68K] Disables SCSI disconnects.
 -
        nodsp           [SH] Disable hardware DSP at boot time.
  
        noefi           Disable EFI runtime services support.
  
        nr_cpus=        [SMP] Maximum number of processors that an SMP kernel
                        could support.  nr_cpus=n : n >= 1 limits the kernel to
 -                      supporting 'n' processors. Later in runtime you can not
 -                      use hotplug cpu feature to put more cpu back to online.
 -                      just like you compile the kernel NR_CPUS=n
 +                      support 'n' processors. It may be larger than the
 +                      number of CPUs plugged in during bootup; later at
 +                      runtime you can physically add CPUs until the count
 +                      reaches n. Some boot-time memory for per-cpu variables
 +                      is therefore pre-allocated for later physical CPU
 +                      hotplugging.
  
        nr_uarts=       [SERIAL] maximum number of UARTs to be registered.
  
                                PAGE_SIZE is used as alignment.
                                PCI-PCI bridge can be specified, if resource
                                windows need to be expanded.
 +                              To specify the alignment for several
 +                              instances of a device, the PCI vendor,
 +                              device, subvendor, and subdevice may be
 +                              specified, e.g., 4096@pci:8086:9c22:103c:198f
                ecrc=           Enable/disable PCIe ECRC (transaction layer
                                end-to-end CRC checking).
                                bios: Use BIOS/firmware settings. This is the
                                u = IGNORE_UAS (don't bind to the uas driver);
                                w = NO_WP_DETECT (don't test whether the
                                        medium is write-protected).
 +                              y = ALWAYS_SYNC (issue a SYNCHRONIZE_CACHE
 +                                      even if the device claims no cache)
                        Example: quirks=0419:aaf5:rl,0421:0433:rc
  
        user_debug=     [KNL,ARM]
@@@ -22,9 -22,7 +22,7 @@@
  
  #include <linux/io.h>
  #include <asm/barrier.h>
- #define __ACCESS_CP15(CRn, Op1, CRm, Op2)     p15, Op1, %0, CRn, CRm, Op2
- #define __ACCESS_CP15_64(Op1, CRm)            p15, Op1, %Q0, %R0, CRm
+ #include <asm/cp15.h>
  
  #define ICC_EOIR1                     __ACCESS_CP15(c12, 0, c12, 1)
  #define ICC_DIR                               __ACCESS_CP15(c12, 0, c11, 1)
@@@ -34,7 -32,6 +32,7 @@@
  #define ICC_CTLR                      __ACCESS_CP15(c12, 0, c12, 4)
  #define ICC_SRE                               __ACCESS_CP15(c12, 0, c12, 5)
  #define ICC_IGRPEN1                   __ACCESS_CP15(c12, 0, c12, 7)
 +#define ICC_BPR1                      __ACCESS_CP15(c12, 0, c12, 3)
  
  #define ICC_HSRE                      __ACCESS_CP15(c12, 4, c9, 5)
  
  #define ICH_AP1R2                     __AP1Rx(2)
  #define ICH_AP1R3                     __AP1Rx(3)
  
+ /* A32-to-A64 mappings used by VGIC save/restore */
+ #define CPUIF_MAP(a32, a64)                   \
+ static inline void write_ ## a64(u32 val)     \
+ {                                             \
+       write_sysreg(val, a32);                 \
+ }                                             \
+ static inline u32 read_ ## a64(void)          \
+ {                                             \
+       return read_sysreg(a32);                \
+ }                                             \
+
+ #define CPUIF_MAP_LO_HI(a32lo, a32hi, a64)    \
+ static inline void write_ ## a64(u64 val)     \
+ {                                             \
+       write_sysreg(lower_32_bits(val), a32lo);\
+       write_sysreg(upper_32_bits(val), a32hi);\
+ }                                             \
+ static inline u64 read_ ## a64(void)          \
+ {                                             \
+       u64 val = read_sysreg(a32lo);           \
+                                               \
+       val |=  (u64)read_sysreg(a32hi) << 32;  \
+                                               \
+       return val;                             \
+ }
+ CPUIF_MAP(ICH_HCR, ICH_HCR_EL2)
+ CPUIF_MAP(ICH_VTR, ICH_VTR_EL2)
+ CPUIF_MAP(ICH_MISR, ICH_MISR_EL2)
+ CPUIF_MAP(ICH_EISR, ICH_EISR_EL2)
+ CPUIF_MAP(ICH_ELSR, ICH_ELSR_EL2)
+ CPUIF_MAP(ICH_VMCR, ICH_VMCR_EL2)
+ CPUIF_MAP(ICH_AP0R3, ICH_AP0R3_EL2)
+ CPUIF_MAP(ICH_AP0R2, ICH_AP0R2_EL2)
+ CPUIF_MAP(ICH_AP0R1, ICH_AP0R1_EL2)
+ CPUIF_MAP(ICH_AP0R0, ICH_AP0R0_EL2)
+ CPUIF_MAP(ICH_AP1R3, ICH_AP1R3_EL2)
+ CPUIF_MAP(ICH_AP1R2, ICH_AP1R2_EL2)
+ CPUIF_MAP(ICH_AP1R1, ICH_AP1R1_EL2)
+ CPUIF_MAP(ICH_AP1R0, ICH_AP1R0_EL2)
+ CPUIF_MAP(ICC_HSRE, ICC_SRE_EL2)
+ CPUIF_MAP(ICC_SRE, ICC_SRE_EL1)
+ CPUIF_MAP_LO_HI(ICH_LR15, ICH_LRC15, ICH_LR15_EL2)
+ CPUIF_MAP_LO_HI(ICH_LR14, ICH_LRC14, ICH_LR14_EL2)
+ CPUIF_MAP_LO_HI(ICH_LR13, ICH_LRC13, ICH_LR13_EL2)
+ CPUIF_MAP_LO_HI(ICH_LR12, ICH_LRC12, ICH_LR12_EL2)
+ CPUIF_MAP_LO_HI(ICH_LR11, ICH_LRC11, ICH_LR11_EL2)
+ CPUIF_MAP_LO_HI(ICH_LR10, ICH_LRC10, ICH_LR10_EL2)
+ CPUIF_MAP_LO_HI(ICH_LR9, ICH_LRC9, ICH_LR9_EL2)
+ CPUIF_MAP_LO_HI(ICH_LR8, ICH_LRC8, ICH_LR8_EL2)
+ CPUIF_MAP_LO_HI(ICH_LR7, ICH_LRC7, ICH_LR7_EL2)
+ CPUIF_MAP_LO_HI(ICH_LR6, ICH_LRC6, ICH_LR6_EL2)
+ CPUIF_MAP_LO_HI(ICH_LR5, ICH_LRC5, ICH_LR5_EL2)
+ CPUIF_MAP_LO_HI(ICH_LR4, ICH_LRC4, ICH_LR4_EL2)
+ CPUIF_MAP_LO_HI(ICH_LR3, ICH_LRC3, ICH_LR3_EL2)
+ CPUIF_MAP_LO_HI(ICH_LR2, ICH_LRC2, ICH_LR2_EL2)
+ CPUIF_MAP_LO_HI(ICH_LR1, ICH_LRC1, ICH_LR1_EL2)
+ CPUIF_MAP_LO_HI(ICH_LR0, ICH_LRC0, ICH_LR0_EL2)
+ #define read_gicreg(r)                 read_##r()
+ #define write_gicreg(v, r)             write_##r(v)
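For illustration only (not part of the patch), here is what two of the mappings above expand to by hand; this is what lets the shared VGIC save/restore code use arm64 register names through read_gicreg()/write_gicreg() on AArch32 as well:

    /* Hand expansion of CPUIF_MAP(ICH_HCR, ICH_HCR_EL2), illustration only */
    static inline void write_ICH_HCR_EL2(u32 val)
    {
            write_sysreg(val, ICH_HCR);
    }

    static inline u32 read_ICH_HCR_EL2(void)
    {
            return read_sysreg(ICH_HCR);
    }

    /*
     * Hand expansion of CPUIF_MAP_LO_HI(ICH_LR0, ICH_LRC0, ICH_LR0_EL2):
     * the 64-bit EL2 register is split across two 32-bit CP15 accesses.
     */
    static inline void write_ICH_LR0_EL2(u64 val)
    {
            write_sysreg(lower_32_bits(val), ICH_LR0);
            write_sysreg(upper_32_bits(val), ICH_LRC0);
    }

    static inline u64 read_ICH_LR0_EL2(void)
    {
            u64 val = read_sysreg(ICH_LR0);

            val |= (u64)read_sysreg(ICH_LRC0) << 32;
            return val;
    }

write_gicreg(v, ICH_HCR_EL2) and read_gicreg(ICH_HCR_EL2) then resolve to write_ICH_HCR_EL2(v) and read_ICH_HCR_EL2() respectively.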
  /* Low-level accessors */
  
  static inline void gic_write_eoir(u32 irq)
  {
-       asm volatile("mcr " __stringify(ICC_EOIR1) : : "r" (irq));
+       write_sysreg(irq, ICC_EOIR1);
        isb();
  }
  
  static inline void gic_write_dir(u32 val)
  {
-       asm volatile("mcr " __stringify(ICC_DIR) : : "r" (val));
+       write_sysreg(val, ICC_DIR);
        isb();
  }
  
  static inline u32 gic_read_iar(void)
  {
-       u32 irqstat;
+       u32 irqstat = read_sysreg(ICC_IAR1);
  
-       asm volatile("mrc " __stringify(ICC_IAR1) : "=r" (irqstat));
        dsb(sy);
        return irqstat;
  }
  
  static inline void gic_write_pmr(u32 val)
  {
-       asm volatile("mcr " __stringify(ICC_PMR) : : "r" (val));
+       write_sysreg(val, ICC_PMR);
  }
  
  static inline void gic_write_ctlr(u32 val)
  {
-       asm volatile("mcr " __stringify(ICC_CTLR) : : "r" (val));
+       write_sysreg(val, ICC_CTLR);
        isb();
  }
  
  static inline void gic_write_grpen1(u32 val)
  {
-       asm volatile("mcr " __stringify(ICC_IGRPEN1) : : "r" (val));
+       write_sysreg(val, ICC_IGRPEN1);
        isb();
  }
  
  static inline void gic_write_sgi1r(u64 val)
  {
-       asm volatile("mcrr " __stringify(ICC_SGI1R) : : "r" (val));
+       write_sysreg(val, ICC_SGI1R);
  }
  
  static inline u32 gic_read_sre(void)
  {
-       u32 val;
-       asm volatile("mrc " __stringify(ICC_SRE) : "=r" (val));
-       return val;
+       return read_sysreg(ICC_SRE);
  }
  
  static inline void gic_write_sre(u32 val)
  {
-       asm volatile("mcr " __stringify(ICC_SRE) : : "r" (val));
+       write_sysreg(val, ICC_SRE);
        isb();
  }
  
  static inline void gic_write_bpr1(u32 val)
  {
-       asm volatile("mcr " __stringify(ICC_BPR1) : : "r" (val));
 -#if defined(__write_sysreg) && defined(ICC_BPR1)
+       write_sysreg(val, ICC_BPR1);
 -#else
 -      asm volatile("mcr " __stringify(ICC_BPR1) : : "r" (val));
 -#endif
  }
  
  /*
  
  #define MPIDR_LEVEL_BITS 8
  #define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1)
+ #define MPIDR_LEVEL_SHIFT(level) (MPIDR_LEVEL_BITS * level)
  
  #define MPIDR_AFFINITY_LEVEL(mpidr, level) \
        ((mpidr >> (MPIDR_LEVEL_BITS * level)) & MPIDR_LEVEL_MASK)
  
  #define ARM_CPU_IMP_ARM                       0x41
 +#define ARM_CPU_IMP_DEC                       0x44
  #define ARM_CPU_IMP_INTEL             0x69
  
  /* ARM implemented processors */
  #define ARM_CPU_PART_CORTEX_A15               0x4100c0f0
  #define ARM_CPU_PART_MASK             0xff00fff0
  
 +/* DEC implemented cores */
 +#define ARM_CPU_PART_SA1100           0x4400a110
 +
 +/* Intel implemented cores */
 +#define ARM_CPU_PART_SA1110           0x6900b110
 +#define ARM_CPU_REV_SA1110_A0         0
 +#define ARM_CPU_REV_SA1110_B0         4
 +#define ARM_CPU_REV_SA1110_B1         5
 +#define ARM_CPU_REV_SA1110_B2         6
 +#define ARM_CPU_REV_SA1110_B4         8
 +
  #define ARM_CPU_XSCALE_ARCH_MASK      0xe000
  #define ARM_CPU_XSCALE_ARCH_V1                0x2000
  #define ARM_CPU_XSCALE_ARCH_V2                0x4000
@@@ -164,11 -153,6 +165,11 @@@ static inline unsigned int __attribute_
        return read_cpuid(CPUID_ID);
  }
  
 +static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
 +{
 +      return read_cpuid(CPUID_CACHETYPE);
 +}
 +
  #elif defined(CONFIG_CPU_V7M)
  
  static inline unsigned int __attribute_const__ read_cpuid_id(void)
        return readl(BASEADDR_V7M_SCB + V7M_SCB_CPUID);
  }
  
 +static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
 +{
 +      return readl(BASEADDR_V7M_SCB + V7M_SCB_CTR);
 +}
 +
  #else /* ifdef CONFIG_CPU_CP15 / elif defined(CONFIG_CPU_V7M) */
  
  static inline unsigned int __attribute_const__ read_cpuid_id(void)
@@@ -195,11 -174,6 +196,11 @@@ static inline unsigned int __attribute_
        return (read_cpuid_id() & 0xFF000000) >> 24;
  }
  
 +static inline unsigned int __attribute_const__ read_cpuid_revision(void)
 +{
 +      return read_cpuid_id() & 0x0000000f;
 +}
 +
  /*
   * The CPU part number is meaningless without referring to the CPU
   * implementer: implementers are free to define their own part numbers
@@@ -220,6 -194,11 +221,6 @@@ static inline unsigned int __attribute_
        return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK;
  }
  
 -static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
 -{
 -      return read_cpuid(CPUID_CACHETYPE);
 -}
 -
  static inline unsigned int __attribute_const__ read_cpuid_tcmstatus(void)
  {
        return read_cpuid(CPUID_TCM);
@@@ -230,10 -209,6 +231,10 @@@ static inline unsigned int __attribute_
        return read_cpuid(CPUID_MPIDR);
  }
  
 +/* StrongARM-11x0 CPUs */
 +#define cpu_is_sa1100() (read_cpuid_part() == ARM_CPU_PART_SA1100)
 +#define cpu_is_sa1110() (read_cpuid_part() == ARM_CPU_PART_SA1110)
 +
  /*
   * Intel's XScale3 core supports some v6 features (supersections, L2)
   * but advertises itself as v5 as it does not support the v6 ISA.  For
diff --combined arch/arm/kvm/arm.c
@@@ -144,6 -144,16 +144,16 @@@ out_fail_alloc
        return ret;
  }
  
+ bool kvm_arch_has_vcpu_debugfs(void)
+ {
+       return false;
+ }
+ int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
+ {
+       return 0;
+ }
  int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
  {
        return VM_FAULT_SIGBUS;
@@@ -158,6 -168,8 +168,6 @@@ void kvm_arch_destroy_vm(struct kvm *kv
  {
        int i;
  
 -      kvm_free_stage2_pgd(kvm);
 -
        for (i = 0; i < KVM_MAX_VCPUS; ++i) {
                if (kvm->vcpus[i]) {
                        kvm_arch_vcpu_free(kvm->vcpus[i]);
@@@ -1176,6 -1188,10 +1186,10 @@@ static int init_common_resources(void
                return -ENOMEM;
        }
  
+       /* set size of VMID supported by CPU */
+       kvm_vmid_bits = kvm_get_vmid_bits();
+       kvm_info("%d-bit VMID\n", kvm_vmid_bits);
        return 0;
  }
  
@@@ -1241,10 -1257,6 +1255,6 @@@ static void teardown_hyp_mode(void
  
  static int init_vhe_mode(void)
  {
-       /* set size of VMID supported by CPU */
-       kvm_vmid_bits = kvm_get_vmid_bits();
-       kvm_info("%d-bit VMID\n", kvm_vmid_bits);
        kvm_info("VHE mode initialized successfully\n");
        return 0;
  }
@@@ -1328,10 -1340,6 +1338,6 @@@ static int init_hyp_mode(void
                }
        }
  
-       /* set size of VMID supported by CPU */
-       kvm_vmid_bits = kvm_get_vmid_bits();
-       kvm_info("%d-bit VMID\n", kvm_vmid_bits);
        kvm_info("Hyp mode initialized successfully\n");
  
        return 0;
diff --combined arch/arm/kvm/mmu.c
@@@ -744,7 -744,6 +744,6 @@@ int kvm_alloc_stage2_pgd(struct kvm *kv
        if (!pgd)
                return -ENOMEM;
  
-       kvm_clean_pgd(pgd);
        kvm->arch.pgd = pgd;
        return 0;
  }
@@@ -936,7 -935,6 +935,6 @@@ static int stage2_set_pte(struct kvm *k
                if (!cache)
                        return 0; /* ignore calls from kvm_set_spte_hva */
                pte = mmu_memory_cache_alloc(cache);
-               kvm_clean_pte(pte);
                pmd_populate_kernel(NULL, pmd, pte);
                get_page(virt_to_page(pmd));
        }
@@@ -1434,6 -1432,11 +1432,11 @@@ int kvm_handle_guest_abort(struct kvm_v
        int ret, idx;
  
        is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
+       if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) {
+               kvm_inject_vabt(vcpu);
+               return 1;
+       }
        fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
  
        trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
@@@ -1714,8 -1717,7 +1717,8 @@@ int kvm_mmu_init(void
                 kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL));
  
        if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
 -          hyp_idmap_start <  kern_hyp_va(~0UL)) {
 +          hyp_idmap_start <  kern_hyp_va(~0UL) &&
 +          hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
                /*
                 * The idmap page is intersecting with the VA space,
                 * it is not safe to continue further.
@@@ -1894,7 -1896,6 +1897,7 @@@ void kvm_arch_memslots_updated(struct k
  
  void kvm_arch_flush_shadow_all(struct kvm *kvm)
  {
 +      kvm_free_stage2_pgd(kvm);
  }
  
  void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
@@@ -28,7 -28,6 +28,7 @@@
  #define ICC_CTLR_EL1                  sys_reg(3, 0, 12, 12, 4)
  #define ICC_SRE_EL1                   sys_reg(3, 0, 12, 12, 5)
  #define ICC_GRPEN1_EL1                        sys_reg(3, 0, 12, 12, 7)
 +#define ICC_BPR1_EL1                  sys_reg(3, 0, 12, 12, 3)
  
  #define ICC_SRE_EL2                   sys_reg(3, 4, 12, 9, 5)
  
  #include <linux/stringify.h>
  #include <asm/barrier.h>
  
+ #define read_gicreg(r)                                                        \
+       ({                                                              \
+               u64 reg;                                                \
+               asm volatile("mrs_s %0, " __stringify(r) : "=r" (reg)); \
+               reg;                                                    \
+       })
+ #define write_gicreg(v,r)                                             \
+       do {                                                            \
+               u64 __val = (v);                                        \
+               asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\
+       } while (0)
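For comparison with the AArch32 wrappers earlier in this diff, a hand expansion of the arm64 accessors above (illustration only; mrs_s/msr_s are the assembler macros used for system registers the assembler may not know by name):

    /* read_gicreg(ICH_VTR_EL2) expands roughly to: */
    u64 vtr = ({
            u64 reg;
            asm volatile("mrs_s %0, " __stringify(ICH_VTR_EL2) : "=r" (reg));
            reg;
    });

    /* write_gicreg(val, ICH_VMCR_EL2) expands roughly to: */
    do {
            u64 __val = (val);
            asm volatile("msr_s " __stringify(ICH_VMCR_EL2) ", %0" : : "r" (__val));
    } while (0);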
  /*
   * Low-level accessors
   *
@@@ -166,11 -178,6 +179,11 @@@ static inline void gic_write_sre(u32 va
        isb();
  }
  
 +static inline void gic_write_bpr1(u32 val)
 +{
 +      asm volatile("msr_s " __stringify(ICC_BPR1_EL1) ", %0" : : "r" (val));
 +}
 +
  #define gic_read_typer(c)             readq_relaxed(c)
  #define gic_write_irouter(v, c)               writeq_relaxed(v, c)
  
  .macro kern_hyp_va    reg
  alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
        and     \reg, \reg, #HYP_PAGE_OFFSET_HIGH_MASK
 -alternative_else
 -      nop
 -alternative_endif
 -alternative_if_not ARM64_HYP_OFFSET_LOW
 -      nop
 -alternative_else
 +alternative_else_nop_endif
 +alternative_if ARM64_HYP_OFFSET_LOW
        and     \reg, \reg, #HYP_PAGE_OFFSET_LOW_MASK
 -alternative_endif
 +alternative_else_nop_endif
  .endm
  
  #else
@@@ -162,12 -166,6 +162,6 @@@ void kvm_clear_hyp_idmap(void)
  #define       kvm_set_pte(ptep, pte)          set_pte(ptep, pte)
  #define       kvm_set_pmd(pmdp, pmd)          set_pmd(pmdp, pmd)
  
- static inline void kvm_clean_pgd(pgd_t *pgd) {}
- static inline void kvm_clean_pmd(pmd_t *pmd) {}
- static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
- static inline void kvm_clean_pte(pte_t *pte) {}
- static inline void kvm_clean_pte_entry(pte_t *pte) {}
  static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
  {
        pte_val(pte) |= PTE_S2_RDWR;
@@@ -124,13 -124,6 +124,13 @@@ static inline bool pnv_pci_is_m64(struc
                r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
  }
  
 +static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags)
 +{
 +      unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
 +
 +      return (resource_flags & flags) == flags;
 +}
 +
  static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
  {
        phb->ioda.pe_array[pe_no].phb = phb;
@@@ -156,7 -149,7 +156,7 @@@ static void pnv_ioda_reserve_pe(struct 
  
  static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
  {
 -      unsigned long pe = phb->ioda.total_pe_num - 1;
 +      long pe;
  
        for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) {
                if (!test_and_set_bit(pe, phb->ioda.pe_alloc))
  static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
  {
        struct pnv_phb *phb = pe->phb;
 +      unsigned int pe_num = pe->pe_number;
  
        WARN_ON(pe->pdev);
  
        memset(pe, 0, sizeof(struct pnv_ioda_pe));
 -      clear_bit(pe->pe_number, phb->ioda.pe_alloc);
 +      clear_bit(pe_num, phb->ioda.pe_alloc);
  }
  
  /* The default M64 BAR is shared by all PEs */
@@@ -2224,7 -2216,7 +2224,7 @@@ static long pnv_pci_ioda2_set_window(st
  
        pnv_pci_link_table_and_group(phb->hose->node, num,
                        tbl, &pe->table_group);
 -      pnv_pci_phb3_tce_invalidate_pe(pe);
 +      pnv_pci_ioda2_tce_invalidate_pe(pe);
  
        return 0;
  }
@@@ -2362,7 -2354,7 +2362,7 @@@ static long pnv_pci_ioda2_unset_window(
        if (ret)
                pe_warn(pe, "Unmapping failed, ret = %ld\n", ret);
        else
 -              pnv_pci_phb3_tce_invalidate_pe(pe);
 +              pnv_pci_ioda2_tce_invalidate_pe(pe);
  
        pnv_pci_unlink_table_and_group(table_group->tables[num], table_group);
  
@@@ -2718,15 -2710,21 +2718,21 @@@ static void pnv_pci_ioda2_setup_dma_pe(
  }
  
  #ifdef CONFIG_PCI_MSI
- static void pnv_ioda2_msi_eoi(struct irq_data *d)
+ int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
  {
-       unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
-       struct irq_chip *chip = irq_data_get_irq_chip(d);
        struct pnv_phb *phb = container_of(chip, struct pnv_phb,
                                           ioda.irq_chip);
+       return opal_pci_msi_eoi(phb->opal_id, hw_irq);
+ }
+ static void pnv_ioda2_msi_eoi(struct irq_data *d)
+ {
        int64_t rc;
+       unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+       struct irq_chip *chip = irq_data_get_irq_chip(d);
  
-       rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
+       rc = pnv_opal_pci_msi_eoi(chip, hw_irq);
        WARN_ON_ONCE(rc);
  
        icp_native_eoi(d);
@@@ -2756,6 -2754,16 +2762,16 @@@ void pnv_set_msi_irq_chip(struct pnv_ph
        irq_set_chip(virq, &phb->ioda.irq_chip);
  }
  
+ /*
+  * Returns true iff chip is something that we could call
+  * pnv_opal_pci_msi_eoi for.
+  */
+ bool is_pnv_opal_msi(struct irq_chip *chip)
+ {
+       return chip->irq_eoi == pnv_ioda2_msi_eoi;
+ }
+ EXPORT_SYMBOL_GPL(is_pnv_opal_msi);
  static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
                                  unsigned int hwirq, unsigned int virq,
                                  unsigned int is_64, struct msi_msg *msg)
@@@ -2878,7 -2886,7 +2894,7 @@@ static void pnv_pci_ioda_fixup_iov_reso
                res = &pdev->resource[i + PCI_IOV_RESOURCES];
                if (!res->flags || res->parent)
                        continue;
 -              if (!pnv_pci_is_m64(phb, res)) {
 +              if (!pnv_pci_is_m64_flags(res->flags)) {
                        dev_warn(&pdev->dev, "Don't support SR-IOV with"
                                        " non M64 VF BAR%d: %pR. \n",
                                 i, res);
@@@ -3103,7 -3111,7 +3119,7 @@@ static resource_size_t pnv_pci_window_a
         * alignment for any 64-bit resource, PCIe doesn't care and
         * bridges only do 64-bit prefetchable anyway.
         */
 -      if (phb->ioda.m64_segsize && (type & IORESOURCE_MEM_64))
 +      if (phb->ioda.m64_segsize && pnv_pci_is_m64_flags(type))
                return phb->ioda.m64_segsize;
        if (type & IORESOURCE_MEM)
                return phb->ioda.m32_segsize;
@@@ -3410,6 -3418,12 +3426,6 @@@ static void pnv_ioda_release_pe(struct 
        struct pnv_phb *phb = pe->phb;
        struct pnv_ioda_pe *slave, *tmp;
  
 -      /* Release slave PEs in compound PE */
 -      if (pe->flags & PNV_IODA_PE_MASTER) {
 -              list_for_each_entry_safe(slave, tmp, &pe->slaves, list)
 -                      pnv_ioda_release_pe(slave);
 -      }
 -
        list_del(&pe->list);
        switch (phb->type) {
        case PNV_PHB_IODA1:
  
        pnv_ioda_release_pe_seg(pe);
        pnv_ioda_deconfigure_pe(pe->phb, pe);
 -      pnv_ioda_free_pe(pe);
 +
 +      /* Release slave PEs in the compound PE */
 +      if (pe->flags & PNV_IODA_PE_MASTER) {
 +              list_for_each_entry_safe(slave, tmp, &pe->slaves, list) {
 +                      list_del(&slave->list);
 +                      pnv_ioda_free_pe(slave);
 +              }
 +      }
 +
 +      /*
 +       * The PE for root bus can be removed because of hotplug in EEH
 +       * recovery for fenced PHB error. We need to mark the PE dead so
 +       * that it can be populated again in PCI hot add path. The PE
 +       * shouldn't be destroyed as it's the global reserved resource.
 +       */
 +      if (phb->ioda.root_pe_populated &&
 +          phb->ioda.root_pe_idx == pe->pe_number)
 +              phb->ioda.root_pe_populated = false;
 +      else
 +              pnv_ioda_free_pe(pe);
  }
  
  static void pnv_pci_release_device(struct pci_dev *pdev)
        if (!pdn || pdn->pe_number == IODA_INVALID_PE)
                return;
  
 +      /*
 +       * PCI hotplug can happen as part of EEH error recovery. The @pdn
 +       * isn't removed and added afterwards in this scenario. We should
 +       * set the PE number in @pdn to an invalid one. Otherwise, the PE's
 +       * device count is decreased when removing devices but not
 +       * increased when adding them back, leaving the PE's device
 +       * count unbalanced and eventually breaking the normal PCI
 +       * hotplug path.
 +       */
        pe = &phb->ioda.pe_array[pdn->pe_number];
 +      pdn->pe_number = IODA_INVALID_PE;
 +
        WARN_ON(--pe->device_count < 0);
        if (pe->device_count == 0)
                pnv_ioda_release_pe(pe);
diff --combined arch/s390/kvm/kvm-s390.c
@@@ -245,33 -245,22 +245,33 @@@ static void kvm_s390_cpu_feat_init(void
                     PTFF_QAF);
  
        if (test_facility(17)) { /* MSA */
 -              __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
 -              __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
 -              __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
 -              __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
 -              __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
 +              __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 +                            kvm_s390_available_subfunc.kmac);
 +              __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 +                            kvm_s390_available_subfunc.kmc);
 +              __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 +                            kvm_s390_available_subfunc.km);
 +              __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 +                            kvm_s390_available_subfunc.kimd);
 +              __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 +                            kvm_s390_available_subfunc.klmd);
        }
        if (test_facility(76)) /* MSA3 */
 -              __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
 +              __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 +                            kvm_s390_available_subfunc.pckmo);
        if (test_facility(77)) { /* MSA4 */
 -              __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
 -              __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
 -              __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
 -              __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
 +              __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 +                            kvm_s390_available_subfunc.kmctr);
 +              __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 +                            kvm_s390_available_subfunc.kmf);
 +              __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 +                            kvm_s390_available_subfunc.kmo);
 +              __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 +                            kvm_s390_available_subfunc.pcc);
        }
        if (test_facility(57)) /* MSA5 */
 -              __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
 +              __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
 +                            kvm_s390_available_subfunc.ppno);
  
        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
@@@ -384,7 -373,9 +384,9 @@@ int kvm_vm_ioctl_check_extension(struc
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
                r = KVM_S390_BSCA_CPU_SLOTS;
-               if (sclp.has_esca && sclp.has_64bscao)
+               if (!kvm_s390_use_sca_entries())
+                       r = KVM_MAX_VCPUS;
+               else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
                break;
        case KVM_CAP_NR_MEMSLOTS:
@@@ -1498,6 -1489,16 +1500,16 @@@ out_err
        return rc;
  }
  
+ bool kvm_arch_has_vcpu_debugfs(void)
+ {
+       return false;
+ }
+ int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
+ {
+       return 0;
+ }
  void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
  {
        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
@@@ -1561,6 -1562,8 +1573,8 @@@ static int __kvm_ucontrol_vcpu_init(str
  
  static void sca_del_vcpu(struct kvm_vcpu *vcpu)
  {
+       if (!kvm_s390_use_sca_entries())
+               return;
        read_lock(&vcpu->kvm->arch.sca_lock);
        if (vcpu->kvm->arch.use_esca) {
                struct esca_block *sca = vcpu->kvm->arch.sca;
  
  static void sca_add_vcpu(struct kvm_vcpu *vcpu)
  {
+       if (!kvm_s390_use_sca_entries()) {
+               struct bsca_block *sca = vcpu->kvm->arch.sca;
+               /* we still need the basic sca for the ipte control */
+               vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
+               vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
+       }
        read_lock(&vcpu->kvm->arch.sca_lock);
        if (vcpu->kvm->arch.use_esca) {
                struct esca_block *sca = vcpu->kvm->arch.sca;
@@@ -1658,6 -1668,11 +1679,11 @@@ static int sca_can_add_vcpu(struct kvm 
  {
        int rc;
  
+       if (!kvm_s390_use_sca_entries()) {
+               if (id < KVM_MAX_VCPUS)
+                       return true;
+               return false;
+       }
        if (id < KVM_S390_BSCA_CPU_SLOTS)
                return true;
        if (!sclp.has_esca || !sclp.has_64bscao)
@@@ -1946,8 -1961,6 +1972,6 @@@ int kvm_arch_vcpu_setup(struct kvm_vcp
                vcpu->arch.sie_block->eca |= 1;
        if (sclp.has_sigpif)
                vcpu->arch.sie_block->eca |= 0x10000000U;
-       if (test_kvm_facility(vcpu->kvm, 64))
-               vcpu->arch.sie_block->ecb3 |= 0x01;
        if (test_kvm_facility(vcpu->kvm, 129)) {
                vcpu->arch.sie_block->eca |= 0x00020000;
                vcpu->arch.sie_block->ecd |= 0x20000000;
@@@ -2239,10 -2252,9 +2263,10 @@@ int kvm_arch_vcpu_ioctl_set_fpu(struct 
                return -EINVAL;
        current->thread.fpu.fpc = fpu->fpc;
        if (MACHINE_HAS_VX)
 -              convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
 +              convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
 +                               (freg_t *) fpu->fprs);
        else
 -              memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
 +              memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
        return 0;
  }
  
@@@ -2251,10 -2263,9 +2275,10 @@@ int kvm_arch_vcpu_ioctl_get_fpu(struct 
        /* make sure we have the latest values */
        save_fpu_regs();
        if (MACHINE_HAS_VX)
 -              convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
 +              convert_vx_to_fp((freg_t *) fpu->fprs,
 +                               (__vector128 *) vcpu->run->s.regs.vrs);
        else
 -              memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
 +              memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
        fpu->fpc = current->thread.fpu.fpc;
        return 0;
  }
@@@ -2704,6 -2715,19 +2728,19 @@@ static void sync_regs(struct kvm_vcpu *
                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
                        kvm_clear_async_pf_completion_queue(vcpu);
        }
+       /*
+        * If userspace sets the riccb (e.g. after migration) to a valid state,
+        * we should enable RI here instead of doing the lazy enablement.
+        */
+       if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
+           test_kvm_facility(vcpu->kvm, 64)) {
+               struct runtime_instr_cb *riccb =
+                       (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
+               if (riccb->valid)
+                       vcpu->arch.sie_block->ecb3 |= 0x01;
+       }
        kvm_run->kvm_dirty_regs = 0;
  }
  
@@@ -2847,38 -2871,6 +2884,6 @@@ int kvm_s390_vcpu_store_status(struct k
        return kvm_s390_store_status_unloaded(vcpu, addr);
  }
  
- /*
-  * store additional status at address
-  */
- int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
-                                       unsigned long gpa)
- {
-       /* Only bits 0-53 are used for address formation */
-       if (!(gpa & ~0x3ff))
-               return 0;
-       return write_guest_abs(vcpu, gpa & ~0x3ff,
-                              (void *)&vcpu->run->s.regs.vrs, 512);
- }
- int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
- {
-       if (!test_kvm_facility(vcpu->kvm, 129))
-               return 0;
-       /*
-        * The guest VXRS are in the host VXRs due to the lazy
-        * copying in vcpu load/put. We can simply call save_fpu_regs()
-        * to save the current register state because we are in the
-        * middle of a load/put cycle.
-        *
-        * Let's update our copies before we save it into the save area.
-        */
-       save_fpu_regs();
-       return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
- }
  static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
  {
        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
diff --combined arch/x86/kvm/svm.c
@@@ -34,6 -34,8 +34,8 @@@
  #include <linux/sched.h>
  #include <linux/trace_events.h>
  #include <linux/slab.h>
+ #include <linux/amd-iommu.h>
+ #include <linux/hashtable.h>
  
  #include <asm/apic.h>
  #include <asm/perf_event.h>
@@@ -41,6 -43,7 +43,7 @@@
  #include <asm/desc.h>
  #include <asm/debugreg.h>
  #include <asm/kvm_para.h>
+ #include <asm/irq_remapping.h>
  
  #include <asm/virtext.h>
  #include "trace.h"
@@@ -96,6 -99,19 +99,19 @@@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id)
  #define AVIC_UNACCEL_ACCESS_OFFSET_MASK               0xFF0
  #define AVIC_UNACCEL_ACCESS_VECTOR_MASK               0xFFFFFFFF
  
+ /* AVIC GATAG is encoded using VM and VCPU IDs */
+ #define AVIC_VCPU_ID_BITS             8
+ #define AVIC_VCPU_ID_MASK             ((1 << AVIC_VCPU_ID_BITS) - 1)
+ #define AVIC_VM_ID_BITS                       24
+ #define AVIC_VM_ID_NR                 (1 << AVIC_VM_ID_BITS)
+ #define AVIC_VM_ID_MASK                       ((1 << AVIC_VM_ID_BITS) - 1)
+ #define AVIC_GATAG(x, y)              (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
+                                               (y & AVIC_VCPU_ID_MASK))
+ #define AVIC_GATAG_TO_VMID(x)         ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
+ #define AVIC_GATAG_TO_VCPUID(x)               (x & AVIC_VCPU_ID_MASK)
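As a quick illustration (not part of the patch; the values are invented for the example), the GA tag round-trips like this, which is how avic_ga_log_notifier() further down recovers the VM and vCPU from the tag the IOMMU logs:

    u32 vm_id   = 0x000123;                   /* 24-bit AVIC VM ID (example) */
    u32 vcpu_id = 0x05;                       /* 8-bit vCPU ID (example)     */
    u32 ga_tag  = AVIC_GATAG(vm_id, vcpu_id); /* 0x00012305                  */

    WARN_ON(AVIC_GATAG_TO_VMID(ga_tag)   != vm_id);   /* recovers 0x000123 */
    WARN_ON(AVIC_GATAG_TO_VCPUID(ga_tag) != vcpu_id); /* recovers 0x05     */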
  static bool erratum_383_found __read_mostly;
  
  static const u32 host_save_user_msrs[] = {
@@@ -185,6 -201,23 +201,23 @@@ struct vcpu_svm 
        struct page *avic_backing_page;
        u64 *avic_physical_id_cache;
        bool avic_is_running;
+       /*
+        * Per-vcpu list of struct amd_svm_iommu_ir:
+        * This is used mainly to store interrupt remapping information used
+        * when updating the vcpu affinity. This avoids the need to scan the
+        * IRTEs and try to match the ga_tag in the IOMMU driver.
+        */
+       struct list_head ir_list;
+       spinlock_t ir_list_lock;
+ };
+ /*
+  * This is a wrapper of struct amd_iommu_ir_data.
+  */
+ struct amd_svm_iommu_ir {
+       struct list_head node;  /* Used by SVM for per-vcpu ir_list */
+       void *data;             /* Storing pointer to struct amd_ir_data */
  };
  
  #define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK  (0xFF)
@@@ -242,6 -275,10 +275,10 @@@ static int avic
  module_param(avic, int, S_IRUGO);
  #endif
  
+ /* AVIC VM ID bit masks and lock */
+ static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR);
+ static DEFINE_SPINLOCK(avic_vm_id_lock);
  static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
  static void svm_flush_tlb(struct kvm_vcpu *vcpu);
  static void svm_complete_interrupts(struct vcpu_svm *svm);
@@@ -928,6 -965,55 +965,55 @@@ static void svm_disable_lbrv(struct vcp
        set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
  }
  
+ /* Note:
+  * This hash table is used to map VM_ID to a struct kvm_arch,
+  * when handling AMD IOMMU GALOG notification to schedule in
+  * a particular vCPU.
+  */
+ #define SVM_VM_DATA_HASH_BITS 8
+ DECLARE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
+ static spinlock_t svm_vm_data_hash_lock;
+ /* Note:
+  * This function is called from IOMMU driver to notify
+  * SVM to schedule in a particular vCPU of a particular VM.
+  */
+ static int avic_ga_log_notifier(u32 ga_tag)
+ {
+       unsigned long flags;
+       struct kvm_arch *ka = NULL;
+       struct kvm_vcpu *vcpu = NULL;
+       u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
+       u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
+       pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
+       spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
+       hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
+               struct kvm *kvm = container_of(ka, struct kvm, arch);
+               struct kvm_arch *vm_data = &kvm->arch;
+               if (vm_data->avic_vm_id != vm_id)
+                       continue;
+               vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
+               break;
+       }
+       spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
+       if (!vcpu)
+               return 0;
+       /* Note:
+        * At this point, the IOMMU should have already set the pending
+        * bit in the vAPIC backing page. So, we just need to schedule
+        * in the vcpu.
+        */
+       if (vcpu->mode == OUTSIDE_GUEST_MODE)
+               kvm_vcpu_wake_up(vcpu);
+       return 0;
+ }
  static __init int svm_hardware_setup(void)
  {
        int cpu;
        if (avic) {
                if (!npt_enabled ||
                    !boot_cpu_has(X86_FEATURE_AVIC) ||
-                   !IS_ENABLED(CONFIG_X86_LOCAL_APIC))
+                   !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
                        avic = false;
-               else
+               } else {
                        pr_info("AVIC enabled\n");
+                       hash_init(svm_vm_data_hash);
+                       spin_lock_init(&svm_vm_data_hash_lock);
+                       amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
+               }
        }
  
        return 0;
@@@ -1028,13 -1119,6 +1119,6 @@@ static void init_sys_seg(struct vmcb_se
        seg->base = 0;
  }
  
- static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu)
- {
-       struct vcpu_svm *svm = to_svm(vcpu);
-       return svm->vmcb->control.tsc_offset;
- }
  static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
@@@ -1280,19 -1364,55 +1364,55 @@@ static int avic_init_backing_page(struc
        return 0;
  }
  
+ static inline int avic_get_next_vm_id(void)
+ {
+       int id;
+       spin_lock(&avic_vm_id_lock);
+       /* AVIC VM ID is one-based. */
+       id = find_next_zero_bit(avic_vm_id_bitmap, AVIC_VM_ID_NR, 1);
+       if (id <= AVIC_VM_ID_MASK)
+               __set_bit(id, avic_vm_id_bitmap);
+       else
+               id = -EAGAIN;
+       spin_unlock(&avic_vm_id_lock);
+       return id;
+ }
+ static inline int avic_free_vm_id(int id)
+ {
+       if (id <= 0 || id > AVIC_VM_ID_MASK)
+               return -EINVAL;
+       spin_lock(&avic_vm_id_lock);
+       __clear_bit(id, avic_vm_id_bitmap);
+       spin_unlock(&avic_vm_id_lock);
+       return 0;
+ }
  static void avic_vm_destroy(struct kvm *kvm)
  {
+       unsigned long flags;
        struct kvm_arch *vm_data = &kvm->arch;
  
+       avic_free_vm_id(vm_data->avic_vm_id);
        if (vm_data->avic_logical_id_table_page)
                __free_page(vm_data->avic_logical_id_table_page);
        if (vm_data->avic_physical_id_table_page)
                __free_page(vm_data->avic_physical_id_table_page);
+       spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
+       hash_del(&vm_data->hnode);
+       spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
  }
  
  static int avic_vm_init(struct kvm *kvm)
  {
-       int err = -ENOMEM;
+       unsigned long flags;
+       int vm_id, err = -ENOMEM;
        struct kvm_arch *vm_data = &kvm->arch;
        struct page *p_page;
        struct page *l_page;
        if (!avic)
                return 0;
  
+       vm_id = avic_get_next_vm_id();
+       if (vm_id < 0)
+               return vm_id;
+       vm_data->avic_vm_id = (u32)vm_id;
        /* Allocating physical APIC ID table (4KB) */
        p_page = alloc_page(GFP_KERNEL);
        if (!p_page)
        vm_data->avic_logical_id_table_page = l_page;
        clear_page(page_address(l_page));
  
+       spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
+       hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id);
+       spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
        return 0;
  
  free_avic:
        return err;
  }
  
- /**
-  * This function is called during VCPU halt/unhalt.
-  */
- static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
+ static inline int
+ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
  {
-       u64 entry;
-       int h_physical_id = kvm_cpu_get_apicid(vcpu->cpu);
+       int ret = 0;
+       unsigned long flags;
+       struct amd_svm_iommu_ir *ir;
        struct vcpu_svm *svm = to_svm(vcpu);
  
-       if (!kvm_vcpu_apicv_active(vcpu))
-               return;
-       svm->avic_is_running = is_run;
+       if (!kvm_arch_has_assigned_device(vcpu->kvm))
+               return 0;
  
-       /* ID = 0xff (broadcast), ID > 0xff (reserved) */
-       if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
-               return;
+       /*
+        * Here, we go through the per-vcpu ir_list to update all existing
+        * interrupt remapping table entries targeting this vcpu.
+        */
+       spin_lock_irqsave(&svm->ir_list_lock, flags);
  
-       entry = READ_ONCE(*(svm->avic_physical_id_cache));
-       WARN_ON(is_run == !!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK));
+       if (list_empty(&svm->ir_list))
+               goto out;
  
-       entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
-       if (is_run)
-               entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
-       WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+       list_for_each_entry(ir, &svm->ir_list, node) {
+               ret = amd_iommu_update_ga(cpu, r, ir->data);
+               if (ret)
+                       break;
+       }
+ out:
+       spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+       return ret;
  }
  
  static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
  
        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+       avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
+                                       svm->avic_is_running);
  }
  
  static void avic_vcpu_put(struct kvm_vcpu *vcpu)
                return;
  
        entry = READ_ONCE(*(svm->avic_physical_id_cache));
+       if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
+               avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
        entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
  }
  
+ /**
+  * This function is called during VCPU halt/unhalt.
+  */
+ static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
+       svm->avic_is_running = is_run;
+       if (is_run)
+               avic_vcpu_load(vcpu, vcpu->cpu);
+       else
+               avic_vcpu_put(vcpu);
+ }
  static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
@@@ -1450,6 -1601,9 +1601,9 @@@ static struct kvm_vcpu *svm_create_vcpu
                err = avic_init_backing_page(&svm->vcpu);
                if (err)
                        goto free_page4;
+               INIT_LIST_HEAD(&svm->ir_list);
+               spin_lock_init(&svm->ir_list_lock);
        }
  
        /* We initialize this flag to true to make sure that the is_running
@@@ -4246,6 -4400,209 +4400,209 @@@ static void svm_deliver_avic_intr(struc
                kvm_vcpu_wake_up(vcpu);
  }
  
+ static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
+ {
+       unsigned long flags;
+       struct amd_svm_iommu_ir *cur;
+       spin_lock_irqsave(&svm->ir_list_lock, flags);
+       list_for_each_entry(cur, &svm->ir_list, node) {
+               if (cur->data != pi->ir_data)
+                       continue;
+               list_del(&cur->node);
+               kfree(cur);
+               break;
+       }
+       spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+ }
+ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
+ {
+       int ret = 0;
+       unsigned long flags;
+       struct amd_svm_iommu_ir *ir;
+       /**
+        * In some cases, the existing irte is updated and re-set,
+        * so we need to check here if it's already been added
+        * to the ir_list.
+        */
+       if (pi->ir_data && (pi->prev_ga_tag != 0)) {
+               struct kvm *kvm = svm->vcpu.kvm;
+               u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
+               struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
+               struct vcpu_svm *prev_svm;
+               if (!prev_vcpu) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+               prev_svm = to_svm(prev_vcpu);
+               svm_ir_list_del(prev_svm, pi);
+       }
+       /**
+        * Allocate a new amd_svm_iommu_ir, which will get
+        * added to the per-vcpu ir_list.
+        */
+       ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL);
+       if (!ir) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       ir->data = pi->ir_data;
+       spin_lock_irqsave(&svm->ir_list_lock, flags);
+       list_add(&ir->node, &svm->ir_list);
+       spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+ out:
+       return ret;
+ }
+ /**
+  * Note:
+  * The HW cannot support posting multicast/broadcast
+  * interrupts to a vCPU. So, we still use legacy interrupt
+  * remapping for these kinds of interrupts.
+  *
+  * For lowest-priority interrupts, we only support
+  * those with single CPU as the destination, e.g. user
+  * configures the interrupts via /proc/irq or uses
+  * irqbalance to make the interrupts single-CPU.
+  */
+ static int
+ get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
+                struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
+ {
+       struct kvm_lapic_irq irq;
+       struct kvm_vcpu *vcpu = NULL;
+       kvm_set_msi_irq(kvm, e, &irq);
+       if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+               pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
+                        __func__, irq.vector);
+               return -1;
+       }
+       pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
+                irq.vector);
+       *svm = to_svm(vcpu);
+       vcpu_info->pi_desc_addr = page_to_phys((*svm)->avic_backing_page);
+       vcpu_info->vector = irq.vector;
+       return 0;
+ }
+ /*
+  * svm_update_pi_irte - set IRTE for Posted-Interrupts
+  *
+  * @kvm: kvm
+  * @host_irq: host irq of the interrupt
+  * @guest_irq: gsi of the interrupt
+  * @set: set or unset PI
+  * returns 0 on success, < 0 on failure
+  */
+ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+                             uint32_t guest_irq, bool set)
+ {
+       struct kvm_kernel_irq_routing_entry *e;
+       struct kvm_irq_routing_table *irq_rt;
+       int idx, ret = -EINVAL;
+       if (!kvm_arch_has_assigned_device(kvm) ||
+           !irq_remapping_cap(IRQ_POSTING_CAP))
+               return 0;
+       pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
+                __func__, host_irq, guest_irq, set);
+       idx = srcu_read_lock(&kvm->irq_srcu);
+       irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+       WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
+       hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
+               struct vcpu_data vcpu_info;
+               struct vcpu_svm *svm = NULL;
+               if (e->type != KVM_IRQ_ROUTING_MSI)
+                       continue;
+               /**
+                * Here, we set up legacy mode in the following cases:
+                * 1. When the interrupt cannot be targeted to a specific vcpu.
+                * 2. Unsetting the posted interrupt.
+                * 3. APIC virtualization is disabled for the vcpu.
+                */
+               if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
+                   kvm_vcpu_apicv_active(&svm->vcpu)) {
+                       struct amd_iommu_pi_data pi;
+                       /* Try to enable guest_mode in IRTE */
+                       pi.base = page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK;
+                       pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
+                                                    svm->vcpu.vcpu_id);
+                       pi.is_guest_mode = true;
+                       pi.vcpu_data = &vcpu_info;
+                       ret = irq_set_vcpu_affinity(host_irq, &pi);
+                       /**
+                        * Here, we have successfully set up vcpu affinity in
+                        * IOMMU guest mode. Now, we need to store the posted
+                        * interrupt information in a per-vcpu ir_list so that
+                        * we can reference it directly when we update the vcpu
+                        * scheduling information in the IOMMU irte.
+                        */
+                       if (!ret && pi.is_guest_mode)
+                               svm_ir_list_add(svm, &pi);
+               } else {
+                       /* Use legacy mode in IRTE */
+                       struct amd_iommu_pi_data pi;
+                       /**
+                        * Here, pi is used to:
+                        * - Tell IOMMU to use legacy mode for this interrupt.
+                        * - Retrieve ga_tag of prior interrupt remapping data.
+                        */
+                       pi.is_guest_mode = false;
+                       ret = irq_set_vcpu_affinity(host_irq, &pi);
+                       /**
+                        * Check if the posted interrupt was previously
+                        * setup with the guest_mode by checking if the ga_tag
+                        * was cached. If so, we need to clean up the per-vcpu
+                        * ir_list.
+                        */
+                       if (!ret && pi.prev_ga_tag) {
+                               int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
+                               struct kvm_vcpu *vcpu;
+                               vcpu = kvm_get_vcpu_by_id(kvm, id);
+                               if (vcpu)
+                                       svm_ir_list_del(to_svm(vcpu), &pi);
+                       }
+               }
+               if (!ret && svm) {
+                       trace_kvm_pi_irte_update(svm->vcpu.vcpu_id,
+                                                host_irq, e->gsi,
+                                                vcpu_info.vector,
+                                                vcpu_info.pi_desc_addr, set);
+               }
+               if (ret < 0) {
+                       pr_err("%s: failed to update PI IRTE\n", __func__);
+                       goto out;
+               }
+       }
+       ret = 0;
+ out:
+       srcu_read_unlock(&kvm->irq_srcu, idx);
+       return ret;
+ }
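For context, a rough sketch of how the generic irqbypass code in x86.c (not shown in this diff) is expected to reach the new update_pi_irte hook; it mirrors the existing VT-d posted-interrupt wiring, and the exact field names here are quoted from memory rather than from this patch:

    int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
                                         struct irq_bypass_producer *prod)
    {
            struct kvm_kernel_irqfd *irqfd =
                    container_of(cons, struct kvm_kernel_irqfd, consumer);

            irqfd->producer = prod;

            /* prod->irq is the host IRQ from VFIO, irqfd->gsi the guest GSI */
            return kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq,
                                               irqfd->gsi, 1);
    }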
  static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
@@@ -4961,7 -5318,7 +5318,7 @@@ static inline void avic_post_state_rest
        avic_handle_ldr_update(vcpu);
  }
  
 -static struct kvm_x86_ops svm_x86_ops = {
 +static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
        .cpu_has_kvm_support = has_svm,
        .disabled_by_bios = is_disabled,
        .hardware_setup = svm_hardware_setup,
  
        .has_wbinvd_exit = svm_has_wbinvd_exit,
  
-       .read_tsc_offset = svm_read_tsc_offset,
        .write_tsc_offset = svm_write_tsc_offset,
        .adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest,
        .read_l1_tsc = svm_read_l1_tsc,
  
        .pmu_ops = &amd_pmu_ops,
        .deliver_posted_interrupt = svm_deliver_avic_intr,
+       .update_pi_irte = svm_update_pi_irte,
  };
  
  static int __init svm_init(void)
diff --combined arch/x86/kvm/vmx.c
@@@ -927,6 -927,8 +927,8 @@@ static unsigned long *vmx_msr_bitmap_le
  static unsigned long *vmx_msr_bitmap_longmode;
  static unsigned long *vmx_msr_bitmap_legacy_x2apic;
  static unsigned long *vmx_msr_bitmap_longmode_x2apic;
+ static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
+ static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
  static unsigned long *vmx_vmread_bitmap;
  static unsigned long *vmx_vmwrite_bitmap;
  
@@@ -939,6 -941,7 +941,7 @@@ static DEFINE_SPINLOCK(vmx_vpid_lock)
  static struct vmcs_config {
        int size;
        int order;
+       u32 basic_cap;
        u32 revision_id;
        u32 pin_based_exec_ctrl;
        u32 cpu_based_exec_ctrl;
@@@ -1215,6 -1218,11 +1218,11 @@@ static inline bool cpu_has_vmx_ple(void
                SECONDARY_EXEC_PAUSE_LOOP_EXITING;
  }
  
+ static inline bool cpu_has_vmx_basic_inout(void)
+ {
+       return  (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT);
+ }
  static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
  {
        return flexpriority_enabled && lapic_in_kernel(vcpu);
@@@ -2518,10 -2526,17 +2526,17 @@@ static void vmx_set_msr_bitmap(struct k
        else if (cpu_has_secondary_exec_ctrls() &&
                 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
                  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
-               if (is_long_mode(vcpu))
-                       msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
-               else
-                       msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
+               if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
+                       if (is_long_mode(vcpu))
+                               msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
+                       else
+                               msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
+               } else {
+                       if (is_long_mode(vcpu))
+                               msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
+                       else
+                               msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
+               }
        } else {
                if (is_long_mode(vcpu))
                        msr_bitmap = vmx_msr_bitmap_longmode;
@@@ -2603,11 -2618,6 +2618,6 @@@ static u64 vmx_read_l1_tsc(struct kvm_v
        return host_tsc + tsc_offset;
  }
  
- static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu)
- {
-       return vmcs_read64(TSC_OFFSET);
- }
  /*
   * writes 'offset' into guest's timestamp counter offset register
   */
@@@ -2877,6 -2887,8 +2887,8 @@@ static int vmx_get_vmx_msr(struct kvm_v
                *pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS |
                           ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
                           (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
+               if (cpu_has_vmx_basic_inout())
+                       *pdata |= VMX_BASIC_INOUT;
                break;
        case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
        case MSR_IA32_VMX_PINBASED_CTLS:
@@@ -3457,7 -3469,8 +3469,8 @@@ static __init int setup_vmcs_config(str
                return -EIO;
  
        vmcs_conf->size = vmx_msr_high & 0x1fff;
-       vmcs_conf->order = get_order(vmcs_config.size);
+       vmcs_conf->order = get_order(vmcs_conf->size);
+       vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
        vmcs_conf->revision_id = vmx_msr_low;
  
        vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
@@@ -4678,28 -4691,49 +4691,49 @@@ static void vmx_disable_intercept_for_m
                                                msr, MSR_TYPE_R | MSR_TYPE_W);
  }
  
- static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
+ static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
  {
-       __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
-                       msr, MSR_TYPE_R);
-       __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
-                       msr, MSR_TYPE_R);
+       if (apicv_active) {
+               __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+                               msr, MSR_TYPE_R);
+               __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+                               msr, MSR_TYPE_R);
+       } else {
+               __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
+                               msr, MSR_TYPE_R);
+               __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
+                               msr, MSR_TYPE_R);
+       }
  }
  
- static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
+ static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
  {
-       __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
-                       msr, MSR_TYPE_R);
-       __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
-                       msr, MSR_TYPE_R);
+       if (apicv_active) {
+               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+                               msr, MSR_TYPE_R);
+               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+                               msr, MSR_TYPE_R);
+       } else {
+               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
+                               msr, MSR_TYPE_R);
+               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
+                               msr, MSR_TYPE_R);
+       }
  }
  
- static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
+ static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active)
  {
-       __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
-                       msr, MSR_TYPE_W);
-       __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
-                       msr, MSR_TYPE_W);
+       if (apicv_active) {
+               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
+                               msr, MSR_TYPE_W);
+               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
+                               msr, MSR_TYPE_W);
+       } else {
+               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
+                               msr, MSR_TYPE_W);
+               __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
+                               msr, MSR_TYPE_W);
+       }
  }
  
  static bool vmx_get_enable_apicv(void)
@@@ -5279,29 -5313,30 +5313,30 @@@ static void vmx_inject_nmi(struct kvm_v
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
  
-       if (is_guest_mode(vcpu))
-               return;
+       if (!is_guest_mode(vcpu)) {
+               if (!cpu_has_virtual_nmis()) {
+                       /*
+                        * Tracking the NMI-blocked state in software is built upon
+                        * finding the next open IRQ window. This, in turn, depends on
+                        * well-behaving guests: They have to keep IRQs disabled at
+                        * least as long as the NMI handler runs. Otherwise we may
+                        * cause NMI nesting, maybe breaking the guest. But as this is
+                        * highly unlikely, we can live with the residual risk.
+                        */
+                       vmx->soft_vnmi_blocked = 1;
+                       vmx->vnmi_blocked_time = 0;
+               }
  
-       if (!cpu_has_virtual_nmis()) {
-               /*
-                * Tracking the NMI-blocked state in software is built upon
-                * finding the next open IRQ window. This, in turn, depends on
-                * well-behaving guests: They have to keep IRQs disabled at
-                * least as long as the NMI handler runs. Otherwise we may
-                * cause NMI nesting, maybe breaking the guest. But as this is
-                * highly unlikely, we can live with the residual risk.
-                */
-               vmx->soft_vnmi_blocked = 1;
-               vmx->vnmi_blocked_time = 0;
+               ++vcpu->stat.nmi_injections;
+               vmx->nmi_known_unmasked = false;
        }
  
-       ++vcpu->stat.nmi_injections;
-       vmx->nmi_known_unmasked = false;
        if (vmx->rmode.vm86_active) {
                if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
                        kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
                return;
        }
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
                        INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
  }
@@@ -6109,7 -6144,7 +6144,7 @@@ static int handle_ept_violation(struct 
        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
  
        gla_validity = (exit_qualification >> 7) & 0x3;
-       if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
+       if (gla_validity == 0x2) {
                printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
                printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
                        (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
@@@ -6360,22 -6395,32 +6395,32 @@@ static __init int hardware_setup(void
        if (!vmx_msr_bitmap_legacy_x2apic)
                goto out2;
  
+       vmx_msr_bitmap_legacy_x2apic_apicv_inactive =
+                               (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive)
+               goto out3;
        vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_msr_bitmap_longmode)
-               goto out3;
+               goto out4;
  
        vmx_msr_bitmap_longmode_x2apic =
                                (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_msr_bitmap_longmode_x2apic)
-               goto out4;
+               goto out5;
+       vmx_msr_bitmap_longmode_x2apic_apicv_inactive =
+                               (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive)
+               goto out6;
  
        vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_vmread_bitmap)
-               goto out6;
+               goto out7;
  
        vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (!vmx_vmwrite_bitmap)
-               goto out7;
+               goto out8;
  
        memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
        memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
  
        if (setup_vmcs_config(&vmcs_config) < 0) {
                r = -EIO;
-               goto out8;
+               goto out9;
        }
  
        if (boot_cpu_has(X86_FEATURE_NX))
                        vmx_msr_bitmap_legacy, PAGE_SIZE);
        memcpy(vmx_msr_bitmap_longmode_x2apic,
                        vmx_msr_bitmap_longmode, PAGE_SIZE);
+       memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
+                       vmx_msr_bitmap_legacy, PAGE_SIZE);
+       memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
+                       vmx_msr_bitmap_longmode, PAGE_SIZE);
  
        set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
  
+       /*
+        * These bitmaps are used when
+        * enable_apicv && kvm_vcpu_apicv_active().
+        */
        for (msr = 0x800; msr <= 0x8ff; msr++)
-               vmx_disable_intercept_msr_read_x2apic(msr);
+               vmx_disable_intercept_msr_read_x2apic(msr, true);
  
        /* TMCCT */
-       vmx_enable_intercept_msr_read_x2apic(0x839);
+       vmx_enable_intercept_msr_read_x2apic(0x839, true);
        /* TPR */
-       vmx_disable_intercept_msr_write_x2apic(0x808);
+       vmx_disable_intercept_msr_write_x2apic(0x808, true);
        /* EOI */
-       vmx_disable_intercept_msr_write_x2apic(0x80b);
+       vmx_disable_intercept_msr_write_x2apic(0x80b, true);
        /* SELF-IPI */
-       vmx_disable_intercept_msr_write_x2apic(0x83f);
+       vmx_disable_intercept_msr_write_x2apic(0x83f, true);
+       /*
+        * The *_apicv_inactive bitmaps are used when
+        * (enable_apicv && !kvm_vcpu_apicv_active()) || !enable_apicv.
+        */
+       /* TPR */
+       vmx_disable_intercept_msr_read_x2apic(0x808, false);
+       vmx_disable_intercept_msr_write_x2apic(0x808, false);
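For reference, the hard-coded MSR numbers above follow the architectural x2APIC mapping: each xAPIC MMIO register at offset R is exposed as MSR 0x800 + (R >> 4). An illustrative helper (not part of the patch):

    #define X2APIC_MSR(mmio_offset)        (0x800 + ((mmio_offset) >> 4))

    /* X2APIC_MSR(0x080) == 0x808   TPR      */
    /* X2APIC_MSR(0x0b0) == 0x80b   EOI      */
    /* X2APIC_MSR(0x390) == 0x839   TMCCT    */
    /* X2APIC_MSR(0x3f0) == 0x83f   SELF-IPI */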
  
        if (enable_ept) {
                kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
  
        return alloc_kvm_area();
  
- out8:
+ out9:
        free_page((unsigned long)vmx_vmwrite_bitmap);
- out7:
+ out8:
        free_page((unsigned long)vmx_vmread_bitmap);
+ out7:
+       free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
  out6:
        free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
- out4:
+ out5:
        free_page((unsigned long)vmx_msr_bitmap_longmode);
+ out4:
+       free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
  out3:
        free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
  out2:
@@@ -6544,7 -6608,9 +6608,9 @@@ out
  static __exit void hardware_unsetup(void)
  {
        free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+       free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
        free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+       free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
        free_page((unsigned long)vmx_msr_bitmap_legacy);
        free_page((unsigned long)vmx_msr_bitmap_longmode);
        free_page((unsigned long)vmx_io_bitmap_b);
@@@ -6726,7 -6792,7 +6792,7 @@@ static void nested_vmx_abort(struct kvm
  {
        /* TODO: not to reset guest simply here. */
        kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
-       pr_warn("kvm: nested vmx abort, indicator %d\n", indicator);
+       pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator);
  }
  
  static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
@@@ -7013,7 -7079,7 +7079,7 @@@ static int handle_vmon(struct kvm_vcpu 
        vmx->nested.vmcs02_num = 0;
  
        hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
-                    HRTIMER_MODE_REL);
+                    HRTIMER_MODE_REL_PINNED);
        vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
  
        vmx->nested.vmxon = true;
@@@ -8435,12 -8501,7 +8501,7 @@@ static void vmx_set_virtual_x2apic_mode
                return;
        }
  
-       /*
-        * There is not point to enable virtualize x2apic without enable
-        * apicv
-        */
-       if (!cpu_has_vmx_virtualize_x2apic_mode() ||
-                               !kvm_vcpu_apicv_active(vcpu))
+       if (!cpu_has_vmx_virtualize_x2apic_mode())
                return;
  
        if (!cpu_need_tpr_shadow(vcpu))
@@@ -9598,7 -9659,7 +9659,7 @@@ static int nested_vmx_check_msr_switch(
        maxphyaddr = cpuid_maxphyaddr(vcpu);
        if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
            (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
-               pr_warn_ratelimited(
+               pr_debug_ratelimited(
                        "nVMX: invalid MSR switch (0x%lx, %d, %llu, 0x%08llx)",
                        addr_field, maxphyaddr, count, addr);
                return -EINVAL;
@@@ -9671,13 -9732,13 +9732,13 @@@ static u32 nested_vmx_load_msr(struct k
        for (i = 0; i < count; i++) {
                if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
                                        &e, sizeof(e))) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s cannot read MSR entry (%u, 0x%08llx)\n",
                                __func__, i, gpa + i * sizeof(e));
                        goto fail;
                }
                if (nested_vmx_load_msr_check(vcpu, &e)) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s check failed (%u, 0x%x, 0x%x)\n",
                                __func__, i, e.index, e.reserved);
                        goto fail;
                msr.index = e.index;
                msr.data = e.value;
                if (kvm_set_msr(vcpu, &msr)) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
                                __func__, i, e.index, e.value);
                        goto fail;
@@@ -9706,13 -9767,13 +9767,13 @@@ static int nested_vmx_store_msr(struct 
                if (kvm_vcpu_read_guest(vcpu,
                                        gpa + i * sizeof(e),
                                        &e, 2 * sizeof(u32))) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s cannot read MSR entry (%u, 0x%08llx)\n",
                                __func__, i, gpa + i * sizeof(e));
                        return -EINVAL;
                }
                if (nested_vmx_store_msr_check(vcpu, &e)) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s check failed (%u, 0x%x, 0x%x)\n",
                                __func__, i, e.index, e.reserved);
                        return -EINVAL;
                msr_info.host_initiated = false;
                msr_info.index = e.index;
                if (kvm_get_msr(vcpu, &msr_info)) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s cannot read MSR (%u, 0x%x)\n",
                                __func__, i, e.index);
                        return -EINVAL;
                                         gpa + i * sizeof(e) +
                                             offsetof(struct vmx_msr_entry, value),
                                         &msr_info.data, sizeof(msr_info.data))) {
-                       pr_warn_ratelimited(
+                       pr_debug_ratelimited(
                                "%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
                                __func__, i, e.index, msr_info.data);
                        return -EINVAL;
@@@ -10500,6 -10561,9 +10561,9 @@@ static void prepare_vmcs12(struct kvm_v
                vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
        }
  
+       if (nested_cpu_has_ept(vmcs12))
+               vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
        if (nested_cpu_has_vid(vmcs12))
                vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
  
@@@ -10793,7 -10857,7 +10857,7 @@@ static void nested_vmx_vmexit(struct kv
         * We are now running in L2, mmu_notifier will force to reload the
         * page's hpa for L2 vmcs. Need to reload it for L1 before entering L1.
         */
-       kvm_vcpu_reload_apic_access_page(vcpu);
+       kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
  
        /*
         * Exiting from L2 to L1, we're now back to L1 which thinks it just
@@@ -11177,7 -11241,7 +11241,7 @@@ static void vmx_setup_mce(struct kvm_vc
                        ~FEATURE_CONTROL_LMCE;
  }
  
 -static struct kvm_x86_ops vmx_x86_ops = {
 +static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
        .cpu_has_kvm_support = cpu_has_kvm_support,
        .disabled_by_bios = vmx_disabled_by_bios,
        .hardware_setup = hardware_setup,
  
        .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
  
-       .read_tsc_offset = vmx_read_tsc_offset,
        .write_tsc_offset = vmx_write_tsc_offset,
        .adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest,
        .read_l1_tsc = vmx_read_l1_tsc,
diff --combined arch/x86/kvm/x86.c
@@@ -1367,7 -1367,7 +1367,7 @@@ static void kvm_track_tsc_matching(stru
  
  static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
  {
-       u64 curr_offset = kvm_x86_ops->read_tsc_offset(vcpu);
+       u64 curr_offset = vcpu->arch.tsc_offset;
        vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
  }
  
@@@ -1413,6 -1413,12 +1413,12 @@@ u64 kvm_read_l1_tsc(struct kvm_vcpu *vc
  }
  EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
  
+ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+ {
+       kvm_x86_ops->write_tsc_offset(vcpu, offset);
+       vcpu->arch.tsc_offset = offset;
+ }
  void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
  {
        struct kvm *kvm = vcpu->kvm;
  
        raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
        offset = kvm_compute_tsc_offset(vcpu, data);
-       ns = get_kernel_ns();
+       ns = ktime_get_boot_ns();
        elapsed = ns - kvm->arch.last_tsc_nsec;
  
        if (vcpu->arch.virtual_tsc_khz) {
  
        if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
                update_ia32_tsc_adjust_msr(vcpu, offset);
-       kvm_x86_ops->write_tsc_offset(vcpu, offset);
+       kvm_vcpu_write_tsc_offset(vcpu, offset);
        raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
  
        spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
@@@ -1716,6 -1722,88 +1722,88 @@@ static void kvm_gen_update_masterclock(
  #endif
  }
  
+ static u64 __get_kvmclock_ns(struct kvm *kvm)
+ {
+       struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0);
+       struct kvm_arch *ka = &kvm->arch;
+       s64 ns;
+       if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) {
+               u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+               ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc);
+       } else {
+               ns = ktime_get_boot_ns() + ka->kvmclock_offset;
+       }
+       return ns;
+ }
+ u64 get_kvmclock_ns(struct kvm *kvm)
+ {
+       unsigned long flags;
+       s64 ns;
+       local_irq_save(flags);
+       ns = __get_kvmclock_ns(kvm);
+       local_irq_restore(flags);
+       return ns;
+ }
+ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
+ {
+       struct kvm_vcpu_arch *vcpu = &v->arch;
+       struct pvclock_vcpu_time_info guest_hv_clock;
+       if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
+               &guest_hv_clock, sizeof(guest_hv_clock))))
+               return;
+       /* This VCPU is paused, but it's legal for a guest to read another
+        * VCPU's kvmclock, so we really have to follow the specification where
+        * it says that version is odd if data is being modified, and even after
+        * it is consistent.
+        *
+        * Version field updates must be kept separate.  This is because
+        * kvm_write_guest_cached might use a "rep movs" instruction, and
+        * writes within a string instruction are weakly ordered.  So there
+        * are three writes overall.
+        *
+        * As a small optimization, only write the version field in the first
+        * and third write.  The vcpu->pv_time cache is still valid, because the
+        * version field is the first in the struct.
+        */
+       BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
+       vcpu->hv_clock.version = guest_hv_clock.version + 1;
+       kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+                               &vcpu->hv_clock,
+                               sizeof(vcpu->hv_clock.version));
+       smp_wmb();
+       /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
+       vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
+       if (vcpu->pvclock_set_guest_stopped_request) {
+               vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
+               vcpu->pvclock_set_guest_stopped_request = false;
+       }
+       trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
+       kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+                               &vcpu->hv_clock,
+                               sizeof(vcpu->hv_clock));
+       smp_wmb();
+       vcpu->hv_clock.version++;
+       kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+                               &vcpu->hv_clock,
+                               sizeof(vcpu->hv_clock.version));
+ }
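The odd/even version discipline above exists for the benefit of guest readers. Schematically, a guest reading another vCPU's kvmclock page retries until it observes a stable, even version; this is an illustrative reader loop, not the exact guest pvclock code:

    static u64 read_peer_system_time(struct pvclock_vcpu_time_info *hv_clock)
    {
            u32 version;
            u64 system_time;

            do {
                    version = READ_ONCE(hv_clock->version);
                    smp_rmb();
                    system_time = hv_clock->system_time;
                    smp_rmb();
            } while ((version & 1) ||
                     version != READ_ONCE(hv_clock->version));

            return system_time;
    }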
  static int kvm_guest_time_update(struct kvm_vcpu *v)
  {
        unsigned long flags, tgt_tsc_khz;
        struct kvm_arch *ka = &v->kvm->arch;
        s64 kernel_ns;
        u64 tsc_timestamp, host_tsc;
-       struct pvclock_vcpu_time_info guest_hv_clock;
        u8 pvclock_flags;
        bool use_master_clock;
  
        }
        if (!use_master_clock) {
                host_tsc = rdtsc();
-               kernel_ns = get_kernel_ns();
+               kernel_ns = ktime_get_boot_ns();
        }
  
        tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
  
        local_irq_restore(flags);
  
-       if (!vcpu->pv_time_enabled)
-               return 0;
+       /* With all the info we got, fill in the values */
  
        if (kvm_has_tsc_control)
                tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
                vcpu->hw_tsc_khz = tgt_tsc_khz;
        }
  
-       /* With all the info we got, fill in the values */
        vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
        vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
        vcpu->last_guest_tsc = tsc_timestamp;
  
-       if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
-               &guest_hv_clock, sizeof(guest_hv_clock))))
-               return 0;
-       /* This VCPU is paused, but it's legal for a guest to read another
-        * VCPU's kvmclock, so we really have to follow the specification where
-        * it says that version is odd if data is being modified, and even after
-        * it is consistent.
-        *
-        * Version field updates must be kept separate.  This is because
-        * kvm_write_guest_cached might use a "rep movs" instruction, and
-        * writes within a string instruction are weakly ordered.  So there
-        * are three writes overall.
-        *
-        * As a small optimization, only write the version field in the first
-        * and third write.  The vcpu->pv_time cache is still valid, because the
-        * version field is the first in the struct.
-        */
-       BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
-       vcpu->hv_clock.version = guest_hv_clock.version + 1;
-       kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
-                               &vcpu->hv_clock,
-                               sizeof(vcpu->hv_clock.version));
-       smp_wmb();
-       /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
-       pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
-       if (vcpu->pvclock_set_guest_stopped_request) {
-               pvclock_flags |= PVCLOCK_GUEST_STOPPED;
-               vcpu->pvclock_set_guest_stopped_request = false;
-       }
        /* If the host uses TSC clocksource, then it is stable */
+       pvclock_flags = 0;
        if (use_master_clock)
                pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
  
        vcpu->hv_clock.flags = pvclock_flags;
  
-       trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
-       kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
-                               &vcpu->hv_clock,
-                               sizeof(vcpu->hv_clock));
-       smp_wmb();
-       vcpu->hv_clock.version++;
-       kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
-                               &vcpu->hv_clock,
-                               sizeof(vcpu->hv_clock.version));
+       if (vcpu->pv_time_enabled)
+               kvm_setup_pvclock_page(v);
+       if (v == kvm_get_vcpu(v->kvm, 0))
+               kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
        return 0;
  }
  
@@@ -2743,16 -2786,16 +2786,16 @@@ void kvm_arch_vcpu_load(struct kvm_vcp
                if (tsc_delta < 0)
                        mark_tsc_unstable("KVM discovered backwards TSC");
  
 -              if (kvm_lapic_hv_timer_in_use(vcpu) &&
 -                              kvm_x86_ops->set_hv_timer(vcpu,
 -                                      kvm_get_lapic_tscdeadline_msr(vcpu)))
 -                      kvm_lapic_switch_to_sw_timer(vcpu);
                if (check_tsc_unstable()) {
                        u64 offset = kvm_compute_tsc_offset(vcpu,
                                                vcpu->arch.last_guest_tsc);
-                       kvm_x86_ops->write_tsc_offset(vcpu, offset);
+                       kvm_vcpu_write_tsc_offset(vcpu, offset);
                        vcpu->arch.tsc_catchup = 1;
                }
 +              if (kvm_lapic_hv_timer_in_use(vcpu) &&
 +                              kvm_x86_ops->set_hv_timer(vcpu,
 +                                      kvm_get_lapic_tscdeadline_msr(vcpu)))
 +                      kvm_lapic_switch_to_sw_timer(vcpu);
                /*
                 * On a host with synchronized TSC, there is no need to update
                 * kvmclock on vcpu->cpu migration
@@@ -4039,7 -4082,6 +4082,6 @@@ long kvm_arch_vm_ioctl(struct file *fil
        case KVM_SET_CLOCK: {
                struct kvm_clock_data user_ns;
                u64 now_ns;
-               s64 delta;
  
                r = -EFAULT;
                if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
  
                r = 0;
                local_irq_disable();
-               now_ns = get_kernel_ns();
-               delta = user_ns.clock - now_ns;
+               now_ns = __get_kvmclock_ns(kvm);
+               kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
                local_irq_enable();
-               kvm->arch.kvmclock_offset = delta;
                kvm_gen_update_masterclock(kvm);
                break;
        }
                struct kvm_clock_data user_ns;
                u64 now_ns;
  
-               local_irq_disable();
-               now_ns = get_kernel_ns();
-               user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
-               local_irq_enable();
+               now_ns = get_kvmclock_ns(kvm);
+               user_ns.clock = now_ns;
                user_ns.flags = 0;
                memset(&user_ns.pad, 0, sizeof(user_ns.pad));
  
@@@ -6700,7 -6739,6 +6739,6 @@@ static int vcpu_enter_guest(struct kvm_
  
        kvm_put_guest_xcr0(vcpu);
  
-       /* Interrupt is enabled by handle_external_intr() */
        kvm_x86_ops->handle_external_intr(vcpu);
  
        ++vcpu->stat.exits;
@@@ -7530,7 -7568,7 +7568,7 @@@ int kvm_arch_hardware_enable(void
         * before any KVM threads can be running.  Unfortunately, we can't
         * bring the TSCs fully up to date with real time, as we aren't yet far
         * enough into CPU bringup that we know how much real time has actually
-        * elapsed; our helper function, get_kernel_ns() will be using boot
+        * elapsed; our helper function, ktime_get_boot_ns() will be using boot
         * variables that haven't been updated yet.
         *
         * So we simply find the maximum observed TSC above, then record the
@@@ -7765,6 -7803,7 +7803,7 @@@ int kvm_arch_init_vm(struct kvm *kvm, u
        mutex_init(&kvm->arch.apic_map_lock);
        spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
  
+       kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
        pvclock_update_vm_gtod_copy(kvm);
  
        INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
@@@ -137,6 -137,7 +137,7 @@@ struct iommu_dev_data 
        bool pri_tlp;                     /* PASID TLB required for
                                             PPR completions */
        u32 errata;                       /* Bitmap for errata to apply */
+       bool use_vapic;                   /* Enable device to use vapic mode */
  };
  
  /*
@@@ -707,14 -708,74 +708,74 @@@ static void iommu_poll_ppr_log(struct a
        }
  }
  
+ #ifdef CONFIG_IRQ_REMAP
+ static int (*iommu_ga_log_notifier)(u32);
+ int amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
+ {
+       iommu_ga_log_notifier = notifier;
+       return 0;
+ }
+ EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier);
+ static void iommu_poll_ga_log(struct amd_iommu *iommu)
+ {
+       u32 head, tail, cnt = 0;
+       if (iommu->ga_log == NULL)
+               return;
+       head = readl(iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
+       tail = readl(iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
+       while (head != tail) {
+               volatile u64 *raw;
+               u64 log_entry;
+               raw = (u64 *)(iommu->ga_log + head);
+               cnt++;
+               /* Avoid memcpy function-call overhead */
+               log_entry = *raw;
+               /* Update head pointer of hardware ring-buffer */
+               head = (head + GA_ENTRY_SIZE) % GA_LOG_SIZE;
+               writel(head, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
+               /* Handle GA entry */
+               switch (GA_REQ_TYPE(log_entry)) {
+               case GA_GUEST_NR:
+                       if (!iommu_ga_log_notifier)
+                               break;
+                       pr_debug("AMD-Vi: %s: devid=%#x, ga_tag=%#x\n",
+                                __func__, GA_DEVID(log_entry),
+                                GA_TAG(log_entry));
+                       if (iommu_ga_log_notifier(GA_TAG(log_entry)) != 0)
+                               pr_err("AMD-Vi: GA log notifier failed.\n");
+                       break;
+               default:
+                       break;
+               }
+       }
+ }
+ #endif /* CONFIG_IRQ_REMAP */
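The notifier invoked above is whatever was registered through amd_iommu_register_ga_log_notifier(); on the KVM/SVM side this is expected to be a small handler that decodes the GA tag and kicks the target vcpu. A hedged sketch (the tag-to-vcpu lookup helper is hypothetical; AVIC_GATAG_TO_VCPUID appears in the SVM hunk earlier in this diff):

    static int avic_ga_log_notifier(u32 ga_tag)
    {
            struct kvm_vcpu *vcpu;

            /* find_vcpu_by_ga_tag() is a placeholder for the VM-hash +
             * AVIC_GATAG_TO_VCPUID() lookup done on the SVM side. */
            vcpu = find_vcpu_by_ga_tag(ga_tag);
            if (vcpu)
                    kvm_vcpu_wake_up(vcpu);

            return 0;
    }

    /* registered once during SVM hardware setup:
     *   amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
     */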
+ #define AMD_IOMMU_INT_MASK    \
+       (MMIO_STATUS_EVT_INT_MASK | \
+        MMIO_STATUS_PPR_INT_MASK | \
+        MMIO_STATUS_GALOG_INT_MASK)
  irqreturn_t amd_iommu_int_thread(int irq, void *data)
  {
        struct amd_iommu *iommu = (struct amd_iommu *) data;
        u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
  
-       while (status & (MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK)) {
-               /* Enable EVT and PPR interrupts again */
-               writel((MMIO_STATUS_EVT_INT_MASK | MMIO_STATUS_PPR_INT_MASK),
+       while (status & AMD_IOMMU_INT_MASK) {
+               /* Enable EVT and PPR and GA interrupts again */
+               writel(AMD_IOMMU_INT_MASK,
                        iommu->mmio_base + MMIO_STATUS_OFFSET);
  
                if (status & MMIO_STATUS_EVT_INT_MASK) {
                        iommu_poll_ppr_log(iommu);
                }
  
+ #ifdef CONFIG_IRQ_REMAP
+               if (status & MMIO_STATUS_GALOG_INT_MASK) {
+                       pr_devel("AMD-Vi: Processing IOMMU GA Log\n");
+                       iommu_poll_ga_log(iommu);
+               }
+ #endif
                /*
                 * Hardware bug: ERBT1312
                 * When re-enabling interrupt (by writing 1
@@@ -940,13 -1008,15 +1008,13 @@@ static void build_inv_irt(struct iommu_
   * Writes the command to the IOMMUs command buffer and informs the
   * hardware about the new command.
   */
 -static int iommu_queue_command_sync(struct amd_iommu *iommu,
 -                                  struct iommu_cmd *cmd,
 -                                  bool sync)
 +static int __iommu_queue_command_sync(struct amd_iommu *iommu,
 +                                    struct iommu_cmd *cmd,
 +                                    bool sync)
  {
        u32 left, tail, head, next_tail;
 -      unsigned long flags;
  
  again:
 -      spin_lock_irqsave(&iommu->lock, flags);
  
        head      = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
        tail      = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
  
        if (left <= 2) {
                struct iommu_cmd sync_cmd;
 -              volatile u64 sem = 0;
                int ret;
  
 -              build_completion_wait(&sync_cmd, (u64)&sem);
 -              copy_cmd_to_buffer(iommu, &sync_cmd, tail);
 +              iommu->cmd_sem = 0;
  
 -              spin_unlock_irqrestore(&iommu->lock, flags);
 +              build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem);
 +              copy_cmd_to_buffer(iommu, &sync_cmd, tail);
  
 -              if ((ret = wait_on_sem(&sem)) != 0)
 +              if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0)
                        return ret;
  
                goto again;
        /* We need to sync now to make sure all commands are processed */
        iommu->need_sync = sync;
  
 +      return 0;
 +}
 +
 +static int iommu_queue_command_sync(struct amd_iommu *iommu,
 +                                  struct iommu_cmd *cmd,
 +                                  bool sync)
 +{
 +      unsigned long flags;
 +      int ret;
 +
 +      spin_lock_irqsave(&iommu->lock, flags);
 +      ret = __iommu_queue_command_sync(iommu, cmd, sync);
        spin_unlock_irqrestore(&iommu->lock, flags);
  
 -      return 0;
 +      return ret;
  }
  
  static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
  static int iommu_completion_wait(struct amd_iommu *iommu)
  {
        struct iommu_cmd cmd;
 -      volatile u64 sem = 0;
 +      unsigned long flags;
        int ret;
  
        if (!iommu->need_sync)
                return 0;
  
 -      build_completion_wait(&cmd, (u64)&sem);
  
 -      ret = iommu_queue_command_sync(iommu, &cmd, false);
 +      build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
 +
 +      spin_lock_irqsave(&iommu->lock, flags);
 +
 +      iommu->cmd_sem = 0;
 +
 +      ret = __iommu_queue_command_sync(iommu, &cmd, false);
        if (ret)
 -              return ret;
 +              goto out_unlock;
 +
 +      ret = wait_on_sem(&iommu->cmd_sem);
 +
 +out_unlock:
 +      spin_unlock_irqrestore(&iommu->lock, flags);
  
 -      return wait_on_sem(&sem);
 +      return ret;
  }
  
  static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
@@@ -2967,6 -3016,12 +3035,12 @@@ static void amd_iommu_detach_device(str
        if (!iommu)
                return;
  
+ #ifdef CONFIG_IRQ_REMAP
+       if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
+           (dom->type == IOMMU_DOMAIN_UNMANAGED))
+               dev_data->use_vapic = 0;
+ #endif
        iommu_completion_wait(iommu);
  }
  
@@@ -2992,6 -3047,15 +3066,15 @@@ static int amd_iommu_attach_device(stru
  
        ret = attach_device(dev, domain);
  
+ #ifdef CONFIG_IRQ_REMAP
+       if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) {
+               if (dom->type == IOMMU_DOMAIN_UNMANAGED)
+                       dev_data->use_vapic = 1;
+               else
+                       dev_data->use_vapic = 0;
+       }
+ #endif
        iommu_completion_wait(iommu);
  
        return ret;
@@@ -3530,34 -3594,6 +3613,6 @@@ EXPORT_SYMBOL(amd_iommu_device_info)
   *
   *****************************************************************************/
  
- union irte {
-       u32 val;
-       struct {
-               u32 valid       : 1,
-                   no_fault    : 1,
-                   int_type    : 3,
-                   rq_eoi      : 1,
-                   dm          : 1,
-                   rsvd_1      : 1,
-                   destination : 8,
-                   vector      : 8,
-                   rsvd_2      : 8;
-       } fields;
- };
- struct irq_2_irte {
-       u16 devid; /* Device ID for IRTE table */
-       u16 index; /* Index into IRTE table*/
- };
- struct amd_ir_data {
-       struct irq_2_irte                       irq_2_irte;
-       union irte                              irte_entry;
-       union {
-               struct msi_msg                  msi_entry;
-       };
- };
  static struct irq_chip amd_ir_chip;
  
  #define DTE_IRQ_PHYS_ADDR_MASK        (((1ULL << 45)-1) << 6)
@@@ -3579,8 -3615,6 +3634,6 @@@ static void set_dte_irq_entry(u16 devid
        amd_iommu_dev_table[devid].data[2] = dte;
  }
  
- #define IRTE_ALLOCATED (~1U)
  static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic)
  {
        struct irq_remap_table *table = NULL;
                goto out;
        }
  
-       memset(table->table, 0, MAX_IRQS_PER_TABLE * sizeof(u32));
+       if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
+               memset(table->table, 0,
+                      MAX_IRQS_PER_TABLE * sizeof(u32));
+       else
+               memset(table->table, 0,
+                      (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
  
        if (ioapic) {
                int i;
  
                for (i = 0; i < 32; ++i)
-                       table->table[i] = IRTE_ALLOCATED;
+                       iommu->irte_ops->set_allocated(table, i);
        }
  
        irq_lookup_table[devid] = table;
@@@ -3658,6 -3697,10 +3716,10 @@@ static int alloc_irq_index(u16 devid, i
        struct irq_remap_table *table;
        unsigned long flags;
        int index, c;
+       struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+       if (!iommu)
+               return -ENODEV;
  
        table = get_irq_table(devid, false);
        if (!table)
        for (c = 0, index = table->min_index;
             index < MAX_IRQS_PER_TABLE;
             ++index) {
-               if (table->table[index] == 0)
+               if (!iommu->irte_ops->is_allocated(table, index))
                        c += 1;
                else
                        c = 0;
  
                if (c == count) {
                        for (; c != 0; --c)
-                               table->table[index - c + 1] = IRTE_ALLOCATED;
+                               iommu->irte_ops->set_allocated(table, index - c + 1);
  
                        index -= count - 1;
                        goto out;
        return index;
  }
  
- static int modify_irte(u16 devid, int index, union irte irte)
+ static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte,
+                         struct amd_ir_data *data)
+ {
+       struct irq_remap_table *table;
+       struct amd_iommu *iommu;
+       unsigned long flags;
+       struct irte_ga *entry;
+       iommu = amd_iommu_rlookup_table[devid];
+       if (iommu == NULL)
+               return -EINVAL;
+       table = get_irq_table(devid, false);
+       if (!table)
+               return -ENOMEM;
+       spin_lock_irqsave(&table->lock, flags);
+       entry = (struct irte_ga *)table->table;
+       entry = &entry[index];
+       entry->lo.fields_remap.valid = 0;
+       entry->hi.val = irte->hi.val;
+       entry->lo.val = irte->lo.val;
+       entry->lo.fields_remap.valid = 1;
+       if (data)
+               data->ref = entry;
+       spin_unlock_irqrestore(&table->lock, flags);
+       iommu_flush_irt(iommu, devid);
+       iommu_completion_wait(iommu);
+       return 0;
+ }
+ static int modify_irte(u16 devid, int index, union irte *irte)
  {
        struct irq_remap_table *table;
        struct amd_iommu *iommu;
                return -ENOMEM;
  
        spin_lock_irqsave(&table->lock, flags);
-       table->table[index] = irte.val;
+       table->table[index] = irte->val;
        spin_unlock_irqrestore(&table->lock, flags);
  
        iommu_flush_irt(iommu, devid);
@@@ -3730,13 -3808,146 +3827,146 @@@ static void free_irte(u16 devid, int in
                return;
  
        spin_lock_irqsave(&table->lock, flags);
-       table->table[index] = 0;
+       iommu->irte_ops->clear_allocated(table, index);
        spin_unlock_irqrestore(&table->lock, flags);
  
        iommu_flush_irt(iommu, devid);
        iommu_completion_wait(iommu);
  }
  
+ static void irte_prepare(void *entry,
+                        u32 delivery_mode, u32 dest_mode,
+                        u8 vector, u32 dest_apicid, int devid)
+ {
+       union irte *irte = (union irte *) entry;
+       irte->val                = 0;
+       irte->fields.vector      = vector;
+       irte->fields.int_type    = delivery_mode;
+       irte->fields.destination = dest_apicid;
+       irte->fields.dm          = dest_mode;
+       irte->fields.valid       = 1;
+ }
+ static void irte_ga_prepare(void *entry,
+                           u32 delivery_mode, u32 dest_mode,
+                           u8 vector, u32 dest_apicid, int devid)
+ {
+       struct irte_ga *irte = (struct irte_ga *) entry;
+       struct iommu_dev_data *dev_data = search_dev_data(devid);
+       irte->lo.val                      = 0;
+       irte->hi.val                      = 0;
+       irte->lo.fields_remap.guest_mode  = dev_data ? dev_data->use_vapic : 0;
+       irte->lo.fields_remap.int_type    = delivery_mode;
+       irte->lo.fields_remap.dm          = dest_mode;
+       irte->hi.fields.vector            = vector;
+       irte->lo.fields_remap.destination = dest_apicid;
+       irte->lo.fields_remap.valid       = 1;
+ }
+ static void irte_activate(void *entry, u16 devid, u16 index)
+ {
+       union irte *irte = (union irte *) entry;
+       irte->fields.valid = 1;
+       modify_irte(devid, index, irte);
+ }
+ static void irte_ga_activate(void *entry, u16 devid, u16 index)
+ {
+       struct irte_ga *irte = (struct irte_ga *) entry;
+       irte->lo.fields_remap.valid = 1;
+       modify_irte_ga(devid, index, irte, NULL);
+ }
+ static void irte_deactivate(void *entry, u16 devid, u16 index)
+ {
+       union irte *irte = (union irte *) entry;
+       irte->fields.valid = 0;
+       modify_irte(devid, index, irte);
+ }
+ static void irte_ga_deactivate(void *entry, u16 devid, u16 index)
+ {
+       struct irte_ga *irte = (struct irte_ga *) entry;
+       irte->lo.fields_remap.valid = 0;
+       modify_irte_ga(devid, index, irte, NULL);
+ }
+ static void irte_set_affinity(void *entry, u16 devid, u16 index,
+                             u8 vector, u32 dest_apicid)
+ {
+       union irte *irte = (union irte *) entry;
+       irte->fields.vector = vector;
+       irte->fields.destination = dest_apicid;
+       modify_irte(devid, index, irte);
+ }
+ static void irte_ga_set_affinity(void *entry, u16 devid, u16 index,
+                                u8 vector, u32 dest_apicid)
+ {
+       struct irte_ga *irte = (struct irte_ga *) entry;
+       struct iommu_dev_data *dev_data = search_dev_data(devid);
+       if (!dev_data || !dev_data->use_vapic) {
+               irte->hi.fields.vector = vector;
+               irte->lo.fields_remap.destination = dest_apicid;
+               irte->lo.fields_remap.guest_mode = 0;
+               modify_irte_ga(devid, index, irte, NULL);
+       }
+ }
+ #define IRTE_ALLOCATED (~1U)
+ static void irte_set_allocated(struct irq_remap_table *table, int index)
+ {
+       table->table[index] = IRTE_ALLOCATED;
+ }
+ static void irte_ga_set_allocated(struct irq_remap_table *table, int index)
+ {
+       struct irte_ga *ptr = (struct irte_ga *)table->table;
+       struct irte_ga *irte = &ptr[index];
+       memset(&irte->lo.val, 0, sizeof(u64));
+       memset(&irte->hi.val, 0, sizeof(u64));
+       irte->hi.fields.vector = 0xff;
+ }
+ static bool irte_is_allocated(struct irq_remap_table *table, int index)
+ {
+       union irte *ptr = (union irte *)table->table;
+       union irte *irte = &ptr[index];
+       return irte->val != 0;
+ }
+ static bool irte_ga_is_allocated(struct irq_remap_table *table, int index)
+ {
+       struct irte_ga *ptr = (struct irte_ga *)table->table;
+       struct irte_ga *irte = &ptr[index];
+       return irte->hi.fields.vector != 0;
+ }
+ static void irte_clear_allocated(struct irq_remap_table *table, int index)
+ {
+       table->table[index] = 0;
+ }
+ static void irte_ga_clear_allocated(struct irq_remap_table *table, int index)
+ {
+       struct irte_ga *ptr = (struct irte_ga *)table->table;
+       struct irte_ga *irte = &ptr[index];
+       memset(&irte->lo.val, 0, sizeof(u64));
+       memset(&irte->hi.val, 0, sizeof(u64));
+ }
  static int get_devid(struct irq_alloc_info *info)
  {
        int devid = -1;
@@@ -3821,19 -4032,17 +4051,17 @@@ static void irq_remapping_prepare_irte(
  {
        struct irq_2_irte *irte_info = &data->irq_2_irte;
        struct msi_msg *msg = &data->msi_entry;
-       union irte *irte = &data->irte_entry;
        struct IO_APIC_route_entry *entry;
+       struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+       if (!iommu)
+               return;
  
        data->irq_2_irte.devid = devid;
        data->irq_2_irte.index = index + sub_handle;
-       /* Setup IRTE for IOMMU */
-       irte->val = 0;
-       irte->fields.vector      = irq_cfg->vector;
-       irte->fields.int_type    = apic->irq_delivery_mode;
-       irte->fields.destination = irq_cfg->dest_apicid;
-       irte->fields.dm          = apic->irq_dest_mode;
-       irte->fields.valid       = 1;
+       iommu->irte_ops->prepare(data->entry, apic->irq_delivery_mode,
+                                apic->irq_dest_mode, irq_cfg->vector,
+                                irq_cfg->dest_apicid, devid);
  
        switch (info->type) {
        case X86_IRQ_ALLOC_TYPE_IOAPIC:
        }
  }
  
+ struct amd_irte_ops irte_32_ops = {
+       .prepare = irte_prepare,
+       .activate = irte_activate,
+       .deactivate = irte_deactivate,
+       .set_affinity = irte_set_affinity,
+       .set_allocated = irte_set_allocated,
+       .is_allocated = irte_is_allocated,
+       .clear_allocated = irte_clear_allocated,
+ };
+ struct amd_irte_ops irte_128_ops = {
+       .prepare = irte_ga_prepare,
+       .activate = irte_ga_activate,
+       .deactivate = irte_ga_deactivate,
+       .set_affinity = irte_ga_set_affinity,
+       .set_allocated = irte_ga_set_allocated,
+       .is_allocated = irte_ga_is_allocated,
+       .clear_allocated = irte_ga_clear_allocated,
+ };
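Which ops table a given IOMMU uses is presumably decided once at init time, keyed off the same amd_iommu_guest_ir mode checked in get_irq_table() above; the actual assignment lives in the init code outside this diff, roughly:

    iommu->irte_ops = AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)
                            ? &irte_128_ops : &irte_32_ops;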
  static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
                               unsigned int nr_irqs, void *arg)
  {
        struct irq_alloc_info *info = arg;
        struct irq_data *irq_data;
-       struct amd_ir_data *data;
+       struct amd_ir_data *data = NULL;
        struct irq_cfg *cfg;
        int i, ret, devid;
        int index = -1;
                if (!data)
                        goto out_free_data;
  
+               if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
+                       data->entry = kzalloc(sizeof(union irte), GFP_KERNEL);
+               else
+                       data->entry = kzalloc(sizeof(struct irte_ga),
+                                                    GFP_KERNEL);
+               if (!data->entry) {
+                       kfree(data);
+                       goto out_free_data;
+               }
                irq_data->hwirq = (devid << 16) + i;
                irq_data->chip_data = data;
                irq_data->chip = &amd_ir_chip;
@@@ -3957,6 -4196,7 +4215,7 @@@ static void irq_remapping_free(struct i
                        data = irq_data->chip_data;
                        irte_info = &data->irq_2_irte;
                        free_irte(irte_info->devid, irte_info->index);
+                       kfree(data->entry);
                        kfree(data);
                }
        }
@@@ -3968,8 -4208,11 +4227,11 @@@ static void irq_remapping_activate(stru
  {
        struct amd_ir_data *data = irq_data->chip_data;
        struct irq_2_irte *irte_info = &data->irq_2_irte;
+       struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid];
  
-       modify_irte(irte_info->devid, irte_info->index, data->irte_entry);
+       if (iommu)
+               iommu->irte_ops->activate(data->entry, irte_info->devid,
+                                         irte_info->index);
  }
  
  static void irq_remapping_deactivate(struct irq_domain *domain,
  {
        struct amd_ir_data *data = irq_data->chip_data;
        struct irq_2_irte *irte_info = &data->irq_2_irte;
-       union irte entry;
+       struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid];
  
-       entry.val = 0;
-       modify_irte(irte_info->devid, irte_info->index, data->irte_entry);
+       if (iommu)
+               iommu->irte_ops->deactivate(data->entry, irte_info->devid,
+                                           irte_info->index);
  }
  
  static struct irq_domain_ops amd_ir_domain_ops = {
        .deactivate = irq_remapping_deactivate,
  };
  
+ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
+ {
+       struct amd_iommu *iommu;
+       struct amd_iommu_pi_data *pi_data = vcpu_info;
+       struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data;
+       struct amd_ir_data *ir_data = data->chip_data;
+       struct irte_ga *irte = (struct irte_ga *) ir_data->entry;
+       struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
+       struct iommu_dev_data *dev_data = search_dev_data(irte_info->devid);
+       /* Note:
+        * If this device has never been set up for guest mode,
+        * we should not modify the IRTE.
+        */
+       if (!dev_data || !dev_data->use_vapic)
+               return 0;
+       pi_data->ir_data = ir_data;
+       /* Note:
+        * SVM tries to set up for VAPIC mode, but the IOMMU is running in
+        * legacy mode, so we force legacy mode instead.
+        */
+       if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) {
+               pr_debug("AMD-Vi: %s: Fall back to using intr legacy remap\n",
+                        __func__);
+               pi_data->is_guest_mode = false;
+       }
+       iommu = amd_iommu_rlookup_table[irte_info->devid];
+       if (iommu == NULL)
+               return -EINVAL;
+       pi_data->prev_ga_tag = ir_data->cached_ga_tag;
+       if (pi_data->is_guest_mode) {
+               /* Setting */
+               irte->hi.fields.ga_root_ptr = (pi_data->base >> 12);
+               irte->hi.fields.vector = vcpu_pi_info->vector;
+               irte->lo.fields_vapic.guest_mode = 1;
+               irte->lo.fields_vapic.ga_tag = pi_data->ga_tag;
+               ir_data->cached_ga_tag = pi_data->ga_tag;
+       } else {
+               /* Un-Setting */
+               struct irq_cfg *cfg = irqd_cfg(data);
+               irte->hi.val = 0;
+               irte->lo.val = 0;
+               irte->hi.fields.vector = cfg->vector;
+               irte->lo.fields_remap.guest_mode = 0;
+               irte->lo.fields_remap.destination = cfg->dest_apicid;
+               irte->lo.fields_remap.int_type = apic->irq_delivery_mode;
+               irte->lo.fields_remap.dm = apic->irq_dest_mode;
+               /*
+                * This communicates the ga_tag back to the caller
+                * so that it can do all the necessary clean up.
+                */
+               ir_data->cached_ga_tag = 0;
+       }
+       return modify_irte_ga(irte_info->devid, irte_info->index, irte, ir_data);
+ }
  static int amd_ir_set_affinity(struct irq_data *data,
                               const struct cpumask *mask, bool force)
  {
        struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
        struct irq_cfg *cfg = irqd_cfg(data);
        struct irq_data *parent = data->parent_data;
+       struct amd_iommu *iommu = amd_iommu_rlookup_table[irte_info->devid];
        int ret;
  
+       if (!iommu)
+               return -ENODEV;
        ret = parent->chip->irq_set_affinity(parent, mask, force);
        if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
                return ret;
         * Atomically updates the IRTE with the new destination, vector
         * and flushes the interrupt entry cache.
         */
-       ir_data->irte_entry.fields.vector = cfg->vector;
-       ir_data->irte_entry.fields.destination = cfg->dest_apicid;
-       modify_irte(irte_info->devid, irte_info->index, ir_data->irte_entry);
+       iommu->irte_ops->set_affinity(ir_data->entry, irte_info->devid,
+                           irte_info->index, cfg->vector, cfg->dest_apicid);
  
        /*
         * After this point, all the interrupts will start arriving
@@@ -4031,6 -4342,7 +4361,7 @@@ static void ir_compose_msi_msg(struct i
  static struct irq_chip amd_ir_chip = {
        .irq_ack = ir_ack_apic_edge,
        .irq_set_affinity = amd_ir_set_affinity,
+       .irq_set_vcpu_affinity = amd_ir_set_vcpu_affinity,
        .irq_compose_msi_msg = ir_compose_msi_msg,
  };
  
@@@ -4045,4 -4357,43 +4376,43 @@@ int amd_iommu_create_irq_domain(struct 
  
        return 0;
  }
+
+ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
+ {
+       unsigned long flags;
+       struct amd_iommu *iommu;
+       struct irq_remap_table *irt;
+       struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
+       int devid = ir_data->irq_2_irte.devid;
+       struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+       struct irte_ga *ref = (struct irte_ga *) ir_data->ref;
+
+       if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
+           !ref || !entry || !entry->lo.fields_vapic.guest_mode)
+               return 0;
+
+       iommu = amd_iommu_rlookup_table[devid];
+       if (!iommu)
+               return -ENODEV;
+
+       irt = get_irq_table(devid, false);
+       if (!irt)
+               return -ENODEV;
+
+       spin_lock_irqsave(&irt->lock, flags);
+
+       if (ref->lo.fields_vapic.guest_mode) {
+               if (cpu >= 0)
+                       ref->lo.fields_vapic.destination = cpu;
+               ref->lo.fields_vapic.is_run = is_run;
+               barrier();
+       }
+
+       spin_unlock_irqrestore(&irt->lock, flags);
+
+       iommu_flush_irt(iommu, devid);
+       iommu_completion_wait(iommu);
+       return 0;
+ }
+ EXPORT_SYMBOL(amd_iommu_update_ga);
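amd_iommu_update_ga() is exported so the hypervisor can refresh the running-state hint and destination CPU of a guest-mode IRTE whenever a vCPU is scheduled in or out. A minimal sketch of such a caller, assuming it kept the ir_data cookie handed back through pi_data->ir_data above; the function name is illustrative:

/* Sketch only: "cached_ir_data" is whatever the caller stashed from pi_data->ir_data. */
static void vcpu_sched_update_ga_sketch(void *cached_ir_data, int cpu, bool is_run)
{
	/* A negative cpu leaves the destination untouched; only is_run changes. */
	int ret = amd_iommu_update_ga(cpu, is_run, cached_ir_data);

	if (ret)
		pr_warn("amd_iommu_update_ga() failed: %d\n", ret);
}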
  #endif
@@@ -22,6 -22,7 +22,7 @@@
  
  #include <linux/types.h>
  #include <linux/mutex.h>
+ #include <linux/msi.h>
  #include <linux/list.h>
  #include <linux/spinlock.h>
  #include <linux/pci.h>
@@@ -69,6 -70,8 +70,8 @@@
  #define MMIO_EXCL_LIMIT_OFFSET  0x0028
  #define MMIO_EXT_FEATURES     0x0030
  #define MMIO_PPR_LOG_OFFSET   0x0038
+ #define MMIO_GA_LOG_BASE_OFFSET       0x00e0
+ #define MMIO_GA_LOG_TAIL_OFFSET       0x00e8
  #define MMIO_CMD_HEAD_OFFSET  0x2000
  #define MMIO_CMD_TAIL_OFFSET  0x2008
  #define MMIO_EVT_HEAD_OFFSET  0x2010
@@@ -76,6 -79,8 +79,8 @@@
  #define MMIO_STATUS_OFFSET    0x2020
  #define MMIO_PPR_HEAD_OFFSET  0x2030
  #define MMIO_PPR_TAIL_OFFSET  0x2038
+ #define MMIO_GA_HEAD_OFFSET   0x2040
+ #define MMIO_GA_TAIL_OFFSET   0x2048
  #define MMIO_CNTR_CONF_OFFSET 0x4000
  #define MMIO_CNTR_REG_OFFSET  0x40000
  #define MMIO_REG_END_OFFSET   0x80000
@@@ -92,6 -97,7 +97,7 @@@
  #define FEATURE_GA            (1ULL<<7)
  #define FEATURE_HE            (1ULL<<8)
  #define FEATURE_PC            (1ULL<<9)
+ #define FEATURE_GAM_VAPIC     (1ULL<<21)
  
  #define FEATURE_PASID_SHIFT   32
  #define FEATURE_PASID_MASK    (0x1fULL << FEATURE_PASID_SHIFT)
  #define MMIO_STATUS_EVT_INT_MASK      (1 << 1)
  #define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2)
  #define MMIO_STATUS_PPR_INT_MASK      (1 << 6)
+ #define MMIO_STATUS_GALOG_RUN_MASK    (1 << 8)
+ #define MMIO_STATUS_GALOG_OVERFLOW_MASK       (1 << 9)
+ #define MMIO_STATUS_GALOG_INT_MASK    (1 << 10)
  
  /* event logging constants */
  #define EVENT_ENTRY_SIZE      0x10
  #define CONTROL_PPFINT_EN       0x0eULL
  #define CONTROL_PPR_EN          0x0fULL
  #define CONTROL_GT_EN           0x10ULL
+ #define CONTROL_GA_EN           0x11ULL
+ #define CONTROL_GAM_EN          0x19ULL
+ #define CONTROL_GALOG_EN        0x1CULL
+ #define CONTROL_GAINT_EN        0x1DULL
  
  #define CTRL_INV_TO_MASK      (7 << CONTROL_INV_TIMEOUT)
  #define CTRL_INV_TO_NONE      0
  
  #define PPR_REQ_FAULT         0x01
  
+ /* Constants for GA Log handling */
+ #define GA_LOG_ENTRIES                512
+ #define GA_LOG_SIZE_SHIFT     56
+ #define GA_LOG_SIZE_512               (0x8ULL << GA_LOG_SIZE_SHIFT)
+ #define GA_ENTRY_SIZE         8
+ #define GA_LOG_SIZE           (GA_ENTRY_SIZE * GA_LOG_ENTRIES)
+ #define GA_TAG(x)             (u32)(x & 0xffffffffULL)
+ #define GA_DEVID(x)           (u16)(((x) >> 32) & 0xffffULL)
+ #define GA_REQ_TYPE(x)                (((x) >> 60) & 0xfULL)
+ #define GA_GUEST_NR           0x1
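Per the accessors above, a raw 64-bit GA log entry carries the ga_tag in its low 32 bits, the device ID in bits 32-47, and the request type in bits 60-63. A hedged sketch of how a GA-log poller might consume one entry; the notifier callback is an assumption, not an interface defined in this header:

/* Sketch: decode one raw GA log entry; the notifier hook is illustrative. */
static void decode_ga_log_entry_sketch(u64 log_entry, int (*ga_notifier)(u32 ga_tag))
{
	if (GA_REQ_TYPE(log_entry) != GA_GUEST_NR)
		return;		/* only guest requests carry a meaningful ga_tag */

	pr_debug("AMD-Vi: GA log: devid=%#x ga_tag=%#x\n",
		 GA_DEVID(log_entry), GA_TAG(log_entry));

	if (ga_notifier)
		ga_notifier(GA_TAG(log_entry));
}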
  #define PAGE_MODE_NONE    0x00
  #define PAGE_MODE_1_LEVEL 0x01
  #define PAGE_MODE_2_LEVEL 0x02
  #define IOMMU_CAP_NPCACHE 26
  #define IOMMU_CAP_EFR     27
  
+ /* IOMMU Feature Reporting Field (for IVHD type 10h) */
+ #define IOMMU_FEAT_GASUP_SHIFT        6
+
+ /* IOMMU Extended Feature Register (EFR) */
+ #define IOMMU_EFR_GASUP_SHIFT 7
+
  #define MAX_DOMAIN_ID 65536
  
  /* Protection domain flags */
@@@ -400,6 -432,7 +432,7 @@@ struct amd_iommu_fault 
  
  struct iommu_domain;
  struct irq_domain;
+ struct amd_irte_ops;
  
  /*
   * This structure contains generic data for  IOMMU protection domains
@@@ -490,6 -523,12 +523,12 @@@ struct amd_iommu 
        /* Base of the PPR log, if present */
        u8 *ppr_log;
  
+       /* Base of the GA log, if present */
+       u8 *ga_log;
+
+       /* Tail of the GA log, if present */
+       u8 *ga_log_tail;
+
        /* true if interrupts for this IOMMU are already enabled */
        bool int_enabled;
  
  #ifdef CONFIG_IRQ_REMAP
        struct irq_domain *ir_domain;
        struct irq_domain *msi_domain;
+       struct amd_irte_ops *irte_ops;
  #endif
 +
 +      volatile u64 __aligned(8) cmd_sem;
  };
  
  #define ACPIHID_UID_LEN 256
@@@ -683,4 -722,112 +724,112 @@@ static inline int get_hpet_devid(int id
        return -EINVAL;
  }
  
+ enum amd_iommu_intr_mode_type {
+       AMD_IOMMU_GUEST_IR_LEGACY,
+
+       /* This mode is not visible to users. It is used when
+        * we cannot fully enable vAPIC and must fall back to supporting
+        * only legacy interrupt remapping via 128-bit IRTEs.
+        */
+       AMD_IOMMU_GUEST_IR_LEGACY_GA,
+       AMD_IOMMU_GUEST_IR_VAPIC,
+ };
+
+ #define AMD_IOMMU_GUEST_IR_GA(x)      ((x) == AMD_IOMMU_GUEST_IR_VAPIC || \
+                                        (x) == AMD_IOMMU_GUEST_IR_LEGACY_GA)
+ #define AMD_IOMMU_GUEST_IR_VAPIC(x)   ((x) == AMD_IOMMU_GUEST_IR_VAPIC)
+
+ union irte {
+       u32 val;
+       struct {
+               u32 valid       : 1,
+                   no_fault    : 1,
+                   int_type    : 3,
+                   rq_eoi      : 1,
+                   dm          : 1,
+                   rsvd_1      : 1,
+                   destination : 8,
+                   vector      : 8,
+                   rsvd_2      : 8;
+       } fields;
+ };
+
+ union irte_ga_lo {
+       u64 val;
+       /* For int remapping */
+       struct {
+               u64 valid       : 1,
+                   no_fault    : 1,
+                   /* ------ */
+                   int_type    : 3,
+                   rq_eoi      : 1,
+                   dm          : 1,
+                   /* ------ */
+                   guest_mode  : 1,
+                   destination : 8,
+                   rsvd        : 48;
+       } fields_remap;
+       /* For guest vAPIC */
+       struct {
+               u64 valid       : 1,
+                   no_fault    : 1,
+                   /* ------ */
+                   ga_log_intr : 1,
+                   rsvd1       : 3,
+                   is_run      : 1,
+                   /* ------ */
+                   guest_mode  : 1,
+                   destination : 8,
+                   rsvd2       : 16,
+                   ga_tag      : 32;
+       } fields_vapic;
+ };
+
+ union irte_ga_hi {
+       u64 val;
+       struct {
+               u64 vector      : 8,
+                   rsvd_1      : 4,
+                   ga_root_ptr : 40,
+                   rsvd_2      : 12;
+       } fields;
+ };
+
+ struct irte_ga {
+       union irte_ga_lo lo;
+       union irte_ga_hi hi;
+ };
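The GA-mode IRTE is 128 bits wide, with the two "lo" views overlaying the same 64 bits and selected by guest_mode, in contrast to the 32-bit legacy union irte above. A compile-time sanity check one might add; this is a sketch, not part of this header:

/* Sketch: layout assumptions the remapping code relies on. */
static inline void irte_layout_checks_sketch(void)
{
	BUILD_BUG_ON(sizeof(union irte) != sizeof(u32));		/* legacy, 32-bit  */
	BUILD_BUG_ON(sizeof(struct irte_ga) != 2 * sizeof(u64));	/* GA mode, 128-bit */
	BUILD_BUG_ON(sizeof(union irte_ga_lo) != sizeof(union irte_ga_hi));
}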
+
+ struct irq_2_irte {
+       u16 devid; /* Device ID for IRTE table */
+       u16 index; /* Index into IRTE table */
+ };
+
+ struct amd_ir_data {
+       u32 cached_ga_tag;
+       struct irq_2_irte irq_2_irte;
+       struct msi_msg msi_entry;
+       void *entry;    /* Pointer to union irte or struct irte_ga */
+       void *ref;      /* Pointer to the actual IRTE in the remapping table */
+ };
+
+ struct amd_irte_ops {
+       void (*prepare)(void *, u32, u32, u8, u32, int);
+       void (*activate)(void *, u16, u16);
+       void (*deactivate)(void *, u16, u16);
+       void (*set_affinity)(void *, u16, u16, u8, u32);
+       void *(*get)(struct irq_remap_table *, int);
+       void (*set_allocated)(struct irq_remap_table *, int);
+       bool (*is_allocated)(struct irq_remap_table *, int);
+       void (*clear_allocated)(struct irq_remap_table *, int);
+ };
+
+ #ifdef CONFIG_IRQ_REMAP
+ extern struct amd_irte_ops irte_32_ops;
+ extern struct amd_irte_ops irte_128_ops;
+ #endif
  #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
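Taken together, the mode helpers and the two amd_irte_ops tables let the rest of the driver stay IRTE-format-agnostic: GA-capable modes use the 128-bit ops, plain legacy mode the 32-bit ops. A hedged sketch of the selection an init path might perform; the function below is illustrative, the real wiring lives in the IOMMU init code:

/* Sketch only: how a per-IOMMU irte_ops pointer could be chosen from the mode. */
static inline void set_irte_ops_sketch(struct amd_iommu *iommu, int guest_ir_mode)
{
#ifdef CONFIG_IRQ_REMAP
	if (AMD_IOMMU_GUEST_IR_GA(guest_ir_mode))
		iommu->irte_ops = &irte_128_ops;	/* struct irte_ga entries */
	else
		iommu->irte_ops = &irte_32_ops;		/* union irte entries */
#endif
}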