Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author    Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 19 Dec 2014 00:05:28 +0000 (16:05 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Fri, 19 Dec 2014 00:05:28 +0000 (16:05 -0800)
Pull KVM update from Paolo Bonzini:
 "3.19 changes for KVM:

   - spring cleaning: removed support for IA64, and for hardware-
     assisted virtualization on the PPC970

   - ARM, PPC, s390 all had only small fixes

  For x86:
   - small performance improvements (though only on weird guests)
   - usual round of hardware-compliancy fixes from Nadav
   - APICv fixes
   - XSAVES support for hosts and guests.  XSAVES hosts were broken
     because the (non-KVM) XSAVES patches inadvertently changed the KVM
     userspace ABI whenever XSAVES was enabled; hence, this part is
     going to stable.  Guest support is just a matter of exposing the
     feature and CPUID leaves support"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (179 commits)
  KVM: move APIC types to arch/x86/
  KVM: PPC: Book3S: Enable in-kernel XICS emulation by default
  KVM: PPC: Book3S HV: Improve H_CONFER implementation
  KVM: PPC: Book3S HV: Fix endianness of instruction obtained from HEIR register
  KVM: PPC: Book3S HV: Remove code for PPC970 processors
  KVM: PPC: Book3S HV: Tracepoints for KVM HV guest interactions
  KVM: PPC: Book3S HV: Simplify locking around stolen time calculations
  arch: powerpc: kvm: book3s_paired_singles.c: Remove unused function
  arch: powerpc: kvm: book3s_pr.c: Remove unused function
  arch: powerpc: kvm: book3s.c: Remove some unused functions
  arch: powerpc: kvm: book3s_32_mmu.c: Remove unused function
  KVM: PPC: Book3S HV: Check wait conditions before sleeping in kvmppc_vcore_blocked
  KVM: PPC: Book3S HV: ptes are big endian
  KVM: PPC: Book3S HV: Fix inaccuracies in ICP emulation for H_IPI
  KVM: PPC: Book3S HV: Fix KSM memory corruption
  KVM: PPC: Book3S HV: Fix an issue where guest is paused on receiving HMI
  KVM: PPC: Book3S HV: Fix computation of tlbie operand
  KVM: PPC: Book3S HV: Add missing HPTE unlock
  KVM: PPC: BookE: Improve irq inject tracepoint
  arm/arm64: KVM: Require in-kernel vgic for the arch timers
  ...

119 files changed:
Documentation/ia64/kvm.txt [deleted file]
Documentation/virtual/kvm/api.txt
Documentation/virtual/kvm/devices/vm.txt
Documentation/virtual/kvm/msr.txt
MAINTAINERS
arch/arm/include/asm/kvm_emulate.h
arch/arm/include/asm/kvm_host.h
arch/arm/include/asm/kvm_mmu.h
arch/arm/kvm/arm.c
arch/arm/kvm/guest.c
arch/arm/kvm/mmio.c
arch/arm/kvm/mmu.c
arch/arm/kvm/psci.c
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/kvm/guest.c
arch/ia64/Kconfig
arch/ia64/Makefile
arch/ia64/include/asm/kvm_host.h [deleted file]
arch/ia64/include/asm/pvclock-abi.h [deleted file]
arch/ia64/include/uapi/asm/kvm.h [deleted file]
arch/ia64/kvm/Kconfig [deleted file]
arch/ia64/kvm/Makefile [deleted file]
arch/ia64/kvm/asm-offsets.c [deleted file]
arch/ia64/kvm/irq.h [deleted file]
arch/ia64/kvm/kvm-ia64.c [deleted file]
arch/ia64/kvm/kvm_fw.c [deleted file]
arch/ia64/kvm/kvm_lib.c [deleted file]
arch/ia64/kvm/kvm_minstate.h [deleted file]
arch/ia64/kvm/lapic.h [deleted file]
arch/ia64/kvm/memcpy.S [deleted file]
arch/ia64/kvm/memset.S [deleted file]
arch/ia64/kvm/misc.h [deleted file]
arch/ia64/kvm/mmio.c [deleted file]
arch/ia64/kvm/optvfault.S [deleted file]
arch/ia64/kvm/process.c [deleted file]
arch/ia64/kvm/trampoline.S [deleted file]
arch/ia64/kvm/vcpu.c [deleted file]
arch/ia64/kvm/vcpu.h [deleted file]
arch/ia64/kvm/vmm.c [deleted file]
arch/ia64/kvm/vmm_ivt.S [deleted file]
arch/ia64/kvm/vti.h [deleted file]
arch/ia64/kvm/vtlb.c [deleted file]
arch/powerpc/include/asm/kvm_book3s.h
arch/powerpc/include/asm/kvm_book3s_64.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kvm/Kconfig
arch/powerpc/kvm/book3s.c
arch/powerpc/kvm/book3s_32_mmu.c
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_builtin.c
arch/powerpc/kvm/book3s_hv_interrupts.S
arch/powerpc/kvm/book3s_hv_ras.c
arch/powerpc/kvm/book3s_hv_rm_mmu.c
arch/powerpc/kvm/book3s_hv_rm_xics.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_paired_singles.c
arch/powerpc/kvm/book3s_pr.c
arch/powerpc/kvm/book3s_xics.c
arch/powerpc/kvm/book3s_xics.h
arch/powerpc/kvm/e500.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/kvm/trace_book3s.h [new file with mode: 0644]
arch/powerpc/kvm/trace_booke.h
arch/powerpc/kvm/trace_hv.h [new file with mode: 0644]
arch/powerpc/kvm/trace_pr.h
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/pgalloc.h
arch/s390/include/asm/sigp.h
arch/s390/kvm/gaccess.c
arch/s390/kvm/intercept.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/s390/kvm/priv.c
arch/s390/kvm/sigp.c
arch/s390/mm/pgtable.c
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/vmx.h
arch/x86/include/asm/xsave.h
arch/x86/include/uapi/asm/vmx.h
arch/x86/kernel/kvm.c
arch/x86/kernel/kvmclock.c
arch/x86/kernel/xsave.c
arch/x86/kvm/Makefile
arch/x86/kvm/assigned-dev.c [new file with mode: 0644]
arch/x86/kvm/assigned-dev.h [new file with mode: 0644]
arch/x86/kvm/cpuid.c
arch/x86/kvm/emulate.c
arch/x86/kvm/ioapic.c [new file with mode: 0644]
arch/x86/kvm/ioapic.h [new file with mode: 0644]
arch/x86/kvm/iommu.c [new file with mode: 0644]
arch/x86/kvm/irq_comm.c [new file with mode: 0644]
arch/x86/kvm/lapic.c
arch/x86/kvm/lapic.h
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/trace.h
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
include/kvm/arm_arch_timer.h
include/kvm/arm_vgic.h
include/linux/kvm_host.h
include/linux/kvm_types.h
include/uapi/linux/kvm.h
virt/kvm/arm/arch_timer.c
virt/kvm/arm/vgic.c
virt/kvm/assigned-dev.c [deleted file]
virt/kvm/eventfd.c
virt/kvm/ioapic.c [deleted file]
virt/kvm/ioapic.h [deleted file]
virt/kvm/iommu.c [deleted file]
virt/kvm/irq_comm.c [deleted file]
virt/kvm/kvm_main.c

diff --git a/Documentation/ia64/kvm.txt b/Documentation/ia64/kvm.txt
deleted file mode 100644 (file)
index ffb5c80..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-Currently, kvm module is in EXPERIMENTAL stage on IA64. This means that
-interfaces are not stable enough to use. So, please don't run critical
-applications in virtual machine.
-We will try our best to improve it in future versions!
-
-                               Guide: How to boot up guests on kvm/ia64
-
-This guide is to describe how to enable kvm support for IA-64 systems.
-
-1. Get the kvm source from git.kernel.org.
-       Userspace source:
-               git clone git://git.kernel.org/pub/scm/virt/kvm/kvm-userspace.git
-       Kernel Source:
-               git clone git://git.kernel.org/pub/scm/linux/kernel/git/xiantao/kvm-ia64.git
-
-2. Compile the source code.
-       2.1 Compile userspace code:
-               (1)cd ./kvm-userspace
-               (2)./configure
-               (3)cd kernel
-               (4)make sync LINUX= $kernel_dir (kernel_dir is the directory of kernel source.)
-               (5)cd ..
-               (6)make qemu
-               (7)cd qemu; make install
-
-       2.2 Compile kernel source code:
-               (1) cd ./$kernel_dir
-               (2) Make menuconfig
-               (3) Enter into virtualization option, and choose kvm.
-               (4) make
-               (5) Once (4) done, make modules_install
-               (6) Make initrd, and use new kernel to reboot up host machine.
-               (7) Once (6) done, cd $kernel_dir/arch/ia64/kvm
-               (8) insmod kvm.ko; insmod kvm-intel.ko
-
-Note: For step 2, please make sure that host page size == TARGET_PAGE_SIZE of qemu, otherwise, may fail.
-
-3. Get Guest Firmware named as Flash.fd, and put it under right place:
-       (1) If you have the guest firmware (binary) released by Intel Corp for Xen, use it directly.
-
-       (2) If you have no firmware at hand, Please download its source from
-               hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg
-           you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries.
-
-       (3) Rename the firmware you owned to Flash.fd, and copy it to /usr/local/share/qemu
-
-4. Boot up Linux or Windows guests:
-       4.1 Create or install a image for guest boot. If you have xen experience, it should be easy.
-
-       4.2 Boot up guests use the following command.
-               /usr/local/bin/qemu-system-ia64 -smp xx -m 512 -hda $your_image
-               (xx is the number of virtual processors for the guest, now the maximum value is 4)
-
-5. Known possible issue on some platforms with old Firmware.
-
-In the event of strange host crash issues, try to solve it through either of the following ways:
-
-(1): Upgrade your Firmware to the latest one.
-
-(2): Applying the below patch to kernel source.
-diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
-index 0b53344..f02b0f7 100644
---- a/arch/ia64/kernel/pal.S
-+++ b/arch/ia64/kernel/pal.S
-@@ -84,7 +84,8 @@ GLOBAL_ENTRY(ia64_pal_call_static)
-       mov ar.pfs = loc1
-       mov rp = loc0
-       ;;
--      srlz.d                          // serialize restoration of psr.l
-+      srlz.i                  // serialize restoration of psr.l
-+      ;;
-       br.ret.sptk.many b0
- END(ia64_pal_call_static)
-
-6. Bug report:
-       If you found any issues when use kvm/ia64, Please post the bug info to kvm-ia64-devel mailing list.
-       https://lists.sourceforge.net/lists/listinfo/kvm-ia64-devel/
-
-Thanks for your interest! Let's work together, and make kvm/ia64 stronger and stronger!
-
-
-                                                               Xiantao Zhang <xiantao.zhang@intel.com>
-                                                                                       2008.3.10
index 7610eaa..0007fef 100644 (file)
@@ -68,9 +68,12 @@ description:
 
   Capability: which KVM extension provides this ioctl.  Can be 'basic',
       which means that is will be provided by any kernel that supports
-      API version 12 (see section 4.1), or a KVM_CAP_xyz constant, which
+      API version 12 (see section 4.1), a KVM_CAP_xyz constant, which
       means availability needs to be checked with KVM_CHECK_EXTENSION
-      (see section 4.4).
+      (see section 4.4), or 'none' which means that while not all kernels
+      support this ioctl, there's no capability bit to check its
+      availability: for kernels that don't support the ioctl,
+      the ioctl returns -ENOTTY.
 
   Architectures: which instruction set architectures provide this ioctl.
       x86 includes both i386 and x86_64.
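
As an aside, a minimal userspace sketch (not part of this patch) of how the two
probing styles differ; the /dev/kvm handling, the lack of error checking and the
zeroed kvm_assigned_pci_dev are illustrative assumptions only:

  /* Probe a KVM_CAP_xyz-style ioctl with KVM_CHECK_EXTENSION, and a
   * "Capability: none" ioctl by calling it and treating -ENOTTY as
   * "not supported by this kernel". */
  #include <fcntl.h>
  #include <errno.h>
  #include <stdio.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  int main(void)
  {
          int kvm = open("/dev/kvm", O_RDWR);
          int vm  = ioctl(kvm, KVM_CREATE_VM, 0);
          struct kvm_assigned_pci_dev dev;

          if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP) > 0)
                  printf("KVM_CREATE_IRQCHIP is available\n");

          memset(&dev, 0, sizeof(dev));
          if (ioctl(vm, KVM_ASSIGN_PCI_DEVICE, &dev) < 0 && errno == ENOTTY)
                  printf("device assignment not supported by this kernel\n");
          return 0;
  }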
@@ -604,7 +607,7 @@ struct kvm_fpu {
 4.24 KVM_CREATE_IRQCHIP
 
 Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390)
-Architectures: x86, ia64, ARM, arm64, s390
+Architectures: x86, ARM, arm64, s390
 Type: vm ioctl
 Parameters: none
 Returns: 0 on success, -1 on error
@@ -612,7 +615,7 @@ Returns: 0 on success, -1 on error
 Creates an interrupt controller model in the kernel.  On x86, creates a virtual
 ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
 local APIC.  IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
-only go to the IOAPIC.  On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is
+only go to the IOAPIC.  On ARM/arm64, a GIC is
 created. On s390, a dummy irq routing table is created.
 
 Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled
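
For illustration, a hedged userspace sketch (not from this series) of the call
sequence the paragraph above describes; the open()/ioctl() error handling is
elided and the function name is an assumption:

  #include <fcntl.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /* Create a VM and an in-kernel irqchip before any vcpus, so that vcpus
   * created afterwards get a local APIC (x86). */
  static int create_vm_with_irqchip(void)
  {
          int kvm = open("/dev/kvm", O_RDWR);
          int vm  = ioctl(kvm, KVM_CREATE_VM, 0);

          if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP) > 0)
                  ioctl(vm, KVM_CREATE_IRQCHIP, 0);
          /* GSIs 0-15 now route to both PIC and IOAPIC, 16-23 to the IOAPIC only */
          return vm;
  }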
@@ -622,7 +625,7 @@ before KVM_CREATE_IRQCHIP can be used.
 4.25 KVM_IRQ_LINE
 
 Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, arm, arm64
+Architectures: x86, arm, arm64
 Type: vm ioctl
 Parameters: struct kvm_irq_level
 Returns: 0 on success, -1 on error
@@ -676,7 +679,7 @@ struct kvm_irq_level {
 4.26 KVM_GET_IRQCHIP
 
 Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_irqchip (in/out)
 Returns: 0 on success, -1 on error
@@ -698,7 +701,7 @@ struct kvm_irqchip {
 4.27 KVM_SET_IRQCHIP
 
 Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_irqchip (in)
 Returns: 0 on success, -1 on error
@@ -991,7 +994,7 @@ for vm-wide capabilities.
 4.38 KVM_GET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, ia64, s390
+Architectures: x86, s390
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (out)
 Returns: 0 on success; -1 on error
@@ -1005,16 +1008,15 @@ uniprocessor guests).
 
 Possible values are:
 
- - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86, ia64]
+ - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86]
  - KVM_MP_STATE_UNINITIALIZED:   the vcpu is an application processor (AP)
-                                 which has not yet received an INIT signal [x86,
-                                 ia64]
+                                 which has not yet received an INIT signal [x86]
  - KVM_MP_STATE_INIT_RECEIVED:   the vcpu has received an INIT signal, and is
-                                 now ready for a SIPI [x86, ia64]
+                                 now ready for a SIPI [x86]
  - KVM_MP_STATE_HALTED:          the vcpu has executed a HLT instruction and
-                                 is waiting for an interrupt [x86, ia64]
+                                 is waiting for an interrupt [x86]
  - KVM_MP_STATE_SIPI_RECEIVED:   the vcpu has just received a SIPI (vector
-                                 accessible via KVM_GET_VCPU_EVENTS) [x86, ia64]
+                                 accessible via KVM_GET_VCPU_EVENTS) [x86]
  - KVM_MP_STATE_STOPPED:         the vcpu is stopped [s390]
  - KVM_MP_STATE_CHECK_STOP:      the vcpu is in a special error state [s390]
  - KVM_MP_STATE_OPERATING:       the vcpu is operating (running or halted)
@@ -1022,7 +1024,7 @@ Possible values are:
  - KVM_MP_STATE_LOAD:            the vcpu is in a special load/startup state
                                  [s390]
 
-On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
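
For illustration only (not part of the patch), a sketch of querying this state
from userspace; vcpu_fd is an assumed, already-open vcpu file descriptor:

  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /* Returns 1 if the vcpu is runnable, 0 otherwise, -1 on ioctl failure.
   * On x86 this is only meaningful once an in-kernel irqchip exists. */
  static int vcpu_is_runnable(int vcpu_fd)
  {
          struct kvm_mp_state mp;

          if (ioctl(vcpu_fd, KVM_GET_MP_STATE, &mp) < 0)
                  return -1;
          return mp.mp_state == KVM_MP_STATE_RUNNABLE;
  }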
 
@@ -1030,7 +1032,7 @@ these architectures.
 4.39 KVM_SET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, ia64, s390
+Architectures: x86, s390
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (in)
 Returns: 0 on success; -1 on error
@@ -1038,7 +1040,7 @@ Returns: 0 on success; -1 on error
 Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
 arguments.
 
-On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
@@ -1065,7 +1067,7 @@ documentation when it pops into existence).
 4.41 KVM_SET_BOOT_CPU_ID
 
 Capability: KVM_CAP_SET_BOOT_CPU_ID
-Architectures: x86, ia64
+Architectures: x86
 Type: vm ioctl
 Parameters: unsigned long vcpu_id
 Returns: 0 on success, -1 on error
@@ -1257,8 +1259,8 @@ The flags bitmap is defined as:
 
 4.48 KVM_ASSIGN_PCI_DEVICE
 
-Capability: KVM_CAP_DEVICE_ASSIGNMENT
-Architectures: x86 ia64
+Capability: none
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_pci_dev (in)
 Returns: 0 on success, -1 on error
@@ -1298,25 +1300,36 @@ Only PCI header type 0 devices with PCI BAR resources are supported by
 device assignment.  The user requesting this ioctl must have read/write
 access to the PCI sysfs resource files associated with the device.
 
+Errors:
+  ENOTTY: kernel does not support this ioctl
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
 
 4.49 KVM_DEASSIGN_PCI_DEVICE
 
-Capability: KVM_CAP_DEVICE_DEASSIGNMENT
-Architectures: x86 ia64
+Capability: none
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_pci_dev (in)
 Returns: 0 on success, -1 on error
 
 Ends PCI device assignment, releasing all associated resources.
 
-See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is
+See KVM_ASSIGN_PCI_DEVICE for the data structure. Only assigned_dev_id is
 used in kvm_assigned_pci_dev to identify the device.
 
+Errors:
+  ENOTTY: kernel does not support this ioctl
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
 
 4.50 KVM_ASSIGN_DEV_IRQ
 
 Capability: KVM_CAP_ASSIGN_DEV_IRQ
-Architectures: x86 ia64
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_irq (in)
 Returns: 0 on success, -1 on error
@@ -1346,11 +1359,17 @@ The following flags are defined:
 It is not valid to specify multiple types per host or guest IRQ. However, the
 IRQ type of host and guest can differ or can even be null.
 
+Errors:
+  ENOTTY: kernel does not support this ioctl
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
 
 4.51 KVM_DEASSIGN_DEV_IRQ
 
 Capability: KVM_CAP_ASSIGN_DEV_IRQ
-Architectures: x86 ia64
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_irq (in)
 Returns: 0 on success, -1 on error
@@ -1365,7 +1384,7 @@ KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed.
 4.52 KVM_SET_GSI_ROUTING
 
 Capability: KVM_CAP_IRQ_ROUTING
-Architectures: x86 ia64 s390
+Architectures: x86 s390
 Type: vm ioctl
 Parameters: struct kvm_irq_routing (in)
 Returns: 0 on success, -1 on error
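
A rough userspace sketch of installing one routing entry (assumptions: vm_fd is
an open VM fd with an in-kernel irqchip, and the GSI/pin numbers are arbitrary).
Note that KVM_SET_GSI_ROUTING replaces the whole table, so a real caller would
pass every entry it wants to keep:

  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int route_gsi5_to_ioapic_pin5(int vm_fd)
  {
          struct {
                  struct kvm_irq_routing table;
                  struct kvm_irq_routing_entry entries[1];
          } r;

          memset(&r, 0, sizeof(r));
          r.table.nr = 1;
          r.entries[0].gsi = 5;
          r.entries[0].type = KVM_IRQ_ROUTING_IRQCHIP;
          r.entries[0].u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC;
          r.entries[0].u.irqchip.pin = 5;

          return ioctl(vm_fd, KVM_SET_GSI_ROUTING, &r.table);
  }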
@@ -1423,8 +1442,8 @@ struct kvm_irq_routing_s390_adapter {
 
 4.53 KVM_ASSIGN_SET_MSIX_NR
 
-Capability: KVM_CAP_DEVICE_MSIX
-Architectures: x86 ia64
+Capability: none
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_msix_nr (in)
 Returns: 0 on success, -1 on error
@@ -1445,8 +1464,8 @@ struct kvm_assigned_msix_nr {
 
 4.54 KVM_ASSIGN_SET_MSIX_ENTRY
 
-Capability: KVM_CAP_DEVICE_MSIX
-Architectures: x86 ia64
+Capability: none
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_msix_entry (in)
 Returns: 0 on success, -1 on error
@@ -1461,6 +1480,12 @@ struct kvm_assigned_msix_entry {
        __u16 padding[3];
 };
 
+Errors:
+  ENOTTY: kernel does not support this ioctl
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
 
 4.55 KVM_SET_TSC_KHZ
 
@@ -2453,9 +2478,15 @@ return ENOEXEC for that vcpu.
 Note that because some registers reflect machine topology, all vcpus
 should be created before this ioctl is invoked.
 
+Userspace can call this function multiple times for a given vcpu, including
+after the vcpu has been run. This will reset the vcpu to its initial
+state. All calls to this function after the initial call must use the same
+target and same set of feature flags, otherwise EINVAL will be returned.
+
 Possible features:
        - KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state.
-         Depends on KVM_CAP_ARM_PSCI.
+         Depends on KVM_CAP_ARM_PSCI.  If not set, the CPU will be powered on
+         and execute guest code when KVM_RUN is called.
        - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
          Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
        - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU.
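
To make the reset-on-reinit behaviour above concrete, a hedged userspace sketch
(vm_fd/vcpu_fd are assumed open descriptors; a real VMM would keep the
kvm_vcpu_init around so that later reset calls reuse the identical target and
feature set):

  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int vcpu_init_powered_off(int vm_fd, int vcpu_fd)
  {
          struct kvm_vcpu_init init;

          memset(&init, 0, sizeof(init));
          /* Ask the kernel for the preferred target for this host CPU. */
          if (ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init) < 0)
                  return -1;
          init.features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;

          /* First call initializes; later identical calls reset the vcpu. */
          return ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);
  }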
@@ -2951,6 +2982,15 @@ HVC instruction based PSCI call from the vcpu. The 'type' field describes
 the system-level event type. The 'flags' field describes architecture
 specific flags for the system-level event.
 
+Valid values for 'type' are:
+  KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the
+   VM. Userspace is not obliged to honour this, and if it does honour
+   this does not need to destroy the VM synchronously (ie it may call
+   KVM_RUN again before shutdown finally occurs).
+  KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM.
+   As with SHUTDOWN, userspace can choose to ignore the request, or
+   to schedule the reset to occur in the future and may call KVM_RUN again.
+
                /* Fix the size of the union. */
                char padding[256];
        };
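
For context, a hedged sketch of the userspace side of this exit; 'run' is
assumed to be the vcpu's mmap()ed kvm_run area and the handling is deliberately
left as comments:

  #include <linux/kvm.h>

  static void handle_system_event(struct kvm_run *run)
  {
          if (run->exit_reason != KVM_EXIT_SYSTEM_EVENT)
                  return;

          switch (run->system_event.type) {
          case KVM_SYSTEM_EVENT_SHUTDOWN:
                  /* may be honoured later; further KVM_RUN calls are legal */
                  break;
          case KVM_SYSTEM_EVENT_RESET:
                  /* e.g. re-init the vcpus, then resume with KVM_RUN */
                  break;
          }
  }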
index 0d16f96..d426fc8 100644 (file)
@@ -12,14 +12,14 @@ specific.
 1. GROUP: KVM_S390_VM_MEM_CTRL
 Architectures: s390
 
-1.1. ATTRIBUTE: KVM_S390_VM_MEM_CTRL
+1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
 Parameters: none
-Returns: -EBUSY if already a vcpus is defined, otherwise 0
+Returns: -EBUSY if a vcpu is already defined, otherwise 0
 
-Enables CMMA for the virtual machine
+Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
 
-1.2. ATTRIBUTE: KVM_S390_VM_CLR_CMMA
-Parameteres: none
+1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
+Parameters: none
 Returns: 0
 
 Clear the CMMA status for all guest pages, so any pages the guest marked
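
As a usage illustration (not part of the patch), enabling the CMMA attribute
described above from userspace; vm_fd is an assumed open s390 VM descriptor and
the call must happen before any vcpu is created:

  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int enable_cmma(int vm_fd)
  {
          struct kvm_device_attr attr = {
                  .group = KVM_S390_VM_MEM_CTRL,
                  .attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
          };

          /* Fails with -EBUSY once a vcpu has been defined. */
          return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
  }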
index 6d470ae..2a71c8f 100644 (file)
@@ -168,7 +168,7 @@ MSR_KVM_ASYNC_PF_EN: 0x4b564d02
        64 byte memory area which must be in guest RAM and must be
        zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
        when asynchronous page faults are enabled on the vcpu 0 when
-       disabled. Bit 2 is 1 if asynchronous page faults can be injected
+       disabled. Bit 1 is 1 if asynchronous page faults can be injected
        when vcpu is in cpl == 0.
 
        First 4 byte of 64 byte memory location will be written to by
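
To show the bit layout in code, a hedged guest-kernel fragment modelled on
arch/x86/kernel/kvm.c; the per-cpu variable and function names here are
assumptions for illustration:

  #include <linux/mm.h>
  #include <linux/percpu.h>
  #include <asm/msr.h>
  #include <asm/kvm_para.h>

  static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);

  static void enable_async_pf(void)
  {
          /* bit 0: enable; bit 1: also inject async page faults at cpl == 0 */
          u64 pa = __pa(this_cpu_ptr(&apf_reason));

          wrmsrl(MSR_KVM_ASYNC_PF_EN,
                 pa | KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_SEND_ALWAYS);
  }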
index 4507a7e..7605833 100644 (file)
@@ -5495,15 +5495,6 @@ S:       Supported
 F:     arch/powerpc/include/asm/kvm*
 F:     arch/powerpc/kvm/
 
-KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64)
-M:     Xiantao Zhang <xiantao.zhang@intel.com>
-L:     kvm-ia64@vger.kernel.org
-W:     http://kvm.qumranet.com
-S:     Supported
-F:     Documentation/ia64/kvm.txt
-F:     arch/ia64/include/asm/kvm*
-F:     arch/ia64/kvm/
-
 KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
 M:     Christian Borntraeger <borntraeger@de.ibm.com>
 M:     Cornelia Huck <cornelia.huck@de.ibm.com>
index b9db269..66ce176 100644 (file)
@@ -33,6 +33,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
 
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.hcr = HCR_GUEST_MASK;
+}
+
 static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
 {
        return 1;
index 53036e2..254e065 100644 (file)
@@ -150,8 +150,6 @@ struct kvm_vcpu_stat {
        u32 halt_wakeup;
 };
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-                       const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
index acb0d57..63e0ecc 100644 (file)
@@ -52,6 +52,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_boot_hyp_pgd(void);
 void free_hyp_pgds(void);
 
+void stage2_unmap_vm(struct kvm *kvm);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -161,9 +162,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 }
 
 static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-                                            unsigned long size)
+                                            unsigned long size,
+                                            bool ipa_uncached)
 {
-       if (!vcpu_has_cache_enabled(vcpu))
+       if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
                kvm_flush_dcache_to_poc((void *)hva, size);
        
        /*
index 9e193c8..2d6d910 100644 (file)
@@ -213,6 +213,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
        int err;
        struct kvm_vcpu *vcpu;
 
+       if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
+               err = -EBUSY;
+               goto out;
+       }
+
        vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
        if (!vcpu) {
                err = -ENOMEM;
@@ -263,6 +268,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
        /* Force users to call KVM_ARM_VCPU_INIT */
        vcpu->arch.target = -1;
+       bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
 
        /* Set up the timer */
        kvm_timer_vcpu_init(vcpu);
@@ -419,6 +425,7 @@ static void update_vttbr(struct kvm *kvm)
 
 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 {
+       struct kvm *kvm = vcpu->kvm;
        int ret;
 
        if (likely(vcpu->arch.has_run_once))
@@ -427,15 +434,23 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
        vcpu->arch.has_run_once = true;
 
        /*
-        * Initialize the VGIC before running a vcpu the first time on
-        * this VM.
+        * Map the VGIC hardware resources before running a vcpu the first
+        * time on this VM.
         */
-       if (unlikely(!vgic_initialized(vcpu->kvm))) {
-               ret = kvm_vgic_init(vcpu->kvm);
+       if (unlikely(!vgic_ready(kvm))) {
+               ret = kvm_vgic_map_resources(kvm);
                if (ret)
                        return ret;
        }
 
+       /*
+        * Enable the arch timers only if we have an in-kernel VGIC
+        * and it has been properly initialized, since we cannot handle
+        * interrupts from the virtual timer with a userspace gic.
+        */
+       if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
+               kvm_timer_enable(kvm);
+
        return 0;
 }
 
@@ -649,6 +664,48 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
        return -EINVAL;
 }
 
+static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+                              const struct kvm_vcpu_init *init)
+{
+       unsigned int i;
+       int phys_target = kvm_target_cpu();
+
+       if (init->target != phys_target)
+               return -EINVAL;
+
+       /*
+        * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+        * use the same target.
+        */
+       if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
+               return -EINVAL;
+
+       /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
+       for (i = 0; i < sizeof(init->features) * 8; i++) {
+               bool set = (init->features[i / 32] & (1 << (i % 32)));
+
+               if (set && i >= KVM_VCPU_MAX_FEATURES)
+                       return -ENOENT;
+
+               /*
+                * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+                * use the same feature set.
+                */
+               if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
+                   test_bit(i, vcpu->arch.features) != set)
+                       return -EINVAL;
+
+               if (set)
+                       set_bit(i, vcpu->arch.features);
+       }
+
+       vcpu->arch.target = phys_target;
+
+       /* Now we know what it is, we can reset it. */
+       return kvm_reset_vcpu(vcpu);
+}
+
+
 static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
                                         struct kvm_vcpu_init *init)
 {
@@ -658,11 +715,22 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
        if (ret)
                return ret;
 
+       /*
+        * Ensure a rebooted VM will fault in RAM pages and detect if the
+        * guest MMU is turned off and flush the caches as needed.
+        */
+       if (vcpu->arch.has_run_once)
+               stage2_unmap_vm(vcpu->kvm);
+
+       vcpu_reset_hcr(vcpu);
+
        /*
         * Handle the "start in power-off" case by marking the VCPU as paused.
         */
-       if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+       if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
                vcpu->arch.pause = true;
+       else
+               vcpu->arch.pause = false;
 
        return 0;
 }
index cc0b787..384bab6 100644 (file)
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-       vcpu->arch.hcr = HCR_GUEST_MASK;
        return 0;
 }
 
@@ -274,31 +273,6 @@ int __attribute_const__ kvm_target_cpu(void)
        }
 }
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-                       const struct kvm_vcpu_init *init)
-{
-       unsigned int i;
-
-       /* We can only cope with guest==host and only on A15/A7 (for now). */
-       if (init->target != kvm_target_cpu())
-               return -EINVAL;
-
-       vcpu->arch.target = init->target;
-       bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
-       /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
-       for (i = 0; i < sizeof(init->features) * 8; i++) {
-               if (test_bit(i, (void *)init->features)) {
-                       if (i >= KVM_VCPU_MAX_FEATURES)
-                               return -ENOENT;
-                       set_bit(i, vcpu->arch.features);
-               }
-       }
-
-       /* Now we know what it is, we can reset it. */
-       return kvm_reset_vcpu(vcpu);
-}
-
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
 {
        int target = kvm_target_cpu();
index 4cb5a93..5d3bfc0 100644 (file)
@@ -187,15 +187,18 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
        }
 
        rt = vcpu->arch.mmio_decode.rt;
-       data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);
 
-       trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
-                                        KVM_TRACE_MMIO_READ_UNSATISFIED,
-                       mmio.len, fault_ipa,
-                       (mmio.is_write) ? data : 0);
+       if (mmio.is_write) {
+               data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt),
+                                              mmio.len);
 
-       if (mmio.is_write)
+               trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len,
+                              fault_ipa, data);
                mmio_write_buf(mmio.data, mmio.len, data);
+       } else {
+               trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len,
+                              fault_ipa, 0);
+       }
 
        if (vgic_handle_mmio(vcpu, run, &mmio))
                return 1;
index 8664ff1..1dc9778 100644 (file)
@@ -612,6 +612,71 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
        unmap_range(kvm, kvm->arch.pgd, start, size);
 }
 
+static void stage2_unmap_memslot(struct kvm *kvm,
+                                struct kvm_memory_slot *memslot)
+{
+       hva_t hva = memslot->userspace_addr;
+       phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+       phys_addr_t size = PAGE_SIZE * memslot->npages;
+       hva_t reg_end = hva + size;
+
+       /*
+        * A memory region could potentially cover multiple VMAs, and any holes
+        * between them, so iterate over all of them to find out if we should
+        * unmap any of them.
+        *
+        *     +--------------------------------------------+
+        * +---------------+----------------+   +----------------+
+        * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
+        * +---------------+----------------+   +----------------+
+        *     |               memory region                |
+        *     +--------------------------------------------+
+        */
+       do {
+               struct vm_area_struct *vma = find_vma(current->mm, hva);
+               hva_t vm_start, vm_end;
+
+               if (!vma || vma->vm_start >= reg_end)
+                       break;
+
+               /*
+                * Take the intersection of this VMA with the memory region
+                */
+               vm_start = max(hva, vma->vm_start);
+               vm_end = min(reg_end, vma->vm_end);
+
+               if (!(vma->vm_flags & VM_PFNMAP)) {
+                       gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
+                       unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+               }
+               hva = vm_end;
+       } while (hva < reg_end);
+}
+
+/**
+ * stage2_unmap_vm - Unmap Stage-2 RAM mappings
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the memory regions and unmap any regular RAM
+ * backing memory already mapped to the VM.
+ */
+void stage2_unmap_vm(struct kvm *kvm)
+{
+       struct kvm_memslots *slots;
+       struct kvm_memory_slot *memslot;
+       int idx;
+
+       idx = srcu_read_lock(&kvm->srcu);
+       spin_lock(&kvm->mmu_lock);
+
+       slots = kvm_memslots(kvm);
+       kvm_for_each_memslot(memslot, slots)
+               stage2_unmap_memslot(kvm, memslot);
+
+       spin_unlock(&kvm->mmu_lock);
+       srcu_read_unlock(&kvm->srcu, idx);
+}
+
 /**
  * kvm_free_stage2_pgd - free all stage-2 tables
  * @kvm:       The KVM struct pointer for the VM.
@@ -853,6 +918,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        struct vm_area_struct *vma;
        pfn_t pfn;
        pgprot_t mem_type = PAGE_S2;
+       bool fault_ipa_uncached;
 
        write_fault = kvm_is_write_fault(vcpu);
        if (fault_status == FSC_PERM && !write_fault) {
@@ -919,6 +985,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        if (!hugetlb && !force_pte)
                hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
+       fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT;
+
        if (hugetlb) {
                pmd_t new_pmd = pfn_pmd(pfn, mem_type);
                new_pmd = pmd_mkhuge(new_pmd);
@@ -926,7 +994,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                        kvm_set_s2pmd_writable(&new_pmd);
                        kvm_set_pfn_dirty(pfn);
                }
-               coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
+               coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,
+                                         fault_ipa_uncached);
                ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
        } else {
                pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -934,7 +1003,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                        kvm_set_s2pte_writable(&new_pte);
                        kvm_set_pfn_dirty(pfn);
                }
-               coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
+               coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
+                                         fault_ipa_uncached);
                ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
                        pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
        }
@@ -1294,11 +1364,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                hva = vm_end;
        } while (hva < reg_end);
 
-       if (ret) {
-               spin_lock(&kvm->mmu_lock);
+       spin_lock(&kvm->mmu_lock);
+       if (ret)
                unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
-               spin_unlock(&kvm->mmu_lock);
-       }
+       else
+               stage2_flush_memslot(kvm, memslot);
+       spin_unlock(&kvm->mmu_lock);
        return ret;
 }
 
@@ -1310,6 +1381,15 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                            unsigned long npages)
 {
+       /*
+        * Readonly memslots are not incoherent with the caches by definition,
+        * but in practice, they are used mostly to emulate ROMs or NOR flashes
+        * that the guest may consider devices and hence map as uncached.
+        * To prevent incoherency issues in these cases, tag all readonly
+        * regions as incoherent.
+        */
+       if (slot->flags & KVM_MEM_READONLY)
+               slot->flags |= KVM_MEMSLOT_INCOHERENT;
        return 0;
 }
 
index 09cf377..58cb324 100644 (file)
@@ -15,6 +15,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/preempt.h>
 #include <linux/kvm_host.h>
 #include <linux/wait.h>
 
@@ -166,6 +167,23 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 
 static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
 {
+       int i;
+       struct kvm_vcpu *tmp;
+
+       /*
+        * The KVM ABI specifies that a system event exit may call KVM_RUN
+        * again and may perform shutdown/reboot at a later time than when the
+        * actual request is made.  Since we are implementing PSCI and a
+        * caller of PSCI reboot and shutdown expects that the system shuts
+        * down or reboots immediately, let's make sure that VCPUs are not run
+        * after this call is handled and before the VCPUs have been
+        * re-initialized.
+        */
+       kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+               tmp->arch.pause = true;
+               kvm_vcpu_kick(tmp);
+       }
+
        memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
        vcpu->run->system_event.type = type;
        vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
index 5674a55..8127e45 100644 (file)
@@ -38,6 +38,11 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
 
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+}
+
 static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
 {
        return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
index 2012c4b..0b7dfdb 100644 (file)
@@ -165,8 +165,6 @@ struct kvm_vcpu_stat {
        u32 halt_wakeup;
 };
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-                       const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
@@ -200,6 +198,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
 u64 kvm_call_hyp(void *hypfn, ...);
+void force_vm_exit(const cpumask_t *mask);
 
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
                int exception_index);
index 0caf7a5..14a74f1 100644 (file)
@@ -83,6 +83,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
 void free_boot_hyp_pgd(void);
 void free_hyp_pgds(void);
 
+void stage2_unmap_vm(struct kvm *kvm);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -243,9 +244,10 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 }
 
 static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-                                            unsigned long size)
+                                            unsigned long size,
+                                            bool ipa_uncached)
 {
-       if (!vcpu_has_cache_enabled(vcpu))
+       if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
                kvm_flush_dcache_to_poc((void *)hva, size);
 
        if (!icache_is_aliasing()) {            /* PIPT */
index 7679469..9535bd5 100644 (file)
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-       vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
        return 0;
 }
 
@@ -297,31 +296,6 @@ int __attribute_const__ kvm_target_cpu(void)
        return -EINVAL;
 }
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-                       const struct kvm_vcpu_init *init)
-{
-       unsigned int i;
-       int phys_target = kvm_target_cpu();
-
-       if (init->target != phys_target)
-               return -EINVAL;
-
-       vcpu->arch.target = phys_target;
-       bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
-       /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
-       for (i = 0; i < sizeof(init->features) * 8; i++) {
-               if (init->features[i / 32] & (1 << (i % 32))) {
-                       if (i >= KVM_VCPU_MAX_FEATURES)
-                               return -ENOENT;
-                       set_bit(i, vcpu->arch.features);
-               }
-       }
-
-       /* Now we know what it is, we can reset it. */
-       return kvm_reset_vcpu(vcpu);
-}
-
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
 {
        int target = kvm_target_cpu();
index 536d13b..371b55b 100644 (file)
@@ -20,7 +20,6 @@ config IA64
        select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
        select HAVE_FUNCTION_TRACER
        select HAVE_DMA_ATTRS
-       select HAVE_KVM
        select TTY
        select HAVE_ARCH_TRACEHOOK
        select HAVE_DMA_API_DEBUG
@@ -640,8 +639,6 @@ source "security/Kconfig"
 
 source "crypto/Kconfig"
 
-source "arch/ia64/kvm/Kconfig"
-
 source "lib/Kconfig"
 
 config IOMMU_HELPER
index 5441b14..970d0bd 100644 (file)
@@ -53,7 +53,6 @@ core-$(CONFIG_IA64_HP_ZX1)    += arch/ia64/dig/
 core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
 core-$(CONFIG_IA64_SGI_SN2)    += arch/ia64/sn/
 core-$(CONFIG_IA64_SGI_UV)     += arch/ia64/uv/
-core-$(CONFIG_KVM)             += arch/ia64/kvm/
 
 drivers-$(CONFIG_PCI)          += arch/ia64/pci/
 drivers-$(CONFIG_IA64_HP_SIM)  += arch/ia64/hp/sim/
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
deleted file mode 100644 (file)
index 4729752..0000000
+++ /dev/null
@@ -1,609 +0,0 @@
-/*
- * kvm_host.h: used for kvm module, and hold ia64-specific sections.
- *
- * Copyright (C) 2007, Intel Corporation.
- *
- * Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#ifndef __ASM_KVM_HOST_H
-#define __ASM_KVM_HOST_H
-
-#define KVM_USER_MEM_SLOTS 32
-
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
-#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
-
-/* define exit reasons from vmm to kvm*/
-#define EXIT_REASON_VM_PANIC           0
-#define EXIT_REASON_MMIO_INSTRUCTION   1
-#define EXIT_REASON_PAL_CALL           2
-#define EXIT_REASON_SAL_CALL           3
-#define EXIT_REASON_SWITCH_RR6         4
-#define EXIT_REASON_VM_DESTROY         5
-#define EXIT_REASON_EXTERNAL_INTERRUPT 6
-#define EXIT_REASON_IPI                        7
-#define EXIT_REASON_PTC_G              8
-#define EXIT_REASON_DEBUG              20
-
-/*Define vmm address space and vm data space.*/
-#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
-#define KVM_VMM_SHIFT 24
-#define KVM_VMM_BASE 0xD000000000000000
-#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
-
-/*
- * Define vm_buffer, used by PAL Services, base address.
- * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
- */
-#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
-#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
-
-/*
- * kvm guest's data area looks as follow:
- *
- *            +----------------------+ ------- KVM_VM_DATA_SIZE
- *           |     vcpu[n]'s data   |   |     ___________________KVM_STK_OFFSET
- *                   |                      |   |    /                   |
- *                   |        ..........    |   |   /vcpu's struct&stack |
- *                   |        ..........    |   |  /---------------------|---- 0
- *           |     vcpu[5]'s data   |   | /       vpd            |
- *           |     vcpu[4]'s data   |   |/-----------------------|
- *           |     vcpu[3]'s data   |   /         vtlb           |
- *           |     vcpu[2]'s data   |  /|------------------------|
- *           |     vcpu[1]'s data   |/  |         vhpt           |
- *           |     vcpu[0]'s data   |____________________________|
- *            +----------------------+  |
- *           |    memory dirty log  |   |
- *            +----------------------+  |
- *           |    vm's data struct  |   |
- *            +----------------------+  |
- *           |                      |   |
- *           |                      |   |
- *           |                      |   |
- *           |                      |   |
- *           |                      |   |
- *           |                      |   |
- *           |                      |   |
- *           |   vm's p2m table  |      |
- *           |                      |   |
- *            |                             |   |
- *           |                      |   |  |
- * vm's data->|                             |   |  |
- *           +----------------------+ ------- 0
- * To support large memory, needs to increase the size of p2m.
- * To support more vcpus, needs to ensure it has enough space to
- * hold vcpus' data.
- */
-
-#define KVM_VM_DATA_SHIFT      26
-#define KVM_VM_DATA_SIZE       (__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
-#define KVM_VM_DATA_BASE       (KVM_VMM_BASE + KVM_VM_DATA_SIZE)
-
-#define KVM_P2M_BASE           KVM_VM_DATA_BASE
-#define KVM_P2M_SIZE           (__IA64_UL_CONST(24) << 20)
-
-#define VHPT_SHIFT             16
-#define VHPT_SIZE              (__IA64_UL_CONST(1) << VHPT_SHIFT)
-#define VHPT_NUM_ENTRIES       (__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
-
-#define VTLB_SHIFT             16
-#define VTLB_SIZE              (__IA64_UL_CONST(1) << VTLB_SHIFT)
-#define VTLB_NUM_ENTRIES       (1UL << (VHPT_SHIFT-5))
-
-#define VPD_SHIFT              16
-#define VPD_SIZE               (__IA64_UL_CONST(1) << VPD_SHIFT)
-
-#define VCPU_STRUCT_SHIFT      16
-#define VCPU_STRUCT_SIZE       (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
-
-/*
- * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h
- */
-#define KVM_STK_SHIFT          16
-#define KVM_STK_OFFSET         (__IA64_UL_CONST(1)<< KVM_STK_SHIFT)
-
-#define KVM_VM_STRUCT_SHIFT    19
-#define KVM_VM_STRUCT_SIZE     (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
-
-#define KVM_MEM_DIRY_LOG_SHIFT 19
-#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
-
-#ifndef __ASSEMBLY__
-
-/*Define the max vcpus and memory for Guests.*/
-#define KVM_MAX_VCPUS  (KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
-                       KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
-#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
-
-#define VMM_LOG_LEN 256
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/kvm.h>
-#include <linux/kvm_para.h>
-#include <linux/kvm_types.h>
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <asm/page.h>
-
-struct kvm_vcpu_data {
-       char vcpu_vhpt[VHPT_SIZE];
-       char vcpu_vtlb[VTLB_SIZE];
-       char vcpu_vpd[VPD_SIZE];
-       char vcpu_struct[VCPU_STRUCT_SIZE];
-};
-
-struct kvm_vm_data {
-       char kvm_p2m[KVM_P2M_SIZE];
-       char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
-       char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
-       struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
-};
-
-#define VCPU_BASE(n)   (KVM_VM_DATA_BASE + \
-                               offsetof(struct kvm_vm_data, vcpu_data[n]))
-#define KVM_VM_BASE    (KVM_VM_DATA_BASE + \
-                               offsetof(struct kvm_vm_data, kvm_vm_struct))
-#define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \
-                               offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
-
-#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
-#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
-#define VPD_BASE(n)  (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
-#define VCPU_STRUCT_BASE(n)    (VCPU_BASE(n) + \
-                               offsetof(struct kvm_vcpu_data, vcpu_struct))
-
-/*IO section definitions*/
-#define IOREQ_READ      1
-#define IOREQ_WRITE     0
-
-#define STATE_IOREQ_NONE        0
-#define STATE_IOREQ_READY       1
-#define STATE_IOREQ_INPROCESS   2
-#define STATE_IORESP_READY      3
-
-/*Guest Physical address layout.*/
-#define GPFN_MEM        (0UL << 60) /* Guest pfn is normal mem */
-#define GPFN_FRAME_BUFFER   (1UL << 60) /* VGA framebuffer */
-#define GPFN_LOW_MMIO       (2UL << 60) /* Low MMIO range */
-#define GPFN_PIB        (3UL << 60) /* PIB base */
-#define GPFN_IOSAPIC        (4UL << 60) /* IOSAPIC base */
-#define GPFN_LEGACY_IO      (5UL << 60) /* Legacy I/O base */
-#define GPFN_GFW        (6UL << 60) /* Guest Firmware */
-#define GPFN_PHYS_MMIO      (7UL << 60) /* Directed MMIO Range */
-
-#define GPFN_IO_MASK        (7UL << 60) /* Guest pfn is I/O type */
-#define GPFN_INV_MASK       (1UL << 63) /* Guest pfn is invalid */
-#define INVALID_MFN       (~0UL)
-#define MEM_G   (1UL << 30)
-#define MEM_M   (1UL << 20)
-#define MMIO_START       (3 * MEM_G)
-#define MMIO_SIZE        (512 * MEM_M)
-#define VGA_IO_START     0xA0000UL
-#define VGA_IO_SIZE      0x20000
-#define LEGACY_IO_START  (MMIO_START + MMIO_SIZE)
-#define LEGACY_IO_SIZE   (64 * MEM_M)
-#define IO_SAPIC_START   0xfec00000UL
-#define IO_SAPIC_SIZE    0x100000
-#define PIB_START 0xfee00000UL
-#define PIB_SIZE 0x200000
-#define GFW_START        (4 * MEM_G - 16 * MEM_M)
-#define GFW_SIZE         (16 * MEM_M)
-
-/*Deliver mode, defined for ioapic.c*/
-#define dest_Fixed IOSAPIC_FIXED
-#define dest_LowestPrio IOSAPIC_LOWEST_PRIORITY
-
-#define NMI_VECTOR                     2
-#define ExtINT_VECTOR                  0
-#define NULL_VECTOR                    (-1)
-#define IA64_SPURIOUS_INT_VECTOR       0x0f
-
-#define VCPU_LID(v) (((u64)(v)->vcpu_id) << 24)
-
-/*
- *Delivery mode
- */
-#define SAPIC_DELIV_SHIFT      8
-#define SAPIC_FIXED            0x0
-#define SAPIC_LOWEST_PRIORITY  0x1
-#define SAPIC_PMI              0x2
-#define SAPIC_NMI              0x4
-#define SAPIC_INIT             0x5
-#define SAPIC_EXTINT           0x7
-
-/*
- * vcpu->requests bit members for arch
- */
-#define KVM_REQ_PTC_G          32
-#define KVM_REQ_RESUME         33
-
-struct kvm_mmio_req {
-       uint64_t addr;          /*  physical address            */
-       uint64_t size;          /*  size in bytes               */
-       uint64_t data;          /*  data (or paddr of data)     */
-       uint8_t state:4;
-       uint8_t dir:1;          /*  1=read, 0=write             */
-};
-
-/*Pal data struct */
-struct kvm_pal_call{
-       /*In area*/
-       uint64_t gr28;
-       uint64_t gr29;
-       uint64_t gr30;
-       uint64_t gr31;
-       /*Out area*/
-       struct ia64_pal_retval ret;
-};
-
-/* Sal data structure */
-struct kvm_sal_call{
-       /*In area*/
-       uint64_t in0;
-       uint64_t in1;
-       uint64_t in2;
-       uint64_t in3;
-       uint64_t in4;
-       uint64_t in5;
-       uint64_t in6;
-       uint64_t in7;
-       struct sal_ret_values ret;
-};
-
-/*Guest change rr6*/
-struct kvm_switch_rr6 {
-       uint64_t old_rr;
-       uint64_t new_rr;
-};
-
-union ia64_ipi_a{
-       unsigned long val;
-       struct {
-               unsigned long rv  : 3;
-               unsigned long ir  : 1;
-               unsigned long eid : 8;
-               unsigned long id  : 8;
-               unsigned long ib_base : 44;
-       };
-};
-
-union ia64_ipi_d {
-       unsigned long val;
-       struct {
-               unsigned long vector : 8;
-               unsigned long dm  : 3;
-               unsigned long ig  : 53;
-       };
-};
-
-/*ipi check exit data*/
-struct kvm_ipi_data{
-       union ia64_ipi_a addr;
-       union ia64_ipi_d data;
-};
-
-/*global purge data*/
-struct kvm_ptc_g {
-       unsigned long vaddr;
-       unsigned long rr;
-       unsigned long ps;
-       struct kvm_vcpu *vcpu;
-};
-
-/*Exit control data */
-struct exit_ctl_data{
-       uint32_t exit_reason;
-       uint32_t vm_status;
-       union {
-               struct kvm_mmio_req     ioreq;
-               struct kvm_pal_call     pal_data;
-               struct kvm_sal_call     sal_data;
-               struct kvm_switch_rr6   rr_data;
-               struct kvm_ipi_data     ipi_data;
-               struct kvm_ptc_g        ptc_g_data;
-       } u;
-};
-
-union pte_flags {
-       unsigned long val;
-       struct {
-               unsigned long p    :  1; /*0      */
-               unsigned long      :  1; /* 1     */
-               unsigned long ma   :  3; /* 2-4   */
-               unsigned long a    :  1; /* 5     */
-               unsigned long d    :  1; /* 6     */
-               unsigned long pl   :  2; /* 7-8   */
-               unsigned long ar   :  3; /* 9-11  */
-               unsigned long ppn  : 38; /* 12-49 */
-               unsigned long      :  2; /* 50-51 */
-               unsigned long ed   :  1; /* 52    */
-       };
-};
-
-union ia64_pta {
-       unsigned long val;
-       struct {
-               unsigned long ve : 1;
-               unsigned long reserved0 : 1;
-               unsigned long size : 6;
-               unsigned long vf : 1;
-               unsigned long reserved1 : 6;
-               unsigned long base : 49;
-       };
-};
-
-struct thash_cb {
-       /* THASH base information */
-       struct thash_data       *hash; /* hash table pointer */
-       union ia64_pta          pta;
-       int           num;
-};
-
-struct kvm_vcpu_stat {
-       u32 halt_wakeup;
-};
-
-struct kvm_vcpu_arch {
-       int launched;
-       int last_exit;
-       int last_run_cpu;
-       int vmm_tr_slot;
-       int vm_tr_slot;
-       int sn_rtc_tr_slot;
-
-#define KVM_MP_STATE_RUNNABLE          0
-#define KVM_MP_STATE_UNINITIALIZED     1
-#define KVM_MP_STATE_INIT_RECEIVED     2
-#define KVM_MP_STATE_HALTED            3
-       int mp_state;
-
-#define MAX_PTC_G_NUM                  3
-       int ptc_g_count;
-       struct kvm_ptc_g ptc_g_data[MAX_PTC_G_NUM];
-
-       /*halt timer to wake up sleepy vcpus*/
-       struct hrtimer hlt_timer;
-       long ht_active;
-
-       struct kvm_lapic *apic;    /* kernel irqchip context */
-       struct vpd *vpd;
-
-       /* Exit data for vmm_transition*/
-       struct exit_ctl_data exit_data;
-
-       cpumask_t cache_coherent_map;
-
-       unsigned long vmm_rr;
-       unsigned long host_rr6;
-       unsigned long psbits[8];
-       unsigned long cr_iipa;
-       unsigned long cr_isr;
-       unsigned long vsa_base;
-       unsigned long dirty_log_lock_pa;
-       unsigned long __gp;
-       /* TR and TC.  */
-       struct thash_data itrs[NITRS];
-       struct thash_data dtrs[NDTRS];
-       /* Bit is set if there is a tr/tc for the region.  */
-       unsigned char itr_regions;
-       unsigned char dtr_regions;
-       unsigned char tc_regions;
-       /* purge all */
-       unsigned long ptce_base;
-       unsigned long ptce_count[2];
-       unsigned long ptce_stride[2];
-       /* itc/itm */
-       unsigned long last_itc;
-       long itc_offset;
-       unsigned long itc_check;
-       unsigned long timer_check;
-       unsigned int timer_pending;
-       unsigned int timer_fired;
-
-       unsigned long vrr[8];
-       unsigned long ibr[8];
-       unsigned long dbr[8];
-       unsigned long insvc[4];         /* Interrupt in service.  */
-       unsigned long xtp;
-
-       unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
-       unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */
-       unsigned long metaphysical_saved_rr0; /* from kvm_arch          */
-       unsigned long metaphysical_saved_rr4; /* from kvm_arch          */
-       unsigned long fp_psr;       /*used for lazy float register */
-       unsigned long saved_gp;
-       /*for phycial  emulation */
-       int mode_flags;
-       struct thash_cb vtlb;
-       struct thash_cb vhpt;
-       char irq_check;
-       char irq_new_pending;
-
-       unsigned long opcode;
-       unsigned long cause;
-       char log_buf[VMM_LOG_LEN];
-       union context host;
-       union context guest;
-
-       char mmio_data[8];
-};
-
-struct kvm_vm_stat {
-       u64 remote_tlb_flush;
-};
-
-struct kvm_sal_data {
-       unsigned long boot_ip;
-       unsigned long boot_gp;
-};
-
-struct kvm_arch_memory_slot {
-};
-
-struct kvm_arch {
-       spinlock_t dirty_log_lock;
-
-       unsigned long   vm_base;
-       unsigned long   metaphysical_rr0;
-       unsigned long   metaphysical_rr4;
-       unsigned long   vmm_init_rr;
-
-       int             is_sn2;
-
-       struct kvm_ioapic *vioapic;
-       struct kvm_vm_stat stat;
-       struct kvm_sal_data rdv_sal_data;
-
-       struct list_head assigned_dev_head;
-       struct iommu_domain *iommu_domain;
-       bool iommu_noncoherent;
-
-       unsigned long irq_sources_bitmap;
-       unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
-};
-
-union cpuid3_t {
-       u64 value;
-       struct {
-               u64 number : 8;
-               u64 revision : 8;
-               u64 model : 8;
-               u64 family : 8;
-               u64 archrev : 8;
-               u64 rv : 24;
-       };
-};
-
-struct kvm_pt_regs {
-       /* The following registers are saved by SAVE_MIN: */
-       unsigned long b6;  /* scratch */
-       unsigned long b7;  /* scratch */
-
-       unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */
-       unsigned long ar_ssd; /* reserved for future use (scratch) */
-
-       unsigned long r8;  /* scratch (return value register 0) */
-       unsigned long r9;  /* scratch (return value register 1) */
-       unsigned long r10; /* scratch (return value register 2) */
-       unsigned long r11; /* scratch (return value register 3) */
-
-       unsigned long cr_ipsr; /* interrupted task's psr */
-       unsigned long cr_iip;  /* interrupted task's instruction pointer */
-       unsigned long cr_ifs;  /* interrupted task's function state */
-
-       unsigned long ar_unat; /* interrupted task's NaT register (preserved) */
-       unsigned long ar_pfs;  /* prev function state  */
-       unsigned long ar_rsc;  /* RSE configuration */
-       /* The following two are valid only if cr_ipsr.cpl > 0: */
-       unsigned long ar_rnat;  /* RSE NaT */
-       unsigned long ar_bspstore; /* RSE bspstore */
-
-       unsigned long pr;  /* 64 predicate registers (1 bit each) */
-       unsigned long b0;  /* return pointer (bp) */
-       unsigned long loadrs;  /* size of dirty partition << 16 */
-
-       unsigned long r1;  /* the gp pointer */
-       unsigned long r12; /* interrupted task's memory stack pointer */
-       unsigned long r13; /* thread pointer */
-
-       unsigned long ar_fpsr;  /* floating point status (preserved) */
-       unsigned long r15;  /* scratch */
-
-       /* The remaining registers are NOT saved for system calls.  */
-       unsigned long r14;  /* scratch */
-       unsigned long r2;  /* scratch */
-       unsigned long r3;  /* scratch */
-       unsigned long r16;  /* scratch */
-       unsigned long r17;  /* scratch */
-       unsigned long r18;  /* scratch */
-       unsigned long r19;  /* scratch */
-       unsigned long r20;  /* scratch */
-       unsigned long r21;  /* scratch */
-       unsigned long r22;  /* scratch */
-       unsigned long r23;  /* scratch */
-       unsigned long r24;  /* scratch */
-       unsigned long r25;  /* scratch */
-       unsigned long r26;  /* scratch */
-       unsigned long r27;  /* scratch */
-       unsigned long r28;  /* scratch */
-       unsigned long r29;  /* scratch */
-       unsigned long r30;  /* scratch */
-       unsigned long r31;  /* scratch */
-       unsigned long ar_ccv;  /* compare/exchange value (scratch) */
-
-       /*
-        * Floating point registers that the kernel considers scratch:
-        */
-       struct ia64_fpreg f6;  /* scratch */
-       struct ia64_fpreg f7;  /* scratch */
-       struct ia64_fpreg f8;  /* scratch */
-       struct ia64_fpreg f9;  /* scratch */
-       struct ia64_fpreg f10;  /* scratch */
-       struct ia64_fpreg f11;  /* scratch */
-
-       unsigned long r4;  /* preserved */
-       unsigned long r5;  /* preserved */
-       unsigned long r6;  /* preserved */
-       unsigned long r7;  /* preserved */
-       unsigned long eml_unat;    /* used for emulating instruction */
-       unsigned long pad0;     /* alignment pad */
-};
-
-static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
-{
-       return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1;
-}
-
-typedef int kvm_vmm_entry(void);
-typedef void kvm_tramp_entry(union context *host, union context *guest);
-
-struct kvm_vmm_info{
-       struct module   *module;
-       kvm_vmm_entry   *vmm_entry;
-       kvm_tramp_entry *tramp_entry;
-       unsigned long   vmm_ivt;
-       unsigned long   patch_mov_ar;
-       unsigned long   patch_mov_ar_sn2;
-};
-
-int kvm_highest_pending_irq(struct kvm_vcpu *vcpu);
-int kvm_emulate_halt(struct kvm_vcpu *vcpu);
-int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
-void kvm_sal_emul(struct kvm_vcpu *vcpu);
-
-#define __KVM_HAVE_ARCH_VM_ALLOC 1
-struct kvm *kvm_arch_alloc_vm(void);
-void kvm_arch_free_vm(struct kvm *kvm);
-
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_free_memslot(struct kvm *kvm,
-               struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
-static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
-static inline void kvm_arch_commit_memory_region(struct kvm *kvm,
-               struct kvm_userspace_memory_region *mem,
-               const struct kvm_memory_slot *old,
-               enum kvm_mr_change change) {}
-static inline void kvm_arch_hardware_unsetup(void) {}
-
-#endif /* __ASSEMBLY__*/
-
-#endif
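The vcpu_regs() helper deleted above finds the saved register frame by stepping KVM_STK_OFFSET bytes past the kvm_vcpu pointer and then backing up one struct kvm_pt_regs, i.e. the frame sits at the very top of the per-vcpu stack area. A minimal user-space sketch of the same pointer arithmetic, with a hypothetical 64 KiB area standing in for KVM_STK_OFFSET (the real constant is defined elsewhere in the deleted sources):

#include <stdio.h>
#include <stdlib.h>

#define STACK_AREA_SIZE (1UL << 16)             /* stand-in for KVM_STK_OFFSET (assumed) */

struct regs_sketch { unsigned long gr[32]; };   /* stand-in for struct kvm_pt_regs */

/* Mirrors vcpu_regs(): the register frame ends exactly at area + STACK_AREA_SIZE. */
static struct regs_sketch *regs_of(void *vcpu_area)
{
        return (struct regs_sketch *)((unsigned long)vcpu_area + STACK_AREA_SIZE) - 1;
}

int main(void)
{
        void *area = malloc(STACK_AREA_SIZE);
        struct regs_sketch *regs = regs_of(area);

        printf("frame starts at offset %lu, ends at offset %lu\n",
               (unsigned long)((char *)regs - (char *)area),
               (unsigned long)((char *)(regs + 1) - (char *)area));
        free(area);
        return 0;
}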
diff --git a/arch/ia64/include/asm/pvclock-abi.h b/arch/ia64/include/asm/pvclock-abi.h
deleted file mode 100644 (file)
index 42b233b..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Same structure as x86's.
- * Hopefully asm-x86/pvclock-abi.h will be moved somewhere more generic;
- * for now, duplicate the same definitions.
- */
-
-#ifndef _ASM_IA64__PVCLOCK_ABI_H
-#define _ASM_IA64__PVCLOCK_ABI_H
-#ifndef __ASSEMBLY__
-
-/*
- * These structs MUST NOT be changed.
- * They are the ABI between hypervisor and guest OS.
- * KVM is using this.
- *
- * pvclock_vcpu_time_info holds the system time and the tsc timestamp
- * of the last update. So the guest can use the tsc delta to get a
- * more precise system time.  There is one per virtual cpu.
- *
- * pvclock_wall_clock references the point in time when the system
- * time was zero (usually boot time), thus the guest calculates the
- * current wall clock by adding the system time.
- *
- * Protocol for the "version" fields is: hypervisor raises it (making
- * it uneven) before it starts updating the fields and raises it again
- * (making it even) when it is done.  Thus the guest can make sure the
- * time values it got are consistent by checking the version before
- * and after reading them.
- */
-
-struct pvclock_vcpu_time_info {
-       u32   version;
-       u32   pad0;
-       u64   tsc_timestamp;
-       u64   system_time;
-       u32   tsc_to_system_mul;
-       s8    tsc_shift;
-       u8    pad[3];
-} __attribute__((__packed__)); /* 32 bytes */
-
-struct pvclock_wall_clock {
-       u32   version;
-       u32   sec;
-       u32   nsec;
-} __attribute__((__packed__));
-
-#endif /* __ASSEMBLY__ */
-#endif /* _ASM_IA64__PVCLOCK_ABI_H */
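The comment block in the header deleted above describes a seqlock-style protocol: the hypervisor makes "version" odd before it starts updating the fields and even again when it is done, so a guest can detect a torn read by checking the version before and after the copy. A minimal sketch of the guest-side consistent read that the comment implies (plain C mirroring the deleted struct; a real guest would add memory barriers and volatile accesses):

#include <stdint.h>

struct pvclock_time_sketch {        /* mirrors the deleted pvclock_vcpu_time_info */
        uint32_t version;
        uint32_t pad0;
        uint64_t tsc_timestamp;
        uint64_t system_time;
        uint32_t tsc_to_system_mul;
        int8_t   tsc_shift;
        uint8_t  pad[3];
};

/* Retry until the same even version is seen before and after the copy. */
static struct pvclock_time_sketch
read_time_info(const struct pvclock_time_sketch *shared)
{
        struct pvclock_time_sketch snap;
        uint32_t v;

        do {
                do {
                        v = shared->version;
                } while (v & 1);        /* odd: hypervisor update in progress */
                snap = *shared;         /* copy all fields                    */
        } while (shared->version != v); /* version moved on: retry the copy   */

        return snap;
}

int main(void)
{
        struct pvclock_time_sketch shared = { .version = 2, .system_time = 123456789 };
        struct pvclock_time_sketch snap = read_time_info(&shared);

        return snap.system_time == shared.system_time ? 0 : 1;
}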
diff --git a/arch/ia64/include/uapi/asm/kvm.h b/arch/ia64/include/uapi/asm/kvm.h
deleted file mode 100644 (file)
index 99503c2..0000000
+++ /dev/null
@@ -1,268 +0,0 @@
-#ifndef __ASM_IA64_KVM_H
-#define __ASM_IA64_KVM_H
-
-/*
- * kvm structure definitions for ia64
- *
- * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-/* Select ia64 specific features in <linux/kvm.h> */
-#define __KVM_HAVE_IOAPIC
-#define __KVM_HAVE_IRQ_LINE
-
-/* Architectural interrupt line count. */
-#define KVM_NR_INTERRUPTS 256
-
-#define KVM_IOAPIC_NUM_PINS  48
-
-struct kvm_ioapic_state {
-       __u64 base_address;
-       __u32 ioregsel;
-       __u32 id;
-       __u32 irr;
-       __u32 pad;
-       union {
-               __u64 bits;
-               struct {
-                       __u8 vector;
-                       __u8 delivery_mode:3;
-                       __u8 dest_mode:1;
-                       __u8 delivery_status:1;
-                       __u8 polarity:1;
-                       __u8 remote_irr:1;
-                       __u8 trig_mode:1;
-                       __u8 mask:1;
-                       __u8 reserve:7;
-                       __u8 reserved[4];
-                       __u8 dest_id;
-               } fields;
-       } redirtbl[KVM_IOAPIC_NUM_PINS];
-};
-
-#define KVM_IRQCHIP_PIC_MASTER   0
-#define KVM_IRQCHIP_PIC_SLAVE    1
-#define KVM_IRQCHIP_IOAPIC       2
-#define KVM_NR_IRQCHIPS          3
-
-#define KVM_CONTEXT_SIZE       8*1024
-
-struct kvm_fpreg {
-       union {
-               unsigned long bits[2];
-               long double __dummy;    /* force 16-byte alignment */
-       } u;
-};
-
-union context {
-       /* 8K size */
-       char    dummy[KVM_CONTEXT_SIZE];
-       struct {
-               unsigned long       psr;
-               unsigned long       pr;
-               unsigned long       caller_unat;
-               unsigned long       pad;
-               unsigned long       gr[32];
-               unsigned long       ar[128];
-               unsigned long       br[8];
-               unsigned long       cr[128];
-               unsigned long       rr[8];
-               unsigned long       ibr[8];
-               unsigned long       dbr[8];
-               unsigned long       pkr[8];
-               struct kvm_fpreg   fr[128];
-       };
-};
-
-struct thash_data {
-       union {
-               struct {
-                       unsigned long p    :  1; /* 0 */
-                       unsigned long rv1  :  1; /* 1 */
-                       unsigned long ma   :  3; /* 2-4 */
-                       unsigned long a    :  1; /* 5 */
-                       unsigned long d    :  1; /* 6 */
-                       unsigned long pl   :  2; /* 7-8 */
-                       unsigned long ar   :  3; /* 9-11 */
-                       unsigned long ppn  : 38; /* 12-49 */
-                       unsigned long rv2  :  2; /* 50-51 */
-                       unsigned long ed   :  1; /* 52 */
-                       unsigned long ig1  : 11; /* 53-63 */
-               };
-               struct {
-                       unsigned long __rv1 : 53;     /* 0-52 */
-                       unsigned long contiguous : 1; /*53 */
-                       unsigned long tc : 1;         /* 54 TR or TC */
-                       unsigned long cl : 1;
-                       /* 55 I side or D side cache line */
-                       unsigned long len  :  4;      /* 56-59 */
-                       unsigned long io  : 1;  /* 60 entry is for io or not */
-                       unsigned long nomap : 1;
-                       /* 61 entry cannot be inserted into the machine TLB. */
-                       unsigned long checked : 1;
-                       /* 62 for VTLB/VHPT sanity check */
-                       unsigned long invalid : 1;
-                       /* 63 invalid entry */
-               };
-               unsigned long page_flags;
-       };                  /* same for VHPT and TLB */
-
-       union {
-               struct {
-                       unsigned long rv3  :  2;
-                       unsigned long ps   :  6;
-                       unsigned long key  : 24;
-                       unsigned long rv4  : 32;
-               };
-               unsigned long itir;
-       };
-       union {
-               struct {
-                       unsigned long ig2  :  12;
-                       unsigned long vpn  :  49;
-                       unsigned long vrn  :   3;
-               };
-               unsigned long ifa;
-               unsigned long vadr;
-               struct {
-                       unsigned long tag  :  63;
-                       unsigned long ti   :  1;
-               };
-               unsigned long etag;
-       };
-       union {
-               struct thash_data *next;
-               unsigned long rid;
-               unsigned long gpaddr;
-       };
-};
-
-#define        NITRS   8
-#define NDTRS  8
-
-struct saved_vpd {
-       unsigned long  vhpi;
-       unsigned long  vgr[16];
-       unsigned long  vbgr[16];
-       unsigned long  vnat;
-       unsigned long  vbnat;
-       unsigned long  vcpuid[5];
-       unsigned long  vpsr;
-       unsigned long  vpr;
-       union {
-               unsigned long  vcr[128];
-               struct {
-                       unsigned long dcr;
-                       unsigned long itm;
-                       unsigned long iva;
-                       unsigned long rsv1[5];
-                       unsigned long pta;
-                       unsigned long rsv2[7];
-                       unsigned long ipsr;
-                       unsigned long isr;
-                       unsigned long rsv3;
-                       unsigned long iip;
-                       unsigned long ifa;
-                       unsigned long itir;
-                       unsigned long iipa;
-                       unsigned long ifs;
-                       unsigned long iim;
-                       unsigned long iha;
-                       unsigned long rsv4[38];
-                       unsigned long lid;
-                       unsigned long ivr;
-                       unsigned long tpr;
-                       unsigned long eoi;
-                       unsigned long irr[4];
-                       unsigned long itv;
-                       unsigned long pmv;
-                       unsigned long cmcv;
-                       unsigned long rsv5[5];
-                       unsigned long lrr0;
-                       unsigned long lrr1;
-                       unsigned long rsv6[46];
-               };
-       };
-};
-
-struct kvm_regs {
-       struct saved_vpd vpd;
-       /*Arch-regs*/
-       int mp_state;
-       unsigned long vmm_rr;
-       /* TR and TC.  */
-       struct thash_data itrs[NITRS];
-       struct thash_data dtrs[NDTRS];
-       /* Bit is set if there is a tr/tc for the region.  */
-       unsigned char itr_regions;
-       unsigned char dtr_regions;
-       unsigned char tc_regions;
-
-       char irq_check;
-       unsigned long saved_itc;
-       unsigned long itc_check;
-       unsigned long timer_check;
-       unsigned long timer_pending;
-       unsigned long last_itc;
-
-       unsigned long vrr[8];
-       unsigned long ibr[8];
-       unsigned long dbr[8];
-       unsigned long insvc[4];         /* Interrupt in service.  */
-       unsigned long xtp;
-
-       unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
-       unsigned long metaphysical_rr4; /* from kvm_arch (so is pinned) */
-       unsigned long metaphysical_saved_rr0; /* from kvm_arch          */
-       unsigned long metaphysical_saved_rr4; /* from kvm_arch          */
-       unsigned long fp_psr;       /* used for lazy float register */
-       unsigned long saved_gp;
-       /* for physical emulation */
-
-       union context saved_guest;
-
-       unsigned long reserved[64];     /* for future use */
-};
-
-struct kvm_sregs {
-};
-
-struct kvm_fpu {
-};
-
-#define KVM_IA64_VCPU_STACK_SHIFT      16
-#define KVM_IA64_VCPU_STACK_SIZE       (1UL << KVM_IA64_VCPU_STACK_SHIFT)
-
-struct kvm_ia64_vcpu_stack {
-       unsigned char stack[KVM_IA64_VCPU_STACK_SIZE];
-};
-
-struct kvm_debug_exit_arch {
-};
-
-/* for KVM_SET_GUEST_DEBUG */
-struct kvm_guest_debug_arch {
-};
-
-/* definition of registers in kvm_run */
-struct kvm_sync_regs {
-};
-
-#endif
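struct thash_data above overlays several bit-field views on the same words; the first union in particular lets one 64-bit page_flags value be read either as the VHPT-format fields (p, ma, pl, ar, ppn, ...) or as the private ig1 bits. A small sketch of that overlay, copying only the first union (field widths taken from the deleted header, values purely illustrative, and assuming a 64-bit unsigned long with GCC-style little-endian bit-field layout):

#include <stdio.h>

/* Copy of the first union in the deleted struct thash_data. */
union pte_view {
        struct {
                unsigned long p    :  1;  /* 0     present             */
                unsigned long rv1  :  1;  /* 1     reserved            */
                unsigned long ma   :  3;  /* 2-4   memory attribute    */
                unsigned long a    :  1;  /* 5     accessed            */
                unsigned long d    :  1;  /* 6     dirty               */
                unsigned long pl   :  2;  /* 7-8   privilege level     */
                unsigned long ar   :  3;  /* 9-11  access rights       */
                unsigned long ppn  : 38;  /* 12-49 physical page no.   */
                unsigned long rv2  :  2;  /* 50-51 reserved            */
                unsigned long ed   :  1;  /* 52    exception deferral  */
                unsigned long ig1  : 11;  /* 53-63 ignored / private   */
        };
        unsigned long page_flags;
};

int main(void)
{
        union pte_view pte = { .page_flags = 0 };

        pte.p   = 1;          /* present                    */
        pte.d   = 1;          /* dirty                      */
        pte.ar  = 3;          /* illustrative access rights */
        pte.ppn = 0x12345;    /* illustrative page number   */

        /* Both views alias the same 64-bit word. */
        printf("page_flags = %#018lx, ppn = %#lx\n",
               pte.page_flags, (unsigned long)pte.ppn);
        return 0;
}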
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
deleted file mode 100644 (file)
index 3d50ea9..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-#
-# KVM configuration
-#
-
-source "virt/kvm/Kconfig"
-
-menuconfig VIRTUALIZATION
-       bool "Virtualization"
-       depends on HAVE_KVM || IA64
-       default y
-       ---help---
-         Say Y here to get to see options for using your Linux host to run other
-         operating systems inside virtual machines (guests).
-         This option alone does not add any kernel code.
-
-         If you say N, all options in this submenu will be skipped and disabled.
-
-if VIRTUALIZATION
-
-config KVM
-       tristate "Kernel-based Virtual Machine (KVM) support"
-       depends on BROKEN
-       depends on HAVE_KVM && MODULES
-       select PREEMPT_NOTIFIERS
-       select ANON_INODES
-       select HAVE_KVM_IRQCHIP
-       select HAVE_KVM_IRQFD
-       select HAVE_KVM_IRQ_ROUTING
-       select KVM_APIC_ARCHITECTURE
-       select KVM_MMIO
-       ---help---
-         Support hosting fully virtualized guest machines using hardware
-         virtualization extensions.  You will need a fairly recent
-         processor equipped with virtualization extensions. You will also
-         need to select one or more of the processor modules below.
-
-         This module provides access to the hardware capabilities through
-         a character device node named /dev/kvm.
-
-         To compile this as a module, choose M here: the module
-         will be called kvm.
-
-         If unsure, say N.
-
-config KVM_INTEL
-       tristate "KVM support for Intel Itanium 2 processors"
-       depends on KVM && m
-       ---help---
-         Provides support for KVM on Itanium 2 processors equipped with the VT
-         extensions.
-
-config KVM_DEVICE_ASSIGNMENT
-       bool "KVM legacy PCI device assignment support"
-       depends on KVM && PCI && IOMMU_API
-       default y
-       ---help---
-         Provide support for legacy PCI device assignment through KVM.  The
-         kernel now also supports a full featured userspace device driver
-         framework through VFIO, which supersedes much of this support.
-
-         If unsure, say Y.
-
-source drivers/vhost/Kconfig
-
-endif # VIRTUALIZATION
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
deleted file mode 100644 (file)
index 18e45ec..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-# This Makefile generates asm-offsets.h and builds the sources.
-#
-
-#Generate asm-offsets.h for vmm module build
-offsets-file := asm-offsets.h
-
-always  := $(offsets-file)
-targets := $(offsets-file)
-targets += arch/ia64/kvm/asm-offsets.s
-
-# Default sed regexp - multiline due to syntax constraints
-define sed-y
-       "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
-endef
-
-quiet_cmd_offsets = GEN     $@
-define cmd_offsets
-       (set -e; \
-        echo "#ifndef __ASM_KVM_OFFSETS_H__"; \
-        echo "#define __ASM_KVM_OFFSETS_H__"; \
-        echo "/*"; \
-        echo " * DO NOT MODIFY."; \
-        echo " *"; \
-        echo " * This file was generated by Makefile"; \
-        echo " *"; \
-        echo " */"; \
-        echo ""; \
-        sed -ne $(sed-y) $<; \
-        echo ""; \
-        echo "#endif" ) > $@
-endef
-
-# We use internal rules to avoid the "is up to date" message from make
-arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c \
-                       $(wildcard $(srctree)/arch/ia64/include/asm/*.h)\
-                       $(wildcard $(srctree)/include/linux/*.h)
-       $(call if_changed_dep,cc_s_c)
-
-$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s
-       $(call cmd,offsets)
-
-FORCE : $(obj)/$(offsets-file)
-
-#
-# Makefile for Kernel-based Virtual Machine module
-#
-
-ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
-asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
-KVM := ../../../virt/kvm
-
-common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \
-               $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o
-
-ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
-common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o
-endif
-
-kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
-obj-$(CONFIG_KVM) += kvm.o
-
-CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
-kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
-       vtlb.o process.o kvm_lib.o
-# Link in memcpy and memset to avoid possible structure assignment errors
-kvm-intel-objs += memcpy.o memset.o
-obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
deleted file mode 100644 (file)
index 9324c87..0000000
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * asm-offsets.c Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed
- * to extract and format the required data.
- *
- * Anthony Xu    <anthony.xu@intel.com>
- * Xiantao Zhang <xiantao.zhang@intel.com>
- * Copyright (c) 2007 Intel Corporation  KVM support.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kbuild.h>
-
-#include "vcpu.h"
-
-void foo(void)
-{
-       DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
-       DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs));
-
-       BLANK();
-
-       DEFINE(VMM_VCPU_META_RR0_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.metaphysical_rr0));
-       DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
-                       offsetof(struct kvm_vcpu,
-                               arch.metaphysical_saved_rr0));
-       DEFINE(VMM_VCPU_VRR0_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.vrr[0]));
-       DEFINE(VMM_VPD_IRR0_OFFSET,
-                       offsetof(struct vpd, irr[0]));
-       DEFINE(VMM_VCPU_ITC_CHECK_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.itc_check));
-       DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.irq_check));
-       DEFINE(VMM_VPD_VHPI_OFFSET,
-                       offsetof(struct vpd, vhpi));
-       DEFINE(VMM_VCPU_VSA_BASE_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.vsa_base));
-       DEFINE(VMM_VCPU_VPD_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.vpd));
-       DEFINE(VMM_VCPU_IRQ_CHECK,
-                       offsetof(struct kvm_vcpu, arch.irq_check));
-       DEFINE(VMM_VCPU_TIMER_PENDING,
-                       offsetof(struct kvm_vcpu, arch.timer_pending));
-       DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0));
-       DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.mode_flags));
-       DEFINE(VMM_VCPU_ITC_OFS_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.itc_offset));
-       DEFINE(VMM_VCPU_LAST_ITC_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.last_itc));
-       DEFINE(VMM_VCPU_SAVED_GP_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.saved_gp));
-
-       BLANK();
-
-       DEFINE(VMM_PT_REGS_B6_OFFSET,
-                               offsetof(struct kvm_pt_regs, b6));
-       DEFINE(VMM_PT_REGS_B7_OFFSET,
-                               offsetof(struct kvm_pt_regs, b7));
-       DEFINE(VMM_PT_REGS_AR_CSD_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_csd));
-       DEFINE(VMM_PT_REGS_AR_SSD_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_ssd));
-       DEFINE(VMM_PT_REGS_R8_OFFSET,
-                               offsetof(struct kvm_pt_regs, r8));
-       DEFINE(VMM_PT_REGS_R9_OFFSET,
-                               offsetof(struct kvm_pt_regs, r9));
-       DEFINE(VMM_PT_REGS_R10_OFFSET,
-                               offsetof(struct kvm_pt_regs, r10));
-       DEFINE(VMM_PT_REGS_R11_OFFSET,
-                               offsetof(struct kvm_pt_regs, r11));
-       DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET,
-                               offsetof(struct kvm_pt_regs, cr_ipsr));
-       DEFINE(VMM_PT_REGS_CR_IIP_OFFSET,
-                               offsetof(struct kvm_pt_regs, cr_iip));
-       DEFINE(VMM_PT_REGS_CR_IFS_OFFSET,
-                               offsetof(struct kvm_pt_regs, cr_ifs));
-       DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_unat));
-       DEFINE(VMM_PT_REGS_AR_PFS_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_pfs));
-       DEFINE(VMM_PT_REGS_AR_RSC_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_rsc));
-       DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_rnat));
-
-       DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_bspstore));
-       DEFINE(VMM_PT_REGS_PR_OFFSET,
-                               offsetof(struct kvm_pt_regs, pr));
-       DEFINE(VMM_PT_REGS_B0_OFFSET,
-                               offsetof(struct kvm_pt_regs, b0));
-       DEFINE(VMM_PT_REGS_LOADRS_OFFSET,
-                               offsetof(struct kvm_pt_regs, loadrs));
-       DEFINE(VMM_PT_REGS_R1_OFFSET,
-                               offsetof(struct kvm_pt_regs, r1));
-       DEFINE(VMM_PT_REGS_R12_OFFSET,
-                               offsetof(struct kvm_pt_regs, r12));
-       DEFINE(VMM_PT_REGS_R13_OFFSET,
-                               offsetof(struct kvm_pt_regs, r13));
-       DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_fpsr));
-       DEFINE(VMM_PT_REGS_R15_OFFSET,
-                               offsetof(struct kvm_pt_regs, r15));
-       DEFINE(VMM_PT_REGS_R14_OFFSET,
-                               offsetof(struct kvm_pt_regs, r14));
-       DEFINE(VMM_PT_REGS_R2_OFFSET,
-                               offsetof(struct kvm_pt_regs, r2));
-       DEFINE(VMM_PT_REGS_R3_OFFSET,
-                               offsetof(struct kvm_pt_regs, r3));
-       DEFINE(VMM_PT_REGS_R16_OFFSET,
-                               offsetof(struct kvm_pt_regs, r16));
-       DEFINE(VMM_PT_REGS_R17_OFFSET,
-                               offsetof(struct kvm_pt_regs, r17));
-       DEFINE(VMM_PT_REGS_R18_OFFSET,
-                               offsetof(struct kvm_pt_regs, r18));
-       DEFINE(VMM_PT_REGS_R19_OFFSET,
-                               offsetof(struct kvm_pt_regs, r19));
-       DEFINE(VMM_PT_REGS_R20_OFFSET,
-                               offsetof(struct kvm_pt_regs, r20));
-       DEFINE(VMM_PT_REGS_R21_OFFSET,
-                               offsetof(struct kvm_pt_regs, r21));
-       DEFINE(VMM_PT_REGS_R22_OFFSET,
-                               offsetof(struct kvm_pt_regs, r22));
-       DEFINE(VMM_PT_REGS_R23_OFFSET,
-                               offsetof(struct kvm_pt_regs, r23));
-       DEFINE(VMM_PT_REGS_R24_OFFSET,
-                               offsetof(struct kvm_pt_regs, r24));
-       DEFINE(VMM_PT_REGS_R25_OFFSET,
-                               offsetof(struct kvm_pt_regs, r25));
-       DEFINE(VMM_PT_REGS_R26_OFFSET,
-                               offsetof(struct kvm_pt_regs, r26));
-       DEFINE(VMM_PT_REGS_R27_OFFSET,
-                               offsetof(struct kvm_pt_regs, r27));
-       DEFINE(VMM_PT_REGS_R28_OFFSET,
-                               offsetof(struct kvm_pt_regs, r28));
-       DEFINE(VMM_PT_REGS_R29_OFFSET,
-                               offsetof(struct kvm_pt_regs, r29));
-       DEFINE(VMM_PT_REGS_R30_OFFSET,
-                               offsetof(struct kvm_pt_regs, r30));
-       DEFINE(VMM_PT_REGS_R31_OFFSET,
-                               offsetof(struct kvm_pt_regs, r31));
-       DEFINE(VMM_PT_REGS_AR_CCV_OFFSET,
-                               offsetof(struct kvm_pt_regs, ar_ccv));
-       DEFINE(VMM_PT_REGS_F6_OFFSET,
-                               offsetof(struct kvm_pt_regs, f6));
-       DEFINE(VMM_PT_REGS_F7_OFFSET,
-                               offsetof(struct kvm_pt_regs, f7));
-       DEFINE(VMM_PT_REGS_F8_OFFSET,
-                               offsetof(struct kvm_pt_regs, f8));
-       DEFINE(VMM_PT_REGS_F9_OFFSET,
-                               offsetof(struct kvm_pt_regs, f9));
-       DEFINE(VMM_PT_REGS_F10_OFFSET,
-                               offsetof(struct kvm_pt_regs, f10));
-       DEFINE(VMM_PT_REGS_F11_OFFSET,
-                               offsetof(struct kvm_pt_regs, f11));
-       DEFINE(VMM_PT_REGS_R4_OFFSET,
-                               offsetof(struct kvm_pt_regs, r4));
-       DEFINE(VMM_PT_REGS_R5_OFFSET,
-                               offsetof(struct kvm_pt_regs, r5));
-       DEFINE(VMM_PT_REGS_R6_OFFSET,
-                               offsetof(struct kvm_pt_regs, r6));
-       DEFINE(VMM_PT_REGS_R7_OFFSET,
-                               offsetof(struct kvm_pt_regs, r7));
-       DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET,
-                               offsetof(struct kvm_pt_regs, eml_unat));
-       DEFINE(VMM_VCPU_IIPA_OFFSET,
-                               offsetof(struct kvm_vcpu, arch.cr_iipa));
-       DEFINE(VMM_VCPU_OPCODE_OFFSET,
-                               offsetof(struct kvm_vcpu, arch.opcode));
-       DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause));
-       DEFINE(VMM_VCPU_ISR_OFFSET,
-                               offsetof(struct kvm_vcpu, arch.cr_isr));
-       DEFINE(VMM_PT_REGS_R16_SLOT,
-                               (((offsetof(struct kvm_pt_regs, r16)
-                               - sizeof(struct kvm_pt_regs)) >> 3) & 0x3f));
-       DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
-                               offsetof(struct kvm_vcpu, arch.mode_flags));
-       DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp));
-       BLANK();
-
-       DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd));
-       DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs));
-       DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET,
-                       offsetof(struct kvm_vcpu, arch.insvc[0]));
-       DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta));
-       DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr));
-
-       DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4]));
-       DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5]));
-       DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12]));
-       DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13]));
-       DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0]));
-       DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1]));
-       DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0]));
-       DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1]));
-       DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2]));
-       DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0]));
-       DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16]));
-       DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18]));
-       DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19]));
-       DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21]));
-       DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24]));
-       DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27]));
-       DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28]));
-       DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29]));
-       DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30]));
-       DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36]));
-       DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40]));
-       DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64]));
-       DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65]));
-       DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0]));
-       DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2]));
-       DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8]));
-       DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0]));
-       DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0]));
-       DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2]));
-       DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3]));
-       DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32]));
-       DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33]));
-       DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0]));
-       DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr));
-       BLANK();
-}
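The Makefile above compiles this asm-offsets.c to assembly and runs its sed-y expression over the result, turning each DEFINE() marker that kbuild emits ("->NAME value comment") into a plain #define, so the VMM assembly sources can use structure offsets without including C headers. A sketch of the kind of asm-offsets.h that would come out; the numeric values below are illustrative, not the real ia64 offsets:

/* Generated by the deleted Makefile rule -- illustrative sketch, DO NOT MODIFY. */
#ifndef __ASM_KVM_OFFSETS_H__
#define __ASM_KVM_OFFSETS_H__

#define VMM_TASK_SIZE 22528             /* sizeof(struct kvm_vcpu)                          */
#define VMM_PT_REGS_SIZE 704            /* sizeof(struct kvm_pt_regs)                       */
#define VMM_VCPU_META_RR0_OFFSET 2168   /* offsetof(struct kvm_vcpu, arch.metaphysical_rr0) */
#define VMM_PT_REGS_R8_OFFSET 32        /* offsetof(struct kvm_pt_regs, r8)                 */

#endif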
diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
deleted file mode 100644 (file)
index c0785a7..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * irq.h: In-kernel interrupt controller related definitions
- * Copyright (c) 2008, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Authors:
- *   Xiantao Zhang <xiantao.zhang@intel.com>
- *
- */
-
-#ifndef __IRQ_H
-#define __IRQ_H
-
-#include "lapic.h"
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-       return 1;
-}
-
-#endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
deleted file mode 100644 (file)
index dbe46f4..0000000
+++ /dev/null
@@ -1,1942 +0,0 @@
-/*
- * kvm_ia64.c: Basic KVM support On Itanium series processors
- *
- *
- *     Copyright (C) 2007, Intel Corporation.
- *     Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/percpu.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/bitops.h>
-#include <linux/hrtimer.h>
-#include <linux/uaccess.h>
-#include <linux/iommu.h>
-#include <linux/intel-iommu.h>
-#include <linux/pci.h>
-
-#include <asm/pgtable.h>
-#include <asm/gcc_intrin.h>
-#include <asm/pal.h>
-#include <asm/cacheflush.h>
-#include <asm/div64.h>
-#include <asm/tlb.h>
-#include <asm/elf.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/clksupport.h>
-#include <asm/sn/shub_mmr.h>
-
-#include "misc.h"
-#include "vti.h"
-#include "iodev.h"
-#include "ioapic.h"
-#include "lapic.h"
-#include "irq.h"
-
-static unsigned long kvm_vmm_base;
-static unsigned long kvm_vsa_base;
-static unsigned long kvm_vm_buffer;
-static unsigned long kvm_vm_buffer_size;
-unsigned long kvm_vmm_gp;
-
-static long vp_env_info;
-
-static struct kvm_vmm_info *kvm_vmm_info;
-
-static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
-
-struct kvm_stats_debugfs_item debugfs_entries[] = {
-       { NULL }
-};
-
-static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu)
-{
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-       if (vcpu->kvm->arch.is_sn2)
-               return rtc_time();
-       else
-#endif
-               return ia64_getreg(_IA64_REG_AR_ITC);
-}
-
-static void kvm_flush_icache(unsigned long start, unsigned long len)
-{
-       int l;
-
-       for (l = 0; l < (len + 32); l += 32)
-               ia64_fc((void *)(start + l));
-
-       ia64_sync_i();
-       ia64_srlz_i();
-}
-
-static void kvm_flush_tlb_all(void)
-{
-       unsigned long i, j, count0, count1, stride0, stride1, addr;
-       long flags;
-
-       addr    = local_cpu_data->ptce_base;
-       count0  = local_cpu_data->ptce_count[0];
-       count1  = local_cpu_data->ptce_count[1];
-       stride0 = local_cpu_data->ptce_stride[0];
-       stride1 = local_cpu_data->ptce_stride[1];
-
-       local_irq_save(flags);
-       for (i = 0; i < count0; ++i) {
-               for (j = 0; j < count1; ++j) {
-                       ia64_ptce(addr);
-                       addr += stride1;
-               }
-               addr += stride0;
-       }
-       local_irq_restore(flags);
-       ia64_srlz_i();                  /* srlz.i implies srlz.d */
-}
-
-long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
-{
-       struct ia64_pal_retval iprv;
-
-       PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
-                       (u64)opt_handler);
-
-       return iprv.status;
-}
-
-static  DEFINE_SPINLOCK(vp_lock);
-
-int kvm_arch_hardware_enable(void)
-{
-       long  status;
-       long  tmp_base;
-       unsigned long pte;
-       unsigned long saved_psr;
-       int slot;
-
-       pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
-       local_irq_save(saved_psr);
-       slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
-       local_irq_restore(saved_psr);
-       if (slot < 0)
-               return -EINVAL;
-
-       spin_lock(&vp_lock);
-       status = ia64_pal_vp_init_env(kvm_vsa_base ?
-                               VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
-                       __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
-       if (status != 0) {
-               spin_unlock(&vp_lock);
-               printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
-               return -EINVAL;
-       }
-
-       if (!kvm_vsa_base) {
-               kvm_vsa_base = tmp_base;
-               printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base);
-       }
-       spin_unlock(&vp_lock);
-       ia64_ptr_entry(0x3, slot);
-
-       return 0;
-}
-
-void kvm_arch_hardware_disable(void)
-{
-
-       long status;
-       int slot;
-       unsigned long pte;
-       unsigned long saved_psr;
-       unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA);
-
-       pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
-                               PAGE_KERNEL));
-
-       local_irq_save(saved_psr);
-       slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
-       local_irq_restore(saved_psr);
-       if (slot < 0)
-               return;
-
-       status = ia64_pal_vp_exit_env(host_iva);
-       if (status)
-               printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n",
-                               status);
-       ia64_ptr_entry(0x3, slot);
-}
-
-void kvm_arch_check_processor_compat(void *rtn)
-{
-       *(int *)rtn = 0;
-}
-
-int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
-{
-
-       int r;
-
-       switch (ext) {
-       case KVM_CAP_IRQCHIP:
-       case KVM_CAP_MP_STATE:
-       case KVM_CAP_IRQ_INJECT_STATUS:
-       case KVM_CAP_IOAPIC_POLARITY_IGNORED:
-               r = 1;
-               break;
-       case KVM_CAP_COALESCED_MMIO:
-               r = KVM_COALESCED_MMIO_PAGE_OFFSET;
-               break;
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
-       case KVM_CAP_IOMMU:
-               r = iommu_present(&pci_bus_type);
-               break;
-#endif
-       default:
-               r = 0;
-       }
-       return r;
-
-}
-
-static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-       kvm_run->hw.hardware_exit_reason = 1;
-       return 0;
-}
-
-static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       struct kvm_mmio_req *p;
-       struct kvm_io_device *mmio_dev;
-       int r;
-
-       p = kvm_get_vcpu_ioreq(vcpu);
-
-       if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
-               goto mmio;
-       vcpu->mmio_needed = 1;
-       vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr;
-       vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size;
-       vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
-
-       if (vcpu->mmio_is_write)
-               memcpy(vcpu->arch.mmio_data, &p->data, p->size);
-       memcpy(kvm_run->mmio.data, &p->data, p->size);
-       kvm_run->exit_reason = KVM_EXIT_MMIO;
-       return 0;
-mmio:
-       if (p->dir)
-               r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
-                                   p->size, &p->data);
-       else
-               r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
-                                    p->size, &p->data);
-       if (r)
-               printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
-       p->state = STATE_IORESP_READY;
-
-       return 1;
-}
-
-static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       struct exit_ctl_data *p;
-
-       p = kvm_get_exit_data(vcpu);
-
-       if (p->exit_reason == EXIT_REASON_PAL_CALL)
-               return kvm_pal_emul(vcpu, kvm_run);
-       else {
-               kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-               kvm_run->hw.hardware_exit_reason = 2;
-               return 0;
-       }
-}
-
-static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       struct exit_ctl_data *p;
-
-       p = kvm_get_exit_data(vcpu);
-
-       if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-               kvm_sal_emul(vcpu);
-               return 1;
-       } else {
-               kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-               kvm_run->hw.hardware_exit_reason = 3;
-               return 0;
-       }
-
-}
-
-static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
-{
-       struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-       if (!test_and_set_bit(vector, &vpd->irr[0])) {
-               vcpu->arch.irq_new_pending = 1;
-               kvm_vcpu_kick(vcpu);
-               return 1;
-       }
-       return 0;
-}
-
-/*
- *  offset: address offset to IPI space.
- *  value:  deliver value.
- */
-static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
-                               uint64_t vector)
-{
-       switch (dm) {
-       case SAPIC_FIXED:
-               break;
-       case SAPIC_NMI:
-               vector = 2;
-               break;
-       case SAPIC_EXTINT:
-               vector = 0;
-               break;
-       case SAPIC_INIT:
-       case SAPIC_PMI:
-       default:
-               printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
-               return;
-       }
-       __apic_accept_irq(vcpu, vector);
-}
-
-static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
-                       unsigned long eid)
-{
-       union ia64_lid lid;
-       int i;
-       struct kvm_vcpu *vcpu;
-
-       kvm_for_each_vcpu(i, vcpu, kvm) {
-               lid.val = VCPU_LID(vcpu);
-               if (lid.id == id && lid.eid == eid)
-                       return vcpu;
-       }
-
-       return NULL;
-}
-
-static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
-       struct kvm_vcpu *target_vcpu;
-       struct kvm_pt_regs *regs;
-       union ia64_ipi_a addr = p->u.ipi_data.addr;
-       union ia64_ipi_d data = p->u.ipi_data.data;
-
-       target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid);
-       if (!target_vcpu)
-               return handle_vm_error(vcpu, kvm_run);
-
-       if (!target_vcpu->arch.launched) {
-               regs = vcpu_regs(target_vcpu);
-
-               regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip;
-               regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp;
-
-               target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-               if (waitqueue_active(&target_vcpu->wq))
-                       wake_up_interruptible(&target_vcpu->wq);
-       } else {
-               vcpu_deliver_ipi(target_vcpu, data.dm, data.vector);
-               if (target_vcpu != vcpu)
-                       kvm_vcpu_kick(target_vcpu);
-       }
-
-       return 1;
-}
-
-struct call_data {
-       struct kvm_ptc_g ptc_g_data;
-       struct kvm_vcpu *vcpu;
-};
-
-static void vcpu_global_purge(void *info)
-{
-       struct call_data *p = (struct call_data *)info;
-       struct kvm_vcpu *vcpu = p->vcpu;
-
-       if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
-               return;
-
-       set_bit(KVM_REQ_PTC_G, &vcpu->requests);
-       if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) {
-               vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] =
-                                                       p->ptc_g_data;
-       } else {
-               clear_bit(KVM_REQ_PTC_G, &vcpu->requests);
-               vcpu->arch.ptc_g_count = 0;
-               set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
-       }
-}
-
-static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
-       struct kvm *kvm = vcpu->kvm;
-       struct call_data call_data;
-       int i;
-       struct kvm_vcpu *vcpui;
-
-       call_data.ptc_g_data = p->u.ptc_g_data;
-
-       kvm_for_each_vcpu(i, vcpui, kvm) {
-               if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED ||
-                               vcpu == vcpui)
-                       continue;
-
-               if (waitqueue_active(&vcpui->wq))
-                       wake_up_interruptible(&vcpui->wq);
-
-               if (vcpui->cpu != -1) {
-                       call_data.vcpu = vcpui;
-                       smp_call_function_single(vcpui->cpu,
-                                       vcpu_global_purge, &call_data, 1);
-               } else
-                       printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
-
-       }
-       return 1;
-}
-
-static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       return 1;
-}
-
-static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu)
-{
-       unsigned long pte, rtc_phys_addr, map_addr;
-       int slot;
-
-       map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT);
-       rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC;
-       pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC));
-       slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT);
-       vcpu->arch.sn_rtc_tr_slot = slot;
-       if (slot < 0) {
-               printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n");
-               slot = 0;
-       }
-       return slot;
-}
-
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
-{
-
-       ktime_t kt;
-       long itc_diff;
-       unsigned long vcpu_now_itc;
-       unsigned long expires;
-       struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
-       unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
-       struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-       if (irqchip_in_kernel(vcpu->kvm)) {
-
-               vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset;
-
-               if (time_after(vcpu_now_itc, vpd->itm)) {
-                       vcpu->arch.timer_check = 1;
-                       return 1;
-               }
-               itc_diff = vpd->itm - vcpu_now_itc;
-               if (itc_diff < 0)
-                       itc_diff = -itc_diff;
-
-               expires = div64_u64(itc_diff, cyc_per_usec);
-               kt = ktime_set(0, 1000 * expires);
-
-               vcpu->arch.ht_active = 1;
-               hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
-
-               vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
-               kvm_vcpu_block(vcpu);
-               hrtimer_cancel(p_ht);
-               vcpu->arch.ht_active = 0;
-
-               if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) ||
-                               kvm_cpu_has_pending_timer(vcpu))
-                       if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
-                               vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-
-               if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
-                       return -EINTR;
-               return 1;
-       } else {
-               printk(KERN_ERR"kvm: Unsupported userspace halt!");
-               return 0;
-       }
-}
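kvm_emulate_halt() above arms an hrtimer for the distance to the guest's next timer tick: the ITC-cycle difference is divided by cyc_per_usec to get microseconds, then multiplied by 1000 for ktime_set()'s nanosecond argument. A stand-alone sketch of that arithmetic with illustrative numbers (a hypothetical 400-cycles-per-microsecond ITC and 4,000,000 remaining cycles give a 10 ms sleep):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t itc_diff     = 4000000; /* cycles until the guest's ITM fires (illustrative) */
        uint64_t cyc_per_usec = 400;     /* hypothetical 400 MHz ITC                          */

        uint64_t expires_us = itc_diff / cyc_per_usec; /* div64_u64() in the deleted code */
        uint64_t expires_ns = 1000 * expires_us;       /* ktime_set(0, 1000 * expires)    */

        printf("halt for %llu us (%llu ns)\n",
               (unsigned long long)expires_us, (unsigned long long)expires_ns);
        return 0;
}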
-
-static int handle_vm_shutdown(struct kvm_vcpu *vcpu,
-               struct kvm_run *kvm_run)
-{
-       kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
-       return 0;
-}
-
-static int handle_external_interrupt(struct kvm_vcpu *vcpu,
-               struct kvm_run *kvm_run)
-{
-       return 1;
-}
-
-static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
-                               struct kvm_run *kvm_run)
-{
-       printk("VMM: %s", vcpu->arch.log_buf);
-       return 1;
-}
-
-static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
-               struct kvm_run *kvm_run) = {
-       [EXIT_REASON_VM_PANIC]              = handle_vm_error,
-       [EXIT_REASON_MMIO_INSTRUCTION]      = handle_mmio,
-       [EXIT_REASON_PAL_CALL]              = handle_pal_call,
-       [EXIT_REASON_SAL_CALL]              = handle_sal_call,
-       [EXIT_REASON_SWITCH_RR6]            = handle_switch_rr6,
-       [EXIT_REASON_VM_DESTROY]            = handle_vm_shutdown,
-       [EXIT_REASON_EXTERNAL_INTERRUPT]    = handle_external_interrupt,
-       [EXIT_REASON_IPI]                   = handle_ipi,
-       [EXIT_REASON_PTC_G]                 = handle_global_purge,
-       [EXIT_REASON_DEBUG]                 = handle_vcpu_debug,
-
-};
-
-static const int kvm_vti_max_exit_handlers =
-               sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
-
-static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
-{
-       struct exit_ctl_data *p_exit_data;
-
-       p_exit_data = kvm_get_exit_data(vcpu);
-       return p_exit_data->exit_reason;
-}
-
-/*
- * The guest has exited.  See if we can fix it or if we need userspace
- * assistance.
- */
-static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
-{
-       u32 exit_reason = kvm_get_exit_reason(vcpu);
-       vcpu->arch.last_exit = exit_reason;
-
-       if (exit_reason < kvm_vti_max_exit_handlers
-                       && kvm_vti_exit_handlers[exit_reason])
-               return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run);
-       else {
-               kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-               kvm_run->hw.hardware_exit_reason = exit_reason;
-       }
-       return 0;
-}
-
-static inline void vti_set_rr6(unsigned long rr6)
-{
-       ia64_set_rr(RR6, rr6);
-       ia64_srlz_i();
-}
-
-static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
-{
-       unsigned long pte;
-       struct kvm *kvm = vcpu->kvm;
-       int r;
-
-       /* Insert a pair of TRs to map the VMM */
-       pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
-       r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
-       if (r < 0)
-               goto out;
-       vcpu->arch.vmm_tr_slot = r;
-       /* Insert a pair of TRs to map the VM data */
-       pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL));
-       r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE,
-                                       pte, KVM_VM_DATA_SHIFT);
-       if (r < 0)
-               goto out;
-       vcpu->arch.vm_tr_slot = r;
-
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-       if (kvm->arch.is_sn2) {
-               r = kvm_sn2_setup_mappings(vcpu);
-               if (r < 0)
-                       goto out;
-       }
-#endif
-
-       r = 0;
-out:
-       return r;
-}
-
-static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
-{
-       struct kvm *kvm = vcpu->kvm;
-       ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
-       ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-       if (kvm->arch.is_sn2)
-               ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot);
-#endif
-}
-
-static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
-{
-       unsigned long psr;
-       int r;
-       int cpu = smp_processor_id();
-
-       if (vcpu->arch.last_run_cpu != cpu ||
-                       per_cpu(last_vcpu, cpu) != vcpu) {
-               per_cpu(last_vcpu, cpu) = vcpu;
-               vcpu->arch.last_run_cpu = cpu;
-               kvm_flush_tlb_all();
-       }
-
-       vcpu->arch.host_rr6 = ia64_get_rr(RR6);
-       vti_set_rr6(vcpu->arch.vmm_rr);
-       local_irq_save(psr);
-       r = kvm_insert_vmm_mapping(vcpu);
-       local_irq_restore(psr);
-       return r;
-}
-
-static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
-{
-       kvm_purge_vmm_mapping(vcpu);
-       vti_set_rr6(vcpu->arch.host_rr6);
-}
-
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       union context *host_ctx, *guest_ctx;
-       int r, idx;
-
-       idx = srcu_read_lock(&vcpu->kvm->srcu);
-
-again:
-       if (signal_pending(current)) {
-               r = -EINTR;
-               kvm_run->exit_reason = KVM_EXIT_INTR;
-               goto out;
-       }
-
-       preempt_disable();
-       local_irq_disable();
-
-       /*Get host and guest context with guest address space.*/
-       host_ctx = kvm_get_host_context(vcpu);
-       guest_ctx = kvm_get_guest_context(vcpu);
-
-       clear_bit(KVM_REQ_KICK, &vcpu->requests);
-
-       r = kvm_vcpu_pre_transition(vcpu);
-       if (r < 0)
-               goto vcpu_run_fail;
-
-       srcu_read_unlock(&vcpu->kvm->srcu, idx);
-       vcpu->mode = IN_GUEST_MODE;
-       kvm_guest_enter();
-
-       /*
-        * Transition to the guest
-        */
-       kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
-
-       kvm_vcpu_post_transition(vcpu);
-
-       vcpu->arch.launched = 1;
-       set_bit(KVM_REQ_KICK, &vcpu->requests);
-       local_irq_enable();
-
-       /*
-        * We must have an instruction between local_irq_enable() and
-        * kvm_guest_exit(), so the timer interrupt isn't delayed by
-        * the interrupt shadow.  The stat.exits increment will do nicely.
-        * But we need to prevent reordering, hence this barrier():
-        */
-       barrier();
-       kvm_guest_exit();
-       vcpu->mode = OUTSIDE_GUEST_MODE;
-       preempt_enable();
-
-       idx = srcu_read_lock(&vcpu->kvm->srcu);
-
-       r = kvm_handle_exit(kvm_run, vcpu);
-
-       if (r > 0) {
-               if (!need_resched())
-                       goto again;
-       }
-
-out:
-       srcu_read_unlock(&vcpu->kvm->srcu, idx);
-       if (r > 0) {
-               cond_resched();
-               idx = srcu_read_lock(&vcpu->kvm->srcu);
-               goto again;
-       }
-
-       return r;
-
-vcpu_run_fail:
-       local_irq_enable();
-       preempt_enable();
-       kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
-       goto out;
-}
-
-static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
-{
-       struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
-
-       if (!vcpu->mmio_is_write)
-               memcpy(&p->data, vcpu->arch.mmio_data, 8);
-       p->state = STATE_IORESP_READY;
-}
-
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-       int r;
-       sigset_t sigsaved;
-
-       if (vcpu->sigset_active)
-               sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
-
-       if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
-               kvm_vcpu_block(vcpu);
-               clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
-               r = -EAGAIN;
-               goto out;
-       }
-
-       if (vcpu->mmio_needed) {
-               memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8);
-               kvm_set_mmio_data(vcpu);
-               vcpu->mmio_read_completed = 1;
-               vcpu->mmio_needed = 0;
-       }
-       r = __vcpu_run(vcpu, kvm_run);
-out:
-       if (vcpu->sigset_active)
-               sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-
-       return r;
-}
-
-struct kvm *kvm_arch_alloc_vm(void)
-{
-
-       struct kvm *kvm;
-       uint64_t  vm_base;
-
-       BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
-
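-       /*
-        * All VM state (struct kvm, per-vcpu data, dirty log) lives inside
-        * one contiguous kvm_vm_data allocation; struct kvm itself is
-        * embedded at the kvm_vm_struct offset within it.
-        */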
-       vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
-
-       if (!vm_base)
-               return NULL;
-
-       memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
-       kvm = (struct kvm *)(vm_base +
-                       offsetof(struct kvm_vm_data, kvm_vm_struct));
-       kvm->arch.vm_base = vm_base;
-       printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
-
-       return kvm;
-}
-
-struct kvm_ia64_io_range {
-       unsigned long start;
-       unsigned long size;
-       unsigned long type;
-};
-
-static const struct kvm_ia64_io_range io_ranges[] = {
-       {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
-       {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
-       {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
-       {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC},
-       {PIB_START, PIB_SIZE, GPFN_PIB},
-};
-
-static void kvm_build_io_pmt(struct kvm *kvm)
-{
-       unsigned long i, j;
-
-       /* Mark I/O ranges */
-       for (i = 0; i < ARRAY_SIZE(io_ranges); i++) {
-               for (j = io_ranges[i].start;
-                               j < io_ranges[i].start + io_ranges[i].size;
-                               j += PAGE_SIZE)
-                       kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT,
-                                       io_ranges[i].type, 0);
-       }
-
-}
-
-/* Use RIDs left unused by the host to virtualize the guest RIDs. */
-#define GUEST_PHYSICAL_RR0     0x1739
-#define GUEST_PHYSICAL_RR4     0x2739
-#define VMM_INIT_RR            0x1660
-
-int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
-{
-       BUG_ON(!kvm);
-
-       if (type)
-               return -EINVAL;
-
-       kvm->arch.is_sn2 = ia64_platform_is("sn2");
-
-       kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
-       kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
-       kvm->arch.vmm_init_rr = VMM_INIT_RR;
-
-       /*
-        * Fill P2M entries for MMIO/IO ranges
-        */
-       kvm_build_io_pmt(kvm);
-
-       INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
-
-       /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
-       set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
-
-       return 0;
-}
-
-static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
-                                       struct kvm_irqchip *chip)
-{
-       int r;
-
-       r = 0;
-       switch (chip->chip_id) {
-       case KVM_IRQCHIP_IOAPIC:
-               r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
-               break;
-       default:
-               r = -EINVAL;
-               break;
-       }
-       return r;
-}
-
-static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
-{
-       int r;
-
-       r = 0;
-       switch (chip->chip_id) {
-       case KVM_IRQCHIP_IOAPIC:
-               r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
-               break;
-       default:
-               r = -EINVAL;
-               break;
-       }
-       return r;
-}
-
-#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x
-
-int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
-       struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-       int i;
-
-       for (i = 0; i < 16; i++) {
-               vpd->vgr[i] = regs->vpd.vgr[i];
-               vpd->vbgr[i] = regs->vpd.vbgr[i];
-       }
-       for (i = 0; i < 128; i++)
-               vpd->vcr[i] = regs->vpd.vcr[i];
-       vpd->vhpi = regs->vpd.vhpi;
-       vpd->vnat = regs->vpd.vnat;
-       vpd->vbnat = regs->vpd.vbnat;
-       vpd->vpsr = regs->vpd.vpsr;
-
-       vpd->vpr = regs->vpd.vpr;
-
-       memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
-
-       RESTORE_REGS(mp_state);
-       RESTORE_REGS(vmm_rr);
-       memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS);
-       memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS);
-       RESTORE_REGS(itr_regions);
-       RESTORE_REGS(dtr_regions);
-       RESTORE_REGS(tc_regions);
-       RESTORE_REGS(irq_check);
-       RESTORE_REGS(itc_check);
-       RESTORE_REGS(timer_check);
-       RESTORE_REGS(timer_pending);
-       RESTORE_REGS(last_itc);
-       for (i = 0; i < 8; i++) {
-               vcpu->arch.vrr[i] = regs->vrr[i];
-               vcpu->arch.ibr[i] = regs->ibr[i];
-               vcpu->arch.dbr[i] = regs->dbr[i];
-       }
-       for (i = 0; i < 4; i++)
-               vcpu->arch.insvc[i] = regs->insvc[i];
-       RESTORE_REGS(xtp);
-       RESTORE_REGS(metaphysical_rr0);
-       RESTORE_REGS(metaphysical_rr4);
-       RESTORE_REGS(metaphysical_saved_rr0);
-       RESTORE_REGS(metaphysical_saved_rr4);
-       RESTORE_REGS(fp_psr);
-       RESTORE_REGS(saved_gp);
-
-       vcpu->arch.irq_new_pending = 1;
-       vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
-       set_bit(KVM_REQ_RESUME, &vcpu->requests);
-
-       return 0;
-}
-
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
-               bool line_status)
-{
-       if (!irqchip_in_kernel(kvm))
-               return -ENXIO;
-
-       irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-                                       irq_event->irq, irq_event->level,
-                                       line_status);
-       return 0;
-}
-
-long kvm_arch_vm_ioctl(struct file *filp,
-               unsigned int ioctl, unsigned long arg)
-{
-       struct kvm *kvm = filp->private_data;
-       void __user *argp = (void __user *)arg;
-       int r = -ENOTTY;
-
-       switch (ioctl) {
-       case KVM_CREATE_IRQCHIP:
-               r = -EFAULT;
-               r = kvm_ioapic_init(kvm);
-               if (r)
-                       goto out;
-               r = kvm_setup_default_irq_routing(kvm);
-               if (r) {
-                       mutex_lock(&kvm->slots_lock);
-                       kvm_ioapic_destroy(kvm);
-                       mutex_unlock(&kvm->slots_lock);
-                       goto out;
-               }
-               break;
-       case KVM_GET_IRQCHIP: {
-               /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
-               struct kvm_irqchip chip;
-
-               r = -EFAULT;
-               if (copy_from_user(&chip, argp, sizeof chip))
-                               goto out;
-               r = -ENXIO;
-               if (!irqchip_in_kernel(kvm))
-                       goto out;
-               r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
-               if (r)
-                       goto out;
-               r = -EFAULT;
-               if (copy_to_user(argp, &chip, sizeof chip))
-                               goto out;
-               r = 0;
-               break;
-               }
-       case KVM_SET_IRQCHIP: {
-               /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
-               struct kvm_irqchip chip;
-
-               r = -EFAULT;
-               if (copy_from_user(&chip, argp, sizeof chip))
-                               goto out;
-               r = -ENXIO;
-               if (!irqchip_in_kernel(kvm))
-                       goto out;
-               r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
-               if (r)
-                       goto out;
-               r = 0;
-               break;
-               }
-       default:
-               ;
-       }
-out:
-       return r;
-}
-
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-               struct kvm_sregs *sregs)
-{
-       return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-               struct kvm_sregs *sregs)
-{
-       return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
-               struct kvm_translation *tr)
-{
-
-       return -EINVAL;
-}
-
-static int kvm_alloc_vmm_area(void)
-{
-       if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) {
-               kvm_vmm_base = __get_free_pages(GFP_KERNEL,
-                               get_order(KVM_VMM_SIZE));
-               if (!kvm_vmm_base)
-                       return -ENOMEM;
-
-               memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
-               kvm_vm_buffer = kvm_vmm_base + VMM_SIZE;
-
-               printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n",
-                               kvm_vmm_base, kvm_vm_buffer);
-       }
-
-       return 0;
-}
-
-static void kvm_free_vmm_area(void)
-{
-       if (kvm_vmm_base) {
-               /* Zero this area before freeing it so no data leaks. */
-               memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
-               free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE));
-               kvm_vmm_base  = 0;
-               kvm_vm_buffer = 0;
-               kvm_vsa_base = 0;
-       }
-}
-
-static int vti_init_vpd(struct kvm_vcpu *vcpu)
-{
-       int i;
-       union cpuid3_t cpuid3;
-       struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-       if (IS_ERR(vpd))
-               return PTR_ERR(vpd);
-
-       /* CPUID init */
-       for (i = 0; i < 5; i++)
-               vpd->vcpuid[i] = ia64_get_cpuid(i);
-
-       /* Limit the CPUID number to 5 */
-       cpuid3.value = vpd->vcpuid[3];
-       cpuid3.number = 4;      /* 5 - 1 */
-       vpd->vcpuid[3] = cpuid3.value;
-
-       /*Set vac and vdc fields*/
-       vpd->vac.a_from_int_cr = 1;
-       vpd->vac.a_to_int_cr = 1;
-       vpd->vac.a_from_psr = 1;
-       vpd->vac.a_from_cpuid = 1;
-       vpd->vac.a_cover = 1;
-       vpd->vac.a_bsw = 1;
-       vpd->vac.a_int = 1;
-       vpd->vdc.d_vmsw = 1;
-
-       /*Set virtual buffer*/
-       vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE;
-
-       return 0;
-}
-
-static int vti_create_vp(struct kvm_vcpu *vcpu)
-{
-       long ret;
-       struct vpd *vpd = vcpu->arch.vpd;
-       unsigned long  vmm_ivt;
-
-       vmm_ivt = kvm_vmm_info->vmm_ivt;
-
-       printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt);
-
-       ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0);
-
-       if (ret) {
-               printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n");
-               return -EINVAL;
-       }
-       return 0;
-}
-
-static void init_ptce_info(struct kvm_vcpu *vcpu)
-{
-       ia64_ptce_info_t ptce = {0};
-
-       ia64_get_ptce(&ptce);
-       vcpu->arch.ptce_base = ptce.base;
-       vcpu->arch.ptce_count[0] = ptce.count[0];
-       vcpu->arch.ptce_count[1] = ptce.count[1];
-       vcpu->arch.ptce_stride[0] = ptce.stride[0];
-       vcpu->arch.ptce_stride[1] = ptce.stride[1];
-}
-
-static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
-{
-       struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
-
-       if (hrtimer_cancel(p_ht))
-               hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS);
-}
-
-static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
-{
-       struct kvm_vcpu *vcpu;
-       wait_queue_head_t *q;
-
-       vcpu  = container_of(data, struct kvm_vcpu, arch.hlt_timer);
-       q = &vcpu->wq;
-
-       if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
-               goto out;
-
-       if (waitqueue_active(q))
-               wake_up_interruptible(q);
-
-out:
-       vcpu->arch.timer_fired = 1;
-       vcpu->arch.timer_check = 1;
-       return HRTIMER_NORESTART;
-}
-
-#define PALE_RESET_ENTRY    0x80000000ffffffb0UL
-
-bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
-{
-       return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
-}
-
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
-       struct kvm_vcpu *v;
-       int r;
-       int i;
-       long itc_offset;
-       struct kvm *kvm = vcpu->kvm;
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-       union context *p_ctx = &vcpu->arch.guest;
-       struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
-
-       /*Init vcpu context for first run.*/
-       if (IS_ERR(vmm_vcpu))
-               return PTR_ERR(vmm_vcpu);
-
-       if (kvm_vcpu_is_bsp(vcpu)) {
-               vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-
-               /*Set entry address for first run.*/
-               regs->cr_iip = PALE_RESET_ENTRY;
-
-               /*Initialize itc offset for vcpus*/
-               itc_offset = 0UL - kvm_get_itc(vcpu);
-               for (i = 0; i < KVM_MAX_VCPUS; i++) {
-                       v = (struct kvm_vcpu *)((char *)vcpu +
-                                       sizeof(struct kvm_vcpu_data) * i);
-                       v->arch.itc_offset = itc_offset;
-                       v->arch.last_itc = 0;
-               }
-       } else
-               vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
-
-       r = -ENOMEM;
-       vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL);
-       if (!vcpu->arch.apic)
-               goto out;
-       vcpu->arch.apic->vcpu = vcpu;
-
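-       /*
-        * Hand-build the initial VMM register context: gr[12] is the VMM
-        * stack pointer, gr[13] the vcpu pointer, cr[2] the relocated
-        * VMM's IVT and br[0] its entry point.
-        */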
-       p_ctx->gr[1] = 0;
-       p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
-       p_ctx->gr[13] = (unsigned long)vmm_vcpu;
-       p_ctx->psr = 0x1008522000UL;
-       p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
-       p_ctx->caller_unat = 0;
-       p_ctx->pr = 0x0;
-       p_ctx->ar[36] = 0x0; /*unat*/
-       p_ctx->ar[19] = 0x0; /*rnat*/
-       p_ctx->ar[18] = (unsigned long)vmm_vcpu +
-                               ((sizeof(struct kvm_vcpu)+15) & ~15);
-       p_ctx->ar[64] = 0x0; /*pfs*/
-       p_ctx->cr[0] = 0x7e04UL;
-       p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt;
-       p_ctx->cr[8] = 0x3c;
-
-       /*Initialize region register*/
-       p_ctx->rr[0] = 0x30;
-       p_ctx->rr[1] = 0x30;
-       p_ctx->rr[2] = 0x30;
-       p_ctx->rr[3] = 0x30;
-       p_ctx->rr[4] = 0x30;
-       p_ctx->rr[5] = 0x30;
-       p_ctx->rr[7] = 0x30;
-
-       /*Initialize branch register 0*/
-       p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry;
-
-       vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr;
-       vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0;
-       vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4;
-
-       hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-       vcpu->arch.hlt_timer.function = hlt_timer_fn;
-
-       vcpu->arch.last_run_cpu = -1;
-       vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
-       vcpu->arch.vsa_base = kvm_vsa_base;
-       vcpu->arch.__gp = kvm_vmm_gp;
-       vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
-       vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
-       vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
-       init_ptce_info(vcpu);
-
-       r = 0;
-out:
-       return r;
-}
-
-static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
-{
-       unsigned long psr;
-       int r;
-
-       local_irq_save(psr);
-       r = kvm_insert_vmm_mapping(vcpu);
-       local_irq_restore(psr);
-       if (r)
-               goto fail;
-       r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
-       if (r)
-               goto fail;
-
-       r = vti_init_vpd(vcpu);
-       if (r) {
-               printk(KERN_DEBUG"kvm: vpd init error!!\n");
-               goto uninit;
-       }
-
-       r = vti_create_vp(vcpu);
-       if (r)
-               goto uninit;
-
-       kvm_purge_vmm_mapping(vcpu);
-
-       return 0;
-uninit:
-       kvm_vcpu_uninit(vcpu);
-fail:
-       return r;
-}
-
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
-               unsigned int id)
-{
-       struct kvm_vcpu *vcpu;
-       unsigned long vm_base = kvm->arch.vm_base;
-       int r;
-       int cpu;
-
-       BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
-
-       r = -EINVAL;
-       if (id >= KVM_MAX_VCPUS) {
-               printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
-                               KVM_MAX_VCPUS);
-               goto fail;
-       }
-
-       r = -ENOMEM;
-       if (!vm_base) {
-               printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
-               goto fail;
-       }
-       vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
-                                       vcpu_data[id].vcpu_struct));
-       vcpu->kvm = kvm;
-
-       cpu = get_cpu();
-       r = vti_vcpu_setup(vcpu, id);
-       put_cpu();
-
-       if (r) {
-               printk(KERN_DEBUG"kvm: vcpu_setup error!!\n");
-               goto fail;
-       }
-
-       return vcpu;
-fail:
-       return ERR_PTR(r);
-}
-
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
-       return 0;
-}
-
-int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
-{
-       return 0;
-}
-
-int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
-       return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
-       return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-                                       struct kvm_guest_debug *dbg)
-{
-       return -EINVAL;
-}
-
-void kvm_arch_free_vm(struct kvm *kvm)
-{
-       unsigned long vm_base = kvm->arch.vm_base;
-
-       if (vm_base) {
-               memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
-               free_pages(vm_base, get_order(KVM_VM_DATA_SIZE));
-       }
-
-}
-
-static void kvm_release_vm_pages(struct kvm *kvm)
-{
-       struct kvm_memslots *slots;
-       struct kvm_memory_slot *memslot;
-       int j;
-
-       slots = kvm_memslots(kvm);
-       kvm_for_each_memslot(memslot, slots) {
-               for (j = 0; j < memslot->npages; j++) {
-                       if (memslot->rmap[j])
-                               put_page((struct page *)memslot->rmap[j]);
-               }
-       }
-}
-
-void kvm_arch_destroy_vm(struct kvm *kvm)
-{
-       kvm_iommu_unmap_guest(kvm);
-       kvm_free_all_assigned_devices(kvm);
-       kfree(kvm->arch.vioapic);
-       kvm_release_vm_pages(kvm);
-}
-
-void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-{
-       if (cpu != vcpu->cpu) {
-               vcpu->cpu = cpu;
-               if (vcpu->arch.ht_active)
-                       kvm_migrate_hlt_timer(vcpu);
-       }
-}
-
-#define SAVE_REGS(_x)  regs->_x = vcpu->arch._x
-
-int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
-       struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-       int i;
-
-       vcpu_load(vcpu);
-
-       for (i = 0; i < 16; i++) {
-               regs->vpd.vgr[i] = vpd->vgr[i];
-               regs->vpd.vbgr[i] = vpd->vbgr[i];
-       }
-       for (i = 0; i < 128; i++)
-               regs->vpd.vcr[i] = vpd->vcr[i];
-       regs->vpd.vhpi = vpd->vhpi;
-       regs->vpd.vnat = vpd->vnat;
-       regs->vpd.vbnat = vpd->vbnat;
-       regs->vpd.vpsr = vpd->vpsr;
-       regs->vpd.vpr = vpd->vpr;
-
-       memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
-
-       SAVE_REGS(mp_state);
-       SAVE_REGS(vmm_rr);
-       memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
-       memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS);
-       SAVE_REGS(itr_regions);
-       SAVE_REGS(dtr_regions);
-       SAVE_REGS(tc_regions);
-       SAVE_REGS(irq_check);
-       SAVE_REGS(itc_check);
-       SAVE_REGS(timer_check);
-       SAVE_REGS(timer_pending);
-       SAVE_REGS(last_itc);
-       for (i = 0; i < 8; i++) {
-               regs->vrr[i] = vcpu->arch.vrr[i];
-               regs->ibr[i] = vcpu->arch.ibr[i];
-               regs->dbr[i] = vcpu->arch.dbr[i];
-       }
-       for (i = 0; i < 4; i++)
-               regs->insvc[i] = vcpu->arch.insvc[i];
-       regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu);
-       SAVE_REGS(xtp);
-       SAVE_REGS(metaphysical_rr0);
-       SAVE_REGS(metaphysical_rr4);
-       SAVE_REGS(metaphysical_saved_rr0);
-       SAVE_REGS(metaphysical_saved_rr4);
-       SAVE_REGS(fp_psr);
-       SAVE_REGS(saved_gp);
-
-       vcpu_put(vcpu);
-       return 0;
-}
-
-int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
-                                 struct kvm_ia64_vcpu_stack *stack)
-{
-       memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
-       return 0;
-}
-
-int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
-                                 struct kvm_ia64_vcpu_stack *stack)
-{
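-       /*
-        * The saved stack image begins with a snapshot of struct kvm_vcpu;
-        * skip past it so the live vcpu structure is not overwritten, and
-        * copy exit_data out of the snapshot separately below.
-        */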
-       memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
-              sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
-
-       vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
-       return 0;
-}
-
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-
-       hrtimer_cancel(&vcpu->arch.hlt_timer);
-       kfree(vcpu->arch.apic);
-}
-
-long kvm_arch_vcpu_ioctl(struct file *filp,
-                        unsigned int ioctl, unsigned long arg)
-{
-       struct kvm_vcpu *vcpu = filp->private_data;
-       void __user *argp = (void __user *)arg;
-       struct kvm_ia64_vcpu_stack *stack = NULL;
-       long r;
-
-       switch (ioctl) {
-       case KVM_IA64_VCPU_GET_STACK: {
-               struct kvm_ia64_vcpu_stack __user *user_stack;
-               void __user *first_p = argp;
-
-               r = -EFAULT;
-               if (copy_from_user(&user_stack, first_p, sizeof(void *)))
-                       goto out;
-
-               if (!access_ok(VERIFY_WRITE, user_stack,
-                              sizeof(struct kvm_ia64_vcpu_stack))) {
-                       printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
-                              "Illegal user destination address for stack\n");
-                       goto out;
-               }
-               stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
-               if (!stack) {
-                       r = -ENOMEM;
-                       goto out;
-               }
-
-               r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
-               if (r)
-                       goto out;
-
-               if (copy_to_user(user_stack, stack,
-                                sizeof(struct kvm_ia64_vcpu_stack))) {
-                       r = -EFAULT;
-                       goto out;
-               }
-
-               break;
-       }
-       case KVM_IA64_VCPU_SET_STACK: {
-               struct kvm_ia64_vcpu_stack __user *user_stack;
-               void __user *first_p = argp;
-
-               r = -EFAULT;
-               if (copy_from_user(&user_stack, first_p, sizeof(void *)))
-                       goto out;
-
-               if (!access_ok(VERIFY_READ, user_stack,
-                           sizeof(struct kvm_ia64_vcpu_stack))) {
-                       printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
-                              "Illegal user address for stack\n");
-                       goto out;
-               }
-               stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
-               if (!stack) {
-                       r = -ENOMEM;
-                       goto out;
-               }
-               if (copy_from_user(stack, user_stack,
-                                  sizeof(struct kvm_ia64_vcpu_stack)))
-                       goto out;
-
-               r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
-               break;
-       }
-
-       default:
-               r = -EINVAL;
-       }
-
-out:
-       kfree(stack);
-       return r;
-}
-
-int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
-{
-       return VM_FAULT_SIGBUS;
-}
-
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
-                           unsigned long npages)
-{
-       return 0;
-}
-
-int kvm_arch_prepare_memory_region(struct kvm *kvm,
-               struct kvm_memory_slot *memslot,
-               struct kvm_userspace_memory_region *mem,
-               enum kvm_mr_change change)
-{
-       unsigned long i;
-       unsigned long pfn;
-       int npages = memslot->npages;
-       unsigned long base_gfn = memslot->base_gfn;
-
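-       /*
-        * Pre-populate the guest physical map for the whole slot: normal
-        * pages are pinned (released later in kvm_release_vm_pages) and
-        * entered as cacheable RWX, while reserved pfns are entered as
-        * uncacheable MMIO with no page reference taken.
-        */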
-       if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
-               return -ENOMEM;
-
-       for (i = 0; i < npages; i++) {
-               pfn = gfn_to_pfn(kvm, base_gfn + i);
-               if (!kvm_is_reserved_pfn(pfn)) {
-                       kvm_set_pmt_entry(kvm, base_gfn + i,
-                                       pfn << PAGE_SHIFT,
-                               _PAGE_AR_RWX | _PAGE_MA_WB);
-                       memslot->rmap[i] = (unsigned long)pfn_to_page(pfn);
-               } else {
-                       kvm_set_pmt_entry(kvm, base_gfn + i,
-                                       GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT),
-                                       _PAGE_MA_UC);
-                       memslot->rmap[i] = 0;
-                       }
-       }
-
-       return 0;
-}
-
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-       kvm_flush_remote_tlbs(kvm);
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
-                                  struct kvm_memory_slot *slot)
-{
-       kvm_arch_flush_shadow_all(kvm);
-}
-
-long kvm_arch_dev_ioctl(struct file *filp,
-                       unsigned int ioctl, unsigned long arg)
-{
-       return -EINVAL;
-}
-
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
-       kvm_vcpu_uninit(vcpu);
-}
-
-static int vti_cpu_has_kvm_support(void)
-{
-       long  avail = 1, status = 1, control = 1;
-       long ret;
-
-       ret = ia64_pal_proc_get_features(&avail, &status, &control, 0);
-       if (ret)
-               goto out;
-
-       if (!(avail & PAL_PROC_VM_BIT))
-               goto out;
-
-       printk(KERN_DEBUG"kvm: Hardware Supports VT\n");
-
-       ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info);
-       if (ret)
-               goto out;
-       printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size);
-
-       if (!(vp_env_info & VP_OPCODE)) {
-               printk(KERN_WARNING"kvm: No opcode ability on hardware, "
-                               "vp_env_info:0x%lx\n", vp_env_info);
-       }
-
-       return 1;
-out:
-       return 0;
-}
-
-
-/*
- * On SN2, the ITC isn't stable, so copy in fast path code to use the
- * SN2 RTC, replacing the ITC-based default version.
- */
-static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info,
-                         struct module *module)
-{
-       unsigned long new_ar, new_ar_sn2;
-       unsigned long module_base;
-
-       if (!ia64_platform_is("sn2"))
-               return;
-
-       module_base = (unsigned long)module->module_core;
-
-       new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base;
-       new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base;
-
-       printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC "
-              "as source\n");
-
-       /*
-        * Copy the SN2 version of mov_ar into place. They are both
-        * the same size, so 6 bundles is sufficient (6 * 0x10).
-        */
-       memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60);
-}
-
-static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
-                           struct module *module)
-{
-       unsigned long module_base;
-       unsigned long vmm_size;
-
-       unsigned long vmm_offset, func_offset, fdesc_offset;
-       struct fdesc *p_fdesc;
-
-       BUG_ON(!module);
-
-       if (!kvm_vmm_base) {
-               printk(KERN_ERR"kvm: kvm area hasn't been initialized yet!\n");
-               return -EFAULT;
-       }
-
-       /*Calculate new position of relocated vmm module.*/
-       module_base = (unsigned long)module->module_core;
-       vmm_size = module->core_size;
-       if (unlikely(vmm_size > KVM_VMM_SIZE))
-               return -EFAULT;
-
-       memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
-       kvm_patch_vmm(vmm_info, module);
-       kvm_flush_icache(kvm_vmm_base, vmm_size);
-
-       /*Recalculate kvm_vmm_info based on new VMM*/
-       vmm_offset = vmm_info->vmm_ivt - module_base;
-       kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset;
-       printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n",
-                       kvm_vmm_info->vmm_ivt);
-
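-       /*
-        * vmm_entry and tramp_entry are ia64 function descriptors (ip, gp
-        * pairs); both the descriptor pointers and the ip/gp values they
-        * contain must be rebased from the module's load address to the
-        * relocated copy at KVM_VMM_BASE.
-        */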
-       fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base;
-       kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE +
-                                                       fdesc_offset);
-       func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base;
-       p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
-       p_fdesc->ip = KVM_VMM_BASE + func_offset;
-       p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base);
-
-       printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n",
-                       KVM_VMM_BASE+func_offset);
-
-       fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base;
-       kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE +
-                       fdesc_offset);
-       func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base;
-       p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
-       p_fdesc->ip = KVM_VMM_BASE + func_offset;
-       p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base);
-
-       kvm_vmm_gp = p_fdesc->gp;
-
-       printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n",
-                                               kvm_vmm_info->vmm_entry);
-       printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n",
-                                               KVM_VMM_BASE + func_offset);
-
-       return 0;
-}
-
-int kvm_arch_init(void *opaque)
-{
-       int r;
-       struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque;
-
-       if (!vti_cpu_has_kvm_support()) {
-               printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n");
-               r = -EOPNOTSUPP;
-               goto out;
-       }
-
-       if (kvm_vmm_info) {
-               printk(KERN_ERR "kvm: Already loaded VMM module!\n");
-               r = -EEXIST;
-               goto out;
-       }
-
-       r = -ENOMEM;
-       kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL);
-       if (!kvm_vmm_info)
-               goto out;
-
-       if (kvm_alloc_vmm_area())
-               goto out_free0;
-
-       r = kvm_relocate_vmm(vmm_info, vmm_info->module);
-       if (r)
-               goto out_free1;
-
-       return 0;
-
-out_free1:
-       kvm_free_vmm_area();
-out_free0:
-       kfree(kvm_vmm_info);
-out:
-       return r;
-}
-
-void kvm_arch_exit(void)
-{
-       kvm_free_vmm_area();
-       kfree(kvm_vmm_info);
-       kvm_vmm_info = NULL;
-}
-
-static void kvm_ia64_sync_dirty_log(struct kvm *kvm,
-                                   struct kvm_memory_slot *memslot)
-{
-       int i;
-       long base;
-       unsigned long n;
-       unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
-                       offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
-
-       n = kvm_dirty_bitmap_bytes(memslot);
-       base = memslot->base_gfn / BITS_PER_LONG;
-
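-       /*
-        * The dirty bitmap shared with the VMM is a single gfn-indexed
-        * bitmap inside kvm_vm_data; copy this slot's portion into the
-        * memslot bitmap and clear it under dirty_log_lock (whose physical
-        * address is also handed to the VMM at vcpu init).
-        */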
-       spin_lock(&kvm->arch.dirty_log_lock);
-       for (i = 0; i < n/sizeof(long); ++i) {
-               memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
-               dirty_bitmap[base + i] = 0;
-       }
-       spin_unlock(&kvm->arch.dirty_log_lock);
-}
-
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
-               struct kvm_dirty_log *log)
-{
-       int r;
-       unsigned long n;
-       struct kvm_memory_slot *memslot;
-       int is_dirty = 0;
-
-       mutex_lock(&kvm->slots_lock);
-
-       r = -EINVAL;
-       if (log->slot >= KVM_USER_MEM_SLOTS)
-               goto out;
-
-       memslot = id_to_memslot(kvm->memslots, log->slot);
-       r = -ENOENT;
-       if (!memslot->dirty_bitmap)
-               goto out;
-
-       kvm_ia64_sync_dirty_log(kvm, memslot);
-       r = kvm_get_dirty_log(kvm, log, &is_dirty);
-       if (r)
-               goto out;
-
-       /* If nothing is dirty, don't bother messing with page tables. */
-       if (is_dirty) {
-               kvm_flush_remote_tlbs(kvm);
-               n = kvm_dirty_bitmap_bytes(memslot);
-               memset(memslot->dirty_bitmap, 0, n);
-       }
-       r = 0;
-out:
-       mutex_unlock(&kvm->slots_lock);
-       return r;
-}
-
-int kvm_arch_hardware_setup(void)
-{
-       return 0;
-}
-
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
-{
-       return __apic_accept_irq(vcpu, irq->vector);
-}
-
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
-{
-       return apic->vcpu->vcpu_id == dest;
-}
-
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
-{
-       return 0;
-}
-
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
-{
-       return vcpu1->arch.xtp - vcpu2->arch.xtp;
-}
-
-int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-               int short_hand, int dest, int dest_mode)
-{
-       struct kvm_lapic *target = vcpu->arch.apic;
-       return (dest_mode == 0) ?
-               kvm_apic_match_physical_addr(target, dest) :
-               kvm_apic_match_logical_addr(target, dest);
-}
-
-static int find_highest_bits(int *dat)
-{
-       u32  bits, bitnum;
-       int i;
-
-       /* loop for all 256 bits */
-       for (i = 7; i >= 0 ; i--) {
-               bits = dat[i];
-               if (bits) {
-                       bitnum = fls(bits);
-                       return i * 32 + bitnum - 1;
-               }
-       }
-
-       return -1;
-}
-
-int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
-{
-       struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-       if (vpd->irr[0] & (1UL << NMI_VECTOR))
-               return NMI_VECTOR;
-       if (vpd->irr[0] & (1UL << ExtINT_VECTOR))
-               return ExtINT_VECTOR;
-
-       return find_highest_bits((int *)&vpd->irr[0]);
-}
-
-int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
-{
-       return vcpu->arch.timer_fired;
-}
-
-int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
-{
-       return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
-               (kvm_highest_pending_irq(vcpu) != -1);
-}
-
-int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
-{
-       return (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests));
-}
-
-int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
-                                   struct kvm_mp_state *mp_state)
-{
-       mp_state->mp_state = vcpu->arch.mp_state;
-       return 0;
-}
-
-static int vcpu_reset(struct kvm_vcpu *vcpu)
-{
-       int r;
-       long psr;
-       local_irq_save(psr);
-       r = kvm_insert_vmm_mapping(vcpu);
-       local_irq_restore(psr);
-       if (r)
-               goto fail;
-
-       vcpu->arch.launched = 0;
-       kvm_arch_vcpu_uninit(vcpu);
-       r = kvm_arch_vcpu_init(vcpu);
-       if (r)
-               goto fail;
-
-       kvm_purge_vmm_mapping(vcpu);
-       r = 0;
-fail:
-       return r;
-}
-
-int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
-                                   struct kvm_mp_state *mp_state)
-{
-       int r = 0;
-
-       vcpu->arch.mp_state = mp_state->mp_state;
-       if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
-               r = vcpu_reset(vcpu);
-       return r;
-}
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
deleted file mode 100644 (file)
index cb548ee..0000000
+++ /dev/null
@@ -1,674 +0,0 @@
-/*
- * PAL/SAL call delegation
- *
- * Copyright (c) 2004 Li Susie <susie.li@intel.com>
- * Copyright (c) 2005 Yu Ke <ke.yu@intel.com>
- * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-#include <linux/kvm_host.h>
-#include <linux/smp.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/clksupport.h>
-#include <asm/sn/shub_mmr.h>
-
-#include "vti.h"
-#include "misc.h"
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <asm/tlb.h>
-
-/*
- * Handy macros to make sure that the PAL return values start out
- * as something meaningful.
- */
-#define INIT_PAL_STATUS_UNIMPLEMENTED(x)               \
-       {                                               \
-               x.status = PAL_STATUS_UNIMPLEMENTED;    \
-               x.v0 = 0;                               \
-               x.v1 = 0;                               \
-               x.v2 = 0;                               \
-       }
-
-#define INIT_PAL_STATUS_SUCCESS(x)                     \
-       {                                               \
-               x.status = PAL_STATUS_SUCCESS;          \
-               x.v0 = 0;                               \
-               x.v1 = 0;                               \
-               x.v2 = 0;                               \
-       }
-
-static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu,
-               u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) {
-       struct exit_ctl_data *p;
-
-       if (vcpu) {
-               p = &vcpu->arch.exit_data;
-               if (p->exit_reason == EXIT_REASON_PAL_CALL) {
-                       *gr28 = p->u.pal_data.gr28;
-                       *gr29 = p->u.pal_data.gr29;
-                       *gr30 = p->u.pal_data.gr30;
-                       *gr31 = p->u.pal_data.gr31;
-                       return ;
-               }
-       }
-       printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n");
-}
-
-static void set_pal_result(struct kvm_vcpu *vcpu,
-               struct ia64_pal_retval result) {
-
-       struct exit_ctl_data *p;
-
-       p = kvm_get_exit_data(vcpu);
-       if (p->exit_reason == EXIT_REASON_PAL_CALL) {
-               p->u.pal_data.ret = result;
-               return ;
-       }
-       INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret);
-}
-
-static void set_sal_result(struct kvm_vcpu *vcpu,
-               struct sal_ret_values result) {
-       struct exit_ctl_data *p;
-
-       p = kvm_get_exit_data(vcpu);
-       if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-               p->u.sal_data.ret = result;
-               return ;
-       }
-       printk(KERN_WARNING"Failed to set sal result!!\n");
-}
-
-struct cache_flush_args {
-       u64 cache_type;
-       u64 operation;
-       u64 progress;
-       long status;
-};
-
-cpumask_t cpu_cache_coherent_map;
-
-static void remote_pal_cache_flush(void *data)
-{
-       struct cache_flush_args *args = data;
-       long status;
-       u64 progress = args->progress;
-
-       status = ia64_pal_cache_flush(args->cache_type, args->operation,
-                                       &progress, NULL);
-       if (status != 0)
-               args->status = status;
-}
-
-static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu)
-{
-       u64 gr28, gr29, gr30, gr31;
-       struct ia64_pal_retval result = {0, 0, 0, 0};
-       struct cache_flush_args args = {0, 0, 0, 0};
-       long psr;
-
-       gr28 = gr29 = gr30 = gr31 = 0;
-       kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31);
-
-       if (gr31 != 0)
-               printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu);
-
-       /* Always call Host Pal in int=1 */
-       gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS;
-       args.cache_type = gr29;
-       args.operation = gr30;
-       smp_call_function(remote_pal_cache_flush,
-                               (void *)&args, 1);
-       if (args.status != 0)
-               printk(KERN_ERR"pal_cache_flush error!,"
-                               "status:0x%lx\n", args.status);
-       /*
-        * Call Host PAL cache flush
-        * Clear psr.ic when call PAL_CACHE_FLUSH
-        */
-       local_irq_save(psr);
-       result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1,
-                                               &result.v0);
-       local_irq_restore(psr);
-       if (result.status != 0)
-               printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld"
-                               "in1:%lx,in2:%lx\n",
-                               vcpu, result.status, gr29, gr30);
-
-#if 0
-       if (gr29 == PAL_CACHE_TYPE_COHERENT) {
-               cpus_setall(vcpu->arch.cache_coherent_map);
-               cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map);
-               cpus_setall(cpu_cache_coherent_map);
-               cpu_clear(vcpu->cpu, cpu_cache_coherent_map);
-       }
-#endif
-       return result;
-}
-
-struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu)
-{
-
-       struct ia64_pal_retval result;
-
-       PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0);
-       return result;
-}
-
-static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
-{
-
-       struct ia64_pal_retval result;
-
-       PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0);
-
-       /*
-        * PAL_FREQ_BASE may not be implemented in some platforms,
-        * call SAL instead.
-        */
-       if (result.v0 == 0) {
-               result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
-                                                       &result.v0,
-                                                       &result.v1);
-               result.v2 = 0;
-       }
-
-       return result;
-}
-
-/*
- * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2
- * RTC is used instead. This function patches the ratios from SAL
- * to match the RTC before providing them to the guest.
- */
-static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result)
-{
-       struct pal_freq_ratio *ratio;
-       unsigned long sal_freq, sal_drift, factor;
-
-       result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
-                                           &sal_freq, &sal_drift);
-       ratio = (struct pal_freq_ratio *)&result->v2;
-       factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) /
-               sn_rtc_cycles_per_second;
-
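-       /*
-        * With num = 3 and den = factor (~ 3 * sal_freq / rtc_freq, rounded
-        * to nearest), the ITC ratio reported to the guest is roughly
-        * rtc_freq : sal_freq, matching the RTC that backs the guest's ITC.
-        */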
-       ratio->num = 3;
-       ratio->den = factor;
-}
-
-static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
-{
-       struct ia64_pal_retval result;
-
-       PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
-
-       if (vcpu->kvm->arch.is_sn2)
-               sn2_patch_itc_freq_ratios(&result);
-
-       return result;
-}
-
-static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu)
-{
-       struct ia64_pal_retval result;
-
-       INIT_PAL_STATUS_UNIMPLEMENTED(result);
-       return result;
-}
-
-static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu)
-{
-
-       struct ia64_pal_retval result;
-
-       INIT_PAL_STATUS_SUCCESS(result);
-       return result;
-}
-
-static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
-{
-
-       struct ia64_pal_retval result = {0, 0, 0, 0};
-       unsigned long in0, in1, in2, in3;
-
-       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-       result.status = ia64_pal_proc_get_features(&result.v0, &result.v1,
-                       &result.v2, in2);
-
-       return result;
-}
-
-static struct ia64_pal_retval pal_register_info(struct kvm_vcpu *vcpu)
-{
-
-       struct ia64_pal_retval result = {0, 0, 0, 0};
-       unsigned long in0, in1, in2, in3;
-
-       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-       result.status = ia64_pal_register_info(in1, &result.v1, &result.v2);
-
-       return result;
-}
-
-static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
-{
-
-       pal_cache_config_info_t ci;
-       long status;
-       unsigned long in0, in1, in2, in3, r9, r10;
-
-       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-       status = ia64_pal_cache_config_info(in1, in2, &ci);
-       r9 = ci.pcci_info_1.pcci1_data;
-       r10 = ci.pcci_info_2.pcci2_data;
-       return ((struct ia64_pal_retval){status, r9, r10, 0});
-}
-
-#define GUEST_IMPL_VA_MSB      59
-#define GUEST_RID_BITS         18
-
-static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
-{
-
-       pal_vm_info_1_u_t vminfo1;
-       pal_vm_info_2_u_t vminfo2;
-       struct ia64_pal_retval result;
-
-       PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0);
-       if (!result.status) {
-               vminfo1.pvi1_val = result.v0;
-               vminfo1.pal_vm_info_1_s.max_itr_entry = 8;
-               vminfo1.pal_vm_info_1_s.max_dtr_entry = 8;
-               result.v0 = vminfo1.pvi1_val;
-               vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB;
-               vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS;
-               result.v1 = vminfo2.pvi2_val;
-       }
-
-       return result;
-}
-
-static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
-{
-       struct ia64_pal_retval result;
-       unsigned long in0, in1, in2, in3;
-
-       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-
-       result.status = ia64_pal_vm_info(in1, in2,
-                       (pal_tc_info_u_t *)&result.v1, &result.v2);
-
-       return result;
-}
-
-static  u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
-{
-       u64 index = 0;
-       struct exit_ctl_data *p;
-
-       p = kvm_get_exit_data(vcpu);
-       if (p->exit_reason == EXIT_REASON_PAL_CALL)
-               index = p->u.pal_data.gr28;
-
-       return index;
-}
-
-static void prepare_for_halt(struct kvm_vcpu *vcpu)
-{
-       vcpu->arch.timer_pending = 1;
-       vcpu->arch.timer_fired = 0;
-}
-
-static struct ia64_pal_retval pal_perf_mon_info(struct kvm_vcpu *vcpu)
-{
-       long status;
-       unsigned long in0, in1, in2, in3, r9;
-       unsigned long pm_buffer[16];
-
-       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-       status = ia64_pal_perf_mon_info(pm_buffer,
-                               (pal_perf_mon_info_u_t *) &r9);
-       if (status != 0) {
-               printk(KERN_DEBUG"PAL_PERF_MON_INFO fails ret=%ld\n", status);
-       } else {
-               if (in1)
-                       memcpy((void *)in1, pm_buffer, sizeof(pm_buffer));
-               else {
-                       status = PAL_STATUS_EINVAL;
-                       printk(KERN_WARNING"Invalid parameters "
-                                               "for PAL call:0x%lx!\n", in0);
-               }
-       }
-       return (struct ia64_pal_retval){status, r9, 0, 0};
-}
-
-static struct ia64_pal_retval pal_halt_info(struct kvm_vcpu *vcpu)
-{
-       unsigned long in0, in1, in2, in3;
-       long status;
-       unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32)
-                                       | (1UL << 61) | (1UL << 60);
-
-       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-       if (in1) {
-               memcpy((void *)in1, &res, sizeof(res));
-               status = 0;
-       } else {
-               status = PAL_STATUS_EINVAL;
-               printk(KERN_WARNING"Invalid parameters "
-                                       "for PAL call:0x%lx!\n", in0);
-       }
-
-       return (struct ia64_pal_retval){status, 0, 0, 0};
-}
-
-static struct ia64_pal_retval pal_mem_attrib(struct kvm_vcpu *vcpu)
-{
-       unsigned long r9;
-       long status;
-
-       status = ia64_pal_mem_attrib(&r9);
-
-       return (struct ia64_pal_retval){status, r9, 0, 0};
-}
-
-static void remote_pal_prefetch_visibility(void *v)
-{
-       s64 trans_type = (s64)v;
-       ia64_pal_prefetch_visibility(trans_type);
-}
-
-static struct ia64_pal_retval pal_prefetch_visibility(struct kvm_vcpu *vcpu)
-{
-       struct ia64_pal_retval result = {0, 0, 0, 0};
-       unsigned long in0, in1, in2, in3;
-       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-       result.status = ia64_pal_prefetch_visibility(in1);
-       if (result.status == 0) {
-               /* Must be performed on all remote processors
-               in the coherence domain. */
-               smp_call_function(remote_pal_prefetch_visibility,
-                                       (void *)in1, 1);
-               /* Unnecessary on remote processor for other vcpus!*/
-               result.status = 1;
-       }
-       return result;
-}
-
-static void remote_pal_mc_drain(void *v)
-{
-       ia64_pal_mc_drain();
-}
-
-static struct ia64_pal_retval pal_get_brand_info(struct kvm_vcpu *vcpu)
-{
-       struct ia64_pal_retval result = {0, 0, 0, 0};
-       unsigned long in0, in1, in2, in3;
-
-       kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-
-       if (in1 == 0 && in2) {
-               char brand_info[128];
-               result.status = ia64_pal_get_brand_info(brand_info);
-               if (result.status == PAL_STATUS_SUCCESS)
-                       memcpy((void *)in2, brand_info, 128);
-       } else {
-               result.status = PAL_STATUS_REQUIRES_MEMORY;
-               printk(KERN_WARNING"Invalid parameters for "
-                                       "PAL call:0x%lx!\n", in0);
-       }
-
-       return result;
-}
-
-int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-
-       u64 gr28;
-       struct ia64_pal_retval result;
-       int ret = 1;
-
-       gr28 = kvm_get_pal_call_index(vcpu);
-       switch (gr28) {
-       case PAL_CACHE_FLUSH:
-               result = pal_cache_flush(vcpu);
-               break;
-       case PAL_MEM_ATTRIB:
-               result = pal_mem_attrib(vcpu);
-               break;
-       case PAL_CACHE_SUMMARY:
-               result = pal_cache_summary(vcpu);
-               break;
-       case PAL_PERF_MON_INFO:
-               result = pal_perf_mon_info(vcpu);
-               break;
-       case PAL_HALT_INFO:
-               result = pal_halt_info(vcpu);
-               break;
-       case PAL_HALT_LIGHT:
-       {
-               INIT_PAL_STATUS_SUCCESS(result);
-               prepare_for_halt(vcpu);
-               if (kvm_highest_pending_irq(vcpu) == -1)
-                       ret = kvm_emulate_halt(vcpu);
-       }
-               break;
-
-       case PAL_PREFETCH_VISIBILITY:
-               result = pal_prefetch_visibility(vcpu);
-               break;
-       case PAL_MC_DRAIN:
-               result.status = ia64_pal_mc_drain();
-               /* FIXME: All vcpus likely call PAL_MC_DRAIN.
-                  That causes the congestion. */
-               smp_call_function(remote_pal_mc_drain, NULL, 1);
-               break;
-
-       case PAL_FREQ_RATIOS:
-               result = pal_freq_ratios(vcpu);
-               break;
-
-       case PAL_FREQ_BASE:
-               result = pal_freq_base(vcpu);
-               break;
-
-       case PAL_LOGICAL_TO_PHYSICAL :
-               result = pal_logical_to_physica(vcpu);
-               break;
-
-       case PAL_VM_SUMMARY :
-               result = pal_vm_summary(vcpu);
-               break;
-
-       case PAL_VM_INFO :
-               result = pal_vm_info(vcpu);
-               break;
-       case PAL_PLATFORM_ADDR :
-               result = pal_platform_addr(vcpu);
-               break;
-       case PAL_CACHE_INFO:
-               result = pal_cache_info(vcpu);
-               break;
-       case PAL_PTCE_INFO:
-               INIT_PAL_STATUS_SUCCESS(result);
-               result.v1 = (1L << 32) | 1L;
-               break;
-       case PAL_REGISTER_INFO:
-               result = pal_register_info(vcpu);
-               break;
-       case PAL_VM_PAGE_SIZE:
-               result.status = ia64_pal_vm_page_size(&result.v0,
-                                                       &result.v1);
-               break;
-       case PAL_RSE_INFO:
-               result.status = ia64_pal_rse_info(&result.v0,
-                                       (pal_hints_u_t *)&result.v1);
-               break;
-       case PAL_PROC_GET_FEATURES:
-               result = pal_proc_get_features(vcpu);
-               break;
-       case PAL_DEBUG_INFO:
-               result.status = ia64_pal_debug_info(&result.v0,
-                                                       &result.v1);
-               break;
-       case PAL_VERSION:
-               result.status = ia64_pal_version(
-                               (pal_version_u_t *)&result.v0,
-                               (pal_version_u_t *)&result.v1);
-               break;
-       case PAL_FIXED_ADDR:
-               result.status = PAL_STATUS_SUCCESS;
-               result.v0 = vcpu->vcpu_id;
-               break;
-       case PAL_BRAND_INFO:
-               result = pal_get_brand_info(vcpu);
-               break;
-       case PAL_GET_PSTATE:
-       case PAL_CACHE_SHARED_INFO:
-               INIT_PAL_STATUS_UNIMPLEMENTED(result);
-               break;
-       default:
-               INIT_PAL_STATUS_UNIMPLEMENTED(result);
-               printk(KERN_WARNING"kvm: Unsupported pal call,"
-                                       " index:0x%lx\n", gr28);
-       }
-       set_pal_result(vcpu, result);
-       return ret;
-}
-
-static struct sal_ret_values sal_emulator(struct kvm *kvm,
-                               long index, unsigned long in1,
-                               unsigned long in2, unsigned long in3,
-                               unsigned long in4, unsigned long in5,
-                               unsigned long in6, unsigned long in7)
-{
-       unsigned long r9  = 0;
-       unsigned long r10 = 0;
-       long r11 = 0;
-       long status;
-
-       status = 0;
-       switch (index) {
-       case SAL_FREQ_BASE:
-               status = ia64_sal_freq_base(in1, &r9, &r10);
-               break;
-       case SAL_PCI_CONFIG_READ:
-               printk(KERN_WARNING"kvm: Not allowed to call here!"
-                       " SAL_PCI_CONFIG_READ\n");
-               break;
-       case SAL_PCI_CONFIG_WRITE:
-               printk(KERN_WARNING"kvm: Not allowed to call here!"
-                       " SAL_PCI_CONFIG_WRITE\n");
-               break;
-       case SAL_SET_VECTORS:
-               if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) {
-                       if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) {
-                               status = -2;
-                       } else {
-                               kvm->arch.rdv_sal_data.boot_ip = in2;
-                               kvm->arch.rdv_sal_data.boot_gp = in3;
-                       }
-                       printk(KERN_DEBUG"kvm: Rendezvous called! iip:%lx\n", in2);
-               } else
-                       printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu."
-                                                       "ignored...\n", in1);
-               break;
-       case SAL_GET_STATE_INFO:
-               /* No more info.  */
-               status = -5;
-               r9 = 0;
-               break;
-       case SAL_GET_STATE_INFO_SIZE:
-               /* Return a dummy size.  */
-               status = 0;
-               r9 = 128;
-               break;
-       case SAL_CLEAR_STATE_INFO:
-               /* Noop.  */
-               break;
-       case SAL_MC_RENDEZ:
-               printk(KERN_WARNING
-                       "kvm: called SAL_MC_RENDEZ. ignored...\n");
-               break;
-       case SAL_MC_SET_PARAMS:
-               printk(KERN_WARNING
-                       "kvm: called  SAL_MC_SET_PARAMS.ignored!\n");
-               break;
-       case SAL_CACHE_FLUSH:
-               if (1) {
-                       /* Flush using SAL.  This method is faster
-                        * but has a side effect on other vcpus
-                        * running on this cpu.
-                        */
-                       status = ia64_sal_cache_flush(in1);
-               } else {
-                       /*Maybe need to implement the method
-                       without side effect!*/
-                       status = 0;
-               }
-               break;
-       case SAL_CACHE_INIT:
-               printk(KERN_WARNING
-                       "kvm: called SAL_CACHE_INIT.  ignored...\n");
-               break;
-       case SAL_UPDATE_PAL:
-               printk(KERN_WARNING
-                       "kvm: CALLED SAL_UPDATE_PAL.  ignored...\n");
-               break;
-       default:
-               printk(KERN_WARNING"kvm: called SAL_CALL with unknown index."
-                                               " index:%ld\n", index);
-               status = -1;
-               break;
-       }
-       return ((struct sal_ret_values) {status, r9, r10, r11});
-}
-
-static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
-               u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){
-
-       struct exit_ctl_data *p;
-
-       p = kvm_get_exit_data(vcpu);
-
-       if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-               *in0 = p->u.sal_data.in0;
-               *in1 = p->u.sal_data.in1;
-               *in2 = p->u.sal_data.in2;
-               *in3 = p->u.sal_data.in3;
-               *in4 = p->u.sal_data.in4;
-               *in5 = p->u.sal_data.in5;
-               *in6 = p->u.sal_data.in6;
-               *in7 = p->u.sal_data.in7;
-               return ;
-       }
-       *in0 = 0;
-}
-
-void kvm_sal_emul(struct kvm_vcpu *vcpu)
-{
-
-       struct sal_ret_values result;
-       u64 index, in1, in2, in3, in4, in5, in6, in7;
-
-       kvm_get_sal_call_data(vcpu, &index, &in1, &in2,
-                       &in3, &in4, &in5, &in6, &in7);
-       result = sal_emulator(vcpu->kvm, index, in1, in2, in3,
-                                       in4, in5, in6, in7);
-       set_sal_result(vcpu, result);
-}
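
The removed firmware path above is a plain dispatch: pull the SAL call arguments out of the exit data, switch on the call index, and hand a status plus three result registers back to the guest. A minimal, self-contained C sketch of that shape (hypothetical demo_* names, not the kernel API) is:

#include <stdio.h>

/* Hypothetical stand-ins for the kernel's sal_ret_values and call indices. */
struct sal_result { long status; unsigned long r9, r10, r11; };
enum { DEMO_SAL_FREQ_BASE = 1, DEMO_SAL_GET_STATE_INFO_SIZE = 2 };

static struct sal_result demo_sal_dispatch(long index, unsigned long in1)
{
	struct sal_result res = { 0, 0, 0, 0 };

	switch (index) {
	case DEMO_SAL_FREQ_BASE:
		res.r9 = 200000000UL * in1;	/* pretend frequency lookup */
		break;
	case DEMO_SAL_GET_STATE_INFO_SIZE:
		res.r9 = 128;			/* dummy size, as above */
		break;
	default:
		res.status = -1;		/* unknown call */
		break;
	}
	return res;
}

int main(void)
{
	struct sal_result r = demo_sal_dispatch(DEMO_SAL_GET_STATE_INFO_SIZE, 0);
	printf("status=%ld r9=%lu\n", r.status, r.r9);
	return 0;
}

The two demo indices merely stand in for the SAL_* constants handled in the removed code; any C compiler will build the sketch.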
diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c
deleted file mode 100644 (file)
index f1268b8..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * kvm_lib.c: Compile some library code for the kvm-intel module.
- *
- *     Just include the kernel's library code, and disable symbol exports.
- *     Copyright (C) 2008, Intel Corporation.
- *     Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-#undef CONFIG_MODULES
-#include <linux/module.h>
-#undef CONFIG_KALLSYMS
-#undef EXPORT_SYMBOL
-#undef EXPORT_SYMBOL_GPL
-#define EXPORT_SYMBOL(sym)
-#define EXPORT_SYMBOL_GPL(sym)
-#include "../../../lib/vsprintf.c"
-#include "../../../lib/ctype.c"
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
deleted file mode 100644 (file)
index b2bcaa2..0000000
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- *  kvm_minstate.h: min save macros
- *  Copyright (c) 2007, Intel Corporation.
- *
- *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *  Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-
-#include <asm/asmmacro.h>
-#include <asm/types.h>
-#include <asm/kregs.h>
-#include <asm/kvm_host.h>
-
-#include "asm-offsets.h"
-
-#define KVM_MINSTATE_START_SAVE_MIN                                            \
-       mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\
-       ;;                                                                      \
-       mov.m r28 = ar.rnat;                                                    \
-       addl r22 = VMM_RBS_OFFSET,r1;            /* compute base of RBS */      \
-       ;;                                                                      \
-       lfetch.fault.excl.nt1 [r22];                                            \
-       addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1;  \
-       mov r23 = ar.bspstore;                  /* save ar.bspstore */          \
-       ;;                                                                      \
-       mov ar.bspstore = r22;                          /* switch to kernel RBS */\
-       ;;                                                                      \
-       mov r18 = ar.bsp;                                                       \
-       mov ar.rsc = 0x3;     /* set eager mode, pl 0, little-endian, loadrs=0 */
-
-
-
-#define KVM_MINSTATE_END_SAVE_MIN                                              \
-       bsw.1;          /* switch back to bank 1 (must be last in insn group) */\
-       ;;
-
-
-#define PAL_VSA_SYNC_READ                                              \
-       /* begin to call pal vps sync_read */                           \
-{.mii;                                                                 \
-       add r25 = VMM_VPD_BASE_OFFSET, r21;                             \
-       nop 0x0;                                                        \
-       mov r24=ip;                                                     \
-       ;;                                                              \
-}                                                                      \
-{.mmb                                                                  \
-       add r24=0x20, r24;                                              \
-       ld8 r25 = [r25];      /* read vpd base */                       \
-       br.cond.sptk kvm_vps_sync_read;         /*call the service*/    \
-       ;;                                                              \
-};                                                                     \
-
-
-#define KVM_MINSTATE_GET_CURRENT(reg)   mov reg=r21
-
-/*
- * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
- * the minimum state necessary to allow psr.ic to be turned back
- * on.
- *
- * Assumed state upon entry:
- *  psr.ic: off
- *  r31:       contains saved predicates (pr)
- *
- * Upon exit, the state is as follows:
- *  psr.ic: off
- *   r2 = points to &pt_regs.r16
- *   r8 = contents of ar.ccv
- *   r9 = contents of ar.csd
- *  r10 = contents of ar.ssd
- *  r11 = FPSR_DEFAULT
- *  r12 = kernel sp (kernel virtual address)
- *  r13 = points to current task_struct (kernel virtual address)
- *  p15 = TRUE if psr.i is set in cr.ipsr
- *  predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
- *       preserved
- *
- * Note that psr.ic is NOT turned on by this macro.  This is so that
- * we can pass interruption state as arguments to a handler.
- */
-
-
-#define PT(f) (VMM_PT_REGS_##f##_OFFSET)
-
-#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)                  \
-       KVM_MINSTATE_GET_CURRENT(r16);  /* M (or M;;I) */       \
-       mov r27 = ar.rsc;         /* M */                       \
-       mov r20 = r1;         /* A */                           \
-       mov r25 = ar.unat;        /* M */                       \
-       mov r29 = cr.ipsr;        /* M */                       \
-       mov r26 = ar.pfs;         /* I */                       \
-       mov r18 = cr.isr;                                       \
-       COVER;              /* B;; (or nothing) */              \
-       ;;                                                      \
-       tbit.z p0,p15 = r29,IA64_PSR_I_BIT;                     \
-       mov r1 = r16;                                           \
-/*     mov r21=r16;    */                                      \
-       /* switch from user to kernel RBS: */                   \
-       ;;                                                      \
-       invala;             /* M */                             \
-       SAVE_IFS;                                               \
-       ;;                                                      \
-       KVM_MINSTATE_START_SAVE_MIN                             \
-       adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */    \
-       adds r16 = PT(CR_IPSR),r1;                              \
-       ;;                                                      \
-       lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;             \
-       st8 [r16] = r29;      /* save cr.ipsr */                \
-       ;;                                                      \
-       lfetch.fault.excl.nt1 [r17];                            \
-       tbit.nz p15,p0 = r29,IA64_PSR_I_BIT;                    \
-       mov r29 = b0                                            \
-       ;;                                                      \
-       adds r16 = PT(R8),r1; /* initialize first base pointer */\
-       adds r17 = PT(R9),r1; /* initialize second base pointer */\
-       ;;                                                      \
-.mem.offset 0,0; st8.spill [r16] = r8,16;                      \
-.mem.offset 8,0; st8.spill [r17] = r9,16;                      \
-       ;;                                                      \
-.mem.offset 0,0; st8.spill [r16] = r10,24;                     \
-.mem.offset 8,0; st8.spill [r17] = r11,24;                     \
-       ;;                                                      \
-       mov r9 = cr.iip;         /* M */                        \
-       mov r10 = ar.fpsr;        /* M */                       \
-       ;;                                                      \
-       st8 [r16] = r9,16;    /* save cr.iip */                 \
-       st8 [r17] = r30,16;   /* save cr.ifs */                 \
-       sub r18 = r18,r22;    /* r18=RSE.ndirty*8 */            \
-       ;;                                                      \
-       st8 [r16] = r25,16;   /* save ar.unat */                \
-       st8 [r17] = r26,16;    /* save ar.pfs */                \
-       shl r18 = r18,16;     /* calc ar.rsc used for "loadrs" */\
-       ;;                                                      \
-       st8 [r16] = r27,16;   /* save ar.rsc */                 \
-       st8 [r17] = r28,16;   /* save ar.rnat */                \
-       ;;          /* avoid RAW on r16 & r17 */                \
-       st8 [r16] = r23,16;   /* save ar.bspstore */            \
-       st8 [r17] = r31,16;   /* save predicates */             \
-       ;;                                                      \
-       st8 [r16] = r29,16;   /* save b0 */                     \
-       st8 [r17] = r18,16;   /* save ar.rsc value for "loadrs" */\
-       ;;                                                      \
-.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */  \
-.mem.offset 8,0; st8.spill [r17] = r12,16;                     \
-       adds r12 = -16,r1;    /* switch to kernel memory stack */  \
-       ;;                                                      \
-.mem.offset 0,0; st8.spill [r16] = r13,16;                     \
-.mem.offset 8,0; st8.spill [r17] = r10,16;     /* save ar.fpsr */\
-       mov r13 = r21;   /* establish `current' */              \
-       ;;                                                      \
-.mem.offset 0,0; st8.spill [r16] = r15,16;                     \
-.mem.offset 8,0; st8.spill [r17] = r14,16;                     \
-       ;;                                                      \
-.mem.offset 0,0; st8.spill [r16] = r2,16;                      \
-.mem.offset 8,0; st8.spill [r17] = r3,16;                      \
-       adds r2 = VMM_PT_REGS_R16_OFFSET,r1;                    \
-        ;;                                                     \
-       adds r16 = VMM_VCPU_IIPA_OFFSET,r13;                    \
-       adds r17 = VMM_VCPU_ISR_OFFSET,r13;                     \
-       mov r26 = cr.iipa;                                      \
-       mov r27 = cr.isr;                                       \
-       ;;                                                      \
-       st8 [r16] = r26;                                        \
-       st8 [r17] = r27;                                        \
-       ;;                                                      \
-       EXTRA;                                                  \
-       mov r8 = ar.ccv;                                        \
-       mov r9 = ar.csd;                                        \
-       mov r10 = ar.ssd;                                       \
-       movl r11 = FPSR_DEFAULT;   /* L-unit */                 \
-       adds r17 = VMM_VCPU_GP_OFFSET,r13;                      \
-       ;;                                                      \
-       ld8 r1 = [r17];/* establish kernel global pointer */    \
-       ;;                                                      \
-       PAL_VSA_SYNC_READ                                       \
-       KVM_MINSTATE_END_SAVE_MIN
-
-/*
- * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
- *
- * Assumed state upon entry:
- *  psr.ic: on
- *  r2: points to &pt_regs.f6
- *  r3: points to &pt_regs.f7
- *  r8: contents of ar.ccv
- *  r9: contents of ar.csd
- *  r10:       contents of ar.ssd
- *  r11:       FPSR_DEFAULT
- *
- * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
- */
-#define KVM_SAVE_REST                          \
-.mem.offset 0,0; st8.spill [r2] = r16,16;      \
-.mem.offset 8,0; st8.spill [r3] = r17,16;      \
-       ;;                              \
-.mem.offset 0,0; st8.spill [r2] = r18,16;      \
-.mem.offset 8,0; st8.spill [r3] = r19,16;      \
-       ;;                              \
-.mem.offset 0,0; st8.spill [r2] = r20,16;      \
-.mem.offset 8,0; st8.spill [r3] = r21,16;      \
-       mov r18=b6;                     \
-       ;;                              \
-.mem.offset 0,0; st8.spill [r2] = r22,16;      \
-.mem.offset 8,0; st8.spill [r3] = r23,16;      \
-       mov r19 = b7;                           \
-       ;;                                      \
-.mem.offset 0,0; st8.spill [r2] = r24,16;      \
-.mem.offset 8,0; st8.spill [r3] = r25,16;      \
-       ;;                                      \
-.mem.offset 0,0; st8.spill [r2] = r26,16;      \
-.mem.offset 8,0; st8.spill [r3] = r27,16;      \
-       ;;                                      \
-.mem.offset 0,0; st8.spill [r2] = r28,16;      \
-.mem.offset 8,0; st8.spill [r3] = r29,16;      \
-       ;;                                      \
-.mem.offset 0,0; st8.spill [r2] = r30,16;      \
-.mem.offset 8,0; st8.spill [r3] = r31,32;      \
-       ;;                                      \
-       mov ar.fpsr = r11;                      \
-       st8 [r2] = r8,8;                        \
-       adds r24 = PT(B6)-PT(F7),r3;            \
-       adds r25 = PT(B7)-PT(F7),r3;            \
-       ;;                                      \
-       st8 [r24] = r18,16;       /* b6 */      \
-       st8 [r25] = r19,16;       /* b7 */      \
-       adds r2 = PT(R4)-PT(F6),r2;             \
-       adds r3 = PT(R5)-PT(F7),r3;             \
-       ;;                                      \
-       st8 [r24] = r9; /* ar.csd */            \
-       st8 [r25] = r10;        /* ar.ssd */    \
-       ;;                                      \
-       mov r18 = ar.unat;                      \
-       adds r19 = PT(EML_UNAT)-PT(R4),r2;      \
-       ;;                                      \
-       st8 [r19] = r18; /* eml_unat */         \
-
-
-#define KVM_SAVE_EXTRA                         \
-.mem.offset 0,0; st8.spill [r2] = r4,16;       \
-.mem.offset 8,0; st8.spill [r3] = r5,16;       \
-       ;;                                      \
-.mem.offset 0,0; st8.spill [r2] = r6,16;       \
-.mem.offset 8,0; st8.spill [r3] = r7;          \
-       ;;                                      \
-       mov r26 = ar.unat;                      \
-       ;;                                      \
-       st8 [r2] = r26;/* eml_unat */           \
-
-#define KVM_SAVE_MIN_WITH_COVER                KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,)
-#define KVM_SAVE_MIN_WITH_COVER_R19    KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19)
-#define KVM_SAVE_MIN                   KVM_DO_SAVE_MIN(     , mov r30 = r0, )
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
deleted file mode 100644 (file)
index c5f92a9..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef __KVM_IA64_LAPIC_H
-#define __KVM_IA64_LAPIC_H
-
-#include <linux/kvm_host.h>
-
-/*
- * vlsapic
- */
-struct kvm_lapic{
-       struct kvm_vcpu *vcpu;
-       uint64_t insvc[4];
-       uint64_t vhpi;
-       uint8_t xtp;
-       uint8_t pal_init_pending;
-       uint8_t pad[2];
-};
-
-int kvm_create_lapic(struct kvm_vcpu *vcpu);
-void kvm_free_lapic(struct kvm_vcpu *vcpu);
-
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-               int short_hand, int dest, int dest_mode);
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
-#define kvm_apic_present(x) (true)
-#define kvm_lapic_enabled(x) (true)
-
-#endif
diff --git a/arch/ia64/kvm/memcpy.S b/arch/ia64/kvm/memcpy.S
deleted file mode 100644 (file)
index c04cdbe..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include "../lib/memcpy.S"
diff --git a/arch/ia64/kvm/memset.S b/arch/ia64/kvm/memset.S
deleted file mode 100644 (file)
index 83c3066..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include "../lib/memset.S"
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
deleted file mode 100644 (file)
index dd979e0..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-#ifndef __KVM_IA64_MISC_H
-#define __KVM_IA64_MISC_H
-
-#include <linux/kvm_host.h>
-/*
- * misc.h
- *     Copyright (C) 2007, Intel Corporation.
- *     Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-/*
- * Return the p2m base address on the host side.
- */
-static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
-{
-       return (uint64_t *)(kvm->arch.vm_base +
-                               offsetof(struct kvm_vm_data, kvm_p2m));
-}
-
-static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
-               u64 paddr, u64 mem_flags)
-{
-       uint64_t *pmt_base = kvm_host_get_pmt(kvm);
-       unsigned long pte;
-
-       pte = PAGE_ALIGN(paddr) | mem_flags;
-       pmt_base[gfn] = pte;
-}
-
-/*Function for translating host address to guest address*/
-
-static inline void *to_guest(struct kvm *kvm, void *addr)
-{
-       return (void *)((unsigned long)(addr) - kvm->arch.vm_base +
-                       KVM_VM_DATA_BASE);
-}
-
-/*Function for translating guest address to host address*/
-
-static inline void *to_host(struct kvm *kvm, void *addr)
-{
-       return (void *)((unsigned long)addr - KVM_VM_DATA_BASE
-                       + kvm->arch.vm_base);
-}
-
-/* Get host context of the vcpu */
-static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu)
-{
-       union context *ctx = &vcpu->arch.host;
-       return to_guest(vcpu->kvm, ctx);
-}
-
-/* Get guest context of the vcpu */
-static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu)
-{
-       union context *ctx = &vcpu->arch.guest;
-       return  to_guest(vcpu->kvm, ctx);
-}
-
-/* Get the exit data recorded by the GVMM. */
-static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu)
-{
-       return &vcpu->arch.exit_data;
-}
-
-/* Get the vcpu's ioreq for the kvm module. */
-static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu)
-{
-       struct exit_ctl_data *p_ctl_data;
-
-       if (vcpu) {
-               p_ctl_data = kvm_get_exit_data(vcpu);
-               if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION)
-                       return &p_ctl_data->u.ioreq;
-       }
-
-       return NULL;
-}
-
-#endif
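
The to_guest()/to_host() helpers above are a straight linear remapping: the VM data block lives at kvm->arch.vm_base on the host and is mapped at the fixed KVM_VM_DATA_BASE window on the guest side, so translation is just "subtract one base, add the other". A standalone sketch with made-up base constants (the DEMO_* values are assumptions, not kernel constants):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical bases standing in for kvm->arch.vm_base / KVM_VM_DATA_BASE. */
#define DEMO_HOST_BASE  0x7f0000000000UL
#define DEMO_GUEST_BASE 0x600000000000UL

static uintptr_t demo_to_guest(uintptr_t host_addr)
{
	return host_addr - DEMO_HOST_BASE + DEMO_GUEST_BASE;
}

static uintptr_t demo_to_host(uintptr_t guest_addr)
{
	return guest_addr - DEMO_GUEST_BASE + DEMO_HOST_BASE;
}

int main(void)
{
	uintptr_t host = DEMO_HOST_BASE + 0x1234;
	uintptr_t guest = demo_to_guest(host);

	/* The two translations are inverses: the offset is preserved. */
	assert(demo_to_host(guest) == host);
	printf("host %#lx <-> guest %#lx\n", (unsigned long)host,
	       (unsigned long)guest);
	return 0;
}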
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
deleted file mode 100644 (file)
index f1e17d3..0000000
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * mmio.c: MMIO emulation components.
- * Copyright (c) 2004, Intel Corporation.
- *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
- *  Kun Tian (Kevin Tian) (Kevin.tian@intel.com)
- *
- * Copyright (c) 2007 Intel Corporation  KVM support.
- * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
- * Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/kvm_host.h>
-
-#include "vcpu.h"
-
-static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val)
-{
-       VLSAPIC_XTP(v) = val;
-}
-
-/*
- * LSAPIC OFFSET
- */
-#define PIB_LOW_HALF(ofst)     !(ofst & (1 << 20))
-#define PIB_OFST_INTA          0x1E0000
-#define PIB_OFST_XTP           0x1E0008
-
-/*
- * execute write IPI op.
- */
-static void vlsapic_write_ipi(struct kvm_vcpu *vcpu,
-                                       uint64_t addr, uint64_t data)
-{
-       struct exit_ctl_data *p = &current_vcpu->arch.exit_data;
-       unsigned long psr;
-
-       local_irq_save(psr);
-
-       p->exit_reason = EXIT_REASON_IPI;
-       p->u.ipi_data.addr.val = addr;
-       p->u.ipi_data.data.val = data;
-       vmm_transition(current_vcpu);
-
-       local_irq_restore(psr);
-
-}
-
-void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
-                       unsigned long length, unsigned long val)
-{
-       addr &= (PIB_SIZE - 1);
-
-       switch (addr) {
-       case PIB_OFST_INTA:
-               panic_vm(v, "Undefined write on PIB INTA\n");
-               break;
-       case PIB_OFST_XTP:
-               if (length == 1) {
-                       vlsapic_write_xtp(v, val);
-               } else {
-                       panic_vm(v, "Undefined write on PIB XTP\n");
-               }
-               break;
-       default:
-               if (PIB_LOW_HALF(addr)) {
-                       /*Lower half */
-                       if (length != 8)
-                               panic_vm(v, "Can't LHF write with size %ld!\n",
-                                               length);
-                       else
-                               vlsapic_write_ipi(v, addr, val);
-               } else {   /*Upper half */
-                       panic_vm(v, "IPI-UHF write %lx\n", addr);
-               }
-               break;
-       }
-}
-
-unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
-               unsigned long length)
-{
-       uint64_t result = 0;
-
-       addr &= (PIB_SIZE - 1);
-
-       switch (addr) {
-       case PIB_OFST_INTA:
-               if (length == 1) /* 1 byte load */
-                       ; /* There is no i8259, there is no INTA access*/
-               else
-                       panic_vm(v, "Undefined read on PIB INTA\n");
-
-               break;
-       case PIB_OFST_XTP:
-               if (length == 1) {
-                       result = VLSAPIC_XTP(v);
-               } else {
-                       panic_vm(v, "Undefined read on PIB XTP\n");
-               }
-               break;
-       default:
-               panic_vm(v, "Undefined addr access for lsapic!\n");
-               break;
-       }
-       return result;
-}
-
-static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
-                                       u16 s, int ma, int dir)
-{
-       unsigned long iot;
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-       unsigned long psr;
-
-       iot = __gpfn_is_io(src_pa >> PAGE_SHIFT);
-
-       local_irq_save(psr);
-
-       /*Intercept the access for PIB range*/
-       if (iot == GPFN_PIB) {
-               if (!dir)
-                       lsapic_write(vcpu, src_pa, s, *dest);
-               else
-                       *dest = lsapic_read(vcpu, src_pa, s);
-               goto out;
-       }
-       p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION;
-       p->u.ioreq.addr = src_pa;
-       p->u.ioreq.size = s;
-       p->u.ioreq.dir = dir;
-       if (dir == IOREQ_WRITE)
-               p->u.ioreq.data = *dest;
-       p->u.ioreq.state = STATE_IOREQ_READY;
-       vmm_transition(vcpu);
-
-       if (p->u.ioreq.state == STATE_IORESP_READY) {
-               if (dir == IOREQ_READ)
-                       /* it's necessary to ensure zero extension */
-                       *dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
-       } else
-               panic_vm(vcpu, "Unhandled mmio access returned!\n");
-out:
-       local_irq_restore(psr);
-       return ;
-}
-
-/*
-   dir: 1 = read, 0 = write
-   inst_type: 0 = integer, 1 = floating point
- */
-#define SL_INTEGER     0       /* store/load integer */
-#define SL_FLOATING    1       /* store/load floating*/
-
-void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
-{
-       struct kvm_pt_regs *regs;
-       IA64_BUNDLE bundle;
-       int slot, dir = 0;
-       int inst_type = -1;
-       u16 size = 0;
-       u64 data, slot1a, slot1b, temp, update_reg;
-       s32 imm;
-       INST64 inst;
-
-       regs = vcpu_regs(vcpu);
-
-       if (fetch_code(vcpu, regs->cr_iip, &bundle)) {
-               /* if fetching the code fails, return and try again */
-               return;
-       }
-       slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
-       if (!slot)
-               inst.inst = bundle.slot0;
-       else if (slot == 1) {
-               slot1a = bundle.slot1a;
-               slot1b = bundle.slot1b;
-               inst.inst = slot1a + (slot1b << 18);
-       } else if (slot == 2)
-               inst.inst = bundle.slot2;
-
-       /* Integer Load/Store */
-       if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) {
-               inst_type = SL_INTEGER;
-               size = (inst.M1.x6 & 0x3);
-               if ((inst.M1.x6 >> 2) > 0xb) {
-                       /*write*/
-                       dir = IOREQ_WRITE;
-                       data = vcpu_get_gr(vcpu, inst.M4.r2);
-               } else if ((inst.M1.x6 >> 2) < 0xb) {
-                       /*read*/
-                       dir = IOREQ_READ;
-               }
-       } else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) {
-               /* Integer Load + Reg update */
-               inst_type = SL_INTEGER;
-               dir = IOREQ_READ;
-               size = (inst.M2.x6 & 0x3);
-               temp = vcpu_get_gr(vcpu, inst.M2.r3);
-               update_reg = vcpu_get_gr(vcpu, inst.M2.r2);
-               temp += update_reg;
-               vcpu_set_gr(vcpu, inst.M2.r3, temp, 0);
-       } else if (inst.M3.major == 5) {
-               /*Integer Load/Store + Imm update*/
-               inst_type = SL_INTEGER;
-               size = (inst.M3.x6&0x3);
-               if ((inst.M5.x6 >> 2) > 0xb) {
-                       /*write*/
-                       dir = IOREQ_WRITE;
-                       data = vcpu_get_gr(vcpu, inst.M5.r2);
-                       temp = vcpu_get_gr(vcpu, inst.M5.r3);
-                       imm = (inst.M5.s << 31) | (inst.M5.i << 30) |
-                               (inst.M5.imm7 << 23);
-                       temp += imm >> 23;
-                       vcpu_set_gr(vcpu, inst.M5.r3, temp, 0);
-
-               } else if ((inst.M3.x6 >> 2) < 0xb) {
-                       /*read*/
-                       dir = IOREQ_READ;
-                       temp = vcpu_get_gr(vcpu, inst.M3.r3);
-                       imm = (inst.M3.s << 31) | (inst.M3.i << 30) |
-                               (inst.M3.imm7 << 23);
-                       temp += imm >> 23;
-                       vcpu_set_gr(vcpu, inst.M3.r3, temp, 0);
-
-               }
-       } else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B
-                               && inst.M9.m == 0 && inst.M9.x == 0) {
-               /* Floating-point spill*/
-               struct ia64_fpreg v;
-
-               inst_type = SL_FLOATING;
-               dir = IOREQ_WRITE;
-               vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
-               /* Write high word. FIXME: this is a kludge!  */
-               v.u.bits[1] &= 0x3ffff;
-               mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8,
-                           ma, IOREQ_WRITE);
-               data = v.u.bits[0];
-               size = 3;
-       } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
-               /* Floating-point spill + Imm update */
-               struct ia64_fpreg v;
-
-               inst_type = SL_FLOATING;
-               dir = IOREQ_WRITE;
-               vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
-               temp = vcpu_get_gr(vcpu, inst.M10.r3);
-               imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
-                       (inst.M10.imm7 << 23);
-               temp += imm >> 23;
-               vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
-
-               /* Write high word. FIXME: this is a kludge!  */
-               v.u.bits[1] &= 0x3ffff;
-               mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1],
-                           8, ma, IOREQ_WRITE);
-               data = v.u.bits[0];
-               size = 3;
-       } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
-               /* Floating-point stf8 + Imm update */
-               struct ia64_fpreg v;
-               inst_type = SL_FLOATING;
-               dir = IOREQ_WRITE;
-               size = 3;
-               vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
-               data = v.u.bits[0]; /* Significand.  */
-               temp = vcpu_get_gr(vcpu, inst.M10.r3);
-               imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
-                       (inst.M10.imm7 << 23);
-               temp += imm >> 23;
-               vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
-       } else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c
-                       && inst.M15.x6 <= 0x2f) {
-               temp = vcpu_get_gr(vcpu, inst.M15.r3);
-               imm = (inst.M15.s << 31) | (inst.M15.i << 30) |
-                       (inst.M15.imm7 << 23);
-               temp += imm >> 23;
-               vcpu_set_gr(vcpu, inst.M15.r3, temp, 0);
-
-               vcpu_increment_iip(vcpu);
-               return;
-       } else if (inst.M12.major == 6 && inst.M12.m == 1
-                       && inst.M12.x == 1 && inst.M12.x6 == 1) {
-               /* Floating-point Load Pair + Imm ldfp8 M12*/
-               struct ia64_fpreg v;
-
-               inst_type = SL_FLOATING;
-               dir = IOREQ_READ;
-               size = 8;     /*ldfd*/
-               mmio_access(vcpu, padr, &data, size, ma, dir);
-               v.u.bits[0] = data;
-               v.u.bits[1] = 0x1003E;
-               vcpu_set_fpreg(vcpu, inst.M12.f1, &v);
-               padr += 8;
-               mmio_access(vcpu, padr, &data, size, ma, dir);
-               v.u.bits[0] = data;
-               v.u.bits[1] = 0x1003E;
-               vcpu_set_fpreg(vcpu, inst.M12.f2, &v);
-               padr += 8;
-               vcpu_set_gr(vcpu, inst.M12.r3, padr, 0);
-               vcpu_increment_iip(vcpu);
-               return;
-       } else {
-               inst_type = -1;
-               panic_vm(vcpu, "Unsupported MMIO access instruction! "
-                               "Bundle[0]=0x%lx, Bundle[1]=0x%lx\n",
-                               bundle.i64[0], bundle.i64[1]);
-       }
-
-       size = 1 << size;
-       if (dir == IOREQ_WRITE) {
-               mmio_access(vcpu, padr, &data, size, ma, dir);
-       } else {
-               mmio_access(vcpu, padr, &data, size, ma, dir);
-               if (inst_type == SL_INTEGER)
-                       vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
-               else
-                       panic_vm(vcpu, "Unsupported instruction type!\n");
-
-       }
-       vcpu_increment_iip(vcpu);
-}
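
Two details of the removed MMIO decode above are easy to miss: the access size comes from the low two bits of the x6 opcode field and is later expanded with size = 1 << size, and read data is zero-extended with the mask ~0UL >> (64 - s*8). A small C sketch of just those two steps (the demo_* names are illustrative only):

#include <stdio.h>

/* Zero-extend an s-byte MMIO read result, mirroring the mask used above
 * (~0UL >> (64 - s*8)); s is assumed to be 1, 2, 4 or 8. */
static unsigned long demo_zero_extend(unsigned long raw, unsigned int s)
{
	return raw & (~0UL >> (64 - s * 8));
}

int main(void)
{
	unsigned int x6_size_field = 0x2;        /* low 2 bits of x6 -> 4-byte op */
	unsigned int size = 1u << x6_size_field; /* same "size = 1 << size" step */
	unsigned long raw = 0xffffffffdeadbeefUL;

	printf("size=%u data=%#lx\n", size, demo_zero_extend(raw, size));
	return 0;
}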
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
deleted file mode 100644 (file)
index f793be3..0000000
+++ /dev/null
@@ -1,1090 +0,0 @@
-/*
- * arch/ia64/kvm/optvfault.S
- * optimized virtualization fault handler
- *
- * Copyright (C) 2006 Intel Co
- *     Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
- * Copyright (C) 2008 Intel Co
- *      Add the support for Tukwila processors.
- *     Xiantao Zhang <xiantao.zhang@intel.com>
- */
-
-#include <asm/asmmacro.h>
-#include <asm/processor.h>
-#include <asm/kvm_host.h>
-
-#include "vti.h"
-#include "asm-offsets.h"
-
-#define ACCE_MOV_FROM_AR
-#define ACCE_MOV_FROM_RR
-#define ACCE_MOV_TO_RR
-#define ACCE_RSM
-#define ACCE_SSM
-#define ACCE_MOV_TO_PSR
-#define ACCE_THASH
-
-#define VMX_VPS_SYNC_READ                      \
-       add r16=VMM_VPD_BASE_OFFSET,r21;        \
-       mov r17 = b0;                           \
-       mov r18 = r24;                          \
-       mov r19 = r25;                          \
-       mov r20 = r31;                          \
-       ;;                                      \
-{.mii;                                         \
-       ld8 r16 = [r16];                        \
-       nop 0x0;                                \
-       mov r24 = ip;                           \
-       ;;                                      \
-};                                             \
-{.mmb;                                         \
-       add r24=0x20, r24;                      \
-       mov r25 =r16;                           \
-       br.sptk.many kvm_vps_sync_read;         \
-};                                             \
-       mov b0 = r17;                           \
-       mov r24 = r18;                          \
-       mov r25 = r19;                          \
-       mov r31 = r20
-
-ENTRY(kvm_vps_entry)
-       adds r29 = VMM_VCPU_VSA_BASE_OFFSET,r21
-       ;;
-       ld8 r29 = [r29]
-       ;;
-       add r29 = r29, r30
-       ;;
-       mov b0 = r29
-       br.sptk.many b0
-END(kvm_vps_entry)
-
-/*
- *     Inputs:
- *     r24 : return address
- *     r25 : vpd
- *     r29 : scratch
- *
- */
-GLOBAL_ENTRY(kvm_vps_sync_read)
-       movl r30 = PAL_VPS_SYNC_READ
-       ;;
-       br.sptk.many kvm_vps_entry
-END(kvm_vps_sync_read)
-
-/*
- *     Inputs:
- *     r24 : return address
- *     r25 : vpd
- *     r29 : scratch
- *
- */
-GLOBAL_ENTRY(kvm_vps_sync_write)
-       movl r30 = PAL_VPS_SYNC_WRITE
-       ;;
-       br.sptk.many kvm_vps_entry
-END(kvm_vps_sync_write)
-
-/*
- *     Inputs:
- *     r23 : pr
- *     r24 : guest b0
- *     r25 : vpd
- *
- */
-GLOBAL_ENTRY(kvm_vps_resume_normal)
-       movl r30 = PAL_VPS_RESUME_NORMAL
-       ;;
-       mov pr=r23,-2
-       br.sptk.many kvm_vps_entry
-END(kvm_vps_resume_normal)
-
-/*
- *     Inputs:
- *     r23 : pr
- *     r24 : guest b0
- *     r25 : vpd
- *     r17 : isr
- */
-GLOBAL_ENTRY(kvm_vps_resume_handler)
-       movl r30 = PAL_VPS_RESUME_HANDLER
-       ;;
-       ld8 r26=[r25]
-       shr r17=r17,IA64_ISR_IR_BIT
-       ;;
-       dep r26=r17,r26,63,1   // bit 63 of r26 indicates whether to enable CFLE
-       mov pr=r23,-2
-       br.sptk.many kvm_vps_entry
-END(kvm_vps_resume_handler)
-
-//mov r1=ar3
-GLOBAL_ENTRY(kvm_asm_mov_from_ar)
-#ifndef ACCE_MOV_FROM_AR
-       br.many kvm_virtualization_fault_back
-#endif
-       add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
-       add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
-       extr.u r17=r25,6,7
-       ;;
-       ld8 r18=[r18]
-       mov r19=ar.itc
-       mov r24=b0
-       ;;
-       add r19=r19,r18
-       addl r20=@gprel(asm_mov_to_reg),gp
-       ;;
-       st8 [r16] = r19
-       adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
-       shladd r17=r17,4,r20
-       ;;
-       mov b0=r17
-       br.sptk.few b0
-       ;;
-END(kvm_asm_mov_from_ar)
-
-/*
- * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC
- * clock as its source for emulating the ITC. This version will be
- * copied on top of the original version if the host is determined to
- * be an SN2.
- */
-GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2)
-       add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
-       movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT))
-
-       add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
-       extr.u r17=r25,6,7
-       mov r24=b0
-       ;;
-       ld8 r18=[r18]
-       ld8 r19=[r19]
-       addl r20=@gprel(asm_mov_to_reg),gp
-       ;;
-       add r19=r19,r18
-       shladd r17=r17,4,r20
-       ;;
-       adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
-       st8 [r16] = r19
-       mov b0=r17
-       br.sptk.few b0
-       ;;
-END(kvm_asm_mov_from_ar_sn2)
-
-
-
-// mov r1=rr[r3]
-GLOBAL_ENTRY(kvm_asm_mov_from_rr)
-#ifndef ACCE_MOV_FROM_RR
-       br.many kvm_virtualization_fault_back
-#endif
-       extr.u r16=r25,20,7
-       extr.u r17=r25,6,7
-       addl r20=@gprel(asm_mov_from_reg),gp
-       ;;
-       adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20
-       shladd r16=r16,4,r20
-       mov r24=b0
-       ;;
-       add r27=VMM_VCPU_VRR0_OFFSET,r21
-       mov b0=r16
-       br.many b0
-       ;;
-kvm_asm_mov_from_rr_back_1:
-       adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
-       adds r22=asm_mov_to_reg-asm_mov_from_reg,r20
-       shr.u r26=r19,61
-       ;;
-       shladd r17=r17,4,r22
-       shladd r27=r26,3,r27
-       ;;
-       ld8 r19=[r27]
-       mov b0=r17
-       br.many b0
-END(kvm_asm_mov_from_rr)
-
-
-// mov rr[r3]=r2
-GLOBAL_ENTRY(kvm_asm_mov_to_rr)
-#ifndef ACCE_MOV_TO_RR
-       br.many kvm_virtualization_fault_back
-#endif
-       extr.u r16=r25,20,7
-       extr.u r17=r25,13,7
-       addl r20=@gprel(asm_mov_from_reg),gp
-       ;;
-       adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20
-       shladd r16=r16,4,r20
-       mov r22=b0
-       ;;
-       add r27=VMM_VCPU_VRR0_OFFSET,r21
-       mov b0=r16
-       br.many b0
-       ;;
-kvm_asm_mov_to_rr_back_1:
-       adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20
-       shr.u r23=r19,61
-       shladd r17=r17,4,r20
-       ;;
-       //if rr6, go back
-       cmp.eq p6,p0=6,r23
-       mov b0=r22
-       (p6) br.cond.dpnt.many kvm_virtualization_fault_back
-       ;;
-       mov r28=r19
-       mov b0=r17
-       br.many b0
-kvm_asm_mov_to_rr_back_2:
-       adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
-       shladd r27=r23,3,r27
-       ;; // vrr.rid<<4 |0xe
-       st8 [r27]=r19
-       mov b0=r30
-       ;;
-       extr.u r16=r19,8,26
-       extr.u r18 =r19,2,6
-       mov r17 =0xe
-       ;;
-       shladd r16 = r16, 4, r17
-       extr.u r19 =r19,0,8
-       ;;
-       shl r16 = r16,8
-       ;;
-       add r19 = r19, r16
-       ;; //set ve 1
-       dep r19=-1,r19,0,1
-       cmp.lt p6,p0=14,r18
-       ;;
-       (p6) mov r18=14
-       ;;
-       (p6) dep r19=r18,r19,2,6
-       ;;
-       cmp.eq p6,p0=0,r23
-       ;;
-       cmp.eq.or p6,p0=4,r23
-       ;;
-       adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-       (p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
-       ;;
-       ld4 r16=[r16]
-       cmp.eq p7,p0=r0,r0
-       (p6) shladd r17=r23,1,r17
-       ;;
-       (p6) st8 [r17]=r19
-       (p6) tbit.nz p6,p7=r16,0
-       ;;
-       (p7) mov rr[r28]=r19
-       mov r24=r22
-       br.many b0
-END(kvm_asm_mov_to_rr)
-
-
-//rsm
-GLOBAL_ENTRY(kvm_asm_rsm)
-#ifndef ACCE_RSM
-       br.many kvm_virtualization_fault_back
-#endif
-       VMX_VPS_SYNC_READ
-       ;;
-       extr.u r26=r25,6,21
-       extr.u r27=r25,31,2
-       ;;
-       extr.u r28=r25,36,1
-       dep r26=r27,r26,21,2
-       ;;
-       add r17=VPD_VPSR_START_OFFSET,r16
-       add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-       //r26 is imm24
-       dep r26=r28,r26,23,1
-       ;;
-       ld8 r18=[r17]
-       movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI
-       ld4 r23=[r22]
-       sub r27=-1,r26
-       mov r24=b0
-       ;;
-       mov r20=cr.ipsr
-       or r28=r27,r28
-       and r19=r18,r27
-       ;;
-       st8 [r17]=r19
-       and r20=r20,r28
-       /* Commented out due to lack of fp lazy algorithm support
-       adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
-       ;;
-       ld8 r27=[r27]
-       ;;
-       tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT
-       ;;
-       (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
-       */
-       ;;
-       mov cr.ipsr=r20
-       tbit.nz p6,p0=r23,0
-       ;;
-       tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT
-       (p6) br.dptk kvm_resume_to_guest_with_sync
-       ;;
-       add r26=VMM_VCPU_META_RR0_OFFSET,r21
-       add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
-       dep r23=-1,r23,0,1
-       ;;
-       ld8 r26=[r26]
-       ld8 r27=[r27]
-       st4 [r22]=r23
-       dep.z r28=4,61,3
-       ;;
-       mov rr[r0]=r26
-       ;;
-       mov rr[r28]=r27
-       ;;
-       srlz.d
-       br.many kvm_resume_to_guest_with_sync
-END(kvm_asm_rsm)
-
-
-//ssm
-GLOBAL_ENTRY(kvm_asm_ssm)
-#ifndef ACCE_SSM
-       br.many kvm_virtualization_fault_back
-#endif
-       VMX_VPS_SYNC_READ
-       ;;
-       extr.u r26=r25,6,21
-       extr.u r27=r25,31,2
-       ;;
-       extr.u r28=r25,36,1
-       dep r26=r27,r26,21,2
-       ;;  //r26 is imm24
-       add r27=VPD_VPSR_START_OFFSET,r16
-       dep r26=r28,r26,23,1
-       ;;  //r19 vpsr
-       ld8 r29=[r27]
-       mov r24=b0
-       ;;
-       add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-       mov r20=cr.ipsr
-       or r19=r29,r26
-       ;;
-       ld4 r23=[r22]
-       st8 [r27]=r19
-       or r20=r20,r26
-       ;;
-       mov cr.ipsr=r20
-       movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
-       ;;
-       and r19=r28,r19
-       tbit.z p6,p0=r23,0
-       ;;
-       cmp.ne.or p6,p0=r28,r19
-       (p6) br.dptk kvm_asm_ssm_1
-       ;;
-       add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
-       add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
-       dep r23=0,r23,0,1
-       ;;
-       ld8 r26=[r26]
-       ld8 r27=[r27]
-       st4 [r22]=r23
-       dep.z r28=4,61,3
-       ;;
-       mov rr[r0]=r26
-       ;;
-       mov rr[r28]=r27
-       ;;
-       srlz.d
-       ;;
-kvm_asm_ssm_1:
-       tbit.nz p6,p0=r29,IA64_PSR_I_BIT
-       ;;
-       tbit.z.or p6,p0=r19,IA64_PSR_I_BIT
-       (p6) br.dptk kvm_resume_to_guest_with_sync
-       ;;
-       add r29=VPD_VTPR_START_OFFSET,r16
-       add r30=VPD_VHPI_START_OFFSET,r16
-       ;;
-       ld8 r29=[r29]
-       ld8 r30=[r30]
-       ;;
-       extr.u r17=r29,4,4
-       extr.u r18=r29,16,1
-       ;;
-       dep r17=r18,r17,4,1
-       ;;
-       cmp.gt p6,p0=r30,r17
-       (p6) br.dpnt.few kvm_asm_dispatch_vexirq
-       br.many kvm_resume_to_guest_with_sync
-END(kvm_asm_ssm)
-
-
-//mov psr.l=r2
-GLOBAL_ENTRY(kvm_asm_mov_to_psr)
-#ifndef ACCE_MOV_TO_PSR
-       br.many kvm_virtualization_fault_back
-#endif
-       VMX_VPS_SYNC_READ
-       ;;
-       extr.u r26=r25,13,7 //r2
-       addl r20=@gprel(asm_mov_from_reg),gp
-       ;;
-       adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20
-       shladd r26=r26,4,r20
-       mov r24=b0
-       ;;
-       add r27=VPD_VPSR_START_OFFSET,r16
-       mov b0=r26
-       br.many b0
-       ;;
-kvm_asm_mov_to_psr_back:
-       ld8 r17=[r27]
-       add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-       dep r19=0,r19,32,32
-       ;;
-       ld4 r23=[r22]
-       dep r18=0,r17,0,32
-       ;;
-       add r30=r18,r19
-       movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
-       ;;
-       st8 [r27]=r30
-       and r27=r28,r30
-       and r29=r28,r17
-       ;;
-       cmp.eq p5,p0=r29,r27
-       cmp.eq p6,p7=r28,r27
-       (p5) br.many kvm_asm_mov_to_psr_1
-       ;;
-       //virtual to physical
-       (p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21
-       (p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
-       (p7) dep r23=-1,r23,0,1
-       ;;
-       //physical to virtual
-       (p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
-       (p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
-       (p6) dep r23=0,r23,0,1
-       ;;
-       ld8 r26=[r26]
-       ld8 r27=[r27]
-       st4 [r22]=r23
-       dep.z r28=4,61,3
-       ;;
-       mov rr[r0]=r26
-       ;;
-       mov rr[r28]=r27
-       ;;
-       srlz.d
-       ;;
-kvm_asm_mov_to_psr_1:
-       mov r20=cr.ipsr
-       movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT
-       ;;
-       or r19=r19,r28
-       dep r20=0,r20,0,32
-       ;;
-       add r20=r19,r20
-       mov b0=r24
-       ;;
-       /* Commented out due to lack of fp lazy algorithm support
-       adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
-       ;;
-       ld8 r27=[r27]
-       ;;
-       tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
-       ;;
-       (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
-       ;;
-       */
-       mov cr.ipsr=r20
-       cmp.ne p6,p0=r0,r0
-       ;;
-       tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT
-       tbit.z.or p6,p0=r30,IA64_PSR_I_BIT
-       (p6) br.dpnt.few kvm_resume_to_guest_with_sync
-       ;;
-       add r29=VPD_VTPR_START_OFFSET,r16
-       add r30=VPD_VHPI_START_OFFSET,r16
-       ;;
-       ld8 r29=[r29]
-       ld8 r30=[r30]
-       ;;
-       extr.u r17=r29,4,4
-       extr.u r18=r29,16,1
-       ;;
-       dep r17=r18,r17,4,1
-       ;;
-       cmp.gt p6,p0=r30,r17
-       (p6) br.dpnt.few kvm_asm_dispatch_vexirq
-       br.many kvm_resume_to_guest_with_sync
-END(kvm_asm_mov_to_psr)
-
-
-ENTRY(kvm_asm_dispatch_vexirq)
-//increment iip
-       mov r17 = b0
-       mov r18 = r31
-{.mii
-       add r25=VMM_VPD_BASE_OFFSET,r21
-       nop 0x0
-       mov r24 = ip
-       ;;
-}
-{.mmb
-       add r24 = 0x20, r24
-       ld8 r25 = [r25]
-       br.sptk.many kvm_vps_sync_write
-}
-       mov b0 =r17
-       mov r16=cr.ipsr
-       mov r31 = r18
-       mov r19 = 37
-       ;;
-       extr.u r17=r16,IA64_PSR_RI_BIT,2
-       tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
-       ;;
-       (p6) mov r18=cr.iip
-       (p6) mov r17=r0
-       (p7) add r17=1,r17
-       ;;
-       (p6) add r18=0x10,r18
-       dep r16=r17,r16,IA64_PSR_RI_BIT,2
-       ;;
-       (p6) mov cr.iip=r18
-       mov cr.ipsr=r16
-       mov r30 =1
-       br.many kvm_dispatch_vexirq
-END(kvm_asm_dispatch_vexirq)
-
-// thash
-// TODO: add support when pta.vf = 1
-GLOBAL_ENTRY(kvm_asm_thash)
-#ifndef ACCE_THASH
-       br.many kvm_virtualization_fault_back
-#endif
-       extr.u r17=r25,20,7             // get r3 from opcode in r25
-       extr.u r18=r25,6,7              // get r1 from opcode in r25
-       addl r20=@gprel(asm_mov_from_reg),gp
-       ;;
-       adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20
-       shladd r17=r17,4,r20    // get addr of MOVE_FROM_REG(r17)
-       adds r16=VMM_VPD_BASE_OFFSET,r21        // get vcpu.arch.priveregs
-       ;;
-       mov r24=b0
-       ;;
-       ld8 r16=[r16]           // get VPD addr
-       mov b0=r17
-       br.many b0                      // r19 return value
-       ;;
-kvm_asm_thash_back1:
-       shr.u r23=r19,61                // get RR number
-       adds r28=VMM_VCPU_VRR0_OFFSET,r21       // get vcpu->arch.vrr[0]'s addr
-       adds r16=VMM_VPD_VPTA_OFFSET,r16        // get vpta
-       ;;
-       shladd r27=r23,3,r28    // get vcpu->arch.vrr[r23]'s addr
-       ld8 r17=[r16]           // get PTA
-       mov r26=1
-       ;;
-       extr.u r29=r17,2,6      // get pta.size
-       ld8 r28=[r27]           // get vcpu->arch.vrr[r23]'s value
-       ;;
-       mov b0=r24
-       // Fall back to C if pta.vf is set
-       tbit.nz p6,p0=r17, 8
-       ;;
-       (p6) mov r24=EVENT_THASH
-       (p6) br.cond.dpnt.many kvm_virtualization_fault_back
-       extr.u r28=r28,2,6      // get rr.ps
-       shl r22=r26,r29         // 1UL << pta.size
-       ;;
-       shr.u r23=r19,r28       // vaddr >> rr.ps
-       adds r26=3,r29          // pta.size + 3
-       shl r27=r17,3           // pta << 3
-       ;;
-       shl r23=r23,3           // (vaddr >> rr.ps) << 3
-       shr.u r27=r27,r26       // (pta << 3) >> (pta.size+3)
-       movl r16=7<<61
-       ;;
-       adds r22=-1,r22         // (1UL << pta.size) - 1
-       shl r27=r27,r29         // ((pta<<3)>>(pta.size+3))<<pta.size
-       and r19=r19,r16         // vaddr & VRN_MASK
-       ;;
-       and r22=r22,r23         // vhpt_offset
-       or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size)
-       adds r26=asm_mov_to_reg-asm_mov_from_reg,r20
-       ;;
-       or r19=r19,r22          // calc pval
-       shladd r17=r18,4,r26
-       adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
-       ;;
-       mov b0=r17
-       br.many b0
-END(kvm_asm_thash)
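
The kvm_asm_thash comments above spell out the long-format VHPT hash: the result keeps the VRN bits of the address, ORs in the VHPT base derived from pta, and adds the entry offset ((vaddr >> rr.ps) << 3) masked to the table size. A C restatement of that arithmetic (a sketch following the comments, not the kernel's thash code):

#include <stdio.h>

#define DEMO_VRN_MASK (7UL << 61)

/* Long-format VHPT hash as described above:
 * pval = (vaddr & VRN_MASK)
 *      | (((pta << 3) >> (pta_size + 3)) << pta_size)          aligned base
 *      | (((vaddr >> rr_ps) << 3) & ((1UL << pta_size) - 1))   entry offset
 * demo_thash is a hypothetical helper, not the kernel function. */
static unsigned long demo_thash(unsigned long vaddr, unsigned long pta,
				unsigned int pta_size, unsigned int rr_ps)
{
	unsigned long base = ((pta << 3) >> (pta_size + 3)) << pta_size;
	unsigned long off  = ((vaddr >> rr_ps) << 3) & ((1UL << pta_size) - 1);

	return (vaddr & DEMO_VRN_MASK) | base | off;
}

int main(void)
{
	/* Example: 16KB pages (ps = 14), 1MB VHPT (pta.size = 20). */
	unsigned long pval = demo_thash(0xa000000000123456UL,
					0x00000000ff000000UL, 20, 14);
	printf("pval=%#lx\n", pval);
	return 0;
}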
-
-#define MOV_TO_REG0    \
-{;                     \
-       nop.b 0x0;              \
-       nop.b 0x0;              \
-       nop.b 0x0;              \
-       ;;                      \
-};
-
-
-#define MOV_TO_REG(n)  \
-{;                     \
-       mov r##n##=r19; \
-       mov b0=r30;     \
-       br.sptk.many b0;        \
-       ;;                      \
-};
-
-
-#define MOV_FROM_REG(n)        \
-{;                             \
-       mov r19=r##n##;         \
-       mov b0=r30;             \
-       br.sptk.many b0;                \
-       ;;                              \
-};
-
-
-#define MOV_TO_BANK0_REG(n)                    \
-ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##);    \
-{;                                             \
-       mov r26=r2;                             \
-       mov r2=r19;                             \
-       bsw.1;                                  \
-       ;;                                              \
-};                                             \
-{;                                             \
-       mov r##n##=r2;                          \
-       nop.b 0x0;                                      \
-       bsw.0;                                  \
-       ;;                                              \
-};                                             \
-{;                                             \
-       mov r2=r26;                             \
-       mov b0=r30;                             \
-       br.sptk.many b0;                                \
-       ;;                                              \
-};                                             \
-END(asm_mov_to_bank0_reg##n##)
-
-
-#define MOV_FROM_BANK0_REG(n)                  \
-ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##);  \
-{;                                             \
-       mov r26=r2;                             \
-       nop.b 0x0;                                      \
-       bsw.1;                                  \
-       ;;                                              \
-};                                             \
-{;                                             \
-       mov r2=r##n##;                          \
-       nop.b 0x0;                                      \
-       bsw.0;                                  \
-       ;;                                              \
-};                                             \
-{;                                             \
-       mov r19=r2;                             \
-       mov r2=r26;                             \
-       mov b0=r30;                             \
-};                                             \
-{;                                             \
-       nop.b 0x0;                                      \
-       nop.b 0x0;                                      \
-       br.sptk.many b0;                                \
-       ;;                                              \
-};                                             \
-END(asm_mov_from_bank0_reg##n##)
-
-
-#define JMP_TO_MOV_TO_BANK0_REG(n)             \
-{;                                             \
-       nop.b 0x0;                                      \
-       nop.b 0x0;                                      \
-       br.sptk.many asm_mov_to_bank0_reg##n##; \
-       ;;                                              \
-}
-
-
-#define JMP_TO_MOV_FROM_BANK0_REG(n)           \
-{;                                             \
-       nop.b 0x0;                                      \
-       nop.b 0x0;                                      \
-       br.sptk.many asm_mov_from_bank0_reg##n##;       \
-       ;;                                              \
-}
-
-
-MOV_FROM_BANK0_REG(16)
-MOV_FROM_BANK0_REG(17)
-MOV_FROM_BANK0_REG(18)
-MOV_FROM_BANK0_REG(19)
-MOV_FROM_BANK0_REG(20)
-MOV_FROM_BANK0_REG(21)
-MOV_FROM_BANK0_REG(22)
-MOV_FROM_BANK0_REG(23)
-MOV_FROM_BANK0_REG(24)
-MOV_FROM_BANK0_REG(25)
-MOV_FROM_BANK0_REG(26)
-MOV_FROM_BANK0_REG(27)
-MOV_FROM_BANK0_REG(28)
-MOV_FROM_BANK0_REG(29)
-MOV_FROM_BANK0_REG(30)
-MOV_FROM_BANK0_REG(31)
-
-
-// mov from reg table
-ENTRY(asm_mov_from_reg)
-       MOV_FROM_REG(0)
-       MOV_FROM_REG(1)
-       MOV_FROM_REG(2)
-       MOV_FROM_REG(3)
-       MOV_FROM_REG(4)
-       MOV_FROM_REG(5)
-       MOV_FROM_REG(6)
-       MOV_FROM_REG(7)
-       MOV_FROM_REG(8)
-       MOV_FROM_REG(9)
-       MOV_FROM_REG(10)
-       MOV_FROM_REG(11)
-       MOV_FROM_REG(12)
-       MOV_FROM_REG(13)
-       MOV_FROM_REG(14)
-       MOV_FROM_REG(15)
-       JMP_TO_MOV_FROM_BANK0_REG(16)
-       JMP_TO_MOV_FROM_BANK0_REG(17)
-       JMP_TO_MOV_FROM_BANK0_REG(18)
-       JMP_TO_MOV_FROM_BANK0_REG(19)
-       JMP_TO_MOV_FROM_BANK0_REG(20)
-       JMP_TO_MOV_FROM_BANK0_REG(21)
-       JMP_TO_MOV_FROM_BANK0_REG(22)
-       JMP_TO_MOV_FROM_BANK0_REG(23)
-       JMP_TO_MOV_FROM_BANK0_REG(24)
-       JMP_TO_MOV_FROM_BANK0_REG(25)
-       JMP_TO_MOV_FROM_BANK0_REG(26)
-       JMP_TO_MOV_FROM_BANK0_REG(27)
-       JMP_TO_MOV_FROM_BANK0_REG(28)
-       JMP_TO_MOV_FROM_BANK0_REG(29)
-       JMP_TO_MOV_FROM_BANK0_REG(30)
-       JMP_TO_MOV_FROM_BANK0_REG(31)
-       MOV_FROM_REG(32)
-       MOV_FROM_REG(33)
-       MOV_FROM_REG(34)
-       MOV_FROM_REG(35)
-       MOV_FROM_REG(36)
-       MOV_FROM_REG(37)
-       MOV_FROM_REG(38)
-       MOV_FROM_REG(39)
-       MOV_FROM_REG(40)
-       MOV_FROM_REG(41)
-       MOV_FROM_REG(42)
-       MOV_FROM_REG(43)
-       MOV_FROM_REG(44)
-       MOV_FROM_REG(45)
-       MOV_FROM_REG(46)
-       MOV_FROM_REG(47)
-       MOV_FROM_REG(48)
-       MOV_FROM_REG(49)
-       MOV_FROM_REG(50)
-       MOV_FROM_REG(51)
-       MOV_FROM_REG(52)
-       MOV_FROM_REG(53)
-       MOV_FROM_REG(54)
-       MOV_FROM_REG(55)
-       MOV_FROM_REG(56)
-       MOV_FROM_REG(57)
-       MOV_FROM_REG(58)
-       MOV_FROM_REG(59)
-       MOV_FROM_REG(60)
-       MOV_FROM_REG(61)
-       MOV_FROM_REG(62)
-       MOV_FROM_REG(63)
-       MOV_FROM_REG(64)
-       MOV_FROM_REG(65)
-       MOV_FROM_REG(66)
-       MOV_FROM_REG(67)
-       MOV_FROM_REG(68)
-       MOV_FROM_REG(69)
-       MOV_FROM_REG(70)
-       MOV_FROM_REG(71)
-       MOV_FROM_REG(72)
-       MOV_FROM_REG(73)
-       MOV_FROM_REG(74)
-       MOV_FROM_REG(75)
-       MOV_FROM_REG(76)
-       MOV_FROM_REG(77)
-       MOV_FROM_REG(78)
-       MOV_FROM_REG(79)
-       MOV_FROM_REG(80)
-       MOV_FROM_REG(81)
-       MOV_FROM_REG(82)
-       MOV_FROM_REG(83)
-       MOV_FROM_REG(84)
-       MOV_FROM_REG(85)
-       MOV_FROM_REG(86)
-       MOV_FROM_REG(87)
-       MOV_FROM_REG(88)
-       MOV_FROM_REG(89)
-       MOV_FROM_REG(90)
-       MOV_FROM_REG(91)
-       MOV_FROM_REG(92)
-       MOV_FROM_REG(93)
-       MOV_FROM_REG(94)
-       MOV_FROM_REG(95)
-       MOV_FROM_REG(96)
-       MOV_FROM_REG(97)
-       MOV_FROM_REG(98)
-       MOV_FROM_REG(99)
-       MOV_FROM_REG(100)
-       MOV_FROM_REG(101)
-       MOV_FROM_REG(102)
-       MOV_FROM_REG(103)
-       MOV_FROM_REG(104)
-       MOV_FROM_REG(105)
-       MOV_FROM_REG(106)
-       MOV_FROM_REG(107)
-       MOV_FROM_REG(108)
-       MOV_FROM_REG(109)
-       MOV_FROM_REG(110)
-       MOV_FROM_REG(111)
-       MOV_FROM_REG(112)
-       MOV_FROM_REG(113)
-       MOV_FROM_REG(114)
-       MOV_FROM_REG(115)
-       MOV_FROM_REG(116)
-       MOV_FROM_REG(117)
-       MOV_FROM_REG(118)
-       MOV_FROM_REG(119)
-       MOV_FROM_REG(120)
-       MOV_FROM_REG(121)
-       MOV_FROM_REG(122)
-       MOV_FROM_REG(123)
-       MOV_FROM_REG(124)
-       MOV_FROM_REG(125)
-       MOV_FROM_REG(126)
-       MOV_FROM_REG(127)
-END(asm_mov_from_reg)
-
-
-/* must be in bank 0
- * parameter:
- * r31: pr
- * r24: b0
- */
-ENTRY(kvm_resume_to_guest_with_sync)
-       adds r19=VMM_VPD_BASE_OFFSET,r21
-       mov r16 = r31
-       mov r17 = r24
-       ;;
-{.mii
-       ld8 r25 =[r19]
-       nop 0x0
-       mov r24 = ip
-       ;;
-}
-{.mmb
-       add r24 =0x20, r24
-       nop 0x0
-       br.sptk.many kvm_vps_sync_write
-}
-
-       mov r31 = r16
-       mov r24 =r17
-       ;;
-       br.sptk.many kvm_resume_to_guest
-END(kvm_resume_to_guest_with_sync)
-
-ENTRY(kvm_resume_to_guest)
-       adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-       ;;
-       ld8 r1 =[r16]
-       adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21
-       ;;
-       mov r16=cr.ipsr
-       ;;
-       ld8 r20 = [r20]
-       adds r19=VMM_VPD_BASE_OFFSET,r21
-       ;;
-       ld8 r25=[r19]
-       extr.u r17=r16,IA64_PSR_RI_BIT,2
-       tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
-       ;;
-       (p6) mov r18=cr.iip
-       (p6) mov r17=r0
-       ;;
-       (p6) add r18=0x10,r18
-       (p7) add r17=1,r17
-       ;;
-       (p6) mov cr.iip=r18
-       dep r16=r17,r16,IA64_PSR_RI_BIT,2
-       ;;
-       mov cr.ipsr=r16
-       adds r19= VPD_VPSR_START_OFFSET,r25
-       add r28=PAL_VPS_RESUME_NORMAL,r20
-       add r29=PAL_VPS_RESUME_HANDLER,r20
-       ;;
-       ld8 r19=[r19]
-       mov b0=r29
-       mov r27=cr.isr
-       ;;
-       tbit.z p6,p7 = r19,IA64_PSR_IC_BIT              // p7=vpsr.ic
-       shr r27=r27,IA64_ISR_IR_BIT
-       ;;
-       (p6) ld8 r26=[r25]
-       (p7) mov b0=r28
-       ;;
-       (p6) dep r26=r27,r26,63,1
-       mov pr=r31,-2
-       br.sptk.many b0             // call pal service
-       ;;
-END(kvm_resume_to_guest)
-
-
-MOV_TO_BANK0_REG(16)
-MOV_TO_BANK0_REG(17)
-MOV_TO_BANK0_REG(18)
-MOV_TO_BANK0_REG(19)
-MOV_TO_BANK0_REG(20)
-MOV_TO_BANK0_REG(21)
-MOV_TO_BANK0_REG(22)
-MOV_TO_BANK0_REG(23)
-MOV_TO_BANK0_REG(24)
-MOV_TO_BANK0_REG(25)
-MOV_TO_BANK0_REG(26)
-MOV_TO_BANK0_REG(27)
-MOV_TO_BANK0_REG(28)
-MOV_TO_BANK0_REG(29)
-MOV_TO_BANK0_REG(30)
-MOV_TO_BANK0_REG(31)
-
-
-// mov to reg table
-ENTRY(asm_mov_to_reg)
-       MOV_TO_REG0
-       MOV_TO_REG(1)
-       MOV_TO_REG(2)
-       MOV_TO_REG(3)
-       MOV_TO_REG(4)
-       MOV_TO_REG(5)
-       MOV_TO_REG(6)
-       MOV_TO_REG(7)
-       MOV_TO_REG(8)
-       MOV_TO_REG(9)
-       MOV_TO_REG(10)
-       MOV_TO_REG(11)
-       MOV_TO_REG(12)
-       MOV_TO_REG(13)
-       MOV_TO_REG(14)
-       MOV_TO_REG(15)
-       JMP_TO_MOV_TO_BANK0_REG(16)
-       JMP_TO_MOV_TO_BANK0_REG(17)
-       JMP_TO_MOV_TO_BANK0_REG(18)
-       JMP_TO_MOV_TO_BANK0_REG(19)
-       JMP_TO_MOV_TO_BANK0_REG(20)
-       JMP_TO_MOV_TO_BANK0_REG(21)
-       JMP_TO_MOV_TO_BANK0_REG(22)
-       JMP_TO_MOV_TO_BANK0_REG(23)
-       JMP_TO_MOV_TO_BANK0_REG(24)
-       JMP_TO_MOV_TO_BANK0_REG(25)
-       JMP_TO_MOV_TO_BANK0_REG(26)
-       JMP_TO_MOV_TO_BANK0_REG(27)
-       JMP_TO_MOV_TO_BANK0_REG(28)
-       JMP_TO_MOV_TO_BANK0_REG(29)
-       JMP_TO_MOV_TO_BANK0_REG(30)
-       JMP_TO_MOV_TO_BANK0_REG(31)
-       MOV_TO_REG(32)
-       MOV_TO_REG(33)
-       MOV_TO_REG(34)
-       MOV_TO_REG(35)
-       MOV_TO_REG(36)
-       MOV_TO_REG(37)
-       MOV_TO_REG(38)
-       MOV_TO_REG(39)
-       MOV_TO_REG(40)
-       MOV_TO_REG(41)
-       MOV_TO_REG(42)
-       MOV_TO_REG(43)
-       MOV_TO_REG(44)
-       MOV_TO_REG(45)
-       MOV_TO_REG(46)
-       MOV_TO_REG(47)
-       MOV_TO_REG(48)
-       MOV_TO_REG(49)
-       MOV_TO_REG(50)
-       MOV_TO_REG(51)
-       MOV_TO_REG(52)
-       MOV_TO_REG(53)
-       MOV_TO_REG(54)
-       MOV_TO_REG(55)
-       MOV_TO_REG(56)
-       MOV_TO_REG(57)
-       MOV_TO_REG(58)
-       MOV_TO_REG(59)
-       MOV_TO_REG(60)
-       MOV_TO_REG(61)
-       MOV_TO_REG(62)
-       MOV_TO_REG(63)
-       MOV_TO_REG(64)
-       MOV_TO_REG(65)
-       MOV_TO_REG(66)
-       MOV_TO_REG(67)
-       MOV_TO_REG(68)
-       MOV_TO_REG(69)
-       MOV_TO_REG(70)
-       MOV_TO_REG(71)
-       MOV_TO_REG(72)
-       MOV_TO_REG(73)
-       MOV_TO_REG(74)
-       MOV_TO_REG(75)
-       MOV_TO_REG(76)
-       MOV_TO_REG(77)
-       MOV_TO_REG(78)
-       MOV_TO_REG(79)
-       MOV_TO_REG(80)
-       MOV_TO_REG(81)
-       MOV_TO_REG(82)
-       MOV_TO_REG(83)
-       MOV_TO_REG(84)
-       MOV_TO_REG(85)
-       MOV_TO_REG(86)
-       MOV_TO_REG(87)
-       MOV_TO_REG(88)
-       MOV_TO_REG(89)
-       MOV_TO_REG(90)
-       MOV_TO_REG(91)
-       MOV_TO_REG(92)
-       MOV_TO_REG(93)
-       MOV_TO_REG(94)
-       MOV_TO_REG(95)
-       MOV_TO_REG(96)
-       MOV_TO_REG(97)
-       MOV_TO_REG(98)
-       MOV_TO_REG(99)
-       MOV_TO_REG(100)
-       MOV_TO_REG(101)
-       MOV_TO_REG(102)
-       MOV_TO_REG(103)
-       MOV_TO_REG(104)
-       MOV_TO_REG(105)
-       MOV_TO_REG(106)
-       MOV_TO_REG(107)
-       MOV_TO_REG(108)
-       MOV_TO_REG(109)
-       MOV_TO_REG(110)
-       MOV_TO_REG(111)
-       MOV_TO_REG(112)
-       MOV_TO_REG(113)
-       MOV_TO_REG(114)
-       MOV_TO_REG(115)
-       MOV_TO_REG(116)
-       MOV_TO_REG(117)
-       MOV_TO_REG(118)
-       MOV_TO_REG(119)
-       MOV_TO_REG(120)
-       MOV_TO_REG(121)
-       MOV_TO_REG(122)
-       MOV_TO_REG(123)
-       MOV_TO_REG(124)
-       MOV_TO_REG(125)
-       MOV_TO_REG(126)
-       MOV_TO_REG(127)
-END(asm_mov_to_reg)
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
deleted file mode 100644
index b039874..0000000
+++ /dev/null
@@ -1,1024 +0,0 @@
-/*
- * process.c: handle interruption inject for guests.
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- *     Shaofan Li (Susue Li) <susie.li@intel.com>
- *     Xiaoyan Feng (Fleming Feng)  <fleming.feng@intel.com>
- *     Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *     Xiantao Zhang (xiantao.zhang@intel.com)
- */
-#include "vcpu.h"
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <asm/fpswa.h>
-#include <asm/kregs.h>
-#include <asm/tlb.h>
-
-fpswa_interface_t *vmm_fpswa_interface;
-
-#define IA64_VHPT_TRANS_VECTOR                 0x0000
-#define IA64_INST_TLB_VECTOR                   0x0400
-#define IA64_DATA_TLB_VECTOR                   0x0800
-#define IA64_ALT_INST_TLB_VECTOR               0x0c00
-#define IA64_ALT_DATA_TLB_VECTOR               0x1000
-#define IA64_DATA_NESTED_TLB_VECTOR            0x1400
-#define IA64_INST_KEY_MISS_VECTOR              0x1800
-#define IA64_DATA_KEY_MISS_VECTOR              0x1c00
-#define IA64_DIRTY_BIT_VECTOR                  0x2000
-#define IA64_INST_ACCESS_BIT_VECTOR            0x2400
-#define IA64_DATA_ACCESS_BIT_VECTOR            0x2800
-#define IA64_BREAK_VECTOR                      0x2c00
-#define IA64_EXTINT_VECTOR                     0x3000
-#define IA64_PAGE_NOT_PRESENT_VECTOR           0x5000
-#define IA64_KEY_PERMISSION_VECTOR             0x5100
-#define IA64_INST_ACCESS_RIGHTS_VECTOR         0x5200
-#define IA64_DATA_ACCESS_RIGHTS_VECTOR         0x5300
-#define IA64_GENEX_VECTOR                      0x5400
-#define IA64_DISABLED_FPREG_VECTOR             0x5500
-#define IA64_NAT_CONSUMPTION_VECTOR            0x5600
-#define IA64_SPECULATION_VECTOR                0x5700 /* UNUSED */
-#define IA64_DEBUG_VECTOR                      0x5900
-#define IA64_UNALIGNED_REF_VECTOR              0x5a00
-#define IA64_UNSUPPORTED_DATA_REF_VECTOR       0x5b00
-#define IA64_FP_FAULT_VECTOR                   0x5c00
-#define IA64_FP_TRAP_VECTOR                    0x5d00
-#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR    0x5e00
-#define IA64_TAKEN_BRANCH_TRAP_VECTOR          0x5f00
-#define IA64_SINGLE_STEP_TRAP_VECTOR           0x6000
-
-/* SDM vol2 5.5 - IVA based interruption handling */
-#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\
-                       IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT |      \
-                       IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT)
-
-#define DOMN_PAL_REQUEST    0x110000
-#define DOMN_SAL_REQUEST    0x110001
-
-static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800,
-       0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00,
-       0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400,
-       0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
-       0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600,
-       0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00,
-       0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800,
-       0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00
-};
-
-static void collect_interruption(struct kvm_vcpu *vcpu)
-{
-       u64 ipsr;
-       u64 vdcr;
-       u64 vifs;
-       unsigned long vpsr;
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-       vpsr = vcpu_get_psr(vcpu);
-       vcpu_bsw0(vcpu);
-       if (vpsr & IA64_PSR_IC) {
-
-               /* Sync mpsr id/da/dd/ss/ed bits to vipsr
-                * since after guest do rfi, we still want these bits on in
-                * mpsr
-                */
-
-               ipsr = regs->cr_ipsr;
-               vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
-                                       | IA64_PSR_DD | IA64_PSR_SS
-                                       | IA64_PSR_ED));
-               vcpu_set_ipsr(vcpu, vpsr);
-
-               /* Currently, for trap, we do not advance IIP to next
-                * instruction. That's because we assume caller already
-                * set up IIP correctly
-                */
-
-               vcpu_set_iip(vcpu , regs->cr_iip);
-
-               /* set vifs.v to zero */
-               vifs = VCPU(vcpu, ifs);
-               vifs &= ~IA64_IFS_V;
-               vcpu_set_ifs(vcpu, vifs);
-
-               vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa));
-       }
-
-       vdcr = VCPU(vcpu, dcr);
-
-       /* Set guest psr
-        * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged
-        * be: set to the value of dcr.be
-        * pp: set to the value of dcr.pp
-        */
-       vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
-       vpsr |= (vdcr & IA64_DCR_BE);
-
-       /* VDCR pp bit position is different from VPSR pp bit */
-       if (vdcr & IA64_DCR_PP) {
-               vpsr |= IA64_PSR_PP;
-       } else {
-               vpsr &= ~IA64_PSR_PP;
-       }
-
-       vcpu_set_psr(vcpu, vpsr);
-
-}
-
-void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec)
-{
-       u64 viva;
-       struct kvm_pt_regs *regs;
-       union ia64_isr pt_isr;
-
-       regs = vcpu_regs(vcpu);
-
-       /* clear cr.isr.ir (incomplete register frame)*/
-       pt_isr.val = VMX(vcpu, cr_isr);
-       pt_isr.ir = 0;
-       VMX(vcpu, cr_isr) = pt_isr.val;
-
-       collect_interruption(vcpu);
-
-       viva = vcpu_get_iva(vcpu);
-       regs->cr_iip = viva + vec;
-}
-
-static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
-{
-       union ia64_rr rr, rr1;
-
-       rr.val = vcpu_get_rr(vcpu, ifa);
-       rr1.val = 0;
-       rr1.ps = rr.ps;
-       rr1.rid = rr.rid;
-       return (rr1.val);
-}
-
-/*
- * Set vIFA & vITIR & vIHA, when vPSR.ic =1
- * Parameter:
- *  set_ifa: if true, set vIFA
- *  set_itir: if true, set vITIR
- *  set_iha: if true, set vIHA
- */
-void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr,
-               int set_ifa, int set_itir, int set_iha)
-{
-       long vpsr;
-       u64 value;
-
-       vpsr = VCPU(vcpu, vpsr);
-       /* Vol2, Table 8-1 */
-       if (vpsr & IA64_PSR_IC) {
-               if (set_ifa)
-                       vcpu_set_ifa(vcpu, vadr);
-               if (set_itir) {
-                       value = vcpu_get_itir_on_fault(vcpu, vadr);
-                       vcpu_set_itir(vcpu, value);
-               }
-
-               if (set_iha) {
-                       value = vcpu_thash(vcpu, vadr);
-                       vcpu_set_iha(vcpu, value);
-               }
-       }
-}
-
-/*
- * Data TLB Fault
- *  @ Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       /* If vPSR.ic, IFA, ITIR, IHA */
-       set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
-       inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR);
-}
-
-/*
- * Instruction TLB Fault
- *  @ Instruction TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       /* If vPSR.ic, IFA, ITIR, IHA */
-       set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
-       inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
-}
-
-/*
- * Data Nested TLB Fault
- *  @ Data Nested TLB Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void nested_dtlb(struct kvm_vcpu *vcpu)
-{
-       inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR);
-}
-
-/*
- * Alternate Data TLB Fault
- *  @ Alternate Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-       inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
-}
-
-/*
- * Data TLB Fault
- *  @ Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-       inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR);
-}
-
-/* Deal with:
- *  VHPT Translation Vector
- */
-static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       /* If vPSR.ic, IFA, ITIR, IHA*/
-       set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
-       inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
-}
-
-/*
- * VHPT Instruction Fault
- *  @ VHPT Translation vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       _vhpt_fault(vcpu, vadr);
-}
-
-/*
- * VHPT Data Fault
- *  @ VHPT Translation vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       _vhpt_fault(vcpu, vadr);
-}
-
-/*
- * Deal with:
- *  General Exception vector
- */
-void _general_exception(struct kvm_vcpu *vcpu)
-{
-       inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
-}
-
-/*
- * Illegal Operation Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void illegal_op(struct kvm_vcpu *vcpu)
-{
-       _general_exception(vcpu);
-}
-
-/*
- * Illegal Dependency Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void illegal_dep(struct kvm_vcpu *vcpu)
-{
-       _general_exception(vcpu);
-}
-
-/*
- * Reserved Register/Field Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void rsv_reg_field(struct kvm_vcpu *vcpu)
-{
-       _general_exception(vcpu);
-}
-/*
- * Privileged Operation Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-
-void privilege_op(struct kvm_vcpu *vcpu)
-{
-       _general_exception(vcpu);
-}
-
-/*
- * Unimplement Data Address Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void unimpl_daddr(struct kvm_vcpu *vcpu)
-{
-       _general_exception(vcpu);
-}
-
-/*
- * Privileged Register Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void privilege_reg(struct kvm_vcpu *vcpu)
-{
-       _general_exception(vcpu);
-}
-
-/* Deal with
- *  Nat consumption vector
- * Parameter:
- *  vaddr: Optional, if t == REGISTER
- */
-static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr,
-                                               enum tlb_miss_type t)
-{
-       /* If vPSR.ic && t == DATA/INST, IFA */
-       if (t == DATA || t == INSTRUCTION) {
-               /* IFA */
-               set_ifa_itir_iha(vcpu, vadr, 1, 0, 0);
-       }
-
-       inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR);
-}
-
-/*
- * Instruction Nat Page Consumption Fault
- *  @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       _nat_consumption_fault(vcpu, vadr, INSTRUCTION);
-}
-
-/*
- * Register Nat Consumption Fault
- *  @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void rnat_consumption(struct kvm_vcpu *vcpu)
-{
-       _nat_consumption_fault(vcpu, 0, REGISTER);
-}
-
-/*
- * Data Nat Page Consumption Fault
- *  @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       _nat_consumption_fault(vcpu, vadr, DATA);
-}
-
-/* Deal with
- *  Page not present vector
- */
-static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       /* If vPSR.ic, IFA, ITIR */
-       set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-       inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
-}
-
-void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       __page_not_present(vcpu, vadr);
-}
-
-void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       __page_not_present(vcpu, vadr);
-}
-
-/* Deal with
- *  Data access rights vector
- */
-void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       /* If vPSR.ic, IFA, ITIR */
-       set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-       inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR);
-}
-
-fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
-               unsigned long *fpsr, unsigned long *isr, unsigned long *pr,
-               unsigned long *ifs, struct kvm_pt_regs *regs)
-{
-       fp_state_t fp_state;
-       fpswa_ret_t ret;
-       struct kvm_vcpu *vcpu = current_vcpu;
-
-       uint64_t old_rr7 = ia64_get_rr(7UL<<61);
-
-       if (!vmm_fpswa_interface)
-               return (fpswa_ret_t) {-1, 0, 0, 0};
-
-       memset(&fp_state, 0, sizeof(fp_state_t));
-
-       /*
-        * compute fp_state.  only FP registers f6 - f11 are used by the
-        * vmm, so set those bits in the mask and set the low volatile
-        * pointer to point to these registers.
-        */
-       fp_state.bitmask_low64 = 0xfc0;  /* bit6..bit11 */
-
-       fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
-
-   /*
-        * unsigned long (*EFI_FPSWA) (
-        *      unsigned long    trap_type,
-        *      void             *Bundle,
-        *      unsigned long    *pipsr,
-        *      unsigned long    *pfsr,
-        *      unsigned long    *pisr,
-        *      unsigned long    *ppreds,
-        *      unsigned long    *pifs,
-        *      void             *fp_state);
-        */
-       /*Call host fpswa interface directly to virtualize
-        *guest fpswa request!
-        */
-       ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]);
-       ia64_srlz_d();
-
-       ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle,
-                       ipsr, fpsr, isr, pr, ifs, &fp_state);
-       ia64_set_rr(7UL << 61, old_rr7);
-       ia64_srlz_d();
-       return ret;
-}
-
-/*
- * Handle floating-point assist faults and traps for domain.
- */
-unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs,
-                                       unsigned long isr)
-{
-       struct kvm_vcpu *v = current_vcpu;
-       IA64_BUNDLE bundle;
-       unsigned long fault_ip;
-       fpswa_ret_t ret;
-
-       fault_ip = regs->cr_iip;
-       /*
-        * When the FP trap occurs, the trapping instruction is completed.
-        * If ipsr.ri == 0, there is the trapping instruction in previous
-        * bundle.
-        */
-       if (!fp_fault && (ia64_psr(regs)->ri == 0))
-               fault_ip -= 16;
-
-       if (fetch_code(v, fault_ip, &bundle))
-               return -EAGAIN;
-
-       if (!bundle.i64[0] && !bundle.i64[1])
-               return -EACCES;
-
-       ret = vmm_fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr,
-                       &isr, &regs->pr, &regs->cr_ifs, regs);
-       return ret.status;
-}
-
-void reflect_interruption(u64 ifa, u64 isr, u64 iim,
-               u64 vec, struct kvm_pt_regs *regs)
-{
-       u64 vector;
-       int status ;
-       struct kvm_vcpu *vcpu = current_vcpu;
-       u64 vpsr = VCPU(vcpu, vpsr);
-
-       vector = vec2off[vec];
-
-       if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
-               panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
-                                               "with psr.ic = 0\n", vector);
-               return;
-       }
-
-       switch (vec) {
-       case 32:        /*IA64_FP_FAULT_VECTOR*/
-               status = vmm_handle_fpu_swa(1, regs, isr);
-               if (!status) {
-                       vcpu_increment_iip(vcpu);
-                       return;
-               } else if (-EAGAIN == status)
-                       return;
-               break;
-       case 33:        /*IA64_FP_TRAP_VECTOR*/
-               status = vmm_handle_fpu_swa(0, regs, isr);
-               if (!status)
-                       return ;
-               break;
-       }
-
-       VCPU(vcpu, isr) = isr;
-       VCPU(vcpu, iipa) = regs->cr_iip;
-       if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
-               VCPU(vcpu, iim) = iim;
-       else
-               set_ifa_itir_iha(vcpu, ifa, 1, 1, 1);
-
-       inject_guest_interruption(vcpu, vector);
-}
-
-static unsigned long kvm_trans_pal_call_args(struct kvm_vcpu *vcpu,
-                                               unsigned long arg)
-{
-       struct thash_data *data;
-       unsigned long gpa, poff;
-
-       if (!is_physical_mode(vcpu)) {
-               /* Depends on caller to provide the DTR or DTC mapping.*/
-               data = vtlb_lookup(vcpu, arg, D_TLB);
-               if (data)
-                       gpa = data->page_flags & _PAGE_PPN_MASK;
-               else {
-                       data = vhpt_lookup(arg);
-                       if (!data)
-                               return 0;
-                       gpa = data->gpaddr & _PAGE_PPN_MASK;
-               }
-
-               poff = arg & (PSIZE(data->ps) - 1);
-               arg = PAGEALIGN(gpa, data->ps) | poff;
-       }
-       arg = kvm_gpa_to_mpa(arg << 1 >> 1);
-
-       return (unsigned long)__va(arg);
-}
-
-static void set_pal_call_data(struct kvm_vcpu *vcpu)
-{
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-       unsigned long gr28 = vcpu_get_gr(vcpu, 28);
-       unsigned long gr29 = vcpu_get_gr(vcpu, 29);
-       unsigned long gr30 = vcpu_get_gr(vcpu, 30);
-
-       /*FIXME:For static and stacked convention, firmware
-        * has put the parameters in gr28-gr31 before
-        * break to vmm  !!*/
-
-       switch (gr28) {
-       case PAL_PERF_MON_INFO:
-       case PAL_HALT_INFO:
-               p->u.pal_data.gr29 =  kvm_trans_pal_call_args(vcpu, gr29);
-               p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
-               break;
-       case PAL_BRAND_INFO:
-               p->u.pal_data.gr29 = gr29;
-               p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30);
-               break;
-       default:
-               p->u.pal_data.gr29 = gr29;
-               p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
-       }
-       p->u.pal_data.gr28 = gr28;
-       p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
-
-       p->exit_reason = EXIT_REASON_PAL_CALL;
-}
-
-static void get_pal_call_result(struct kvm_vcpu *vcpu)
-{
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-       if (p->exit_reason == EXIT_REASON_PAL_CALL) {
-               vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0);
-               vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0);
-               vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
-               vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
-       } else
-               panic_vm(vcpu, "Mis-set for exit reason!\n");
-}
-
-static void set_sal_call_data(struct kvm_vcpu *vcpu)
-{
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-       p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32);
-       p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33);
-       p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34);
-       p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35);
-       p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36);
-       p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37);
-       p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38);
-       p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39);
-       p->exit_reason = EXIT_REASON_SAL_CALL;
-}
-
-static void get_sal_call_result(struct kvm_vcpu *vcpu)
-{
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-       if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-               vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0);
-               vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0);
-               vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
-               vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
-       } else
-               panic_vm(vcpu, "Mis-set for exit reason!\n");
-}
-
-void  kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
-               unsigned long isr, unsigned long iim)
-{
-       struct kvm_vcpu *v = current_vcpu;
-       long psr;
-
-       if (ia64_psr(regs)->cpl == 0) {
-               /* Allow hypercalls only when cpl = 0.  */
-               if (iim == DOMN_PAL_REQUEST) {
-                       local_irq_save(psr);
-                       set_pal_call_data(v);
-                       vmm_transition(v);
-                       get_pal_call_result(v);
-                       vcpu_increment_iip(v);
-                       local_irq_restore(psr);
-                       return;
-               } else if (iim == DOMN_SAL_REQUEST) {
-                       local_irq_save(psr);
-                       set_sal_call_data(v);
-                       vmm_transition(v);
-                       get_sal_call_result(v);
-                       vcpu_increment_iip(v);
-                       local_irq_restore(psr);
-                       return;
-               }
-       }
-       reflect_interruption(ifa, isr, iim, 11, regs);
-}
-
-void check_pending_irq(struct kvm_vcpu *vcpu)
-{
-       int  mask, h_pending, h_inservice;
-       u64 isr;
-       unsigned long  vpsr;
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-       h_pending = highest_pending_irq(vcpu);
-       if (h_pending == NULL_VECTOR) {
-               update_vhpi(vcpu, NULL_VECTOR);
-               return;
-       }
-       h_inservice = highest_inservice_irq(vcpu);
-
-       vpsr = VCPU(vcpu, vpsr);
-       mask = irq_masked(vcpu, h_pending, h_inservice);
-       if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) {
-               isr = vpsr & IA64_PSR_RI;
-               update_vhpi(vcpu, h_pending);
-               reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
-       } else if (mask == IRQ_MASKED_BY_INSVC) {
-               if (VCPU(vcpu, vhpi))
-                       update_vhpi(vcpu, NULL_VECTOR);
-       } else {
-               /* masked by vpsr.i or vtpr.*/
-               update_vhpi(vcpu, h_pending);
-       }
-}
-
-static void generate_exirq(struct kvm_vcpu *vcpu)
-{
-       unsigned  vpsr;
-       uint64_t isr;
-
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-       vpsr = VCPU(vcpu, vpsr);
-       isr = vpsr & IA64_PSR_RI;
-       if (!(vpsr & IA64_PSR_IC))
-               panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
-       reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
-}
-
-void vhpi_detection(struct kvm_vcpu *vcpu)
-{
-       uint64_t    threshold, vhpi;
-       union ia64_tpr       vtpr;
-       struct ia64_psr vpsr;
-
-       vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-       vtpr.val = VCPU(vcpu, tpr);
-
-       threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
-       vhpi = VCPU(vcpu, vhpi);
-       if (vhpi > threshold) {
-               /* interrupt actived*/
-               generate_exirq(vcpu);
-       }
-}
-
-void leave_hypervisor_tail(void)
-{
-       struct kvm_vcpu *v = current_vcpu;
-
-       if (VMX(v, timer_check)) {
-               VMX(v, timer_check) = 0;
-               if (VMX(v, itc_check)) {
-                       if (vcpu_get_itc(v) > VCPU(v, itm)) {
-                               if (!(VCPU(v, itv) & (1 << 16))) {
-                                       vcpu_pend_interrupt(v, VCPU(v, itv)
-                                                       & 0xff);
-                                       VMX(v, itc_check) = 0;
-                               } else {
-                                       v->arch.timer_pending = 1;
-                               }
-                               VMX(v, last_itc) = VCPU(v, itm) + 1;
-                       }
-               }
-       }
-
-       rmb();
-       if (v->arch.irq_new_pending) {
-               v->arch.irq_new_pending = 0;
-               VMX(v, irq_check) = 0;
-               check_pending_irq(v);
-               return;
-       }
-       if (VMX(v, irq_check)) {
-               VMX(v, irq_check) = 0;
-               vhpi_detection(v);
-       }
-}
-
-static inline void handle_lds(struct kvm_pt_regs *regs)
-{
-       regs->cr_ipsr |= IA64_PSR_ED;
-}
-
-void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type)
-{
-       unsigned long pte;
-       union ia64_rr rr;
-
-       rr.val = ia64_get_rr(vadr);
-       pte =  vadr & _PAGE_PPN_MASK;
-       pte = pte | PHY_PAGE_WB;
-       thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type);
-       return;
-}
-
-void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs)
-{
-       unsigned long vpsr;
-       int type;
-
-       u64 vhpt_adr, gppa, pteval, rr, itir;
-       union ia64_isr misr;
-       union ia64_pta vpta;
-       struct thash_data *data;
-       struct kvm_vcpu *v = current_vcpu;
-
-       vpsr = VCPU(v, vpsr);
-       misr.val = VMX(v, cr_isr);
-
-       type = vec;
-
-       if (is_physical_mode(v) && (!(vadr << 1 >> 62))) {
-               if (vec == 2) {
-                       if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) {
-                               emulate_io_inst(v, ((vadr << 1) >> 1), 4);
-                               return;
-                       }
-               }
-               physical_tlb_miss(v, vadr, type);
-               return;
-       }
-       data = vtlb_lookup(v, vadr, type);
-       if (data != 0) {
-               if (type == D_TLB) {
-                       gppa = (vadr & ((1UL << data->ps) - 1))
-                               + (data->ppn >> (data->ps - 12) << data->ps);
-                       if (__gpfn_is_io(gppa >> PAGE_SHIFT)) {
-                               if (data->pl >= ((regs->cr_ipsr >>
-                                               IA64_PSR_CPL0_BIT) & 3))
-                                       emulate_io_inst(v, gppa, data->ma);
-                               else {
-                                       vcpu_set_isr(v, misr.val);
-                                       data_access_rights(v, vadr);
-                               }
-                               return ;
-                       }
-               }
-               thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type);
-
-       } else if (type == D_TLB) {
-               if (misr.sp) {
-                       handle_lds(regs);
-                       return;
-               }
-
-               rr = vcpu_get_rr(v, vadr);
-               itir = rr & (RR_RID_MASK | RR_PS_MASK);
-
-               if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) {
-                       if (vpsr & IA64_PSR_IC) {
-                               vcpu_set_isr(v, misr.val);
-                               alt_dtlb(v, vadr);
-                       } else {
-                               nested_dtlb(v);
-                       }
-                       return ;
-               }
-
-               vpta.val = vcpu_get_pta(v);
-               /* avoid recursively walking (short format) VHPT */
-
-               vhpt_adr = vcpu_thash(v, vadr);
-               if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
-                       /* VHPT successfully read.  */
-                       if (!(pteval & _PAGE_P)) {
-                               if (vpsr & IA64_PSR_IC) {
-                                       vcpu_set_isr(v, misr.val);
-                                       dtlb_fault(v, vadr);
-                               } else {
-                                       nested_dtlb(v);
-                               }
-                       } else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) {
-                               thash_purge_and_insert(v, pteval, itir,
-                                                               vadr, D_TLB);
-                       } else if (vpsr & IA64_PSR_IC) {
-                               vcpu_set_isr(v, misr.val);
-                               dtlb_fault(v, vadr);
-                       } else {
-                               nested_dtlb(v);
-                       }
-               } else {
-                       /* Can't read VHPT.  */
-                       if (vpsr & IA64_PSR_IC) {
-                               vcpu_set_isr(v, misr.val);
-                               dvhpt_fault(v, vadr);
-                       } else {
-                               nested_dtlb(v);
-                       }
-               }
-       } else if (type == I_TLB) {
-               if (!(vpsr & IA64_PSR_IC))
-                       misr.ni = 1;
-               if (!vhpt_enabled(v, vadr, INST_REF)) {
-                       vcpu_set_isr(v, misr.val);
-                       alt_itlb(v, vadr);
-                       return;
-               }
-
-               vpta.val = vcpu_get_pta(v);
-
-               vhpt_adr = vcpu_thash(v, vadr);
-               if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
-                       /* VHPT successfully read.  */
-                       if (pteval & _PAGE_P) {
-                               if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) {
-                                       vcpu_set_isr(v, misr.val);
-                                       itlb_fault(v, vadr);
-                                       return ;
-                               }
-                               rr = vcpu_get_rr(v, vadr);
-                               itir = rr & (RR_RID_MASK | RR_PS_MASK);
-                               thash_purge_and_insert(v, pteval, itir,
-                                                       vadr, I_TLB);
-                       } else {
-                               vcpu_set_isr(v, misr.val);
-                               inst_page_not_present(v, vadr);
-                       }
-               } else {
-                       vcpu_set_isr(v, misr.val);
-                       ivhpt_fault(v, vadr);
-               }
-       }
-}
-
-void kvm_vexirq(struct kvm_vcpu *vcpu)
-{
-       u64 vpsr, isr;
-       struct kvm_pt_regs *regs;
-
-       regs = vcpu_regs(vcpu);
-       vpsr = VCPU(vcpu, vpsr);
-       isr = vpsr & IA64_PSR_RI;
-       reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/
-}
-
-void kvm_ia64_handle_irq(struct kvm_vcpu *v)
-{
-       struct exit_ctl_data *p = &v->arch.exit_data;
-       long psr;
-
-       local_irq_save(psr);
-       p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
-       vmm_transition(v);
-       local_irq_restore(psr);
-
-       VMX(v, timer_check) = 1;
-
-}
-
-static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos)
-{
-       u64 oldrid, moldrid, oldpsbits, vaddr;
-       struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos];
-       vaddr = p->vaddr;
-
-       oldrid = VMX(v, vrr[0]);
-       VMX(v, vrr[0]) = p->rr;
-       oldpsbits = VMX(v, psbits[0]);
-       VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]);
-       moldrid = ia64_get_rr(0x0);
-       ia64_set_rr(0x0, vrrtomrr(p->rr));
-       ia64_srlz_d();
-
-       vaddr = PAGEALIGN(vaddr, p->ps);
-       thash_purge_entries_remote(v, vaddr, p->ps);
-
-       VMX(v, vrr[0]) = oldrid;
-       VMX(v, psbits[0]) = oldpsbits;
-       ia64_set_rr(0x0, moldrid);
-       ia64_dv_serialize_data();
-}
-
-static void vcpu_do_resume(struct kvm_vcpu *vcpu)
-{
-       /*Re-init VHPT and VTLB once from resume*/
-       vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES;
-       thash_init(&vcpu->arch.vhpt, VHPT_SHIFT);
-       vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES;
-       thash_init(&vcpu->arch.vtlb, VTLB_SHIFT);
-
-       ia64_set_pta(vcpu->arch.vhpt.pta.val);
-}
-
-static void vmm_sanity_check(struct kvm_vcpu *vcpu)
-{
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-       if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
-               panic_vm(vcpu, "Failed to do vmm sanity check,"
-                       "it maybe caused by crashed vmm!!\n\n");
-       }
-}
-
-static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
-{
-       vmm_sanity_check(vcpu); /*Guarantee vcpu running on healthy vmm!*/
-
-       if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
-               vcpu_do_resume(vcpu);
-               return;
-       }
-
-       if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) {
-               thash_purge_all(vcpu);
-               return;
-       }
-
-       if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) {
-               while (vcpu->arch.ptc_g_count > 0)
-                       ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count);
-       }
-}
-
-void vmm_transition(struct kvm_vcpu *vcpu)
-{
-       ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd,
-                       1, 0, 0, 0, 0, 0);
-       vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host);
-       ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd,
-                                               1, 0, 0, 0, 0, 0);
-       kvm_do_resume_op(vcpu);
-}
-
-void vmm_panic_handler(u64 vec)
-{
-       struct kvm_vcpu *vcpu = current_vcpu;
-       vmm_sanity = 0;
-       panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
-                       vec2off[vec]);
-}
diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S
deleted file mode 100644 (file)
index 30897d4..0000000
+++ /dev/null
@@ -1,1038 +0,0 @@
-/* Save all processor states
- *
- * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com>
- * Copyright (c) 2007 Anthony Xu   <anthony.xu@intel.com>
- */
-
-#include <asm/asmmacro.h>
-#include "asm-offsets.h"
-
-
-#define CTX(name)    VMM_CTX_##name##_OFFSET
-
-       /*
-        *      r32:            context_t base address
-        */
-#define        SAVE_BRANCH_REGS                        \
-       add     r2 = CTX(B0),r32;               \
-       add     r3 = CTX(B1),r32;               \
-       mov     r16 = b0;                       \
-       mov     r17 = b1;                       \
-       ;;                                      \
-       st8     [r2]=r16,16;                    \
-       st8     [r3]=r17,16;                    \
-       ;;                                      \
-       mov     r16 = b2;                       \
-       mov     r17 = b3;                       \
-       ;;                                      \
-       st8     [r2]=r16,16;                    \
-       st8     [r3]=r17,16;                    \
-       ;;                                      \
-       mov     r16 = b4;                       \
-       mov     r17 = b5;                       \
-       ;;                                      \
-       st8     [r2]=r16;                       \
-       st8     [r3]=r17;                       \
-       ;;
-
-       /*
-        *      r33:            context_t base address
-        */
-#define        RESTORE_BRANCH_REGS                     \
-       add     r2 = CTX(B0),r33;               \
-       add     r3 = CTX(B1),r33;               \
-       ;;                                      \
-       ld8     r16=[r2],16;                    \
-       ld8     r17=[r3],16;                    \
-       ;;                                      \
-       mov     b0 = r16;                       \
-       mov     b1 = r17;                       \
-       ;;                                      \
-       ld8     r16=[r2],16;                    \
-       ld8     r17=[r3],16;                    \
-       ;;                                      \
-       mov     b2 = r16;                       \
-       mov     b3 = r17;                       \
-       ;;                                      \
-       ld8     r16=[r2];                       \
-       ld8     r17=[r3];                       \
-       ;;                                      \
-       mov     b4=r16;                         \
-       mov     b5=r17;                         \
-       ;;
-
-
-       /*
-        *      r32: context_t base address
-        *      bsw == 1
-        *      Save all bank1 general registers, r4 ~ r7
-        */
-#define        SAVE_GENERAL_REGS                       \
-       add     r2=CTX(R4),r32;                 \
-       add     r3=CTX(R5),r32;                 \
-       ;;                                      \
-.mem.offset 0,0;                               \
-       st8.spill       [r2]=r4,16;             \
-.mem.offset 8,0;                               \
-       st8.spill       [r3]=r5,16;             \
-       ;;                                      \
-.mem.offset 0,0;                               \
-       st8.spill       [r2]=r6,48;             \
-.mem.offset 8,0;                               \
-       st8.spill       [r3]=r7,48;             \
-       ;;                                      \
-.mem.offset 0,0;                               \
-    st8.spill    [r2]=r12;                     \
-.mem.offset 8,0;                               \
-    st8.spill    [r3]=r13;                     \
-    ;;
-
-       /*
-        *      r33: context_t base address
-        *      bsw == 1
-        */
-#define        RESTORE_GENERAL_REGS                    \
-       add     r2=CTX(R4),r33;                 \
-       add     r3=CTX(R5),r33;                 \
-       ;;                                      \
-       ld8.fill        r4=[r2],16;             \
-       ld8.fill        r5=[r3],16;             \
-       ;;                                      \
-       ld8.fill        r6=[r2],48;             \
-       ld8.fill        r7=[r3],48;             \
-       ;;                                      \
-       ld8.fill    r12=[r2];                   \
-       ld8.fill    r13 =[r3];                  \
-       ;;
-
-
-
-
-       /*
-        *      r32:            context_t base address
-        */
-#define        SAVE_KERNEL_REGS                        \
-       add     r2 = CTX(KR0),r32;              \
-       add     r3 = CTX(KR1),r32;              \
-       mov     r16 = ar.k0;                    \
-       mov     r17 = ar.k1;                    \
-       ;;                                      \
-       st8     [r2] = r16,16;                  \
-       st8     [r3] = r17,16;                  \
-       ;;                                      \
-       mov     r16 = ar.k2;                    \
-       mov     r17 = ar.k3;                    \
-       ;;                                      \
-       st8     [r2] = r16,16;                  \
-       st8     [r3] = r17,16;                  \
-       ;;                                      \
-       mov     r16 = ar.k4;                    \
-       mov     r17 = ar.k5;                    \
-       ;;                                      \
-       st8     [r2] = r16,16;                  \
-       st8     [r3] = r17,16;                  \
-       ;;                                      \
-       mov     r16 = ar.k6;                    \
-       mov     r17 = ar.k7;                    \
-       ;;                                      \
-       st8     [r2] = r16;                     \
-       st8     [r3] = r17;                     \
-       ;;
-
-
-
-       /*
-        *      r33:            context_t base address
-        */
-#define        RESTORE_KERNEL_REGS                     \
-       add     r2 = CTX(KR0),r33;              \
-       add     r3 = CTX(KR1),r33;              \
-       ;;                                      \
-       ld8     r16=[r2],16;                    \
-       ld8     r17=[r3],16;                    \
-       ;;                                      \
-       mov     ar.k0=r16;                      \
-       mov     ar.k1=r17;                      \
-       ;;                                      \
-       ld8     r16=[r2],16;                    \
-       ld8     r17=[r3],16;                    \
-       ;;                                      \
-       mov     ar.k2=r16;                      \
-       mov     ar.k3=r17;                      \
-       ;;                                      \
-       ld8     r16=[r2],16;                    \
-       ld8     r17=[r3],16;                    \
-       ;;                                      \
-       mov     ar.k4=r16;                      \
-       mov     ar.k5=r17;                      \
-       ;;                                      \
-       ld8     r16=[r2],16;                    \
-       ld8     r17=[r3],16;                    \
-       ;;                                      \
-       mov     ar.k6=r16;                      \
-       mov     ar.k7=r17;                      \
-       ;;
-
-
-
-       /*
-        *      r32:            context_t base address
-        */
-#define        SAVE_APP_REGS                           \
-       add  r2 = CTX(BSPSTORE),r32;            \
-       mov  r16 = ar.bspstore;                 \
-       ;;                                      \
-       st8  [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\
-       mov  r16 = ar.rnat;                     \
-       ;;                                      \
-       st8  [r2] = r16,CTX(FCR)-CTX(RNAT);     \
-       mov  r16 = ar.fcr;                      \
-       ;;                                      \
-       st8  [r2] = r16,CTX(EFLAG)-CTX(FCR);    \
-       mov  r16 = ar.eflag;                    \
-       ;;                                      \
-       st8  [r2] = r16,CTX(CFLG)-CTX(EFLAG);   \
-       mov  r16 = ar.cflg;                     \
-       ;;                                      \
-       st8  [r2] = r16,CTX(FSR)-CTX(CFLG);     \
-       mov  r16 = ar.fsr;                      \
-       ;;                                      \
-       st8  [r2] = r16,CTX(FIR)-CTX(FSR);      \
-       mov  r16 = ar.fir;                      \
-       ;;                                      \
-       st8  [r2] = r16,CTX(FDR)-CTX(FIR);      \
-       mov  r16 = ar.fdr;                      \
-       ;;                                      \
-       st8  [r2] = r16,CTX(UNAT)-CTX(FDR);     \
-       mov  r16 = ar.unat;                     \
-       ;;                                      \
-       st8  [r2] = r16,CTX(FPSR)-CTX(UNAT);    \
-       mov  r16 = ar.fpsr;                     \
-       ;;                                      \
-       st8  [r2] = r16,CTX(PFS)-CTX(FPSR);     \
-       mov  r16 = ar.pfs;                      \
-       ;;                                      \
-       st8  [r2] = r16,CTX(LC)-CTX(PFS);       \
-       mov  r16 = ar.lc;                       \
-       ;;                                      \
-       st8  [r2] = r16;                        \
-       ;;
-
-       /*
-        *      r33:            context_t base address
-        */
-#define        RESTORE_APP_REGS                        \
-       add  r2=CTX(BSPSTORE),r33;              \
-       ;;                                      \
-       ld8  r16=[r2],CTX(RNAT)-CTX(BSPSTORE);  \
-       ;;                                      \
-       mov  ar.bspstore=r16;                   \
-       ld8  r16=[r2],CTX(FCR)-CTX(RNAT);       \
-       ;;                                      \
-       mov  ar.rnat=r16;                       \
-       ld8  r16=[r2],CTX(EFLAG)-CTX(FCR);      \
-       ;;                                      \
-       mov  ar.fcr=r16;                        \
-       ld8  r16=[r2],CTX(CFLG)-CTX(EFLAG);     \
-       ;;                                      \
-       mov  ar.eflag=r16;                      \
-       ld8  r16=[r2],CTX(FSR)-CTX(CFLG);       \
-       ;;                                      \
-       mov  ar.cflg=r16;                       \
-       ld8  r16=[r2],CTX(FIR)-CTX(FSR);        \
-       ;;                                      \
-       mov  ar.fsr=r16;                        \
-       ld8  r16=[r2],CTX(FDR)-CTX(FIR);        \
-       ;;                                      \
-       mov  ar.fir=r16;                        \
-       ld8  r16=[r2],CTX(UNAT)-CTX(FDR);       \
-       ;;                                      \
-       mov  ar.fdr=r16;                        \
-       ld8  r16=[r2],CTX(FPSR)-CTX(UNAT);      \
-       ;;                                      \
-       mov  ar.unat=r16;                       \
-       ld8  r16=[r2],CTX(PFS)-CTX(FPSR);       \
-       ;;                                      \
-       mov  ar.fpsr=r16;                       \
-       ld8  r16=[r2],CTX(LC)-CTX(PFS);         \
-       ;;                                      \
-       mov  ar.pfs=r16;                        \
-       ld8  r16=[r2];                          \
-       ;;                                      \
-       mov  ar.lc=r16;                         \
-       ;;
-
-       /*
-        *      r32:            context_t base address
-        */
-#define        SAVE_CTL_REGS                           \
-       add     r2 = CTX(DCR),r32;              \
-       mov     r16 = cr.dcr;                   \
-       ;;                                      \
-       st8     [r2] = r16,CTX(IVA)-CTX(DCR);   \
-       ;;                                      \
-       mov     r16 = cr.iva;                   \
-       ;;                                      \
-       st8     [r2] = r16,CTX(PTA)-CTX(IVA);   \
-       ;;                                      \
-       mov r16 = cr.pta;                       \
-       ;;                                      \
-       st8 [r2] = r16 ;                        \
-       ;;
-
-       /*
-        *      r33:            context_t base address
-        */
-#define        RESTORE_CTL_REGS                                \
-       add     r2 = CTX(DCR),r33;                      \
-       ;;                                              \
-       ld8     r16 = [r2],CTX(IVA)-CTX(DCR);           \
-       ;;                                              \
-       mov     cr.dcr = r16;                           \
-       dv_serialize_data;                              \
-       ;;                                              \
-       ld8     r16 = [r2],CTX(PTA)-CTX(IVA);           \
-       ;;                                              \
-       mov     cr.iva = r16;                           \
-       dv_serialize_data;                              \
-       ;;                                              \
-       ld8 r16 = [r2];                                 \
-       ;;                                              \
-       mov cr.pta = r16;                               \
-       dv_serialize_data;                              \
-       ;;
-
-
-       /*
-        *      r32:            context_t base address
-        */
-#define        SAVE_REGION_REGS                        \
-       add     r2=CTX(RR0),r32;                \
-       mov     r16=rr[r0];                     \
-       dep.z   r18=1,61,3;                     \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       mov     r17=rr[r18];                    \
-       dep.z   r18=2,61,3;                     \
-       ;;                                      \
-       st8     [r2]=r17,8;                     \
-       mov     r16=rr[r18];                    \
-       dep.z   r18=3,61,3;                     \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       mov     r17=rr[r18];                    \
-       dep.z   r18=4,61,3;                     \
-       ;;                                      \
-       st8     [r2]=r17,8;                     \
-       mov     r16=rr[r18];                    \
-       dep.z   r18=5,61,3;                     \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       mov     r17=rr[r18];                    \
-       dep.z   r18=7,61,3;                     \
-       ;;                                      \
-       st8     [r2]=r17,16;                    \
-       mov     r16=rr[r18];                    \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       ;;
-
-       /*
-        *      r33:context_t base address
-        */
-#define        RESTORE_REGION_REGS     \
-       add     r2=CTX(RR0),r33;\
-       mov r18=r0;             \
-       ;;                      \
-       ld8     r20=[r2],8;     \
-       ;;      /* rr0 */       \
-       ld8     r21=[r2],8;     \
-       ;;      /* rr1 */       \
-       ld8     r22=[r2],8;     \
-       ;;      /* rr2 */       \
-       ld8     r23=[r2],8;     \
-       ;;      /* rr3 */       \
-       ld8     r24=[r2],8;     \
-       ;;      /* rr4 */       \
-       ld8     r25=[r2],16;    \
-       ;;      /* rr5 */       \
-       ld8     r27=[r2];       \
-       ;;      /* rr7 */       \
-       mov rr[r18]=r20;        \
-       dep.z   r18=1,61,3;     \
-       ;;  /* rr1 */           \
-       mov rr[r18]=r21;        \
-       dep.z   r18=2,61,3;     \
-       ;;  /* rr2 */           \
-       mov rr[r18]=r22;        \
-       dep.z   r18=3,61,3;     \
-       ;;  /* rr3 */           \
-       mov rr[r18]=r23;        \
-       dep.z   r18=4,61,3;     \
-       ;;  /* rr4 */           \
-       mov rr[r18]=r24;        \
-       dep.z   r18=5,61,3;     \
-       ;;  /* rr5 */           \
-       mov rr[r18]=r25;        \
-       dep.z   r18=7,61,3;     \
-       ;;  /* rr7 */           \
-       mov rr[r18]=r27;        \
-       ;;                      \
-       srlz.i;                 \
-       ;;
-
-
-
-       /*
-        *      r32:    context_t base address
-        *      r36~r39:scratch registers
-        */
-#define        SAVE_DEBUG_REGS                         \
-       add     r2=CTX(IBR0),r32;               \
-       add     r3=CTX(DBR0),r32;               \
-       mov     r16=ibr[r0];                    \
-       mov     r17=dbr[r0];                    \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       add     r18=1,r0;                       \
-       ;;                                      \
-       mov     r16=ibr[r18];                   \
-       mov     r17=dbr[r18];                   \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       add     r18=2,r0;                       \
-       ;;                                      \
-       mov     r16=ibr[r18];                   \
-       mov     r17=dbr[r18];                   \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       add     r18=2,r0;                       \
-       ;;                                      \
-       mov     r16=ibr[r18];                   \
-       mov     r17=dbr[r18];                   \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       add     r18=3,r0;                       \
-       ;;                                      \
-       mov     r16=ibr[r18];                   \
-       mov     r17=dbr[r18];                   \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       add     r18=4,r0;                       \
-       ;;                                      \
-       mov     r16=ibr[r18];                   \
-       mov     r17=dbr[r18];                   \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       add     r18=5,r0;                       \
-       ;;                                      \
-       mov     r16=ibr[r18];                   \
-       mov     r17=dbr[r18];                   \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       add     r18=6,r0;                       \
-       ;;                                      \
-       mov     r16=ibr[r18];                   \
-       mov     r17=dbr[r18];                   \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       add     r18=7,r0;                       \
-       ;;                                      \
-       mov     r16=ibr[r18];                   \
-       mov     r17=dbr[r18];                   \
-       ;;                                      \
-       st8     [r2]=r16,8;                     \
-       st8     [r3]=r17,8;                     \
-       ;;
-
-
-/*
- *      r33:    point to context_t structure
- *      ar.lc is corrupted.
- */
-#define RESTORE_DEBUG_REGS                     \
-       add     r2=CTX(IBR0),r33;               \
-       add     r3=CTX(DBR0),r33;               \
-       mov r16=7;                              \
-       mov r17=r0;                             \
-       ;;                                      \
-       mov ar.lc = r16;                        \
-       ;;                                      \
-1:                                             \
-       ld8 r18=[r2],8;                         \
-       ld8 r19=[r3],8;                         \
-       ;;                                      \
-       mov ibr[r17]=r18;                       \
-       mov dbr[r17]=r19;                       \
-       ;;                                      \
-       srlz.i;                                 \
-       ;;                                      \
-       add r17=1,r17;                          \
-       br.cloop.sptk 1b;                       \
-       ;;
-
-
-       /*
-        *      r32:            context_t base address
-        */
-#define        SAVE_FPU_LOW                            \
-       add     r2=CTX(F2),r32;                 \
-       add     r3=CTX(F3),r32;                 \
-       ;;                                      \
-       stf.spill.nta   [r2]=f2,32;             \
-       stf.spill.nta   [r3]=f3,32;             \
-       ;;                                      \
-       stf.spill.nta   [r2]=f4,32;             \
-       stf.spill.nta   [r3]=f5,32;             \
-       ;;                                      \
-       stf.spill.nta   [r2]=f6,32;             \
-       stf.spill.nta   [r3]=f7,32;             \
-       ;;                                      \
-       stf.spill.nta   [r2]=f8,32;             \
-       stf.spill.nta   [r3]=f9,32;             \
-       ;;                                      \
-       stf.spill.nta   [r2]=f10,32;            \
-       stf.spill.nta   [r3]=f11,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f12,32;            \
-       stf.spill.nta   [r3]=f13,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f14,32;            \
-       stf.spill.nta   [r3]=f15,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f16,32;            \
-       stf.spill.nta   [r3]=f17,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f18,32;            \
-       stf.spill.nta   [r3]=f19,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f20,32;            \
-       stf.spill.nta   [r3]=f21,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f22,32;            \
-       stf.spill.nta   [r3]=f23,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f24,32;            \
-       stf.spill.nta   [r3]=f25,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f26,32;            \
-       stf.spill.nta   [r3]=f27,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f28,32;            \
-       stf.spill.nta   [r3]=f29,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f30;               \
-       stf.spill.nta   [r3]=f31;               \
-       ;;
-
-       /*
-        *      r32:            context_t base address
-        */
-#define        SAVE_FPU_HIGH                           \
-       add     r2=CTX(F32),r32;                \
-       add     r3=CTX(F33),r32;                \
-       ;;                                      \
-       stf.spill.nta   [r2]=f32,32;            \
-       stf.spill.nta   [r3]=f33,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f34,32;            \
-       stf.spill.nta   [r3]=f35,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f36,32;            \
-       stf.spill.nta   [r3]=f37,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f38,32;            \
-       stf.spill.nta   [r3]=f39,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f40,32;            \
-       stf.spill.nta   [r3]=f41,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f42,32;            \
-       stf.spill.nta   [r3]=f43,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f44,32;            \
-       stf.spill.nta   [r3]=f45,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f46,32;            \
-       stf.spill.nta   [r3]=f47,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f48,32;            \
-       stf.spill.nta   [r3]=f49,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f50,32;            \
-       stf.spill.nta   [r3]=f51,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f52,32;            \
-       stf.spill.nta   [r3]=f53,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f54,32;            \
-       stf.spill.nta   [r3]=f55,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f56,32;            \
-       stf.spill.nta   [r3]=f57,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f58,32;            \
-       stf.spill.nta   [r3]=f59,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f60,32;            \
-       stf.spill.nta   [r3]=f61,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f62,32;            \
-       stf.spill.nta   [r3]=f63,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f64,32;            \
-       stf.spill.nta   [r3]=f65,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f66,32;            \
-       stf.spill.nta   [r3]=f67,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f68,32;            \
-       stf.spill.nta   [r3]=f69,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f70,32;            \
-       stf.spill.nta   [r3]=f71,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f72,32;            \
-       stf.spill.nta   [r3]=f73,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f74,32;            \
-       stf.spill.nta   [r3]=f75,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f76,32;            \
-       stf.spill.nta   [r3]=f77,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f78,32;            \
-       stf.spill.nta   [r3]=f79,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f80,32;            \
-       stf.spill.nta   [r3]=f81,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f82,32;            \
-       stf.spill.nta   [r3]=f83,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f84,32;            \
-       stf.spill.nta   [r3]=f85,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f86,32;            \
-       stf.spill.nta   [r3]=f87,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f88,32;            \
-       stf.spill.nta   [r3]=f89,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f90,32;            \
-       stf.spill.nta   [r3]=f91,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f92,32;            \
-       stf.spill.nta   [r3]=f93,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f94,32;            \
-       stf.spill.nta   [r3]=f95,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f96,32;            \
-       stf.spill.nta   [r3]=f97,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f98,32;            \
-       stf.spill.nta   [r3]=f99,32;            \
-       ;;                                      \
-       stf.spill.nta   [r2]=f100,32;           \
-       stf.spill.nta   [r3]=f101,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f102,32;           \
-       stf.spill.nta   [r3]=f103,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f104,32;           \
-       stf.spill.nta   [r3]=f105,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f106,32;           \
-       stf.spill.nta   [r3]=f107,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f108,32;           \
-       stf.spill.nta   [r3]=f109,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f110,32;           \
-       stf.spill.nta   [r3]=f111,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f112,32;           \
-       stf.spill.nta   [r3]=f113,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f114,32;           \
-       stf.spill.nta   [r3]=f115,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f116,32;           \
-       stf.spill.nta   [r3]=f117,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f118,32;           \
-       stf.spill.nta   [r3]=f119,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f120,32;           \
-       stf.spill.nta   [r3]=f121,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f122,32;           \
-       stf.spill.nta   [r3]=f123,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f124,32;           \
-       stf.spill.nta   [r3]=f125,32;           \
-       ;;                                      \
-       stf.spill.nta   [r2]=f126;              \
-       stf.spill.nta   [r3]=f127;              \
-       ;;
-
-     /*
-      *      r33:    point to context_t structure
-      */
-#define        RESTORE_FPU_LOW                         \
-    add     r2 = CTX(F2), r33;                 \
-    add     r3 = CTX(F3), r33;                 \
-    ;;                                         \
-    ldf.fill.nta f2 = [r2], 32;                        \
-    ldf.fill.nta f3 = [r3], 32;                        \
-    ;;                                         \
-    ldf.fill.nta f4 = [r2], 32;                        \
-    ldf.fill.nta f5 = [r3], 32;                        \
-    ;;                                         \
-    ldf.fill.nta f6 = [r2], 32;                        \
-    ldf.fill.nta f7 = [r3], 32;                        \
-    ;;                                         \
-    ldf.fill.nta f8 = [r2], 32;                        \
-    ldf.fill.nta f9 = [r3], 32;                        \
-    ;;                                         \
-    ldf.fill.nta f10 = [r2], 32;               \
-    ldf.fill.nta f11 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f12 = [r2], 32;               \
-    ldf.fill.nta f13 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f14 = [r2], 32;               \
-    ldf.fill.nta f15 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f16 = [r2], 32;               \
-    ldf.fill.nta f17 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f18 = [r2], 32;               \
-    ldf.fill.nta f19 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f20 = [r2], 32;               \
-    ldf.fill.nta f21 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f22 = [r2], 32;               \
-    ldf.fill.nta f23 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f24 = [r2], 32;               \
-    ldf.fill.nta f25 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f26 = [r2], 32;               \
-    ldf.fill.nta f27 = [r3], 32;               \
-       ;;                                      \
-    ldf.fill.nta f28 = [r2], 32;               \
-    ldf.fill.nta f29 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f30 = [r2], 32;               \
-    ldf.fill.nta f31 = [r3], 32;               \
-    ;;
-
-
-
-    /*
-     *      r33:    point to context_t structure
-     */
-#define        RESTORE_FPU_HIGH                        \
-    add     r2 = CTX(F32), r33;                        \
-    add     r3 = CTX(F33), r33;                        \
-    ;;                                         \
-    ldf.fill.nta f32 = [r2], 32;               \
-    ldf.fill.nta f33 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f34 = [r2], 32;               \
-    ldf.fill.nta f35 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f36 = [r2], 32;               \
-    ldf.fill.nta f37 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f38 = [r2], 32;               \
-    ldf.fill.nta f39 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f40 = [r2], 32;               \
-    ldf.fill.nta f41 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f42 = [r2], 32;               \
-    ldf.fill.nta f43 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f44 = [r2], 32;               \
-    ldf.fill.nta f45 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f46 = [r2], 32;               \
-    ldf.fill.nta f47 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f48 = [r2], 32;               \
-    ldf.fill.nta f49 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f50 = [r2], 32;               \
-    ldf.fill.nta f51 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f52 = [r2], 32;               \
-    ldf.fill.nta f53 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f54 = [r2], 32;               \
-    ldf.fill.nta f55 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f56 = [r2], 32;               \
-    ldf.fill.nta f57 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f58 = [r2], 32;               \
-    ldf.fill.nta f59 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f60 = [r2], 32;               \
-    ldf.fill.nta f61 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f62 = [r2], 32;               \
-    ldf.fill.nta f63 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f64 = [r2], 32;               \
-    ldf.fill.nta f65 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f66 = [r2], 32;               \
-    ldf.fill.nta f67 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f68 = [r2], 32;               \
-    ldf.fill.nta f69 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f70 = [r2], 32;               \
-    ldf.fill.nta f71 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f72 = [r2], 32;               \
-    ldf.fill.nta f73 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f74 = [r2], 32;               \
-    ldf.fill.nta f75 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f76 = [r2], 32;               \
-    ldf.fill.nta f77 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f78 = [r2], 32;               \
-    ldf.fill.nta f79 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f80 = [r2], 32;               \
-    ldf.fill.nta f81 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f82 = [r2], 32;               \
-    ldf.fill.nta f83 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f84 = [r2], 32;               \
-    ldf.fill.nta f85 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f86 = [r2], 32;               \
-    ldf.fill.nta f87 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f88 = [r2], 32;               \
-    ldf.fill.nta f89 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f90 = [r2], 32;               \
-    ldf.fill.nta f91 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f92 = [r2], 32;               \
-    ldf.fill.nta f93 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f94 = [r2], 32;               \
-    ldf.fill.nta f95 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f96 = [r2], 32;               \
-    ldf.fill.nta f97 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f98 = [r2], 32;               \
-    ldf.fill.nta f99 = [r3], 32;               \
-    ;;                                         \
-    ldf.fill.nta f100 = [r2], 32;              \
-    ldf.fill.nta f101 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f102 = [r2], 32;              \
-    ldf.fill.nta f103 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f104 = [r2], 32;              \
-    ldf.fill.nta f105 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f106 = [r2], 32;              \
-    ldf.fill.nta f107 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f108 = [r2], 32;              \
-    ldf.fill.nta f109 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f110 = [r2], 32;              \
-    ldf.fill.nta f111 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f112 = [r2], 32;              \
-    ldf.fill.nta f113 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f114 = [r2], 32;              \
-    ldf.fill.nta f115 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f116 = [r2], 32;              \
-    ldf.fill.nta f117 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f118 = [r2], 32;              \
-    ldf.fill.nta f119 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f120 = [r2], 32;              \
-    ldf.fill.nta f121 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f122 = [r2], 32;              \
-    ldf.fill.nta f123 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f124 = [r2], 32;              \
-    ldf.fill.nta f125 = [r3], 32;              \
-    ;;                                         \
-    ldf.fill.nta f126 = [r2], 32;              \
-    ldf.fill.nta f127 = [r3], 32;              \
-    ;;
-
-       /*
-        *      r32:            context_t base address
-        */
-#define        SAVE_PTK_REGS                           \
-    add r2=CTX(PKR0), r32;                     \
-    mov r16=7;                                 \
-    ;;                                                 \
-    mov ar.lc=r16;                             \
-    mov r17=r0;                                        \
-    ;;                                         \
-1:                                             \
-    mov r18=pkr[r17];                          \
-    ;;                                         \
-    srlz.i;                                    \
-    ;;                                                 \
-    st8 [r2]=r18, 8;                           \
-    ;;                                         \
-    add r17 =1,r17;                            \
-    ;;                                         \
-    br.cloop.sptk 1b;                          \
-    ;;
-
-/*
- *      r33:    point to context_t structure
- *      ar.lc is corrupted.
- */
-#define RESTORE_PTK_REGS                       \
-    add r2=CTX(PKR0), r33;                     \
-    mov r16=7;                                 \
-    ;;                                                 \
-    mov ar.lc=r16;                             \
-    mov r17=r0;                                        \
-    ;;                                         \
-1:                                             \
-    ld8 r18=[r2], 8;                           \
-    ;;                                         \
-    mov pkr[r17]=r18;                          \
-    ;;                                         \
-    srlz.i;                                    \
-    ;;                                                 \
-    add r17 =1,r17;                            \
-    ;;                                         \
-    br.cloop.sptk 1b;                          \
-    ;;
-
-
-/*
- * void vmm_trampoline( context_t * from,
- *                     context_t * to)
- *
- *     from:   r32
- *     to:     r33
- *  note: interrupts are disabled before calling this function.
- */
-GLOBAL_ENTRY(vmm_trampoline)
-    mov r16 = psr
-    adds r2 = CTX(PSR), r32
-    ;;
-    st8 [r2] = r16, 8       // psr
-    mov r17 = pr
-    ;;
-    st8 [r2] = r17, 8       // pr
-    mov r18 = ar.unat
-    ;;
-    st8 [r2] = r18
-    mov r17 = ar.rsc
-    ;;
-    adds r2 = CTX(RSC),r32
-    ;;
-    st8 [r2]= r17
-    mov ar.rsc =0
-    flushrs
-    ;;
-    SAVE_GENERAL_REGS
-    ;;
-    SAVE_KERNEL_REGS
-    ;;
-    SAVE_APP_REGS
-    ;;
-    SAVE_BRANCH_REGS
-    ;;
-    SAVE_CTL_REGS
-    ;;
-    SAVE_REGION_REGS
-    ;;
-    //SAVE_DEBUG_REGS
-    ;;
-    rsm  psr.dfl
-    ;;
-    srlz.d
-    ;;
-    SAVE_FPU_LOW
-    ;;
-    rsm  psr.dfh
-    ;;
-    srlz.d
-    ;;
-    SAVE_FPU_HIGH
-    ;;
-    SAVE_PTK_REGS
-    ;;
-    RESTORE_PTK_REGS
-    ;;
-    RESTORE_FPU_HIGH
-    ;;
-    RESTORE_FPU_LOW
-    ;;
-    //RESTORE_DEBUG_REGS
-    ;;
-    RESTORE_REGION_REGS
-    ;;
-    RESTORE_CTL_REGS
-    ;;
-    RESTORE_BRANCH_REGS
-    ;;
-    RESTORE_APP_REGS
-    ;;
-    RESTORE_KERNEL_REGS
-    ;;
-    RESTORE_GENERAL_REGS
-    ;;
-    adds r2=CTX(PSR), r33
-    ;;
-    ld8 r16=[r2], 8       // psr
-    ;;
-    mov psr.l=r16
-    ;;
-    srlz.d
-    ;;
-    ld8 r16=[r2], 8       // pr
-    ;;
-    mov pr =r16,-1
-    ld8 r16=[r2]       // unat
-    ;;
-    mov ar.unat=r16
-    ;;
-    adds r2=CTX(RSC),r33
-    ;;
-    ld8 r16 =[r2]
-    ;;
-    mov ar.rsc = r16
-    ;;
-    br.ret.sptk.few b0
-END(vmm_trampoline)
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
deleted file mode 100644 (file)
index 958815c..0000000
+++ /dev/null
@@ -1,2209 +0,0 @@
-/*
- * kvm_vcpu.c: handling all virtual cpu related things.
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- *  Shaofan Li (Susue Li) <susie.li@intel.com>
- *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
- *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *  Xiantao Zhang <xiantao.zhang@intel.com>
- */
-
-#include <linux/kvm_host.h>
-#include <linux/types.h>
-
-#include <asm/processor.h>
-#include <asm/ia64regs.h>
-#include <asm/gcc_intrin.h>
-#include <asm/kregs.h>
-#include <asm/pgtable.h>
-#include <asm/tlb.h>
-
-#include "asm-offsets.h"
-#include "vcpu.h"
-
-/*
- * Special notes:
- * - Index by it/dt/rt sequence
- * - Only existing mode transitions are allowed in this table
- * - RSE is placed at lazy mode when emulating guest partial mode
- * - If gva happens to be rr0 and rr4, only allowed case is identity
- *   mapping (gva=gpa), or panic! (How?)
- */
-int mm_switch_table[8][8] = {
-       /*  2004/09/12(Kevin): Allow switch to self */
-       /*
-        *  (it,dt,rt): (0,0,0) -> (1,1,1)
-        *  This kind of transition usually occurs in the very early
-        *  stage of Linux boot up procedure. Another case is in efi
-        *  and pal calls. (see "arch/ia64/kernel/head.S")
-        *
-        *  (it,dt,rt): (0,0,0) -> (0,1,1)
-        *  This kind of transition is found when OSYa exits efi boot
-        *  service. Due to gva = gpa in this case (Same region),
-        *  data access can be satisfied though itlb entry for physical
-        *  emulation is hit.
-        */
-       {SW_SELF, 0,  0,  SW_NOP, 0,  0,  0,  SW_P2V},
-       {0,  0,  0,  0,  0,  0,  0,  0},
-       {0,  0,  0,  0,  0,  0,  0,  0},
-       /*
-        *  (it,dt,rt): (0,1,1) -> (1,1,1)
-        *  This kind of transition is found in OSYa.
-        *
-        *  (it,dt,rt): (0,1,1) -> (0,0,0)
-        *  This kind of transition is found in OSYa
-        */
-       {SW_NOP, 0,  0,  SW_SELF, 0,  0,  0,  SW_P2V},
-       /* (1,0,0)->(1,1,1) */
-       {0,  0,  0,  0,  0,  0,  0,  SW_P2V},
-       /*
-        *  (it,dt,rt): (1,0,1) -> (1,1,1)
-        *  This kind of transition usually occurs when Linux returns
-        *  from the low level TLB miss handlers.
-        *  (see "arch/ia64/kernel/ivt.S")
-        */
-       {0,  0,  0,  0,  0,  SW_SELF, 0,  SW_P2V},
-       {0,  0,  0,  0,  0,  0,  0,  0},
-       /*
-        *  (it,dt,rt): (1,1,1) -> (1,0,1)
-        *  This kind of transition usually occurs in Linux low level
-        *  TLB miss handler. (see "arch/ia64/kernel/ivt.S")
-        *
-        *  (it,dt,rt): (1,1,1) -> (0,0,0)
-        *  This kind of transition usually occurs in pal and efi calls,
-        *  which requires running in physical mode.
-        *  (see "arch/ia64/kernel/head.S")
-        *  (1,1,1)->(1,0,0)
-        */
-
-       {SW_V2P, 0,  0,  0,  SW_V2P, SW_V2P, 0,  SW_SELF},
-};
-
-void physical_mode_init(struct kvm_vcpu  *vcpu)
-{
-       vcpu->arch.mode_flags = GUEST_IN_PHY;
-}
-
-void switch_to_physical_rid(struct kvm_vcpu *vcpu)
-{
-       unsigned long psr;
-
-       /* Save original virtual mode rr[0] and rr[4] */
-       psr = ia64_clear_ic();
-       ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0);
-       ia64_srlz_d();
-       ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4);
-       ia64_srlz_d();
-
-       ia64_set_psr(psr);
-       return;
-}
-
-void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
-{
-       unsigned long psr;
-
-       psr = ia64_clear_ic();
-       ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0);
-       ia64_srlz_d();
-       ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4);
-       ia64_srlz_d();
-       ia64_set_psr(psr);
-       return;
-}
-
-static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr)
-{
-       return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
-}
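
For orientation: the switch table is indexed by the (it,dt,rt) bits of the old and new psr. A minimal sketch, assuming MODE_IND simply packs the three bits, which is what the transition comments above imply (the real macro is defined in vcpu.h):

/* illustrative sketch: pack (it, dt, rt) into the 0..7 table index */
static inline int mode_index_sketch(int it, int dt, int rt)
{
	return (it << 2) | (dt << 1) | rt;
}

/* e.g. an (it,dt,rt) transition (0,0,0) -> (1,1,1) reads
 * mm_switch_table[0][7], which is SW_P2V (physical to virtual). */
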
-
-void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
-                                       struct ia64_psr new_psr)
-{
-       int act;
-       act = mm_switch_action(old_psr, new_psr);
-       switch (act) {
-       case SW_V2P:
-               /*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n",
-               old_psr.val, new_psr.val);*/
-               switch_to_physical_rid(vcpu);
-               /*
-                * Set rse to enforced lazy mode, to prevent active rse
-                * save/restore while the guest is in physical mode.
-                */
-               vcpu->arch.mode_flags |= GUEST_IN_PHY;
-               break;
-       case SW_P2V:
-               switch_to_virtual_rid(vcpu);
-               /*
-                * recover old mode which is saved when entering
-                * guest physical mode
-                */
-               vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
-               break;
-       case SW_SELF:
-               break;
-       case SW_NOP:
-               break;
-       default:
-               /* Sanity check */
-               break;
-       }
-       return;
-}
-
-/*
- * In physical mode, inserting tc/tr entries for regions 0 and 4 uses
- * RID[0] and RID[4], which are reserved for physical mode emulation.
- * However, what those inserted tc/tr entries want is the rid for
- * virtual mode, so the original virtual rid needs to be restored
- * before the insert.
- *
- * Operations which require such a switch include:
- *  - insertions (itc.*, itr.*)
- *  - purges (ptc.* and ptr.*)
- *  - tpa
- *  - tak
- *  - thash?, ttag?
- * All of the above need the actual virtual rid for the destination entry.
- */
-
-void check_mm_mode_switch(struct kvm_vcpu *vcpu,  struct ia64_psr old_psr,
-                                       struct ia64_psr new_psr)
-{
-
-       if ((old_psr.dt != new_psr.dt)
-                       || (old_psr.it != new_psr.it)
-                       || (old_psr.rt != new_psr.rt))
-               switch_mm_mode(vcpu, old_psr, new_psr);
-
-       return;
-}
-
-
-/*
- * In physical mode, inserting tc/tr entries for regions 0 and 4 uses
- * RID[0] and RID[4], which are reserved for physical mode emulation.
- * However, what those inserted tc/tr entries want is the rid for
- * virtual mode, so the original virtual rid needs to be restored
- * before the insert.
- *
- * Operations which require such a switch include:
- *  - insertions (itc.*, itr.*)
- *  - purges (ptc.* and ptr.*)
- *  - tpa
- *  - tak
- *  - thash?, ttag?
- * All of the above need the actual virtual rid for the destination entry.
- */
-
-void prepare_if_physical_mode(struct kvm_vcpu *vcpu)
-{
-       if (is_physical_mode(vcpu)) {
-               vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
-               switch_to_virtual_rid(vcpu);
-       }
-       return;
-}
-
-/* Recover always follows prepare */
-void recover_if_physical_mode(struct kvm_vcpu *vcpu)
-{
-       if (is_physical_mode(vcpu))
-               switch_to_physical_rid(vcpu);
-       vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
-       return;
-}
-
-#define RPT(x) ((u16) &((struct kvm_pt_regs *)0)->x)
-
-static u16 gr_info[32] = {
-       0,      /* r0 is read-only : WE SHOULD NEVER GET THIS */
-       RPT(r1), RPT(r2), RPT(r3),
-       RPT(r4), RPT(r5), RPT(r6), RPT(r7),
-       RPT(r8), RPT(r9), RPT(r10), RPT(r11),
-       RPT(r12), RPT(r13), RPT(r14), RPT(r15),
-       RPT(r16), RPT(r17), RPT(r18), RPT(r19),
-       RPT(r20), RPT(r21), RPT(r22), RPT(r23),
-       RPT(r24), RPT(r25), RPT(r26), RPT(r27),
-       RPT(r28), RPT(r29), RPT(r30), RPT(r31)
-};
-
-#define IA64_FIRST_STACKED_GR   32
-#define IA64_FIRST_ROTATING_FR  32
-
-static inline unsigned long
-rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg)
-{
-       reg += rrb;
-       if (reg >= sor)
-               reg -= sor;
-       return reg;
-}
-
-/*
- * Return the (rotated) index for the floating point register
- * REGNUM (REGNUM must range from 32-127; the result is in the
- * range 0-95).
- */
-static inline unsigned long fph_index(struct kvm_pt_regs *regs,
-                                               long regnum)
-{
-       unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
-       return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
-}
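
A worked example of the rotation, as a hedged sketch built only on rotate_reg() above:

/* illustrative sketch: resolve a rotating FP register the way fph_index()
 * does, but with the rrb.fr field passed in directly */
static inline unsigned long fph_resolve_sketch(unsigned long rrb_fr,
					       unsigned long regnum)
{
	return IA64_FIRST_ROTATING_FR +
		rotate_reg(96, rrb_fr, regnum - IA64_FIRST_ROTATING_FR);
}

/* e.g. fph_resolve_sketch(4, 33) == 37: with rrb.fr == 4, guest f33
 * lives in physical f37. */
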
-
-/*
- * The inverse of the above: given bspstore and the number of
- * registers, calculate ar.bsp.
- */
-static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr,
-                                                       long num_regs)
-{
-       long delta = ia64_rse_slot_num(addr) + num_regs;
-       int i = 0;
-
-       if (num_regs < 0)
-               delta -= 0x3e;
-       if (delta < 0) {
-               while (delta <= -0x3f) {
-                       i--;
-                       delta += 0x3f;
-               }
-       } else {
-               while (delta >= 0x3f) {
-                       i++;
-                       delta -= 0x3f;
-               }
-       }
-
-       return addr + num_regs + i;
-}
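
The 0x3e/0x3f arithmetic reflects the RSE backing-store layout: every 64th doubleword (slot 63 of each group) holds an RNaT collection rather than a register, so skipping num_regs registers must also skip one extra doubleword per collection slot crossed. A small illustrative case, assuming the starting address sits in slot 60 of its group:

/* illustrative sketch: from slot 60, skipping 10 registers crosses the
 * RNaT collection slot at 63, so 11 doublewords are consumed */
static inline unsigned long *rse_skip_example(unsigned long *addr)
{
	return kvm_rse_skip_regs(addr, 10);	/* == addr + 10 + 1 */
}
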
-
-static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
-                                       unsigned long *val, int *nat)
-{
-       unsigned long *bsp, *addr, *rnat_addr, *bspstore;
-       unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
-       unsigned long nat_mask;
-       unsigned long old_rsc, new_rsc;
-       long sof = (regs->cr_ifs) & 0x7f;
-       long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
-       long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
-       long ridx = r1 - 32;
-
-       if (ridx < sor)
-               ridx = rotate_reg(sor, rrb_gr, ridx);
-
-       old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
-       new_rsc = old_rsc&(~(0x3));
-       ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
-
-       bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-       bsp = kbs + (regs->loadrs >> 19);
-
-       addr = kvm_rse_skip_regs(bsp, -sof + ridx);
-       nat_mask = 1UL << ia64_rse_slot_num(addr);
-       rnat_addr = ia64_rse_rnat_addr(addr);
-
-       if (addr >= bspstore) {
-               ia64_flushrs();
-               ia64_mf();
-               bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-       }
-       *val = *addr;
-       if (nat) {
-               if (bspstore < rnat_addr)
-                       *nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT)
-                                                       & nat_mask);
-               else
-                       *nat = (int)!!((*rnat_addr) & nat_mask);
-       }
-       /* restore the caller's RSC mode even when nat is not requested */
-       ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
-}
-
-void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
-                               unsigned long val, unsigned long nat)
-{
-       unsigned long *bsp, *bspstore, *addr, *rnat_addr;
-       unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
-       unsigned long nat_mask;
-       unsigned long old_rsc, new_rsc, psr;
-       unsigned long rnat;
-       long sof = (regs->cr_ifs) & 0x7f;
-       long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
-       long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
-       long ridx = r1 - 32;
-
-       if (ridx < sor)
-               ridx = rotate_reg(sor, rrb_gr, ridx);
-
-       old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
-       /* put RSC to lazy mode, and set loadrs 0 */
-       new_rsc = old_rsc & (~0x3fff0003);
-       ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
-       bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */
-
-       addr = kvm_rse_skip_regs(bsp, -sof + ridx);
-       nat_mask = 1UL << ia64_rse_slot_num(addr);
-       rnat_addr = ia64_rse_rnat_addr(addr);
-
-       local_irq_save(psr);
-       bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-       if (addr >= bspstore) {
-
-               ia64_flushrs();
-               ia64_mf();
-               *addr = val;
-               bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-               rnat = ia64_getreg(_IA64_REG_AR_RNAT);
-               if (bspstore < rnat_addr)
-                       rnat = rnat & (~nat_mask);
-               else
-                       *rnat_addr = (*rnat_addr)&(~nat_mask);
-
-               ia64_mf();
-               ia64_loadrs();
-               ia64_setreg(_IA64_REG_AR_RNAT, rnat);
-       } else {
-               rnat = ia64_getreg(_IA64_REG_AR_RNAT);
-               *addr = val;
-               if (bspstore < rnat_addr)
-                       rnat = rnat&(~nat_mask);
-               else
-                       *rnat_addr = (*rnat_addr) & (~nat_mask);
-
-               ia64_setreg(_IA64_REG_AR_BSPSTORE, (unsigned long)bspstore);
-               ia64_setreg(_IA64_REG_AR_RNAT, rnat);
-       }
-       local_irq_restore(psr);
-       ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
-}
-
-void getreg(unsigned long regnum, unsigned long *val,
-                               int *nat, struct kvm_pt_regs *regs)
-{
-       unsigned long addr, *unat;
-       if (regnum >= IA64_FIRST_STACKED_GR) {
-               get_rse_reg(regs, regnum, val, nat);
-               return;
-       }
-
-       /*
-        * Now look at registers in [0-31] range and init correct UNAT
-        */
-       addr = (unsigned long)regs;
-       unat = &regs->eml_unat;
-
-       addr += gr_info[regnum];
-
-       *val  = *(unsigned long *)addr;
-       /*
-        * do it only when requested
-        */
-       if (nat)
-               *nat  = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL;
-}
-
-void setreg(unsigned long regnum, unsigned long val,
-                       int nat, struct kvm_pt_regs *regs)
-{
-       unsigned long addr;
-       unsigned long bitmask;
-       unsigned long *unat;
-
-       /*
-        * First takes care of stacked registers
-        */
-       if (regnum >= IA64_FIRST_STACKED_GR) {
-               set_rse_reg(regs, regnum, val, nat);
-               return;
-       }
-
-       /*
-        * Now look at registers in [0-31] range and init correct UNAT
-        */
-       addr = (unsigned long)regs;
-       unat = &regs->eml_unat;
-       /*
-        * add offset from base of struct
-        * and do it !
-        */
-       addr += gr_info[regnum];
-
-       *(unsigned long *)addr = val;
-
-       /*
-        * We need to update the corresponding UNAT bit to fully emulate the load;
-        * UNAT bit_pos = GR[r3]{8:3}, from EAS-2.4
-        */
-       bitmask   = 1UL << ((addr >> 3) & 0x3f);
-       if (nat)
-               *unat |= bitmask;
-        else
-               *unat &= ~bitmask;
-
-}
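
The bit position follows the {8:3} rule cited in the comment above: bits 8:3 of the spill slot's address select which of the 64 UNAT bits tracks that slot's NaT state. A hedged sketch of just that calculation:

/* illustrative sketch: UNAT bit index for a general-register spill slot */
static inline unsigned long unat_bit_sketch(unsigned long slot_addr)
{
	return (slot_addr >> 3) & 0x3f;	/* address bits 8:3 */
}

/* e.g. a spill slot whose address ends in 0x0f0 maps to bit 0x1e (30) */
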
-
-u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
-{
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-       unsigned long val;
-
-       if (!reg)
-               return 0;
-       getreg(reg, &val, 0, regs);
-       return val;
-}
-
-void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat)
-{
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-       long sof = (regs->cr_ifs) & 0x7f;
-
-       if (!reg)
-               return;
-       if (reg >= sof + 32)
-               return;
-       setreg(reg, value, nat, regs);  /* FIXME: handle NATs later*/
-}
-
-void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
-                               struct kvm_pt_regs *regs)
-{
-       /* Take floating register rotation into consideration*/
-       if (regnum >= IA64_FIRST_ROTATING_FR)
-               regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
-#define CASE_FIXED_FP(reg)                     \
-       case  (reg) :                           \
-               ia64_stf_spill(fpval, reg);     \
-       break
-
-       switch (regnum) {
-               CASE_FIXED_FP(0);
-               CASE_FIXED_FP(1);
-               CASE_FIXED_FP(2);
-               CASE_FIXED_FP(3);
-               CASE_FIXED_FP(4);
-               CASE_FIXED_FP(5);
-
-               CASE_FIXED_FP(6);
-               CASE_FIXED_FP(7);
-               CASE_FIXED_FP(8);
-               CASE_FIXED_FP(9);
-               CASE_FIXED_FP(10);
-               CASE_FIXED_FP(11);
-
-               CASE_FIXED_FP(12);
-               CASE_FIXED_FP(13);
-               CASE_FIXED_FP(14);
-               CASE_FIXED_FP(15);
-               CASE_FIXED_FP(16);
-               CASE_FIXED_FP(17);
-               CASE_FIXED_FP(18);
-               CASE_FIXED_FP(19);
-               CASE_FIXED_FP(20);
-               CASE_FIXED_FP(21);
-               CASE_FIXED_FP(22);
-               CASE_FIXED_FP(23);
-               CASE_FIXED_FP(24);
-               CASE_FIXED_FP(25);
-               CASE_FIXED_FP(26);
-               CASE_FIXED_FP(27);
-               CASE_FIXED_FP(28);
-               CASE_FIXED_FP(29);
-               CASE_FIXED_FP(30);
-               CASE_FIXED_FP(31);
-               CASE_FIXED_FP(32);
-               CASE_FIXED_FP(33);
-               CASE_FIXED_FP(34);
-               CASE_FIXED_FP(35);
-               CASE_FIXED_FP(36);
-               CASE_FIXED_FP(37);
-               CASE_FIXED_FP(38);
-               CASE_FIXED_FP(39);
-               CASE_FIXED_FP(40);
-               CASE_FIXED_FP(41);
-               CASE_FIXED_FP(42);
-               CASE_FIXED_FP(43);
-               CASE_FIXED_FP(44);
-               CASE_FIXED_FP(45);
-               CASE_FIXED_FP(46);
-               CASE_FIXED_FP(47);
-               CASE_FIXED_FP(48);
-               CASE_FIXED_FP(49);
-               CASE_FIXED_FP(50);
-               CASE_FIXED_FP(51);
-               CASE_FIXED_FP(52);
-               CASE_FIXED_FP(53);
-               CASE_FIXED_FP(54);
-               CASE_FIXED_FP(55);
-               CASE_FIXED_FP(56);
-               CASE_FIXED_FP(57);
-               CASE_FIXED_FP(58);
-               CASE_FIXED_FP(59);
-               CASE_FIXED_FP(60);
-               CASE_FIXED_FP(61);
-               CASE_FIXED_FP(62);
-               CASE_FIXED_FP(63);
-               CASE_FIXED_FP(64);
-               CASE_FIXED_FP(65);
-               CASE_FIXED_FP(66);
-               CASE_FIXED_FP(67);
-               CASE_FIXED_FP(68);
-               CASE_FIXED_FP(69);
-               CASE_FIXED_FP(70);
-               CASE_FIXED_FP(71);
-               CASE_FIXED_FP(72);
-               CASE_FIXED_FP(73);
-               CASE_FIXED_FP(74);
-               CASE_FIXED_FP(75);
-               CASE_FIXED_FP(76);
-               CASE_FIXED_FP(77);
-               CASE_FIXED_FP(78);
-               CASE_FIXED_FP(79);
-               CASE_FIXED_FP(80);
-               CASE_FIXED_FP(81);
-               CASE_FIXED_FP(82);
-               CASE_FIXED_FP(83);
-               CASE_FIXED_FP(84);
-               CASE_FIXED_FP(85);
-               CASE_FIXED_FP(86);
-               CASE_FIXED_FP(87);
-               CASE_FIXED_FP(88);
-               CASE_FIXED_FP(89);
-               CASE_FIXED_FP(90);
-               CASE_FIXED_FP(91);
-               CASE_FIXED_FP(92);
-               CASE_FIXED_FP(93);
-               CASE_FIXED_FP(94);
-               CASE_FIXED_FP(95);
-               CASE_FIXED_FP(96);
-               CASE_FIXED_FP(97);
-               CASE_FIXED_FP(98);
-               CASE_FIXED_FP(99);
-               CASE_FIXED_FP(100);
-               CASE_FIXED_FP(101);
-               CASE_FIXED_FP(102);
-               CASE_FIXED_FP(103);
-               CASE_FIXED_FP(104);
-               CASE_FIXED_FP(105);
-               CASE_FIXED_FP(106);
-               CASE_FIXED_FP(107);
-               CASE_FIXED_FP(108);
-               CASE_FIXED_FP(109);
-               CASE_FIXED_FP(110);
-               CASE_FIXED_FP(111);
-               CASE_FIXED_FP(112);
-               CASE_FIXED_FP(113);
-               CASE_FIXED_FP(114);
-               CASE_FIXED_FP(115);
-               CASE_FIXED_FP(116);
-               CASE_FIXED_FP(117);
-               CASE_FIXED_FP(118);
-               CASE_FIXED_FP(119);
-               CASE_FIXED_FP(120);
-               CASE_FIXED_FP(121);
-               CASE_FIXED_FP(122);
-               CASE_FIXED_FP(123);
-               CASE_FIXED_FP(124);
-               CASE_FIXED_FP(125);
-               CASE_FIXED_FP(126);
-               CASE_FIXED_FP(127);
-       }
-#undef CASE_FIXED_FP
-}
-
-void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
-                                       struct kvm_pt_regs *regs)
-{
-       /* Take floating register rotation into consideration*/
-       if (regnum >= IA64_FIRST_ROTATING_FR)
-               regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
-
-#define CASE_FIXED_FP(reg)                     \
-       case (reg) :                            \
-               ia64_ldf_fill(reg, fpval);      \
-       break
-
-       switch (regnum) {
-               CASE_FIXED_FP(2);
-               CASE_FIXED_FP(3);
-               CASE_FIXED_FP(4);
-               CASE_FIXED_FP(5);
-
-               CASE_FIXED_FP(6);
-               CASE_FIXED_FP(7);
-               CASE_FIXED_FP(8);
-               CASE_FIXED_FP(9);
-               CASE_FIXED_FP(10);
-               CASE_FIXED_FP(11);
-
-               CASE_FIXED_FP(12);
-               CASE_FIXED_FP(13);
-               CASE_FIXED_FP(14);
-               CASE_FIXED_FP(15);
-               CASE_FIXED_FP(16);
-               CASE_FIXED_FP(17);
-               CASE_FIXED_FP(18);
-               CASE_FIXED_FP(19);
-               CASE_FIXED_FP(20);
-               CASE_FIXED_FP(21);
-               CASE_FIXED_FP(22);
-               CASE_FIXED_FP(23);
-               CASE_FIXED_FP(24);
-               CASE_FIXED_FP(25);
-               CASE_FIXED_FP(26);
-               CASE_FIXED_FP(27);
-               CASE_FIXED_FP(28);
-               CASE_FIXED_FP(29);
-               CASE_FIXED_FP(30);
-               CASE_FIXED_FP(31);
-               CASE_FIXED_FP(32);
-               CASE_FIXED_FP(33);
-               CASE_FIXED_FP(34);
-               CASE_FIXED_FP(35);
-               CASE_FIXED_FP(36);
-               CASE_FIXED_FP(37);
-               CASE_FIXED_FP(38);
-               CASE_FIXED_FP(39);
-               CASE_FIXED_FP(40);
-               CASE_FIXED_FP(41);
-               CASE_FIXED_FP(42);
-               CASE_FIXED_FP(43);
-               CASE_FIXED_FP(44);
-               CASE_FIXED_FP(45);
-               CASE_FIXED_FP(46);
-               CASE_FIXED_FP(47);
-               CASE_FIXED_FP(48);
-               CASE_FIXED_FP(49);
-               CASE_FIXED_FP(50);
-               CASE_FIXED_FP(51);
-               CASE_FIXED_FP(52);
-               CASE_FIXED_FP(53);
-               CASE_FIXED_FP(54);
-               CASE_FIXED_FP(55);
-               CASE_FIXED_FP(56);
-               CASE_FIXED_FP(57);
-               CASE_FIXED_FP(58);
-               CASE_FIXED_FP(59);
-               CASE_FIXED_FP(60);
-               CASE_FIXED_FP(61);
-               CASE_FIXED_FP(62);
-               CASE_FIXED_FP(63);
-               CASE_FIXED_FP(64);
-               CASE_FIXED_FP(65);
-               CASE_FIXED_FP(66);
-               CASE_FIXED_FP(67);
-               CASE_FIXED_FP(68);
-               CASE_FIXED_FP(69);
-               CASE_FIXED_FP(70);
-               CASE_FIXED_FP(71);
-               CASE_FIXED_FP(72);
-               CASE_FIXED_FP(73);
-               CASE_FIXED_FP(74);
-               CASE_FIXED_FP(75);
-               CASE_FIXED_FP(76);
-               CASE_FIXED_FP(77);
-               CASE_FIXED_FP(78);
-               CASE_FIXED_FP(79);
-               CASE_FIXED_FP(80);
-               CASE_FIXED_FP(81);
-               CASE_FIXED_FP(82);
-               CASE_FIXED_FP(83);
-               CASE_FIXED_FP(84);
-               CASE_FIXED_FP(85);
-               CASE_FIXED_FP(86);
-               CASE_FIXED_FP(87);
-               CASE_FIXED_FP(88);
-               CASE_FIXED_FP(89);
-               CASE_FIXED_FP(90);
-               CASE_FIXED_FP(91);
-               CASE_FIXED_FP(92);
-               CASE_FIXED_FP(93);
-               CASE_FIXED_FP(94);
-               CASE_FIXED_FP(95);
-               CASE_FIXED_FP(96);
-               CASE_FIXED_FP(97);
-               CASE_FIXED_FP(98);
-               CASE_FIXED_FP(99);
-               CASE_FIXED_FP(100);
-               CASE_FIXED_FP(101);
-               CASE_FIXED_FP(102);
-               CASE_FIXED_FP(103);
-               CASE_FIXED_FP(104);
-               CASE_FIXED_FP(105);
-               CASE_FIXED_FP(106);
-               CASE_FIXED_FP(107);
-               CASE_FIXED_FP(108);
-               CASE_FIXED_FP(109);
-               CASE_FIXED_FP(110);
-               CASE_FIXED_FP(111);
-               CASE_FIXED_FP(112);
-               CASE_FIXED_FP(113);
-               CASE_FIXED_FP(114);
-               CASE_FIXED_FP(115);
-               CASE_FIXED_FP(116);
-               CASE_FIXED_FP(117);
-               CASE_FIXED_FP(118);
-               CASE_FIXED_FP(119);
-               CASE_FIXED_FP(120);
-               CASE_FIXED_FP(121);
-               CASE_FIXED_FP(122);
-               CASE_FIXED_FP(123);
-               CASE_FIXED_FP(124);
-               CASE_FIXED_FP(125);
-               CASE_FIXED_FP(126);
-               CASE_FIXED_FP(127);
-       }
-}
-
-void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-                                               struct ia64_fpreg *val)
-{
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-       getfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
-}
-
-void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-                                               struct ia64_fpreg *val)
-{
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-       if (reg > 1)
-               setfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
-}
-
-/*
- * The Altix RTC is mapped specially here for the vmm module
- */
-#define SN_RTC_BASE    (u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT))
-static long kvm_get_itc(struct kvm_vcpu *vcpu)
-{
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-       struct kvm *kvm = (struct kvm *)KVM_VM_BASE;
-
-       if (kvm->arch.is_sn2)
-               return (*SN_RTC_BASE);
-       else
-#endif
-               return ia64_getreg(_IA64_REG_AR_ITC);
-}
-
-/************************************************************************
- * lsapic timer
- ***********************************************************************/
-u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
-{
-       unsigned long guest_itc;
-       guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu);
-
-       if (guest_itc >= VMX(vcpu, last_itc)) {
-               VMX(vcpu, last_itc) = guest_itc;
-               return  guest_itc;
-       } else
-               return VMX(vcpu, last_itc);
-}
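
In other words, the guest ITC is the host time source plus a per-vcpu offset, clamped so the guest clock never appears to run backwards. A compact restatement, illustrative only:

/* illustrative sketch: guest_itc = max(host_itc + offset, last value returned) */
static unsigned long guest_itc_sketch(unsigned long host_itc,
				      unsigned long itc_offset,
				      unsigned long *last_itc)
{
	unsigned long guest_itc = host_itc + itc_offset;

	if (guest_itc < *last_itc)
		return *last_itc;	/* keep it monotonic */
	*last_itc = guest_itc;
	return guest_itc;
}
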
-
-static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
-static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
-{
-       struct kvm_vcpu *v;
-       struct kvm *kvm;
-       int i;
-       long itc_offset = val - kvm_get_itc(vcpu);
-       unsigned long vitv = VCPU(vcpu, itv);
-
-       kvm = (struct kvm *)KVM_VM_BASE;
-
-       if (kvm_vcpu_is_bsp(vcpu)) {
-               for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) {
-                       v = (struct kvm_vcpu *)((char *)vcpu +
-                                       sizeof(struct kvm_vcpu_data) * i);
-                       VMX(v, itc_offset) = itc_offset;
-                       VMX(v, last_itc) = 0;
-               }
-       }
-       VMX(vcpu, last_itc) = 0;
-       if (VCPU(vcpu, itm) <= val) {
-               VMX(vcpu, itc_check) = 0;
-               vcpu_unpend_interrupt(vcpu, vitv);
-       } else {
-               VMX(vcpu, itc_check) = 1;
-               vcpu_set_itm(vcpu, VCPU(vcpu, itm));
-       }
-
-}
-
-static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, itm));
-}
-
-static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val)
-{
-       unsigned long vitv = VCPU(vcpu, itv);
-       VCPU(vcpu, itm) = val;
-
-       if (val > vcpu_get_itc(vcpu)) {
-               VMX(vcpu, itc_check) = 1;
-               vcpu_unpend_interrupt(vcpu, vitv);
-               VMX(vcpu, timer_pending) = 0;
-       } else
-               VMX(vcpu, itc_check) = 0;
-}
-
-#define  ITV_VECTOR(itv)    (itv&0xff)
-#define  ITV_IRQ_MASK(itv)  (itv&(1<<16))
-
-static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, itv) = val;
-       if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) {
-               vcpu_pend_interrupt(vcpu, ITV_VECTOR(val));
-               vcpu->arch.timer_pending = 0;
-       }
-}
-
-static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val)
-{
-       int vec;
-
-       vec = highest_inservice_irq(vcpu);
-       if (vec == NULL_VECTOR)
-               return;
-       VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63));
-       VCPU(vcpu, eoi) = 0;
-       vcpu->arch.irq_new_pending = 1;
-
-}
-
-/* See Table 5-8 in SDM vol2 for the definition */
-int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice)
-{
-       union ia64_tpr vtpr;
-
-       vtpr.val = VCPU(vcpu, tpr);
-
-       if (h_inservice == NMI_VECTOR)
-               return IRQ_MASKED_BY_INSVC;
-
-       if (h_pending == NMI_VECTOR) {
-               /* Non Maskable Interrupt */
-               return IRQ_NO_MASKED;
-       }
-
-       if (h_inservice == ExtINT_VECTOR)
-               return IRQ_MASKED_BY_INSVC;
-
-       if (h_pending == ExtINT_VECTOR) {
-               if (vtpr.mmi) {
-                       /* mask all external IRQ */
-                       return IRQ_MASKED_BY_VTPR;
-               } else
-                       return IRQ_NO_MASKED;
-       }
-
-       if (is_higher_irq(h_pending, h_inservice)) {
-               if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4)))
-                       return IRQ_NO_MASKED;
-               else
-                       return IRQ_MASKED_BY_VTPR;
-       } else {
-               return IRQ_MASKED_BY_INSVC;
-       }
-}
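
The priority comparison works on 16-vector classes: a pending external interrupt is only deliverable if its class (vector / 16) exceeds both the highest in-service class and TPR.mic, and TPR.mmi masks all external interrupts. A rough, hedged sketch of the class check (the real is_higher_irq/is_higher_class helpers live in vcpu.h):

/* illustrative sketch of the class comparison used above */
static inline int class_of_sketch(int vec)
{
	return vec >> 4;
}

static inline int deliverable_by_tpr_sketch(int vec, int mic, int mmi)
{
	/* mmi masks every external class; otherwise compare against mic */
	return !mmi && class_of_sketch(vec) > mic;
}
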
-
-void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
-{
-       long spsr;
-       int ret;
-
-       local_irq_save(spsr);
-       ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0]));
-       local_irq_restore(spsr);
-
-       vcpu->arch.irq_new_pending = 1;
-}
-
-void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
-{
-       long spsr;
-       int ret;
-
-       local_irq_save(spsr);
-       ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0]));
-       local_irq_restore(spsr);
-       if (ret) {
-               vcpu->arch.irq_new_pending = 1;
-               wmb();
-       }
-}
-
-void update_vhpi(struct kvm_vcpu *vcpu, int vec)
-{
-       u64 vhpi;
-
-       if (vec == NULL_VECTOR)
-               vhpi = 0;
-       else if (vec == NMI_VECTOR)
-               vhpi = 32;
-       else if (vec == ExtINT_VECTOR)
-               vhpi = 16;
-       else
-               vhpi = vec >> 4;
-
-       VCPU(vcpu, vhpi) = vhpi;
-       if (VCPU(vcpu, vac).a_int)
-               ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT,
-                               (u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0);
-}
-
-u64 vcpu_get_ivr(struct kvm_vcpu *vcpu)
-{
-       int vec, h_inservice, mask;
-
-       vec = highest_pending_irq(vcpu);
-       h_inservice = highest_inservice_irq(vcpu);
-       mask = irq_masked(vcpu, vec, h_inservice);
-       if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) {
-               if (VCPU(vcpu, vhpi))
-                       update_vhpi(vcpu, NULL_VECTOR);
-               return IA64_SPURIOUS_INT_VECTOR;
-       }
-       if (mask == IRQ_MASKED_BY_VTPR) {
-               update_vhpi(vcpu, vec);
-               return IA64_SPURIOUS_INT_VECTOR;
-       }
-       VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63));
-       vcpu_unpend_interrupt(vcpu, vec);
-       return  (u64)vec;
-}
-
-/**************************************************************************
-  Privileged operation emulation routines
- **************************************************************************/
-u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       union ia64_pta vpta;
-       union ia64_rr vrr;
-       u64 pval;
-       u64 vhpt_offset;
-
-       vpta.val = vcpu_get_pta(vcpu);
-       vrr.val = vcpu_get_rr(vcpu, vadr);
-       vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1);
-       if (vpta.vf) {
-               pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val,
-                               vpta.val, 0, 0, 0, 0);
-       } else {
-               pval = (vadr & VRN_MASK) | vhpt_offset |
-                       (vpta.val << 3 >> (vpta.size + 3) << (vpta.size));
-       }
-       return  pval;
-}
-
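When pta.vf is clear this is the architected short-format hash, which is plain arithmetic. A worked restatement, where table_base is a stand-in for the vpta.val << 3 >> (vpta.size + 3) << vpta.size term above:

        /* Illustrative only: short-format VHPT hash.  With the reset values
         * set by init_vcpu() below (rr value 0x38 gives ps = 14, pta.size = 15,
         * i.e. a 32KB table), the page number of vadr, scaled by the 8-byte
         * entry size, wraps inside the table while the region bits are kept. */
        static u64 short_format_thash(u64 vadr, u64 ps, u64 pta_size, u64 table_base)
        {
                u64 off = ((vadr >> ps) << 3) & ((1UL << pta_size) - 1);

                return (vadr & VRN_MASK) | off | table_base;
        }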
-u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       union ia64_rr vrr;
-       union ia64_pta vpta;
-       u64 pval;
-
-       vpta.val = vcpu_get_pta(vcpu);
-       vrr.val = vcpu_get_rr(vcpu, vadr);
-       if (vpta.vf) {
-               pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val,
-                                               0, 0, 0, 0, 0);
-       } else
-               pval = 1;
-
-       return  pval;
-}
-
-u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
-{
-       struct thash_data *data;
-       union ia64_pta vpta;
-       u64 key;
-
-       vpta.val = vcpu_get_pta(vcpu);
-       if (vpta.vf == 0) {
-               key = 1;
-               return key;
-       }
-       data = vtlb_lookup(vcpu, vadr, D_TLB);
-       if (!data || !data->p)
-               key = 1;
-       else
-               key = data->key;
-
-       return key;
-}
-
-void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long thash, vadr;
-
-       vadr = vcpu_get_gr(vcpu, inst.M46.r3);
-       thash = vcpu_thash(vcpu, vadr);
-       vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
-}
-
-void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long tag, vadr;
-
-       vadr = vcpu_get_gr(vcpu, inst.M46.r3);
-       tag = vcpu_ttag(vcpu, vadr);
-       vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
-}
-
-int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr)
-{
-       struct thash_data *data;
-       union ia64_isr visr, pt_isr;
-       struct kvm_pt_regs *regs;
-       struct ia64_psr vpsr;
-
-       regs = vcpu_regs(vcpu);
-       pt_isr.val = VMX(vcpu, cr_isr);
-       visr.val = 0;
-       visr.ei = pt_isr.ei;
-       visr.ir = pt_isr.ir;
-       vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-       visr.na = 1;
-
-       data = vhpt_lookup(vadr);
-       if (data) {
-               if (data->p == 0) {
-                       vcpu_set_isr(vcpu, visr.val);
-                       data_page_not_present(vcpu, vadr);
-                       return IA64_FAULT;
-               } else if (data->ma == VA_MATTR_NATPAGE) {
-                       vcpu_set_isr(vcpu, visr.val);
-                       dnat_page_consumption(vcpu, vadr);
-                       return IA64_FAULT;
-               } else {
-                       *padr = (data->gpaddr >> data->ps << data->ps) |
-                               (vadr & (PSIZE(data->ps) - 1));
-                       return IA64_NO_FAULT;
-               }
-       }
-
-       data = vtlb_lookup(vcpu, vadr, D_TLB);
-       if (data) {
-               if (data->p == 0) {
-                       vcpu_set_isr(vcpu, visr.val);
-                       data_page_not_present(vcpu, vadr);
-                       return IA64_FAULT;
-               } else if (data->ma == VA_MATTR_NATPAGE) {
-                       vcpu_set_isr(vcpu, visr.val);
-                       dnat_page_consumption(vcpu, vadr);
-                       return IA64_FAULT;
-               } else {
-                       *padr = ((data->ppn >> (data->ps - 12)) << data->ps)
-                               | (vadr & (PSIZE(data->ps) - 1));
-                       return IA64_NO_FAULT;
-               }
-       }
-       if (!vhpt_enabled(vcpu, vadr, NA_REF)) {
-               if (vpsr.ic) {
-                       vcpu_set_isr(vcpu, visr.val);
-                       alt_dtlb(vcpu, vadr);
-                       return IA64_FAULT;
-               } else {
-                       nested_dtlb(vcpu);
-                       return IA64_FAULT;
-               }
-       } else {
-               if (vpsr.ic) {
-                       vcpu_set_isr(vcpu, visr.val);
-                       dvhpt_fault(vcpu, vadr);
-                       return IA64_FAULT;
-               } else {
-                       nested_dtlb(vcpu);
-                       return IA64_FAULT;
-               }
-       }
-
-       return IA64_NO_FAULT;
-}
-
-int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r1, r3;
-
-       r3 = vcpu_get_gr(vcpu, inst.M46.r3);
-
-       if (vcpu_tpa(vcpu, r3, &r1))
-               return IA64_FAULT;
-
-       vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
-       return(IA64_NO_FAULT);
-}
-
-void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r1, r3;
-
-       r3 = vcpu_get_gr(vcpu, inst.M46.r3);
-       r1 = vcpu_tak(vcpu, r3);
-       vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
-}
-
-/************************************
- * Insert/Purge translation register/cache
- ************************************/
-void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
-{
-       thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB);
-}
-
-void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
-{
-       thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB);
-}
-
-void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
-{
-       u64 ps, va, rid;
-       struct thash_data *p_itr;
-
-       ps = itir_ps(itir);
-       va = PAGEALIGN(ifa, ps);
-       pte &= ~PAGE_FLAGS_RV_MASK;
-       rid = vcpu_get_rr(vcpu, ifa);
-       rid = rid & RR_RID_MASK;
-       p_itr = (struct thash_data *)&vcpu->arch.itrs[slot];
-       vcpu_set_tr(p_itr, pte, itir, va, rid);
-       vcpu_quick_region_set(VMX(vcpu, itr_regions), va);
-}
-
-
-void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
-{
-       u64 gpfn;
-       u64 ps, va, rid;
-       struct thash_data *p_dtr;
-
-       ps = itir_ps(itir);
-       va = PAGEALIGN(ifa, ps);
-       pte &= ~PAGE_FLAGS_RV_MASK;
-
-       if (ps != _PAGE_SIZE_16M)
-               thash_purge_entries(vcpu, va, ps);
-       gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
-       if (__gpfn_is_io(gpfn))
-               pte |= VTLB_PTE_IO;
-       rid = vcpu_get_rr(vcpu, va);
-       rid = rid & RR_RID_MASK;
-       p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot];
-       vcpu_set_tr(p_dtr, pte, itir, va, rid);
-       vcpu_quick_region_set(VMX(vcpu, dtr_regions), va);
-}
-
-void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
-{
-       int index;
-       u64 va;
-
-       va = PAGEALIGN(ifa, ps);
-       while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0)
-               vcpu->arch.dtrs[index].page_flags = 0;
-
-       thash_purge_entries(vcpu, va, ps);
-}
-
-void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
-{
-       int index;
-       u64 va;
-
-       va = PAGEALIGN(ifa, ps);
-       while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0)
-               vcpu->arch.itrs[index].page_flags = 0;
-
-       thash_purge_entries(vcpu, va, ps);
-}
-
-void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps)
-{
-       va = PAGEALIGN(va, ps);
-       thash_purge_entries(vcpu, va, ps);
-}
-
-void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va)
-{
-       thash_purge_all(vcpu);
-}
-
-void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps)
-{
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-       long psr;
-       local_irq_save(psr);
-       p->exit_reason = EXIT_REASON_PTC_G;
-
-       p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va);
-       p->u.ptc_g_data.vaddr = va;
-       p->u.ptc_g_data.ps = ps;
-       vmm_transition(vcpu);
-       /* Do local purge here */
-       vcpu_ptc_l(vcpu, va, ps);
-       local_irq_restore(psr);
-}
-
-
-void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps)
-{
-       vcpu_ptc_ga(vcpu, va, ps);
-}
-
-void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long ifa;
-
-       ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-       vcpu_ptc_e(vcpu, ifa);
-}
-
-void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long ifa, itir;
-
-       ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-       itir = vcpu_get_gr(vcpu, inst.M45.r2);
-       vcpu_ptc_g(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long ifa, itir;
-
-       ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-       itir = vcpu_get_gr(vcpu, inst.M45.r2);
-       vcpu_ptc_ga(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long ifa, itir;
-
-       ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-       itir = vcpu_get_gr(vcpu, inst.M45.r2);
-       vcpu_ptc_l(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long ifa, itir;
-
-       ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-       itir = vcpu_get_gr(vcpu, inst.M45.r2);
-       vcpu_ptr_d(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long ifa, itir;
-
-       ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-       itir = vcpu_get_gr(vcpu, inst.M45.r2);
-       vcpu_ptr_i(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long itir, ifa, pte, slot;
-
-       slot = vcpu_get_gr(vcpu, inst.M45.r3);
-       pte = vcpu_get_gr(vcpu, inst.M45.r2);
-       itir = vcpu_get_itir(vcpu);
-       ifa = vcpu_get_ifa(vcpu);
-       vcpu_itr_d(vcpu, slot, pte, itir, ifa);
-}
-
-
-
-void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long itir, ifa, pte, slot;
-
-       slot = vcpu_get_gr(vcpu, inst.M45.r3);
-       pte = vcpu_get_gr(vcpu, inst.M45.r2);
-       itir = vcpu_get_itir(vcpu);
-       ifa = vcpu_get_ifa(vcpu);
-       vcpu_itr_i(vcpu, slot, pte, itir, ifa);
-}
-
-void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long itir, ifa, pte;
-
-       itir = vcpu_get_itir(vcpu);
-       ifa = vcpu_get_ifa(vcpu);
-       pte = vcpu_get_gr(vcpu, inst.M45.r2);
-       vcpu_itc_d(vcpu, pte, itir, ifa);
-}
-
-void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long itir, ifa, pte;
-
-       itir = vcpu_get_itir(vcpu);
-       ifa = vcpu_get_ifa(vcpu);
-       pte = vcpu_get_gr(vcpu, inst.M45.r2);
-       vcpu_itc_i(vcpu, pte, itir, ifa);
-}
-
-/*************************************
- * Moves to semi-privileged registers
- *************************************/
-
-void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long imm;
-
-       if (inst.M30.s)
-               imm = -inst.M30.imm;
-       else
-               imm = inst.M30.imm;
-
-       vcpu_set_itc(vcpu, imm);
-}
-
-void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r2;
-
-       r2 = vcpu_get_gr(vcpu, inst.M29.r2);
-       vcpu_set_itc(vcpu, r2);
-}
-
-void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r1;
-
-       r1 = vcpu_get_itc(vcpu);
-       vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
-}
-
-/**************************************************************************
-  struct kvm_vcpu protection key register access routines
- **************************************************************************/
-
-unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
-{
-       return ((unsigned long)ia64_get_pkr(reg));
-}
-
-void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
-{
-       ia64_set_pkr(reg, val);
-}
-
-/********************************
- * Moves to privileged registers
- ********************************/
-unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
-                                       unsigned long val)
-{
-       union ia64_rr oldrr, newrr;
-       unsigned long rrval;
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-       unsigned long psr;
-
-       oldrr.val = vcpu_get_rr(vcpu, reg);
-       newrr.val = val;
-       vcpu->arch.vrr[reg >> VRN_SHIFT] = val;
-
-       switch ((unsigned long)(reg >> VRN_SHIFT)) {
-       case VRN6:
-               vcpu->arch.vmm_rr = vrrtomrr(val);
-               local_irq_save(psr);
-               p->exit_reason = EXIT_REASON_SWITCH_RR6;
-               vmm_transition(vcpu);
-               local_irq_restore(psr);
-               break;
-       case VRN4:
-               rrval = vrrtomrr(val);
-               vcpu->arch.metaphysical_saved_rr4 = rrval;
-               if (!is_physical_mode(vcpu))
-                       ia64_set_rr(reg, rrval);
-               break;
-       case VRN0:
-               rrval = vrrtomrr(val);
-               vcpu->arch.metaphysical_saved_rr0 = rrval;
-               if (!is_physical_mode(vcpu))
-                       ia64_set_rr(reg, rrval);
-               break;
-       default:
-               ia64_set_rr(reg, vrrtomrr(val));
-               break;
-       }
-
-       return (IA64_NO_FAULT);
-}
-
-void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r2;
-
-       r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-       r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-       vcpu_set_rr(vcpu, r3, r2);
-}
-
-void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-}
-
-void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-}
-
-void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r2;
-
-       r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-       r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-       vcpu_set_pmc(vcpu, r3, r2);
-}
-
-void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r2;
-
-       r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-       r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-       vcpu_set_pmd(vcpu, r3, r2);
-}
-
-void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       u64 r3, r2;
-
-       r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-       r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-       vcpu_set_pkr(vcpu, r3, r2);
-}
-
-void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r1;
-
-       r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-       r1 = vcpu_get_rr(vcpu, r3);
-       vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r1;
-
-       r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-       r1 = vcpu_get_pkr(vcpu, r3);
-       vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r1;
-
-       r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-       r1 = vcpu_get_dbr(vcpu, r3);
-       vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r1;
-
-       r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-       r1 = vcpu_get_ibr(vcpu, r3);
-       vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r1;
-
-       r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-       r1 = vcpu_get_pmc(vcpu, r3);
-       vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
-{
-       /* FIXME: This could get called as a result of a rsvd-reg fault */
-       if (reg > (ia64_get_cpuid(3) & 0xff))
-               return 0;
-       else
-               return ia64_get_cpuid(reg);
-}
-
-void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r3, r1;
-
-       r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-       r1 = vcpu_get_cpuid(vcpu, r3);
-       vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val)
-{
-       VCPU(vcpu, tpr) = val;
-       vcpu->arch.irq_check = 1;
-}
-
-unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long r2;
-
-       r2 = vcpu_get_gr(vcpu, inst.M32.r2);
-       VCPU(vcpu, vcr[inst.M32.cr3]) = r2;
-
-       switch (inst.M32.cr3) {
-       case 0:
-               vcpu_set_dcr(vcpu, r2);
-               break;
-       case 1:
-               vcpu_set_itm(vcpu, r2);
-               break;
-       case 66:
-               vcpu_set_tpr(vcpu, r2);
-               break;
-       case 67:
-               vcpu_set_eoi(vcpu, r2);
-               break;
-       default:
-               break;
-       }
-
-       return 0;
-}
-
-unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long tgt = inst.M33.r1;
-       unsigned long val;
-
-       switch (inst.M33.cr3) {
-       case 65:
-               val = vcpu_get_ivr(vcpu);
-               vcpu_set_gr(vcpu, tgt, val, 0);
-               break;
-
-       case 67:
-               vcpu_set_gr(vcpu, tgt, 0L, 0);
-               break;
-       default:
-               val = VCPU(vcpu, vcr[inst.M33.cr3]);
-               vcpu_set_gr(vcpu, tgt, val, 0);
-               break;
-       }
-
-       return 0;
-}
-
-void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
-{
-
-       unsigned long mask;
-       struct kvm_pt_regs *regs;
-       struct ia64_psr old_psr, new_psr;
-
-       old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-
-       regs = vcpu_regs(vcpu);
-       /* We only support guests with:
-        *  vpsr.pk = 0
-        *  vpsr.is = 0
-        * Otherwise panic.
-        */
-       if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
-               panic_vm(vcpu, "Only support guests with vpsr.pk = 0 "
-                               "& vpsr.is = 0\n");
-
-       /*
-        * For the IA64_PSR bits id/da/dd/ss/ed/ia:
-        * since these bits become 0 after successful execution of each
-        * instruction, we do not track them in the vPSR and leave them
-        * to the machine IA64_PSR.
-        */
-       VCPU(vcpu, vpsr) = val
-               & (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD |
-                       IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA));
-
-       if (!old_psr.i && (val & IA64_PSR_I)) {
-               /* vpsr.i 0->1 */
-               vcpu->arch.irq_check = 1;
-       }
-       new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-
-       /*
-        * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr),
-        * except for the following bits:
-        *  ic/i/dt/si/rt/mc/it/bn/vm
-        */
-       mask =  IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
-               IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
-               IA64_PSR_VM;
-
-       regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask));
-
-       check_mm_mode_switch(vcpu, old_psr, new_psr);
-
-       return ;
-}
-
-unsigned long vcpu_cover(struct kvm_vcpu *vcpu)
-{
-       struct ia64_psr vpsr;
-
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-       vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-
-       if (!vpsr.ic)
-               VCPU(vcpu, ifs) = regs->cr_ifs;
-       regs->cr_ifs = IA64_IFS_V;
-       return (IA64_NO_FAULT);
-}
-
-
-
-/**************************************************************************
-  VCPU banked general register access routines
- **************************************************************************/
-#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \
-       do {                                                            \
-               __asm__ __volatile__ (                                  \
-                               ";;extr.u %0 = %3,%6,16;;\n"            \
-                               "dep %1 = %0, %1, 0, 16;;\n"            \
-                               "st8 [%4] = %1\n"                       \
-                               "extr.u %0 = %2, 16, 16;;\n"            \
-                               "dep %3 = %0, %3, %6, 16;;\n"           \
-                               "st8 [%5] = %3\n"                       \
-                               ::"r"(i), "r"(*b1unat), "r"(*b0unat),   \
-                               "r"(*runat), "r"(b1unat), "r"(runat),   \
-                               "i"(VMM_PT_REGS_R16_SLOT) : "memory");  \
-       } while (0)
-
-void vcpu_bsw0(struct kvm_vcpu *vcpu)
-{
-       unsigned long i;
-
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-       unsigned long *r = &regs->r16;
-       unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
-       unsigned long *b1 = &VCPU(vcpu, vgr[0]);
-       unsigned long *runat = &regs->eml_unat;
-       unsigned long *b0unat = &VCPU(vcpu, vbnat);
-       unsigned long *b1unat = &VCPU(vcpu, vnat);
-
-
-       if (VCPU(vcpu, vpsr) & IA64_PSR_BN) {
-               for (i = 0; i < 16; i++) {
-                       *b1++ = *r;
-                       *r++ = *b0++;
-               }
-               vcpu_bsw0_unat(i, b0unat, b1unat, runat,
-                               VMM_PT_REGS_R16_SLOT);
-               VCPU(vcpu, vpsr) &= ~IA64_PSR_BN;
-       }
-}
-
-#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \
-       do {                                                            \
-               __asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n"    \
-                               "dep %1 = %0, %1, 16, 16;;\n"           \
-                               "st8 [%4] = %1\n"                       \
-                               "extr.u %0 = %2, 0, 16;;\n"             \
-                               "dep %3 = %0, %3, %6, 16;;\n"           \
-                               "st8 [%5] = %3\n"                       \
-                               ::"r"(i), "r"(*b0unat), "r"(*b1unat),   \
-                               "r"(*runat), "r"(b0unat), "r"(runat),   \
-                               "i"(VMM_PT_REGS_R16_SLOT) : "memory");  \
-       } while (0)
-
-void vcpu_bsw1(struct kvm_vcpu *vcpu)
-{
-       unsigned long i;
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-       unsigned long *r = &regs->r16;
-       unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
-       unsigned long *b1 = &VCPU(vcpu, vgr[0]);
-       unsigned long *runat = &regs->eml_unat;
-       unsigned long *b0unat = &VCPU(vcpu, vbnat);
-       unsigned long *b1unat = &VCPU(vcpu, vnat);
-
-       if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) {
-               for (i = 0; i < 16; i++) {
-                       *b0++ = *r;
-                       *r++ = *b1++;
-               }
-               vcpu_bsw1_unat(i, b0unat, b1unat, runat,
-                               VMM_PT_REGS_R16_SLOT);
-               VCPU(vcpu, vpsr) |= IA64_PSR_BN;
-       }
-}
-
-void vcpu_rfi(struct kvm_vcpu *vcpu)
-{
-       unsigned long ifs, psr;
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-       psr = VCPU(vcpu, ipsr);
-       if (psr & IA64_PSR_BN)
-               vcpu_bsw1(vcpu);
-       else
-               vcpu_bsw0(vcpu);
-       vcpu_set_psr(vcpu, psr);
-       ifs = VCPU(vcpu, ifs);
-       if (ifs >> 63)
-               regs->cr_ifs = ifs;
-       regs->cr_iip = VCPU(vcpu, iip);
-}
-
-/*
- * The vPSR cannot track the bits of the guest PSR listed below;
- * this function reconstructs the complete guest PSR from the vPSR
- * plus the live cr.ipsr.
- */
-
-unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu)
-{
-       unsigned long mask;
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-       mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL |
-               IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI;
-       return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask);
-}
-
-void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long vpsr;
-       unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21)
-                                       | inst.M44.imm;
-
-       vpsr = vcpu_get_psr(vcpu);
-       vpsr &= (~imm24);
-       vcpu_set_psr(vcpu, vpsr);
-}
-
-void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long vpsr;
-       unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21)
-                               | inst.M44.imm;
-
-       vpsr = vcpu_get_psr(vcpu);
-       vpsr |= imm24;
-       vcpu_set_psr(vcpu, vpsr);
-}
-
-/* Generate Mask
- * Parameter:
- *  bit -- starting bit
- *  len -- how many bits
- */
-#define MASK(bit,len)                                  \
-({                                                     \
-               __u64   ret;                            \
-                                                       \
-               __asm __volatile("dep %0=-1, r0, %1, %2"\
-                               : "=r" (ret):           \
-                 "M" (bit),                            \
-                 "M" (len));                           \
-               ret;                                    \
-})
-
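MASK(bit, len) simply deposits len set bits starting at bit, which is how vcpu_set_psr_l() below splices the caller's low 32 bits onto the preserved upper half of the PSR. An equivalent plain-C form, valid for len < 64 and offered only as an illustration:

        /* Illustrative plain-C equivalent of MASK() for len < 64:
         *   MASK(0, 32)  == 0x00000000ffffffffUL
         *   MASK(32, 32) == 0xffffffff00000000UL
         *   MASK(35, 2)  == 0x0000001800000000UL
         */
        #define MASK_C(bit, len)    ((((__u64)1 << (len)) - 1) << (bit))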
-void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val)
-{
-       val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32));
-       vcpu_set_psr(vcpu, val);
-}
-
-void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long val;
-
-       val = vcpu_get_gr(vcpu, inst.M35.r2);
-       vcpu_set_psr_l(vcpu, val);
-}
-
-void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-       unsigned long val;
-
-       val = vcpu_get_psr(vcpu);
-       val = (val & MASK(0, 32)) | (val & MASK(35, 2));
-       vcpu_set_gr(vcpu, inst.M33.r1, val, 0);
-}
-
-void vcpu_increment_iip(struct kvm_vcpu *vcpu)
-{
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-       struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
-       if (ipsr->ri == 2) {
-               ipsr->ri = 0;
-               regs->cr_iip += 16;
-       } else
-               ipsr->ri++;
-}
-
-void vcpu_decrement_iip(struct kvm_vcpu *vcpu)
-{
-       struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-       struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
-
-       if (ipsr->ri == 0) {
-               ipsr->ri = 2;
-               regs->cr_iip -= 16;
-       } else
-               ipsr->ri--;
-}
-
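Both helpers step the guest IP in slot units: an ia64 bundle is 16 bytes and holds three instruction slots, with psr.ri selecting the slot within the bundle at cr.iip. A compact restatement of the forward direction, illustrative only:

        /* Illustrative: advancing one slot walks
         * (B, 0) -> (B, 1) -> (B, 2) -> (B + 16, 0),
         * which is exactly what vcpu_increment_iip() implements. */
        static void advance_slot(unsigned long *iip, unsigned long *ri)
        {
                if (++(*ri) == 3) {
                        *ri = 0;
                        *iip += 16;
                }
        }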
-/** Emulate a privileged operation.
- *
- * @param vcpu virtual cpu
- * @cause the reason that caused the virtualization fault
- * @opcode the instruction that caused the virtualization fault
- */
-
-void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs)
-{
-       unsigned long status, cause, opcode;
-       INST64 inst;
-
-       status = IA64_NO_FAULT;
-       cause = VMX(vcpu, cause);
-       opcode = VMX(vcpu, opcode);
-       inst.inst = opcode;
-       /*
-        * Switch to actual virtual rid in rr0 and rr4,
-        * which is required by some tlb related instructions.
-        */
-       prepare_if_physical_mode(vcpu);
-
-       switch (cause) {
-       case EVENT_RSM:
-               kvm_rsm(vcpu, inst);
-               break;
-       case EVENT_SSM:
-               kvm_ssm(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_PSR:
-               kvm_mov_to_psr(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_PSR:
-               kvm_mov_from_psr(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_CR:
-               kvm_mov_from_cr(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_CR:
-               kvm_mov_to_cr(vcpu, inst);
-               break;
-       case EVENT_BSW_0:
-               vcpu_bsw0(vcpu);
-               break;
-       case EVENT_BSW_1:
-               vcpu_bsw1(vcpu);
-               break;
-       case EVENT_COVER:
-               vcpu_cover(vcpu);
-               break;
-       case EVENT_RFI:
-               vcpu_rfi(vcpu);
-               break;
-       case EVENT_ITR_D:
-               kvm_itr_d(vcpu, inst);
-               break;
-       case EVENT_ITR_I:
-               kvm_itr_i(vcpu, inst);
-               break;
-       case EVENT_PTR_D:
-               kvm_ptr_d(vcpu, inst);
-               break;
-       case EVENT_PTR_I:
-               kvm_ptr_i(vcpu, inst);
-               break;
-       case EVENT_ITC_D:
-               kvm_itc_d(vcpu, inst);
-               break;
-       case EVENT_ITC_I:
-               kvm_itc_i(vcpu, inst);
-               break;
-       case EVENT_PTC_L:
-               kvm_ptc_l(vcpu, inst);
-               break;
-       case EVENT_PTC_G:
-               kvm_ptc_g(vcpu, inst);
-               break;
-       case EVENT_PTC_GA:
-               kvm_ptc_ga(vcpu, inst);
-               break;
-       case EVENT_PTC_E:
-               kvm_ptc_e(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_RR:
-               kvm_mov_to_rr(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_RR:
-               kvm_mov_from_rr(vcpu, inst);
-               break;
-       case EVENT_THASH:
-               kvm_thash(vcpu, inst);
-               break;
-       case EVENT_TTAG:
-               kvm_ttag(vcpu, inst);
-               break;
-       case EVENT_TPA:
-               status = kvm_tpa(vcpu, inst);
-               break;
-       case EVENT_TAK:
-               kvm_tak(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_AR_IMM:
-               kvm_mov_to_ar_imm(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_AR:
-               kvm_mov_to_ar_reg(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_AR:
-               kvm_mov_from_ar_reg(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_DBR:
-               kvm_mov_to_dbr(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_IBR:
-               kvm_mov_to_ibr(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_PMC:
-               kvm_mov_to_pmc(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_PMD:
-               kvm_mov_to_pmd(vcpu, inst);
-               break;
-       case EVENT_MOV_TO_PKR:
-               kvm_mov_to_pkr(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_DBR:
-               kvm_mov_from_dbr(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_IBR:
-               kvm_mov_from_ibr(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_PMC:
-               kvm_mov_from_pmc(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_PKR:
-               kvm_mov_from_pkr(vcpu, inst);
-               break;
-       case EVENT_MOV_FROM_CPUID:
-               kvm_mov_from_cpuid(vcpu, inst);
-               break;
-       case EVENT_VMSW:
-               status = IA64_FAULT;
-               break;
-       default:
-               break;
-       };
-       /* Assume status is IA64_NO_FAULT for all other cases? */
-       if (status == IA64_NO_FAULT && cause != EVENT_RFI)
-               vcpu_increment_iip(vcpu);
-
-       recover_if_physical_mode(vcpu);
-}
-
-void init_vcpu(struct kvm_vcpu *vcpu)
-{
-       int i;
-
-       vcpu->arch.mode_flags = GUEST_IN_PHY;
-       VMX(vcpu, vrr[0]) = 0x38;
-       VMX(vcpu, vrr[1]) = 0x38;
-       VMX(vcpu, vrr[2]) = 0x38;
-       VMX(vcpu, vrr[3]) = 0x38;
-       VMX(vcpu, vrr[4]) = 0x38;
-       VMX(vcpu, vrr[5]) = 0x38;
-       VMX(vcpu, vrr[6]) = 0x38;
-       VMX(vcpu, vrr[7]) = 0x38;
-       VCPU(vcpu, vpsr) = IA64_PSR_BN;
-       VCPU(vcpu, dcr) = 0;
-       /* pta.size must not be 0.  The minimum is 15 (32k) */
-       VCPU(vcpu, pta) = 15 << 2;
-       VCPU(vcpu, itv) = 0x10000;
-       VCPU(vcpu, itm) = 0;
-       VMX(vcpu, last_itc) = 0;
-
-       VCPU(vcpu, lid) = VCPU_LID(vcpu);
-       VCPU(vcpu, ivr) = 0;
-       VCPU(vcpu, tpr) = 0x10000;
-       VCPU(vcpu, eoi) = 0;
-       VCPU(vcpu, irr[0]) = 0;
-       VCPU(vcpu, irr[1]) = 0;
-       VCPU(vcpu, irr[2]) = 0;
-       VCPU(vcpu, irr[3]) = 0;
-       VCPU(vcpu, pmv) = 0x10000;
-       VCPU(vcpu, cmcv) = 0x10000;
-       VCPU(vcpu, lrr0) = 0x10000;   /* default reset value? */
-       VCPU(vcpu, lrr1) = 0x10000;   /* default reset value? */
-       update_vhpi(vcpu, NULL_VECTOR);
-       VLSAPIC_XTP(vcpu) = 0x80;       /* disabled */
-
-       for (i = 0; i < 4; i++)
-               VLSAPIC_INSVC(vcpu, i) = 0;
-}
-
-void kvm_init_all_rr(struct kvm_vcpu *vcpu)
-{
-       unsigned long psr;
-
-       local_irq_save(psr);
-
-       /* WARNING: virtual mode and physical mode must not co-exist
-        * in the same region.
-        */
-
-       vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0]));
-       vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4]));
-
-       if (is_physical_mode(vcpu)) {
-               if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
-                       panic_vm(vcpu, "Machine Status conflicts!\n");
-
-               ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
-               ia64_dv_serialize_data();
-               ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4);
-               ia64_dv_serialize_data();
-       } else {
-               ia64_set_rr((VRN0 << VRN_SHIFT),
-                               vcpu->arch.metaphysical_saved_rr0);
-               ia64_dv_serialize_data();
-               ia64_set_rr((VRN4 << VRN_SHIFT),
-                               vcpu->arch.metaphysical_saved_rr4);
-               ia64_dv_serialize_data();
-       }
-       ia64_set_rr((VRN1 << VRN_SHIFT),
-                       vrrtomrr(VMX(vcpu, vrr[VRN1])));
-       ia64_dv_serialize_data();
-       ia64_set_rr((VRN2 << VRN_SHIFT),
-                       vrrtomrr(VMX(vcpu, vrr[VRN2])));
-       ia64_dv_serialize_data();
-       ia64_set_rr((VRN3 << VRN_SHIFT),
-                       vrrtomrr(VMX(vcpu, vrr[VRN3])));
-       ia64_dv_serialize_data();
-       ia64_set_rr((VRN5 << VRN_SHIFT),
-                       vrrtomrr(VMX(vcpu, vrr[VRN5])));
-       ia64_dv_serialize_data();
-       ia64_set_rr((VRN7 << VRN_SHIFT),
-                       vrrtomrr(VMX(vcpu, vrr[VRN7])));
-       ia64_dv_serialize_data();
-       ia64_srlz_d();
-       ia64_set_psr(psr);
-}
-
-int vmm_entry(void)
-{
-       struct kvm_vcpu *v;
-       v = current_vcpu;
-
-       ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd,
-                                               0, 0, 0, 0, 0, 0);
-       kvm_init_vtlb(v);
-       kvm_init_vhpt(v);
-       init_vcpu(v);
-       kvm_init_all_rr(v);
-       vmm_reset_entry();
-
-       return 0;
-}
-
-static void kvm_show_registers(struct kvm_pt_regs *regs)
-{
-       unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
-
-       struct kvm_vcpu *vcpu = current_vcpu;
-       if (vcpu != NULL)
-               printk("vcpu 0x%p vcpu_id %d\n",
-                      vcpu, vcpu->vcpu_id);
-
-       printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]\n",
-              regs->cr_ipsr, regs->cr_ifs, ip);
-
-       printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
-              regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
-       printk("rnat: %016lx bspstore: %016lx pr  : %016lx\n",
-              regs->ar_rnat, regs->ar_bspstore, regs->pr);
-       printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
-              regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
-       printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
-       printk("b0  : %016lx b6  : %016lx b7  : %016lx\n", regs->b0,
-                                                       regs->b6, regs->b7);
-       printk("f6  : %05lx%016lx f7  : %05lx%016lx\n",
-              regs->f6.u.bits[1], regs->f6.u.bits[0],
-              regs->f7.u.bits[1], regs->f7.u.bits[0]);
-       printk("f8  : %05lx%016lx f9  : %05lx%016lx\n",
-              regs->f8.u.bits[1], regs->f8.u.bits[0],
-              regs->f9.u.bits[1], regs->f9.u.bits[0]);
-       printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
-              regs->f10.u.bits[1], regs->f10.u.bits[0],
-              regs->f11.u.bits[1], regs->f11.u.bits[0]);
-
-       printk("r1  : %016lx r2  : %016lx r3  : %016lx\n", regs->r1,
-                                                       regs->r2, regs->r3);
-       printk("r8  : %016lx r9  : %016lx r10 : %016lx\n", regs->r8,
-                                                       regs->r9, regs->r10);
-       printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
-                                                       regs->r12, regs->r13);
-       printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
-                                                       regs->r15, regs->r16);
-       printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
-                                                       regs->r18, regs->r19);
-       printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
-                                                       regs->r21, regs->r22);
-       printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
-                                                       regs->r24, regs->r25);
-       printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
-                                                       regs->r27, regs->r28);
-       printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
-                                                       regs->r30, regs->r31);
-
-}
-
-void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
-{
-       va_list args;
-       char buf[256];
-
-       struct kvm_pt_regs *regs = vcpu_regs(v);
-       struct exit_ctl_data *p = &v->arch.exit_data;
-       va_start(args, fmt);
-       vsnprintf(buf, sizeof(buf), fmt, args);
-       va_end(args);
-       printk("%s", buf);
-       kvm_show_registers(regs);
-       p->exit_reason = EXIT_REASON_VM_PANIC;
-       vmm_transition(v);
-       /* Never returns */
-       while (1);
-}
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
deleted file mode 100644 (file)
index 988911b..0000000
+++ /dev/null
@@ -1,752 +0,0 @@
-/*
- *  vcpu.h: vcpu routines
- *     Copyright (c) 2005, Intel Corporation.
- *     Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *     Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
- *
- *     Copyright (c) 2007, Intel Corporation.
- *     Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *     Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-
-#ifndef __KVM_VCPU_H__
-#define __KVM_VCPU_H__
-
-#include <asm/types.h>
-#include <asm/fpu.h>
-#include <asm/processor.h>
-
-#ifndef __ASSEMBLY__
-#include "vti.h"
-
-#include <linux/kvm_host.h>
-#include <linux/spinlock.h>
-
-typedef unsigned long IA64_INST;
-
-typedef union U_IA64_BUNDLE {
-       unsigned long i64[2];
-       struct { unsigned long template:5, slot0:41, slot1a:18,
-               slot1b:23, slot2:41; };
-       /* NOTE: following doesn't work because bitfields can't cross natural
-          size boundaries
-          struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */
-} IA64_BUNDLE;
-
-typedef union U_INST64_A5 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5,
-               imm9d:9, s:1, major:4; };
-} INST64_A5;
-
-typedef union U_INST64_B4 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6,
-               wh:2, d:1, un1:1, major:4; };
-} INST64_B4;
-
-typedef union U_INST64_B8 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; };
-} INST64_B8;
-
-typedef union U_INST64_B9 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
-} INST64_B9;
-
-typedef union U_INST64_I19 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
-} INST64_I19;
-
-typedef union U_INST64_I26 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_I26;
-
-typedef union U_INST64_I27 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; };
-} INST64_I27;
-
-typedef union U_INST64_I28 { /* not privileged (mov from AR) */
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_I28;
-
-typedef union U_INST64_M28 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M28;
-
-typedef union U_INST64_M29 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M29;
-
-typedef union U_INST64_M30 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2,
-               x3:3, s:1, major:4; };
-} INST64_M30;
-
-typedef union U_INST64_M31 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M31;
-
-typedef union U_INST64_M32 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M32;
-
-typedef union U_INST64_M33 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M33;
-
-typedef union U_INST64_M35 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
-
-} INST64_M35;
-
-typedef union U_INST64_M36 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; };
-} INST64_M36;
-
-typedef union U_INST64_M37 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3,
-               i:1, major:4; };
-} INST64_M37;
-
-typedef union U_INST64_M41 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
-} INST64_M41;
-
-typedef union U_INST64_M42 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M42;
-
-typedef union U_INST64_M43 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M43;
-
-typedef union U_INST64_M44 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; };
-} INST64_M44;
-
-typedef union U_INST64_M45 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M45;
-
-typedef union U_INST64_M46 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6,
-               x3:3, un1:1, major:4; };
-} INST64_M46;
-
-typedef union U_INST64_M47 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; };
-} INST64_M47;
-
-typedef union U_INST64_M1{
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2,
-               x6:6, m:1, major:4; };
-} INST64_M1;
-
-typedef union U_INST64_M2{
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2,
-               x6:6, m:1, major:4; };
-} INST64_M2;
-
-typedef union U_INST64_M3{
-       IA64_INST inst;
-       struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2,
-               x6:6, s:1, major:4; };
-} INST64_M3;
-
-typedef union U_INST64_M4 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2,
-               x6:6, m:1, major:4; };
-} INST64_M4;
-
-typedef union U_INST64_M5 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2,
-               x6:6, s:1, major:4; };
-} INST64_M5;
-
-typedef union U_INST64_M6 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2,
-               x6:6, m:1, major:4; };
-} INST64_M6;
-
-typedef union U_INST64_M9 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2,
-               x6:6, m:1, major:4; };
-} INST64_M9;
-
-typedef union U_INST64_M10 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2,
-               x6:6, s:1, major:4; };
-} INST64_M10;
-
-typedef union U_INST64_M12 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2,
-               x6:6, m:1, major:4; };
-} INST64_M12;
-
-typedef union U_INST64_M15 {
-       IA64_INST inst;
-       struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2,
-               x6:6, s:1, major:4; };
-} INST64_M15;
-
-typedef union U_INST64 {
-       IA64_INST inst;
-       struct { unsigned long :37, major:4; } generic;
-       INST64_A5 A5;   /* used in build_hypercall_bundle only */
-       INST64_B4 B4;   /* used in build_hypercall_bundle only */
-       INST64_B8 B8;   /* rfi, bsw.[01] */
-       INST64_B9 B9;   /* break.b */
-       INST64_I19 I19; /* used in build_hypercall_bundle only */
-       INST64_I26 I26; /* mov register to ar (I unit) */
-       INST64_I27 I27; /* mov immediate to ar (I unit) */
-       INST64_I28 I28; /* mov from ar (I unit) */
-       INST64_M1  M1;  /* ld integer */
-       INST64_M2  M2;
-       INST64_M3  M3;
-       INST64_M4  M4;  /* st integer */
-       INST64_M5  M5;
-       INST64_M6  M6;  /* ldfd floating point                  */
-       INST64_M9  M9;  /* stfd floating point                  */
-       INST64_M10 M10; /* stfd floating point                  */
-       INST64_M12 M12; /* ldfd pair floating point             */
-       INST64_M15 M15; /* lfetch + imm update                  */
-       INST64_M28 M28; /* purge translation cache entry        */
-       INST64_M29 M29; /* mov register to ar (M unit)          */
-       INST64_M30 M30; /* mov immediate to ar (M unit)         */
-       INST64_M31 M31; /* mov from ar (M unit)                 */
-       INST64_M32 M32; /* mov reg to cr                        */
-       INST64_M33 M33; /* mov from cr                          */
-       INST64_M35 M35; /* mov to psr                           */
-       INST64_M36 M36; /* mov from psr                         */
-       INST64_M37 M37; /* break.m                              */
-       INST64_M41 M41; /* translation cache insert             */
-       INST64_M42 M42; /* mov to indirect reg/translation reg insert*/
-       INST64_M43 M43; /* mov from indirect reg                */
-       INST64_M44 M44; /* set/reset system mask                */
-       INST64_M45 M45; /* translation purge                    */
-       INST64_M46 M46; /* translation access (tpa,tak)         */
-       INST64_M47 M47; /* purge translation entry              */
-} INST64;
-
-#define MASK_41 ((unsigned long)0x1ffffffffff)
-
-/* Virtual address memory attributes encoding */
-#define VA_MATTR_WB         0x0
-#define VA_MATTR_UC         0x4
-#define VA_MATTR_UCE        0x5
-#define VA_MATTR_WC         0x6
-#define VA_MATTR_NATPAGE    0x7
-
-#define PMASK(size)         (~((size) - 1))
-#define PSIZE(size)         (1UL<<(size))
-#define CLEARLSB(ppn, nbits)    (((ppn) >> (nbits)) << (nbits))
-#define PAGEALIGN(va, ps)      CLEARLSB(va, ps)
-#define PAGE_FLAGS_RV_MASK   (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53))
-#define _PAGE_MA_ST     (0x1 <<  2) /* is reserved for software use */
-
-#define ARCH_PAGE_SHIFT   12
-
-#define INVALID_TI_TAG (1UL << 63)
-
-#define VTLB_PTE_P_BIT      0
-#define VTLB_PTE_IO_BIT     60
-#define VTLB_PTE_IO         (1UL<<VTLB_PTE_IO_BIT)
-#define VTLB_PTE_P          (1UL<<VTLB_PTE_P_BIT)
-
-#define vcpu_quick_region_check(_tr_regions,_ifa)              \
-       (_tr_regions & (1 << ((unsigned long)_ifa >> 61)))
-
-#define vcpu_quick_region_set(_tr_regions,_ifa)             \
-       do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0)
-
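itr_regions and dtr_regions are eight-bit bitmaps, one bit per 2^61-byte virtual region, letting the fault paths skip the translation-register search for regions that never had a TR inserted. A small, illustrative sketch of the indexing:

        /* Illustrative: the region number is the top three bits of the
         * virtual address, so vcpu_quick_region_check(bitmap, ifa) is
         * just bitmap & (1 << (ifa >> 61)). */
        static inline int va_region(unsigned long va)
        {
                return (int)(va >> 61);         /* 0..7 */
        }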
-static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir,
-               u64 va, u64 rid)
-{
-       trp->page_flags = pte;
-       trp->itir = itir;
-       trp->vadr = va;
-       trp->rid = rid;
-}
-
-extern u64 kvm_get_mpt_entry(u64 gpfn);
-
-/* Return the I/O type of the gpfn, or 0 for ordinary memory and physical MMIO */
-static inline u64 __gpfn_is_io(u64 gpfn)
-{
-       u64  pte;
-       pte = kvm_get_mpt_entry(gpfn);
-       if (!(pte & GPFN_INV_MASK)) {
-               pte = pte & GPFN_IO_MASK;
-               if (pte != GPFN_PHYS_MMIO)
-                       return pte;
-       }
-       return 0;
-}
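This is the helper vcpu_itr_d() in vcpu.c above uses to tag data translations that cover emulated I/O. A hedged usage sketch mirroring that call site:

        /* Illustrative only, mirroring the use in vcpu_itr_d(): if the guest
         * pfn maps I/O space, mark the shadow PTE so accesses through this
         * translation are routed to MMIO emulation rather than RAM. */
        static u64 tag_io_pte_sketch(u64 pte)
        {
                u64 gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;

                if (__gpfn_is_io(gpfn))
                        pte |= VTLB_PTE_IO;
                return pte;
        }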
-#endif
-#define IA64_NO_FAULT  0
-#define IA64_FAULT     1
-
-#define VMM_RBS_OFFSET  ((VMM_TASK_SIZE + 15) & ~15)
-
-#define SW_BAD  0   /* Bad mode transition */
-#define SW_V2P  1   /* Physical mode emulation is activated */
-#define SW_P2V  2   /* Exit physical mode emulation */
-#define SW_SELF 3   /* No mode transition */
-#define SW_NOP  4   /* Mode transition, but without action required */
-
-#define GUEST_IN_PHY    0x1
-#define GUEST_PHY_EMUL  0x2
-
-#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP))
-
-#define VRN_SHIFT      61
-#define VRN_MASK       0xe000000000000000
-#define VRN0           0x0UL
-#define VRN1           0x1UL
-#define VRN2           0x2UL
-#define VRN3           0x3UL
-#define VRN4           0x4UL
-#define VRN5           0x5UL
-#define VRN6           0x6UL
-#define VRN7           0x7UL
-
-#define IRQ_NO_MASKED         0
-#define IRQ_MASKED_BY_VTPR    1
-#define IRQ_MASKED_BY_INSVC   2   /* masked by inservice IRQ */
-
-#define PTA_BASE_SHIFT      15
-
-#define IA64_PSR_VM_BIT     46
-#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT)
-
-/* Interruption Function State */
-#define IA64_IFS_V_BIT      63
-#define IA64_IFS_V  (__IA64_UL(1) << IA64_IFS_V_BIT)
-
-#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX)
-#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX)
-
-#ifndef __ASSEMBLY__
-
-#include <asm/gcc_intrin.h>
-
-#define is_physical_mode(v)            \
-       ((v->arch.mode_flags) & GUEST_IN_PHY)
-
-#define is_virtual_mode(v)     \
-       (!is_physical_mode(v))
-
-#define MODE_IND(psr)  \
-       (((psr).it << 2) + ((psr).dt << 1) + (psr).rt)
-
-#ifndef CONFIG_SMP
-#define _vmm_raw_spin_lock(x)   do { } while (0)
-#define _vmm_raw_spin_unlock(x) do { } while (0)
-#else
-typedef struct {
-       volatile unsigned int lock;
-} vmm_spinlock_t;
-#define _vmm_raw_spin_lock(x)                                          \
-       do {                                                            \
-               __u32 *ia64_spinlock_ptr = (__u32 *) (x);               \
-               __u64 ia64_spinlock_val;                                \
-               ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
-               if (unlikely(ia64_spinlock_val)) {                      \
-                       do {                                            \
-                               while (*ia64_spinlock_ptr)              \
-                               ia64_barrier();                         \
-                               ia64_spinlock_val =                     \
-                               ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
-                       } while (ia64_spinlock_val);                    \
-               }                                                       \
-       } while (0)
-
-#define _vmm_raw_spin_unlock(x)                                \
-       do { barrier();                         \
-               ((vmm_spinlock_t *)x)->lock = 0; } \
-while (0)
-#endif
-
-void vmm_spin_lock(vmm_spinlock_t *lock);
-void vmm_spin_unlock(vmm_spinlock_t *lock);
-enum {
-       I_TLB = 1,
-       D_TLB = 2
-};
-
-union kvm_va {
-       struct {
-               unsigned long off : 60;         /* intra-region offset */
-               unsigned long reg :  4;         /* region number */
-       } f;
-       unsigned long l;
-       void *p;
-};
-
-#define __kvm_pa(x)     ({union kvm_va _v; _v.l = (long) (x);          \
-                                               _v.f.reg = 0; _v.l; })
-#define __kvm_va(x)     ({union kvm_va _v; _v.l = (long) (x);          \
-                               _v.f.reg = -1; _v.p; })
-
-#define _REGION_ID(x)           ({union ia64_rr _v; _v.val = (long)(x); \
-                                               _v.rid; })
-#define _REGION_PAGE_SIZE(x)    ({union ia64_rr _v; _v.val = (long)(x); \
-                                               _v.ps; })
-#define _REGION_HW_WALKER(x)    ({union ia64_rr _v; _v.val = (long)(x);        \
-                                               _v.ve; })
-
-enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF };
-enum tlb_miss_type { INSTRUCTION, DATA, REGISTER };
-
-#define VCPU(_v, _x) ((_v)->arch.vpd->_x)
-#define VMX(_v, _x)  ((_v)->arch._x)
-
-#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i])
-#define VLSAPIC_XTP(_v)        VMX(_v, xtp)
-
-static inline unsigned long itir_ps(unsigned long itir)
-{
-       return ((itir >> 2) & 0x3f);
-}
-
-
-/**************************************************************************
-  VCPU control register access routines
- **************************************************************************/
-
-static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, itir));
-}
-
-static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, itir) = val;
-}
-
-static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, ifa));
-}
-
-static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, ifa) = val;
-}
-
-static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, iva));
-}
-
-static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, pta));
-}
-
-static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, lid));
-}
-
-static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, tpr));
-}
-
-static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu)
-{
-       return (0UL);           /* reads of eoi always return 0 */
-}
-
-static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, irr[0]));
-}
-
-static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, irr[1]));
-}
-
-static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, irr[2]));
-}
-
-static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu)
-{
-       return ((u64)VCPU(vcpu, irr[3]));
-}
-
-static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val)
-{
-       ia64_setreg(_IA64_REG_CR_DCR, val);
-}
-
-static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, isr) = val;
-}
-
-static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, lid) = val;
-}
-
-static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, ipsr) = val;
-}
-
-static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, iip) = val;
-}
-
-static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, ifs) = val;
-}
-
-static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, iipa) = val;
-}
-
-static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val)
-{
-       VCPU(vcpu, iha) = val;
-}
-
-
-static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg)
-{
-       return vcpu->arch.vrr[reg>>61];
-}
-
-/**************************************************************************
-  VCPU debug breakpoint register access routines
- **************************************************************************/
-
-static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-       __ia64_set_dbr(reg, val);
-}
-
-static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-       ia64_set_ibr(reg, val);
-}
-
-static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg)
-{
-       return ((u64)__ia64_get_dbr(reg));
-}
-
-static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg)
-{
-       return ((u64)ia64_get_ibr(reg));
-}
-
-/**************************************************************************
-  VCPU performance monitor register access routines
- **************************************************************************/
-static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-       /* NOTE: Writes to unimplemented PMC registers are discarded */
-       ia64_set_pmc(reg, val);
-}
-
-static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-       /* NOTE: Writes to unimplemented PMD registers are discarded */
-       ia64_set_pmd(reg, val);
-}
-
-static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg)
-{
-       /* NOTE: Reads from unimplemented PMC registers return zero */
-       return ((u64)ia64_get_pmc(reg));
-}
-
-static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg)
-{
-       /* NOTE: Reads from unimplemented PMD registers return zero */
-       return ((u64)ia64_get_pmd(reg));
-}
-
-static inline unsigned long vrrtomrr(unsigned long val)
-{
-       union ia64_rr rr;
-       rr.val = val;
-       rr.rid = (rr.rid << 4) | 0xe;
-       if (rr.ps > PAGE_SHIFT)
-               rr.ps = PAGE_SHIFT;
-       rr.ve = 1;
-       return rr.val;
-}
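The same conversion can be tried out in user space; the ia64_rr layout is replicated here from the architectural region-register format (ve in bit 0, ps in bits 2..7, rid in bits 8..31), and PAGE_SHIFT is assumed to be 14 (16KB pages, the common ia64 configuration). A sketch only, not the kernel code:

#include <stdio.h>

#define PAGE_SHIFT 14

union ia64_rr {
	struct {
		unsigned long ve        :  1;
		unsigned long reserved0 :  1;
		unsigned long ps        :  6;
		unsigned long rid       : 24;
		unsigned long reserved1 : 32;
	};
	unsigned long val;
};

static unsigned long vrrtomrr(unsigned long val)
{
	union ia64_rr rr;

	rr.val = val;
	rr.rid = (rr.rid << 4) | 0xe;	/* map the guest RID into a disjoint machine RID space */
	if (rr.ps > PAGE_SHIFT)
		rr.ps = PAGE_SHIFT;	/* never exceed the host page size */
	rr.ve = 1;			/* enable the VHPT walker */
	return rr.val;
}

int main(void)
{
	unsigned long guest_rr = (0x123UL << 8) | (16 << 2);	/* rid 0x123, 64KB pages */

	printf("guest rr 0x%lx -> machine rr 0x%lx\n", guest_rr, vrrtomrr(guest_rr));
	return 0;
}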
-
-
-static inline int highest_bits(int *dat)
-{
-       u32  bits, bitnum;
-       int i;
-
-       /* loop for all 256 bits */
-       for (i = 7; i >= 0 ; i--) {
-               bits = dat[i];
-               if (bits) {
-                       bitnum = fls(bits);
-                       return i * 32 + bitnum - 1;
-               }
-       }
-       return NULL_VECTOR;
-}
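Outside the kernel the same priority scan can be demonstrated with a stand-alone program: the 256-bit IRR is viewed as eight 32-bit words scanned from the top down. fls() is open-coded here and NULL_VECTOR is assumed to be -1 purely for the demonstration:

#include <stdio.h>

#define NULL_VECTOR	(-1)		/* assumed sentinel value */

static int fls32(unsigned int x)
{
	return x ? 32 - __builtin_clz(x) : 0;
}

static int highest_bits(int *dat)
{
	unsigned int bits;
	int i;

	for (i = 7; i >= 0; i--) {	/* scan all 256 bits, highest word first */
		bits = dat[i];
		if (bits)
			return i * 32 + fls32(bits) - 1;
	}
	return NULL_VECTOR;
}

int main(void)
{
	int irr[8] = { 0 };

	irr[2] = 1u << 5;		/* pend vector 69 (2*32 + 5) */
	irr[0] = 1u << 14;		/* and vector 14 */
	printf("highest pending vector: %d\n", highest_bits(irr));	/* prints 69 */
	return 0;
}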
-
-/*
- * Return true if the pending irq has higher priority than the
- * in-service one.
- */
-static inline int is_higher_irq(int pending, int inservice)
-{
-       return ((pending > inservice)
-                       || ((pending != NULL_VECTOR)
-                               && (inservice == NULL_VECTOR)));
-}
-
-static inline int is_higher_class(int pending, int mic)
-{
-       return ((pending >> 4) > mic);
-}
-
-/*
- * Return 0-255 for the highest pending irq,
- * or NULL_VECTOR when none is pending.
- */
-static inline int highest_pending_irq(struct kvm_vcpu *vcpu)
-{
-       if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR))
-               return NMI_VECTOR;
-       if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR))
-               return ExtINT_VECTOR;
-
-       return highest_bits((int *)&VCPU(vcpu, irr[0]));
-}
-
-static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
-{
-       if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR))
-               return NMI_VECTOR;
-       if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR))
-               return ExtINT_VECTOR;
-
-       return highest_bits((int *)&(VMX(vcpu, insvc[0])));
-}
-
-extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-                                       struct ia64_fpreg *val);
-extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-                                       struct ia64_fpreg *val);
-extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg);
-extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg,
-                       u64 val, int nat);
-extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu);
-extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val);
-extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
-extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
-extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
-                                       u64 itir, u64 va, int type);
-extern struct thash_data *vhpt_lookup(u64 va);
-extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
-extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
-extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
-extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
-extern void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
-               u64 itir, u64 ifa, int type);
-extern void thash_purge_all(struct kvm_vcpu *v);
-extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
-                                               u64 va, int is_data);
-extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va,
-                                               u64 ps, int is_data);
-
-extern void vcpu_increment_iip(struct kvm_vcpu *v);
-extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu);
-extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
-extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
-extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr);
-extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr);
-extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr);
-extern void nested_dtlb(struct kvm_vcpu *vcpu);
-extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr);
-extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref);
-
-extern void update_vhpi(struct kvm_vcpu *vcpu, int vec);
-extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice);
-
-extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle);
-extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma);
-extern void vmm_transition(struct kvm_vcpu *vcpu);
-extern void vmm_trampoline(union context *from, union context *to);
-extern int vmm_entry(void);
-extern  u64 vcpu_get_itc(struct kvm_vcpu *vcpu);
-
-extern void vmm_reset_entry(void);
-void kvm_init_vtlb(struct kvm_vcpu *v);
-void kvm_init_vhpt(struct kvm_vcpu *v);
-void thash_init(struct thash_cb *hcb, u64 sz);
-
-void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
-u64 kvm_gpa_to_mpa(u64 gpa);
-extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
-               u64 arg4, u64 arg5, u64 arg6, u64 arg7);
-
-extern long vmm_sanity;
-
-#endif
-#endif /* __VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
deleted file mode 100644 (file)
index 176a12c..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * vmm.c: vmm module interface with kvm module
- *
- * Copyright (c) 2007, Intel Corporation.
- *
- *  Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/fpswa.h>
-
-#include "vcpu.h"
-
-MODULE_AUTHOR("Intel");
-MODULE_LICENSE("GPL");
-
-extern char kvm_ia64_ivt;
-extern char kvm_asm_mov_from_ar;
-extern char kvm_asm_mov_from_ar_sn2;
-extern fpswa_interface_t *vmm_fpswa_interface;
-
-long vmm_sanity = 1;
-
-struct kvm_vmm_info vmm_info = {
-       .module                 = THIS_MODULE,
-       .vmm_entry              = vmm_entry,
-       .tramp_entry            = vmm_trampoline,
-       .vmm_ivt                = (unsigned long)&kvm_ia64_ivt,
-       .patch_mov_ar           = (unsigned long)&kvm_asm_mov_from_ar,
-       .patch_mov_ar_sn2       = (unsigned long)&kvm_asm_mov_from_ar_sn2,
-};
-
-static int __init  kvm_vmm_init(void)
-{
-
-       vmm_fpswa_interface = fpswa_interface;
-
-       /*Register vmm data to kvm side*/
-       return kvm_init(&vmm_info, 1024, 0, THIS_MODULE);
-}
-
-static void __exit kvm_vmm_exit(void)
-{
-       kvm_exit();
-       return ;
-}
-
-void vmm_spin_lock(vmm_spinlock_t *lock)
-{
-       _vmm_raw_spin_lock(lock);
-}
-
-void vmm_spin_unlock(vmm_spinlock_t *lock)
-{
-       _vmm_raw_spin_unlock(lock);
-}
-
-static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
-{
-       struct exit_ctl_data *p = &vcpu->arch.exit_data;
-       long psr;
-
-       local_irq_save(psr);
-       p->exit_reason = EXIT_REASON_DEBUG;
-       vmm_transition(vcpu);
-       local_irq_restore(psr);
-}
-
-asmlinkage int printk(const char *fmt, ...)
-{
-       struct kvm_vcpu *vcpu = current_vcpu;
-       va_list args;
-       int r;
-
-       memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
-       va_start(args, fmt);
-       r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
-       va_end(args);
-       vcpu_debug_exit(vcpu);
-       return r;
-}
-
-module_init(kvm_vmm_init)
-module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
deleted file mode 100644 (file)
index 397e34a..0000000
+++ /dev/null
@@ -1,1392 +0,0 @@
-/*
- * arch/ia64/kvm/vmm_ivt.S
- *
- * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
- *      Stephane Eranian <eranian@hpl.hp.com>
- *      David Mosberger <davidm@hpl.hp.com>
- * Copyright (C) 2000, 2002-2003 Intel Co
- *      Asit Mallick <asit.k.mallick@intel.com>
- *      Suresh Siddha <suresh.b.siddha@intel.com>
- *      Kenneth Chen <kenneth.w.chen@intel.com>
- *      Fenghua Yu <fenghua.yu@intel.com>
- *
- *
- * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling
- * for SMP
- * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB
- * handler now uses virtual PT.
- *
- * 07/6/20 Xuefei Xu  (Anthony Xu) (anthony.xu@intel.com)
- *              Supporting Intel virtualization architecture
- *
- */
-
-/*
- * This file defines the interruption vector table used by the CPU.
- * It does not include one entry per possible cause of interruption.
- *
- * The first 20 entries of the table contain 64 bundles each while the
- * remaining 48 entries contain only 16 bundles each.
- *
- * The 64 bundles are used to allow inlining the whole handler for
- * critical interruptions like TLB misses.
- *
- *  For each entry, the comment is as follows:
- *
- *              // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
- *  entry offset ----/     /         /                  /          /
- *  entry number ---------/         /                  /          /
- *  size of the entry -------------/                  /          /
- *  vector name -------------------------------------/          /
- *  interruptions triggering this vector ----------------------/
- *
- * The table is 32KB in size and must be aligned on a 32KB boundary.
- * (The CPU ignores the 15 lower bits of the address)
- *
- * Table is based upon EAS2.6 (Oct 1999)
- */
-
-
-#include <asm/asmmacro.h>
-#include <asm/cache.h>
-#include <asm/pgtable.h>
-
-#include "asm-offsets.h"
-#include "vcpu.h"
-#include "kvm_minstate.h"
-#include "vti.h"
-
-#if 0
-# define PSR_DEFAULT_BITS   psr.ac
-#else
-# define PSR_DEFAULT_BITS   0
-#endif
-
-#define KVM_FAULT(n)    \
-       kvm_fault_##n:;          \
-       mov r19=n;;          \
-       br.sptk.many kvm_vmm_panic;         \
-       ;;                  \
-
-#define KVM_REFLECT(n)    \
-       mov r31=pr;           \
-       mov r19=n;       /* prepare to save predicates */ \
-       mov r29=cr.ipsr;      \
-       ;;      \
-       tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
-(p7)   br.sptk.many kvm_dispatch_reflection;        \
-       br.sptk.many kvm_vmm_panic;      \
-
-GLOBAL_ENTRY(kvm_vmm_panic)
-       KVM_SAVE_MIN_WITH_COVER_R19
-       alloc r14=ar.pfs,0,0,1,0
-       mov out0=r15
-       adds r3=8,r2                // set up second base pointer
-       ;;
-       ssm psr.ic
-       ;;
-       srlz.i    // guarantee that interruption collection is on
-       ;;
-       (p15) ssm psr.i               // restore psr.i
-       addl r14=@gprel(ia64_leave_hypervisor),gp
-       ;;
-       KVM_SAVE_REST
-       mov rp=r14
-       ;;
-       br.call.sptk.many b6=vmm_panic_handler;
-END(kvm_vmm_panic)
-
-    .section .text..ivt,"ax"
-
-    .align 32768    // align on 32KB boundary
-    .global kvm_ia64_ivt
-kvm_ia64_ivt:
-///////////////////////////////////////////////////////////////
-// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
-ENTRY(kvm_vhpt_miss)
-       KVM_FAULT(0)
-END(kvm_vhpt_miss)
-
-    .org kvm_ia64_ivt+0x400
-////////////////////////////////////////////////////////////////
-// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
-ENTRY(kvm_itlb_miss)
-       mov r31 = pr
-       mov r29=cr.ipsr;
-       ;;
-       tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6)   br.sptk kvm_alt_itlb_miss
-       mov r19 = 1
-       br.sptk kvm_itlb_miss_dispatch
-       KVM_FAULT(1);
-END(kvm_itlb_miss)
-
-    .org kvm_ia64_ivt+0x0800
-//////////////////////////////////////////////////////////////////
-// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
-ENTRY(kvm_dtlb_miss)
-       mov r31 = pr
-       mov r29=cr.ipsr;
-       ;;
-       tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6)   br.sptk kvm_alt_dtlb_miss
-       br.sptk kvm_dtlb_miss_dispatch
-END(kvm_dtlb_miss)
-
-     .org kvm_ia64_ivt+0x0c00
-////////////////////////////////////////////////////////////////////
-// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
-ENTRY(kvm_alt_itlb_miss)
-       mov r16=cr.ifa    // get address that caused the TLB miss
-       ;;
-       movl r17=PAGE_KERNEL
-       mov r24=cr.ipsr
-       movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-       ;;
-       and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
-       ;;
-       or r19=r17,r19      // insert PTE control bits into r19
-       ;;
-       movl r20=IA64_GRANULE_SHIFT<<2
-       ;;
-       mov cr.itir=r20
-       ;;
-       itc.i r19               // insert the TLB entry
-       mov pr=r31,-1
-       rfi
-END(kvm_alt_itlb_miss)
-
-    .org kvm_ia64_ivt+0x1000
-/////////////////////////////////////////////////////////////////////
-// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
-ENTRY(kvm_alt_dtlb_miss)
-       mov r16=cr.ifa          // get address that caused the TLB miss
-       ;;
-       movl r17=PAGE_KERNEL
-       movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-       mov r24=cr.ipsr
-       ;;
-       and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
-       ;;
-       or r19=r19,r17  // insert PTE control bits into r19
-       ;;
-       movl r20=IA64_GRANULE_SHIFT<<2
-       ;;
-       mov cr.itir=r20
-       ;;
-       itc.d r19               // insert the TLB entry
-       mov pr=r31,-1
-       rfi
-END(kvm_alt_dtlb_miss)
-
-    .org kvm_ia64_ivt+0x1400
-//////////////////////////////////////////////////////////////////////
-// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
-ENTRY(kvm_nested_dtlb_miss)
-       KVM_FAULT(5)
-END(kvm_nested_dtlb_miss)
-
-    .org kvm_ia64_ivt+0x1800
-/////////////////////////////////////////////////////////////////////
-// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
-ENTRY(kvm_ikey_miss)
-       KVM_REFLECT(6)
-END(kvm_ikey_miss)
-
-    .org kvm_ia64_ivt+0x1c00
-/////////////////////////////////////////////////////////////////////
-// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
-ENTRY(kvm_dkey_miss)
-       KVM_REFLECT(7)
-END(kvm_dkey_miss)
-
-    .org kvm_ia64_ivt+0x2000
-////////////////////////////////////////////////////////////////////
-// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
-ENTRY(kvm_dirty_bit)
-       KVM_REFLECT(8)
-END(kvm_dirty_bit)
-
-    .org kvm_ia64_ivt+0x2400
-////////////////////////////////////////////////////////////////////
-// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
-ENTRY(kvm_iaccess_bit)
-       KVM_REFLECT(9)
-END(kvm_iaccess_bit)
-
-    .org kvm_ia64_ivt+0x2800
-///////////////////////////////////////////////////////////////////
-// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
-ENTRY(kvm_daccess_bit)
-       KVM_REFLECT(10)
-END(kvm_daccess_bit)
-
-    .org kvm_ia64_ivt+0x2c00
-/////////////////////////////////////////////////////////////////
-// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
-ENTRY(kvm_break_fault)
-       mov r31=pr
-       mov r19=11
-       mov r29=cr.ipsr
-       ;;
-       KVM_SAVE_MIN_WITH_COVER_R19
-       ;;
-       alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!)
-       mov out0=cr.ifa
-       mov out2=cr.isr     // FIXME: pity to make this slow access twice
-       mov out3=cr.iim     // FIXME: pity to make this slow access twice
-       adds r3=8,r2                // set up second base pointer
-       ;;
-       ssm psr.ic
-       ;;
-       srlz.i         // guarantee that interruption collection is on
-       ;;
-       (p15)ssm psr.i               // restore psr.i
-       addl r14=@gprel(ia64_leave_hypervisor),gp
-       ;;
-       KVM_SAVE_REST
-       mov rp=r14
-       ;;
-       adds out1=16,sp
-       br.call.sptk.many b6=kvm_ia64_handle_break
-       ;;
-END(kvm_break_fault)
-
-    .org kvm_ia64_ivt+0x3000
-/////////////////////////////////////////////////////////////////
-// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
-ENTRY(kvm_interrupt)
-       mov r31=pr              // prepare to save predicates
-       mov r19=12
-       mov r29=cr.ipsr
-       ;;
-       tbit.z p6,p7=r29,IA64_PSR_VM_BIT
-       tbit.z p0,p15=r29,IA64_PSR_I_BIT
-       ;;
-(p7)   br.sptk kvm_dispatch_interrupt
-       ;;
-       mov r27=ar.rsc          /* M */
-       mov r20=r1                      /* A */
-       mov r25=ar.unat         /* M */
-       mov r26=ar.pfs          /* I */
-       mov r28=cr.iip          /* M */
-       cover                   /* B (or nothing) */
-       ;;
-       mov r1=sp
-       ;;
-       invala                  /* M */
-       mov r30=cr.ifs
-       ;;
-       addl r1=-VMM_PT_REGS_SIZE,r1
-       ;;
-       adds r17=2*L1_CACHE_BYTES,r1    /* really: biggest cache-line size */
-       adds r16=PT(CR_IPSR),r1
-       ;;
-       lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
-       st8 [r16]=r29                   /* save cr.ipsr */
-       ;;
-       lfetch.fault.excl.nt1 [r17]
-       mov r29=b0
-       ;;
-       adds r16=PT(R8),r1      /* initialize first base pointer */
-       adds r17=PT(R9),r1      /* initialize second base pointer */
-       mov r18=r0                      /* make sure r18 isn't NaT */
-       ;;
-.mem.offset 0,0; st8.spill [r16]=r8,16
-.mem.offset 8,0; st8.spill [r17]=r9,16
-        ;;
-.mem.offset 0,0; st8.spill [r16]=r10,24
-.mem.offset 8,0; st8.spill [r17]=r11,24
-        ;;
-       st8 [r16]=r28,16                /* save cr.iip */
-       st8 [r17]=r30,16                /* save cr.ifs */
-       mov r8=ar.fpsr          /* M */
-       mov r9=ar.csd
-       mov r10=ar.ssd
-       movl r11=FPSR_DEFAULT   /* L-unit */
-       ;;
-       st8 [r16]=r25,16                /* save ar.unat */
-       st8 [r17]=r26,16                /* save ar.pfs */
-       shl r18=r18,16          /* compute ar.rsc to be used for "loadrs" */
-       ;;
-       st8 [r16]=r27,16                /* save ar.rsc */
-       adds r17=16,r17         /* skip over ar_rnat field */
-       ;;
-       st8 [r17]=r31,16                /* save predicates */
-       adds r16=16,r16         /* skip over ar_bspstore field */
-       ;;
-       st8 [r16]=r29,16                /* save b0 */
-       st8 [r17]=r18,16                /* save ar.rsc value for "loadrs" */
-       ;;
-.mem.offset 0,0; st8.spill [r16]=r20,16    /* save original r1 */
-.mem.offset 8,0; st8.spill [r17]=r12,16
-       adds r12=-16,r1
-       /* switch to kernel memory stack (with 16 bytes of scratch) */
-       ;;
-.mem.offset 0,0; st8.spill [r16]=r13,16
-.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
-       ;;
-.mem.offset 0,0; st8.spill [r16]=r15,16
-.mem.offset 8,0; st8.spill [r17]=r14,16
-       dep r14=-1,r0,60,4
-       ;;
-.mem.offset 0,0; st8.spill [r16]=r2,16
-.mem.offset 8,0; st8.spill [r17]=r3,16
-       adds r2=VMM_PT_REGS_R16_OFFSET,r1
-       adds r14 = VMM_VCPU_GP_OFFSET,r13
-       ;;
-       mov r8=ar.ccv
-       ld8 r14 = [r14]
-       ;;
-       mov r1=r14       /* establish kernel global pointer */
-       ;;
-       bsw.1
-       ;;
-       alloc r14=ar.pfs,0,0,1,0        // must be first in an insn group
-       mov out0=r13
-       ;;
-       ssm psr.ic
-       ;;
-       srlz.i
-       ;;
-       //(p15) ssm psr.i
-       adds r3=8,r2            // set up second base pointer for SAVE_REST
-       srlz.i                  // ensure everybody knows psr.ic is back on
-       ;;
-.mem.offset 0,0; st8.spill [r2]=r16,16
-.mem.offset 8,0; st8.spill [r3]=r17,16
-       ;;
-.mem.offset 0,0; st8.spill [r2]=r18,16
-.mem.offset 8,0; st8.spill [r3]=r19,16
-       ;;
-.mem.offset 0,0; st8.spill [r2]=r20,16
-.mem.offset 8,0; st8.spill [r3]=r21,16
-       mov r18=b6
-       ;;
-.mem.offset 0,0; st8.spill [r2]=r22,16
-.mem.offset 8,0; st8.spill [r3]=r23,16
-       mov r19=b7
-       ;;
-.mem.offset 0,0; st8.spill [r2]=r24,16
-.mem.offset 8,0; st8.spill [r3]=r25,16
-       ;;
-.mem.offset 0,0; st8.spill [r2]=r26,16
-.mem.offset 8,0; st8.spill [r3]=r27,16
-       ;;
-.mem.offset 0,0; st8.spill [r2]=r28,16
-.mem.offset 8,0; st8.spill [r3]=r29,16
-       ;;
-.mem.offset 0,0; st8.spill [r2]=r30,16
-.mem.offset 8,0; st8.spill [r3]=r31,32
-       ;;
-       mov ar.fpsr=r11       /* M-unit */
-       st8 [r2]=r8,8         /* ar.ccv */
-       adds r24=PT(B6)-PT(F7),r3
-       ;;
-       stf.spill [r2]=f6,32
-       stf.spill [r3]=f7,32
-       ;;
-       stf.spill [r2]=f8,32
-       stf.spill [r3]=f9,32
-       ;;
-       stf.spill [r2]=f10
-       stf.spill [r3]=f11
-       adds r25=PT(B7)-PT(F11),r3
-       ;;
-       st8 [r24]=r18,16       /* b6 */
-       st8 [r25]=r19,16       /* b7 */
-       ;;
-       st8 [r24]=r9           /* ar.csd */
-       st8 [r25]=r10          /* ar.ssd */
-       ;;
-       srlz.d          // make sure we see the effect of cr.ivr
-       addl r14=@gprel(ia64_leave_nested),gp
-       ;;
-       mov rp=r14
-       br.call.sptk.many b6=kvm_ia64_handle_irq
-       ;;
-END(kvm_interrupt)
-
-    .global kvm_dispatch_vexirq
-    .org kvm_ia64_ivt+0x3400
-//////////////////////////////////////////////////////////////////////
-// 0x3400 Entry 13 (size 64 bundles) Reserved
-ENTRY(kvm_virtual_exirq)
-       mov r31=pr
-       mov r19=13
-       mov r30 =r0
-       ;;
-kvm_dispatch_vexirq:
-       cmp.eq p6,p0 = 1,r30
-       ;;
-(p6)   add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
-       ;;
-(p6)   ld8 r1 = [r29]
-       ;;
-       KVM_SAVE_MIN_WITH_COVER_R19
-       alloc r14=ar.pfs,0,0,1,0
-       mov out0=r13
-
-       ssm psr.ic
-       ;;
-       srlz.i // guarantee that interruption collection is on
-       ;;
-       (p15) ssm psr.i               // restore psr.i
-       adds r3=8,r2                // set up second base pointer
-       ;;
-       KVM_SAVE_REST
-       addl r14=@gprel(ia64_leave_hypervisor),gp
-       ;;
-       mov rp=r14
-       br.call.sptk.many b6=kvm_vexirq
-END(kvm_virtual_exirq)
-
-    .org kvm_ia64_ivt+0x3800
-/////////////////////////////////////////////////////////////////////
-// 0x3800 Entry 14 (size 64 bundles) Reserved
-       KVM_FAULT(14)
-       // this code segment is from 2.6.16.13
-
-    .org kvm_ia64_ivt+0x3c00
-///////////////////////////////////////////////////////////////////////
-// 0x3c00 Entry 15 (size 64 bundles) Reserved
-       KVM_FAULT(15)
-
-    .org kvm_ia64_ivt+0x4000
-///////////////////////////////////////////////////////////////////////
-// 0x4000 Entry 16 (size 64 bundles) Reserved
-       KVM_FAULT(16)
-
-    .org kvm_ia64_ivt+0x4400
-//////////////////////////////////////////////////////////////////////
-// 0x4400 Entry 17 (size 64 bundles) Reserved
-       KVM_FAULT(17)
-
-    .org kvm_ia64_ivt+0x4800
-//////////////////////////////////////////////////////////////////////
-// 0x4800 Entry 18 (size 64 bundles) Reserved
-       KVM_FAULT(18)
-
-    .org kvm_ia64_ivt+0x4c00
-//////////////////////////////////////////////////////////////////////
-// 0x4c00 Entry 19 (size 64 bundles) Reserved
-       KVM_FAULT(19)
-
-    .org kvm_ia64_ivt+0x5000
-//////////////////////////////////////////////////////////////////////
-// 0x5000 Entry 20 (size 16 bundles) Page Not Present
-ENTRY(kvm_page_not_present)
-       KVM_REFLECT(20)
-END(kvm_page_not_present)
-
-    .org kvm_ia64_ivt+0x5100
-///////////////////////////////////////////////////////////////////////
-// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
-ENTRY(kvm_key_permission)
-       KVM_REFLECT(21)
-END(kvm_key_permission)
-
-    .org kvm_ia64_ivt+0x5200
-//////////////////////////////////////////////////////////////////////
-// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
-ENTRY(kvm_iaccess_rights)
-       KVM_REFLECT(22)
-END(kvm_iaccess_rights)
-
-    .org kvm_ia64_ivt+0x5300
-//////////////////////////////////////////////////////////////////////
-// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
-ENTRY(kvm_daccess_rights)
-       KVM_REFLECT(23)
-END(kvm_daccess_rights)
-
-    .org kvm_ia64_ivt+0x5400
-/////////////////////////////////////////////////////////////////////
-// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
-ENTRY(kvm_general_exception)
-       KVM_REFLECT(24)
-       KVM_FAULT(24)
-END(kvm_general_exception)
-
-    .org kvm_ia64_ivt+0x5500
-//////////////////////////////////////////////////////////////////////
-// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
-ENTRY(kvm_disabled_fp_reg)
-       KVM_REFLECT(25)
-END(kvm_disabled_fp_reg)
-
-    .org kvm_ia64_ivt+0x5600
-////////////////////////////////////////////////////////////////////
-// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
-ENTRY(kvm_nat_consumption)
-       KVM_REFLECT(26)
-END(kvm_nat_consumption)
-
-    .org kvm_ia64_ivt+0x5700
-/////////////////////////////////////////////////////////////////////
-// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
-ENTRY(kvm_speculation_vector)
-       KVM_REFLECT(27)
-END(kvm_speculation_vector)
-
-    .org kvm_ia64_ivt+0x5800
-/////////////////////////////////////////////////////////////////////
-// 0x5800 Entry 28 (size 16 bundles) Reserved
-       KVM_FAULT(28)
-
-    .org kvm_ia64_ivt+0x5900
-///////////////////////////////////////////////////////////////////
-// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
-ENTRY(kvm_debug_vector)
-       KVM_FAULT(29)
-END(kvm_debug_vector)
-
-    .org kvm_ia64_ivt+0x5a00
-///////////////////////////////////////////////////////////////
-// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
-ENTRY(kvm_unaligned_access)
-       KVM_REFLECT(30)
-END(kvm_unaligned_access)
-
-    .org kvm_ia64_ivt+0x5b00
-//////////////////////////////////////////////////////////////////////
-// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
-ENTRY(kvm_unsupported_data_reference)
-       KVM_REFLECT(31)
-END(kvm_unsupported_data_reference)
-
-    .org kvm_ia64_ivt+0x5c00
-////////////////////////////////////////////////////////////////////
-// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
-ENTRY(kvm_floating_point_fault)
-       KVM_REFLECT(32)
-END(kvm_floating_point_fault)
-
-    .org kvm_ia64_ivt+0x5d00
-/////////////////////////////////////////////////////////////////////
-// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
-ENTRY(kvm_floating_point_trap)
-       KVM_REFLECT(33)
-END(kvm_floating_point_trap)
-
-    .org kvm_ia64_ivt+0x5e00
-//////////////////////////////////////////////////////////////////////
-// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
-ENTRY(kvm_lower_privilege_trap)
-       KVM_REFLECT(34)
-END(kvm_lower_privilege_trap)
-
-    .org kvm_ia64_ivt+0x5f00
-//////////////////////////////////////////////////////////////////////
-// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
-ENTRY(kvm_taken_branch_trap)
-       KVM_REFLECT(35)
-END(kvm_taken_branch_trap)
-
-    .org kvm_ia64_ivt+0x6000
-////////////////////////////////////////////////////////////////////
-// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
-ENTRY(kvm_single_step_trap)
-       KVM_REFLECT(36)
-END(kvm_single_step_trap)
-    .global kvm_virtualization_fault_back
-    .org kvm_ia64_ivt+0x6100
-/////////////////////////////////////////////////////////////////////
-// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
-ENTRY(kvm_virtualization_fault)
-       mov r31=pr
-       adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-       ;;
-       st8 [r16] = r1
-       adds r17 = VMM_VCPU_GP_OFFSET, r21
-       ;;
-       ld8 r1 = [r17]
-       cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
-       cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
-       cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
-       cmp.eq p9,p0=EVENT_RSM,r24
-       cmp.eq p10,p0=EVENT_SSM,r24
-       cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
-       cmp.eq p12,p0=EVENT_THASH,r24
-(p6)   br.dptk.many kvm_asm_mov_from_ar
-(p7)   br.dptk.many kvm_asm_mov_from_rr
-(p8)   br.dptk.many kvm_asm_mov_to_rr
-(p9)   br.dptk.many kvm_asm_rsm
-(p10)  br.dptk.many kvm_asm_ssm
-(p11)  br.dptk.many kvm_asm_mov_to_psr
-(p12)  br.dptk.many kvm_asm_thash
-       ;;
-kvm_virtualization_fault_back:
-       adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-       ;;
-       ld8 r1 = [r16]
-       ;;
-       mov r19=37
-       adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
-       adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
-       ;;
-       st8 [r16] = r24
-       st8 [r17] = r25
-       ;;
-       cmp.ne p6,p0=EVENT_RFI, r24
-(p6)   br.sptk kvm_dispatch_virtualization_fault
-       ;;
-       adds r18=VMM_VPD_BASE_OFFSET,r21
-       ;;
-       ld8 r18=[r18]
-       ;;
-       adds r18=VMM_VPD_VIFS_OFFSET,r18
-       ;;
-       ld8 r18=[r18]
-       ;;
-       tbit.z p6,p0=r18,63
-(p6)   br.sptk kvm_dispatch_virtualization_fault
-       ;;
-//if vifs.v=1, discard the current register frame
-       alloc r18=ar.pfs,0,0,0,0
-       br.sptk kvm_dispatch_virtualization_fault
-END(kvm_virtualization_fault)
-
-    .org kvm_ia64_ivt+0x6200
-//////////////////////////////////////////////////////////////
-// 0x6200 Entry 38 (size 16 bundles) Reserved
-       KVM_FAULT(38)
-
-    .org kvm_ia64_ivt+0x6300
-/////////////////////////////////////////////////////////////////
-// 0x6300 Entry 39 (size 16 bundles) Reserved
-       KVM_FAULT(39)
-
-    .org kvm_ia64_ivt+0x6400
-/////////////////////////////////////////////////////////////////
-// 0x6400 Entry 40 (size 16 bundles) Reserved
-       KVM_FAULT(40)
-
-    .org kvm_ia64_ivt+0x6500
-//////////////////////////////////////////////////////////////////
-// 0x6500 Entry 41 (size 16 bundles) Reserved
-       KVM_FAULT(41)
-
-    .org kvm_ia64_ivt+0x6600
-//////////////////////////////////////////////////////////////////
-// 0x6600 Entry 42 (size 16 bundles) Reserved
-       KVM_FAULT(42)
-
-    .org kvm_ia64_ivt+0x6700
-//////////////////////////////////////////////////////////////////
-// 0x6700 Entry 43 (size 16 bundles) Reserved
-       KVM_FAULT(43)
-
-    .org kvm_ia64_ivt+0x6800
-//////////////////////////////////////////////////////////////////
-// 0x6800 Entry 44 (size 16 bundles) Reserved
-       KVM_FAULT(44)
-
-    .org kvm_ia64_ivt+0x6900
-///////////////////////////////////////////////////////////////////
-// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception
-//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
-ENTRY(kvm_ia32_exception)
-       KVM_FAULT(45)
-END(kvm_ia32_exception)
-
-    .org kvm_ia64_ivt+0x6a00
-////////////////////////////////////////////////////////////////////
-// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
-ENTRY(kvm_ia32_intercept)
-       KVM_FAULT(47)
-END(kvm_ia32_intercept)
-
-    .org kvm_ia64_ivt+0x6c00
-/////////////////////////////////////////////////////////////////////
-// 0x6c00 Entry 48 (size 16 bundles) Reserved
-       KVM_FAULT(48)
-
-    .org kvm_ia64_ivt+0x6d00
-//////////////////////////////////////////////////////////////////////
-// 0x6d00 Entry 49 (size 16 bundles) Reserved
-       KVM_FAULT(49)
-
-    .org kvm_ia64_ivt+0x6e00
-//////////////////////////////////////////////////////////////////////
-// 0x6e00 Entry 50 (size 16 bundles) Reserved
-       KVM_FAULT(50)
-
-    .org kvm_ia64_ivt+0x6f00
-/////////////////////////////////////////////////////////////////////
-// 0x6f00 Entry 51 (size 16 bundles) Reserved
-       KVM_FAULT(52)
-
-    .org kvm_ia64_ivt+0x7100
-////////////////////////////////////////////////////////////////////
-// 0x7100 Entry 53 (size 16 bundles) Reserved
-       KVM_FAULT(53)
-
-    .org kvm_ia64_ivt+0x7200
-/////////////////////////////////////////////////////////////////////
-// 0x7200 Entry 54 (size 16 bundles) Reserved
-       KVM_FAULT(54)
-
-    .org kvm_ia64_ivt+0x7300
-////////////////////////////////////////////////////////////////////
-// 0x7300 Entry 55 (size 16 bundles) Reserved
-       KVM_FAULT(55)
-
-    .org kvm_ia64_ivt+0x7400
-////////////////////////////////////////////////////////////////////
-// 0x7400 Entry 56 (size 16 bundles) Reserved
-       KVM_FAULT(56)
-
-    .org kvm_ia64_ivt+0x7500
-/////////////////////////////////////////////////////////////////////
-// 0x7500 Entry 57 (size 16 bundles) Reserved
-       KVM_FAULT(57)
-
-    .org kvm_ia64_ivt+0x7600
-/////////////////////////////////////////////////////////////////////
-// 0x7600 Entry 58 (size 16 bundles) Reserved
-       KVM_FAULT(58)
-
-    .org kvm_ia64_ivt+0x7700
-////////////////////////////////////////////////////////////////////
-// 0x7700 Entry 59 (size 16 bundles) Reserved
-       KVM_FAULT(59)
-
-    .org kvm_ia64_ivt+0x7800
-////////////////////////////////////////////////////////////////////
-// 0x7800 Entry 60 (size 16 bundles) Reserved
-       KVM_FAULT(60)
-
-    .org kvm_ia64_ivt+0x7900
-/////////////////////////////////////////////////////////////////////
-// 0x7900 Entry 61 (size 16 bundles) Reserved
-       KVM_FAULT(61)
-
-    .org kvm_ia64_ivt+0x7a00
-/////////////////////////////////////////////////////////////////////
-// 0x7a00 Entry 62 (size 16 bundles) Reserved
-       KVM_FAULT(62)
-
-    .org kvm_ia64_ivt+0x7b00
-/////////////////////////////////////////////////////////////////////
-// 0x7b00 Entry 63 (size 16 bundles) Reserved
-       KVM_FAULT(63)
-
-    .org kvm_ia64_ivt+0x7c00
-////////////////////////////////////////////////////////////////////
-// 0x7c00 Entry 64 (size 16 bundles) Reserved
-       KVM_FAULT(64)
-
-    .org kvm_ia64_ivt+0x7d00
-/////////////////////////////////////////////////////////////////////
-// 0x7d00 Entry 65 (size 16 bundles) Reserved
-       KVM_FAULT(65)
-
-    .org kvm_ia64_ivt+0x7e00
-/////////////////////////////////////////////////////////////////////
-// 0x7e00 Entry 66 (size 16 bundles) Reserved
-       KVM_FAULT(66)
-
-    .org kvm_ia64_ivt+0x7f00
-////////////////////////////////////////////////////////////////////
-// 0x7f00 Entry 67 (size 16 bundles) Reserved
-       KVM_FAULT(67)
-
-    .org kvm_ia64_ivt+0x8000
-// There is no particular reason for this code to be here, other than that
-// there happens to be space here that would go unused otherwise.  If this
-// fault ever gets "unreserved", simply move the following code to a more
-// suitable spot...
-
-
-ENTRY(kvm_dtlb_miss_dispatch)
-       mov r19 = 2
-       KVM_SAVE_MIN_WITH_COVER_R19
-       alloc r14=ar.pfs,0,0,3,0
-       mov out0=cr.ifa
-       mov out1=r15
-       adds r3=8,r2                // set up second base pointer
-       ;;
-       ssm psr.ic
-       ;;
-       srlz.i     // guarantee that interruption collection is on
-       ;;
-       (p15) ssm psr.i               // restore psr.i
-       addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
-       ;;
-       KVM_SAVE_REST
-       KVM_SAVE_EXTRA
-       mov rp=r14
-       ;;
-       adds out2=16,r12
-       br.call.sptk.many b6=kvm_page_fault
-END(kvm_dtlb_miss_dispatch)
-
-ENTRY(kvm_itlb_miss_dispatch)
-
-       KVM_SAVE_MIN_WITH_COVER_R19
-       alloc r14=ar.pfs,0,0,3,0
-       mov out0=cr.ifa
-       mov out1=r15
-       adds r3=8,r2                // set up second base pointer
-       ;;
-       ssm psr.ic
-       ;;
-       srlz.i   // guarantee that interruption collection is on
-       ;;
-       (p15) ssm psr.i               // restore psr.i
-       addl r14=@gprel(ia64_leave_hypervisor),gp
-       ;;
-       KVM_SAVE_REST
-       mov rp=r14
-       ;;
-       adds out2=16,r12
-       br.call.sptk.many b6=kvm_page_fault
-END(kvm_itlb_miss_dispatch)
-
-ENTRY(kvm_dispatch_reflection)
-/*
- * Input:
- *  psr.ic: off
- *  r19:    intr type (offset into ivt, see ia64_int.h)
- *  r31:    contains saved predicates (pr)
- */
-       KVM_SAVE_MIN_WITH_COVER_R19
-       alloc r14=ar.pfs,0,0,5,0
-       mov out0=cr.ifa
-       mov out1=cr.isr
-       mov out2=cr.iim
-       mov out3=r15
-       adds r3=8,r2                // set up second base pointer
-       ;;
-       ssm psr.ic
-       ;;
-       srlz.i   // guarantee that interruption collection is on
-       ;;
-       (p15) ssm psr.i               // restore psr.i
-       addl r14=@gprel(ia64_leave_hypervisor),gp
-       ;;
-       KVM_SAVE_REST
-       mov rp=r14
-       ;;
-       adds out4=16,r12
-       br.call.sptk.many b6=reflect_interruption
-END(kvm_dispatch_reflection)
-
-ENTRY(kvm_dispatch_virtualization_fault)
-       adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
-       adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
-       ;;
-       st8 [r16] = r24
-       st8 [r17] = r25
-       ;;
-       KVM_SAVE_MIN_WITH_COVER_R19
-       ;;
-       alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!)
-       mov out0=r13        //vcpu
-       adds r3=8,r2                // set up second base pointer
-       ;;
-       ssm psr.ic
-       ;;
-       srlz.i    // guarantee that interruption collection is on
-       ;;
-       (p15) ssm psr.i               // restore psr.i
-       addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
-       ;;
-       KVM_SAVE_REST
-       KVM_SAVE_EXTRA
-       mov rp=r14
-       ;;
-       adds out1=16,sp         //regs
-       br.call.sptk.many b6=kvm_emulate
-END(kvm_dispatch_virtualization_fault)
-
-
-ENTRY(kvm_dispatch_interrupt)
-       KVM_SAVE_MIN_WITH_COVER_R19     // uses r31; defines r2 and r3
-       ;;
-       alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
-       adds r3=8,r2            // set up second base pointer for SAVE_REST
-       ;;
-       ssm psr.ic
-       ;;
-       srlz.i
-       ;;
-       (p15) ssm psr.i
-       addl r14=@gprel(ia64_leave_hypervisor),gp
-       ;;
-       KVM_SAVE_REST
-       mov rp=r14
-       ;;
-       mov out0=r13            // pass pointer to pt_regs as second arg
-       br.call.sptk.many b6=kvm_ia64_handle_irq
-END(kvm_dispatch_interrupt)
-
-GLOBAL_ENTRY(ia64_leave_nested)
-       rsm psr.i
-       ;;
-       adds r21=PT(PR)+16,r12
-       ;;
-       lfetch [r21],PT(CR_IPSR)-PT(PR)
-       adds r2=PT(B6)+16,r12
-       adds r3=PT(R16)+16,r12
-       ;;
-       lfetch [r21]
-       ld8 r28=[r2],8          // load b6
-       adds r29=PT(R24)+16,r12
-
-       ld8.fill r16=[r3]
-       adds r3=PT(AR_CSD)-PT(R16),r3
-       adds r30=PT(AR_CCV)+16,r12
-       ;;
-       ld8.fill r24=[r29]
-       ld8 r15=[r30]           // load ar.ccv
-       ;;
-       ld8 r29=[r2],16         // load b7
-       ld8 r30=[r3],16         // load ar.csd
-       ;;
-       ld8 r31=[r2],16         // load ar.ssd
-       ld8.fill r8=[r3],16
-       ;;
-       ld8.fill r9=[r2],16
-       ld8.fill r10=[r3],PT(R17)-PT(R10)
-       ;;
-       ld8.fill r11=[r2],PT(R18)-PT(R11)
-       ld8.fill r17=[r3],16
-       ;;
-       ld8.fill r18=[r2],16
-       ld8.fill r19=[r3],16
-       ;;
-       ld8.fill r20=[r2],16
-       ld8.fill r21=[r3],16
-       mov ar.csd=r30
-       mov ar.ssd=r31
-       ;;
-       rsm psr.i | psr.ic
-       // initiate turning off of interrupt and interruption collection
-       invala                  // invalidate ALAT
-       ;;
-       srlz.i
-       ;;
-       ld8.fill r22=[r2],24
-       ld8.fill r23=[r3],24
-       mov b6=r28
-       ;;
-       ld8.fill r25=[r2],16
-       ld8.fill r26=[r3],16
-       mov b7=r29
-       ;;
-       ld8.fill r27=[r2],16
-       ld8.fill r28=[r3],16
-       ;;
-       ld8.fill r29=[r2],16
-       ld8.fill r30=[r3],24
-       ;;
-       ld8.fill r31=[r2],PT(F9)-PT(R31)
-       adds r3=PT(F10)-PT(F6),r3
-       ;;
-       ldf.fill f9=[r2],PT(F6)-PT(F9)
-       ldf.fill f10=[r3],PT(F8)-PT(F10)
-       ;;
-       ldf.fill f6=[r2],PT(F7)-PT(F6)
-       ;;
-       ldf.fill f7=[r2],PT(F11)-PT(F7)
-       ldf.fill f8=[r3],32
-       ;;
-       srlz.i                  // ensure interruption collection is off
-       mov ar.ccv=r15
-       ;;
-       bsw.0   // switch back to bank 0 (no stop bit required beforehand...)
-       ;;
-       ldf.fill f11=[r2]
-//     mov r18=r13
-//     mov r21=r13
-       adds r16=PT(CR_IPSR)+16,r12
-       adds r17=PT(CR_IIP)+16,r12
-       ;;
-       ld8 r29=[r16],16        // load cr.ipsr
-       ld8 r28=[r17],16        // load cr.iip
-       ;;
-       ld8 r30=[r16],16        // load cr.ifs
-       ld8 r25=[r17],16        // load ar.unat
-       ;;
-       ld8 r26=[r16],16        // load ar.pfs
-       ld8 r27=[r17],16        // load ar.rsc
-       cmp.eq p9,p0=r0,r0
-       // set p9 to indicate that we should restore cr.ifs
-       ;;
-       ld8 r24=[r16],16        // load ar.rnat (may be garbage)
-       ld8 r23=[r17],16// load ar.bspstore (may be garbage)
-       ;;
-       ld8 r31=[r16],16        // load predicates
-       ld8 r22=[r17],16        // load b0
-       ;;
-       ld8 r19=[r16],16        // load ar.rsc value for "loadrs"
-       ld8.fill r1=[r17],16    // load r1
-       ;;
-       ld8.fill r12=[r16],16
-       ld8.fill r13=[r17],16
-       ;;
-       ld8 r20=[r16],16        // ar.fpsr
-       ld8.fill r15=[r17],16
-       ;;
-       ld8.fill r14=[r16],16
-       ld8.fill r2=[r17]
-       ;;
-       ld8.fill r3=[r16]
-       ;;
-       mov r16=ar.bsp          // get existing backing store pointer
-       ;;
-       mov b0=r22
-       mov ar.pfs=r26
-       mov cr.ifs=r30
-       mov cr.ipsr=r29
-       mov ar.fpsr=r20
-       mov cr.iip=r28
-       ;;
-       mov ar.rsc=r27
-       mov ar.unat=r25
-       mov pr=r31,-1
-       rfi
-END(ia64_leave_nested)
-
-GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
-/*
- * work.need_resched etc. mustn't get changed by this CPU before it
- * returns to user- or fsys-mode, hence we disable interrupts early on:
- */
-       adds r2 = PT(R4)+16,r12
-       adds r3 = PT(R5)+16,r12
-       adds r8 = PT(EML_UNAT)+16,r12
-       ;;
-       ld8 r8 = [r8]
-       ;;
-       mov ar.unat=r8
-       ;;
-       ld8.fill r4=[r2],16    //load r4
-       ld8.fill r5=[r3],16    //load r5
-       ;;
-       ld8.fill r6=[r2]    //load r6
-       ld8.fill r7=[r3]    //load r7
-       ;;
-END(ia64_leave_hypervisor_prepare)
-//fall through
-GLOBAL_ENTRY(ia64_leave_hypervisor)
-       rsm psr.i
-       ;;
-       br.call.sptk.many b0=leave_hypervisor_tail
-       ;;
-       adds r20=PT(PR)+16,r12
-       adds r8=PT(EML_UNAT)+16,r12
-       ;;
-       ld8 r8=[r8]
-       ;;
-       mov ar.unat=r8
-       ;;
-       lfetch [r20],PT(CR_IPSR)-PT(PR)
-       adds r2 = PT(B6)+16,r12
-       adds r3 = PT(B7)+16,r12
-       ;;
-       lfetch [r20]
-       ;;
-       ld8 r24=[r2],16        /* B6 */
-       ld8 r25=[r3],16        /* B7 */
-       ;;
-       ld8 r26=[r2],16        /* ar_csd */
-       ld8 r27=[r3],16        /* ar_ssd */
-       mov b6 = r24
-       ;;
-       ld8.fill r8=[r2],16
-       ld8.fill r9=[r3],16
-       mov b7 = r25
-       ;;
-       mov ar.csd = r26
-       mov ar.ssd = r27
-       ;;
-       ld8.fill r10=[r2],PT(R15)-PT(R10)
-       ld8.fill r11=[r3],PT(R14)-PT(R11)
-       ;;
-       ld8.fill r15=[r2],PT(R16)-PT(R15)
-       ld8.fill r14=[r3],PT(R17)-PT(R14)
-       ;;
-       ld8.fill r16=[r2],16
-       ld8.fill r17=[r3],16
-       ;;
-       ld8.fill r18=[r2],16
-       ld8.fill r19=[r3],16
-       ;;
-       ld8.fill r20=[r2],16
-       ld8.fill r21=[r3],16
-       ;;
-       ld8.fill r22=[r2],16
-       ld8.fill r23=[r3],16
-       ;;
-       ld8.fill r24=[r2],16
-       ld8.fill r25=[r3],16
-       ;;
-       ld8.fill r26=[r2],16
-       ld8.fill r27=[r3],16
-       ;;
-       ld8.fill r28=[r2],16
-       ld8.fill r29=[r3],16
-       ;;
-       ld8.fill r30=[r2],PT(F6)-PT(R30)
-       ld8.fill r31=[r3],PT(F7)-PT(R31)
-       ;;
-       rsm psr.i | psr.ic
-       // initiate turning off of interrupt and interruption collection
-       invala          // invalidate ALAT
-       ;;
-       srlz.i          // ensure interruption collection is off
-       ;;
-       bsw.0
-       ;;
-       adds r16 = PT(CR_IPSR)+16,r12
-       adds r17 = PT(CR_IIP)+16,r12
-       mov r21=r13             // get current
-       ;;
-       ld8 r31=[r16],16    // load cr.ipsr
-       ld8 r30=[r17],16    // load cr.iip
-       ;;
-       ld8 r29=[r16],16    // load cr.ifs
-       ld8 r28=[r17],16    // load ar.unat
-       ;;
-       ld8 r27=[r16],16    // load ar.pfs
-       ld8 r26=[r17],16    // load ar.rsc
-       ;;
-       ld8 r25=[r16],16    // load ar.rnat
-       ld8 r24=[r17],16    // load ar.bspstore
-       ;;
-       ld8 r23=[r16],16    // load predicates
-       ld8 r22=[r17],16    // load b0
-       ;;
-       ld8 r20=[r16],16    // load ar.rsc value for "loadrs"
-       ld8.fill r1=[r17],16    //load r1
-       ;;
-       ld8.fill r12=[r16],16    //load r12
-       ld8.fill r13=[r17],PT(R2)-PT(R13)    //load r13
-       ;;
-       ld8 r19=[r16],PT(R3)-PT(AR_FPSR)    //load ar_fpsr
-       ld8.fill r2=[r17],PT(AR_CCV)-PT(R2)    //load r2
-       ;;
-       ld8.fill r3=[r16]       //load r3
-       ld8 r18=[r17]   //load ar_ccv
-       ;;
-       mov ar.fpsr=r19
-       mov ar.ccv=r18
-       shr.u r18=r20,16
-       ;;
-kvm_rbs_switch:
-       mov r19=96
-
-kvm_dont_preserve_current_frame:
-/*
- * To prevent leaking bits between the hypervisor and guest domain,
- * we must clear the stacked registers in the "invalid" partition here
- * (about 5 registers/cycle on McKinley).
- */
-#   define pRecurse    p6
-#   define pReturn     p7
-#   define Nregs       14
-
-       alloc loc0=ar.pfs,2,Nregs-2,2,0
-       shr.u loc1=r18,9        // RNaTslots <= floor(dirtySize / (64*8))
-       sub r19=r19,r18         // r19 = (physStackedSize + 8) - dirtySize
-       ;;
-       mov ar.rsc=r20          // load ar.rsc to be used for "loadrs"
-       shladd in0=loc1,3,r19
-       mov in1=0
-       ;;
-       TEXT_ALIGN(32)
-kvm_rse_clear_invalid:
-       alloc loc0=ar.pfs,2,Nregs-2,2,0
-       cmp.lt pRecurse,p0=Nregs*8,in0
-       // if more than Nregs regs left to clear, (re)curse
-       add out0=-Nregs*8,in0
-       add out1=1,in1          // increment recursion count
-       mov loc1=0
-       mov loc2=0
-       ;;
-       mov loc3=0
-       mov loc4=0
-       mov loc5=0
-       mov loc6=0
-       mov loc7=0
-(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
-       ;;
-       mov loc8=0
-       mov loc9=0
-       cmp.ne pReturn,p0=r0,in1
-       // if recursion count != 0, we need to do a br.ret
-       mov loc10=0
-       mov loc11=0
-(pReturn) br.ret.dptk.many b0
-
-#      undef pRecurse
-#      undef pReturn
-
-// loadrs has already been shifted
-       alloc r16=ar.pfs,0,0,0,0    // drop current register frame
-       ;;
-       loadrs
-       ;;
-       mov ar.bspstore=r24
-       ;;
-       mov ar.unat=r28
-       mov ar.rnat=r25
-       mov ar.rsc=r26
-       ;;
-       mov cr.ipsr=r31
-       mov cr.iip=r30
-       mov cr.ifs=r29
-       mov ar.pfs=r27
-       adds r18=VMM_VPD_BASE_OFFSET,r21
-       ;;
-       ld8 r18=[r18]   //vpd
-       adds r17=VMM_VCPU_ISR_OFFSET,r21
-       ;;
-       ld8 r17=[r17]
-       adds r19=VMM_VPD_VPSR_OFFSET,r18
-       ;;
-       ld8 r19=[r19]        //vpsr
-       mov r25=r18
-       adds r16= VMM_VCPU_GP_OFFSET,r21
-       ;;
-       ld8 r16= [r16] // Put gp in r24
-       movl r24=@gprel(ia64_vmm_entry)  // calculate return address
-       ;;
-       add  r24=r24,r16
-       ;;
-       br.sptk.many  kvm_vps_sync_write       // call the service
-       ;;
-END(ia64_leave_hypervisor)
-// fall through
-GLOBAL_ENTRY(ia64_vmm_entry)
-/*
- *  must be at bank 0
- *  parameter:
- *  r17:cr.isr
- *  r18:vpd
- *  r19:vpsr
- *  r22:b0
- *  r23:predicate
- */
-       mov r24=r22
-       mov r25=r18
-       tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT        // p1=vpsr.ic
-(p1)   br.cond.sptk.few kvm_vps_resume_normal
-(p2)   br.cond.sptk.many kvm_vps_resume_handler
-       ;;
-END(ia64_vmm_entry)
-
-/*
- * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
- *                  u64 arg3, u64 arg4, u64 arg5,
- *                  u64 arg6, u64 arg7);
- *
- * XXX: The currently defined services use only 4 args at the max. The
- *  rest are not consumed.
- */
-GLOBAL_ENTRY(ia64_call_vsa)
-    .regstk 4,4,0,0
-
-rpsave  =   loc0
-pfssave =   loc1
-psrsave =   loc2
-entry   =   loc3
-hostret =   r24
-
-       alloc   pfssave=ar.pfs,4,4,0,0
-       mov rpsave=rp
-       adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
-       ;;
-       ld8 entry=[entry]
-1:     mov hostret=ip
-       mov r25=in1         // copy arguments
-       mov r26=in2
-       mov r27=in3
-       mov psrsave=psr
-       ;;
-       tbit.nz p6,p0=psrsave,14    // IA64_PSR_I
-       tbit.nz p7,p0=psrsave,13    // IA64_PSR_IC
-       ;;
-       add hostret=2f-1b,hostret   // calculate return address
-       add entry=entry,in0
-       ;;
-       rsm psr.i | psr.ic
-       ;;
-       srlz.i
-       mov b6=entry
-       br.cond.sptk b6         // call the service
-2:
-// Architectural sequence for enabling interrupts if necessary
-(p7)    ssm psr.ic
-       ;;
-(p7)    srlz.i
-       ;;
-(p6)    ssm psr.i
-       ;;
-       mov rp=rpsave
-       mov ar.pfs=pfssave
-       mov r8=r31
-       ;;
-       srlz.d
-       br.ret.sptk rp
-
-END(ia64_call_vsa)
-
-#define  INIT_BSPSTORE  ((4<<30)-(12<<20)-0x100)
-
-GLOBAL_ENTRY(vmm_reset_entry)
-       //set up ipsr, iip, vpd.vpsr, dcr
-       // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
-       // For DCR: all bits 0
-       bsw.0
-       ;;
-       mov r21 =r13
-       adds r14=-VMM_PT_REGS_SIZE, r12
-       ;;
-       movl r6=0x501008826000      // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
-       movl r10=0x8000000000000000
-       adds r16=PT(CR_IIP), r14
-       adds r20=PT(R1), r14
-       ;;
-       rsm psr.ic | psr.i
-       ;;
-       srlz.i
-       ;;
-       mov ar.rsc = 0
-       ;;
-       flushrs
-       ;;
-       mov ar.bspstore = 0
-       // clear BSPSTORE
-       ;;
-       mov cr.ipsr=r6
-       mov cr.ifs=r10
-       ld8 r4 = [r16] // Set init iip for first run.
-       ld8 r1 = [r20]
-       ;;
-       mov cr.iip=r4
-       adds r16=VMM_VPD_BASE_OFFSET,r13
-       ;;
-       ld8 r18=[r16]
-       ;;
-       adds r19=VMM_VPD_VPSR_OFFSET,r18
-       ;;
-       ld8 r19=[r19]
-       mov r17=r0
-       mov r22=r0
-       mov r23=r0
-       br.cond.sptk ia64_vmm_entry
-       br.ret.sptk  b0
-END(vmm_reset_entry)
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h
deleted file mode 100644 (file)
index b214b5b..0000000
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * vti.h: prototype for general vt related interface
- *     Copyright (c) 2004, Intel Corporation.
- *
- *     Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
- *     Fred Yang (fred.yang@intel.com)
- *     Kun Tian (Kevin Tian) (kevin.tian@intel.com)
- *
- *     Copyright (c) 2007, Intel Corporation.
- *     Zhang xiantao <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-#ifndef _KVM_VT_I_H
-#define _KVM_VT_I_H
-
-#ifndef __ASSEMBLY__
-#include <asm/page.h>
-
-#include <linux/kvm_host.h>
-
-/* define itr.i and itr.d  in ia64_itr function */
-#define        ITR     0x01
-#define        DTR     0x02
-#define        IaDTR   0x03
-
-#define IA64_TR_VMM       6 /*itr6, dtr6 : maps vmm code, vmbuffer*/
-#define IA64_TR_VM_DATA   7 /*dtr7       : maps current vm data*/
-
-#define RR6 (6UL<<61)
-#define RR7 (7UL<<61)
-
-
-/* config_options in pal_vp_init_env */
-#define        VP_INITIALIZE   1UL
-#define        VP_FR_PMC       1UL<<1
-#define        VP_OPCODE       1UL<<8
-#define        VP_CAUSE        1UL<<9
-#define VP_FW_ACC      1UL<<63
-
-/* init vp env with initializing vm_buffer */
-#define        VP_INIT_ENV_INITALIZE  (VP_INITIALIZE | VP_FR_PMC |\
-       VP_OPCODE | VP_CAUSE | VP_FW_ACC)
-/* init vp env without initializing vm_buffer */
-#define        VP_INIT_ENV  (VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC)
-
-#define                PAL_VP_CREATE   265
-/* Stacked Virt. Initializes a new VPD for the operation of
- * a new virtual processor in the virtual environment.
- */
-#define                PAL_VP_ENV_INFO 266
-/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/
-#define                PAL_VP_EXIT_ENV 267
-/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/
-#define                PAL_VP_INIT_ENV 268
-/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/
-#define                PAL_VP_REGISTER 269
-/*Stacked Virt. Register a different host IVT for the virtual processor.*/
-#define                PAL_VP_RESUME   270
-/* Renamed from PAL_VP_RESUME */
-#define                PAL_VP_RESTORE  270
-/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/
-#define                PAL_VP_SUSPEND  271
-/* Renamed from PAL_VP_SUSPEND */
-#define                PAL_VP_SAVE     271
-/* Stacked Virt. Suspends operation for the specified virtual processor on
- * the logical processor.
- */
-#define                PAL_VP_TERMINATE 272
-/* Stacked Virt. Terminates operation for the specified virtual processor.*/
-
-union vac {
-       unsigned long value;
-       struct {
-               unsigned int a_int:1;
-               unsigned int a_from_int_cr:1;
-               unsigned int a_to_int_cr:1;
-               unsigned int a_from_psr:1;
-               unsigned int a_from_cpuid:1;
-               unsigned int a_cover:1;
-               unsigned int a_bsw:1;
-               long reserved:57;
-       };
-};
-
-union vdc {
-       unsigned long value;
-       struct {
-               unsigned int d_vmsw:1;
-               unsigned int d_extint:1;
-               unsigned int d_ibr_dbr:1;
-               unsigned int d_pmc:1;
-               unsigned int d_to_pmd:1;
-               unsigned int d_itm:1;
-               long reserved:58;
-       };
-};
-
-struct vpd {
-       union vac   vac;
-       union vdc   vdc;
-       unsigned long  virt_env_vaddr;
-       unsigned long  reserved1[29];
-       unsigned long  vhpi;
-       unsigned long  reserved2[95];
-       unsigned long  vgr[16];
-       unsigned long  vbgr[16];
-       unsigned long  vnat;
-       unsigned long  vbnat;
-       unsigned long  vcpuid[5];
-       unsigned long  reserved3[11];
-       unsigned long  vpsr;
-       unsigned long  vpr;
-       unsigned long  reserved4[76];
-       union {
-               unsigned long  vcr[128];
-               struct {
-                       unsigned long dcr;
-                       unsigned long itm;
-                       unsigned long iva;
-                       unsigned long rsv1[5];
-                       unsigned long pta;
-                       unsigned long rsv2[7];
-                       unsigned long ipsr;
-                       unsigned long isr;
-                       unsigned long rsv3;
-                       unsigned long iip;
-                       unsigned long ifa;
-                       unsigned long itir;
-                       unsigned long iipa;
-                       unsigned long ifs;
-                       unsigned long iim;
-                       unsigned long iha;
-                       unsigned long rsv4[38];
-                       unsigned long lid;
-                       unsigned long ivr;
-                       unsigned long tpr;
-                       unsigned long eoi;
-                       unsigned long irr[4];
-                       unsigned long itv;
-                       unsigned long pmv;
-                       unsigned long cmcv;
-                       unsigned long rsv5[5];
-                       unsigned long lrr0;
-                       unsigned long lrr1;
-                       unsigned long rsv6[46];
-               };
-       };
-       unsigned long  reserved5[128];
-       unsigned long  reserved6[3456];
-       unsigned long  vmm_avail[128];
-       unsigned long  reserved7[4096];
-};
-
-#define PAL_PROC_VM_BIT                (1UL << 40)
-#define PAL_PROC_VMSW_BIT      (1UL << 54)
-
-static inline s64 ia64_pal_vp_env_info(u64 *buffer_size,
-               u64 *vp_env_info)
-{
-       struct ia64_pal_retval iprv;
-       PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0);
-       *buffer_size = iprv.v0;
-       *vp_env_info = iprv.v1;
-       return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_exit_env(u64 iva)
-{
-       struct ia64_pal_retval iprv;
-
-       PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0);
-       return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr,
-                       u64 vbase_addr, u64 *vsa_base)
-{
-       struct ia64_pal_retval iprv;
-
-       PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr,
-                       vbase_addr);
-       *vsa_base = iprv.v0;
-
-       return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector)
-{
-       struct ia64_pal_retval iprv;
-
-       PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0);
-
-       return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector)
-{
-       struct ia64_pal_retval iprv;
-
-       PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0);
-
-       return iprv.status;
-}
-
-#endif
-
-/*VPD field offset*/
-#define VPD_VAC_START_OFFSET           0
-#define VPD_VDC_START_OFFSET           8
-#define VPD_VHPI_START_OFFSET          256
-#define VPD_VGR_START_OFFSET           1024
-#define VPD_VBGR_START_OFFSET          1152
-#define VPD_VNAT_START_OFFSET          1280
-#define VPD_VBNAT_START_OFFSET         1288
-#define VPD_VCPUID_START_OFFSET                1296
-#define VPD_VPSR_START_OFFSET          1424
-#define VPD_VPR_START_OFFSET           1432
-#define VPD_VRSE_CFLE_START_OFFSET     1440
-#define VPD_VCR_START_OFFSET           2048
-#define VPD_VTPR_START_OFFSET          2576
-#define VPD_VRR_START_OFFSET           3072
-#define VPD_VMM_VAIL_START_OFFSET      31744
-
-/*Virtualization faults*/
-
-#define EVENT_MOV_TO_AR                         1
-#define EVENT_MOV_TO_AR_IMM             2
-#define EVENT_MOV_FROM_AR               3
-#define EVENT_MOV_TO_CR                         4
-#define EVENT_MOV_FROM_CR               5
-#define EVENT_MOV_TO_PSR                6
-#define EVENT_MOV_FROM_PSR              7
-#define EVENT_ITC_D                     8
-#define EVENT_ITC_I                     9
-#define EVENT_MOV_TO_RR                         10
-#define EVENT_MOV_TO_DBR                11
-#define EVENT_MOV_TO_IBR                12
-#define EVENT_MOV_TO_PKR                13
-#define EVENT_MOV_TO_PMC                14
-#define EVENT_MOV_TO_PMD                15
-#define EVENT_ITR_D                     16
-#define EVENT_ITR_I                     17
-#define EVENT_MOV_FROM_RR               18
-#define EVENT_MOV_FROM_DBR              19
-#define EVENT_MOV_FROM_IBR              20
-#define EVENT_MOV_FROM_PKR              21
-#define EVENT_MOV_FROM_PMC              22
-#define EVENT_MOV_FROM_CPUID            23
-#define EVENT_SSM                       24
-#define EVENT_RSM                       25
-#define EVENT_PTC_L                     26
-#define EVENT_PTC_G                     27
-#define EVENT_PTC_GA                    28
-#define EVENT_PTR_D                     29
-#define EVENT_PTR_I                     30
-#define EVENT_THASH                     31
-#define EVENT_TTAG                      32
-#define EVENT_TPA                       33
-#define EVENT_TAK                       34
-#define EVENT_PTC_E                     35
-#define EVENT_COVER                     36
-#define EVENT_RFI                       37
-#define EVENT_BSW_0                     38
-#define EVENT_BSW_1                     39
-#define EVENT_VMSW                      40
-
-/**PAL virtual services offsets */
-#define PAL_VPS_RESUME_NORMAL           0x0000
-#define PAL_VPS_RESUME_HANDLER          0x0400
-#define PAL_VPS_SYNC_READ               0x0800
-#define PAL_VPS_SYNC_WRITE              0x0c00
-#define PAL_VPS_SET_PENDING_INTERRUPT   0x1000
-#define PAL_VPS_THASH                   0x1400
-#define PAL_VPS_TTAG                    0x1800
-#define PAL_VPS_RESTORE                 0x1c00
-#define PAL_VPS_SAVE                    0x2000
-
-#endif/* _VT_I_H*/
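
The deleted header above exposes the IA64 PAL virtualization interface as static inline wrappers (ia64_pal_vp_env_info(), ia64_pal_vp_init_env() and friends). A minimal sketch of how the removed VMM code would have probed and entered the virtual environment through them — the function name, the vm_buffer argument and the error handling are illustrative assumptions, not the actual kvm-ia64 call site:

/* Hypothetical caller of the deleted PAL wrappers; assumes the usual
 * kernel headers plus the definitions from the header above. */
static int example_enter_vt_i(u64 vm_buffer)
{
	u64 buf_size, vp_env_info, vsa_base;
	s64 status;

	/* Ask PAL how large the VP environment buffer must be; a real
	 * caller would size vm_buffer from buf_size. */
	status = ia64_pal_vp_env_info(&buf_size, &vp_env_info);
	if (status != 0)
		return -1;

	/* The first caller also initializes vm_buffer (VP_INITIALIZE set). */
	status = ia64_pal_vp_init_env(VP_INIT_ENV_INITALIZE,
				      __pa(vm_buffer), vm_buffer, &vsa_base);
	return status == 0 ? 0 : -1;
}
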
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
deleted file mode 100644 (file)
index a7869f8..0000000
+++ /dev/null
@@ -1,640 +0,0 @@
-/*
- * vtlb.c: guest virtual tlb handling module.
- * Copyright (c) 2004, Intel Corporation.
- *  Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com>
- *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
- *
- * Copyright (c) 2007, Intel Corporation.
- *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
- *  Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include "vcpu.h"
-
-#include <linux/rwsem.h>
-
-#include <asm/tlb.h>
-
-/*
- * Check to see if the address rid:va is translated by the TLB
- */
-
-static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va)
-{
-       return ((trp->p) && (trp->rid == rid)
-                               && ((va-trp->vadr) < PSIZE(trp->ps)));
-}
-
-/*
- * Only for GUEST TR format.
- */
-static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva)
-{
-       u64 sa1, ea1;
-
-       if (!trp->p || trp->rid != rid)
-               return 0;
-
-       sa1 = trp->vadr;
-       ea1 = sa1 + PSIZE(trp->ps) - 1;
-       eva -= 1;
-       if ((sva > ea1) || (sa1 > eva))
-               return 0;
-       else
-               return 1;
-
-}
-
-void machine_tlb_purge(u64 va, u64 ps)
-{
-       ia64_ptcl(va, ps << 2);
-}
-
-void local_flush_tlb_all(void)
-{
-       int i, j;
-       unsigned long flags, count0, count1;
-       unsigned long stride0, stride1, addr;
-
-       addr    = current_vcpu->arch.ptce_base;
-       count0  = current_vcpu->arch.ptce_count[0];
-       count1  = current_vcpu->arch.ptce_count[1];
-       stride0 = current_vcpu->arch.ptce_stride[0];
-       stride1 = current_vcpu->arch.ptce_stride[1];
-
-       local_irq_save(flags);
-       for (i = 0; i < count0; ++i) {
-               for (j = 0; j < count1; ++j) {
-                       ia64_ptce(addr);
-                       addr += stride1;
-               }
-               addr += stride0;
-       }
-       local_irq_restore(flags);
-       ia64_srlz_i();          /* srlz.i implies srlz.d */
-}
-
-int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref)
-{
-       union ia64_rr    vrr;
-       union ia64_pta   vpta;
-       struct  ia64_psr   vpsr;
-
-       vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-       vrr.val = vcpu_get_rr(vcpu, vadr);
-       vpta.val = vcpu_get_pta(vcpu);
-
-       if (vrr.ve & vpta.ve) {
-               switch (ref) {
-               case DATA_REF:
-               case NA_REF:
-                       return vpsr.dt;
-               case INST_REF:
-                       return vpsr.dt && vpsr.it && vpsr.ic;
-               case RSE_REF:
-                       return vpsr.dt && vpsr.rt;
-
-               }
-       }
-       return 0;
-}
-
-struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag)
-{
-       u64 index, pfn, rid, pfn_bits;
-
-       pfn_bits = vpta.size - 5 - 8;
-       pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr);
-       rid = _REGION_ID(vrr);
-       index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1));
-       *tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16);
-
-       return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) +
-                               (index << 5));
-}
-
-struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type)
-{
-
-       struct thash_data *trp;
-       int  i;
-       u64 rid;
-
-       rid = vcpu_get_rr(vcpu, va);
-       rid = rid & RR_RID_MASK;
-       if (type == D_TLB) {
-               if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
-                       for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
-                                               i < NDTRS; i++, trp++) {
-                               if (__is_tr_translated(trp, rid, va))
-                                       return trp;
-                       }
-               }
-       } else {
-               if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
-                       for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
-                                       i < NITRS; i++, trp++) {
-                               if (__is_tr_translated(trp, rid, va))
-                                       return trp;
-                       }
-               }
-       }
-
-       return NULL;
-}
-
-static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
-{
-       union ia64_rr rr;
-       struct thash_data *head;
-       unsigned long ps, gpaddr;
-
-       ps = itir_ps(itir);
-       rr.val = ia64_get_rr(ifa);
-
-        gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
-                                       (ifa & ((1UL << ps) - 1));
-
-       head = (struct thash_data *)ia64_thash(ifa);
-       head->etag = INVALID_TI_TAG;
-       ia64_mf();
-       head->page_flags = pte & ~PAGE_FLAGS_RV_MASK;
-       head->itir = rr.ps << 2;
-       head->etag = ia64_ttag(ifa);
-       head->gpaddr = gpaddr;
-}
-
-void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
-{
-       u64 i, dirty_pages = 1;
-       u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
-       vmm_spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
-       void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE;
-
-       dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
-
-       vmm_spin_lock(lock);
-       for (i = 0; i < dirty_pages; i++) {
-               /* avoid RMW */
-               if (!test_bit(base_gfn + i, dirty_bitmap))
-                       set_bit(base_gfn + i , dirty_bitmap);
-       }
-       vmm_spin_unlock(lock);
-}
-
-void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type)
-{
-       u64 phy_pte, psr;
-       union ia64_rr mrr;
-
-       mrr.val = ia64_get_rr(va);
-       phy_pte = translate_phy_pte(&pte, itir, va);
-
-       if (itir_ps(itir) >= mrr.ps) {
-               vhpt_insert(phy_pte, itir, va, pte);
-       } else {
-               phy_pte  &= ~PAGE_FLAGS_RV_MASK;
-               psr = ia64_clear_ic();
-               ia64_itc(type, va, phy_pte, itir_ps(itir));
-               paravirt_dv_serialize_data();
-               ia64_set_psr(psr);
-       }
-
-       if (!(pte&VTLB_PTE_IO))
-               mark_pages_dirty(v, pte, itir_ps(itir));
-}
-
-/*
- *   vhpt lookup
- */
-struct thash_data *vhpt_lookup(u64 va)
-{
-       struct thash_data *head;
-       u64 tag;
-
-       head = (struct thash_data *)ia64_thash(va);
-       tag = ia64_ttag(va);
-       if (head->etag == tag)
-               return head;
-       return NULL;
-}
-
-u64 guest_vhpt_lookup(u64 iha, u64 *pte)
-{
-       u64 ret;
-       struct thash_data *data;
-
-       data = __vtr_lookup(current_vcpu, iha, D_TLB);
-       if (data != NULL)
-               thash_vhpt_insert(current_vcpu, data->page_flags,
-                       data->itir, iha, D_TLB);
-
-       asm volatile ("rsm psr.ic|psr.i;;"
-                       "srlz.d;;"
-                       "ld8.s r9=[%1];;"
-                       "tnat.nz p6,p7=r9;;"
-                       "(p6) mov %0=1;"
-                       "(p6) mov r9=r0;"
-                       "(p7) extr.u r9=r9,0,53;;"
-                       "(p7) mov %0=r0;"
-                       "(p7) st8 [%2]=r9;;"
-                       "ssm psr.ic;;"
-                       "srlz.d;;"
-                       "ssm psr.i;;"
-                       "srlz.d;;"
-                       : "=&r"(ret) : "r"(iha), "r"(pte) : "memory");
-
-       return ret;
-}
-
-/*
- *  purge software guest tlb
- */
-
-static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-       struct thash_data *cur;
-       u64 start, curadr, size, psbits, tag, rr_ps, num;
-       union ia64_rr vrr;
-       struct thash_cb *hcb = &v->arch.vtlb;
-
-       vrr.val = vcpu_get_rr(v, va);
-       psbits = VMX(v, psbits[(va >> 61)]);
-       start = va & ~((1UL << ps) - 1);
-       while (psbits) {
-               curadr = start;
-               rr_ps = __ffs(psbits);
-               psbits &= ~(1UL << rr_ps);
-               num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps));
-               size = PSIZE(rr_ps);
-               vrr.ps = rr_ps;
-               while (num) {
-                       cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag);
-                       if (cur->etag == tag && cur->ps == rr_ps)
-                               cur->etag = INVALID_TI_TAG;
-                       curadr += size;
-                       num--;
-               }
-       }
-}
-
-
-/*
- *  purge VHPT and machine TLB
- */
-static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-       struct thash_data *cur;
-       u64 start, size, tag, num;
-       union ia64_rr rr;
-
-       start = va & ~((1UL << ps) - 1);
-       rr.val = ia64_get_rr(va);
-       size = PSIZE(rr.ps);
-       num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps));
-       while (num) {
-               cur = (struct thash_data *)ia64_thash(start);
-               tag = ia64_ttag(start);
-               if (cur->etag == tag)
-                       cur->etag = INVALID_TI_TAG;
-               start += size;
-               num--;
-       }
-       machine_tlb_purge(va, ps);
-}
-
-/*
- * Insert an entry into the hash TLB or VHPT.
- * NOTES:
- *  1: When inserting a VHPT entry into thash, "va" must be an
- *     address covered by the inserted machine VHPT entry.
- *  2: The entry format is always the TLB format.
- *  3: The caller must make sure the new entry does not overlap
- *     with any existing entry.
- */
-void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va)
-{
-       struct thash_data *head;
-       union ia64_rr vrr;
-       u64 tag;
-       struct thash_cb *hcb = &v->arch.vtlb;
-
-       vrr.val = vcpu_get_rr(v, va);
-       vrr.ps = itir_ps(itir);
-       VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
-       head = vsa_thash(hcb->pta, va, vrr.val, &tag);
-       head->page_flags = pte;
-       head->itir = itir;
-       head->etag = tag;
-}
-
-int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type)
-{
-       struct thash_data  *trp;
-       int  i;
-       u64 end, rid;
-
-       rid = vcpu_get_rr(vcpu, va);
-       rid = rid & RR_RID_MASK;
-       end = va + PSIZE(ps);
-       if (type == D_TLB) {
-               if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
-                       for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
-                                       i < NDTRS; i++, trp++) {
-                               if (__is_tr_overlap(trp, rid, va, end))
-                                       return i;
-                       }
-               }
-       } else {
-               if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
-                       for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
-                                       i < NITRS; i++, trp++) {
-                               if (__is_tr_overlap(trp, rid, va, end))
-                                       return i;
-                       }
-               }
-       }
-       return -1;
-}
-
-/*
- * Purge entries in VTLB and VHPT
- */
-void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-       if (vcpu_quick_region_check(v->arch.tc_regions, va))
-               vtlb_purge(v, va, ps);
-       vhpt_purge(v, va, ps);
-}
-
-void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-       u64 old_va = va;
-       va = REGION_OFFSET(va);
-       if (vcpu_quick_region_check(v->arch.tc_regions, old_va))
-               vtlb_purge(v, va, ps);
-       vhpt_purge(v, va, ps);
-}
-
-u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
-{
-       u64 ps, ps_mask, paddr, maddr, io_mask;
-       union pte_flags phy_pte;
-
-       ps = itir_ps(itir);
-       ps_mask = ~((1UL << ps) - 1);
-       phy_pte.val = *pte;
-       paddr = *pte;
-       paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask);
-       maddr = kvm_get_mpt_entry(paddr >> PAGE_SHIFT);
-       io_mask = maddr & GPFN_IO_MASK;
-       if (io_mask && (io_mask != GPFN_PHYS_MMIO)) {
-               *pte |= VTLB_PTE_IO;
-               return -1;
-       }
-       maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) |
-                                       (paddr & ~PAGE_MASK);
-       phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT;
-       return phy_pte.val;
-}
-
-/*
- * Purge overlapping TCs and then insert the new entry to emulate itc ops.
- * Note: only TC entries can be purged and inserted.
- */
-void  thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
-                                               u64 ifa, int type)
-{
-       u64 ps;
-       u64 phy_pte, io_mask, index;
-       union ia64_rr vrr, mrr;
-
-       ps = itir_ps(itir);
-       vrr.val = vcpu_get_rr(v, ifa);
-       mrr.val = ia64_get_rr(ifa);
-
-       index = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
-       io_mask = kvm_get_mpt_entry(index) & GPFN_IO_MASK;
-       phy_pte = translate_phy_pte(&pte, itir, ifa);
-
-       /* Ensure WB attribute if pte is related to a normal mem page,
-        * which is required by vga acceleration since qemu maps shared
-        * vram buffer with WB.
-        */
-       if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT) &&
-                       io_mask != GPFN_PHYS_MMIO) {
-               pte &= ~_PAGE_MA_MASK;
-               phy_pte &= ~_PAGE_MA_MASK;
-       }
-
-       vtlb_purge(v, ifa, ps);
-       vhpt_purge(v, ifa, ps);
-
-       if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) {
-               vtlb_insert(v, pte, itir, ifa);
-               vcpu_quick_region_set(VMX(v, tc_regions), ifa);
-       }
-       if (pte & VTLB_PTE_IO)
-               return;
-
-       if (ps >= mrr.ps)
-               vhpt_insert(phy_pte, itir, ifa, pte);
-       else {
-               u64 psr;
-               phy_pte  &= ~PAGE_FLAGS_RV_MASK;
-               psr = ia64_clear_ic();
-               ia64_itc(type, ifa, phy_pte, ps);
-               paravirt_dv_serialize_data();
-               ia64_set_psr(psr);
-       }
-       if (!(pte&VTLB_PTE_IO))
-               mark_pages_dirty(v, pte, ps);
-
-}
-
-/*
- * Purge all TCs or VHPT entries including those in Hash table.
- *
- */
-
-void thash_purge_all(struct kvm_vcpu *v)
-{
-       int i;
-       struct thash_data *head;
-       struct thash_cb  *vtlb, *vhpt;
-       vtlb = &v->arch.vtlb;
-       vhpt = &v->arch.vhpt;
-
-       for (i = 0; i < 8; i++)
-               VMX(v, psbits[i]) = 0;
-
-       head = vtlb->hash;
-       for (i = 0; i < vtlb->num; i++) {
-               head->page_flags = 0;
-               head->etag = INVALID_TI_TAG;
-               head->itir = 0;
-               head->next = 0;
-               head++;
-       };
-
-       head = vhpt->hash;
-       for (i = 0; i < vhpt->num; i++) {
-               head->page_flags = 0;
-               head->etag = INVALID_TI_TAG;
-               head->itir = 0;
-               head->next = 0;
-               head++;
-       };
-
-       local_flush_tlb_all();
-}
-
-/*
- * Look up the hash table and its collision chain to find an entry
- * covering the address rid:va.
- *
- * INPUT:
- *  in: TLB format for both VHPT & TLB.
- */
-struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
-{
-       struct thash_data  *cch;
-       u64    psbits, ps, tag;
-       union ia64_rr vrr;
-
-       struct thash_cb *hcb = &v->arch.vtlb;
-
-       cch = __vtr_lookup(v, va, is_data);
-       if (cch)
-               return cch;
-
-       if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0)
-               return NULL;
-
-       psbits = VMX(v, psbits[(va >> 61)]);
-       vrr.val = vcpu_get_rr(v, va);
-       while (psbits) {
-               ps = __ffs(psbits);
-               psbits &= ~(1UL << ps);
-               vrr.ps = ps;
-               cch = vsa_thash(hcb->pta, va, vrr.val, &tag);
-               if (cch->etag == tag && cch->ps == ps)
-                       return cch;
-       }
-
-       return NULL;
-}
-
-/*
- * Initialize internal control data before service.
- */
-void thash_init(struct thash_cb *hcb, u64 sz)
-{
-       int i;
-       struct thash_data *head;
-
-       hcb->pta.val = (unsigned long)hcb->hash;
-       hcb->pta.vf = 1;
-       hcb->pta.ve = 1;
-       hcb->pta.size = sz;
-       head = hcb->hash;
-       for (i = 0; i < hcb->num; i++) {
-               head->page_flags = 0;
-               head->itir = 0;
-               head->etag = INVALID_TI_TAG;
-               head->next = 0;
-               head++;
-       }
-}
-
-u64 kvm_get_mpt_entry(u64 gpfn)
-{
-       u64 *base = (u64 *) KVM_P2M_BASE;
-
-       if (gpfn >= (KVM_P2M_SIZE >> 3))
-               panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn);
-
-       return *(base + gpfn);
-}
-
-u64 kvm_lookup_mpa(u64 gpfn)
-{
-       u64 maddr;
-       maddr = kvm_get_mpt_entry(gpfn);
-       return maddr&_PAGE_PPN_MASK;
-}
-
-u64 kvm_gpa_to_mpa(u64 gpa)
-{
-       u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT);
-       return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
-}
-
-/*
- * Fetch guest bundle code.
- * INPUT:
- *  gip: guest ip
- *  pbundle: used to return fetched bundle.
- */
-int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
-{
-       u64     gpip = 0;   /* guest physical IP*/
-       u64     *vpa;
-       struct thash_data    *tlb;
-       u64     maddr;
-
-       if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) {
-               /* I-side physical mode */
-               gpip = gip;
-       } else {
-               tlb = vtlb_lookup(vcpu, gip, I_TLB);
-               if (tlb)
-                       gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) |
-                               (gip & (PSIZE(tlb->ps) - 1));
-       }
-       if (gpip) {
-               maddr = kvm_gpa_to_mpa(gpip);
-       } else {
-               tlb = vhpt_lookup(gip);
-               if (tlb == NULL) {
-                       ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2);
-                       return IA64_FAULT;
-               }
-               maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps)
-                                       | (gip & (PSIZE(tlb->ps) - 1));
-       }
-       vpa = (u64 *)__kvm_va(maddr);
-
-       pbundle->i64[0] = *vpa++;
-       pbundle->i64[1] = *vpa;
-
-       return IA64_NO_FAULT;
-}
-
-void kvm_init_vhpt(struct kvm_vcpu *v)
-{
-       v->arch.vhpt.num = VHPT_NUM_ENTRIES;
-       thash_init(&v->arch.vhpt, VHPT_SHIFT);
-       ia64_set_pta(v->arch.vhpt.pta.val);
-       /*Enable VHPT here?*/
-}
-
-void kvm_init_vtlb(struct kvm_vcpu *v)
-{
-       v->arch.vtlb.num = VTLB_NUM_ENTRIES;
-       thash_init(&v->arch.vtlb, VTLB_SHIFT);
-}
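
vsa_thash() in the deleted file above splits a (rid, va) pair into a hash index and a tag: the low eight RID bits plus the low page-frame bits select the bucket, and the remaining bits form the tag used to disambiguate collisions. A standalone restatement of that split, assuming 32-byte thash_data entries as in the code above (the example_* names are illustrative):

/* Mirror of the index/tag computation in vsa_thash(); hash_size_log2
 * corresponds to vpta.size, pfn to REGION_OFFSET(va) >> page_size. */
static unsigned long example_vhpt_index(unsigned long rid, unsigned long pfn,
					unsigned long hash_size_log2)
{
	unsigned long pfn_bits = hash_size_log2 - 5 - 8; /* 2^5-byte entries, 8 rid bits */

	return ((rid & 0xff) << pfn_bits) | (pfn & ((1UL << pfn_bits) - 1));
}

static unsigned long example_vhpt_tag(unsigned long rid, unsigned long pfn,
				      unsigned long hash_size_log2)
{
	unsigned long pfn_bits = hash_size_log2 - 5 - 8;

	/* Bits that did not select the bucket go into the tag. */
	return ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16);
}
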
index 6acf0c2..942c7b1 100644 (file)
@@ -170,8 +170,6 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
                        unsigned long *nb_ret);
 extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
                        unsigned long gpa, bool dirty);
-extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
-                       long pte_index, unsigned long pteh, unsigned long ptel);
 extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
                        long pte_index, unsigned long pteh, unsigned long ptel,
                        pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
index 0aa8179..2d81e20 100644 (file)
@@ -37,7 +37,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 #define KVM_DEFAULT_HPT_ORDER  24      /* 16MB HPT by default */
-extern unsigned long kvm_rma_pages;
 #endif
 
 #define VRMA_VSID      0x1ffffffUL     /* 1TB VSID reserved for VRMA */
@@ -148,7 +147,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
        /* This covers 14..54 bits of va*/
        rb = (v & ~0x7fUL) << 16;               /* AVA field */
 
-       rb |= v >> (62 - 8);                    /*  B field */
+       rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8;   /*  B field */
        /*
         * AVA in v had cleared lower 23 bits. We need to derive
         * that from pteg index
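
The one-line change in compute_tlbie_rb() above isolates the HPTE segment-size (B) field before placing it in the tlbie operand; the old shift by (62 - 8) also dragged other high-order AVA bits of v into the low byte of rb. A restatement of the intent, assuming HPTE_V_SSIZE_SHIFT is 62 (segment size in the top two bits of v):

/* Sketch of the corrected B-field extraction; only the two segment-size
 * bits of v should land in bits 8-9 of rb. */
static inline unsigned long example_tlbie_b_field(unsigned long v)
{
	return (v >> HPTE_V_SSIZE_SHIFT) << 8;	/* HPTE_V_SSIZE_SHIFT == 62 assumed */
}
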
index 0478556..7efd666 100644 (file)
@@ -180,11 +180,6 @@ struct kvmppc_spapr_tce_table {
        struct page *pages[0];
 };
 
-struct kvm_rma_info {
-       atomic_t use_count;
-       unsigned long base_pfn;
-};
-
 /* XICS components, defined in book3s_xics.c */
 struct kvmppc_xics;
 struct kvmppc_icp;
@@ -214,16 +209,9 @@ struct revmap_entry {
 #define KVMPPC_RMAP_PRESENT    0x100000000ul
 #define KVMPPC_RMAP_INDEX      0xfffffffful
 
-/* Low-order bits in memslot->arch.slot_phys[] */
-#define KVMPPC_PAGE_ORDER_MASK 0x1f
-#define KVMPPC_PAGE_NO_CACHE   HPTE_R_I        /* 0x20 */
-#define KVMPPC_PAGE_WRITETHRU  HPTE_R_W        /* 0x40 */
-#define KVMPPC_GOT_PAGE                0x80
-
 struct kvm_arch_memory_slot {
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        unsigned long *rmap;
-       unsigned long *slot_phys;
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 };
 
@@ -242,14 +230,12 @@ struct kvm_arch {
        struct kvm_rma_info *rma;
        unsigned long vrma_slb_v;
        int rma_setup_done;
-       int using_mmu_notifiers;
        u32 hpt_order;
        atomic_t vcpus_running;
        u32 online_vcores;
        unsigned long hpt_npte;
        unsigned long hpt_mask;
        atomic_t hpte_mod_interest;
-       spinlock_t slot_phys_lock;
        cpumask_t need_tlb_flush;
        int hpt_cma_alloc;
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
@@ -297,6 +283,7 @@ struct kvmppc_vcore {
        struct list_head runnable_threads;
        spinlock_t lock;
        wait_queue_head_t wq;
+       spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
        u64 stolen_tb;
        u64 preempt_tb;
        struct kvm_vcpu *runner;
@@ -308,6 +295,7 @@ struct kvmppc_vcore {
        ulong dpdes;            /* doorbell state (POWER8) */
        void *mpp_buffer; /* Micro Partition Prefetch buffer */
        bool mpp_buffer_is_valid;
+       ulong conferring_threads;
 };
 
 #define VCORE_ENTRY_COUNT(vc)  ((vc)->entry_exit_count & 0xff)
@@ -664,6 +652,8 @@ struct kvm_vcpu_arch {
        spinlock_t tbacct_lock;
        u64 busy_stolen;
        u64 busy_preempt;
+
+       u32 emul_inst;
 #endif
 };
 
index a6dcdb6..46bf652 100644 (file)
@@ -170,8 +170,6 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
                             unsigned long ioba, unsigned long tce);
 extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
                             unsigned long ioba);
-extern struct kvm_rma_info *kvm_alloc_rma(void);
-extern void kvm_release_rma(struct kvm_rma_info *ri);
 extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
 extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
index c161ef3..24d78e1 100644 (file)
@@ -489,7 +489,6 @@ int main(void)
        DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
        DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
        DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
-       DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
        DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
        DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
        DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
@@ -499,6 +498,7 @@ int main(void)
        DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
        DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
        DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
+       DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst));
 #endif
 #ifdef CONFIG_PPC_BOOK3S
        DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
index 602eb51..f5769f1 100644 (file)
@@ -172,6 +172,7 @@ config KVM_XICS
        depends on KVM_BOOK3S_64 && !KVM_MPIC
        select HAVE_KVM_IRQCHIP
        select HAVE_KVM_IRQFD
+       default y
        ---help---
          Include support for the XICS (eXternal Interrupt Controller
          Specification) interrupt controller architecture used on
index b32db4b..888bf46 100644 (file)
@@ -64,14 +64,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { NULL }
 };
 
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
 void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
 {
        if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
index cd0b073..a2eb6d3 100644 (file)
@@ -78,11 +78,6 @@ static inline bool sr_kp(u32 sr_raw)
        return (sr_raw & 0x20000000) ? true: false;
 }
 
-static inline bool sr_nx(u32 sr_raw)
-{
-       return (sr_raw & 0x10000000) ? true: false;
-}
-
 static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
                                          struct kvmppc_pte *pte, bool data,
                                          bool iswrite);
index d407702..534acb3 100644 (file)
@@ -37,8 +37,7 @@
 #include <asm/ppc-opcode.h>
 #include <asm/cputable.h>
 
-/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
-#define MAX_LPID_970   63
+#include "trace_hv.h"
 
 /* Power architecture requires HPT is at least 256kB */
 #define PPC_MIN_HPT_ORDER      18
@@ -229,14 +228,9 @@ int kvmppc_mmu_hv_init(void)
        if (!cpu_has_feature(CPU_FTR_HVMODE))
                return -EINVAL;
 
-       /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */
-       if (cpu_has_feature(CPU_FTR_ARCH_206)) {
-               host_lpid = mfspr(SPRN_LPID);   /* POWER7 */
-               rsvd_lpid = LPID_RSVD;
-       } else {
-               host_lpid = 0;                  /* PPC970 */
-               rsvd_lpid = MAX_LPID_970;
-       }
+       /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
+       host_lpid = mfspr(SPRN_LPID);
+       rsvd_lpid = LPID_RSVD;
 
        kvmppc_init_lpid(rsvd_lpid + 1);
 
@@ -259,130 +253,12 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
        kvmppc_set_msr(vcpu, msr);
 }
 
-/*
- * This is called to get a reference to a guest page if there isn't
- * one already in the memslot->arch.slot_phys[] array.
- */
-static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
-                                 struct kvm_memory_slot *memslot,
-                                 unsigned long psize)
-{
-       unsigned long start;
-       long np, err;
-       struct page *page, *hpage, *pages[1];
-       unsigned long s, pgsize;
-       unsigned long *physp;
-       unsigned int is_io, got, pgorder;
-       struct vm_area_struct *vma;
-       unsigned long pfn, i, npages;
-
-       physp = memslot->arch.slot_phys;
-       if (!physp)
-               return -EINVAL;
-       if (physp[gfn - memslot->base_gfn])
-               return 0;
-
-       is_io = 0;
-       got = 0;
-       page = NULL;
-       pgsize = psize;
-       err = -EINVAL;
-       start = gfn_to_hva_memslot(memslot, gfn);
-
-       /* Instantiate and get the page we want access to */
-       np = get_user_pages_fast(start, 1, 1, pages);
-       if (np != 1) {
-               /* Look up the vma for the page */
-               down_read(&current->mm->mmap_sem);
-               vma = find_vma(current->mm, start);
-               if (!vma || vma->vm_start > start ||
-                   start + psize > vma->vm_end ||
-                   !(vma->vm_flags & VM_PFNMAP))
-                       goto up_err;
-               is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
-               pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
-               /* check alignment of pfn vs. requested page size */
-               if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
-                       goto up_err;
-               up_read(&current->mm->mmap_sem);
-
-       } else {
-               page = pages[0];
-               got = KVMPPC_GOT_PAGE;
-
-               /* See if this is a large page */
-               s = PAGE_SIZE;
-               if (PageHuge(page)) {
-                       hpage = compound_head(page);
-                       s <<= compound_order(hpage);
-                       /* Get the whole large page if slot alignment is ok */
-                       if (s > psize && slot_is_aligned(memslot, s) &&
-                           !(memslot->userspace_addr & (s - 1))) {
-                               start &= ~(s - 1);
-                               pgsize = s;
-                               get_page(hpage);
-                               put_page(page);
-                               page = hpage;
-                       }
-               }
-               if (s < psize)
-                       goto out;
-               pfn = page_to_pfn(page);
-       }
-
-       npages = pgsize >> PAGE_SHIFT;
-       pgorder = __ilog2(npages);
-       physp += (gfn - memslot->base_gfn) & ~(npages - 1);
-       spin_lock(&kvm->arch.slot_phys_lock);
-       for (i = 0; i < npages; ++i) {
-               if (!physp[i]) {
-                       physp[i] = ((pfn + i) << PAGE_SHIFT) +
-                               got + is_io + pgorder;
-                       got = 0;
-               }
-       }
-       spin_unlock(&kvm->arch.slot_phys_lock);
-       err = 0;
-
- out:
-       if (got)
-               put_page(page);
-       return err;
-
- up_err:
-       up_read(&current->mm->mmap_sem);
-       return err;
-}
-
 long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
                                long pte_index, unsigned long pteh,
                                unsigned long ptel, unsigned long *pte_idx_ret)
 {
-       unsigned long psize, gpa, gfn;
-       struct kvm_memory_slot *memslot;
        long ret;
 
-       if (kvm->arch.using_mmu_notifiers)
-               goto do_insert;
-
-       psize = hpte_page_size(pteh, ptel);
-       if (!psize)
-               return H_PARAMETER;
-
-       pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
-
-       /* Find the memslot (if any) for this address */
-       gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
-       gfn = gpa >> PAGE_SHIFT;
-       memslot = gfn_to_memslot(kvm, gfn);
-       if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) {
-               if (!slot_is_aligned(memslot, psize))
-                       return H_PARAMETER;
-               if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
-                       return H_PARAMETER;
-       }
-
- do_insert:
        /* Protect linux PTE lookup from page table destruction */
        rcu_read_lock_sched();  /* this disables preemption too */
        ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
@@ -397,19 +273,6 @@ long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
 
 }
 
-/*
- * We come here on a H_ENTER call from the guest when we are not
- * using mmu notifiers and we don't have the requested page pinned
- * already.
- */
-long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
-                            long pte_index, unsigned long pteh,
-                            unsigned long ptel)
-{
-       return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
-                                         pteh, ptel, &vcpu->arch.gpr[4]);
-}
-
 static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
                                                         gva_t eaddr)
 {
@@ -494,7 +357,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
        gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));
 
        /* Storage key permission check for POWER7 */
-       if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) {
+       if (data && virtmode) {
                int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
                if (amrfield & 1)
                        gpte->may_read = 0;
@@ -622,14 +485,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        gfn = gpa >> PAGE_SHIFT;
        memslot = gfn_to_memslot(kvm, gfn);
 
+       trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr);
+
        /* No memslot means it's an emulated MMIO region */
        if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
                return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
                                              dsisr & DSISR_ISSTORE);
 
-       if (!kvm->arch.using_mmu_notifiers)
-               return -EFAULT;         /* should never get here */
-
        /*
         * This should never happen, because of the slot_is_aligned()
         * check in kvmppc_do_h_enter().
@@ -641,6 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        mmu_seq = kvm->mmu_notifier_seq;
        smp_rmb();
 
+       ret = -EFAULT;
        is_io = 0;
        pfn = 0;
        page = NULL;
@@ -664,7 +527,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                }
                up_read(&current->mm->mmap_sem);
                if (!pfn)
-                       return -EFAULT;
+                       goto out_put;
        } else {
                page = pages[0];
                pfn = page_to_pfn(page);
@@ -694,14 +557,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                }
        }
 
-       ret = -EFAULT;
        if (psize > pte_size)
                goto out_put;
 
        /* Check WIMG vs. the actual page we're accessing */
        if (!hpte_cache_flags_ok(r, is_io)) {
                if (is_io)
-                       return -EFAULT;
+                       goto out_put;
+
                /*
                 * Allow guest to map emulated device memory as
                 * uncacheable, but actually make it cacheable.
@@ -765,6 +628,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                SetPageDirty(page);
 
  out_put:
+       trace_kvm_page_fault_exit(vcpu, hpte, ret);
+
        if (page) {
                /*
                 * We drop pages[0] here, not page because page might
@@ -895,8 +760,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
                psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
                if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
                    hpte_rpn(ptel, psize) == gfn) {
-                       if (kvm->arch.using_mmu_notifiers)
-                               hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
+                       hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
                        kvmppc_invalidate_hpte(kvm, hptep, i);
                        /* Harvest R and C */
                        rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
@@ -914,15 +778,13 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
 int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
 {
-       if (kvm->arch.using_mmu_notifiers)
-               kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+       kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
        return 0;
 }
 
 int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-       if (kvm->arch.using_mmu_notifiers)
-               kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
+       kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
        return 0;
 }
 
@@ -1004,8 +866,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
 int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-       if (!kvm->arch.using_mmu_notifiers)
-               return 0;
        return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp);
 }
 
@@ -1042,15 +902,11 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
 int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
 {
-       if (!kvm->arch.using_mmu_notifiers)
-               return 0;
        return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
 }
 
 void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
-       if (!kvm->arch.using_mmu_notifiers)
-               return;
        kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
 }
 
@@ -1117,8 +973,11 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
                }
 
                /* Now check and modify the HPTE */
-               if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID)))
+               if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) {
+                       /* unlock and continue */
+                       hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
                        continue;
+               }
 
                /* need to make it temporarily absent so C is stable */
                hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
@@ -1206,35 +1065,17 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
        struct page *page, *pages[1];
        int npages;
        unsigned long hva, offset;
-       unsigned long pa;
-       unsigned long *physp;
        int srcu_idx;
 
        srcu_idx = srcu_read_lock(&kvm->srcu);
        memslot = gfn_to_memslot(kvm, gfn);
        if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
                goto err;
-       if (!kvm->arch.using_mmu_notifiers) {
-               physp = memslot->arch.slot_phys;
-               if (!physp)
-                       goto err;
-               physp += gfn - memslot->base_gfn;
-               pa = *physp;
-               if (!pa) {
-                       if (kvmppc_get_guest_page(kvm, gfn, memslot,
-                                                 PAGE_SIZE) < 0)
-                               goto err;
-                       pa = *physp;
-               }
-               page = pfn_to_page(pa >> PAGE_SHIFT);
-               get_page(page);
-       } else {
-               hva = gfn_to_hva_memslot(memslot, gfn);
-               npages = get_user_pages_fast(hva, 1, 1, pages);
-               if (npages < 1)
-                       goto err;
-               page = pages[0];
-       }
+       hva = gfn_to_hva_memslot(memslot, gfn);
+       npages = get_user_pages_fast(hva, 1, 1, pages);
+       if (npages < 1)
+               goto err;
+       page = pages[0];
        srcu_read_unlock(&kvm->srcu, srcu_idx);
 
        offset = gpa & (PAGE_SIZE - 1);
@@ -1258,7 +1099,7 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
 
        put_page(page);
 
-       if (!dirty || !kvm->arch.using_mmu_notifiers)
+       if (!dirty)
                return;
 
        /* We need to mark this page dirty in the rmap chain */
@@ -1539,9 +1380,15 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
                hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
                lbuf = (unsigned long __user *)buf;
                for (j = 0; j < hdr.n_valid; ++j) {
+                       __be64 hpte_v;
+                       __be64 hpte_r;
+
                        err = -EFAULT;
-                       if (__get_user(v, lbuf) || __get_user(r, lbuf + 1))
+                       if (__get_user(hpte_v, lbuf) ||
+                           __get_user(hpte_r, lbuf + 1))
                                goto out;
+                       v = be64_to_cpu(hpte_v);
+                       r = be64_to_cpu(hpte_r);
                        err = -EINVAL;
                        if (!(v & HPTE_V_VALID))
                                goto out;
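
The kvm_htab_write() hunk above reads each HPTE word from userspace as an explicit __be64 and converts it with be64_to_cpu(), so the HTAB save/restore stream stays big-endian regardless of host endianness. The same pattern in isolation (names are illustrative, not the actual kvm_htab_write() locals):

/* Read one big-endian HPTE pair from a userspace buffer and return the
 * values in host byte order; assumes <linux/uaccess.h>. */
static int example_read_hpte(const unsigned long __user *lbuf,
			     unsigned long *v, unsigned long *r)
{
	__be64 hpte_v, hpte_r;

	if (__get_user(hpte_v, lbuf) || __get_user(hpte_r, lbuf + 1))
		return -EFAULT;

	*v = be64_to_cpu(hpte_v);
	*r = be64_to_cpu(hpte_r);
	return 0;
}
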
@@ -1652,10 +1499,7 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
 {
        struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
 
-       if (cpu_has_feature(CPU_FTR_ARCH_206))
-               vcpu->arch.slb_nr = 32;         /* POWER7 */
-       else
-               vcpu->arch.slb_nr = 64;
+       vcpu->arch.slb_nr = 32;         /* POWER7/POWER8 */
 
        mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
        mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
index e63587d..de4018a 100644 (file)
@@ -58,6 +58,9 @@
 
 #include "book3s.h"
 
+#define CREATE_TRACE_POINTS
+#include "trace_hv.h"
+
 /* #define EXIT_DEBUG */
 /* #define EXIT_DEBUG_SIMPLE */
 /* #define EXIT_DEBUG_INT */
@@ -135,11 +138,10 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
  * stolen.
  *
  * Updates to busy_stolen are protected by arch.tbacct_lock;
- * updates to vc->stolen_tb are protected by the arch.tbacct_lock
- * of the vcpu that has taken responsibility for running the vcore
- * (i.e. vc->runner).  The stolen times are measured in units of
- * timebase ticks.  (Note that the != TB_NIL checks below are
- * purely defensive; they should never fail.)
+ * updates to vc->stolen_tb are protected by the vcore->stoltb_lock
+ * lock.  The stolen times are measured in units of timebase ticks.
+ * (Note that the != TB_NIL checks below are purely defensive;
+ * they should never fail.)
  */
 
 static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
@@ -147,12 +149,21 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
        unsigned long flags;
 
-       spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
-       if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&
-           vc->preempt_tb != TB_NIL) {
-               vc->stolen_tb += mftb() - vc->preempt_tb;
-               vc->preempt_tb = TB_NIL;
+       /*
+        * We can test vc->runner without taking the vcore lock,
+        * because only this task ever sets vc->runner to this
+        * vcpu, and once it is set to this vcpu, only this task
+        * ever sets it to NULL.
+        */
+       if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
+               spin_lock_irqsave(&vc->stoltb_lock, flags);
+               if (vc->preempt_tb != TB_NIL) {
+                       vc->stolen_tb += mftb() - vc->preempt_tb;
+                       vc->preempt_tb = TB_NIL;
+               }
+               spin_unlock_irqrestore(&vc->stoltb_lock, flags);
        }
+       spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
        if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
            vcpu->arch.busy_preempt != TB_NIL) {
                vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
@@ -166,9 +177,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
        unsigned long flags;
 
-       spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
-       if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+       if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
+               spin_lock_irqsave(&vc->stoltb_lock, flags);
                vc->preempt_tb = mftb();
+               spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+       }
+       spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
        if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
                vcpu->arch.busy_preempt = mftb();
        spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
@@ -191,9 +205,6 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
        if (arch_compat) {
-               if (!cpu_has_feature(CPU_FTR_ARCH_206))
-                       return -EINVAL; /* 970 has no compat mode support */
-
                switch (arch_compat) {
                case PVR_ARCH_205:
                        /*
@@ -505,25 +516,14 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
 static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
 {
        u64 p;
+       unsigned long flags;
 
-       /*
-        * If we are the task running the vcore, then since we hold
-        * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
-        * can't be updated, so we don't need the tbacct_lock.
-        * If the vcore is inactive, it can't become active (since we
-        * hold the vcore lock), so the vcpu load/put functions won't
-        * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
-        */
+       spin_lock_irqsave(&vc->stoltb_lock, flags);
+       p = vc->stolen_tb;
        if (vc->vcore_state != VCORE_INACTIVE &&
-           vc->runner->arch.run_task != current) {
-               spin_lock_irq(&vc->runner->arch.tbacct_lock);
-               p = vc->stolen_tb;
-               if (vc->preempt_tb != TB_NIL)
-                       p += now - vc->preempt_tb;
-               spin_unlock_irq(&vc->runner->arch.tbacct_lock);
-       } else {
-               p = vc->stolen_tb;
-       }
+           vc->preempt_tb != TB_NIL)
+               p += now - vc->preempt_tb;
+       spin_unlock_irqrestore(&vc->stoltb_lock, flags);
        return p;
 }
 
@@ -607,10 +607,45 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
        }
 }
 
+static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
+{
+       struct kvmppc_vcore *vcore = target->arch.vcore;
+
+       /*
+        * We expect to have been called by the real mode handler
+        * (kvmppc_rm_h_confer()) which would have directly returned
+        * H_SUCCESS if the source vcore wasn't idle (e.g. if it may
+        * have useful work to do and should not confer) so we don't
+        * recheck that here.
+        */
+
+       spin_lock(&vcore->lock);
+       if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
+           vcore->vcore_state != VCORE_INACTIVE)
+               target = vcore->runner;
+       spin_unlock(&vcore->lock);
+
+       return kvm_vcpu_yield_to(target);
+}
+
+static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
+{
+       int yield_count = 0;
+       struct lppaca *lppaca;
+
+       spin_lock(&vcpu->arch.vpa_update_lock);
+       lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
+       if (lppaca)
+               yield_count = lppaca->yield_count;
+       spin_unlock(&vcpu->arch.vpa_update_lock);
+       return yield_count;
+}
+
 int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 {
        unsigned long req = kvmppc_get_gpr(vcpu, 3);
        unsigned long target, ret = H_SUCCESS;
+       int yield_count;
        struct kvm_vcpu *tvcpu;
        int idx, rc;
 
@@ -619,14 +654,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
                return RESUME_HOST;
 
        switch (req) {
-       case H_ENTER:
-               idx = srcu_read_lock(&vcpu->kvm->srcu);
-               ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
-                                             kvmppc_get_gpr(vcpu, 5),
-                                             kvmppc_get_gpr(vcpu, 6),
-                                             kvmppc_get_gpr(vcpu, 7));
-               srcu_read_unlock(&vcpu->kvm->srcu, idx);
-               break;
        case H_CEDE:
                break;
        case H_PROD:
@@ -654,7 +681,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
                        ret = H_PARAMETER;
                        break;
                }
-               kvm_vcpu_yield_to(tvcpu);
+               yield_count = kvmppc_get_gpr(vcpu, 5);
+               if (kvmppc_get_yield_count(tvcpu) != yield_count)
+                       break;
+               kvm_arch_vcpu_yield_to(tvcpu);
                break;
        case H_REGISTER_VPA:
                ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
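
With the change above, H_CONFER is only forwarded to kvm_arch_vcpu_yield_to() when the yield count supplied by the guest still matches the target vcpu's current yield count read from its pinned VPA; a confer quoting a stale count is silently dropped. For reference, the guest side of this handshake follows the usual pseries pattern sketched below (the helper names are assumptions drawn from the pseries lock-yielding code, not from this patch):

/* Guest-side sketch: confer the timeslice to the lock holder's vcpu,
 * quoting the yield count we observed so a stale confer is ignored. */
static void example_confer_to(int holder_cpu)
{
	u32 yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);

	/* An even yield count means the holder vcpu is currently running. */
	if (!(yield_count & 1))
		return;

	plpar_hcall_norets(H_CONFER,
			   get_hard_smp_processor_id(holder_cpu), yield_count);
}
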
@@ -769,6 +799,8 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
                vcpu->stat.ext_intr_exits++;
                r = RESUME_GUEST;
                break;
+       /* HMI is hypervisor interrupt and host has handled it. Resume guest.*/
+       case BOOK3S_INTERRUPT_HMI:
        case BOOK3S_INTERRUPT_PERFMON:
                r = RESUME_GUEST;
                break;
@@ -837,6 +869,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
         * Accordingly return to Guest or Host.
         */
        case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+               if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED)
+                       vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ?
+                               swab32(vcpu->arch.emul_inst) :
+                               vcpu->arch.emul_inst;
                if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
                        r = kvmppc_emulate_debug_inst(run, vcpu);
                } else {
@@ -1357,6 +1393,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
 
        INIT_LIST_HEAD(&vcore->runnable_threads);
        spin_lock_init(&vcore->lock);
+       spin_lock_init(&vcore->stoltb_lock);
        init_waitqueue_head(&vcore->wq);
        vcore->preempt_tb = TB_NIL;
        vcore->lpcr = kvm->arch.lpcr;
@@ -1694,9 +1731,11 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
        vc->n_woken = 0;
        vc->nap_count = 0;
        vc->entry_exit_count = 0;
+       vc->preempt_tb = TB_NIL;
        vc->vcore_state = VCORE_STARTING;
        vc->in_guest = 0;
        vc->napping_threads = 0;
+       vc->conferring_threads = 0;
 
        /*
         * Updating any of the vpas requires calling kvmppc_pin_guest_page,
@@ -1726,6 +1765,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
        list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
                kvmppc_start_thread(vcpu);
                kvmppc_create_dtl_entry(vcpu, vc);
+               trace_kvm_guest_enter(vcpu);
        }
 
        /* Set this explicitly in case thread 0 doesn't have a vcpu */
@@ -1734,6 +1774,9 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 
        vc->vcore_state = VCORE_RUNNING;
        preempt_disable();
+
+       trace_kvmppc_run_core(vc, 0);
+
        spin_unlock(&vc->lock);
 
        kvm_guest_enter();
@@ -1779,6 +1822,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
                    kvmppc_core_pending_dec(vcpu))
                        kvmppc_core_dequeue_dec(vcpu);
 
+               trace_kvm_guest_exit(vcpu);
+
                ret = RESUME_GUEST;
                if (vcpu->arch.trap)
                        ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
@@ -1804,6 +1849,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
                        wake_up(&vcpu->arch.cpu_run);
                }
        }
+
+       trace_kvmppc_run_core(vc, 1);
 }
 
 /*
@@ -1826,15 +1873,37 @@ static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
  */
 static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 {
+       struct kvm_vcpu *vcpu;
+       int do_sleep = 1;
+
        DEFINE_WAIT(wait);
 
        prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+
+       /*
+        * Check one last time for pending exceptions and ceded state after
+        * we put ourselves on the wait queue
+        */
+       list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+               if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
+                       do_sleep = 0;
+                       break;
+               }
+       }
+
+       if (!do_sleep) {
+               finish_wait(&vc->wq, &wait);
+               return;
+       }
+
        vc->vcore_state = VCORE_SLEEPING;
+       trace_kvmppc_vcore_blocked(vc, 0);
        spin_unlock(&vc->lock);
        schedule();
        finish_wait(&vc->wq, &wait);
        spin_lock(&vc->lock);
        vc->vcore_state = VCORE_INACTIVE;
+       trace_kvmppc_vcore_blocked(vc, 1);
 }
 
 static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
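
Editorial note on the kvmppc_vcore_blocked() hunk above: the runnable list is re-checked after the vcore has put itself on the wait queue, so a vcpu that became runnable in the meantime cannot be missed. Stripped of the wait-queue machinery, the sleep decision reduces to something like this stand-alone model:

    #include <stdio.h>

    struct toy_vcpu { int pending_exceptions; int ceded; };

    /* Sleep only if every runnable vcpu has ceded and has nothing pending. */
    static int vcore_should_sleep(const struct toy_vcpu *v, int n)
    {
        for (int i = 0; i < n; i++)
            if (v[i].pending_exceptions || !v[i].ceded)
                return 0;
        return 1;
    }

    int main(void)
    {
        struct toy_vcpu vcpus[2] = { { 0, 1 }, { 1, 1 } };
        printf("%d\n", vcore_should_sleep(vcpus, 2)); /* 0: keep running   */
        vcpus[1].pending_exceptions = 0;
        printf("%d\n", vcore_should_sleep(vcpus, 2)); /* 1: safe to sleep  */
        return 0;
    }
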
@@ -1843,6 +1912,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
        struct kvmppc_vcore *vc;
        struct kvm_vcpu *v, *vn;
 
+       trace_kvmppc_run_vcpu_enter(vcpu);
+
        kvm_run->exit_reason = 0;
        vcpu->arch.ret = RESUME_GUEST;
        vcpu->arch.trap = 0;
@@ -1872,6 +1943,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                    VCORE_EXIT_COUNT(vc) == 0) {
                        kvmppc_create_dtl_entry(vcpu, vc);
                        kvmppc_start_thread(vcpu);
+                       trace_kvm_guest_enter(vcpu);
                } else if (vc->vcore_state == VCORE_SLEEPING) {
                        wake_up(&vc->wq);
                }
@@ -1936,6 +2008,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                wake_up(&v->arch.cpu_run);
        }
 
+       trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
        spin_unlock(&vc->lock);
        return vcpu->arch.ret;
 }
@@ -1962,7 +2035,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
        /* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
        smp_mb();
 
-       /* On the first time here, set up HTAB and VRMA or RMA */
+       /* On the first time here, set up HTAB and VRMA */
        if (!vcpu->kvm->arch.rma_setup_done) {
                r = kvmppc_hv_setup_htab_rma(vcpu);
                if (r)
@@ -1981,7 +2054,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
                if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
                    !(vcpu->arch.shregs.msr & MSR_PR)) {
+                       trace_kvm_hcall_enter(vcpu);
                        r = kvmppc_pseries_do_hcall(vcpu);
+                       trace_kvm_hcall_exit(vcpu, r);
                        kvmppc_core_prepare_to_enter(vcpu);
                } else if (r == RESUME_PAGE_FAULT) {
                        srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -1997,98 +2072,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
        return r;
 }
 
-
-/* Work out RMLS (real mode limit selector) field value for a given RMA size.
-   Assumes POWER7 or PPC970. */
-static inline int lpcr_rmls(unsigned long rma_size)
-{
-       switch (rma_size) {
-       case 32ul << 20:        /* 32 MB */
-               if (cpu_has_feature(CPU_FTR_ARCH_206))
-                       return 8;       /* only supported on POWER7 */
-               return -1;
-       case 64ul << 20:        /* 64 MB */
-               return 3;
-       case 128ul << 20:       /* 128 MB */
-               return 7;
-       case 256ul << 20:       /* 256 MB */
-               return 4;
-       case 1ul << 30:         /* 1 GB */
-               return 2;
-       case 16ul << 30:        /* 16 GB */
-               return 1;
-       case 256ul << 30:       /* 256 GB */
-               return 0;
-       default:
-               return -1;
-       }
-}
-
-static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-       struct page *page;
-       struct kvm_rma_info *ri = vma->vm_file->private_data;
-
-       if (vmf->pgoff >= kvm_rma_pages)
-               return VM_FAULT_SIGBUS;
-
-       page = pfn_to_page(ri->base_pfn + vmf->pgoff);
-       get_page(page);
-       vmf->page = page;
-       return 0;
-}
-
-static const struct vm_operations_struct kvm_rma_vm_ops = {
-       .fault = kvm_rma_fault,
-};
-
-static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
-{
-       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-       vma->vm_ops = &kvm_rma_vm_ops;
-       return 0;
-}
-
-static int kvm_rma_release(struct inode *inode, struct file *filp)
-{
-       struct kvm_rma_info *ri = filp->private_data;
-
-       kvm_release_rma(ri);
-       return 0;
-}
-
-static const struct file_operations kvm_rma_fops = {
-       .mmap           = kvm_rma_mmap,
-       .release        = kvm_rma_release,
-};
-
-static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
-                                     struct kvm_allocate_rma *ret)
-{
-       long fd;
-       struct kvm_rma_info *ri;
-       /*
-        * Only do this on PPC970 in HV mode
-        */
-       if (!cpu_has_feature(CPU_FTR_HVMODE) ||
-           !cpu_has_feature(CPU_FTR_ARCH_201))
-               return -EINVAL;
-
-       if (!kvm_rma_pages)
-               return -EINVAL;
-
-       ri = kvm_alloc_rma();
-       if (!ri)
-               return -ENOMEM;
-
-       fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC);
-       if (fd < 0)
-               kvm_release_rma(ri);
-
-       ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
-       return fd;
-}
-
 static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
                                     int linux_psize)
 {
@@ -2167,26 +2150,6 @@ out:
        return r;
 }
 
-static void unpin_slot(struct kvm_memory_slot *memslot)
-{
-       unsigned long *physp;
-       unsigned long j, npages, pfn;
-       struct page *page;
-
-       physp = memslot->arch.slot_phys;
-       npages = memslot->npages;
-       if (!physp)
-               return;
-       for (j = 0; j < npages; j++) {
-               if (!(physp[j] & KVMPPC_GOT_PAGE))
-                       continue;
-               pfn = physp[j] >> PAGE_SHIFT;
-               page = pfn_to_page(pfn);
-               SetPageDirty(page);
-               put_page(page);
-       }
-}
-
 static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
                                        struct kvm_memory_slot *dont)
 {
@@ -2194,11 +2157,6 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
                vfree(free->arch.rmap);
                free->arch.rmap = NULL;
        }
-       if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
-               unpin_slot(free);
-               vfree(free->arch.slot_phys);
-               free->arch.slot_phys = NULL;
-       }
 }
 
 static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
@@ -2207,7 +2165,6 @@ static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
        slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
        if (!slot->arch.rmap)
                return -ENOMEM;
-       slot->arch.slot_phys = NULL;
 
        return 0;
 }
@@ -2216,17 +2173,6 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
                                        struct kvm_memory_slot *memslot,
                                        struct kvm_userspace_memory_region *mem)
 {
-       unsigned long *phys;
-
-       /* Allocate a slot_phys array if needed */
-       phys = memslot->arch.slot_phys;
-       if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
-               phys = vzalloc(memslot->npages * sizeof(unsigned long));
-               if (!phys)
-                       return -ENOMEM;
-               memslot->arch.slot_phys = phys;
-       }
-
        return 0;
 }
 
@@ -2284,17 +2230,11 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 {
        int err = 0;
        struct kvm *kvm = vcpu->kvm;
-       struct kvm_rma_info *ri = NULL;
        unsigned long hva;
        struct kvm_memory_slot *memslot;
        struct vm_area_struct *vma;
        unsigned long lpcr = 0, senc;
-       unsigned long lpcr_mask = 0;
        unsigned long psize, porder;
-       unsigned long rma_size;
-       unsigned long rmls;
-       unsigned long *physp;
-       unsigned long i, npages;
        int srcu_idx;
 
        mutex_lock(&kvm->lock);
@@ -2329,88 +2269,25 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
        psize = vma_kernel_pagesize(vma);
        porder = __ilog2(psize);
 
-       /* Is this one of our preallocated RMAs? */
-       if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
-           hva == vma->vm_start)
-               ri = vma->vm_file->private_data;
-
        up_read(&current->mm->mmap_sem);
 
-       if (!ri) {
-               /* On POWER7, use VRMA; on PPC970, give up */
-               err = -EPERM;
-               if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-                       pr_err("KVM: CPU requires an RMO\n");
-                       goto out_srcu;
-               }
+       /* We can handle 4k, 64k or 16M pages in the VRMA */
+       err = -EINVAL;
+       if (!(psize == 0x1000 || psize == 0x10000 ||
+             psize == 0x1000000))
+               goto out_srcu;
 
-               /* We can handle 4k, 64k or 16M pages in the VRMA */
-               err = -EINVAL;
-               if (!(psize == 0x1000 || psize == 0x10000 ||
-                     psize == 0x1000000))
-                       goto out_srcu;
+       /* Update VRMASD field in the LPCR */
+       senc = slb_pgsize_encoding(psize);
+       kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+               (VRMA_VSID << SLB_VSID_SHIFT_1T);
+       /* the -4 is to account for senc values starting at 0x10 */
+       lpcr = senc << (LPCR_VRMASD_SH - 4);
 
-               /* Update VRMASD field in the LPCR */
-               senc = slb_pgsize_encoding(psize);
-               kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
-                       (VRMA_VSID << SLB_VSID_SHIFT_1T);
-               lpcr_mask = LPCR_VRMASD;
-               /* the -4 is to account for senc values starting at 0x10 */
-               lpcr = senc << (LPCR_VRMASD_SH - 4);
+       /* Create HPTEs in the hash page table for the VRMA */
+       kvmppc_map_vrma(vcpu, memslot, porder);
 
-               /* Create HPTEs in the hash page table for the VRMA */
-               kvmppc_map_vrma(vcpu, memslot, porder);
-
-       } else {
-               /* Set up to use an RMO region */
-               rma_size = kvm_rma_pages;
-               if (rma_size > memslot->npages)
-                       rma_size = memslot->npages;
-               rma_size <<= PAGE_SHIFT;
-               rmls = lpcr_rmls(rma_size);
-               err = -EINVAL;
-               if ((long)rmls < 0) {
-                       pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
-                       goto out_srcu;
-               }
-               atomic_inc(&ri->use_count);
-               kvm->arch.rma = ri;
-
-               /* Update LPCR and RMOR */
-               if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-                       /* PPC970; insert RMLS value (split field) in HID4 */
-                       lpcr_mask = (1ul << HID4_RMLS0_SH) |
-                               (3ul << HID4_RMLS2_SH) | HID4_RMOR;
-                       lpcr = ((rmls >> 2) << HID4_RMLS0_SH) |
-                               ((rmls & 3) << HID4_RMLS2_SH);
-                       /* RMOR is also in HID4 */
-                       lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
-                               << HID4_RMOR_SH;
-               } else {
-                       /* POWER7 */
-                       lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS;
-                       lpcr = rmls << LPCR_RMLS_SH;
-                       kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
-               }
-               pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
-                       ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
-
-               /* Initialize phys addrs of pages in RMO */
-               npages = kvm_rma_pages;
-               porder = __ilog2(npages);
-               physp = memslot->arch.slot_phys;
-               if (physp) {
-                       if (npages > memslot->npages)
-                               npages = memslot->npages;
-                       spin_lock(&kvm->arch.slot_phys_lock);
-                       for (i = 0; i < npages; ++i)
-                               physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
-                                       porder;
-                       spin_unlock(&kvm->arch.slot_phys_lock);
-               }
-       }
-
-       kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+       kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
 
        /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
        smp_wmb();
@@ -2449,35 +2326,21 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
        memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
               sizeof(kvm->arch.enabled_hcalls));
 
-       kvm->arch.rma = NULL;
-
        kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
 
-       if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-               /* PPC970; HID4 is effectively the LPCR */
-               kvm->arch.host_lpid = 0;
-               kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
-               lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
-               lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
-                       ((lpid & 0xf) << HID4_LPID5_SH);
-       } else {
-               /* POWER7; init LPCR for virtual RMA mode */
-               kvm->arch.host_lpid = mfspr(SPRN_LPID);
-               kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
-               lpcr &= LPCR_PECE | LPCR_LPES;
-               lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
-                       LPCR_VPM0 | LPCR_VPM1;
-               kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
-                       (VRMA_VSID << SLB_VSID_SHIFT_1T);
-               /* On POWER8 turn on online bit to enable PURR/SPURR */
-               if (cpu_has_feature(CPU_FTR_ARCH_207S))
-                       lpcr |= LPCR_ONL;
-       }
+       /* Init LPCR for virtual RMA mode */
+       kvm->arch.host_lpid = mfspr(SPRN_LPID);
+       kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
+       lpcr &= LPCR_PECE | LPCR_LPES;
+       lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
+               LPCR_VPM0 | LPCR_VPM1;
+       kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
+               (VRMA_VSID << SLB_VSID_SHIFT_1T);
+       /* On POWER8 turn on online bit to enable PURR/SPURR */
+       if (cpu_has_feature(CPU_FTR_ARCH_207S))
+               lpcr |= LPCR_ONL;
        kvm->arch.lpcr = lpcr;
 
-       kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
-       spin_lock_init(&kvm->arch.slot_phys_lock);
-
        /*
         * Track that we now have a HV mode VM active. This blocks secondary
         * CPU threads from coming online.
@@ -2507,10 +2370,6 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
        kvm_hv_vm_deactivated();
 
        kvmppc_free_vcores(kvm);
-       if (kvm->arch.rma) {
-               kvm_release_rma(kvm->arch.rma);
-               kvm->arch.rma = NULL;
-       }
 
        kvmppc_free_hpt(kvm);
 }
@@ -2536,7 +2395,8 @@ static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
 
 static int kvmppc_core_check_processor_compat_hv(void)
 {
-       if (!cpu_has_feature(CPU_FTR_HVMODE))
+       if (!cpu_has_feature(CPU_FTR_HVMODE) ||
+           !cpu_has_feature(CPU_FTR_ARCH_206))
                return -EIO;
        return 0;
 }
@@ -2550,16 +2410,6 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
 
        switch (ioctl) {
 
-       case KVM_ALLOCATE_RMA: {
-               struct kvm_allocate_rma rma;
-               struct kvm *kvm = filp->private_data;
-
-               r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
-               if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
-                       r = -EFAULT;
-               break;
-       }
-
        case KVM_PPC_ALLOCATE_HTAB: {
                u32 htab_order;
 
index 3f1bb5a..1f083ff 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/memblock.h>
 #include <linux/sizes.h>
 #include <linux/cma.h>
+#include <linux/bitops.h>
 
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
  * By default we reserve 5% of memory for hash pagetable allocation.
  */
 static unsigned long kvm_cma_resv_ratio = 5;
-/*
- * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
- * Each RMA has to be physically contiguous and of a size that the
- * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
- * and other larger sizes.  Since we are unlikely to be allocate that
- * much physically contiguous memory after the system is up and running,
- * we preallocate a set of RMAs in early boot using CMA.
- * should be power of 2.
- */
-unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
-EXPORT_SYMBOL_GPL(kvm_rma_pages);
 
 static struct cma *kvm_cma;
 
-/* Work out RMLS (real mode limit selector) field value for a given RMA size.
-   Assumes POWER7 or PPC970. */
-static inline int lpcr_rmls(unsigned long rma_size)
-{
-       switch (rma_size) {
-       case 32ul << 20:        /* 32 MB */
-               if (cpu_has_feature(CPU_FTR_ARCH_206))
-                       return 8;       /* only supported on POWER7 */
-               return -1;
-       case 64ul << 20:        /* 64 MB */
-               return 3;
-       case 128ul << 20:       /* 128 MB */
-               return 7;
-       case 256ul << 20:       /* 256 MB */
-               return 4;
-       case 1ul << 30:         /* 1 GB */
-               return 2;
-       case 16ul << 30:        /* 16 GB */
-               return 1;
-       case 256ul << 30:       /* 256 GB */
-               return 0;
-       default:
-               return -1;
-       }
-}
-
-static int __init early_parse_rma_size(char *p)
-{
-       unsigned long kvm_rma_size;
-
-       pr_debug("%s(%s)\n", __func__, p);
-       if (!p)
-               return -EINVAL;
-       kvm_rma_size = memparse(p, &p);
-       /*
-        * Check that the requested size is one supported in hardware
-        */
-       if (lpcr_rmls(kvm_rma_size) < 0) {
-               pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
-               return -EINVAL;
-       }
-       kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
-       return 0;
-}
-early_param("kvm_rma_size", early_parse_rma_size);
-
-struct kvm_rma_info *kvm_alloc_rma()
-{
-       struct page *page;
-       struct kvm_rma_info *ri;
-
-       ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
-       if (!ri)
-               return NULL;
-       page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages));
-       if (!page)
-               goto err_out;
-       atomic_set(&ri->use_count, 1);
-       ri->base_pfn = page_to_pfn(page);
-       return ri;
-err_out:
-       kfree(ri);
-       return NULL;
-}
-EXPORT_SYMBOL_GPL(kvm_alloc_rma);
-
-void kvm_release_rma(struct kvm_rma_info *ri)
-{
-       if (atomic_dec_and_test(&ri->use_count)) {
-               cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
-               kfree(ri);
-       }
-}
-EXPORT_SYMBOL_GPL(kvm_release_rma);
-
 static int __init early_parse_kvm_cma_resv(char *p)
 {
        pr_debug("%s(%s)\n", __func__, p);
@@ -132,14 +47,9 @@ early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
 
 struct page *kvm_alloc_hpt(unsigned long nr_pages)
 {
-       unsigned long align_pages = HPT_ALIGN_PAGES;
-
        VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
-       /* Old CPUs require HPT aligned on a multiple of its size */
-       if (!cpu_has_feature(CPU_FTR_ARCH_206))
-               align_pages = nr_pages;
-       return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages));
+       return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES));
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
 
@@ -180,21 +90,43 @@ void __init kvm_cma_reserve(void)
        if (selected_size) {
                pr_debug("%s: reserving %ld MiB for global area\n", __func__,
                         (unsigned long)selected_size / SZ_1M);
-               /*
-                * Old CPUs require HPT aligned on a multiple of its size. So for them
-                * make the alignment as max size we could request.
-                */
-               if (!cpu_has_feature(CPU_FTR_ARCH_206))
-                       align_size = __rounddown_pow_of_two(selected_size);
-               else
-                       align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
-
-               align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
+               align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
                cma_declare_contiguous(0, selected_size, 0, align_size,
                        KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
        }
 }
 
+/*
+ * Real-mode H_CONFER implementation.
+ * We check if we are the only vcpu out of this virtual core
+ * still running in the guest and not ceded.  If so, we pop up
+ * to the virtual-mode implementation; if not, just return to
+ * the guest.
+ */
+long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
+                           unsigned int yield_count)
+{
+       struct kvmppc_vcore *vc = vcpu->arch.vcore;
+       int threads_running;
+       int threads_ceded;
+       int threads_conferring;
+       u64 stop = get_tb() + 10 * tb_ticks_per_usec;
+       int rv = H_SUCCESS; /* => don't yield */
+
+       set_bit(vcpu->arch.ptid, &vc->conferring_threads);
+       while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) {
+               threads_running = VCORE_ENTRY_COUNT(vc);
+               threads_ceded = hweight32(vc->napping_threads);
+               threads_conferring = hweight32(vc->conferring_threads);
+               if (threads_ceded + threads_conferring >= threads_running) {
+                       rv = H_TOO_HARD; /* => do yield */
+                       break;
+               }
+       }
+       clear_bit(vcpu->arch.ptid, &vc->conferring_threads);
+       return rv;
+}
+
 /*
  * When running HV mode KVM we need to block certain operations while KVM VMs
  * exist in the system. We use a counter of VMs to track this.
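
Editorial note on kvmppc_rm_h_confer() added above: the helper spins for roughly 10 microseconds and only returns H_TOO_HARD (punt to the virtual-mode H_CONFER, which does the actual yield) once every thread still counted as running in the vcore is either napping or also conferring. The core decision, modelled in plain C with __builtin_popcount standing in for hweight32:

    #include <stdio.h>

    static int should_exit_to_virtual_mode(int threads_running,
                                           unsigned int napping_mask,
                                           unsigned int conferring_mask)
    {
        int ceded      = __builtin_popcount(napping_mask);
        int conferring = __builtin_popcount(conferring_mask);

        /* Everyone left is waiting on someone else: worth a real yield. */
        return ceded + conferring >= threads_running;
    }

    int main(void)
    {
        printf("%d\n", should_exit_to_virtual_mode(4, 0x3, 0x4)); /* 0: keep spinning */
        printf("%d\n", should_exit_to_virtual_mode(4, 0x7, 0x8)); /* 1: do the yield  */
        return 0;
    }
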
index 731be74..36540a9 100644 (file)
@@ -52,10 +52,8 @@ _GLOBAL(__kvmppc_vcore_entry)
        std     r3, _CCR(r1)
 
        /* Save host DSCR */
-BEGIN_FTR_SECTION
        mfspr   r3, SPRN_DSCR
        std     r3, HSTATE_DSCR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 BEGIN_FTR_SECTION
        /* Save host DABR */
@@ -84,11 +82,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        mfspr   r7, SPRN_MMCR0          /* save MMCR0 */
        mtspr   SPRN_MMCR0, r3          /* freeze all counters, disable interrupts */
        mfspr   r6, SPRN_MMCRA
-BEGIN_FTR_SECTION
-       /* On P7, clear MMCRA in order to disable SDAR updates */
+       /* Clear MMCRA in order to disable SDAR updates */
        li      r5, 0
        mtspr   SPRN_MMCRA, r5
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
        isync
        ld      r3, PACALPPACAPTR(r13)  /* is the host using the PMU? */
        lbz     r5, LPPACA_PMCINUSE(r3)
@@ -113,20 +109,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        mfspr   r7, SPRN_PMC4
        mfspr   r8, SPRN_PMC5
        mfspr   r9, SPRN_PMC6
-BEGIN_FTR_SECTION
-       mfspr   r10, SPRN_PMC7
-       mfspr   r11, SPRN_PMC8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
        stw     r3, HSTATE_PMC(r13)
        stw     r5, HSTATE_PMC + 4(r13)
        stw     r6, HSTATE_PMC + 8(r13)
        stw     r7, HSTATE_PMC + 12(r13)
        stw     r8, HSTATE_PMC + 16(r13)
        stw     r9, HSTATE_PMC + 20(r13)
-BEGIN_FTR_SECTION
-       stw     r10, HSTATE_PMC + 24(r13)
-       stw     r11, HSTATE_PMC + 28(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 31:
 
        /*
@@ -140,31 +128,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
        add     r8,r8,r7
        std     r8,HSTATE_DECEXP(r13)
 
-#ifdef CONFIG_SMP
-       /*
-        * On PPC970, if the guest vcpu has an external interrupt pending,
-        * send ourselves an IPI so as to interrupt the guest once it
-        * enables interrupts.  (It must have interrupts disabled,
-        * otherwise we would already have delivered the interrupt.)
-        *
-        * XXX If this is a UP build, smp_send_reschedule is not available,
-        * so the interrupt will be delayed until the next time the vcpu
-        * enters the guest with interrupts enabled.
-        */
-BEGIN_FTR_SECTION
-       ld      r4, HSTATE_KVM_VCPU(r13)
-       ld      r0, VCPU_PENDING_EXC(r4)
-       li      r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
-       oris    r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
-       and.    r0, r0, r7
-       beq     32f
-       lhz     r3, PACAPACAINDEX(r13)
-       bl      smp_send_reschedule
-       nop
-32:
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-#endif /* CONFIG_SMP */
-
        /* Jump to partition switch code */
        bl      kvmppc_hv_entry_trampoline
        nop
index d562c8e..60081bd 100644 (file)
@@ -138,8 +138,5 @@ out:
 
 long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
 {
-       if (cpu_has_feature(CPU_FTR_ARCH_206))
-               return kvmppc_realmode_mc_power7(vcpu);
-
-       return 0;
+       return kvmppc_realmode_mc_power7(vcpu);
 }
index 084ad54..510bdfb 100644 (file)
@@ -45,16 +45,12 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags)
         * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
         * we can use tlbiel as long as we mark all other physical
         * cores as potentially having stale TLB entries for this lpid.
-        * If we're not using MMU notifiers, we never take pages away
-        * from the guest, so we can use tlbiel if requested.
         * Otherwise, don't use tlbiel.
         */
        if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
                global = 0;
-       else if (kvm->arch.using_mmu_notifiers)
-               global = 1;
        else
-               global = !(flags & H_LOCAL);
+               global = 1;
 
        if (!global) {
                /* any other core might now have stale TLB entries... */
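
Editorial note on global_invalidates() above: with the PPC970/no-MMU-notifier path gone, the only case that may use the local tlbiel form is a single online vcore invalidating from within its own vcpu context; everything else must broadcast with tlbie. As a tiny model of that decision:

    #include <stdio.h>

    static int need_global_tlb_invalidate(int online_vcores, int in_vcpu_context)
    {
        /* Only the lone-vcore, in-guest-context case can use tlbiel. */
        return !(online_vcores == 1 && in_vcpu_context);
    }

    int main(void)
    {
        printf("%d\n", need_global_tlb_invalidate(1, 1)); /* 0: tlbiel is enough */
        printf("%d\n", need_global_tlb_invalidate(2, 1)); /* 1: must use tlbie   */
        return 0;
    }
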
@@ -170,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
        struct revmap_entry *rev;
        unsigned long g_ptel;
        struct kvm_memory_slot *memslot;
-       unsigned long *physp, pte_size;
+       unsigned long pte_size;
        unsigned long is_io;
        unsigned long *rmap;
        pte_t pte;
@@ -198,9 +194,6 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
        is_io = ~0ul;
        rmap = NULL;
        if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
-               /* PPC970 can't do emulated MMIO */
-               if (!cpu_has_feature(CPU_FTR_ARCH_206))
-                       return H_PARAMETER;
                /* Emulated MMIO - mark this with key=31 */
                pteh |= HPTE_V_ABSENT;
                ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
@@ -213,37 +206,20 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
        slot_fn = gfn - memslot->base_gfn;
        rmap = &memslot->arch.rmap[slot_fn];
 
-       if (!kvm->arch.using_mmu_notifiers) {
-               physp = memslot->arch.slot_phys;
-               if (!physp)
-                       return H_PARAMETER;
-               physp += slot_fn;
-               if (realmode)
-                       physp = real_vmalloc_addr(physp);
-               pa = *physp;
-               if (!pa)
-                       return H_TOO_HARD;
-               is_io = pa & (HPTE_R_I | HPTE_R_W);
-               pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
-               pa &= PAGE_MASK;
+       /* Translate to host virtual address */
+       hva = __gfn_to_hva_memslot(memslot, gfn);
+
+       /* Look up the Linux PTE for the backing page */
+       pte_size = psize;
+       pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
+       if (pte_present(pte) && !pte_numa(pte)) {
+               if (writing && !pte_write(pte))
+                       /* make the actual HPTE be read-only */
+                       ptel = hpte_make_readonly(ptel);
+               is_io = hpte_cache_bits(pte_val(pte));
+               pa = pte_pfn(pte) << PAGE_SHIFT;
+               pa |= hva & (pte_size - 1);
                pa |= gpa & ~PAGE_MASK;
-       } else {
-               /* Translate to host virtual address */
-               hva = __gfn_to_hva_memslot(memslot, gfn);
-
-               /* Look up the Linux PTE for the backing page */
-               pte_size = psize;
-               pte = lookup_linux_pte_and_update(pgdir, hva, writing,
-                                                 &pte_size);
-               if (pte_present(pte) && !pte_numa(pte)) {
-                       if (writing && !pte_write(pte))
-                               /* make the actual HPTE be read-only */
-                               ptel = hpte_make_readonly(ptel);
-                       is_io = hpte_cache_bits(pte_val(pte));
-                       pa = pte_pfn(pte) << PAGE_SHIFT;
-                       pa |= hva & (pte_size - 1);
-                       pa |= gpa & ~PAGE_MASK;
-               }
        }
 
        if (pte_size < psize)
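
Editorial note on the kvmppc_do_h_enter() hunk above: with the slot_phys path removed, the HPTE is always derived from the host Linux PTE, and a requested write mapping is downgraded to read-only when the host PTE is not writable. A stand-alone model of that rule; the struct and field names are illustrative, not the kernel's:

    #include <stdio.h>

    struct host_pte { int present; int writable; unsigned long pfn; };

    static int build_mapping(const struct host_pte *pte, int guest_wants_write,
                             unsigned long *pa, int *read_only)
    {
        if (!pte->present)
            return -1;                        /* no backing page yet */
        *pa = pte->pfn << 12;                 /* 4k page shift in this toy model */
        *read_only = guest_wants_write && !pte->writable;
        return 0;
    }

    int main(void)
    {
        struct host_pte pte = { .present = 1, .writable = 0, .pfn = 0x1234 };
        unsigned long pa; int ro;
        if (build_mapping(&pte, 1, &pa, &ro) == 0)
            printf("pa=0x%lx read_only=%d\n", pa, ro);   /* read_only=1 */
        return 0;
    }
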
@@ -337,8 +313,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
                        rmap = real_vmalloc_addr(rmap);
                lock_rmap(rmap);
                /* Check for pending invalidations under the rmap chain lock */
-               if (kvm->arch.using_mmu_notifiers &&
-                   mmu_notifier_retry(kvm, mmu_seq)) {
+               if (mmu_notifier_retry(kvm, mmu_seq)) {
                        /* inval in progress, write a non-present HPTE */
                        pteh |= HPTE_V_ABSENT;
                        pteh &= ~HPTE_V_VALID;
@@ -395,61 +370,11 @@ static inline int try_lock_tlbie(unsigned int *lock)
        return old == 0;
 }
 
-/*
- * tlbie/tlbiel is a bit different on the PPC970 compared to later
- * processors such as POWER7; the large page bit is in the instruction
- * not RB, and the top 16 bits and the bottom 12 bits of the VA
- * in RB must be 0.
- */
-static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues,
-                         long npages, int global, bool need_sync)
-{
-       long i;
-
-       if (global) {
-               while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-                       cpu_relax();
-               if (need_sync)
-                       asm volatile("ptesync" : : : "memory");
-               for (i = 0; i < npages; ++i) {
-                       unsigned long rb = rbvalues[i];
-
-                       if (rb & 1)             /* large page */
-                               asm volatile("tlbie %0,1" : :
-                                            "r" (rb & 0x0000fffffffff000ul));
-                       else
-                               asm volatile("tlbie %0,0" : :
-                                            "r" (rb & 0x0000fffffffff000ul));
-               }
-               asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-               kvm->arch.tlbie_lock = 0;
-       } else {
-               if (need_sync)
-                       asm volatile("ptesync" : : : "memory");
-               for (i = 0; i < npages; ++i) {
-                       unsigned long rb = rbvalues[i];
-
-                       if (rb & 1)             /* large page */
-                               asm volatile("tlbiel %0,1" : :
-                                            "r" (rb & 0x0000fffffffff000ul));
-                       else
-                               asm volatile("tlbiel %0,0" : :
-                                            "r" (rb & 0x0000fffffffff000ul));
-               }
-               asm volatile("ptesync" : : : "memory");
-       }
-}
-
 static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
                      long npages, int global, bool need_sync)
 {
        long i;
 
-       if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-               /* PPC970 tlbie instruction is a bit different */
-               do_tlbies_970(kvm, rbvalues, npages, global, need_sync);
-               return;
-       }
        if (global) {
                while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
                        cpu_relax();
@@ -667,40 +592,29 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
                rev->guest_rpte = r;
                note_hpte_modification(kvm, rev);
        }
-       r = (be64_to_cpu(hpte[1]) & ~mask) | bits;
 
        /* Update HPTE */
        if (v & HPTE_V_VALID) {
-               rb = compute_tlbie_rb(v, r, pte_index);
-               hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID);
-               do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
                /*
-                * If the host has this page as readonly but the guest
-                * wants to make it read/write, reduce the permissions.
-                * Checking the host permissions involves finding the
-                * memslot and then the Linux PTE for the page.
+                * If the page is valid, don't let it transition from
+                * readonly to writable.  If it should be writable, we'll
+                * take a trap and let the page fault code sort it out.
                 */
-               if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
-                       unsigned long psize, gfn, hva;
-                       struct kvm_memory_slot *memslot;
-                       pgd_t *pgdir = vcpu->arch.pgdir;
-                       pte_t pte;
-
-                       psize = hpte_page_size(v, r);
-                       gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
-                       memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
-                       if (memslot) {
-                               hva = __gfn_to_hva_memslot(memslot, gfn);
-                               pte = lookup_linux_pte_and_update(pgdir, hva,
-                                                                 1, &psize);
-                               if (pte_present(pte) && !pte_write(pte))
-                                       r = hpte_make_readonly(r);
-                       }
+               pte = be64_to_cpu(hpte[1]);
+               r = (pte & ~mask) | bits;
+               if (hpte_is_writable(r) && !hpte_is_writable(pte))
+                       r = hpte_make_readonly(r);
+               /* If the PTE is changing, invalidate it first */
+               if (r != pte) {
+                       rb = compute_tlbie_rb(v, r, pte_index);
+                       hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) |
+                                             HPTE_V_ABSENT);
+                       do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
+                                 true);
+                       hpte[1] = cpu_to_be64(r);
                }
        }
-       hpte[1] = cpu_to_be64(r);
-       eieio();
-       hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK);
+       unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
        asm volatile("ptesync" : : : "memory");
        return H_SUCCESS;
 }
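
Editorial note on the H_PROTECT hunk above: the new rule never upgrades a valid HPTE from read-only to writable here (that transition is deferred to the page-fault path), and the tlbie-and-rewrite is only performed when the resulting PTE actually differs from the old one. In simplified, illustrative form:

    #include <stdio.h>

    #define TOY_WRITABLE 0x1ul   /* stand-in for the HPTE write-permission bit */

    /* Returns the value to store; *needs_invalidate says whether to tlbie. */
    static unsigned long protect_update(unsigned long old_r, unsigned long mask,
                                        unsigned long bits, int *needs_invalidate)
    {
        unsigned long new_r = (old_r & ~mask) | bits;

        if ((new_r & TOY_WRITABLE) && !(old_r & TOY_WRITABLE))
            new_r &= ~TOY_WRITABLE;          /* keep it read-only for now */
        *needs_invalidate = (new_r != old_r);
        return new_r;
    }

    int main(void)
    {
        int inval;
        unsigned long r = protect_update(0x0, TOY_WRITABLE, TOY_WRITABLE, &inval);
        printf("writable=%lu invalidate=%d\n", r & TOY_WRITABLE, inval);
        return 0;
    }
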
index 3ee38e6..7b066f6 100644 (file)
@@ -183,8 +183,10 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
         * state update in HW (ie bus transactions) so we can handle them
         * separately here as well.
         */
-       if (resend)
+       if (resend) {
                icp->rm_action |= XICS_RM_CHECK_RESEND;
+               icp->rm_resend_icp = icp;
+       }
 }
 
 
@@ -254,10 +256,25 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
         * nothing needs to be done as there can be no XISR to
         * reject.
         *
+        * ICP state: Check_IPI
+        *
         * If the CPPR is less favored, then we might be replacing
-        * an interrupt, and thus need to possibly reject it as in
+        * an interrupt, and thus need to possibly reject it.
         *
-        * ICP state: Check_IPI
+        * ICP State: IPI
+        *
+        * Besides rejecting any pending interrupts, we also
+        * update XISR and pending_pri to mark IPI as pending.
+        *
+        * PAPR does not describe this state, but if the MFRR is being
+        * made less favored than its earlier value, there might be
+        * a previously-rejected interrupt needing to be resent.
+        * Ideally, we would want to resend only if
+        *      prio(pending_interrupt) < mfrr &&
+        *      prio(pending_interrupt) < cppr
+        * where the pending interrupt is the one that was rejected. But
+        * we don't have that state, so we simply trigger a resend
+        * whenever the MFRR is made less favored.
         */
        do {
                old_state = new_state = ACCESS_ONCE(icp->state);
@@ -270,13 +287,14 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
                resend = false;
                if (mfrr < new_state.cppr) {
                        /* Reject a pending interrupt if not an IPI */
-                       if (mfrr <= new_state.pending_pri)
+                       if (mfrr <= new_state.pending_pri) {
                                reject = new_state.xisr;
-                       new_state.pending_pri = mfrr;
-                       new_state.xisr = XICS_IPI;
+                               new_state.pending_pri = mfrr;
+                               new_state.xisr = XICS_IPI;
+                       }
                }
 
-               if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+               if (mfrr > old_state.mfrr) {
                        resend = new_state.need_resend;
                        new_state.need_resend = 0;
                }
@@ -289,8 +307,10 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
        }
 
        /* Pass resends to virtual mode */
-       if (resend)
+       if (resend) {
                this_icp->rm_action |= XICS_RM_CHECK_RESEND;
+               this_icp->rm_resend_icp = icp;
+       }
 
        return check_too_hard(xics, this_icp);
 }
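
Editorial note on the MFRR handling changed in the hunks above: the IPI only displaces the currently pending interrupt when it is at least as favoured (XICS priorities: lower value is more favoured), and a resend of previously rejected sources is now triggered whenever the MFRR becomes less favoured, no longer gated on the CPPR. A tiny model of those two rules; the struct below is illustrative, not the kernel's icp state:

    #include <stdio.h>

    /* Toy ICP state; lower priority values are more favoured in XICS. */
    struct toy_icp { unsigned cppr, mfrr, pending_pri, xisr, need_resend; };

    #define TOY_XICS_IPI 2u

    static void set_mfrr(struct toy_icp *s, unsigned mfrr,
                         unsigned *reject, int *resend)
    {
        unsigned old_mfrr = s->mfrr;

        *reject = 0;
        *resend = 0;
        s->mfrr = mfrr;

        /* Mark the IPI pending only if it is at least as favoured as the
         * currently pending interrupt; that interrupt is then rejected. */
        if (mfrr < s->cppr && mfrr <= s->pending_pri) {
            *reject = s->xisr;
            s->pending_pri = mfrr;
            s->xisr = TOY_XICS_IPI;
        }

        /* MFRR made less favoured: a previously rejected source may now fit. */
        if (mfrr > old_mfrr && s->need_resend) {
            *resend = 1;
            s->need_resend = 0;
        }
    }

    int main(void)
    {
        struct toy_icp s = { .cppr = 5, .mfrr = 0xff, .pending_pri = 4, .xisr = 17 };
        unsigned reject; int resend;
        set_mfrr(&s, 3, &reject, &resend);
        printf("reject=%u resend=%d xisr=%u\n", reject, resend, s.xisr);
        return 0;
    }
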
index 65c105b..10554df 100644 (file)
@@ -94,20 +94,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
        lwz     r6, HSTATE_PMC + 12(r13)
        lwz     r8, HSTATE_PMC + 16(r13)
        lwz     r9, HSTATE_PMC + 20(r13)
-BEGIN_FTR_SECTION
-       lwz     r10, HSTATE_PMC + 24(r13)
-       lwz     r11, HSTATE_PMC + 28(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
        mtspr   SPRN_PMC1, r3
        mtspr   SPRN_PMC2, r4
        mtspr   SPRN_PMC3, r5
        mtspr   SPRN_PMC4, r6
        mtspr   SPRN_PMC5, r8
        mtspr   SPRN_PMC6, r9
-BEGIN_FTR_SECTION
-       mtspr   SPRN_PMC7, r10
-       mtspr   SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
        ld      r3, HSTATE_MMCR(r13)
        ld      r4, HSTATE_MMCR + 8(r13)
        ld      r5, HSTATE_MMCR + 16(r13)
@@ -153,11 +145,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
        cmpwi   cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
        cmpwi   r12, BOOK3S_INTERRUPT_EXTERNAL
-BEGIN_FTR_SECTION
        beq     11f
        cmpwi   cr2, r12, BOOK3S_INTERRUPT_HMI
        beq     cr2, 14f                        /* HMI check */
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
        /* RFI into the highmem handler, or branch to interrupt handler */
        mfmsr   r6
@@ -166,7 +156,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
        mtmsrd  r6, 1                   /* Clear RI in MSR */
        mtsrr0  r8
        mtsrr1  r7
-       beqa    0x500                   /* external interrupt (PPC970) */
        beq     cr1, 13f                /* machine check */
        RFI
 
@@ -393,11 +382,8 @@ kvmppc_hv_entry:
        slbia
        ptesync
 
-BEGIN_FTR_SECTION
-       b       30f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
        /*
-        * POWER7 host -> guest partition switch code.
+        * POWER7/POWER8 host -> guest partition switch code.
         * We don't have to lock against concurrent tlbies,
         * but we do have to coordinate across hardware threads.
         */
@@ -505,97 +491,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        cmpwi   r3,512          /* 1 microsecond */
        li      r12,BOOK3S_INTERRUPT_HV_DECREMENTER
        blt     hdec_soon
-       b       31f
-
-       /*
-        * PPC970 host -> guest partition switch code.
-        * We have to lock against concurrent tlbies,
-        * using native_tlbie_lock to lock against host tlbies
-        * and kvm->arch.tlbie_lock to lock against guest tlbies.
-        * We also have to invalidate the TLB since its
-        * entries aren't tagged with the LPID.
-        */
-30:    ld      r5,HSTATE_KVM_VCORE(r13)
-       ld      r9,VCORE_KVM(r5)        /* pointer to struct kvm */
-
-       /* first take native_tlbie_lock */
-       .section ".toc","aw"
-toc_tlbie_lock:
-       .tc     native_tlbie_lock[TC],native_tlbie_lock
-       .previous
-       ld      r3,toc_tlbie_lock@toc(r2)
-#ifdef __BIG_ENDIAN__
-       lwz     r8,PACA_LOCK_TOKEN(r13)
-#else
-       lwz     r8,PACAPACAINDEX(r13)
-#endif
-24:    lwarx   r0,0,r3
-       cmpwi   r0,0
-       bne     24b
-       stwcx.  r8,0,r3
-       bne     24b
-       isync
-
-       ld      r5,HSTATE_KVM_VCORE(r13)
-       ld      r7,VCORE_LPCR(r5)       /* use vcore->lpcr to store HID4 */
-       li      r0,0x18f
-       rotldi  r0,r0,HID4_LPID5_SH     /* all lpid bits in HID4 = 1 */
-       or      r0,r7,r0
-       ptesync
-       sync
-       mtspr   SPRN_HID4,r0            /* switch to reserved LPID */
-       isync
-       li      r0,0
-       stw     r0,0(r3)                /* drop native_tlbie_lock */
-
-       /* invalidate the whole TLB */
-       li      r0,256
-       mtctr   r0
-       li      r6,0
-25:    tlbiel  r6
-       addi    r6,r6,0x1000
-       bdnz    25b
-       ptesync
 
-       /* Take the guest's tlbie_lock */
-       addi    r3,r9,KVM_TLBIE_LOCK
-24:    lwarx   r0,0,r3
-       cmpwi   r0,0
-       bne     24b
-       stwcx.  r8,0,r3
-       bne     24b
-       isync
-       ld      r6,KVM_SDR1(r9)
-       mtspr   SPRN_SDR1,r6            /* switch to partition page table */
-
-       /* Set up HID4 with the guest's LPID etc. */
-       sync
-       mtspr   SPRN_HID4,r7
-       isync
-
-       /* drop the guest's tlbie_lock */
-       li      r0,0
-       stw     r0,0(r3)
-
-       /* Check if HDEC expires soon */
-       mfspr   r3,SPRN_HDEC
-       cmpwi   r3,10
-       li      r12,BOOK3S_INTERRUPT_HV_DECREMENTER
-       blt     hdec_soon
-
-       /* Enable HDEC interrupts */
-       mfspr   r0,SPRN_HID0
-       li      r3,1
-       rldimi  r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
-       sync
-       mtspr   SPRN_HID0,r0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-31:
        /* Do we have a guest vcpu to run? */
        cmpdi   r4, 0
        beq     kvmppc_primary_no_guest
@@ -625,7 +521,6 @@ kvmppc_got_guest:
        stb     r6, VCPU_VPA_DIRTY(r4)
 25:
 
-BEGIN_FTR_SECTION
        /* Save purr/spurr */
        mfspr   r5,SPRN_PURR
        mfspr   r6,SPRN_SPURR
@@ -635,7 +530,6 @@ BEGIN_FTR_SECTION
        ld      r8,VCPU_SPURR(r4)
        mtspr   SPRN_PURR,r7
        mtspr   SPRN_SPURR,r8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 BEGIN_FTR_SECTION
        /* Set partition DABR */
@@ -644,9 +538,7 @@ BEGIN_FTR_SECTION
        ld      r6,VCPU_DABR(r4)
        mtspr   SPRN_DABRX,r5
        mtspr   SPRN_DABR,r6
- BEGIN_FTR_SECTION_NESTED(89)
        isync
- END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89)
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -777,20 +669,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
        lwz     r7, VCPU_PMC + 12(r4)
        lwz     r8, VCPU_PMC + 16(r4)
        lwz     r9, VCPU_PMC + 20(r4)
-BEGIN_FTR_SECTION
-       lwz     r10, VCPU_PMC + 24(r4)
-       lwz     r11, VCPU_PMC + 28(r4)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
        mtspr   SPRN_PMC1, r3
        mtspr   SPRN_PMC2, r5
        mtspr   SPRN_PMC3, r6
        mtspr   SPRN_PMC4, r7
        mtspr   SPRN_PMC5, r8
        mtspr   SPRN_PMC6, r9
-BEGIN_FTR_SECTION
-       mtspr   SPRN_PMC7, r10
-       mtspr   SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
        ld      r3, VCPU_MMCR(r4)
        ld      r5, VCPU_MMCR + 8(r4)
        ld      r6, VCPU_MMCR + 16(r4)
@@ -837,14 +721,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        ld      r30, VCPU_GPR(R30)(r4)
        ld      r31, VCPU_GPR(R31)(r4)
 
-BEGIN_FTR_SECTION
        /* Switch DSCR to guest value */
        ld      r5, VCPU_DSCR(r4)
        mtspr   SPRN_DSCR, r5
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 BEGIN_FTR_SECTION
-       /* Skip next section on POWER7 or PPC970 */
+       /* Skip next section on POWER7 */
        b       8f
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
        /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
@@ -920,7 +802,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
        mtspr   SPRN_DAR, r5
        mtspr   SPRN_DSISR, r6
 
-BEGIN_FTR_SECTION
        /* Restore AMR and UAMOR, set AMOR to all 1s */
        ld      r5,VCPU_AMR(r4)
        ld      r6,VCPU_UAMOR(r4)
@@ -928,7 +809,6 @@ BEGIN_FTR_SECTION
        mtspr   SPRN_AMR,r5
        mtspr   SPRN_UAMOR,r6
        mtspr   SPRN_AMOR,r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
        /* Restore state of CTRL run bit; assume 1 on entry */
        lwz     r5,VCPU_CTRL(r4)
@@ -963,13 +843,11 @@ deliver_guest_interrupt:
        rldicl  r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
        cmpdi   cr1, r0, 0
        andi.   r8, r11, MSR_EE
-BEGIN_FTR_SECTION
        mfspr   r8, SPRN_LPCR
        /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
        rldimi  r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
        mtspr   SPRN_LPCR, r8
        isync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
        beq     5f
        li      r0, BOOK3S_INTERRUPT_EXTERNAL
        bne     cr1, 12f
@@ -1124,15 +1002,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
        stw     r12,VCPU_TRAP(r9)
 
-       /* Save HEIR (HV emulation assist reg) in last_inst
+       /* Save HEIR (HV emulation assist reg) in emul_inst
           if this is an HEI (HV emulation interrupt, e40) */
        li      r3,KVM_INST_FETCH_FAILED
-BEGIN_FTR_SECTION
        cmpwi   r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
        bne     11f
        mfspr   r3,SPRN_HEIR
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-11:    stw     r3,VCPU_LAST_INST(r9)
+11:    stw     r3,VCPU_HEIR(r9)
 
        /* these are volatile across C function calls */
        mfctr   r3
@@ -1140,13 +1016,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
        std     r3, VCPU_CTR(r9)
        stw     r4, VCPU_XER(r9)
 
-BEGIN_FTR_SECTION
        /* If this is a page table miss then see if it's theirs or ours */
        cmpwi   r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
        beq     kvmppc_hdsi
        cmpwi   r12, BOOK3S_INTERRUPT_H_INST_STORAGE
        beq     kvmppc_hisi
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
        /* See if this is a leftover HDEC interrupt */
        cmpwi   r12,BOOK3S_INTERRUPT_HV_DECREMENTER
@@ -1159,11 +1033,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
        cmpwi   r12,BOOK3S_INTERRUPT_SYSCALL
        beq     hcall_try_real_mode
 
-       /* Only handle external interrupts here on arch 206 and later */
-BEGIN_FTR_SECTION
-       b       ext_interrupt_to_host
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
-
        /* External interrupt ? */
        cmpwi   r12, BOOK3S_INTERRUPT_EXTERNAL
        bne+    ext_interrupt_to_host
@@ -1193,11 +1062,9 @@ guest_exit_cont:         /* r9 = vcpu, r12 = trap, r13 = paca */
        mfdsisr r7
        std     r6, VCPU_DAR(r9)
        stw     r7, VCPU_DSISR(r9)
-BEGIN_FTR_SECTION
        /* don't overwrite fault_dar/fault_dsisr if HDSI */
        cmpwi   r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
        beq     6f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
        std     r6, VCPU_FAULT_DAR(r9)
        stw     r7, VCPU_FAULT_DSISR(r9)
 
@@ -1236,7 +1103,6 @@ mc_cont:
        /*
         * Save the guest PURR/SPURR
         */
-BEGIN_FTR_SECTION
        mfspr   r5,SPRN_PURR
        mfspr   r6,SPRN_SPURR
        ld      r7,VCPU_PURR(r9)
@@ -1256,7 +1122,6 @@ BEGIN_FTR_SECTION
        add     r4,r4,r6
        mtspr   SPRN_PURR,r3
        mtspr   SPRN_SPURR,r4
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
 
        /* Save DEC */
        mfspr   r5,SPRN_DEC
@@ -1306,22 +1171,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 8:
 
        /* Save and reset AMR and UAMOR before turning on the MMU */
-BEGIN_FTR_SECTION
        mfspr   r5,SPRN_AMR
        mfspr   r6,SPRN_UAMOR
        std     r5,VCPU_AMR(r9)
        std     r6,VCPU_UAMOR(r9)
        li      r6,0
        mtspr   SPRN_AMR,r6
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
        /* Switch DSCR back to host value */
-BEGIN_FTR_SECTION
        mfspr   r8, SPRN_DSCR
        ld      r7, HSTATE_DSCR(r13)
        std     r8, VCPU_DSCR(r9)
        mtspr   SPRN_DSCR, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
        /* Save non-volatile GPRs */
        std     r14, VCPU_GPR(R14)(r9)
@@ -1503,11 +1364,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        mfspr   r4, SPRN_MMCR0          /* save MMCR0 */
        mtspr   SPRN_MMCR0, r3          /* freeze all counters, disable ints */
        mfspr   r6, SPRN_MMCRA
-BEGIN_FTR_SECTION
-       /* On P7, clear MMCRA in order to disable SDAR updates */
+       /* Clear MMCRA in order to disable SDAR updates */
        li      r7, 0
        mtspr   SPRN_MMCRA, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
        isync
        beq     21f                     /* if no VPA, save PMU stuff anyway */
        lbz     r7, LPPACA_PMCINUSE(r8)
@@ -1532,20 +1391,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        mfspr   r6, SPRN_PMC4
        mfspr   r7, SPRN_PMC5
        mfspr   r8, SPRN_PMC6
-BEGIN_FTR_SECTION
-       mfspr   r10, SPRN_PMC7
-       mfspr   r11, SPRN_PMC8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
        stw     r3, VCPU_PMC(r9)
        stw     r4, VCPU_PMC + 4(r9)
        stw     r5, VCPU_PMC + 8(r9)
        stw     r6, VCPU_PMC + 12(r9)
        stw     r7, VCPU_PMC + 16(r9)
        stw     r8, VCPU_PMC + 20(r9)
-BEGIN_FTR_SECTION
-       stw     r10, VCPU_PMC + 24(r9)
-       stw     r11, VCPU_PMC + 28(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 BEGIN_FTR_SECTION
        mfspr   r5, SPRN_SIER
        mfspr   r6, SPRN_SPMC1
@@ -1566,11 +1417,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        ptesync
 
 hdec_soon:                     /* r12 = trap, r13 = paca */
-BEGIN_FTR_SECTION
-       b       32f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
        /*
-        * POWER7 guest -> host partition switch code.
+        * POWER7/POWER8 guest -> host partition switch code.
         * We don't have to lock against tlbies but we do
         * have to coordinate the hardware threads.
         */
@@ -1698,87 +1546,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 16:    ld      r8,KVM_HOST_LPCR(r4)
        mtspr   SPRN_LPCR,r8
        isync
-       b       33f
-
-       /*
-        * PPC970 guest -> host partition switch code.
-        * We have to lock against concurrent tlbies, and
-        * we have to flush the whole TLB.
-        */
-32:    ld      r5,HSTATE_KVM_VCORE(r13)
-       ld      r4,VCORE_KVM(r5)        /* pointer to struct kvm */
-
-       /* Take the guest's tlbie_lock */
-#ifdef __BIG_ENDIAN__
-       lwz     r8,PACA_LOCK_TOKEN(r13)
-#else
-       lwz     r8,PACAPACAINDEX(r13)
-#endif
-       addi    r3,r4,KVM_TLBIE_LOCK
-24:    lwarx   r0,0,r3
-       cmpwi   r0,0
-       bne     24b
-       stwcx.  r8,0,r3
-       bne     24b
-       isync
-
-       ld      r7,KVM_HOST_LPCR(r4)    /* use kvm->arch.host_lpcr for HID4 */
-       li      r0,0x18f
-       rotldi  r0,r0,HID4_LPID5_SH     /* all lpid bits in HID4 = 1 */
-       or      r0,r7,r0
-       ptesync
-       sync
-       mtspr   SPRN_HID4,r0            /* switch to reserved LPID */
-       isync
-       li      r0,0
-       stw     r0,0(r3)                /* drop guest tlbie_lock */
-
-       /* invalidate the whole TLB */
-       li      r0,256
-       mtctr   r0
-       li      r6,0
-25:    tlbiel  r6
-       addi    r6,r6,0x1000
-       bdnz    25b
-       ptesync
-
-       /* take native_tlbie_lock */
-       ld      r3,toc_tlbie_lock@toc(2)
-24:    lwarx   r0,0,r3
-       cmpwi   r0,0
-       bne     24b
-       stwcx.  r8,0,r3
-       bne     24b
-       isync
-
-       ld      r6,KVM_HOST_SDR1(r4)
-       mtspr   SPRN_SDR1,r6            /* switch to host page table */
-
-       /* Set up host HID4 value */
-       sync
-       mtspr   SPRN_HID4,r7
-       isync
-       li      r0,0
-       stw     r0,0(r3)                /* drop native_tlbie_lock */
-
-       lis     r8,0x7fff               /* MAX_INT@h */
-       mtspr   SPRN_HDEC,r8
-
-       /* Disable HDEC interrupts */
-       mfspr   r0,SPRN_HID0
-       li      r3,0
-       rldimi  r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
-       sync
-       mtspr   SPRN_HID0,r0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
-       mfspr   r0,SPRN_HID0
 
        /* load host SLB entries */
-33:    ld      r8,PACA_SLBSHADOWPTR(r13)
+       ld      r8,PACA_SLBSHADOWPTR(r13)
 
        .rept   SLB_NUM_BOLTED
        li      r3, SLBSHADOW_SAVEAREA
@@ -2047,7 +1817,7 @@ hcall_real_table:
        .long   0               /* 0xd8 */
        .long   0               /* 0xdc */
        .long   DOTSYM(kvmppc_h_cede) - hcall_real_table
-       .long   0               /* 0xe4 */
+       .long   DOTSYM(kvmppc_rm_h_confer) - hcall_real_table
        .long   0               /* 0xe8 */
        .long   0               /* 0xec */
        .long   0               /* 0xf0 */
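
Editorial note on the hcall_real_table change above: as I read it, the table holds one 4-byte entry per hypercall number (hcall numbers are multiples of 4), so the slot annotated 0xe4 corresponds to H_CONFER, and pointing it at kvmppc_rm_h_confer lets the call be attempted in real mode before falling back to the virtual-mode handler. A rough C analogue of that dispatch; the table size and names here are illustrative assumptions:

    #include <stdio.h>

    #define TOY_H_CONFER  0xe4u
    #define TOY_MAX_HCALL 0x140u

    typedef long (*hcall_fn)(void);

    static long toy_rm_h_confer(void) { return 0; }      /* stand-in handler   */

    static hcall_fn toy_hcall_table[TOY_MAX_HCALL / 4];   /* one slot per hcall */

    static long try_real_mode_hcall(unsigned nr)
    {
        if (nr >= TOY_MAX_HCALL || !toy_hcall_table[nr / 4])
            return -1;                 /* punt to the virtual-mode handler */
        return toy_hcall_table[nr / 4]();
    }

    int main(void)
    {
        toy_hcall_table[TOY_H_CONFER / 4] = toy_rm_h_confer;
        printf("%ld\n", try_real_mode_hcall(TOY_H_CONFER)); /*  0: handled here */
        printf("%ld\n", try_real_mode_hcall(0xe8));          /* -1: punt         */
        return 0;
    }
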
@@ -2126,9 +1896,6 @@ _GLOBAL(kvmppc_h_cede)
        stw     r0,VCPU_TRAP(r3)
        li      r0,H_SUCCESS
        std     r0,VCPU_GPR(R3)(r3)
-BEGIN_FTR_SECTION
-       b       kvm_cede_exit   /* just send it up to host on 970 */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
 
        /*
         * Set our bit in the bitmask of napping threads unless all the
@@ -2455,7 +2222,6 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
        mtmsrd  r8
-       isync
        addi    r3,r3,VCPU_FPRS
        bl      store_fp_state
 #ifdef CONFIG_ALTIVEC
@@ -2491,7 +2257,6 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
        mtmsrd  r8
-       isync
        addi    r3,r4,VCPU_FPRS
        bl      load_fp_state
 #ifdef CONFIG_ALTIVEC
index bfb8035..bd6ab16 100644 (file)
@@ -352,14 +352,6 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb)
        return kvmppc_get_field(inst, msb + 32, lsb + 32);
 }
 
-/*
- * Replaces inst bits with ordering according to spec.
- */
-static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value)
-{
-       return kvmppc_set_field(inst, msb + 32, lsb + 32, value);
-}
-
 bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
 {
        if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
index cf2eb16..f573839 100644 (file)
@@ -644,11 +644,6 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        return r;
 }
 
-static inline int get_fpr_index(int i)
-{
-       return i * TS_FPRWIDTH;
-}
-
 /* Give up external provider (FPU, Altivec, VSX) */
 void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 {
index eaeb780..807351f 100644 (file)
@@ -613,10 +613,25 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
         * there might be a previously-rejected interrupt needing
         * to be resent.
         *
+        * ICP state: Check_IPI
+        *
         * If the CPPR is less favored, then we might be replacing
-        * an interrupt, and thus need to possibly reject it as in
+        * an interrupt, and thus need to possibly reject it.
         *
-        * ICP state: Check_IPI
+        * ICP State: IPI
+        *
+        * Besides rejecting any pending interrupts, we also
+        * update XISR and pending_pri to mark IPI as pending.
+        *
+        * PAPR does not describe this state, but if the MFRR is being
+        * made less favored than its earlier value, there might be
+        * a previously-rejected interrupt needing to be resent.
+        * Ideally, we would want to resend only if
+        *      prio(pending_interrupt) < mfrr &&
+        *      prio(pending_interrupt) < cppr
+        * where pending interrupt is the one that was rejected. But
+        * we don't have that state, so we simply trigger a resend
+        * whenever the MFRR is made less favored.
         */
        do {
                old_state = new_state = ACCESS_ONCE(icp->state);
@@ -629,13 +644,14 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
                resend = false;
                if (mfrr < new_state.cppr) {
                        /* Reject a pending interrupt if not an IPI */
-                       if (mfrr <= new_state.pending_pri)
+                       if (mfrr <= new_state.pending_pri) {
                                reject = new_state.xisr;
-                       new_state.pending_pri = mfrr;
-                       new_state.xisr = XICS_IPI;
+                               new_state.pending_pri = mfrr;
+                               new_state.xisr = XICS_IPI;
+                       }
                }
 
-               if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+               if (mfrr > old_state.mfrr) {
                        resend = new_state.need_resend;
                        new_state.need_resend = 0;
                }
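
The resend heuristic described in the comment above (trigger a resend whenever the MFRR is made less favored, since the priority of the previously rejected interrupt is not tracked) can be condensed into a small standalone sketch. The struct, constant, and function below are simplified illustrations, not the kernel's types; as in XICS, numerically smaller priority values are more favored.

        /* Simplified sketch of the corrected Set_MFRR/Check_IPI update. */
        struct icp_sketch {
                unsigned char cppr;        /* current processor priority */
                unsigned char mfrr;        /* most favored request register */
                unsigned char pending_pri; /* priority of the pending interrupt */
                unsigned int  xisr;        /* source number of the pending interrupt */
                unsigned char need_resend;
        };

        #define SKETCH_XICS_IPI 2          /* stand-in for the IPI source number */

        /* Returns the displaced XISR (0 if none) and sets *resend when a
         * previously rejected interrupt may have become deliverable. */
        static unsigned int icp_set_mfrr_sketch(struct icp_sketch *icp,
                                                unsigned char mfrr, int *resend)
        {
                unsigned char old_mfrr = icp->mfrr;
                unsigned int reject = 0;

                icp->mfrr = mfrr;

                /* The IPI replaces the pending interrupt only when it is more
                 * favored than both the CPPR and whatever is already pending. */
                if (mfrr < icp->cppr && mfrr <= icp->pending_pri) {
                        reject = icp->xisr;
                        icp->pending_pri = mfrr;
                        icp->xisr = SKETCH_XICS_IPI;
                }

                /* MFRR made less favored: ask for a resend in case an earlier
                 * rejection is now deliverable. */
                *resend = 0;
                if (mfrr > old_mfrr) {
                        *resend = icp->need_resend;
                        icp->need_resend = 0;
                }
                return reject;
        }
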
@@ -789,7 +805,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
        if (icp->rm_action & XICS_RM_KICK_VCPU)
                kvmppc_fast_vcpu_kick(icp->rm_kick_target);
        if (icp->rm_action & XICS_RM_CHECK_RESEND)
-               icp_check_resend(xics, icp);
+               icp_check_resend(xics, icp->rm_resend_icp);
        if (icp->rm_action & XICS_RM_REJECT)
                icp_deliver_irq(xics, icp, icp->rm_reject);
        if (icp->rm_action & XICS_RM_NOTIFY_EOI)
index e8aaa7a..73f0f27 100644
@@ -74,6 +74,7 @@ struct kvmppc_icp {
 #define XICS_RM_NOTIFY_EOI     0x8
        u32 rm_action;
        struct kvm_vcpu *rm_kick_target;
+       struct kvmppc_icp *rm_resend_icp;
        u32  rm_reject;
        u32  rm_eoied_irq;
 
index e1cb588..b29ce75 100644
@@ -299,14 +299,6 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
        kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
 }
 
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
 static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu)
 {
        kvmppc_booke_vcpu_load(vcpu, cpu);
index c1f8f53..c45eaab 100644
@@ -527,18 +527,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                        r = 0;
                break;
        case KVM_CAP_PPC_RMA:
-               r = hv_enabled;
-               /* PPC970 requires an RMA */
-               if (r && cpu_has_feature(CPU_FTR_ARCH_201))
-                       r = 2;
+               r = 0;
                break;
 #endif
        case KVM_CAP_SYNC_MMU:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-               if (hv_enabled)
-                       r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
-               else
-                       r = 0;
+               r = hv_enabled;
 #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
                r = 1;
 #else
diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
new file mode 100644
index 0000000..f647ce0
--- /dev/null
@@ -0,0 +1,32 @@
+#if !defined(_TRACE_KVM_BOOK3S_H)
+#define _TRACE_KVM_BOOK3S_H
+
+/*
+ * Common defines used by the trace macros in trace_pr.h and trace_hv.h
+ */
+
+#define kvm_trace_symbol_exit \
+       {0x100, "SYSTEM_RESET"}, \
+       {0x200, "MACHINE_CHECK"}, \
+       {0x300, "DATA_STORAGE"}, \
+       {0x380, "DATA_SEGMENT"}, \
+       {0x400, "INST_STORAGE"}, \
+       {0x480, "INST_SEGMENT"}, \
+       {0x500, "EXTERNAL"}, \
+       {0x501, "EXTERNAL_LEVEL"}, \
+       {0x502, "EXTERNAL_HV"}, \
+       {0x600, "ALIGNMENT"}, \
+       {0x700, "PROGRAM"}, \
+       {0x800, "FP_UNAVAIL"}, \
+       {0x900, "DECREMENTER"}, \
+       {0x980, "HV_DECREMENTER"}, \
+       {0xc00, "SYSCALL"}, \
+       {0xd00, "TRACE"}, \
+       {0xe00, "H_DATA_STORAGE"}, \
+       {0xe20, "H_INST_STORAGE"}, \
+       {0xe40, "H_EMUL_ASSIST"}, \
+       {0xf00, "PERFMON"}, \
+       {0xf20, "ALTIVEC"}, \
+       {0xf40, "VSX"}
+
+#endif
index f7537cf..7ec534d 100644
@@ -151,6 +151,47 @@ TRACE_EVENT(kvm_booke206_ref_release,
                __entry->pfn, __entry->flags)
 );
 
+#ifdef CONFIG_SPE_POSSIBLE
+#define kvm_trace_symbol_irqprio_spe \
+       {BOOKE_IRQPRIO_SPE_UNAVAIL, "SPE_UNAVAIL"}, \
+       {BOOKE_IRQPRIO_SPE_FP_DATA, "SPE_FP_DATA"}, \
+       {BOOKE_IRQPRIO_SPE_FP_ROUND, "SPE_FP_ROUND"},
+#else
+#define kvm_trace_symbol_irqprio_spe
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#define kvm_trace_symbol_irqprio_e500mc \
+       {BOOKE_IRQPRIO_ALTIVEC_UNAVAIL, "ALTIVEC_UNAVAIL"}, \
+       {BOOKE_IRQPRIO_ALTIVEC_ASSIST, "ALTIVEC_ASSIST"},
+#else
+#define kvm_trace_symbol_irqprio_e500mc
+#endif
+
+#define kvm_trace_symbol_irqprio \
+       kvm_trace_symbol_irqprio_spe \
+       kvm_trace_symbol_irqprio_e500mc \
+       {BOOKE_IRQPRIO_DATA_STORAGE, "DATA_STORAGE"}, \
+       {BOOKE_IRQPRIO_INST_STORAGE, "INST_STORAGE"}, \
+       {BOOKE_IRQPRIO_ALIGNMENT, "ALIGNMENT"}, \
+       {BOOKE_IRQPRIO_PROGRAM, "PROGRAM"}, \
+       {BOOKE_IRQPRIO_FP_UNAVAIL, "FP_UNAVAIL"}, \
+       {BOOKE_IRQPRIO_SYSCALL, "SYSCALL"}, \
+       {BOOKE_IRQPRIO_AP_UNAVAIL, "AP_UNAVAIL"}, \
+       {BOOKE_IRQPRIO_DTLB_MISS, "DTLB_MISS"}, \
+       {BOOKE_IRQPRIO_ITLB_MISS, "ITLB_MISS"}, \
+       {BOOKE_IRQPRIO_MACHINE_CHECK, "MACHINE_CHECK"}, \
+       {BOOKE_IRQPRIO_DEBUG, "DEBUG"}, \
+       {BOOKE_IRQPRIO_CRITICAL, "CRITICAL"}, \
+       {BOOKE_IRQPRIO_WATCHDOG, "WATCHDOG"}, \
+       {BOOKE_IRQPRIO_EXTERNAL, "EXTERNAL"}, \
+       {BOOKE_IRQPRIO_FIT, "FIT"}, \
+       {BOOKE_IRQPRIO_DECREMENTER, "DECREMENTER"}, \
+       {BOOKE_IRQPRIO_PERFORMANCE_MONITOR, "PERFORMANCE_MONITOR"}, \
+       {BOOKE_IRQPRIO_EXTERNAL_LEVEL, "EXTERNAL_LEVEL"}, \
+       {BOOKE_IRQPRIO_DBELL, "DBELL"}, \
+       {BOOKE_IRQPRIO_DBELL_CRIT, "DBELL_CRIT"} \
+
 TRACE_EVENT(kvm_booke_queue_irqprio,
        TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
        TP_ARGS(vcpu, priority),
@@ -167,8 +208,10 @@ TRACE_EVENT(kvm_booke_queue_irqprio,
                __entry->pending        = vcpu->arch.pending_exceptions;
        ),
 
-       TP_printk("vcpu=%x prio=%x pending=%lx",
-               __entry->cpu_nr, __entry->priority, __entry->pending)
+       TP_printk("vcpu=%x prio=%s pending=%lx",
+               __entry->cpu_nr,
+               __print_symbolic(__entry->priority, kvm_trace_symbol_irqprio),
+               __entry->pending)
 );
 
 #endif
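
The TP_printk change above passes the recorded priority through __print_symbolic(), which at trace output time maps the value onto the {value, "name"} pairs expanded from kvm_trace_symbol_irqprio. A rough userspace analogue of that lookup, purely illustrative and using placeholder numbers rather than the real BOOKE_IRQPRIO_* values, is:

        #include <stdio.h>

        struct sym_pair { unsigned long value; const char *name; };

        /* Illustrative stand-in for __print_symbolic(): map a value onto a
         * {value, name} table, or report it as unknown. */
        static const char *lookup_symbol(unsigned long value,
                                         const struct sym_pair *table, size_t n)
        {
                for (size_t i = 0; i < n; i++)
                        if (table[i].value == value)
                                return table[i].name;
                return "UNKNOWN";
        }

        int main(void)
        {
                const struct sym_pair irqprio[] = {
                        { 0, "DATA_STORAGE" },   /* placeholder numbers, not the  */
                        { 1, "INST_STORAGE" },   /* actual BOOKE_IRQPRIO_* values */
                };

                printf("prio=%s\n", lookup_symbol(1, irqprio, 2));
                return 0;
        }
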
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
new file mode 100644
index 0000000..33d9daf
--- /dev/null
@@ -0,0 +1,477 @@
+#if !defined(_TRACE_KVM_HV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_HV_H
+
+#include <linux/tracepoint.h>
+#include "trace_book3s.h"
+#include <asm/hvcall.h>
+#include <asm/kvm_asm.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_hv
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_hv
+
+#define kvm_trace_symbol_hcall \
+       {H_REMOVE,                      "H_REMOVE"}, \
+       {H_ENTER,                       "H_ENTER"}, \
+       {H_READ,                        "H_READ"}, \
+       {H_CLEAR_MOD,                   "H_CLEAR_MOD"}, \
+       {H_CLEAR_REF,                   "H_CLEAR_REF"}, \
+       {H_PROTECT,                     "H_PROTECT"}, \
+       {H_GET_TCE,                     "H_GET_TCE"}, \
+       {H_PUT_TCE,                     "H_PUT_TCE"}, \
+       {H_SET_SPRG0,                   "H_SET_SPRG0"}, \
+       {H_SET_DABR,                    "H_SET_DABR"}, \
+       {H_PAGE_INIT,                   "H_PAGE_INIT"}, \
+       {H_SET_ASR,                     "H_SET_ASR"}, \
+       {H_ASR_ON,                      "H_ASR_ON"}, \
+       {H_ASR_OFF,                     "H_ASR_OFF"}, \
+       {H_LOGICAL_CI_LOAD,             "H_LOGICAL_CI_LOAD"}, \
+       {H_LOGICAL_CI_STORE,            "H_LOGICAL_CI_STORE"}, \
+       {H_LOGICAL_CACHE_LOAD,          "H_LOGICAL_CACHE_LOAD"}, \
+       {H_LOGICAL_CACHE_STORE,         "H_LOGICAL_CACHE_STORE"}, \
+       {H_LOGICAL_ICBI,                "H_LOGICAL_ICBI"}, \
+       {H_LOGICAL_DCBF,                "H_LOGICAL_DCBF"}, \
+       {H_GET_TERM_CHAR,               "H_GET_TERM_CHAR"}, \
+       {H_PUT_TERM_CHAR,               "H_PUT_TERM_CHAR"}, \
+       {H_REAL_TO_LOGICAL,             "H_REAL_TO_LOGICAL"}, \
+       {H_HYPERVISOR_DATA,             "H_HYPERVISOR_DATA"}, \
+       {H_EOI,                         "H_EOI"}, \
+       {H_CPPR,                        "H_CPPR"}, \
+       {H_IPI,                         "H_IPI"}, \
+       {H_IPOLL,                       "H_IPOLL"}, \
+       {H_XIRR,                        "H_XIRR"}, \
+       {H_PERFMON,                     "H_PERFMON"}, \
+       {H_MIGRATE_DMA,                 "H_MIGRATE_DMA"}, \
+       {H_REGISTER_VPA,                "H_REGISTER_VPA"}, \
+       {H_CEDE,                        "H_CEDE"}, \
+       {H_CONFER,                      "H_CONFER"}, \
+       {H_PROD,                        "H_PROD"}, \
+       {H_GET_PPP,                     "H_GET_PPP"}, \
+       {H_SET_PPP,                     "H_SET_PPP"}, \
+       {H_PURR,                        "H_PURR"}, \
+       {H_PIC,                         "H_PIC"}, \
+       {H_REG_CRQ,                     "H_REG_CRQ"}, \
+       {H_FREE_CRQ,                    "H_FREE_CRQ"}, \
+       {H_VIO_SIGNAL,                  "H_VIO_SIGNAL"}, \
+       {H_SEND_CRQ,                    "H_SEND_CRQ"}, \
+       {H_COPY_RDMA,                   "H_COPY_RDMA"}, \
+       {H_REGISTER_LOGICAL_LAN,        "H_REGISTER_LOGICAL_LAN"}, \
+       {H_FREE_LOGICAL_LAN,            "H_FREE_LOGICAL_LAN"}, \
+       {H_ADD_LOGICAL_LAN_BUFFER,      "H_ADD_LOGICAL_LAN_BUFFER"}, \
+       {H_SEND_LOGICAL_LAN,            "H_SEND_LOGICAL_LAN"}, \
+       {H_BULK_REMOVE,                 "H_BULK_REMOVE"}, \
+       {H_MULTICAST_CTRL,              "H_MULTICAST_CTRL"}, \
+       {H_SET_XDABR,                   "H_SET_XDABR"}, \
+       {H_STUFF_TCE,                   "H_STUFF_TCE"}, \
+       {H_PUT_TCE_INDIRECT,            "H_PUT_TCE_INDIRECT"}, \
+       {H_CHANGE_LOGICAL_LAN_MAC,      "H_CHANGE_LOGICAL_LAN_MAC"}, \
+       {H_VTERM_PARTNER_INFO,          "H_VTERM_PARTNER_INFO"}, \
+       {H_REGISTER_VTERM,              "H_REGISTER_VTERM"}, \
+       {H_FREE_VTERM,                  "H_FREE_VTERM"}, \
+       {H_RESET_EVENTS,                "H_RESET_EVENTS"}, \
+       {H_ALLOC_RESOURCE,              "H_ALLOC_RESOURCE"}, \
+       {H_FREE_RESOURCE,               "H_FREE_RESOURCE"}, \
+       {H_MODIFY_QP,                   "H_MODIFY_QP"}, \
+       {H_QUERY_QP,                    "H_QUERY_QP"}, \
+       {H_REREGISTER_PMR,              "H_REREGISTER_PMR"}, \
+       {H_REGISTER_SMR,                "H_REGISTER_SMR"}, \
+       {H_QUERY_MR,                    "H_QUERY_MR"}, \
+       {H_QUERY_MW,                    "H_QUERY_MW"}, \
+       {H_QUERY_HCA,                   "H_QUERY_HCA"}, \
+       {H_QUERY_PORT,                  "H_QUERY_PORT"}, \
+       {H_MODIFY_PORT,                 "H_MODIFY_PORT"}, \
+       {H_DEFINE_AQP1,                 "H_DEFINE_AQP1"}, \
+       {H_GET_TRACE_BUFFER,            "H_GET_TRACE_BUFFER"}, \
+       {H_DEFINE_AQP0,                 "H_DEFINE_AQP0"}, \
+       {H_RESIZE_MR,                   "H_RESIZE_MR"}, \
+       {H_ATTACH_MCQP,                 "H_ATTACH_MCQP"}, \
+       {H_DETACH_MCQP,                 "H_DETACH_MCQP"}, \
+       {H_CREATE_RPT,                  "H_CREATE_RPT"}, \
+       {H_REMOVE_RPT,                  "H_REMOVE_RPT"}, \
+       {H_REGISTER_RPAGES,             "H_REGISTER_RPAGES"}, \
+       {H_DISABLE_AND_GETC,            "H_DISABLE_AND_GETC"}, \
+       {H_ERROR_DATA,                  "H_ERROR_DATA"}, \
+       {H_GET_HCA_INFO,                "H_GET_HCA_INFO"}, \
+       {H_GET_PERF_COUNT,              "H_GET_PERF_COUNT"}, \
+       {H_MANAGE_TRACE,                "H_MANAGE_TRACE"}, \
+       {H_FREE_LOGICAL_LAN_BUFFER,     "H_FREE_LOGICAL_LAN_BUFFER"}, \
+       {H_QUERY_INT_STATE,             "H_QUERY_INT_STATE"}, \
+       {H_POLL_PENDING,                "H_POLL_PENDING"}, \
+       {H_ILLAN_ATTRIBUTES,            "H_ILLAN_ATTRIBUTES"}, \
+       {H_MODIFY_HEA_QP,               "H_MODIFY_HEA_QP"}, \
+       {H_QUERY_HEA_QP,                "H_QUERY_HEA_QP"}, \
+       {H_QUERY_HEA,                   "H_QUERY_HEA"}, \
+       {H_QUERY_HEA_PORT,              "H_QUERY_HEA_PORT"}, \
+       {H_MODIFY_HEA_PORT,             "H_MODIFY_HEA_PORT"}, \
+       {H_REG_BCMC,                    "H_REG_BCMC"}, \
+       {H_DEREG_BCMC,                  "H_DEREG_BCMC"}, \
+       {H_REGISTER_HEA_RPAGES,         "H_REGISTER_HEA_RPAGES"}, \
+       {H_DISABLE_AND_GET_HEA,         "H_DISABLE_AND_GET_HEA"}, \
+       {H_GET_HEA_INFO,                "H_GET_HEA_INFO"}, \
+       {H_ALLOC_HEA_RESOURCE,          "H_ALLOC_HEA_RESOURCE"}, \
+       {H_ADD_CONN,                    "H_ADD_CONN"}, \
+       {H_DEL_CONN,                    "H_DEL_CONN"}, \
+       {H_JOIN,                        "H_JOIN"}, \
+       {H_VASI_STATE,                  "H_VASI_STATE"}, \
+       {H_ENABLE_CRQ,                  "H_ENABLE_CRQ"}, \
+       {H_GET_EM_PARMS,                "H_GET_EM_PARMS"}, \
+       {H_SET_MPP,                     "H_SET_MPP"}, \
+       {H_GET_MPP,                     "H_GET_MPP"}, \
+       {H_HOME_NODE_ASSOCIATIVITY,     "H_HOME_NODE_ASSOCIATIVITY"}, \
+       {H_BEST_ENERGY,                 "H_BEST_ENERGY"}, \
+       {H_XIRR_X,                      "H_XIRR_X"}, \
+       {H_RANDOM,                      "H_RANDOM"}, \
+       {H_COP,                         "H_COP"}, \
+       {H_GET_MPP_X,                   "H_GET_MPP_X"}, \
+       {H_SET_MODE,                    "H_SET_MODE"}, \
+       {H_RTAS,                        "H_RTAS"}
+
+#define kvm_trace_symbol_kvmret \
+       {RESUME_GUEST,                  "RESUME_GUEST"}, \
+       {RESUME_GUEST_NV,               "RESUME_GUEST_NV"}, \
+       {RESUME_HOST,                   "RESUME_HOST"}, \
+       {RESUME_HOST_NV,                "RESUME_HOST_NV"}
+
+#define kvm_trace_symbol_hcall_rc \
+       {H_SUCCESS,                     "H_SUCCESS"}, \
+       {H_BUSY,                        "H_BUSY"}, \
+       {H_CLOSED,                      "H_CLOSED"}, \
+       {H_NOT_AVAILABLE,               "H_NOT_AVAILABLE"}, \
+       {H_CONSTRAINED,                 "H_CONSTRAINED"}, \
+       {H_PARTIAL,                     "H_PARTIAL"}, \
+       {H_IN_PROGRESS,                 "H_IN_PROGRESS"}, \
+       {H_PAGE_REGISTERED,             "H_PAGE_REGISTERED"}, \
+       {H_PARTIAL_STORE,               "H_PARTIAL_STORE"}, \
+       {H_PENDING,                     "H_PENDING"}, \
+       {H_CONTINUE,                    "H_CONTINUE"}, \
+       {H_LONG_BUSY_START_RANGE,       "H_LONG_BUSY_START_RANGE"}, \
+       {H_LONG_BUSY_ORDER_1_MSEC,      "H_LONG_BUSY_ORDER_1_MSEC"}, \
+       {H_LONG_BUSY_ORDER_10_MSEC,     "H_LONG_BUSY_ORDER_10_MSEC"}, \
+       {H_LONG_BUSY_ORDER_100_MSEC,    "H_LONG_BUSY_ORDER_100_MSEC"}, \
+       {H_LONG_BUSY_ORDER_1_SEC,       "H_LONG_BUSY_ORDER_1_SEC"}, \
+       {H_LONG_BUSY_ORDER_10_SEC,      "H_LONG_BUSY_ORDER_10_SEC"}, \
+       {H_LONG_BUSY_ORDER_100_SEC,     "H_LONG_BUSY_ORDER_100_SEC"}, \
+       {H_LONG_BUSY_END_RANGE,         "H_LONG_BUSY_END_RANGE"}, \
+       {H_TOO_HARD,                    "H_TOO_HARD"}, \
+       {H_HARDWARE,                    "H_HARDWARE"}, \
+       {H_FUNCTION,                    "H_FUNCTION"}, \
+       {H_PRIVILEGE,                   "H_PRIVILEGE"}, \
+       {H_PARAMETER,                   "H_PARAMETER"}, \
+       {H_BAD_MODE,                    "H_BAD_MODE"}, \
+       {H_PTEG_FULL,                   "H_PTEG_FULL"}, \
+       {H_NOT_FOUND,                   "H_NOT_FOUND"}, \
+       {H_RESERVED_DABR,               "H_RESERVED_DABR"}, \
+       {H_NO_MEM,                      "H_NO_MEM"}, \
+       {H_AUTHORITY,                   "H_AUTHORITY"}, \
+       {H_PERMISSION,                  "H_PERMISSION"}, \
+       {H_DROPPED,                     "H_DROPPED"}, \
+       {H_SOURCE_PARM,                 "H_SOURCE_PARM"}, \
+       {H_DEST_PARM,                   "H_DEST_PARM"}, \
+       {H_REMOTE_PARM,                 "H_REMOTE_PARM"}, \
+       {H_RESOURCE,                    "H_RESOURCE"}, \
+       {H_ADAPTER_PARM,                "H_ADAPTER_PARM"}, \
+       {H_RH_PARM,                     "H_RH_PARM"}, \
+       {H_RCQ_PARM,                    "H_RCQ_PARM"}, \
+       {H_SCQ_PARM,                    "H_SCQ_PARM"}, \
+       {H_EQ_PARM,                     "H_EQ_PARM"}, \
+       {H_RT_PARM,                     "H_RT_PARM"}, \
+       {H_ST_PARM,                     "H_ST_PARM"}, \
+       {H_SIGT_PARM,                   "H_SIGT_PARM"}, \
+       {H_TOKEN_PARM,                  "H_TOKEN_PARM"}, \
+       {H_MLENGTH_PARM,                "H_MLENGTH_PARM"}, \
+       {H_MEM_PARM,                    "H_MEM_PARM"}, \
+       {H_MEM_ACCESS_PARM,             "H_MEM_ACCESS_PARM"}, \
+       {H_ATTR_PARM,                   "H_ATTR_PARM"}, \
+       {H_PORT_PARM,                   "H_PORT_PARM"}, \
+       {H_MCG_PARM,                    "H_MCG_PARM"}, \
+       {H_VL_PARM,                     "H_VL_PARM"}, \
+       {H_TSIZE_PARM,                  "H_TSIZE_PARM"}, \
+       {H_TRACE_PARM,                  "H_TRACE_PARM"}, \
+       {H_MASK_PARM,                   "H_MASK_PARM"}, \
+       {H_MCG_FULL,                    "H_MCG_FULL"}, \
+       {H_ALIAS_EXIST,                 "H_ALIAS_EXIST"}, \
+       {H_P_COUNTER,                   "H_P_COUNTER"}, \
+       {H_TABLE_FULL,                  "H_TABLE_FULL"}, \
+       {H_ALT_TABLE,                   "H_ALT_TABLE"}, \
+       {H_MR_CONDITION,                "H_MR_CONDITION"}, \
+       {H_NOT_ENOUGH_RESOURCES,        "H_NOT_ENOUGH_RESOURCES"}, \
+       {H_R_STATE,                     "H_R_STATE"}, \
+       {H_RESCINDED,                   "H_RESCINDED"}, \
+       {H_P2,                          "H_P2"}, \
+       {H_P3,                          "H_P3"}, \
+       {H_P4,                          "H_P4"}, \
+       {H_P5,                          "H_P5"}, \
+       {H_P6,                          "H_P6"}, \
+       {H_P7,                          "H_P7"}, \
+       {H_P8,                          "H_P8"}, \
+       {H_P9,                          "H_P9"}, \
+       {H_TOO_BIG,                     "H_TOO_BIG"}, \
+       {H_OVERLAP,                     "H_OVERLAP"}, \
+       {H_INTERRUPT,                   "H_INTERRUPT"}, \
+       {H_BAD_DATA,                    "H_BAD_DATA"}, \
+       {H_NOT_ACTIVE,                  "H_NOT_ACTIVE"}, \
+       {H_SG_LIST,                     "H_SG_LIST"}, \
+       {H_OP_MODE,                     "H_OP_MODE"}, \
+       {H_COP_HW,                      "H_COP_HW"}, \
+       {H_UNSUPPORTED_FLAG_START,      "H_UNSUPPORTED_FLAG_START"}, \
+       {H_UNSUPPORTED_FLAG_END,        "H_UNSUPPORTED_FLAG_END"}, \
+       {H_MULTI_THREADS_ACTIVE,        "H_MULTI_THREADS_ACTIVE"}, \
+       {H_OUTSTANDING_COP_OPS,         "H_OUTSTANDING_COP_OPS"}
+
+TRACE_EVENT(kvm_guest_enter,
+       TP_PROTO(struct kvm_vcpu *vcpu),
+       TP_ARGS(vcpu),
+
+       TP_STRUCT__entry(
+               __field(int,            vcpu_id)
+               __field(unsigned long,  pc)
+               __field(unsigned long,  pending_exceptions)
+               __field(u8,             ceded)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id        = vcpu->vcpu_id;
+               __entry->pc             = kvmppc_get_pc(vcpu);
+               __entry->ceded          = vcpu->arch.ceded;
+               __entry->pending_exceptions  = vcpu->arch.pending_exceptions;
+       ),
+
+       TP_printk("VCPU %d: pc=0x%lx pexcp=0x%lx ceded=%d",
+                       __entry->vcpu_id,
+                       __entry->pc,
+                       __entry->pending_exceptions, __entry->ceded)
+);
+
+TRACE_EVENT(kvm_guest_exit,
+       TP_PROTO(struct kvm_vcpu *vcpu),
+       TP_ARGS(vcpu),
+
+       TP_STRUCT__entry(
+               __field(int,            vcpu_id)
+               __field(int,            trap)
+               __field(unsigned long,  pc)
+               __field(unsigned long,  msr)
+               __field(u8,             ceded)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id = vcpu->vcpu_id;
+               __entry->trap    = vcpu->arch.trap;
+               __entry->ceded   = vcpu->arch.ceded;
+               __entry->pc      = kvmppc_get_pc(vcpu);
+               __entry->msr     = vcpu->arch.shregs.msr;
+       ),
+
+       TP_printk("VCPU %d: trap=%s pc=0x%lx msr=0x%lx, ceded=%d",
+               __entry->vcpu_id,
+               __print_symbolic(__entry->trap, kvm_trace_symbol_exit),
+               __entry->pc, __entry->msr, __entry->ceded
+       )
+);
+
+TRACE_EVENT(kvm_page_fault_enter,
+       TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep,
+                struct kvm_memory_slot *memslot, unsigned long ea,
+                unsigned long dsisr),
+
+       TP_ARGS(vcpu, hptep, memslot, ea, dsisr),
+
+       TP_STRUCT__entry(
+               __field(int,            vcpu_id)
+               __field(unsigned long,  hpte_v)
+               __field(unsigned long,  hpte_r)
+               __field(unsigned long,  gpte_r)
+               __field(unsigned long,  ea)
+               __field(u64,            base_gfn)
+               __field(u32,            slot_flags)
+               __field(u32,            dsisr)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id  = vcpu->vcpu_id;
+               __entry->hpte_v   = hptep[0];
+               __entry->hpte_r   = hptep[1];
+               __entry->gpte_r   = hptep[2];
+               __entry->ea       = ea;
+               __entry->dsisr    = dsisr;
+               __entry->base_gfn = memslot ? memslot->base_gfn : -1UL;
+               __entry->slot_flags = memslot ? memslot->flags : 0;
+       ),
+
+       TP_printk("VCPU %d: hpte=0x%lx:0x%lx guest=0x%lx ea=0x%lx,%x slot=0x%llx,0x%x",
+                  __entry->vcpu_id,
+                  __entry->hpte_v, __entry->hpte_r, __entry->gpte_r,
+                  __entry->ea, __entry->dsisr,
+                  __entry->base_gfn, __entry->slot_flags)
+);
+
+TRACE_EVENT(kvm_page_fault_exit,
+       TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, long ret),
+
+       TP_ARGS(vcpu, hptep, ret),
+
+       TP_STRUCT__entry(
+               __field(int,            vcpu_id)
+               __field(unsigned long,  hpte_v)
+               __field(unsigned long,  hpte_r)
+               __field(long,           ret)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id  = vcpu->vcpu_id;
+               __entry->hpte_v = hptep[0];
+               __entry->hpte_r = hptep[1];
+               __entry->ret = ret;
+       ),
+
+       TP_printk("VCPU %d: hpte=0x%lx:0x%lx ret=0x%lx",
+                  __entry->vcpu_id,
+                  __entry->hpte_v, __entry->hpte_r, __entry->ret)
+);
+
+TRACE_EVENT(kvm_hcall_enter,
+       TP_PROTO(struct kvm_vcpu *vcpu),
+
+       TP_ARGS(vcpu),
+
+       TP_STRUCT__entry(
+               __field(int,            vcpu_id)
+               __field(unsigned long,  req)
+               __field(unsigned long,  gpr4)
+               __field(unsigned long,  gpr5)
+               __field(unsigned long,  gpr6)
+               __field(unsigned long,  gpr7)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id  = vcpu->vcpu_id;
+               __entry->req   = kvmppc_get_gpr(vcpu, 3);
+               __entry->gpr4  = kvmppc_get_gpr(vcpu, 4);
+               __entry->gpr5  = kvmppc_get_gpr(vcpu, 5);
+               __entry->gpr6  = kvmppc_get_gpr(vcpu, 6);
+               __entry->gpr7  = kvmppc_get_gpr(vcpu, 7);
+       ),
+
+       TP_printk("VCPU %d: hcall=%s GPR4-7=0x%lx,0x%lx,0x%lx,0x%lx",
+                  __entry->vcpu_id,
+                  __print_symbolic(__entry->req, kvm_trace_symbol_hcall),
+                  __entry->gpr4, __entry->gpr5, __entry->gpr6, __entry->gpr7)
+);
+
+TRACE_EVENT(kvm_hcall_exit,
+       TP_PROTO(struct kvm_vcpu *vcpu, int ret),
+
+       TP_ARGS(vcpu, ret),
+
+       TP_STRUCT__entry(
+               __field(int,            vcpu_id)
+               __field(unsigned long,  ret)
+               __field(unsigned long,  hcall_rc)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id  = vcpu->vcpu_id;
+               __entry->ret      = ret;
+               __entry->hcall_rc = kvmppc_get_gpr(vcpu, 3);
+       ),
+
+       TP_printk("VCPU %d: ret=%s hcall_rc=%s",
+                  __entry->vcpu_id,
+                  __print_symbolic(__entry->ret, kvm_trace_symbol_kvmret),
+                  __print_symbolic(__entry->ret & RESUME_FLAG_HOST ?
+                                       H_TOO_HARD : __entry->hcall_rc,
+                                       kvm_trace_symbol_hcall_rc))
+);
+
+TRACE_EVENT(kvmppc_run_core,
+       TP_PROTO(struct kvmppc_vcore *vc, int where),
+
+       TP_ARGS(vc, where),
+
+       TP_STRUCT__entry(
+               __field(int,    n_runnable)
+               __field(int,    runner_vcpu)
+               __field(int,    where)
+               __field(pid_t,  tgid)
+       ),
+
+       TP_fast_assign(
+               __entry->runner_vcpu    = vc->runner->vcpu_id;
+               __entry->n_runnable     = vc->n_runnable;
+               __entry->where          = where;
+               __entry->tgid           = current->tgid;
+       ),
+
+       TP_printk("%s runner_vcpu==%d runnable=%d tgid=%d",
+                   __entry->where ? "Exit" : "Enter",
+                   __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_vcore_blocked,
+       TP_PROTO(struct kvmppc_vcore *vc, int where),
+
+       TP_ARGS(vc, where),
+
+       TP_STRUCT__entry(
+               __field(int,    n_runnable)
+               __field(int,    runner_vcpu)
+               __field(int,    where)
+               __field(pid_t,  tgid)
+       ),
+
+       TP_fast_assign(
+               __entry->runner_vcpu = vc->runner->vcpu_id;
+               __entry->n_runnable  = vc->n_runnable;
+               __entry->where       = where;
+               __entry->tgid        = current->tgid;
+       ),
+
+       TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d",
+                  __entry->where ? "Exit" : "Enter",
+                  __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_run_vcpu_enter,
+       TP_PROTO(struct kvm_vcpu *vcpu),
+
+       TP_ARGS(vcpu),
+
+       TP_STRUCT__entry(
+               __field(int,            vcpu_id)
+               __field(pid_t,          tgid)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id  = vcpu->vcpu_id;
+               __entry->tgid     = current->tgid;
+       ),
+
+       TP_printk("VCPU %d: tgid=%d", __entry->vcpu_id, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_run_vcpu_exit,
+       TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_run *run),
+
+       TP_ARGS(vcpu, run),
+
+       TP_STRUCT__entry(
+               __field(int,            vcpu_id)
+               __field(int,            exit)
+               __field(int,            ret)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id  = vcpu->vcpu_id;
+               __entry->exit     = run->exit_reason;
+               __entry->ret      = vcpu->arch.ret;
+       ),
+
+       TP_printk("VCPU %d: exit=%d, ret=%d",
+                       __entry->vcpu_id, __entry->exit, __entry->ret)
+);
+
+#endif /* _TRACE_KVM_HV_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
index e1357cd..810507c 100644
@@ -3,36 +3,13 @@
 #define _TRACE_KVM_PR_H
 
 #include <linux/tracepoint.h>
+#include "trace_book3s.h"
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm_pr
 #define TRACE_INCLUDE_PATH .
 #define TRACE_INCLUDE_FILE trace_pr
 
-#define kvm_trace_symbol_exit \
-       {0x100, "SYSTEM_RESET"}, \
-       {0x200, "MACHINE_CHECK"}, \
-       {0x300, "DATA_STORAGE"}, \
-       {0x380, "DATA_SEGMENT"}, \
-       {0x400, "INST_STORAGE"}, \
-       {0x480, "INST_SEGMENT"}, \
-       {0x500, "EXTERNAL"}, \
-       {0x501, "EXTERNAL_LEVEL"}, \
-       {0x502, "EXTERNAL_HV"}, \
-       {0x600, "ALIGNMENT"}, \
-       {0x700, "PROGRAM"}, \
-       {0x800, "FP_UNAVAIL"}, \
-       {0x900, "DECREMENTER"}, \
-       {0x980, "HV_DECREMENTER"}, \
-       {0xc00, "SYSCALL"}, \
-       {0xd00, "TRACE"}, \
-       {0xe00, "H_DATA_STORAGE"}, \
-       {0xe20, "H_INST_STORAGE"}, \
-       {0xe40, "H_EMUL_ASSIST"}, \
-       {0xf00, "PERFMON"}, \
-       {0xf20, "ALTIVEC"}, \
-       {0xf40, "VSX"}
-
 TRACE_EVENT(kvm_book3s_reenter,
        TP_PROTO(int r, struct kvm_vcpu *vcpu),
        TP_ARGS(r, vcpu),
index 2175f91..9cba74d 100644
@@ -123,7 +123,7 @@ struct kvm_s390_sie_block {
 #define ICPT_PARTEXEC  0x38
 #define ICPT_IOINST    0x40
        __u8    icptcode;               /* 0x0050 */
-       __u8    reserved51;             /* 0x0051 */
+       __u8    icptstatus;             /* 0x0051 */
        __u16   ihcpu;                  /* 0x0052 */
        __u8    reserved54[2];          /* 0x0054 */
        __u16   ipa;                    /* 0x0056 */
@@ -226,10 +226,17 @@ struct kvm_vcpu_stat {
        u32 instruction_sigp_sense_running;
        u32 instruction_sigp_external_call;
        u32 instruction_sigp_emergency;
+       u32 instruction_sigp_cond_emergency;
+       u32 instruction_sigp_start;
        u32 instruction_sigp_stop;
+       u32 instruction_sigp_stop_store_status;
+       u32 instruction_sigp_store_status;
        u32 instruction_sigp_arch;
        u32 instruction_sigp_prefix;
        u32 instruction_sigp_restart;
+       u32 instruction_sigp_init_cpu_reset;
+       u32 instruction_sigp_cpu_reset;
+       u32 instruction_sigp_unknown;
        u32 diagnose_10;
        u32 diagnose_44;
        u32 diagnose_9c;
@@ -288,6 +295,79 @@ struct kvm_vcpu_stat {
 #define PGM_PER                                0x80
 #define PGM_CRYPTO_OPERATION           0x119
 
+/* irq types in order of priority */
+enum irq_types {
+       IRQ_PEND_MCHK_EX = 0,
+       IRQ_PEND_SVC,
+       IRQ_PEND_PROG,
+       IRQ_PEND_MCHK_REP,
+       IRQ_PEND_EXT_IRQ_KEY,
+       IRQ_PEND_EXT_MALFUNC,
+       IRQ_PEND_EXT_EMERGENCY,
+       IRQ_PEND_EXT_EXTERNAL,
+       IRQ_PEND_EXT_CLOCK_COMP,
+       IRQ_PEND_EXT_CPU_TIMER,
+       IRQ_PEND_EXT_TIMING,
+       IRQ_PEND_EXT_SERVICE,
+       IRQ_PEND_EXT_HOST,
+       IRQ_PEND_PFAULT_INIT,
+       IRQ_PEND_PFAULT_DONE,
+       IRQ_PEND_VIRTIO,
+       IRQ_PEND_IO_ISC_0,
+       IRQ_PEND_IO_ISC_1,
+       IRQ_PEND_IO_ISC_2,
+       IRQ_PEND_IO_ISC_3,
+       IRQ_PEND_IO_ISC_4,
+       IRQ_PEND_IO_ISC_5,
+       IRQ_PEND_IO_ISC_6,
+       IRQ_PEND_IO_ISC_7,
+       IRQ_PEND_SIGP_STOP,
+       IRQ_PEND_RESTART,
+       IRQ_PEND_SET_PREFIX,
+       IRQ_PEND_COUNT
+};
+
+/*
+ * Repressible (non-floating) machine check interrupts
+ * subclass bits in MCIC
+ */
+#define MCHK_EXTD_BIT 58
+#define MCHK_DEGR_BIT 56
+#define MCHK_WARN_BIT 55
+#define MCHK_REP_MASK ((1UL << MCHK_DEGR_BIT) | \
+                      (1UL << MCHK_EXTD_BIT) | \
+                      (1UL << MCHK_WARN_BIT))
+
+/* Exigent machine check interrupts subclass bits in MCIC */
+#define MCHK_SD_BIT 63
+#define MCHK_PD_BIT 62
+#define MCHK_EX_MASK ((1UL << MCHK_SD_BIT) | (1UL << MCHK_PD_BIT))
+
+#define IRQ_PEND_EXT_MASK ((1UL << IRQ_PEND_EXT_IRQ_KEY)    | \
+                          (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \
+                          (1UL << IRQ_PEND_EXT_CPU_TIMER)  | \
+                          (1UL << IRQ_PEND_EXT_MALFUNC)    | \
+                          (1UL << IRQ_PEND_EXT_EMERGENCY)  | \
+                          (1UL << IRQ_PEND_EXT_EXTERNAL)   | \
+                          (1UL << IRQ_PEND_EXT_TIMING)     | \
+                          (1UL << IRQ_PEND_EXT_HOST)       | \
+                          (1UL << IRQ_PEND_EXT_SERVICE)    | \
+                          (1UL << IRQ_PEND_VIRTIO)         | \
+                          (1UL << IRQ_PEND_PFAULT_INIT)    | \
+                          (1UL << IRQ_PEND_PFAULT_DONE))
+
+#define IRQ_PEND_IO_MASK ((1UL << IRQ_PEND_IO_ISC_0) | \
+                         (1UL << IRQ_PEND_IO_ISC_1) | \
+                         (1UL << IRQ_PEND_IO_ISC_2) | \
+                         (1UL << IRQ_PEND_IO_ISC_3) | \
+                         (1UL << IRQ_PEND_IO_ISC_4) | \
+                         (1UL << IRQ_PEND_IO_ISC_5) | \
+                         (1UL << IRQ_PEND_IO_ISC_6) | \
+                         (1UL << IRQ_PEND_IO_ISC_7))
+
+#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \
+                           (1UL << IRQ_PEND_MCHK_EX))
+
 struct kvm_s390_interrupt_info {
        struct list_head list;
        u64     type;
@@ -306,14 +386,25 @@ struct kvm_s390_interrupt_info {
 #define ACTION_STORE_ON_STOP           (1<<0)
 #define ACTION_STOP_ON_STOP            (1<<1)
 
+struct kvm_s390_irq_payload {
+       struct kvm_s390_io_info io;
+       struct kvm_s390_ext_info ext;
+       struct kvm_s390_pgm_info pgm;
+       struct kvm_s390_emerg_info emerg;
+       struct kvm_s390_extcall_info extcall;
+       struct kvm_s390_prefix_info prefix;
+       struct kvm_s390_mchk_info mchk;
+};
+
 struct kvm_s390_local_interrupt {
        spinlock_t lock;
-       struct list_head list;
-       atomic_t active;
        struct kvm_s390_float_interrupt *float_int;
        wait_queue_head_t *wq;
        atomic_t *cpuflags;
        unsigned int action_bits;
+       DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
+       struct kvm_s390_irq_payload irq;
+       unsigned long pending_irqs;
 };
 
 struct kvm_s390_float_interrupt {
@@ -434,6 +525,8 @@ struct kvm_arch{
        int user_cpu_state_ctrl;
        struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
        wait_queue_head_t ipte_wq;
+       int ipte_lock_count;
+       struct mutex ipte_mutex;
        spinlock_t start_stop_lock;
        struct kvm_s390_crypto crypto;
 };
index e510b94..3009c2b 100644
@@ -24,6 +24,7 @@ void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
 
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
                          unsigned long key, bool nq);
+unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
 
 static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
 {
index 4957611..fad4ae2 100644
@@ -10,6 +10,7 @@
 #define SIGP_RESTART                 6
 #define SIGP_STOP_AND_STORE_STATUS    9
 #define SIGP_INITIAL_CPU_RESET      11
+#define SIGP_CPU_RESET              12
 #define SIGP_SET_PREFIX                     13
 #define SIGP_STORE_STATUS_AT_ADDRESS 14
 #define SIGP_SET_ARCHITECTURE       18
index 0f961a1..8b9ccf0 100644
@@ -207,8 +207,6 @@ union raddress {
        unsigned long pfra : 52; /* Page-Frame Real Address */
 };
 
-static int ipte_lock_count;
-static DEFINE_MUTEX(ipte_mutex);
 
 int ipte_lock_held(struct kvm_vcpu *vcpu)
 {
@@ -216,47 +214,51 @@ int ipte_lock_held(struct kvm_vcpu *vcpu)
 
        if (vcpu->arch.sie_block->eca & 1)
                return ic->kh != 0;
-       return ipte_lock_count != 0;
+       return vcpu->kvm->arch.ipte_lock_count != 0;
 }
 
 static void ipte_lock_simple(struct kvm_vcpu *vcpu)
 {
        union ipte_control old, new, *ic;
 
-       mutex_lock(&ipte_mutex);
-       ipte_lock_count++;
-       if (ipte_lock_count > 1)
+       mutex_lock(&vcpu->kvm->arch.ipte_mutex);
+       vcpu->kvm->arch.ipte_lock_count++;
+       if (vcpu->kvm->arch.ipte_lock_count > 1)
                goto out;
        ic = &vcpu->kvm->arch.sca->ipte_control;
        do {
-               old = ACCESS_ONCE(*ic);
+               old = *ic;
+               barrier();
                while (old.k) {
                        cond_resched();
-                       old = ACCESS_ONCE(*ic);
+                       old = *ic;
+                       barrier();
                }
                new = old;
                new.k = 1;
        } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
 out:
-       mutex_unlock(&ipte_mutex);
+       mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
 }
 
 static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
 {
        union ipte_control old, new, *ic;
 
-       mutex_lock(&ipte_mutex);
-       ipte_lock_count--;
-       if (ipte_lock_count)
+       mutex_lock(&vcpu->kvm->arch.ipte_mutex);
+       vcpu->kvm->arch.ipte_lock_count--;
+       if (vcpu->kvm->arch.ipte_lock_count)
                goto out;
        ic = &vcpu->kvm->arch.sca->ipte_control;
        do {
-               new = old = ACCESS_ONCE(*ic);
+               old = *ic;
+               barrier();
+               new = old;
                new.k = 0;
        } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
        wake_up(&vcpu->kvm->arch.ipte_wq);
 out:
-       mutex_unlock(&ipte_mutex);
+       mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
 }
 
 static void ipte_lock_siif(struct kvm_vcpu *vcpu)
@@ -265,10 +267,12 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu)
 
        ic = &vcpu->kvm->arch.sca->ipte_control;
        do {
-               old = ACCESS_ONCE(*ic);
+               old = *ic;
+               barrier();
                while (old.kg) {
                        cond_resched();
-                       old = ACCESS_ONCE(*ic);
+                       old = *ic;
+                       barrier();
                }
                new = old;
                new.k = 1;
@@ -282,7 +286,9 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
 
        ic = &vcpu->kvm->arch.sca->ipte_control;
        do {
-               new = old = ACCESS_ONCE(*ic);
+               old = *ic;
+               barrier();
+               new = old;
                new.kh--;
                if (!new.kh)
                        new.k = 0;
index eaf4629..81c77ab 100644
@@ -38,6 +38,19 @@ static const intercept_handler_t instruction_handlers[256] = {
        [0xeb] = kvm_s390_handle_eb,
 };
 
+void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc)
+{
+       struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
+
+       /* Use the length of the EXECUTE instruction if necessary */
+       if (sie_block->icptstatus & 1) {
+               ilc = (sie_block->icptstatus >> 4) & 0x6;
+               if (!ilc)
+                       ilc = 4;
+       }
+       sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc);
+}
+
 static int handle_noop(struct kvm_vcpu *vcpu)
 {
        switch (vcpu->arch.sie_block->icptcode) {
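
The new kvm_s390_rewind_psw() above decides how many bytes to step the guest PSW back: when the low bit of icptstatus is set (the EXECUTE case named in the comment), the length comes from an ILC field in icptstatus, yielding 0, 2, 4 or 6 with 0 treated as 4; otherwise the caller-supplied length is used. A minimal sketch of just that length computation, using plain values rather than the SIE block:

        /* Minimal sketch of the rewind-length computation in
         * kvm_s390_rewind_psw(), with plain values instead of the SIE block. */
        static unsigned int rewind_length(unsigned char icptstatus,
                                          unsigned int caller_ilc)
        {
                unsigned int ilc = caller_ilc;

                if (icptstatus & 1) {                   /* EXECUTE case, per the comment above */
                        ilc = (icptstatus >> 4) & 0x6;  /* 0, 2, 4 or 6 */
                        if (!ilc)
                                ilc = 4;                /* treat 0 as a 4-byte rewind */
                }
                return ilc;
        }

The returned length is what the code above hands to __rewind_psw() to step the guest PSW address back.
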
@@ -244,7 +257,7 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
 static int handle_external_interrupt(struct kvm_vcpu *vcpu)
 {
        u16 eic = vcpu->arch.sie_block->eic;
-       struct kvm_s390_interrupt irq;
+       struct kvm_s390_irq irq;
        psw_t newpsw;
        int rc;
 
@@ -269,7 +282,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
                if (kvm_s390_si_ext_call_pending(vcpu))
                        return 0;
                irq.type = KVM_S390_INT_EXTERNAL_CALL;
-               irq.parm = vcpu->arch.sie_block->extcpuaddr;
+               irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr;
                break;
        default:
                return -EOPNOTSUPP;
@@ -288,7 +301,6 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
  */
 static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 {
-       psw_t *psw = &vcpu->arch.sie_block->gpsw;
        unsigned long srcaddr, dstaddr;
        int reg1, reg2, rc;
 
@@ -310,7 +322,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
        if (rc != 0)
                return rc;
 
-       psw->addr = __rewind_psw(*psw, 4);
+       kvm_s390_rewind_psw(vcpu, 4);
 
        return 0;
 }
index a398384..f00f31e 100644
@@ -16,6 +16,7 @@
 #include <linux/mmu_context.h>
 #include <linux/signal.h>
 #include <linux/slab.h>
+#include <linux/bitmap.h>
 #include <asm/asm-offsets.h>
 #include <asm/uaccess.h>
 #include "kvm-s390.h"
@@ -27,8 +28,8 @@
 #define IOINT_CSSID_MASK 0x03fc0000
 #define IOINT_AI_MASK 0x04000000
 #define PFAULT_INIT 0x0600
-
-static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
+#define PFAULT_DONE 0x0680
+#define VIRTIO_PARAM 0x0d00
 
 static int is_ioint(u64 type)
 {
@@ -136,6 +137,31 @@ static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
        return 0;
 }
 
+static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.local_int.pending_irqs;
+}
+
+static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
+{
+       unsigned long active_mask = pending_local_irqs(vcpu);
+
+       if (psw_extint_disabled(vcpu))
+               active_mask &= ~IRQ_PEND_EXT_MASK;
+       if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul))
+               __clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask);
+       if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul))
+               __clear_bit(IRQ_PEND_EXT_EMERGENCY, &active_mask);
+       if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+               __clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
+       if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul))
+               __clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
+       if (psw_mchk_disabled(vcpu))
+               active_mask &= ~IRQ_PEND_MCHK_MASK;
+
+       return active_mask;
+}
+
 static void __set_cpu_idle(struct kvm_vcpu *vcpu)
 {
        atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
@@ -170,26 +196,45 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
        atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
 }
 
+static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
+{
+       if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK))
+               return;
+       if (psw_extint_disabled(vcpu))
+               __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
+       else
+               vcpu->arch.sie_block->lctl |= LCTL_CR0;
+}
+
+static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
+{
+       if (!(pending_local_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
+               return;
+       if (psw_mchk_disabled(vcpu))
+               vcpu->arch.sie_block->ictl |= ICTL_LPSW;
+       else
+               vcpu->arch.sie_block->lctl |= LCTL_CR14;
+}
+
+/* Set interception request for non-deliverable local interrupts */
+static void set_intercept_indicators_local(struct kvm_vcpu *vcpu)
+{
+       set_intercept_indicators_ext(vcpu);
+       set_intercept_indicators_mchk(vcpu);
+}
+
 static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
                                      struct kvm_s390_interrupt_info *inti)
 {
        switch (inti->type) {
-       case KVM_S390_INT_EXTERNAL_CALL:
-       case KVM_S390_INT_EMERGENCY:
        case KVM_S390_INT_SERVICE:
-       case KVM_S390_INT_PFAULT_INIT:
        case KVM_S390_INT_PFAULT_DONE:
        case KVM_S390_INT_VIRTIO:
-       case KVM_S390_INT_CLOCK_COMP:
-       case KVM_S390_INT_CPU_TIMER:
                if (psw_extint_disabled(vcpu))
                        __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
                else
                        vcpu->arch.sie_block->lctl |= LCTL_CR0;
                break;
-       case KVM_S390_SIGP_STOP:
-               __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
-               break;
        case KVM_S390_MCHK:
                if (psw_mchk_disabled(vcpu))
                        vcpu->arch.sie_block->ictl |= ICTL_LPSW;
@@ -226,13 +271,236 @@ static u16 get_ilc(struct kvm_vcpu *vcpu)
        }
 }
 
-static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
-                             struct kvm_s390_pgm_info *pgm_info)
+static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       int rc;
+
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
+                                        0, 0);
+
+       rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+                          (u16 *)__LC_EXT_INT_CODE);
+       rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       int rc;
+
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
+                                        0, 0);
+
+       rc  = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
+                          (u16 __user *)__LC_EXT_INT_CODE);
+       rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_ext_info ext;
+       int rc;
+
+       spin_lock(&li->lock);
+       ext = li->irq.ext;
+       clear_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
+       li->irq.ext.ext_params2 = 0;
+       spin_unlock(&li->lock);
+
+       VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx",
+                  0, ext.ext_params2);
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+                                        KVM_S390_INT_PFAULT_INIT,
+                                        0, ext.ext_params2);
+
+       rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *) __LC_EXT_INT_CODE);
+       rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= put_guest_lc(vcpu, ext.ext_params2, (u64 *) __LC_EXT_PARAMS2);
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_mchk_info mchk;
+       int rc;
+
+       spin_lock(&li->lock);
+       mchk = li->irq.mchk;
+       /*
+        * If there was an exigent machine check pending, then any repressible
+        * machine checks that might have been pending are indicated along
+        * with it, so always clear both bits
+        */
+       clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+       clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
+       memset(&li->irq.mchk, 0, sizeof(mchk));
+       spin_unlock(&li->lock);
+
+       VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+                  mchk.mcic);
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
+                                        mchk.cr14, mchk.mcic);
+
+       rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
+       rc |= put_guest_lc(vcpu, mchk.mcic,
+                          (u64 __user *) __LC_MCCK_CODE);
+       rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
+                          (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+                            &mchk.fixed_logout, sizeof(mchk.fixed_logout));
+       rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       int rc;
+
+       VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
+       vcpu->stat.deliver_restart_signal++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
+
+       rc  = write_guest_lc(vcpu,
+                            offsetof(struct _lowcore, restart_old_psw),
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       clear_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_stop(struct kvm_vcpu *vcpu)
+{
+       VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
+       vcpu->stat.deliver_stop_signal++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_SIGP_STOP,
+                                        0, 0);
+
+       __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+       clear_bit(IRQ_PEND_SIGP_STOP, &vcpu->arch.local_int.pending_irqs);
+       return 0;
+}
+
+static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_prefix_info prefix;
+
+       spin_lock(&li->lock);
+       prefix = li->irq.prefix;
+       li->irq.prefix.address = 0;
+       clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
+       spin_unlock(&li->lock);
+
+       VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address);
+       vcpu->stat.deliver_prefix_signal++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+                                        KVM_S390_SIGP_SET_PREFIX,
+                                        prefix.address, 0);
+
+       kvm_s390_set_prefix(vcpu, prefix.address);
+       return 0;
+}
+
+static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       int rc;
+       int cpu_addr;
+
+       spin_lock(&li->lock);
+       cpu_addr = find_first_bit(li->sigp_emerg_pending, KVM_MAX_VCPUS);
+       clear_bit(cpu_addr, li->sigp_emerg_pending);
+       if (bitmap_empty(li->sigp_emerg_pending, KVM_MAX_VCPUS))
+               clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
+       spin_unlock(&li->lock);
+
+       VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
+       vcpu->stat.deliver_emergency_signal++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
+                                        cpu_addr, 0);
+
+       rc  = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG,
+                          (u16 *)__LC_EXT_INT_CODE);
+       rc |= put_guest_lc(vcpu, cpu_addr, (u16 *)__LC_EXT_CPU_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_extcall_info extcall;
+       int rc;
+
+       spin_lock(&li->lock);
+       extcall = li->irq.extcall;
+       li->irq.extcall.code = 0;
+       clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
+       spin_unlock(&li->lock);
+
+       VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
+       vcpu->stat.deliver_external_call++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+                                        KVM_S390_INT_EXTERNAL_CALL,
+                                        extcall.code, 0);
+
+       rc  = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL,
+                          (u16 *)__LC_EXT_INT_CODE);
+       rc |= put_guest_lc(vcpu, extcall.code, (u16 *)__LC_EXT_CPU_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &vcpu->arch.sie_block->gpsw,
+                           sizeof(psw_t));
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 {
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_pgm_info pgm_info;
        int rc = 0;
        u16 ilc = get_ilc(vcpu);
 
-       switch (pgm_info->code & ~PGM_PER) {
+       spin_lock(&li->lock);
+       pgm_info = li->irq.pgm;
+       clear_bit(IRQ_PEND_PROG, &li->pending_irqs);
+       memset(&li->irq.pgm, 0, sizeof(pgm_info));
+       spin_unlock(&li->lock);
+
+       VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
+                  pgm_info.code, ilc);
+       vcpu->stat.deliver_program_int++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+                                        pgm_info.code, 0);
+
+       switch (pgm_info.code & ~PGM_PER) {
        case PGM_AFX_TRANSLATION:
        case PGM_ASX_TRANSLATION:
        case PGM_EX_TRANSLATION:
@@ -243,7 +511,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
        case PGM_PRIMARY_AUTHORITY:
        case PGM_SECONDARY_AUTHORITY:
        case PGM_SPACE_SWITCH:
-               rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+               rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
                                  (u64 *)__LC_TRANS_EXC_CODE);
                break;
        case PGM_ALEN_TRANSLATION:
@@ -252,7 +520,7 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
        case PGM_ASTE_SEQUENCE:
        case PGM_ASTE_VALIDITY:
        case PGM_EXTENDED_AUTHORITY:
-               rc = put_guest_lc(vcpu, pgm_info->exc_access_id,
+               rc = put_guest_lc(vcpu, pgm_info.exc_access_id,
                                  (u8 *)__LC_EXC_ACCESS_ID);
                break;
        case PGM_ASCE_TYPE:
@@ -261,247 +529,208 @@ static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
        case PGM_REGION_SECOND_TRANS:
        case PGM_REGION_THIRD_TRANS:
        case PGM_SEGMENT_TRANSLATION:
-               rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+               rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
                                  (u64 *)__LC_TRANS_EXC_CODE);
-               rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+               rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
                                   (u8 *)__LC_EXC_ACCESS_ID);
-               rc |= put_guest_lc(vcpu, pgm_info->op_access_id,
+               rc |= put_guest_lc(vcpu, pgm_info.op_access_id,
                                   (u8 *)__LC_OP_ACCESS_ID);
                break;
        case PGM_MONITOR:
-               rc = put_guest_lc(vcpu, pgm_info->mon_class_nr,
-                                 (u64 *)__LC_MON_CLASS_NR);
-               rc |= put_guest_lc(vcpu, pgm_info->mon_code,
+               rc = put_guest_lc(vcpu, pgm_info.mon_class_nr,
+                                 (u16 *)__LC_MON_CLASS_NR);
+               rc |= put_guest_lc(vcpu, pgm_info.mon_code,
                                   (u64 *)__LC_MON_CODE);
                break;
        case PGM_DATA:
-               rc = put_guest_lc(vcpu, pgm_info->data_exc_code,
+               rc = put_guest_lc(vcpu, pgm_info.data_exc_code,
                                  (u32 *)__LC_DATA_EXC_CODE);
                break;
        case PGM_PROTECTION:
-               rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+               rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
                                  (u64 *)__LC_TRANS_EXC_CODE);
-               rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+               rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
                                   (u8 *)__LC_EXC_ACCESS_ID);
                break;
        }
 
-       if (pgm_info->code & PGM_PER) {
-               rc |= put_guest_lc(vcpu, pgm_info->per_code,
+       if (pgm_info.code & PGM_PER) {
+               rc |= put_guest_lc(vcpu, pgm_info.per_code,
                                   (u8 *) __LC_PER_CODE);
-               rc |= put_guest_lc(vcpu, pgm_info->per_atmid,
+               rc |= put_guest_lc(vcpu, pgm_info.per_atmid,
                                   (u8 *)__LC_PER_ATMID);
-               rc |= put_guest_lc(vcpu, pgm_info->per_address,
+               rc |= put_guest_lc(vcpu, pgm_info.per_address,
                                   (u64 *) __LC_PER_ADDRESS);
-               rc |= put_guest_lc(vcpu, pgm_info->per_access_id,
+               rc |= put_guest_lc(vcpu, pgm_info.per_access_id,
                                   (u8 *) __LC_PER_ACCESS_ID);
        }
 
        rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
-       rc |= put_guest_lc(vcpu, pgm_info->code,
+       rc |= put_guest_lc(vcpu, pgm_info.code,
                           (u16 *)__LC_PGM_INT_CODE);
        rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
                             &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
        rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       return rc ? -EFAULT : 0;
+}
 
-       return rc;
+static int __must_check __deliver_service(struct kvm_vcpu *vcpu,
+                                         struct kvm_s390_interrupt_info *inti)
+{
+       int rc;
+
+       VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
+                  inti->ext.ext_params);
+       vcpu->stat.deliver_service_signal++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                                        inti->ext.ext_params, 0);
+
+       rc  = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
+       rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+                          (u32 *)__LC_EXT_PARAMS);
+       return rc ? -EFAULT : 0;
 }
 
-static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu,
-                                  struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu,
+                                          struct kvm_s390_interrupt_info *inti)
 {
-       const unsigned short table[] = { 2, 4, 4, 6 };
-       int rc = 0;
+       int rc;
+
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+                                        KVM_S390_INT_PFAULT_DONE, 0,
+                                        inti->ext.ext_params2);
+
+       rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
+       rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+                          (u64 *)__LC_EXT_PARAMS2);
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu,
+                                        struct kvm_s390_interrupt_info *inti)
+{
+       int rc;
+
+       VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
+                  inti->ext.ext_params, inti->ext.ext_params2);
+       vcpu->stat.deliver_virtio_interrupt++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                                        inti->ext.ext_params,
+                                        inti->ext.ext_params2);
+
+       rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
+       rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+                          (u32 *)__LC_EXT_PARAMS);
+       rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+                          (u64 *)__LC_EXT_PARAMS2);
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
+                                    struct kvm_s390_interrupt_info *inti)
+{
+       int rc;
+
+       VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+       vcpu->stat.deliver_io_int++;
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+                                        ((__u32)inti->io.subchannel_id << 16) |
+                                               inti->io.subchannel_nr,
+                                        ((__u64)inti->io.io_int_parm << 32) |
+                                               inti->io.io_int_word);
+
+       rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
+                          (u16 *)__LC_SUBCHANNEL_ID);
+       rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
+                          (u16 *)__LC_SUBCHANNEL_NR);
+       rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
+                          (u32 *)__LC_IO_INT_PARM);
+       rc |= put_guest_lc(vcpu, inti->io.io_int_word,
+                          (u32 *)__LC_IO_INT_WORD);
+       rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu,
+                                          struct kvm_s390_interrupt_info *inti)
+{
+       struct kvm_s390_mchk_info *mchk = &inti->mchk;
+       int rc;
+
+       VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+                  mchk->mcic);
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
+                                        mchk->cr14, mchk->mcic);
+
+       rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
+       rc |= put_guest_lc(vcpu, mchk->mcic,
+                       (u64 __user *) __LC_MCCK_CODE);
+       rc |= put_guest_lc(vcpu, mchk->failing_storage_address,
+                       (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+       rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+                            &mchk->fixed_logout, sizeof(mchk->fixed_logout));
+       rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       return rc ? -EFAULT : 0;
+}
+
+typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
+
+static const deliver_irq_t deliver_irq_funcs[] = {
+       [IRQ_PEND_MCHK_EX]        = __deliver_machine_check,
+       [IRQ_PEND_PROG]           = __deliver_prog,
+       [IRQ_PEND_EXT_EMERGENCY]  = __deliver_emergency_signal,
+       [IRQ_PEND_EXT_EXTERNAL]   = __deliver_external_call,
+       [IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc,
+       [IRQ_PEND_EXT_CPU_TIMER]  = __deliver_cpu_timer,
+       [IRQ_PEND_RESTART]        = __deliver_restart,
+       [IRQ_PEND_SIGP_STOP]      = __deliver_stop,
+       [IRQ_PEND_SET_PREFIX]     = __deliver_set_prefix,
+       [IRQ_PEND_PFAULT_INIT]    = __deliver_pfault_init,
+};
+
+static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu,
+                                          struct kvm_s390_interrupt_info *inti)
+{
+       int rc;
 
        switch (inti->type) {
-       case KVM_S390_INT_EMERGENCY:
-               VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
-               vcpu->stat.deliver_emergency_signal++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->emerg.code, 0);
-               rc  = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE);
-               rc |= put_guest_lc(vcpu, inti->emerg.code,
-                                  (u16 *)__LC_EXT_CPU_ADDR);
-               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               break;
-       case KVM_S390_INT_EXTERNAL_CALL:
-               VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
-               vcpu->stat.deliver_external_call++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->extcall.code, 0);
-               rc  = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE);
-               rc |= put_guest_lc(vcpu, inti->extcall.code,
-                                  (u16 *)__LC_EXT_CPU_ADDR);
-               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw,
-                                    sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw,
-                                   sizeof(psw_t));
-               break;
-       case KVM_S390_INT_CLOCK_COMP:
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->ext.ext_params, 0);
-               rc = deliver_ckc_interrupt(vcpu);
-               break;
-       case KVM_S390_INT_CPU_TIMER:
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->ext.ext_params, 0);
-               rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
-                                  (u16 *)__LC_EXT_INT_CODE);
-               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw,
-                                    sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-                                  (u32 *)__LC_EXT_PARAMS);
-               break;
        case KVM_S390_INT_SERVICE:
-               VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
-                          inti->ext.ext_params);
-               vcpu->stat.deliver_service_signal++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->ext.ext_params, 0);
-               rc  = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE);
-               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw,
-                                    sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-                                  (u32 *)__LC_EXT_PARAMS);
-               break;
-       case KVM_S390_INT_PFAULT_INIT:
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
-                                                inti->ext.ext_params2);
-               rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
-                                  (u16 *) __LC_EXT_INT_CODE);
-               rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
-               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-                                  (u64 *) __LC_EXT_PARAMS2);
+               rc = __deliver_service(vcpu, inti);
                break;
        case KVM_S390_INT_PFAULT_DONE:
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
-                                                inti->ext.ext_params2);
-               rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
-               rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR);
-               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw,
-                                    sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-                                  (u64 *)__LC_EXT_PARAMS2);
+               rc = __deliver_pfault_done(vcpu, inti);
                break;
        case KVM_S390_INT_VIRTIO:
-               VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
-                          inti->ext.ext_params, inti->ext.ext_params2);
-               vcpu->stat.deliver_virtio_interrupt++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->ext.ext_params,
-                                                inti->ext.ext_params2);
-               rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
-               rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR);
-               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw,
-                                    sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-                                  (u32 *)__LC_EXT_PARAMS);
-               rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-                                  (u64 *)__LC_EXT_PARAMS2);
-               break;
-       case KVM_S390_SIGP_STOP:
-               VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
-               vcpu->stat.deliver_stop_signal++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                0, 0);
-               __set_intercept_indicator(vcpu, inti);
-               break;
-
-       case KVM_S390_SIGP_SET_PREFIX:
-               VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
-                          inti->prefix.address);
-               vcpu->stat.deliver_prefix_signal++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->prefix.address, 0);
-               kvm_s390_set_prefix(vcpu, inti->prefix.address);
-               break;
-
-       case KVM_S390_RESTART:
-               VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
-               vcpu->stat.deliver_restart_signal++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                0, 0);
-               rc  = write_guest_lc(vcpu,
-                                    offsetof(struct _lowcore, restart_old_psw),
-                                    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
-                                   &vcpu->arch.sie_block->gpsw,
-                                   sizeof(psw_t));
+               rc = __deliver_virtio(vcpu, inti);
                break;
-       case KVM_S390_PROGRAM_INT:
-               VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
-                          inti->pgm.code,
-                          table[vcpu->arch.sie_block->ipa >> 14]);
-               vcpu->stat.deliver_program_int++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->pgm.code, 0);
-               rc = __deliver_prog_irq(vcpu, &inti->pgm);
-               break;
-
        case KVM_S390_MCHK:
-               VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
-                          inti->mchk.mcic);
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                inti->mchk.cr14,
-                                                inti->mchk.mcic);
-               rc  = kvm_s390_vcpu_store_status(vcpu,
-                                                KVM_S390_STORE_STATUS_PREFIXED);
-               rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE);
-               rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw,
-                                    sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+               rc = __deliver_mchk_floating(vcpu, inti);
                break;
-
        case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-       {
-               __u32 param0 = ((__u32)inti->io.subchannel_id << 16) |
-                       inti->io.subchannel_nr;
-               __u64 param1 = ((__u64)inti->io.io_int_parm << 32) |
-                       inti->io.io_int_word;
-               VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
-               vcpu->stat.deliver_io_int++;
-               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                                param0, param1);
-               rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
-                                  (u16 *)__LC_SUBCHANNEL_ID);
-               rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
-                                  (u16 *)__LC_SUBCHANNEL_NR);
-               rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
-                                  (u32 *)__LC_IO_INT_PARM);
-               rc |= put_guest_lc(vcpu, inti->io.io_int_word,
-                                  (u32 *)__LC_IO_INT_WORD);
-               rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
-                                    &vcpu->arch.sie_block->gpsw,
-                                    sizeof(psw_t));
-               rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
-                                   &vcpu->arch.sie_block->gpsw,
-                                   sizeof(psw_t));
+               rc = __deliver_io(vcpu, inti);
                break;
-       }
        default:
                BUG();
        }
@@ -509,19 +738,6 @@ static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu,
        return rc;
 }
 
-static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
-{
-       int rc;
-
-       rc  = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
-       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                           &vcpu->arch.sie_block->gpsw,
-                           sizeof(psw_t));
-       return rc;
-}
-
 /* Check whether SIGP interpretation facility has an external call pending */
 int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
 {
@@ -538,20 +754,11 @@ int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
 
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 {
-       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
        struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
        struct kvm_s390_interrupt_info  *inti;
-       int rc = 0;
+       int rc;
 
-       if (atomic_read(&li->active)) {
-               spin_lock(&li->lock);
-               list_for_each_entry(inti, &li->list, list)
-                       if (__interrupt_is_deliverable(vcpu, inti)) {
-                               rc = 1;
-                               break;
-                       }
-               spin_unlock(&li->lock);
-       }
+       rc = !!deliverable_local_irqs(vcpu);
 
        if ((!rc) && atomic_read(&fi->active)) {
                spin_lock(&fi->lock);
@@ -643,18 +850,15 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
 void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
 {
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-       struct kvm_s390_interrupt_info  *n, *inti = NULL;
 
        spin_lock(&li->lock);
-       list_for_each_entry_safe(inti, n, &li->list, list) {
-               list_del(&inti->list);
-               kfree(inti);
-       }
-       atomic_set(&li->active, 0);
+       li->pending_irqs = 0;
+       bitmap_zero(li->sigp_emerg_pending, KVM_MAX_VCPUS);
+       memset(&li->irq, 0, sizeof(li->irq));
        spin_unlock(&li->lock);
 
        /* clear pending external calls set by sigp interpretation facility */
-       atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+       atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags);
        atomic_clear_mask(SIGP_CTRL_C,
                          &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
 }
@@ -664,34 +868,35 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
        struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
        struct kvm_s390_interrupt_info  *n, *inti = NULL;
+       deliver_irq_t func;
        int deliver;
        int rc = 0;
+       unsigned long irq_type;
+       unsigned long deliverable_irqs;
 
        __reset_intercept_indicators(vcpu);
-       if (atomic_read(&li->active)) {
-               do {
-                       deliver = 0;
-                       spin_lock(&li->lock);
-                       list_for_each_entry_safe(inti, n, &li->list, list) {
-                               if (__interrupt_is_deliverable(vcpu, inti)) {
-                                       list_del(&inti->list);
-                                       deliver = 1;
-                                       break;
-                               }
-                               __set_intercept_indicator(vcpu, inti);
-                       }
-                       if (list_empty(&li->list))
-                               atomic_set(&li->active, 0);
-                       spin_unlock(&li->lock);
-                       if (deliver) {
-                               rc = __do_deliver_interrupt(vcpu, inti);
-                               kfree(inti);
-                       }
-               } while (!rc && deliver);
-       }
 
-       if (!rc && kvm_cpu_has_pending_timer(vcpu))
-               rc = deliver_ckc_interrupt(vcpu);
+       /* pending ckc conditions might have been invalidated */
+       clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+       if (kvm_cpu_has_pending_timer(vcpu))
+               set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+
+       do {
+               deliverable_irqs = deliverable_local_irqs(vcpu);
+               /* bits are in the order of interrupt priority */
+               irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT);
+               if (irq_type == IRQ_PEND_COUNT)
+                       break;
+               func = deliver_irq_funcs[irq_type];
+               if (!func) {
+                       WARN_ON_ONCE(func == NULL);
+                       clear_bit(irq_type, &li->pending_irqs);
+                       continue;
+               }
+               rc = func(vcpu);
+       } while (!rc && irq_type != IRQ_PEND_COUNT);
+
+       set_intercept_indicators_local(vcpu);
 
        if (!rc && atomic_read(&fi->active)) {
                do {
@@ -710,7 +915,7 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
                                atomic_set(&fi->active, 0);
                        spin_unlock(&fi->lock);
                        if (deliver) {
-                               rc = __do_deliver_interrupt(vcpu, inti);
+                               rc = __deliver_floating_interrupt(vcpu, inti);
                                kfree(inti);
                        }
                } while (!rc && deliver);
@@ -719,23 +924,26 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
        return rc;
 }
 
-int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-       struct kvm_s390_interrupt_info *inti;
 
-       inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-       if (!inti)
-               return -ENOMEM;
+       li->irq.pgm = irq->u.pgm;
+       set_bit(IRQ_PEND_PROG, &li->pending_irqs);
+       return 0;
+}
 
-       inti->type = KVM_S390_PROGRAM_INT;
-       inti->pgm.code = code;
+int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_irq irq;
 
        VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
-       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code,
+                                  0, 1);
        spin_lock(&li->lock);
-       list_add(&inti->list, &li->list);
-       atomic_set(&li->active, 1);
+       irq.u.pgm.code = code;
+       __inject_prog(vcpu, &irq);
        BUG_ON(waitqueue_active(li->wq));
        spin_unlock(&li->lock);
        return 0;
@@ -745,27 +953,166 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
                             struct kvm_s390_pgm_info *pgm_info)
 {
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-       struct kvm_s390_interrupt_info *inti;
-
-       inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-       if (!inti)
-               return -ENOMEM;
+       struct kvm_s390_irq irq;
+       int rc;
 
        VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
                   pgm_info->code);
        trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
                                   pgm_info->code, 0, 1);
-
-       inti->type = KVM_S390_PROGRAM_INT;
-       memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm));
        spin_lock(&li->lock);
-       list_add(&inti->list, &li->list);
-       atomic_set(&li->active, 1);
+       irq.u.pgm = *pgm_info;
+       rc = __inject_prog(vcpu, &irq);
        BUG_ON(waitqueue_active(li->wq));
        spin_unlock(&li->lock);
+       return rc;
+}
+
+static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+       VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx",
+                  irq->u.ext.ext_params, irq->u.ext.ext_params2);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
+                                  irq->u.ext.ext_params,
+                                  irq->u.ext.ext_params2, 2);
+
+       li->irq.ext = irq->u.ext;
+       set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
+       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
        return 0;
 }
 
+int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
+
+       VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
+                  irq->u.extcall.code);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
+                                  irq->u.extcall.code, 0, 2);
+
+       *extcall = irq->u.extcall;
+       set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
+       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       return 0;
+}
+
+static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_prefix_info *prefix = &li->irq.prefix;
+
+       VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
+                  prefix->address);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
+                                  prefix->address, 0, 2);
+
+       *prefix = irq->u.prefix;
+       set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
+       return 0;
+}
+
+static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2);
+
+       li->action_bits |= ACTION_STOP_ON_STOP;
+       set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
+       return 0;
+}
+
+static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
+                                struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+       VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2);
+
+       set_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+       return 0;
+}
+
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+                                  struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_emerg_info *emerg = &li->irq.emerg;
+
+       VCPU_EVENT(vcpu, 3, "inject: emergency %u\n",
+                  irq->u.emerg.code);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
+                                  emerg->code, 0, 2);
+
+       set_bit(emerg->code, li->sigp_emerg_pending);
+       set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
+       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       return 0;
+}
+
+static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
+
+       VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
+                  mchk->mcic);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
+                                  mchk->mcic, 2);
+
+       /*
+        * Because repressible machine checks can be indicated along with
+        * exigent machine checks (PoP, Chapter 11, Interruption action),
+        * we need to combine cr14, mcic and the external damage code.
+        * The failing storage address and the logout area should not be
+        * OR'ed together; we just indicate the last occurrence of the
+        * corresponding machine check.
+        */
+       mchk->cr14 |= irq->u.mchk.cr14;
+       mchk->mcic |= irq->u.mchk.mcic;
+       mchk->ext_damage_code |= irq->u.mchk.ext_damage_code;
+       mchk->failing_storage_address = irq->u.mchk.failing_storage_address;
+       memcpy(&mchk->fixed_logout, &irq->u.mchk.fixed_logout,
+              sizeof(mchk->fixed_logout));
+       if (mchk->mcic & MCHK_EX_MASK)
+               set_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+       else if (mchk->mcic & MCHK_REP_MASK)
+               set_bit(IRQ_PEND_MCHK_REP,  &li->pending_irqs);
+       return 0;
+}
+
+static int __inject_ckc(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+       VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
+                                  0, 0, 2);
+
+       set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       return 0;
+}
+
+static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+       VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER);
+       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
+                                  0, 0, 2);
+
+       set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       return 0;
+}
+
+
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
                                                    u64 cr6, u64 schid)
 {
@@ -851,7 +1198,17 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
        dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
        li = &dst_vcpu->arch.local_int;
        spin_lock(&li->lock);
-       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       switch (inti->type) {
+       case KVM_S390_MCHK:
+               atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+               break;
+       case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+               atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
+               break;
+       default:
+               atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+               break;
+       }
        spin_unlock(&li->lock);
        kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
 unlock_fi:
@@ -920,92 +1277,85 @@ void kvm_s390_reinject_io_int(struct kvm *kvm,
        __inject_vm(kvm, inti);
 }
 
-int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
-                        struct kvm_s390_interrupt *s390int)
+int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+                      struct kvm_s390_irq *irq)
 {
-       struct kvm_s390_local_interrupt *li;
-       struct kvm_s390_interrupt_info *inti;
+       irq->type = s390int->type;
+       switch (irq->type) {
+       case KVM_S390_PROGRAM_INT:
+               if (s390int->parm & 0xffff0000)
+                       return -EINVAL;
+               irq->u.pgm.code = s390int->parm;
+               break;
+       case KVM_S390_SIGP_SET_PREFIX:
+               irq->u.prefix.address = s390int->parm;
+               break;
+       case KVM_S390_INT_EXTERNAL_CALL:
+               if (irq->u.extcall.code & 0xffff0000)
+                       return -EINVAL;
+               irq->u.extcall.code = s390int->parm;
+               break;
+       case KVM_S390_INT_EMERGENCY:
+               if (irq->u.emerg.code & 0xffff0000)
+                       return -EINVAL;
+               irq->u.emerg.code = s390int->parm;
+               break;
+       case KVM_S390_MCHK:
+               irq->u.mchk.mcic = s390int->parm64;
+               break;
+       }
+       return 0;
+}
 
-       inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-       if (!inti)
-               return -ENOMEM;
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       int rc;
 
-       switch (s390int->type) {
+       spin_lock(&li->lock);
+       switch (irq->type) {
        case KVM_S390_PROGRAM_INT:
-               if (s390int->parm & 0xffff0000) {
-                       kfree(inti);
-                       return -EINVAL;
-               }
-               inti->type = s390int->type;
-               inti->pgm.code = s390int->parm;
                VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
-                          s390int->parm);
+                          irq->u.pgm.code);
+               rc = __inject_prog(vcpu, irq);
                break;
        case KVM_S390_SIGP_SET_PREFIX:
-               inti->prefix.address = s390int->parm;
-               inti->type = s390int->type;
-               VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
-                          s390int->parm);
+               rc = __inject_set_prefix(vcpu, irq);
                break;
        case KVM_S390_SIGP_STOP:
+               rc = __inject_sigp_stop(vcpu, irq);
+               break;
        case KVM_S390_RESTART:
+               rc = __inject_sigp_restart(vcpu, irq);
+               break;
        case KVM_S390_INT_CLOCK_COMP:
+               rc = __inject_ckc(vcpu);
+               break;
        case KVM_S390_INT_CPU_TIMER:
-               VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
-               inti->type = s390int->type;
+               rc = __inject_cpu_timer(vcpu);
                break;
        case KVM_S390_INT_EXTERNAL_CALL:
-               if (s390int->parm & 0xffff0000) {
-                       kfree(inti);
-                       return -EINVAL;
-               }
-               VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
-                          s390int->parm);
-               inti->type = s390int->type;
-               inti->extcall.code = s390int->parm;
+               rc = __inject_extcall(vcpu, irq);
                break;
        case KVM_S390_INT_EMERGENCY:
-               if (s390int->parm & 0xffff0000) {
-                       kfree(inti);
-                       return -EINVAL;
-               }
-               VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", s390int->parm);
-               inti->type = s390int->type;
-               inti->emerg.code = s390int->parm;
+               rc = __inject_sigp_emergency(vcpu, irq);
                break;
        case KVM_S390_MCHK:
-               VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
-                          s390int->parm64);
-               inti->type = s390int->type;
-               inti->mchk.mcic = s390int->parm64;
+               rc = __inject_mchk(vcpu, irq);
                break;
        case KVM_S390_INT_PFAULT_INIT:
-               inti->type = s390int->type;
-               inti->ext.ext_params2 = s390int->parm64;
+               rc = __inject_pfault_init(vcpu, irq);
                break;
        case KVM_S390_INT_VIRTIO:
        case KVM_S390_INT_SERVICE:
        case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
        default:
-               kfree(inti);
-               return -EINVAL;
+               rc = -EINVAL;
        }
-       trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm,
-                                  s390int->parm64, 2);
-
-       li = &vcpu->arch.local_int;
-       spin_lock(&li->lock);
-       if (inti->type == KVM_S390_PROGRAM_INT)
-               list_add(&inti->list, &li->list);
-       else
-               list_add_tail(&inti->list, &li->list);
-       atomic_set(&li->active, 1);
-       if (inti->type == KVM_S390_SIGP_STOP)
-               li->action_bits |= ACTION_STOP_ON_STOP;
-       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
        spin_unlock(&li->lock);
-       kvm_s390_vcpu_wakeup(vcpu);
-       return 0;
+       if (!rc)
+               kvm_s390_vcpu_wakeup(vcpu);
+       return rc;
 }
 
 void kvm_s390_clear_float_irqs(struct kvm *kvm)
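
Illustrative sketch, not part of the patch: the delivery rework above replaces the old per-vcpu list of interrupt_info structs with a priority-ordered pending_irqs bitmap plus a deliver_irq_funcs[] table, scanned with find_first_bit() until nothing deliverable remains. The stand-alone C program below mirrors that shape; every name in it (fake_vcpu, IRQ_PEND_*, deliver_*, first_set_bit) is invented for the example and is not a kernel definition.

#include <stdio.h>

enum irq_type { IRQ_PEND_MCHK = 0, IRQ_PEND_PROG, IRQ_PEND_EXT_CLOCK, IRQ_PEND_COUNT };

struct fake_vcpu { unsigned long pending_irqs; };

typedef int (*deliver_fn)(struct fake_vcpu *);

static int deliver_mchk(struct fake_vcpu *v)  { v->pending_irqs &= ~(1UL << IRQ_PEND_MCHK);      puts("deliver mchk"); return 0; }
static int deliver_prog(struct fake_vcpu *v)  { v->pending_irqs &= ~(1UL << IRQ_PEND_PROG);      puts("deliver prog"); return 0; }
static int deliver_clock(struct fake_vcpu *v) { v->pending_irqs &= ~(1UL << IRQ_PEND_EXT_CLOCK); puts("deliver ckc");  return 0; }

/* table indexed by bit number; the bit order encodes delivery priority */
static const deliver_fn funcs[IRQ_PEND_COUNT] = { deliver_mchk, deliver_prog, deliver_clock };

/* stand-in for find_first_bit(): lowest set bit, or max if none is set */
static int first_set_bit(unsigned long word, int max)
{
        int i;

        for (i = 0; i < max; i++)
                if (word & (1UL << i))
                        return i;
        return max;
}

int main(void)
{
        struct fake_vcpu v = { (1UL << IRQ_PEND_PROG) | (1UL << IRQ_PEND_EXT_CLOCK) };
        int rc = 0, type;

        /* same loop shape as the new kvm_s390_deliver_pending_interrupts() above */
        while (!rc && (type = first_set_bit(v.pending_irqs, IRQ_PEND_COUNT)) != IRQ_PEND_COUNT)
                rc = funcs[type](&v);
        return rc;
}

The payoff of this pattern is that delivery priority falls out of the bit ordering for free, and "is anything pending?" becomes a single bitmap test instead of a list walk under the lock.
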
index 6b049ee..3e09801 100644 (file)
@@ -81,10 +81,17 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
+       { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
+       { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
+       { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
+       { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
+       { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
+       { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
+       { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "diagnose_10", VCPU_STAT(diagnose_10) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
@@ -453,6 +460,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        spin_lock_init(&kvm->arch.float_int.lock);
        INIT_LIST_HEAD(&kvm->arch.float_int.list);
        init_waitqueue_head(&kvm->arch.ipte_wq);
+       mutex_init(&kvm->arch.ipte_mutex);
 
        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
        VM_EVENT(kvm, 3, "%s", "vm created");
@@ -711,7 +719,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
        }
 
        spin_lock_init(&vcpu->arch.local_int.lock);
-       INIT_LIST_HEAD(&vcpu->arch.local_int.list);
        vcpu->arch.local_int.float_int = &kvm->arch.float_int;
        vcpu->arch.local_int.wq = &vcpu->wq;
        vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
@@ -1114,13 +1121,15 @@ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
                                      unsigned long token)
 {
        struct kvm_s390_interrupt inti;
-       inti.parm64 = token;
+       struct kvm_s390_irq irq;
 
        if (start_token) {
-               inti.type = KVM_S390_INT_PFAULT_INIT;
-               WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
+               irq.u.ext.ext_params2 = token;
+               irq.type = KVM_S390_INT_PFAULT_INIT;
+               WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
        } else {
                inti.type = KVM_S390_INT_PFAULT_DONE;
+               inti.parm64 = token;
                WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
        }
 }
@@ -1614,11 +1623,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        switch (ioctl) {
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;
+               struct kvm_s390_irq s390irq;
 
                r = -EFAULT;
                if (copy_from_user(&s390int, argp, sizeof(s390int)))
                        break;
-               r = kvm_s390_inject_vcpu(vcpu, &s390int);
+               if (s390int_to_s390irq(&s390int, &s390irq))
+                       return -EINVAL;
+               r = kvm_s390_inject_vcpu(vcpu, &s390irq);
                break;
        }
        case KVM_S390_STORE_STATUS:
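
Illustrative sketch, not part of the patch: the KVM_S390_INTERRUPT ioctl path above now converts the legacy {type, parm, parm64} structure into the typed kvm_s390_irq via s390int_to_s390irq() before injection, rejecting over-wide codes up front. The toy program below shows the conversion-with-validation idea; the struct and enum names (legacy_int, typed_irq, T_*) are invented for the example and are not the real ABI definitions.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* toy stand-ins for the two ABI structs; field names are illustrative only */
struct legacy_int { uint32_t type; uint32_t parm; uint64_t parm64; };
struct typed_irq  { uint32_t type; union { uint16_t pgm_code; uint32_t prefix; uint64_t mcic; } u; };

enum { T_PROGRAM = 1, T_SET_PREFIX = 2, T_MCHK = 3 };

static int convert(const struct legacy_int *in, struct typed_irq *out)
{
        out->type = in->type;
        switch (in->type) {
        case T_PROGRAM:
                if (in->parm & 0xffff0000)      /* program interruption codes are 16 bit */
                        return -EINVAL;
                out->u.pgm_code = in->parm;
                break;
        case T_SET_PREFIX:
                out->u.prefix = in->parm;
                break;
        case T_MCHK:
                out->u.mcic = in->parm64;
                break;
        }
        return 0;
}

int main(void)
{
        struct legacy_int li = { T_PROGRAM, 0x12340006, 0 };
        struct typed_irq irq;

        printf("convert: %d\n", convert(&li, &irq));    /* -22: bad code rejected before any state changes */
        return 0;
}

Rejecting malformed parameters during conversion keeps the per-type __inject_* helpers free of ABI checks and means nothing is allocated or queued for an invalid request.
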
index 244d023..a8f3d9b 100644 (file)
@@ -24,8 +24,6 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 /* declare vfacilities extern */
 extern unsigned long *vfacilities;
 
-int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
-
 /* Transactional Memory Execution related macros */
 #define IS_TE_ENABLED(vcpu)    ((vcpu->arch.sie_block->ecb & 0x10))
 #define TDB_FORMAT1            1
@@ -144,7 +142,7 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm);
 int __must_check kvm_s390_inject_vm(struct kvm *kvm,
                                    struct kvm_s390_interrupt *s390int);
 int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
-                                     struct kvm_s390_interrupt *s390int);
+                                     struct kvm_s390_irq *irq);
 int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
                                                    u64 cr6, u64 schid);
@@ -152,6 +150,10 @@ void kvm_s390_reinject_io_int(struct kvm *kvm,
                              struct kvm_s390_interrupt_info *inti);
 int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
 
+/* implemented in intercept.c */
+void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc);
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+
 /* implemented in priv.c */
 int is_valid_psw(psw_t *psw);
 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
@@ -222,6 +224,9 @@ static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc)
        return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
 }
 
+int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+                       struct kvm_s390_irq *s390irq);
+
 /* implemented in interrupt.c */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int psw_extint_disabled(struct kvm_vcpu *vcpu);
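
Illustrative sketch, not part of the patch: the header now exports kvm_s390_rewind_psw() (implemented in intercept.c) so that handlers no longer open-code __rewind_psw() when they want an instruction retried. Conceptually the helper subtracts the instruction length from the PSW address and wraps within the current addressing mode; the toy function below models only that arithmetic, and rewind_addr/addr_bits are invented names, whereas the real helper takes the vcpu and derives the mask from the guest PSW.

#include <stdint.h>
#include <stdio.h>

/* toy model: subtract the instruction length and wrap within the addressing mode */
static uint64_t rewind_addr(uint64_t addr, int ilc, int addr_bits)
{
        uint64_t mask = (addr_bits == 64) ? ~0ULL : ((1ULL << addr_bits) - 1);

        return (addr - ilc) & mask;
}

int main(void)
{
        /* rewinding a 4-byte instruction that started at address 2 wraps in 31-bit mode */
        printf("%llx\n", (unsigned long long)rewind_addr(0x2, 4, 31));
        return 0;
}
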
index f47cb0c..1be578d 100644 (file)
@@ -180,21 +180,18 @@ static int handle_skey(struct kvm_vcpu *vcpu)
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       vcpu->arch.sie_block->gpsw.addr =
-               __rewind_psw(vcpu->arch.sie_block->gpsw, 4);
+       kvm_s390_rewind_psw(vcpu, 4);
        VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
        return 0;
 }
 
 static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
 {
-       psw_t *psw = &vcpu->arch.sie_block->gpsw;
-
        vcpu->stat.instruction_ipte_interlock++;
-       if (psw_bits(*psw).p)
+       if (psw_bits(vcpu->arch.sie_block->gpsw).p)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
        wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
-       psw->addr = __rewind_psw(*psw, 4);
+       kvm_s390_rewind_psw(vcpu, 4);
        VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
        return 0;
 }
@@ -650,10 +647,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
        start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
-       if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
-               if (kvm_s390_check_low_addr_protection(vcpu, start))
-                       return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
-       }
+       start = kvm_s390_logical_to_effective(vcpu, start);
 
        switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
        case 0x00000000:
@@ -669,6 +663,12 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
        default:
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
        }
+
+       if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+               if (kvm_s390_check_low_addr_protection(vcpu, start))
+                       return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+       }
+
        while (start < end) {
                unsigned long useraddr, abs_addr;
 
@@ -725,8 +725,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
        /* Rewind PSW to repeat the ESSA instruction */
-       vcpu->arch.sie_block->gpsw.addr =
-               __rewind_psw(vcpu->arch.sie_block->gpsw, 4);
+       kvm_s390_rewind_psw(vcpu, 4);
        vcpu->arch.sie_block->cbrlo &= PAGE_MASK;       /* reset nceo */
        cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
        down_read(&gmap->mm->mmap_sem);
@@ -769,8 +768,8 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
 {
        int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
        int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-       u32 val = 0;
-       int reg, rc;
+       int reg, rc, nr_regs;
+       u32 ctl_array[16];
        u64 ga;
 
        vcpu->stat.instruction_lctl++;
@@ -786,19 +785,20 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
        VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
        trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
 
+       nr_regs = ((reg3 - reg1) & 0xf) + 1;
+       rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+       if (rc)
+               return kvm_s390_inject_prog_cond(vcpu, rc);
        reg = reg1;
+       nr_regs = 0;
        do {
-               rc = read_guest(vcpu, ga, &val, sizeof(val));
-               if (rc)
-                       return kvm_s390_inject_prog_cond(vcpu, rc);
                vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
-               vcpu->arch.sie_block->gcr[reg] |= val;
-               ga += 4;
+               vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
                if (reg == reg3)
                        break;
                reg = (reg + 1) % 16;
        } while (1);
-
+       kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
        return 0;
 }
 
@@ -806,9 +806,9 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
 {
        int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
        int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+       int reg, rc, nr_regs;
+       u32 ctl_array[16];
        u64 ga;
-       u32 val;
-       int reg, rc;
 
        vcpu->stat.instruction_stctl++;
 
@@ -824,26 +824,24 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
        trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
 
        reg = reg1;
+       nr_regs = 0;
        do {
-               val = vcpu->arch.sie_block->gcr[reg] &  0x00000000fffffffful;
-               rc = write_guest(vcpu, ga, &val, sizeof(val));
-               if (rc)
-                       return kvm_s390_inject_prog_cond(vcpu, rc);
-               ga += 4;
+               ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
                if (reg == reg3)
                        break;
                reg = (reg + 1) % 16;
        } while (1);
-
-       return 0;
+       rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+       return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
 static int handle_lctlg(struct kvm_vcpu *vcpu)
 {
        int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
        int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-       u64 ga, val;
-       int reg, rc;
+       int reg, rc, nr_regs;
+       u64 ctl_array[16];
+       u64 ga;
 
        vcpu->stat.instruction_lctlg++;
 
@@ -855,22 +853,22 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
        if (ga & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-       reg = reg1;
-
        VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
        trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
 
+       nr_regs = ((reg3 - reg1) & 0xf) + 1;
+       rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+       if (rc)
+               return kvm_s390_inject_prog_cond(vcpu, rc);
+       reg = reg1;
+       nr_regs = 0;
        do {
-               rc = read_guest(vcpu, ga, &val, sizeof(val));
-               if (rc)
-                       return kvm_s390_inject_prog_cond(vcpu, rc);
-               vcpu->arch.sie_block->gcr[reg] = val;
-               ga += 8;
+               vcpu->arch.sie_block->gcr[reg] = ctl_array[nr_regs++];
                if (reg == reg3)
                        break;
                reg = (reg + 1) % 16;
        } while (1);
-
+       kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
        return 0;
 }
 
@@ -878,8 +876,9 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
 {
        int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
        int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-       u64 ga, val;
-       int reg, rc;
+       int reg, rc, nr_regs;
+       u64 ctl_array[16];
+       u64 ga;
 
        vcpu->stat.instruction_stctg++;
 
@@ -891,23 +890,19 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
        if (ga & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-       reg = reg1;
-
        VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
        trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
 
+       reg = reg1;
+       nr_regs = 0;
        do {
-               val = vcpu->arch.sie_block->gcr[reg];
-               rc = write_guest(vcpu, ga, &val, sizeof(val));
-               if (rc)
-                       return kvm_s390_inject_prog_cond(vcpu, rc);
-               ga += 8;
+               ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
                if (reg == reg3)
                        break;
                reg = (reg + 1) % 16;
        } while (1);
-
-       return 0;
+       rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+       return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
 static const intercept_handler_t eb_handlers[256] = {
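
Illustrative sketch, not part of the patch: the lctl/lctlg/stctl/stctg handlers above now perform one read_guest() or write_guest() covering the whole register range instead of one 4- or 8-byte guest access per register, then distribute the values with the same modulo-16 wraparound. The stand-alone program below mirrors the load side; gcr, read_block and load_ctl are invented names standing in for the guest control registers and the guest-access helper.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t gcr[16];        /* toy guest control registers */

/* stand-in for read_guest(): one contiguous copy instead of a per-register loop */
static int read_block(const void *guest, void *dst, size_t len)
{
        memcpy(dst, guest, len);
        return 0;
}

static int load_ctl(int reg1, int reg3, const uint32_t *guest_mem)
{
        uint32_t ctl_array[16];
        int nr_regs = ((reg3 - reg1) & 0xf) + 1;        /* the range may wrap past register 15 */
        int reg = reg1, i = 0;

        if (read_block(guest_mem, ctl_array, nr_regs * sizeof(uint32_t)))
                return -1;
        do {
                gcr[reg] = (gcr[reg] & 0xffffffff00000000ULL) | ctl_array[i++];
                if (reg == reg3)
                        break;
                reg = (reg + 1) % 16;
        } while (1);
        return 0;
}

int main(void)
{
        uint32_t mem[3] = { 1, 2, 3 };

        load_ctl(14, 0, mem);   /* loads cr14, cr15 and wraps around to cr0 */
        printf("%llu %llu %llu\n", (unsigned long long)gcr[14],
               (unsigned long long)gcr[15], (unsigned long long)gcr[0]);
        return 0;
}

Doing the guest access first also means a translation fault aborts the instruction before any control register has been touched, rather than leaving the set half-updated; the handlers then request KVM_REQ_TLB_FLUSH because control registers changed.
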
index cf243ba..6651f9f 100644 (file)
 #include "kvm-s390.h"
 #include "trace.h"
 
-static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
+static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
                        u64 *reg)
 {
        struct kvm_s390_local_interrupt *li;
-       struct kvm_vcpu *dst_vcpu = NULL;
        int cpuflags;
        int rc;
 
-       if (cpu_addr >= KVM_MAX_VCPUS)
-               return SIGP_CC_NOT_OPERATIONAL;
-
-       dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
        li = &dst_vcpu->arch.local_int;
 
        cpuflags = atomic_read(li->cpuflags);
@@ -48,55 +41,53 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
                rc = SIGP_CC_STATUS_STORED;
        }
 
-       VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
+       VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", dst_vcpu->vcpu_id,
+                  rc);
        return rc;
 }
 
-static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+                                   struct kvm_vcpu *dst_vcpu)
 {
-       struct kvm_s390_interrupt s390int = {
+       struct kvm_s390_irq irq = {
                .type = KVM_S390_INT_EMERGENCY,
-               .parm = vcpu->vcpu_id,
+               .u.emerg.code = vcpu->vcpu_id,
        };
-       struct kvm_vcpu *dst_vcpu = NULL;
        int rc = 0;
 
-       if (cpu_addr < KVM_MAX_VCPUS)
-               dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
-
-       rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
+       rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
        if (!rc)
-               VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
+               VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x",
+                          dst_vcpu->vcpu_id);
 
        return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
 }
 
-static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
+static int __sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
+{
+       return __inject_sigp_emergency(vcpu, dst_vcpu);
+}
+
+static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu,
+                                       struct kvm_vcpu *dst_vcpu,
                                        u16 asn, u64 *reg)
 {
-       struct kvm_vcpu *dst_vcpu = NULL;
        const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
        u16 p_asn, s_asn;
        psw_t *psw;
        u32 flags;
 
-       if (cpu_addr < KVM_MAX_VCPUS)
-               dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
        flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags);
        psw = &dst_vcpu->arch.sie_block->gpsw;
        p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff;  /* Primary ASN */
        s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff;  /* Secondary ASN */
 
-       /* Deliver the emergency signal? */
+       /* Inject the emergency signal? */
        if (!(flags & CPUSTAT_STOPPED)
            || (psw->mask & psw_int_mask) != psw_int_mask
            || ((flags & CPUSTAT_WAIT) && psw->addr != 0)
            || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) {
-               return __sigp_emergency(vcpu, cpu_addr);
+               return __inject_sigp_emergency(vcpu, dst_vcpu);
        } else {
                *reg &= 0xffffffff00000000UL;
                *reg |= SIGP_STATUS_INCORRECT_STATE;
@@ -104,23 +95,19 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
        }
 }
 
-static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __sigp_external_call(struct kvm_vcpu *vcpu,
+                               struct kvm_vcpu *dst_vcpu)
 {
-       struct kvm_s390_interrupt s390int = {
+       struct kvm_s390_irq irq = {
                .type = KVM_S390_INT_EXTERNAL_CALL,
-               .parm = vcpu->vcpu_id,
+               .u.extcall.code = vcpu->vcpu_id,
        };
-       struct kvm_vcpu *dst_vcpu = NULL;
        int rc;
 
-       if (cpu_addr < KVM_MAX_VCPUS)
-               dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
-
-       rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
+       rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
        if (!rc)
-               VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
+               VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x",
+                          dst_vcpu->vcpu_id);
 
        return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
 }
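
The emergency-signal and external-call helpers now build a struct kvm_s390_irq instead of the old struct kvm_s390_interrupt, so the sending CPU's address goes into a typed union member (u.emerg.code / u.extcall.code) rather than the generic parm field. A minimal sketch of the new construction, using only fields visible in this hunk:

    /* Sketch: per-type payload with the kvm_s390_irq layout used above. */
    struct kvm_s390_irq irq = {
            .type           = KVM_S390_INT_EXTERNAL_CALL,
            .u.extcall.code = vcpu->vcpu_id,      /* address of the sender */
    };
    rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);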
@@ -128,29 +115,20 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
 static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action)
 {
        struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
-       struct kvm_s390_interrupt_info *inti;
        int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 
-       inti = kzalloc(sizeof(*inti), GFP_ATOMIC);
-       if (!inti)
-               return -ENOMEM;
-       inti->type = KVM_S390_SIGP_STOP;
-
        spin_lock(&li->lock);
        if (li->action_bits & ACTION_STOP_ON_STOP) {
                /* another SIGP STOP is pending */
-               kfree(inti);
                rc = SIGP_CC_BUSY;
                goto out;
        }
        if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
-               kfree(inti);
                if ((action & ACTION_STORE_ON_STOP) != 0)
                        rc = -ESHUTDOWN;
                goto out;
        }
-       list_add_tail(&inti->list, &li->list);
-       atomic_set(&li->active, 1);
+       set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
        li->action_bits |= action;
        atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
        kvm_s390_vcpu_wakeup(dst_vcpu);
@@ -160,23 +138,27 @@ out:
        return rc;
 }
 
-static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
+static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
 {
-       struct kvm_vcpu *dst_vcpu = NULL;
        int rc;
 
-       if (cpu_addr >= KVM_MAX_VCPUS)
-               return SIGP_CC_NOT_OPERATIONAL;
+       rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP);
+       VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", dst_vcpu->vcpu_id);
 
-       dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
+       return rc;
+}
 
-       rc = __inject_sigp_stop(dst_vcpu, action);
+static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
+                                       struct kvm_vcpu *dst_vcpu, u64 *reg)
+{
+       int rc;
 
-       VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
+       rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP |
+                                             ACTION_STORE_ON_STOP);
+       VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x",
+                  dst_vcpu->vcpu_id);
 
-       if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) {
+       if (rc == -ESHUTDOWN) {
                /* If the CPU has already been stopped, we still have
                 * to save the status when doing stop-and-store. This
                 * has to be done after unlocking all spinlocks. */
@@ -212,18 +194,12 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
        return rc;
 }
 
-static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
-                            u64 *reg)
+static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
+                            u32 address, u64 *reg)
 {
        struct kvm_s390_local_interrupt *li;
-       struct kvm_vcpu *dst_vcpu = NULL;
-       struct kvm_s390_interrupt_info *inti;
        int rc;
 
-       if (cpu_addr < KVM_MAX_VCPUS)
-               dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
        li = &dst_vcpu->arch.local_int;
 
        /*
@@ -238,46 +214,34 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
                return SIGP_CC_STATUS_STORED;
        }
 
-       inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-       if (!inti)
-               return SIGP_CC_BUSY;
-
        spin_lock(&li->lock);
        /* cpu must be in stopped state */
        if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
                *reg &= 0xffffffff00000000UL;
                *reg |= SIGP_STATUS_INCORRECT_STATE;
                rc = SIGP_CC_STATUS_STORED;
-               kfree(inti);
                goto out_li;
        }
 
-       inti->type = KVM_S390_SIGP_SET_PREFIX;
-       inti->prefix.address = address;
-
-       list_add_tail(&inti->list, &li->list);
-       atomic_set(&li->active, 1);
+       li->irq.prefix.address = address;
+       set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
        kvm_s390_vcpu_wakeup(dst_vcpu);
        rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 
-       VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
+       VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", dst_vcpu->vcpu_id,
+                  address);
 out_li:
        spin_unlock(&li->lock);
        return rc;
 }
 
-static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
-                                       u32 addr, u64 *reg)
+static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
+                                      struct kvm_vcpu *dst_vcpu,
+                                      u32 addr, u64 *reg)
 {
-       struct kvm_vcpu *dst_vcpu = NULL;
        int flags;
        int rc;
 
-       if (cpu_id < KVM_MAX_VCPUS)
-               dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
-
        spin_lock(&dst_vcpu->arch.local_int.lock);
        flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
        spin_unlock(&dst_vcpu->arch.local_int.lock);
@@ -297,19 +261,12 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
        return rc;
 }
 
-static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
-                               u64 *reg)
+static int __sigp_sense_running(struct kvm_vcpu *vcpu,
+                               struct kvm_vcpu *dst_vcpu, u64 *reg)
 {
        struct kvm_s390_local_interrupt *li;
-       struct kvm_vcpu *dst_vcpu = NULL;
        int rc;
 
-       if (cpu_addr >= KVM_MAX_VCPUS)
-               return SIGP_CC_NOT_OPERATIONAL;
-
-       dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
        li = &dst_vcpu->arch.local_int;
        if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
                /* running */
@@ -321,26 +278,19 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
                rc = SIGP_CC_STATUS_STORED;
        }
 
-       VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr,
-                  rc);
+       VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x",
+                  dst_vcpu->vcpu_id, rc);
 
        return rc;
 }
 
-/* Test whether the destination CPU is available and not busy */
-static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu,
+                                  struct kvm_vcpu *dst_vcpu, u8 order_code)
 {
-       struct kvm_s390_local_interrupt *li;
-       int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
-       struct kvm_vcpu *dst_vcpu = NULL;
-
-       if (cpu_addr >= KVM_MAX_VCPUS)
-               return SIGP_CC_NOT_OPERATIONAL;
+       struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
+       /* handle (RE)START in user space */
+       int rc = -EOPNOTSUPP;
 
-       dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-       if (!dst_vcpu)
-               return SIGP_CC_NOT_OPERATIONAL;
-       li = &dst_vcpu->arch.local_int;
        spin_lock(&li->lock);
        if (li->action_bits & ACTION_STOP_ON_STOP)
                rc = SIGP_CC_BUSY;
@@ -349,90 +299,131 @@ static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
        return rc;
 }
 
-int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+static int __prepare_sigp_cpu_reset(struct kvm_vcpu *vcpu,
+                                   struct kvm_vcpu *dst_vcpu, u8 order_code)
 {
-       int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
-       int r3 = vcpu->arch.sie_block->ipa & 0x000f;
-       u32 parameter;
-       u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
-       u8 order_code;
-       int rc;
+       /* handle (INITIAL) CPU RESET in user space */
+       return -EOPNOTSUPP;
+}
 
-       /* sigp in userspace can exit */
-       if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
-               return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+static int __prepare_sigp_unknown(struct kvm_vcpu *vcpu,
+                                 struct kvm_vcpu *dst_vcpu)
+{
+       /* handle unknown orders in user space */
+       return -EOPNOTSUPP;
+}
 
-       order_code = kvm_s390_get_base_disp_rs(vcpu);
+static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
+                          u16 cpu_addr, u32 parameter, u64 *status_reg)
+{
+       int rc;
+       struct kvm_vcpu *dst_vcpu;
 
-       if (r1 % 2)
-               parameter = vcpu->run->s.regs.gprs[r1];
-       else
-               parameter = vcpu->run->s.regs.gprs[r1 + 1];
+       if (cpu_addr >= KVM_MAX_VCPUS)
+               return SIGP_CC_NOT_OPERATIONAL;
+
+       dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+       if (!dst_vcpu)
+               return SIGP_CC_NOT_OPERATIONAL;
 
-       trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
        switch (order_code) {
        case SIGP_SENSE:
                vcpu->stat.instruction_sigp_sense++;
-               rc = __sigp_sense(vcpu, cpu_addr,
-                                 &vcpu->run->s.regs.gprs[r1]);
+               rc = __sigp_sense(vcpu, dst_vcpu, status_reg);
                break;
        case SIGP_EXTERNAL_CALL:
                vcpu->stat.instruction_sigp_external_call++;
-               rc = __sigp_external_call(vcpu, cpu_addr);
+               rc = __sigp_external_call(vcpu, dst_vcpu);
                break;
        case SIGP_EMERGENCY_SIGNAL:
                vcpu->stat.instruction_sigp_emergency++;
-               rc = __sigp_emergency(vcpu, cpu_addr);
+               rc = __sigp_emergency(vcpu, dst_vcpu);
                break;
        case SIGP_STOP:
                vcpu->stat.instruction_sigp_stop++;
-               rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP);
+               rc = __sigp_stop(vcpu, dst_vcpu);
                break;
        case SIGP_STOP_AND_STORE_STATUS:
-               vcpu->stat.instruction_sigp_stop++;
-               rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP |
-                                                ACTION_STOP_ON_STOP);
+               vcpu->stat.instruction_sigp_stop_store_status++;
+               rc = __sigp_stop_and_store_status(vcpu, dst_vcpu, status_reg);
                break;
        case SIGP_STORE_STATUS_AT_ADDRESS:
-               rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter,
-                                                &vcpu->run->s.regs.gprs[r1]);
-               break;
-       case SIGP_SET_ARCHITECTURE:
-               vcpu->stat.instruction_sigp_arch++;
-               rc = __sigp_set_arch(vcpu, parameter);
+               vcpu->stat.instruction_sigp_store_status++;
+               rc = __sigp_store_status_at_addr(vcpu, dst_vcpu, parameter,
+                                                status_reg);
                break;
        case SIGP_SET_PREFIX:
                vcpu->stat.instruction_sigp_prefix++;
-               rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
-                                      &vcpu->run->s.regs.gprs[r1]);
+               rc = __sigp_set_prefix(vcpu, dst_vcpu, parameter, status_reg);
                break;
        case SIGP_COND_EMERGENCY_SIGNAL:
-               rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter,
-                                                 &vcpu->run->s.regs.gprs[r1]);
+               vcpu->stat.instruction_sigp_cond_emergency++;
+               rc = __sigp_conditional_emergency(vcpu, dst_vcpu, parameter,
+                                                 status_reg);
                break;
        case SIGP_SENSE_RUNNING:
                vcpu->stat.instruction_sigp_sense_running++;
-               rc = __sigp_sense_running(vcpu, cpu_addr,
-                                         &vcpu->run->s.regs.gprs[r1]);
+               rc = __sigp_sense_running(vcpu, dst_vcpu, status_reg);
                break;
        case SIGP_START:
-               rc = sigp_check_callable(vcpu, cpu_addr);
-               if (rc == SIGP_CC_ORDER_CODE_ACCEPTED)
-                       rc = -EOPNOTSUPP;    /* Handle START in user space */
+               vcpu->stat.instruction_sigp_start++;
+               rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
                break;
        case SIGP_RESTART:
                vcpu->stat.instruction_sigp_restart++;
-               rc = sigp_check_callable(vcpu, cpu_addr);
-               if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) {
-                       VCPU_EVENT(vcpu, 4,
-                                  "sigp restart %x to handle userspace",
-                                  cpu_addr);
-                       /* user space must know about restart */
-                       rc = -EOPNOTSUPP;
-               }
+               rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
+               break;
+       case SIGP_INITIAL_CPU_RESET:
+               vcpu->stat.instruction_sigp_init_cpu_reset++;
+               rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+               break;
+       case SIGP_CPU_RESET:
+               vcpu->stat.instruction_sigp_cpu_reset++;
+               rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+               break;
+       default:
+               vcpu->stat.instruction_sigp_unknown++;
+               rc = __prepare_sigp_unknown(vcpu, dst_vcpu);
+       }
+
+       if (rc == -EOPNOTSUPP)
+               VCPU_EVENT(vcpu, 4,
+                          "sigp order %u -> cpu %x: handled in user space",
+                          order_code, dst_vcpu->vcpu_id);
+
+       return rc;
+}
+
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+{
+       int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+       int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+       u32 parameter;
+       u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
+       u8 order_code;
+       int rc;
+
+       /* sigp in userspace can exit */
+       if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+               return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+       order_code = kvm_s390_get_base_disp_rs(vcpu);
+
+       if (r1 % 2)
+               parameter = vcpu->run->s.regs.gprs[r1];
+       else
+               parameter = vcpu->run->s.regs.gprs[r1 + 1];
+
+       trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
+       switch (order_code) {
+       case SIGP_SET_ARCHITECTURE:
+               vcpu->stat.instruction_sigp_arch++;
+               rc = __sigp_set_arch(vcpu, parameter);
                break;
        default:
-               return -EOPNOTSUPP;
+               rc = handle_sigp_dst(vcpu, order_code, cpu_addr,
+                                    parameter,
+                                    &vcpu->run->s.regs.gprs[r1]);
        }
 
        if (rc < 0)
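
The net effect of this refactor is that the destination-CPU lookup and the KVM_MAX_VCPUS bound check now live in one place, handle_sigp_dst(), and every order helper receives an already-validated dst_vcpu. Orders that previously open-coded an allocation of kvm_s390_interrupt_info (STOP, SET PREFIX) instead set a bit in the destination's pending_irqs bitmap and wake the target. A condensed sketch of the resulting flow, restricted to names that appear in this diff:

    /* Validate the CPU address once, then dispatch to helpers that
     * take a struct kvm_vcpu *dst_vcpu directly. */
    if (cpu_addr >= KVM_MAX_VCPUS)
            return SIGP_CC_NOT_OPERATIONAL;
    dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
    if (!dst_vcpu)
            return SIGP_CC_NOT_OPERATIONAL;

    switch (order_code) {
    case SIGP_SENSE:
            rc = __sigp_sense(vcpu, dst_vcpu, status_reg);
            break;
    /* ... the other destination-addressed orders dispatch the same way;
     * -EOPNOTSUPP from a helper means "complete this order in user space". */
    }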
index 71c7eff..be99357 100644 (file)
@@ -844,7 +844,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 
        down_read(&mm->mmap_sem);
 retry:
-       ptep = get_locked_pte(current->mm, addr, &ptl);
+       ptep = get_locked_pte(mm, addr, &ptl);
        if (unlikely(!ptep)) {
                up_read(&mm->mmap_sem);
                return -EFAULT;
@@ -888,6 +888,45 @@ retry:
 }
 EXPORT_SYMBOL(set_guest_storage_key);
 
+unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
+{
+       spinlock_t *ptl;
+       pgste_t pgste;
+       pte_t *ptep;
+       uint64_t physaddr;
+       unsigned long key = 0;
+
+       down_read(&mm->mmap_sem);
+       ptep = get_locked_pte(mm, addr, &ptl);
+       if (unlikely(!ptep)) {
+               up_read(&mm->mmap_sem);
+               return -EFAULT;
+       }
+       pgste = pgste_get_lock(ptep);
+
+       if (pte_val(*ptep) & _PAGE_INVALID) {
+               key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
+               key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
+               key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
+               key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
+       } else {
+               physaddr = pte_val(*ptep) & PAGE_MASK;
+               key = page_get_storage_key(physaddr);
+
+               /* Reflect guest's logical view, not physical */
+               if (pgste_val(pgste) & PGSTE_GR_BIT)
+                       key |= _PAGE_REFERENCED;
+               if (pgste_val(pgste) & PGSTE_GC_BIT)
+                       key |= _PAGE_CHANGED;
+       }
+
+       pgste_set_unlock(ptep, pgste);
+       pte_unmap_unlock(ptep, ptl);
+       up_read(&mm->mmap_sem);
+       return key;
+}
+EXPORT_SYMBOL(get_guest_storage_key);
+
 #else /* CONFIG_PGSTE */
 
 static inline int page_table_with_pgste(struct page *page)
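
The new get_guest_storage_key() above returns the guest's logical view of a page's storage key: for a valid pte it reads the real key of the backing page and ORs in the guest reference/change bits tracked in the PGSTE; for an invalid pte it reconstructs the whole key from the PGSTE alone. A hedged sketch of pulling the architectural fields back out of the returned key byte; the masks below follow the usual z/Architecture key layout (4-bit access-control key, fetch protection, reference, change) and are spelled out here rather than taken from a kernel header:

    /* Sketch only: decode a storage key byte into its fields.
     * Layout, MSB first: ACC(4) | F | R | C | unused.
     * (Check for an -EFAULT return before decoding.) */
    unsigned long key = get_guest_storage_key(mm, addr);
    unsigned int acc = (key >> 4) & 0x0f;   /* access-control bits  */
    unsigned int fp  = (key >> 3) & 0x01;   /* fetch-protection bit */
    unsigned int ref = (key >> 2) & 0x01;   /* reference bit        */
    unsigned int chg = (key >> 1) & 0x01;   /* change bit           */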
index 6ed0c30..d89c6b8 100644 (file)
@@ -33,7 +33,7 @@
 
 #define KVM_MAX_VCPUS 255
 #define KVM_SOFT_MAX_VCPUS 160
-#define KVM_USER_MEM_SLOTS 125
+#define KVM_USER_MEM_SLOTS 509
 /* memory slots that are not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 3
 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
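
With KVM_PRIVATE_MEM_SLOTS unchanged at 3, bumping KVM_USER_MEM_SLOTS from 125 to 509 makes the total KVM_MEM_SLOTS_NUM = 509 + 3 = 512, a power of two again; the larger user-visible limit leaves room for guests with many memory regions, though that motivation is inferred here rather than stated in the hunk.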
@@ -51,6 +51,7 @@
                          | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
 
 #define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
+#define CR3_PCID_INVD           (1UL << 63)
 #define CR4_RESERVED_BITS                                               \
        (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
                          | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
@@ -361,6 +362,7 @@ struct kvm_vcpu_arch {
        int mp_state;
        u64 ia32_misc_enable_msr;
        bool tpr_access_reporting;
+       u64 ia32_xss;
 
        /*
         * Paging state of the vcpu
@@ -542,7 +544,7 @@ struct kvm_apic_map {
        struct rcu_head rcu;
        u8 ldr_bits;
        /* fields below are used to decode ldr values in different modes */
-       u32 cid_shift, cid_mask, lid_mask;
+       u32 cid_shift, cid_mask, lid_mask, broadcast;
        struct kvm_lapic *phys_map[256];
        /* first index is cluster id second is cpu id in a cluster */
        struct kvm_lapic *logical_map[16][16];
@@ -602,6 +604,9 @@ struct kvm_arch {
 
        struct kvm_xen_hvm_config xen_hvm_config;
 
+       /* reads protected by irq_srcu, writes by irq_lock */
+       struct hlist_head mask_notifier_list;
+
        /* fields used by HYPER-V emulation */
        u64 hv_guest_os_id;
        u64 hv_hypercall;
@@ -659,6 +664,16 @@ struct msr_data {
        u64 data;
 };
 
+struct kvm_lapic_irq {
+       u32 vector;
+       u32 delivery_mode;
+       u32 dest_mode;
+       u32 level;
+       u32 trig_mode;
+       u32 shorthand;
+       u32 dest_id;
+};
+
 struct kvm_x86_ops {
        int (*cpu_has_kvm_support)(void);          /* __init */
        int (*disabled_by_bios)(void);             /* __init */
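
struct kvm_lapic_irq is the x86-local description of an interrupt to be delivered through the local APIC, now that the APIC types live under arch/x86. A hedged sketch of filling one in for a fixed, edge-triggered interrupt to a physical destination; APIC_DM_FIXED and the 0/1 encodings of dest_mode and trig_mode are the standard local-APIC conventions, not something defined in this hunk:

    /* Sketch only: fixed interrupt, physical destination, edge triggered. */
    struct kvm_lapic_irq irq = {
            .vector        = 0x30,           /* example vector           */
            .delivery_mode = APIC_DM_FIXED,
            .dest_mode     = 0,              /* 0 = physical             */
            .level         = 1,
            .trig_mode     = 0,              /* 0 = edge                 */
            .shorthand     = 0,              /* no destination shorthand */
            .dest_id       = 1,              /* target APIC id (example) */
    };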
@@ -767,6 +782,7 @@ struct kvm_x86_ops {
                               enum x86_intercept_stage stage);
        void (*handle_external_intr)(struct kvm_vcpu *vcpu);
        bool (*mpx_supported)(void);
+       bool (*xsaves_supported)(void);
 
        int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
 
@@ -818,6 +834,19 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
                          const void *val, int bytes);
 u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 
+struct kvm_irq_mask_notifier {
+       void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
+       int irq;
+       struct hlist_node link;
+};
+
+void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
+                                   struct kvm_irq_mask_notifier *kimn);
+void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
+                                     struct kvm_irq_mask_notifier *kimn);
+void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
+                            bool mask);
+
 extern bool tdp_enabled;
 
 u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
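
The irq mask-notifier API moves into x86 along with the ioapic/irq_comm code (see the Makefile hunk further below). A consumer embeds a struct kvm_irq_mask_notifier, points func at a callback, and registers it for a GSI; the device structure and callback names below are illustrative, not part of the patch:

    /* Sketch: hypothetical consumer of the mask-notifier API declared above. */
    struct my_emulated_dev {
            struct kvm_irq_mask_notifier mask_notifier;
    };

    static void my_dev_irq_masked(struct kvm_irq_mask_notifier *kimn, bool masked)
    {
            struct my_emulated_dev *dev =
                    container_of(kimn, struct my_emulated_dev, mask_notifier);
            /* pause or resume interrupt generation for dev as appropriate */
    }

    /* during setup:
     *   dev->mask_notifier.func = my_dev_irq_masked;
     *   kvm_register_irq_mask_notifier(kvm, gsi, &dev->mask_notifier);
     * and kvm_unregister_irq_mask_notifier() on teardown. */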
@@ -863,7 +892,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
-void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector);
+void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
                    int reason, bool has_error_code, u32 error_code);
@@ -895,6 +924,7 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                            gfn_t gfn, void *data, int offset, int len,
                            u32 access);
 bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
+bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
 
 static inline int __kvm_irq_line_state(unsigned long *irq_state,
                                       int irq_source_id, int level)
@@ -1066,6 +1096,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 void kvm_define_shared_msr(unsigned index, u32 msr);
 int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
+unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
 
 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
index bcbfade..45afaee 100644 (file)
@@ -69,6 +69,7 @@
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING      0x00000400
 #define SECONDARY_EXEC_ENABLE_INVPCID          0x00001000
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
+#define SECONDARY_EXEC_XSAVES                  0x00100000
 
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
@@ -159,6 +160,8 @@ enum vmcs_field {
        EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
        VMREAD_BITMAP                   = 0x00002026,
        VMWRITE_BITMAP                  = 0x00002028,
+       XSS_EXIT_BITMAP                 = 0x0000202C,
+       XSS_EXIT_BITMAP_HIGH            = 0x0000202D,
        GUEST_PHYSICAL_ADDRESS          = 0x00002400,
        GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
        VMCS_LINK_POINTER               = 0x00002800,
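
Together with SECONDARY_EXEC_XSAVES above, the new XSS_EXIT_BITMAP field lets the host decide which IA32_XSS components cause XSAVES/XRSTORS to exit. A rough sketch of how a host might program it, assuming the usual vmcs_write64() helper; exec_control here stands for whatever local secondary-controls variable is being built up:

    /* Sketch only: expose XSAVES to the guest and let no XSS component
     * trap (an all-zero exit bitmap). */
    exec_control |= SECONDARY_EXEC_XSAVES;
    vmcs_write64(XSS_EXIT_BITMAP, 0);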
index 7e7a79a..5fa9770 100644 (file)
@@ -16,6 +16,7 @@
 #define XSTATE_Hi16_ZMM                0x80
 
 #define XSTATE_FPSSE   (XSTATE_FP | XSTATE_SSE)
+#define XSTATE_AVX512  (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
 /* Bit 63 of XCR0 is reserved for future expansion */
 #define XSTATE_EXTEND_MASK     (~(XSTATE_FPSSE | (1ULL << 63)))
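
XSTATE_AVX512 bundles the three AVX-512 state components (opmask, ZMM_Hi256, Hi16_ZMM) so they can be validated as a unit, since the three components are only meaningful when enabled together. For example (a hedged snippet; xcr0 stands for whatever XCR0 value is being checked):

    /* All-or-nothing: reject an XCR0 that enables only part of AVX-512. */
    if (xcr0 & XSTATE_AVX512) {
            if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512)
                    return 1;
    }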
 
index 990a2fe..b813bf9 100644 (file)
@@ -72,6 +72,8 @@
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_APIC_WRITE          56
 #define EXIT_REASON_INVPCID             58
+#define EXIT_REASON_XSAVES              63
+#define EXIT_REASON_XRSTORS             64
 
 #define VMX_EXIT_REASONS \
        { EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
        { EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
        { EXIT_REASON_INVD,                  "INVD" }, \
        { EXIT_REASON_INVVPID,               "INVVPID" }, \
-       { EXIT_REASON_INVPCID,               "INVPCID" }
+       { EXIT_REASON_INVPCID,               "INVPCID" }, \
+       { EXIT_REASON_XSAVES,                "XSAVES" }, \
+       { EXIT_REASON_XRSTORS,               "XRSTORS" }
 
 #endif /* _UAPIVMX_H */
index f6945be..94f6434 100644 (file)
@@ -283,7 +283,14 @@ NOKPROBE_SYMBOL(do_async_page_fault);
 static void __init paravirt_ops_setup(void)
 {
        pv_info.name = "KVM";
-       pv_info.paravirt_enabled = 1;
+
+       /*
+        * KVM isn't paravirt in the sense of paravirt_enabled.  A KVM
+        * guest kernel works like a bare metal kernel with additional
+        * features, and paravirt_enabled is about features that are
+        * missing.
+        */
+       pv_info.paravirt_enabled = 0;
 
        if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
                pv_cpu_ops.io_delay = kvm_io_delay;
index d9156ce..42caaef 100644 (file)
@@ -59,13 +59,12 @@ static void kvm_get_wallclock(struct timespec *now)
 
        native_write_msr(msr_kvm_wall_clock, low, high);
 
-       preempt_disable();
-       cpu = smp_processor_id();
+       cpu = get_cpu();
 
        vcpu_time = &hv_clock[cpu].pvti;
        pvclock_read_wallclock(&wall_clock, vcpu_time, now);
 
-       preempt_enable();
+       put_cpu();
 }
 
 static int kvm_set_wallclock(const struct timespec *now)
@@ -107,11 +106,10 @@ static unsigned long kvm_get_tsc_khz(void)
        int cpu;
        unsigned long tsc_khz;
 
-       preempt_disable();
-       cpu = smp_processor_id();
+       cpu = get_cpu();
        src = &hv_clock[cpu].pvti;
        tsc_khz = pvclock_tsc_khz(src);
-       preempt_enable();
+       put_cpu();
        return tsc_khz;
 }
 
@@ -263,7 +261,6 @@ void __init kvmclock_init(void)
 #endif
        kvm_get_preset_lpj();
        clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
-       pv_info.paravirt_enabled = 1;
        pv_info.name = "KVM";
 
        if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
@@ -284,23 +281,22 @@ int __init kvm_setup_vsyscall_timeinfo(void)
 
        size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
 
-       preempt_disable();
-       cpu = smp_processor_id();
+       cpu = get_cpu();
 
        vcpu_time = &hv_clock[cpu].pvti;
        flags = pvclock_read_flags(vcpu_time);
 
        if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
-               preempt_enable();
+               put_cpu();
                return 1;
        }
 
        if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
-               preempt_enable();
+               put_cpu();
                return ret;
        }
 
-       preempt_enable();
+       put_cpu();
 
        kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
 #endif
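
All three converted helpers in kvmclock.c trade the explicit preempt_disable()/smp_processor_id()/preempt_enable() triple for get_cpu()/put_cpu(), which express the same thing: get_cpu() disables preemption and returns the current CPU number, put_cpu() re-enables preemption. In sketch form:

    /* before */
    preempt_disable();
    cpu = smp_processor_id();
    /* ... use per-cpu data ... */
    preempt_enable();

    /* after: one call disables preemption and returns the CPU id */
    cpu = get_cpu();
    /* ... use per-cpu data ... */
    put_cpu();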
index 4c540c4..0de1fae 100644 (file)
@@ -738,3 +738,4 @@ void *get_xsave_addr(struct xsave_struct *xsave, int xstate)
 
        return (void *)xsave + xstate_comp_offsets[feature];
 }
+EXPORT_SYMBOL_GPL(get_xsave_addr);
index 25d22b2..08f790d 100644 (file)
@@ -7,14 +7,13 @@ CFLAGS_vmx.o := -I.
 
 KVM := ../../../virt/kvm
 
-kvm-y                  += $(KVM)/kvm_main.o $(KVM)/ioapic.o \
-                               $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
+kvm-y                  += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
                                $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
-kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)    += $(KVM)/assigned-dev.o $(KVM)/iommu.o
 kvm-$(CONFIG_KVM_ASYNC_PF)     += $(KVM)/async_pf.o
 
 kvm-y                  += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-                          i8254.o cpuid.o pmu.o
+                          i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
+kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)    += assigned-dev.o iommu.o
 kvm-intel-y            += vmx.o
 kvm-amd-y              += svm.o
 
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
new file mode 100644 (file)
index 0000000..6eb5c20
--- /dev/null
@@ -0,0 +1,1052 @@
+/*
+ * Kernel-based Virtual Machine - device assignment support
+ *
+ * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/fs.h>
+#include "irq.h"
+#include "assigned-dev.h"
+
+struct kvm_assigned_dev_kernel {
+       struct kvm_irq_ack_notifier ack_notifier;
+       struct list_head list;
+       int assigned_dev_id;
+       int host_segnr;
+       int host_busnr;
+       int host_devfn;
+       unsigned int entries_nr;
+       int host_irq;
+       bool host_irq_disabled;
+       bool pci_2_3;
+       struct msix_entry *host_msix_entries;
+       int guest_irq;
+       struct msix_entry *guest_msix_entries;
+       unsigned long irq_requested_type;
+       int irq_source_id;
+       int flags;
+       struct pci_dev *dev;
+       struct kvm *kvm;
+       spinlock_t intx_lock;
+       spinlock_t intx_mask_lock;
+       char irq_name[32];
+       struct pci_saved_state *pci_saved_state;
+};
+
+static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
+                                                     int assigned_dev_id)
+{
+       struct list_head *ptr;
+       struct kvm_assigned_dev_kernel *match;
+
+       list_for_each(ptr, head) {
+               match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
+               if (match->assigned_dev_id == assigned_dev_id)
+                       return match;
+       }
+       return NULL;
+}
+
+static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
+                                   *assigned_dev, int irq)
+{
+       int i, index;
+       struct msix_entry *host_msix_entries;
+
+       host_msix_entries = assigned_dev->host_msix_entries;
+
+       index = -1;
+       for (i = 0; i < assigned_dev->entries_nr; i++)
+               if (irq == host_msix_entries[i].vector) {
+                       index = i;
+                       break;
+               }
+       if (index < 0)
+               printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
+
+       return index;
+}
+
+static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
+{
+       struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+       int ret;
+
+       spin_lock(&assigned_dev->intx_lock);
+       if (pci_check_and_mask_intx(assigned_dev->dev)) {
+               assigned_dev->host_irq_disabled = true;
+               ret = IRQ_WAKE_THREAD;
+       } else
+               ret = IRQ_NONE;
+       spin_unlock(&assigned_dev->intx_lock);
+
+       return ret;
+}
+
+static void
+kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
+                                int vector)
+{
+       if (unlikely(assigned_dev->irq_requested_type &
+                    KVM_DEV_IRQ_GUEST_INTX)) {
+               spin_lock(&assigned_dev->intx_mask_lock);
+               if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
+                       kvm_set_irq(assigned_dev->kvm,
+                                   assigned_dev->irq_source_id, vector, 1,
+                                   false);
+               spin_unlock(&assigned_dev->intx_mask_lock);
+       } else
+               kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+                           vector, 1, false);
+}
+
+static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
+{
+       struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+
+       if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
+               spin_lock_irq(&assigned_dev->intx_lock);
+               disable_irq_nosync(irq);
+               assigned_dev->host_irq_disabled = true;
+               spin_unlock_irq(&assigned_dev->intx_lock);
+       }
+
+       kvm_assigned_dev_raise_guest_irq(assigned_dev,
+                                        assigned_dev->guest_irq);
+
+       return IRQ_HANDLED;
+}
+
+#ifdef __KVM_HAVE_MSI
+static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
+{
+       struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+       int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
+                                      assigned_dev->irq_source_id,
+                                      assigned_dev->guest_irq, 1);
+       return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
+}
+
+static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
+{
+       struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+
+       kvm_assigned_dev_raise_guest_irq(assigned_dev,
+                                        assigned_dev->guest_irq);
+
+       return IRQ_HANDLED;
+}
+#endif
+
+#ifdef __KVM_HAVE_MSIX
+static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
+{
+       struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+       int index = find_index_from_host_irq(assigned_dev, irq);
+       u32 vector;
+       int ret = 0;
+
+       if (index >= 0) {
+               vector = assigned_dev->guest_msix_entries[index].vector;
+               ret = kvm_set_irq_inatomic(assigned_dev->kvm,
+                                          assigned_dev->irq_source_id,
+                                          vector, 1);
+       }
+
+       return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
+}
+
+static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
+{
+       struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+       int index = find_index_from_host_irq(assigned_dev, irq);
+       u32 vector;
+
+       if (index >= 0) {
+               vector = assigned_dev->guest_msix_entries[index].vector;
+               kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
+       }
+
+       return IRQ_HANDLED;
+}
+#endif
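
Each interrupt flavour above follows the threaded-IRQ split: a hard handler that runs in interrupt context, does only non-sleeping work (mask the line, try kvm_set_irq_inatomic()), and returns IRQ_WAKE_THREAD when the rest must be deferred, plus a threaded handler that performs the actual guest injection in sleepable context. In compressed form (the handler names below are placeholders, not functions from this file):

    /* Hard handler: hard-irq context, must not sleep. */
    static irqreturn_t hard_handler(int irq, void *dev_id)
    {
            /* ack/mask the device; -EWOULDBLOCK from the inatomic
             * injection path means "let the thread finish the job" */
            return IRQ_WAKE_THREAD;
    }

    /* Threaded handler: sleepable, does the real kvm_set_irq() work. */
    static irqreturn_t thread_handler(int irq, void *dev_id)
    {
            return IRQ_HANDLED;
    }

    /* paired up via:
     *   request_threaded_irq(host_irq, hard_handler, thread_handler,
     *                        flags, dev->irq_name, dev);                */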
+
+/* Ack the irq line for an assigned device */
+static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
+{
+       struct kvm_assigned_dev_kernel *dev =
+               container_of(kian, struct kvm_assigned_dev_kernel,
+                            ack_notifier);
+
+       kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);
+
+       spin_lock(&dev->intx_mask_lock);
+
+       if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
+               bool reassert = false;
+
+               spin_lock_irq(&dev->intx_lock);
+               /*
+                * The guest IRQ may be shared so this ack can come from an
+                * IRQ for another guest device.
+                */
+               if (dev->host_irq_disabled) {
+                       if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
+                               enable_irq(dev->host_irq);
+                       else if (!pci_check_and_unmask_intx(dev->dev))
+                               reassert = true;
+                       dev->host_irq_disabled = reassert;
+               }
+               spin_unlock_irq(&dev->intx_lock);
+
+               if (reassert)
+                       kvm_set_irq(dev->kvm, dev->irq_source_id,
+                                   dev->guest_irq, 1, false);
+       }
+
+       spin_unlock(&dev->intx_mask_lock);
+}
+
+static void deassign_guest_irq(struct kvm *kvm,
+                              struct kvm_assigned_dev_kernel *assigned_dev)
+{
+       if (assigned_dev->ack_notifier.gsi != -1)
+               kvm_unregister_irq_ack_notifier(kvm,
+                                               &assigned_dev->ack_notifier);
+
+       kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+                   assigned_dev->guest_irq, 0, false);
+
+       if (assigned_dev->irq_source_id != -1)
+               kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
+       assigned_dev->irq_source_id = -1;
+       assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
+}
+
+/* This function implicitly holds the kvm->lock mutex due to cancel_work_sync() */
+static void deassign_host_irq(struct kvm *kvm,
+                             struct kvm_assigned_dev_kernel *assigned_dev)
+{
+       /*
+        * We disable irq here to prevent further events.
+        *
+        * Note that this may result in a nested disable if the interrupt type
+        * is INTx, but that is fine because we are going to free it anyway.
+        *
+        * If this function is called as part of VM destruction, make sure the
+        * kvm state is still valid at this point, because we may have to wait
+        * for a currently running IRQ handler.
+        */
+       if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
+               int i;
+               for (i = 0; i < assigned_dev->entries_nr; i++)
+                       disable_irq(assigned_dev->host_msix_entries[i].vector);
+
+               for (i = 0; i < assigned_dev->entries_nr; i++)
+                       free_irq(assigned_dev->host_msix_entries[i].vector,
+                                assigned_dev);
+
+               assigned_dev->entries_nr = 0;
+               kfree(assigned_dev->host_msix_entries);
+               kfree(assigned_dev->guest_msix_entries);
+               pci_disable_msix(assigned_dev->dev);
+       } else {
+               /* Deal with MSI and INTx */
+               if ((assigned_dev->irq_requested_type &
+                    KVM_DEV_IRQ_HOST_INTX) &&
+                   (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
+                       spin_lock_irq(&assigned_dev->intx_lock);
+                       pci_intx(assigned_dev->dev, false);
+                       spin_unlock_irq(&assigned_dev->intx_lock);
+                       synchronize_irq(assigned_dev->host_irq);
+               } else
+                       disable_irq(assigned_dev->host_irq);
+
+               free_irq(assigned_dev->host_irq, assigned_dev);
+
+               if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
+                       pci_disable_msi(assigned_dev->dev);
+       }
+
+       assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
+}
+
+static int kvm_deassign_irq(struct kvm *kvm,
+                           struct kvm_assigned_dev_kernel *assigned_dev,
+                           unsigned long irq_requested_type)
+{
+       unsigned long guest_irq_type, host_irq_type;
+
+       if (!irqchip_in_kernel(kvm))
+               return -EINVAL;
+       /* no irq assignment to deassign */
+       if (!assigned_dev->irq_requested_type)
+               return -ENXIO;
+
+       host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
+       guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
+
+       if (host_irq_type)
+               deassign_host_irq(kvm, assigned_dev);
+       if (guest_irq_type)
+               deassign_guest_irq(kvm, assigned_dev);
+
+       return 0;
+}
+
+static void kvm_free_assigned_irq(struct kvm *kvm,
+                                 struct kvm_assigned_dev_kernel *assigned_dev)
+{
+       kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
+}
+
+static void kvm_free_assigned_device(struct kvm *kvm,
+                                    struct kvm_assigned_dev_kernel
+                                    *assigned_dev)
+{
+       kvm_free_assigned_irq(kvm, assigned_dev);
+
+       pci_reset_function(assigned_dev->dev);
+       if (pci_load_and_free_saved_state(assigned_dev->dev,
+                                         &assigned_dev->pci_saved_state))
+               printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
+                      __func__, dev_name(&assigned_dev->dev->dev));
+       else
+               pci_restore_state(assigned_dev->dev);
+
+       pci_clear_dev_assigned(assigned_dev->dev);
+
+       pci_release_regions(assigned_dev->dev);
+       pci_disable_device(assigned_dev->dev);
+       pci_dev_put(assigned_dev->dev);
+
+       list_del(&assigned_dev->list);
+       kfree(assigned_dev);
+}
+
+void kvm_free_all_assigned_devices(struct kvm *kvm)
+{
+       struct list_head *ptr, *ptr2;
+       struct kvm_assigned_dev_kernel *assigned_dev;
+
+       list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
+               assigned_dev = list_entry(ptr,
+                                         struct kvm_assigned_dev_kernel,
+                                         list);
+
+               kvm_free_assigned_device(kvm, assigned_dev);
+       }
+}
+
+static int assigned_device_enable_host_intx(struct kvm *kvm,
+                                           struct kvm_assigned_dev_kernel *dev)
+{
+       irq_handler_t irq_handler;
+       unsigned long flags;
+
+       dev->host_irq = dev->dev->irq;
+
+       /*
+        * We can only share the IRQ line with other host devices if we are
+        * able to disable the IRQ source at device-level - independently of
+        * the guest driver. Otherwise host devices may suffer from unbounded
+        * IRQ latencies when the guest keeps the line asserted.
+        */
+       if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
+               irq_handler = kvm_assigned_dev_intx;
+               flags = IRQF_SHARED;
+       } else {
+               irq_handler = NULL;
+               flags = IRQF_ONESHOT;
+       }
+       if (request_threaded_irq(dev->host_irq, irq_handler,
+                                kvm_assigned_dev_thread_intx, flags,
+                                dev->irq_name, dev))
+               return -EIO;
+
+       if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
+               spin_lock_irq(&dev->intx_lock);
+               pci_intx(dev->dev, true);
+               spin_unlock_irq(&dev->intx_lock);
+       }
+       return 0;
+}
+
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_host_msi(struct kvm *kvm,
+                                          struct kvm_assigned_dev_kernel *dev)
+{
+       int r;
+
+       if (!dev->dev->msi_enabled) {
+               r = pci_enable_msi(dev->dev);
+               if (r)
+                       return r;
+       }
+
+       dev->host_irq = dev->dev->irq;
+       if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
+                                kvm_assigned_dev_thread_msi, 0,
+                                dev->irq_name, dev)) {
+               pci_disable_msi(dev->dev);
+               return -EIO;
+       }
+
+       return 0;
+}
+#endif
+
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_host_msix(struct kvm *kvm,
+                                           struct kvm_assigned_dev_kernel *dev)
+{
+       int i, r = -EINVAL;
+
+       /* host_msix_entries and guest_msix_entries should have been
+        * initialized */
+       if (dev->entries_nr == 0)
+               return r;
+
+       r = pci_enable_msix_exact(dev->dev,
+                                 dev->host_msix_entries, dev->entries_nr);
+       if (r)
+               return r;
+
+       for (i = 0; i < dev->entries_nr; i++) {
+               r = request_threaded_irq(dev->host_msix_entries[i].vector,
+                                        kvm_assigned_dev_msix,
+                                        kvm_assigned_dev_thread_msix,
+                                        0, dev->irq_name, dev);
+               if (r)
+                       goto err;
+       }
+
+       return 0;
+err:
+       for (i -= 1; i >= 0; i--)
+               free_irq(dev->host_msix_entries[i].vector, dev);
+       pci_disable_msix(dev->dev);
+       return r;
+}
+
+#endif
+
+static int assigned_device_enable_guest_intx(struct kvm *kvm,
+                               struct kvm_assigned_dev_kernel *dev,
+                               struct kvm_assigned_irq *irq)
+{
+       dev->guest_irq = irq->guest_irq;
+       dev->ack_notifier.gsi = irq->guest_irq;
+       return 0;
+}
+
+#ifdef __KVM_HAVE_MSI
+static int assigned_device_enable_guest_msi(struct kvm *kvm,
+                       struct kvm_assigned_dev_kernel *dev,
+                       struct kvm_assigned_irq *irq)
+{
+       dev->guest_irq = irq->guest_irq;
+       dev->ack_notifier.gsi = -1;
+       return 0;
+}
+#endif
+
+#ifdef __KVM_HAVE_MSIX
+static int assigned_device_enable_guest_msix(struct kvm *kvm,
+                       struct kvm_assigned_dev_kernel *dev,
+                       struct kvm_assigned_irq *irq)
+{
+       dev->guest_irq = irq->guest_irq;
+       dev->ack_notifier.gsi = -1;
+       return 0;
+}
+#endif
+
+static int assign_host_irq(struct kvm *kvm,
+                          struct kvm_assigned_dev_kernel *dev,
+                          __u32 host_irq_type)
+{
+       int r = -EEXIST;
+
+       if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
+               return r;
+
+       snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
+                pci_name(dev->dev));
+
+       switch (host_irq_type) {
+       case KVM_DEV_IRQ_HOST_INTX:
+               r = assigned_device_enable_host_intx(kvm, dev);
+               break;
+#ifdef __KVM_HAVE_MSI
+       case KVM_DEV_IRQ_HOST_MSI:
+               r = assigned_device_enable_host_msi(kvm, dev);
+               break;
+#endif
+#ifdef __KVM_HAVE_MSIX
+       case KVM_DEV_IRQ_HOST_MSIX:
+               r = assigned_device_enable_host_msix(kvm, dev);
+               break;
+#endif
+       default:
+               r = -EINVAL;
+       }
+       dev->host_irq_disabled = false;
+
+       if (!r)
+               dev->irq_requested_type |= host_irq_type;
+
+       return r;
+}
+
+static int assign_guest_irq(struct kvm *kvm,
+                           struct kvm_assigned_dev_kernel *dev,
+                           struct kvm_assigned_irq *irq,
+                           unsigned long guest_irq_type)
+{
+       int id;
+       int r = -EEXIST;
+
+       if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
+               return r;
+
+       id = kvm_request_irq_source_id(kvm);
+       if (id < 0)
+               return id;
+
+       dev->irq_source_id = id;
+
+       switch (guest_irq_type) {
+       case KVM_DEV_IRQ_GUEST_INTX:
+               r = assigned_device_enable_guest_intx(kvm, dev, irq);
+               break;
+#ifdef __KVM_HAVE_MSI
+       case KVM_DEV_IRQ_GUEST_MSI:
+               r = assigned_device_enable_guest_msi(kvm, dev, irq);
+               break;
+#endif
+#ifdef __KVM_HAVE_MSIX
+       case KVM_DEV_IRQ_GUEST_MSIX:
+               r = assigned_device_enable_guest_msix(kvm, dev, irq);
+               break;
+#endif
+       default:
+               r = -EINVAL;
+       }
+
+       if (!r) {
+               dev->irq_requested_type |= guest_irq_type;
+               if (dev->ack_notifier.gsi != -1)
+                       kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
+       } else {
+               kvm_free_irq_source_id(kvm, dev->irq_source_id);
+               dev->irq_source_id = -1;
+       }
+
+       return r;
+}
+
+/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
+static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
+                                  struct kvm_assigned_irq *assigned_irq)
+{
+       int r = -EINVAL;
+       struct kvm_assigned_dev_kernel *match;
+       unsigned long host_irq_type, guest_irq_type;
+
+       if (!irqchip_in_kernel(kvm))
+               return r;
+
+       mutex_lock(&kvm->lock);
+       r = -ENODEV;
+       match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+                                     assigned_irq->assigned_dev_id);
+       if (!match)
+               goto out;
+
+       host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
+       guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
+
+       r = -EINVAL;
+       /* can only assign one type at a time */
+       if (hweight_long(host_irq_type) > 1)
+               goto out;
+       if (hweight_long(guest_irq_type) > 1)
+               goto out;
+       if (host_irq_type == 0 && guest_irq_type == 0)
+               goto out;
+
+       r = 0;
+       if (host_irq_type)
+               r = assign_host_irq(kvm, match, host_irq_type);
+       if (r)
+               goto out;
+
+       if (guest_irq_type)
+               r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
+out:
+       mutex_unlock(&kvm->lock);
+       return r;
+}
+
+static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
+                                        struct kvm_assigned_irq
+                                        *assigned_irq)
+{
+       int r = -ENODEV;
+       struct kvm_assigned_dev_kernel *match;
+       unsigned long irq_type;
+
+       mutex_lock(&kvm->lock);
+
+       match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+                                     assigned_irq->assigned_dev_id);
+       if (!match)
+               goto out;
+
+       irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
+                                         KVM_DEV_IRQ_GUEST_MASK);
+       r = kvm_deassign_irq(kvm, match, irq_type);
+out:
+       mutex_unlock(&kvm->lock);
+       return r;
+}
+
+/*
+ * We want to test whether the caller has been granted permissions to
+ * use this device.  To be able to configure and control the device,
+ * the user needs access to PCI configuration space and BAR resources.
+ * These are accessed through PCI sysfs.  PCI config space is often
+ * passed to the process calling this ioctl via file descriptor, so we
+ * can't rely on access to that file.  We can check for permissions
+ * on each of the BAR resource files, which is a pretty clear
+ * indicator that the user has been granted access to the device.
+ */
+static int probe_sysfs_permissions(struct pci_dev *dev)
+{
+#ifdef CONFIG_SYSFS
+       int i;
+       bool bar_found = false;
+
+       for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
+               char *kpath, *syspath;
+               struct path path;
+               struct inode *inode;
+               int r;
+
+               if (!pci_resource_len(dev, i))
+                       continue;
+
+               kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
+               if (!kpath)
+                       return -ENOMEM;
+
+               /* Per sysfs-rules, sysfs is always at /sys */
+               syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
+               kfree(kpath);
+               if (!syspath)
+                       return -ENOMEM;
+
+               r = kern_path(syspath, LOOKUP_FOLLOW, &path);
+               kfree(syspath);
+               if (r)
+                       return r;
+
+               inode = path.dentry->d_inode;
+
+               r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
+               path_put(&path);
+               if (r)
+                       return r;
+
+               bar_found = true;
+       }
+
+       /* If no resources, probably something special */
+       if (!bar_found)
+               return -EPERM;
+
+       return 0;
+#else
+       return -EINVAL; /* No way to control the device without sysfs */
+#endif
+}
+
+static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
+                                     struct kvm_assigned_pci_dev *assigned_dev)
+{
+       int r = 0, idx;
+       struct kvm_assigned_dev_kernel *match;
+       struct pci_dev *dev;
+
+       if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
+               return -EINVAL;
+
+       mutex_lock(&kvm->lock);
+       idx = srcu_read_lock(&kvm->srcu);
+
+       match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+                                     assigned_dev->assigned_dev_id);
+       if (match) {
+               /* device already assigned */
+               r = -EEXIST;
+               goto out;
+       }
+
+       match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
+       if (match == NULL) {
+               printk(KERN_INFO "%s: Couldn't allocate memory\n",
+                      __func__);
+               r = -ENOMEM;
+               goto out;
+       }
+       dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
+                                  assigned_dev->busnr,
+                                  assigned_dev->devfn);
+       if (!dev) {
+               printk(KERN_INFO "%s: host device not found\n", __func__);
+               r = -EINVAL;
+               goto out_free;
+       }
+
+       /* Don't allow bridges to be assigned */
+       if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
+               r = -EPERM;
+               goto out_put;
+       }
+
+       r = probe_sysfs_permissions(dev);
+       if (r)
+               goto out_put;
+
+       if (pci_enable_device(dev)) {
+               printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
+               r = -EBUSY;
+               goto out_put;
+       }
+       r = pci_request_regions(dev, "kvm_assigned_device");
+       if (r) {
+               printk(KERN_INFO "%s: Could not get access to device regions\n",
+                      __func__);
+               goto out_disable;
+       }
+
+       pci_reset_function(dev);
+       pci_save_state(dev);
+       match->pci_saved_state = pci_store_saved_state(dev);
+       if (!match->pci_saved_state)
+               printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
+                      __func__, dev_name(&dev->dev));
+
+       if (!pci_intx_mask_supported(dev))
+               assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;
+
+       match->assigned_dev_id = assigned_dev->assigned_dev_id;
+       match->host_segnr = assigned_dev->segnr;
+       match->host_busnr = assigned_dev->busnr;
+       match->host_devfn = assigned_dev->devfn;
+       match->flags = assigned_dev->flags;
+       match->dev = dev;
+       spin_lock_init(&match->intx_lock);
+       spin_lock_init(&match->intx_mask_lock);
+       match->irq_source_id = -1;
+       match->kvm = kvm;
+       match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+
+       list_add(&match->list, &kvm->arch.assigned_dev_head);
+
+       if (!kvm->arch.iommu_domain) {
+               r = kvm_iommu_map_guest(kvm);
+               if (r)
+                       goto out_list_del;
+       }
+       r = kvm_assign_device(kvm, match->dev);
+       if (r)
+               goto out_list_del;
+
+out:
+       srcu_read_unlock(&kvm->srcu, idx);
+       mutex_unlock(&kvm->lock);
+       return r;
+out_list_del:
+       if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
+               printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
+                      __func__, dev_name(&dev->dev));
+       list_del(&match->list);
+       pci_release_regions(dev);
+out_disable:
+       pci_disable_device(dev);
+out_put:
+       pci_dev_put(dev);
+out_free:
+       kfree(match);
+       srcu_read_unlock(&kvm->srcu, idx);
+       mutex_unlock(&kvm->lock);
+       return r;
+}
+
+static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
+               struct kvm_assigned_pci_dev *assigned_dev)
+{
+       int r = 0;
+       struct kvm_assigned_dev_kernel *match;
+
+       mutex_lock(&kvm->lock);
+
+       match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+                                     assigned_dev->assigned_dev_id);
+       if (!match) {
+               printk(KERN_INFO "%s: device hasn't been assigned before, "
+                 "so cannot be deassigned\n", __func__);
+               r = -EINVAL;
+               goto out;
+       }
+
+       kvm_deassign_device(kvm, match->dev);
+
+       kvm_free_assigned_device(kvm, match);
+
+out:
+       mutex_unlock(&kvm->lock);
+       return r;
+}
+
+
+#ifdef __KVM_HAVE_MSIX
+static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
+                                   struct kvm_assigned_msix_nr *entry_nr)
+{
+       int r = 0;
+       struct kvm_assigned_dev_kernel *adev;
+
+       mutex_lock(&kvm->lock);
+
+       adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+                                     entry_nr->assigned_dev_id);
+       if (!adev) {
+               r = -EINVAL;
+               goto msix_nr_out;
+       }
+
+       if (adev->entries_nr == 0) {
+               adev->entries_nr = entry_nr->entry_nr;
+               if (adev->entries_nr == 0 ||
+                   adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
+                       r = -EINVAL;
+                       goto msix_nr_out;
+               }
+
+               adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
+                                               entry_nr->entry_nr,
+                                               GFP_KERNEL);
+               if (!adev->host_msix_entries) {
+                       r = -ENOMEM;
+                       goto msix_nr_out;
+               }
+               adev->guest_msix_entries =
+                       kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
+                               GFP_KERNEL);
+               if (!adev->guest_msix_entries) {
+                       kfree(adev->host_msix_entries);
+                       r = -ENOMEM;
+                       goto msix_nr_out;
+               }
+       } else /* Not allowed to set the MSI-X entry count twice */
+               r = -EINVAL;
+msix_nr_out:
+       mutex_unlock(&kvm->lock);
+       return r;
+}
+
+static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
+                                      struct kvm_assigned_msix_entry *entry)
+{
+       int r = 0, i;
+       struct kvm_assigned_dev_kernel *adev;
+
+       mutex_lock(&kvm->lock);
+
+       adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+                                     entry->assigned_dev_id);
+
+       if (!adev) {
+               r = -EINVAL;
+               goto msix_entry_out;
+       }
+
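+       /* Reuse the slot already bound to this entry, or take the first free one */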
+       for (i = 0; i < adev->entries_nr; i++)
+               if (adev->guest_msix_entries[i].vector == 0 ||
+                   adev->guest_msix_entries[i].entry == entry->entry) {
+                       adev->guest_msix_entries[i].entry = entry->entry;
+                       adev->guest_msix_entries[i].vector = entry->gsi;
+                       adev->host_msix_entries[i].entry = entry->entry;
+                       break;
+               }
+       if (i == adev->entries_nr) {
+               r = -ENOSPC;
+               goto msix_entry_out;
+       }
+
+msix_entry_out:
+       mutex_unlock(&kvm->lock);
+
+       return r;
+}
+#endif
+
+static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
+               struct kvm_assigned_pci_dev *assigned_dev)
+{
+       int r = 0;
+       struct kvm_assigned_dev_kernel *match;
+
+       mutex_lock(&kvm->lock);
+
+       match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+                                     assigned_dev->assigned_dev_id);
+       if (!match) {
+               r = -ENODEV;
+               goto out;
+       }
+
+       spin_lock(&match->intx_mask_lock);
+
+       match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
+       match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;
+
+       if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
+               if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
+                       kvm_set_irq(match->kvm, match->irq_source_id,
+                                   match->guest_irq, 0, false);
+                       /*
+                        * Masking at hardware-level is performed on demand,
+                        * i.e. when an IRQ actually arrives at the host.
+                        */
+               } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
+                       /*
+                        * Unmask the IRQ line if required. Unmasking at
+                        * device level will be performed by user space.
+                        */
+                       spin_lock_irq(&match->intx_lock);
+                       if (match->host_irq_disabled) {
+                               enable_irq(match->host_irq);
+                               match->host_irq_disabled = false;
+                       }
+                       spin_unlock_irq(&match->intx_lock);
+               }
+       }
+
+       spin_unlock(&match->intx_mask_lock);
+
+out:
+       mutex_unlock(&kvm->lock);
+       return r;
+}
+
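+/* Dispatch the KVM_ASSIGN_* / KVM_DEASSIGN_* device assignment ioctls */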
+long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+                                 unsigned long arg)
+{
+       void __user *argp = (void __user *)arg;
+       int r;
+
+       switch (ioctl) {
+       case KVM_ASSIGN_PCI_DEVICE: {
+               struct kvm_assigned_pci_dev assigned_dev;
+
+               r = -EFAULT;
+               if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+                       goto out;
+               r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
+               if (r)
+                       goto out;
+               break;
+       }
+       case KVM_ASSIGN_IRQ: {
+               r = -EOPNOTSUPP;
+               break;
+       }
+       case KVM_ASSIGN_DEV_IRQ: {
+               struct kvm_assigned_irq assigned_irq;
+
+               r = -EFAULT;
+               if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+                       goto out;
+               r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
+               if (r)
+                       goto out;
+               break;
+       }
+       case KVM_DEASSIGN_DEV_IRQ: {
+               struct kvm_assigned_irq assigned_irq;
+
+               r = -EFAULT;
+               if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
+                       goto out;
+               r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
+               if (r)
+                       goto out;
+               break;
+       }
+       case KVM_DEASSIGN_PCI_DEVICE: {
+               struct kvm_assigned_pci_dev assigned_dev;
+
+               r = -EFAULT;
+               if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+                       goto out;
+               r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
+               if (r)
+                       goto out;
+               break;
+       }
+#ifdef __KVM_HAVE_MSIX
+       case KVM_ASSIGN_SET_MSIX_NR: {
+               struct kvm_assigned_msix_nr entry_nr;
+               r = -EFAULT;
+               if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
+                       goto out;
+               r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
+               if (r)
+                       goto out;
+               break;
+       }
+       case KVM_ASSIGN_SET_MSIX_ENTRY: {
+               struct kvm_assigned_msix_entry entry;
+               r = -EFAULT;
+               if (copy_from_user(&entry, argp, sizeof entry))
+                       goto out;
+               r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
+               if (r)
+                       goto out;
+               break;
+       }
+#endif
+       case KVM_ASSIGN_SET_INTX_MASK: {
+               struct kvm_assigned_pci_dev assigned_dev;
+
+               r = -EFAULT;
+               if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
+                       goto out;
+               r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
+               break;
+       }
+       default:
+               r = -ENOTTY;
+               break;
+       }
+out:
+       return r;
+}
diff --git a/arch/x86/kvm/assigned-dev.h b/arch/x86/kvm/assigned-dev.h
new file mode 100644 (file)
index 0000000..a428c1a
--- /dev/null
@@ -0,0 +1,32 @@
+#ifndef ARCH_X86_KVM_ASSIGNED_DEV_H
+#define ARCH_X86_KVM_ASSIGNED_DEV_H
+
+#include <linux/kvm_host.h>
+
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev);
+int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev);
+
+int kvm_iommu_map_guest(struct kvm *kvm);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+
+long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+                                 unsigned long arg);
+
+void kvm_free_all_assigned_devices(struct kvm *kvm);
+#else
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+       return 0;
+}
+
+static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+                                               unsigned long arg)
+{
+       return -ENOTTY;
+}
+
+static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
+#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */
+
+#endif /* ARCH_X86_KVM_ASSIGNED_DEV_H */
index 976e3a5..8a80737 100644 (file)
@@ -23,7 +23,7 @@
 #include "mmu.h"
 #include "trace.h"
 
-static u32 xstate_required_size(u64 xstate_bv)
+static u32 xstate_required_size(u64 xstate_bv, bool compacted)
 {
        int feature_bit = 0;
        u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
@@ -31,9 +31,10 @@ static u32 xstate_required_size(u64 xstate_bv)
        xstate_bv &= XSTATE_EXTEND_MASK;
        while (xstate_bv) {
                if (xstate_bv & 0x1) {
-                       u32 eax, ebx, ecx, edx;
+                       u32 eax, ebx, ecx, edx, offset;
                        cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
-                       ret = max(ret, eax + ebx);
+                       offset = compacted ? ret : ebx;
+                       ret = max(ret, offset + eax);
                }
 
                xstate_bv >>= 1;
@@ -53,6 +54,8 @@ u64 kvm_supported_xcr0(void)
        return xcr0;
 }
 
+#define F(x) bit(X86_FEATURE_##x)
+
 int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpuid_entry2 *best;
@@ -64,13 +67,13 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 
        /* Update OSXSAVE bit */
        if (cpu_has_xsave && best->function == 0x1) {
-               best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
+               best->ecx &= ~F(OSXSAVE);
                if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
-                       best->ecx |= bit(X86_FEATURE_OSXSAVE);
+                       best->ecx |= F(OSXSAVE);
        }
 
        if (apic) {
-               if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
+               if (best->ecx & F(TSC_DEADLINE_TIMER))
                        apic->lapic_timer.timer_mode_mask = 3 << 17;
                else
                        apic->lapic_timer.timer_mode_mask = 1 << 17;
@@ -85,9 +88,13 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
                        (best->eax | ((u64)best->edx << 32)) &
                        kvm_supported_xcr0();
                vcpu->arch.guest_xstate_size = best->ebx =
-                       xstate_required_size(vcpu->arch.xcr0);
+                       xstate_required_size(vcpu->arch.xcr0, false);
        }
 
+       best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
+       if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
+               best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
+
        /*
         * The existing code assumes virtual address is 48-bit in the canonical
         * address checks; exit if it is ever changed.
@@ -122,8 +129,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
                        break;
                }
        }
-       if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) {
-               entry->edx &= ~bit(X86_FEATURE_NX);
+       if (entry && (entry->edx & F(NX)) && !is_efer_nx()) {
+               entry->edx &= ~F(NX);
                printk(KERN_INFO "kvm: guest NX capability removed\n");
        }
 }
@@ -227,8 +234,6 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
        entry->flags = 0;
 }
 
-#define F(x) bit(X86_FEATURE_##x)
-
 static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
                                   u32 func, u32 index, int *nent, int maxnent)
 {
@@ -267,6 +272,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
        unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
        unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
        unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
+       unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
 
        /* cpuid 1.edx */
        const u32 kvm_supported_word0_x86_features =
@@ -317,7 +323,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
        const u32 kvm_supported_word9_x86_features =
                F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
                F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
-               F(ADX) | F(SMAP);
+               F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
+               F(AVX512CD);
+
+       /* cpuid 0xD.1.eax */
+       const u32 kvm_supported_word10_x86_features =
+               F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
 
        /* all calls to cpuid_count() should be made on the same cpu */
        get_cpu();
@@ -453,16 +464,34 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                u64 supported = kvm_supported_xcr0();
 
                entry->eax &= supported;
+               entry->ebx = xstate_required_size(supported, false);
+               entry->ecx = entry->ebx;
                entry->edx &= supported >> 32;
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+               if (!supported)
+                       break;
+
                for (idx = 1, i = 1; idx < 64; ++idx) {
                        u64 mask = ((u64)1 << idx);
                        if (*nent >= maxnent)
                                goto out;
 
                        do_cpuid_1_ent(&entry[i], function, idx);
-                       if (entry[i].eax == 0 || !(supported & mask))
-                               continue;
+                       if (idx == 1) {
+                               entry[i].eax &= kvm_supported_word10_x86_features;
+                               entry[i].ebx = 0;
+                               if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
+                                       entry[i].ebx =
+                                               xstate_required_size(supported,
+                                                                    true);
+                       } else {
+                               if (entry[i].eax == 0 || !(supported & mask))
+                                       continue;
+                               if (WARN_ON_ONCE(entry[i].ecx & 1))
+                                       continue;
+                       }
+                       entry[i].ecx = 0;
+                       entry[i].edx = 0;
                        entry[i].flags |=
                               KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                        ++*nent;
index 9f8a2fa..169b09d 100644 (file)
 #define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
 #define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
 #define Escape      (5<<15)     /* Escape to coprocessor instruction */
+#define InstrDual   (6<<15)     /* Alternate instruction decoding of mod == 3 */
 #define Sse         (1<<18)     /* SSE Vector instruction */
 /* Generic ModRM decode. */
 #define ModRM       (1<<19)
 #define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
 #define NoBigReal   ((u64)1 << 50)  /* No big real mode */
 #define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
+#define NearBranch  ((u64)1 << 52)  /* Near branches */
+#define No16       ((u64)1 << 53)  /* No 16 bit operand */
 
 #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
 
@@ -209,6 +212,7 @@ struct opcode {
                const struct group_dual *gdual;
                const struct gprefix *gprefix;
                const struct escape *esc;
+               const struct instr_dual *idual;
                void (*fastop)(struct fastop *fake);
        } u;
        int (*check_perm)(struct x86_emulate_ctxt *ctxt);
@@ -231,6 +235,11 @@ struct escape {
        struct opcode high[64];
 };
 
+struct instr_dual {
+       struct opcode mod012;
+       struct opcode mod3;
+};
+
 /* EFLAGS bit definitions. */
 #define EFLG_ID (1<<21)
 #define EFLG_VIP (1<<20)
@@ -379,6 +388,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
        ON64(FOP2E(op##q, rax, cl)) \
        FOP_END
 
+/* 2 operand, src and dest are reversed */
+#define FASTOP2R(op, name) \
+       FOP_START(name) \
+       FOP2E(op##b, dl, al) \
+       FOP2E(op##w, dx, ax) \
+       FOP2E(op##l, edx, eax) \
+       ON64(FOP2E(op##q, rdx, rax)) \
+       FOP_END
+
 #define FOP3E(op,  dst, src, src2) \
        FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
 
@@ -477,9 +495,9 @@ address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
 }
 
 static inline unsigned long
-register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg)
+register_address(struct x86_emulate_ctxt *ctxt, int reg)
 {
-       return address_mask(ctxt, reg);
+       return address_mask(ctxt, reg_read(ctxt, reg));
 }
 
 static void masked_increment(ulong *reg, ulong mask, int inc)
@@ -488,7 +506,7 @@ static void masked_increment(ulong *reg, ulong mask, int inc)
 }
 
 static inline void
-register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc)
+register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
 {
        ulong mask;
 
@@ -496,7 +514,7 @@ register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, in
                mask = ~0UL;
        else
                mask = ad_mask(ctxt);
-       masked_increment(reg, mask, inc);
+       masked_increment(reg_rmw(ctxt, reg), mask, inc);
 }
 
 static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
@@ -564,40 +582,6 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
        return emulate_exception(ctxt, NM_VECTOR, 0, false);
 }
 
-static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
-                              int cs_l)
-{
-       switch (ctxt->op_bytes) {
-       case 2:
-               ctxt->_eip = (u16)dst;
-               break;
-       case 4:
-               ctxt->_eip = (u32)dst;
-               break;
-#ifdef CONFIG_X86_64
-       case 8:
-               if ((cs_l && is_noncanonical_address(dst)) ||
-                   (!cs_l && (dst >> 32) != 0))
-                       return emulate_gp(ctxt, 0);
-               ctxt->_eip = dst;
-               break;
-#endif
-       default:
-               WARN(1, "unsupported eip assignment size\n");
-       }
-       return X86EMUL_CONTINUE;
-}
-
-static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
-{
-       return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
-}
-
-static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
-       return assign_eip_near(ctxt, ctxt->_eip + rel);
-}
-
 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
 {
        u16 selector;
@@ -641,25 +625,24 @@ static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
                return true;
 }
 
-static int __linearize(struct x86_emulate_ctxt *ctxt,
-                    struct segmented_address addr,
-                    unsigned *max_size, unsigned size,
-                    bool write, bool fetch,
-                    ulong *linear)
+static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
+                                      struct segmented_address addr,
+                                      unsigned *max_size, unsigned size,
+                                      bool write, bool fetch,
+                                      enum x86emul_mode mode, ulong *linear)
 {
        struct desc_struct desc;
        bool usable;
        ulong la;
        u32 lim;
        u16 sel;
-       unsigned cpl;
 
        la = seg_base(ctxt, addr.seg) + addr.ea;
        *max_size = 0;
-       switch (ctxt->mode) {
+       switch (mode) {
        case X86EMUL_MODE_PROT64:
-               if (((signed long)la << 16) >> 16 != la)
-                       return emulate_gp(ctxt, 0);
+               if (is_noncanonical_address(la))
+                       goto bad;
 
                *max_size = min_t(u64, ~0u, (1ull << 48) - la);
                if (size > *max_size)
@@ -678,46 +661,20 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
                if (!fetch && (desc.type & 8) && !(desc.type & 2))
                        goto bad;
                lim = desc_limit_scaled(&desc);
-               if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch &&
-                   (ctxt->d & NoBigReal)) {
-                       /* la is between zero and 0xffff */
-                       if (la > 0xffff)
-                               goto bad;
-                       *max_size = 0x10000 - la;
-               } else if ((desc.type & 8) || !(desc.type & 4)) {
-                       /* expand-up segment */
-                       if (addr.ea > lim)
-                               goto bad;
-                       *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
-               } else {
+               if (!(desc.type & 8) && (desc.type & 4)) {
                        /* expand-down segment */
                        if (addr.ea <= lim)
                                goto bad;
                        lim = desc.d ? 0xffffffff : 0xffff;
-                       if (addr.ea > lim)
-                               goto bad;
-                       *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
                }
+               if (addr.ea > lim)
+                       goto bad;
+               *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
                if (size > *max_size)
                        goto bad;
-               cpl = ctxt->ops->cpl(ctxt);
-               if (!(desc.type & 8)) {
-                       /* data segment */
-                       if (cpl > desc.dpl)
-                               goto bad;
-               } else if ((desc.type & 8) && !(desc.type & 4)) {
-                       /* nonconforming code segment */
-                       if (cpl != desc.dpl)
-                               goto bad;
-               } else if ((desc.type & 8) && (desc.type & 4)) {
-                       /* conforming code segment */
-                       if (cpl < desc.dpl)
-                               goto bad;
-               }
+               la &= (u32)-1;
                break;
        }
-       if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8)
-               la &= (u32)-1;
        if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
                return emulate_gp(ctxt, 0);
        *linear = la;
@@ -735,9 +692,55 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
                     ulong *linear)
 {
        unsigned max_size;
-       return __linearize(ctxt, addr, &max_size, size, write, false, linear);
+       return __linearize(ctxt, addr, &max_size, size, write, false,
+                          ctxt->mode, linear);
+}
+
+static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
+                            enum x86emul_mode mode)
+{
+       ulong linear;
+       int rc;
+       unsigned max_size;
+       struct segmented_address addr = { .seg = VCPU_SREG_CS,
+                                          .ea = dst };
+
+       if (ctxt->op_bytes != sizeof(unsigned long))
+               addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
+       rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
+       if (rc == X86EMUL_CONTINUE)
+               ctxt->_eip = addr.ea;
+       return rc;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+       return assign_eip(ctxt, dst, ctxt->mode);
 }
 
+static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+                         const struct desc_struct *cs_desc)
+{
+       enum x86emul_mode mode = ctxt->mode;
+
+#ifdef CONFIG_X86_64
+       if (ctxt->mode >= X86EMUL_MODE_PROT32 && cs_desc->l) {
+               u64 efer = 0;
+
+               ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+               if (efer & EFER_LMA)
+                       mode = X86EMUL_MODE_PROT64;
+       }
+#endif
+       if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
+               mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+       return assign_eip(ctxt, dst, mode);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+       return assign_eip_near(ctxt, ctxt->_eip + rel);
+}
 
 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
                              struct segmented_address addr,
@@ -776,7 +779,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
         * boundary check itself.  Instead, we use max_size to check
         * against op_size.
         */
-       rc = __linearize(ctxt, addr, &max_size, 0, false, true, &linear);
+       rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
+                        &linear);
        if (unlikely(rc != X86EMUL_CONTINUE))
                return rc;
 
@@ -911,6 +915,8 @@ FASTOP2W(btc);
 
 FASTOP2(xadd);
 
+FASTOP2R(cmp, cmp_r);
+
 static u8 test_cc(unsigned int condition, unsigned long flags)
 {
        u8 rc;
@@ -1221,6 +1227,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
                        if (index_reg != 4)
                                modrm_ea += reg_read(ctxt, index_reg) << scale;
                } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
+                       modrm_ea += insn_fetch(s32, ctxt);
                        if (ctxt->mode == X86EMUL_MODE_PROT64)
                                ctxt->rip_relative = 1;
                } else {
@@ -1229,10 +1236,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
                        adjust_modrm_seg(ctxt, base_reg);
                }
                switch (ctxt->modrm_mod) {
-               case 0:
-                       if (ctxt->modrm_rm == 5)
-                               modrm_ea += insn_fetch(s32, ctxt);
-                       break;
                case 1:
                        modrm_ea += insn_fetch(s8, ctxt);
                        break;
@@ -1284,7 +1287,8 @@ static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
                else
                        sv = (s64)ctxt->src.val & (s64)mask;
 
-               ctxt->dst.addr.mem.ea += (sv >> 3);
+               ctxt->dst.addr.mem.ea = address_mask(ctxt,
+                                          ctxt->dst.addr.mem.ea + (sv >> 3));
        }
 
        /* only subword offset */
@@ -1610,6 +1614,9 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                sizeof(base3), &ctxt->exception);
                if (ret != X86EMUL_CONTINUE)
                        return ret;
+               if (is_noncanonical_address(get_desc_base(&seg_desc) |
+                                            ((u64)base3 << 32)))
+                       return emulate_gp(ctxt, 0);
        }
 load:
        ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
@@ -1807,6 +1814,10 @@ static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
        int seg = ctxt->src2.val;
 
        ctxt->src.val = get_segment_selector(ctxt, seg);
+       if (ctxt->op_bytes == 4) {
+               rsp_increment(ctxt, -2);
+               ctxt->op_bytes = 2;
+       }
 
        return em_push(ctxt);
 }
@@ -1850,7 +1861,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt)
 
 static int em_pushf(struct x86_emulate_ctxt *ctxt)
 {
-       ctxt->src.val =  (unsigned long)ctxt->eflags;
+       ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM;
        return em_push(ctxt);
 }
 
@@ -2035,7 +2046,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
        if (rc != X86EMUL_CONTINUE)
                return rc;
 
-       rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+       rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
        if (rc != X86EMUL_CONTINUE) {
                WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
                /* assigning eip failed; restore the old cs */
@@ -2045,31 +2056,22 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
        return rc;
 }
 
-static int em_grp45(struct x86_emulate_ctxt *ctxt)
+static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
 {
-       int rc = X86EMUL_CONTINUE;
+       return assign_eip_near(ctxt, ctxt->src.val);
+}
 
-       switch (ctxt->modrm_reg) {
-       case 2: /* call near abs */ {
-               long int old_eip;
-               old_eip = ctxt->_eip;
-               rc = assign_eip_near(ctxt, ctxt->src.val);
-               if (rc != X86EMUL_CONTINUE)
-                       break;
-               ctxt->src.val = old_eip;
-               rc = em_push(ctxt);
-               break;
-       }
-       case 4: /* jmp abs */
-               rc = assign_eip_near(ctxt, ctxt->src.val);
-               break;
-       case 5: /* jmp far */
-               rc = em_jmp_far(ctxt);
-               break;
-       case 6: /* push */
-               rc = em_push(ctxt);
-               break;
-       }
+static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
+{
+       int rc;
+       long int old_eip;
+
+       old_eip = ctxt->_eip;
+       rc = assign_eip_near(ctxt, ctxt->src.val);
+       if (rc != X86EMUL_CONTINUE)
+               return rc;
+       ctxt->src.val = old_eip;
+       rc = em_push(ctxt);
        return rc;
 }
 
@@ -2128,11 +2130,11 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
        /* Outer-privilege level return is not implemented */
        if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
                return X86EMUL_UNHANDLEABLE;
-       rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
+       rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, false,
                                       &new_desc);
        if (rc != X86EMUL_CONTINUE)
                return rc;
-       rc = assign_eip_far(ctxt, eip, new_desc.l);
+       rc = assign_eip_far(ctxt, eip, &new_desc);
        if (rc != X86EMUL_CONTINUE) {
                WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
                ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
@@ -2316,6 +2318,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 
                ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
                ctxt->eflags &= ~msr_data;
+               ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
 #endif
        } else {
                /* legacy mode */
@@ -2349,11 +2352,9 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
            && !vendor_intel(ctxt))
                return emulate_ud(ctxt);
 
-       /* XXX sysenter/sysexit have not been tested in 64bit mode.
-       * Therefore, we inject an #UD.
-       */
+       /* sysenter/sysexit have not been tested in 64bit mode. */
        if (ctxt->mode == X86EMUL_MODE_PROT64)
-               return emulate_ud(ctxt);
+               return X86EMUL_UNHANDLEABLE;
 
        setup_syscalls_segments(ctxt, &cs, &ss);
 
@@ -2425,6 +2426,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
                if ((msr_data & 0xfffc) == 0x0)
                        return emulate_gp(ctxt, 0);
                ss_sel = (u16)(msr_data + 24);
+               rcx = (u32)rcx;
+               rdx = (u32)rdx;
                break;
        case X86EMUL_MODE_PROT64:
                cs_sel = (u16)(msr_data + 32);
@@ -2599,7 +2602,6 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
        ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
                            &ctxt->exception);
        if (ret != X86EMUL_CONTINUE)
-               /* FIXME: need to provide precise fault address */
                return ret;
 
        save_state_to_tss16(ctxt, &tss_seg);
@@ -2607,13 +2609,11 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
        ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
                             &ctxt->exception);
        if (ret != X86EMUL_CONTINUE)
-               /* FIXME: need to provide precise fault address */
                return ret;
 
        ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
                            &ctxt->exception);
        if (ret != X86EMUL_CONTINUE)
-               /* FIXME: need to provide precise fault address */
                return ret;
 
        if (old_tss_sel != 0xffff) {
@@ -2624,7 +2624,6 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
                                     sizeof tss_seg.prev_task_link,
                                     &ctxt->exception);
                if (ret != X86EMUL_CONTINUE)
-                       /* FIXME: need to provide precise fault address */
                        return ret;
        }
 
@@ -2813,7 +2812,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
         *
         * 1. jmp/call/int to task gate: Check against DPL of the task gate
         * 2. Exception/IRQ/iret: No check is performed
-        * 3. jmp/call to TSS: Check against DPL of the TSS
+        * 3. jmp/call to TSS/task-gate: No check is performed since the
+        *    hardware checks it before exiting.
         */
        if (reason == TASK_SWITCH_GATE) {
                if (idt_index != -1) {
@@ -2830,13 +2830,8 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
                        if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
                                return emulate_gp(ctxt, (idt_index << 3) | 0x2);
                }
-       } else if (reason != TASK_SWITCH_IRET) {
-               int dpl = next_tss_desc.dpl;
-               if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
-                       return emulate_gp(ctxt, tss_selector);
        }
 
-
        desc_limit = desc_limit_scaled(&next_tss_desc);
        if (!next_tss_desc.p ||
            ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
@@ -2913,8 +2908,8 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
 {
        int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
 
-       register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
-       op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
+       register_address_increment(ctxt, reg, df * op->bytes);
+       op->addr.mem.ea = register_address(ctxt, reg);
 }
 
 static int em_das(struct x86_emulate_ctxt *ctxt)
@@ -3025,7 +3020,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
        if (rc != X86EMUL_CONTINUE)
                return X86EMUL_CONTINUE;
 
-       rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+       rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
        if (rc != X86EMUL_CONTINUE)
                goto fail;
 
@@ -3215,6 +3210,8 @@ static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
                return emulate_ud(ctxt);
 
        ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
+       if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
+               ctxt->dst.bytes = 2;
        return X86EMUL_CONTINUE;
 }
 
@@ -3317,7 +3314,7 @@ static int em_sidt(struct x86_emulate_ctxt *ctxt)
        return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
 }
 
-static int em_lgdt(struct x86_emulate_ctxt *ctxt)
+static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
 {
        struct desc_ptr desc_ptr;
        int rc;
@@ -3329,12 +3326,23 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt)
                             ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;
-       ctxt->ops->set_gdt(ctxt, &desc_ptr);
+       if (ctxt->mode == X86EMUL_MODE_PROT64 &&
+           is_noncanonical_address(desc_ptr.address))
+               return emulate_gp(ctxt, 0);
+       if (lgdt)
+               ctxt->ops->set_gdt(ctxt, &desc_ptr);
+       else
+               ctxt->ops->set_idt(ctxt, &desc_ptr);
        /* Disable writeback. */
        ctxt->dst.type = OP_NONE;
        return X86EMUL_CONTINUE;
 }
 
+static int em_lgdt(struct x86_emulate_ctxt *ctxt)
+{
+       return em_lgdt_lidt(ctxt, true);
+}
+
 static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
 {
        int rc;
@@ -3348,20 +3356,7 @@ static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
 
 static int em_lidt(struct x86_emulate_ctxt *ctxt)
 {
-       struct desc_ptr desc_ptr;
-       int rc;
-
-       if (ctxt->mode == X86EMUL_MODE_PROT64)
-               ctxt->op_bytes = 8;
-       rc = read_descriptor(ctxt, ctxt->src.addr.mem,
-                            &desc_ptr.size, &desc_ptr.address,
-                            ctxt->op_bytes);
-       if (rc != X86EMUL_CONTINUE)
-               return rc;
-       ctxt->ops->set_idt(ctxt, &desc_ptr);
-       /* Disable writeback. */
-       ctxt->dst.type = OP_NONE;
-       return X86EMUL_CONTINUE;
+       return em_lgdt_lidt(ctxt, false);
 }
 
 static int em_smsw(struct x86_emulate_ctxt *ctxt)
@@ -3384,7 +3379,7 @@ static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
        int rc = X86EMUL_CONTINUE;
 
-       register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
+       register_address_increment(ctxt, VCPU_REGS_RCX, -1);
        if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
            (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
                rc = jmp_rel(ctxt, ctxt->src.val);
@@ -3554,7 +3549,7 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
 
                ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
                if (efer & EFER_LMA)
-                       rsvd = CR3_L_MODE_RESERVED_BITS;
+                       rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
 
                if (new_val & rsvd)
                        return emulate_gp(ctxt, 0);
@@ -3596,8 +3591,15 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt)
        if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
                return emulate_ud(ctxt);
 
-       if (check_dr7_gd(ctxt))
+       if (check_dr7_gd(ctxt)) {
+               ulong dr6;
+
+               ctxt->ops->get_dr(ctxt, 6, &dr6);
+               dr6 &= ~15;
+               dr6 |= DR6_BD | DR6_RTM;
+               ctxt->ops->set_dr(ctxt, 6, dr6);
                return emulate_db(ctxt);
+       }
 
        return X86EMUL_CONTINUE;
 }
@@ -3684,6 +3686,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
+#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
@@ -3780,11 +3783,11 @@ static const struct opcode group4[] = {
 static const struct opcode group5[] = {
        F(DstMem | SrcNone | Lock,              em_inc),
        F(DstMem | SrcNone | Lock,              em_dec),
-       I(SrcMem | Stack,                       em_grp45),
+       I(SrcMem | NearBranch,                  em_call_near_abs),
        I(SrcMemFAddr | ImplicitOps | Stack,    em_call_far),
-       I(SrcMem | Stack,                       em_grp45),
-       I(SrcMemFAddr | ImplicitOps,            em_grp45),
-       I(SrcMem | Stack,                       em_grp45), D(Undefined),
+       I(SrcMem | NearBranch,                  em_jmp_abs),
+       I(SrcMemFAddr | ImplicitOps,            em_jmp_far),
+       I(SrcMem | Stack,                       em_push), D(Undefined),
 };
 
 static const struct opcode group6[] = {
@@ -3845,8 +3848,12 @@ static const struct gprefix pfx_0f_6f_0f_7f = {
        I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
 };
 
+static const struct instr_dual instr_dual_0f_2b = {
+       I(0, em_mov), N
+};
+
 static const struct gprefix pfx_0f_2b = {
-       I(0, em_mov), I(0, em_mov), N, N,
+       ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
 };
 
 static const struct gprefix pfx_0f_28_0f_29 = {
@@ -3920,6 +3927,10 @@ static const struct escape escape_dd = { {
        N, N, N, N, N, N, N, N,
 } };
 
+static const struct instr_dual instr_dual_0f_c3 = {
+       I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
+};
+
 static const struct opcode opcode_table[256] = {
        /* 0x00 - 0x07 */
        F6ALU(Lock, em_add),
@@ -3964,7 +3975,7 @@ static const struct opcode opcode_table[256] = {
        I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
        I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
        /* 0x70 - 0x7F */
-       X16(D(SrcImmByte)),
+       X16(D(SrcImmByte | NearBranch)),
        /* 0x80 - 0x87 */
        G(ByteOp | DstMem | SrcImm, group1),
        G(DstMem | SrcImm, group1),
@@ -3991,20 +4002,20 @@ static const struct opcode opcode_table[256] = {
        I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
        I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
        I2bv(SrcSI | DstDI | Mov | String, em_mov),
-       F2bv(SrcSI | DstDI | String | NoWrite, em_cmp),
+       F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r),
        /* 0xA8 - 0xAF */
        F2bv(DstAcc | SrcImm | NoWrite, em_test),
        I2bv(SrcAcc | DstDI | Mov | String, em_mov),
        I2bv(SrcSI | DstAcc | Mov | String, em_mov),
-       F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp),
+       F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
        /* 0xB0 - 0xB7 */
        X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
        /* 0xB8 - 0xBF */
        X8(I(DstReg | SrcImm64 | Mov, em_mov)),
        /* 0xC0 - 0xC7 */
        G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
-       I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
-       I(ImplicitOps | Stack, em_ret),
+       I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
+       I(ImplicitOps | NearBranch, em_ret),
        I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
        I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
        G(ByteOp, group11), G(0, group11),
@@ -4024,13 +4035,14 @@ static const struct opcode opcode_table[256] = {
        /* 0xD8 - 0xDF */
        N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
        /* 0xE0 - 0xE7 */
-       X3(I(SrcImmByte, em_loop)),
-       I(SrcImmByte, em_jcxz),
+       X3(I(SrcImmByte | NearBranch, em_loop)),
+       I(SrcImmByte | NearBranch, em_jcxz),
        I2bvIP(SrcImmUByte | DstAcc, em_in,  in,  check_perm_in),
        I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
        /* 0xE8 - 0xEF */
-       I(SrcImm | Stack, em_call), D(SrcImm | ImplicitOps),
-       I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps),
+       I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
+       I(SrcImmFAddr | No64, em_jmp_far),
+       D(SrcImmByte | ImplicitOps | NearBranch),
        I2bvIP(SrcDX | DstAcc, em_in,  in,  check_perm_in),
        I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
        /* 0xF0 - 0xF7 */
@@ -4090,7 +4102,7 @@ static const struct opcode twobyte_table[256] = {
        N, N, N, N,
        N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
        /* 0x80 - 0x8F */
-       X16(D(SrcImm)),
+       X16(D(SrcImm | NearBranch)),
        /* 0x90 - 0x9F */
        X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
        /* 0xA0 - 0xA7 */
@@ -4121,7 +4133,7 @@ static const struct opcode twobyte_table[256] = {
        D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
        /* 0xC0 - 0xC7 */
        F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
-       N, D(DstMem | SrcReg | ModRM | Mov),
+       N, ID(0, &instr_dual_0f_c3),
        N, N, N, GD(0, &group9),
        /* 0xC8 - 0xCF */
        X8(I(DstReg, em_bswap)),
@@ -4134,12 +4146,20 @@ static const struct opcode twobyte_table[256] = {
        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
 };
 
+static const struct instr_dual instr_dual_0f_38_f0 = {
+       I(DstReg | SrcMem | Mov, em_movbe), N
+};
+
+static const struct instr_dual instr_dual_0f_38_f1 = {
+       I(DstMem | SrcReg | Mov, em_movbe), N
+};
+
 static const struct gprefix three_byte_0f_38_f0 = {
-       I(DstReg | SrcMem | Mov, em_movbe), N, N, N
+       ID(0, &instr_dual_0f_38_f0), N, N, N
 };
 
 static const struct gprefix three_byte_0f_38_f1 = {
-       I(DstMem | SrcReg | Mov, em_movbe), N, N, N
+       ID(0, &instr_dual_0f_38_f1), N, N, N
 };
 
 /*
@@ -4152,8 +4172,8 @@ static const struct opcode opcode_map_0f_38[256] = {
        /* 0x80 - 0xef */
        X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
        /* 0xf0 - 0xf1 */
-       GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0),
-       GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1),
+       GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
+       GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
        /* 0xf2 - 0xff */
        N, N, X4(N), X8(N)
 };
@@ -4275,7 +4295,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
                op->type = OP_MEM;
                op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
                op->addr.mem.ea =
-                       register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI));
+                       register_address(ctxt, VCPU_REGS_RDI);
                op->addr.mem.seg = VCPU_SREG_ES;
                op->val = 0;
                op->count = 1;
@@ -4329,7 +4349,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
                op->type = OP_MEM;
                op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
                op->addr.mem.ea =
-                       register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
+                       register_address(ctxt, VCPU_REGS_RSI);
                op->addr.mem.seg = ctxt->seg_override;
                op->val = 0;
                op->count = 1;
@@ -4338,7 +4358,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
                op->type = OP_MEM;
                op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
                op->addr.mem.ea =
-                       register_address(ctxt,
+                       address_mask(ctxt,
                                reg_read(ctxt, VCPU_REGS_RBX) +
                                (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
                op->addr.mem.seg = ctxt->seg_override;
@@ -4510,8 +4530,7 @@ done_prefixes:
 
        /* vex-prefix instructions are not implemented */
        if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
-           (mode == X86EMUL_MODE_PROT64 ||
-           (mode >= X86EMUL_MODE_PROT16 && (ctxt->modrm & 0x80)))) {
+           (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
                ctxt->d = NotImpl;
        }
 
@@ -4549,6 +4568,12 @@ done_prefixes:
                        else
                                opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
                        break;
+               case InstrDual:
+                       if ((ctxt->modrm >> 6) == 3)
+                               opcode = opcode.u.idual->mod3;
+                       else
+                               opcode = opcode.u.idual->mod012;
+                       break;
                default:
                        return EMULATION_FAILED;
                }
@@ -4567,7 +4592,8 @@ done_prefixes:
                return EMULATION_FAILED;
 
        if (unlikely(ctxt->d &
-                    (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
+           (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
+            No16))) {
                /*
                 * These are copied unconditionally here, and checked unconditionally
                 * in x86_emulate_insn.
@@ -4578,8 +4604,12 @@ done_prefixes:
                if (ctxt->d & NotImpl)
                        return EMULATION_FAILED;
 
-               if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
-                       ctxt->op_bytes = 8;
+               if (mode == X86EMUL_MODE_PROT64) {
+                       if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
+                               ctxt->op_bytes = 8;
+                       else if (ctxt->d & NearBranch)
+                               ctxt->op_bytes = 8;
+               }
 
                if (ctxt->d & Op3264) {
                        if (mode == X86EMUL_MODE_PROT64)
@@ -4588,6 +4618,9 @@ done_prefixes:
                                ctxt->op_bytes = 4;
                }
 
+               if ((ctxt->d & No16) && ctxt->op_bytes == 2)
+                       ctxt->op_bytes = 4;
+
                if (ctxt->d & Sse)
                        ctxt->op_bytes = 16;
                else if (ctxt->d & Mmx)
@@ -4631,7 +4664,8 @@ done_prefixes:
        rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
 
        if (ctxt->rip_relative)
-               ctxt->memopp->addr.mem.ea += ctxt->_eip;
+               ctxt->memopp->addr.mem.ea = address_mask(ctxt,
+                                       ctxt->memopp->addr.mem.ea + ctxt->_eip);
 
 done:
        return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
@@ -4775,6 +4809,12 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
                                goto done;
                }
 
+               /* Instruction can only be executed in protected mode */
+               if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
+                       rc = emulate_ud(ctxt);
+                       goto done;
+               }
+
                /* Privileged instruction can be executed only in CPL=0 */
                if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
                        if (ctxt->d & PrivUD)
@@ -4784,12 +4824,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
                        goto done;
                }
 
-               /* Instruction can only be executed in protected mode */
-               if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
-                       rc = emulate_ud(ctxt);
-                       goto done;
-               }
-
                /* Do instruction specific permission checks */
                if (ctxt->d & CheckPerm) {
                        rc = ctxt->check_perm(ctxt);
@@ -4974,8 +5008,7 @@ writeback:
                        count = ctxt->src.count;
                else
                        count = ctxt->dst.count;
-               register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX),
-                               -count);
+               register_address_increment(ctxt, VCPU_REGS_RCX, -count);
 
                if (!string_insn_completed(ctxt)) {
                        /*
@@ -5053,11 +5086,6 @@ twobyte_insn:
                ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
                                                        (s16) ctxt->src.val;
                break;
-       case 0xc3:              /* movnti */
-               ctxt->dst.bytes = ctxt->op_bytes;
-               ctxt->dst.val = (ctxt->op_bytes == 8) ? (u64) ctxt->src.val :
-                                                       (u32) ctxt->src.val;
-               break;
        default:
                goto cannot_emulate;
        }
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
new file mode 100644 (file)
index 0000000..b1947e0
--- /dev/null
@@ -0,0 +1,675 @@
+/*
+ *  Copyright (C) 2001  MandrakeSoft S.A.
+ *  Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ *
+ *    MandrakeSoft S.A.
+ *    43, rue d'Aboukir
+ *    75002 Paris - France
+ *    http://www.linux-mandrake.com/
+ *    http://www.mandrakesoft.com/
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Yunhong Jiang <yunhong.jiang@intel.com>
+ *  Yaozu (Eddie) Dong <eddie.dong@intel.com>
+ *  Based on Xen 3.1 code.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/smp.h>
+#include <linux/hrtimer.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <trace/events/kvm.h>
+
+#include "ioapic.h"
+#include "lapic.h"
+#include "irq.h"
+
+#if 0
+#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
+#else
+#define ioapic_debug(fmt, arg...)
+#endif
+static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
+               bool line_status);
+
+static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
+                                         unsigned long addr,
+                                         unsigned long length)
+{
+       unsigned long result = 0;
+
+       switch (ioapic->ioregsel) {
+       case IOAPIC_REG_VERSION:
+               result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
+                         | (IOAPIC_VERSION_ID & 0xff));
+               break;
+
+       case IOAPIC_REG_APIC_ID:
+       case IOAPIC_REG_ARB_ID:
+               result = ((ioapic->id & 0xf) << 24);
+               break;
+
+       default:
+               {
+                       u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
+                       u64 redir_content;
+
+                       if (redir_index < IOAPIC_NUM_PINS)
+                               redir_content =
+                                       ioapic->redirtbl[redir_index].bits;
+                       else
+                               redir_content = ~0ULL;
+
+                       result = (ioapic->ioregsel & 0x1) ?
+                           (redir_content >> 32) & 0xffffffff :
+                           redir_content & 0xffffffff;
+                       break;
+               }
+       }
+
+       return result;
+}
+
+static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
+{
+       ioapic->rtc_status.pending_eoi = 0;
+       bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
+}
+
+static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
+
+static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic)
+{
+       if (WARN_ON(ioapic->rtc_status.pending_eoi < 0))
+               kvm_rtc_eoi_tracking_restore_all(ioapic);
+}
+
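+/*
+ * Resync RTC EOI tracking for one vcpu: if the vcpu is a destination of the
+ * RTC pin, reconcile its dest_map bit and the pending_eoi count with the
+ * vcpu's actual pending-EOI state.
+ */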
+static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
+{
+       bool new_val, old_val;
+       struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+       union kvm_ioapic_redirect_entry *e;
+
+       e = &ioapic->redirtbl[RTC_GSI];
+       if (!kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id,
+                               e->fields.dest_mode))
+               return;
+
+       new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
+       old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+
+       if (new_val == old_val)
+               return;
+
+       if (new_val) {
+               __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+               ioapic->rtc_status.pending_eoi++;
+       } else {
+               __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+               ioapic->rtc_status.pending_eoi--;
+               rtc_status_pending_eoi_check_valid(ioapic);
+       }
+}
+
+void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
+{
+       struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+
+       spin_lock(&ioapic->lock);
+       __rtc_irq_eoi_tracking_restore_one(vcpu);
+       spin_unlock(&ioapic->lock);
+}
+
+static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
+{
+       struct kvm_vcpu *vcpu;
+       int i;
+
+       if (RTC_GSI >= IOAPIC_NUM_PINS)
+               return;
+
+       rtc_irq_eoi_tracking_reset(ioapic);
+       kvm_for_each_vcpu(i, vcpu, ioapic->kvm)
+           __rtc_irq_eoi_tracking_restore_one(vcpu);
+}
+
+static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
+{
+       if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) {
+               --ioapic->rtc_status.pending_eoi;
+               rtc_status_pending_eoi_check_valid(ioapic);
+       }
+}
+
+static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
+{
+       if (ioapic->rtc_status.pending_eoi > 0)
+               return true; /* coalesced */
+
+       return false;
+}
+
+static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
+               int irq_level, bool line_status)
+{
+       union kvm_ioapic_redirect_entry entry;
+       u32 mask = 1 << irq;
+       u32 old_irr;
+       int edge, ret;
+
+       entry = ioapic->redirtbl[irq];
+       edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+
+       if (!irq_level) {
+               ioapic->irr &= ~mask;
+               ret = 1;
+               goto out;
+       }
+
+       /*
+        * Return 0 for coalesced interrupts; for edge-triggered interrupts,
+        * this only happens if a previous edge has not been delivered due
+        * to masking.  For level interrupts, the remote_irr field tells
+        * us if the interrupt is waiting for an EOI.
+        *
+        * RTC is special: it is edge-triggered, but userspace likes to know
+        * if it has already been acked via EOI, because coalesced RTC
+        * interrupts lead to time drift in Windows guests.  So we track
+        * EOI manually for the RTC interrupt.
+        */
+       if (irq == RTC_GSI && line_status &&
+               rtc_irq_check_coalesced(ioapic)) {
+               ret = 0;
+               goto out;
+       }
+
+       old_irr = ioapic->irr;
+       ioapic->irr |= mask;
+       if ((edge && old_irr == ioapic->irr) ||
+           (!edge && entry.fields.remote_irr)) {
+               ret = 0;
+               goto out;
+       }
+
+       ret = ioapic_service(ioapic, irq, line_status);
+
+out:
+       trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
+       return ret;
+}
+
+static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
+{
+       u32 idx;
+
+       rtc_irq_eoi_tracking_reset(ioapic);
+       for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS)
+               ioapic_set_irq(ioapic, idx, 1, true);
+
+       kvm_rtc_eoi_tracking_restore_all(ioapic);
+}
+
+static void update_handled_vectors(struct kvm_ioapic *ioapic)
+{
+       DECLARE_BITMAP(handled_vectors, 256);
+       int i;
+
+       memset(handled_vectors, 0, sizeof(handled_vectors));
+       for (i = 0; i < IOAPIC_NUM_PINS; ++i)
+               __set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors);
+       memcpy(ioapic->handled_vectors, handled_vectors,
+              sizeof(handled_vectors));
+       smp_wmb();
+}
+
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
+                       u32 *tmr)
+{
+       struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+       union kvm_ioapic_redirect_entry *e;
+       int index;
+
+       spin_lock(&ioapic->lock);
+       for (index = 0; index < IOAPIC_NUM_PINS; index++) {
+               e = &ioapic->redirtbl[index];
+               if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
+                   kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) ||
+                   index == RTC_GSI) {
+                       if (kvm_apic_match_dest(vcpu, NULL, 0,
+                               e->fields.dest_id, e->fields.dest_mode)) {
+                               __set_bit(e->fields.vector,
+                                       (unsigned long *)eoi_exit_bitmap);
+                               if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG)
+                                       __set_bit(e->fields.vector,
+                                               (unsigned long *)tmr);
+                       }
+               }
+       }
+       spin_unlock(&ioapic->lock);
+}
+
+void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
+{
+       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+
+       if (!ioapic)
+               return;
+       kvm_make_scan_ioapic_request(kvm);
+}
+
+static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
+{
+       unsigned index;
+       bool mask_before, mask_after;
+       union kvm_ioapic_redirect_entry *e;
+
+       switch (ioapic->ioregsel) {
+       case IOAPIC_REG_VERSION:
+               /* Writes are ignored. */
+               break;
+
+       case IOAPIC_REG_APIC_ID:
+               ioapic->id = (val >> 24) & 0xf;
+               break;
+
+       case IOAPIC_REG_ARB_ID:
+               break;
+
+       default:
+               index = (ioapic->ioregsel - 0x10) >> 1;
+
+               ioapic_debug("change redir index %x val %x\n", index, val);
+               if (index >= IOAPIC_NUM_PINS)
+                       return;
+               e = &ioapic->redirtbl[index];
+               mask_before = e->fields.mask;
+               if (ioapic->ioregsel & 1) {
+                       e->bits &= 0xffffffff;
+                       e->bits |= (u64) val << 32;
+               } else {
+                       e->bits &= ~0xffffffffULL;
+                       e->bits |= (u32) val;
+                       e->fields.remote_irr = 0;
+               }
+               update_handled_vectors(ioapic);
+               mask_after = e->fields.mask;
+               if (mask_before != mask_after)
+                       kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
+               if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
+                   && ioapic->irr & (1 << index))
+                       ioapic_service(ioapic, index, false);
+               kvm_vcpu_request_scan_ioapic(ioapic->kvm);
+               break;
+       }
+}
+
+static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
+{
+       union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
+       struct kvm_lapic_irq irqe;
+       int ret;
+
+       if (entry->fields.mask)
+               return -1;
+
+       ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
+                    "vector=%x trig_mode=%x\n",
+                    entry->fields.dest_id, entry->fields.dest_mode,
+                    entry->fields.delivery_mode, entry->fields.vector,
+                    entry->fields.trig_mode);
+
+       irqe.dest_id = entry->fields.dest_id;
+       irqe.vector = entry->fields.vector;
+       irqe.dest_mode = entry->fields.dest_mode;
+       irqe.trig_mode = entry->fields.trig_mode;
+       irqe.delivery_mode = entry->fields.delivery_mode << 8;
+       irqe.level = 1;
+       irqe.shorthand = 0;
+
+       if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
+               ioapic->irr &= ~(1 << irq);
+
+       if (irq == RTC_GSI && line_status) {
+               /*
+                * pending_eoi cannot ever become negative (see
+                * rtc_status_pending_eoi_check_valid) and the caller
+                * ensures that it is only called if it is >= zero (namely,
+                * if rtc_irq_check_coalesced returns false).
+                */
+               BUG_ON(ioapic->rtc_status.pending_eoi != 0);
+               ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
+                               ioapic->rtc_status.dest_map);
+               ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret);
+       } else
+               ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
+
+       if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
+               entry->fields.remote_irr = 1;
+
+       return ret;
+}
+
+int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
+                      int level, bool line_status)
+{
+       int ret, irq_level;
+
+       BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
+
+       spin_lock(&ioapic->lock);
+       irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
+                                        irq_source_id, level);
+       ret = ioapic_set_irq(ioapic, irq, irq_level, line_status);
+
+       spin_unlock(&ioapic->lock);
+
+       return ret;
+}
+
+void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id)
+{
+       int i;
+
+       spin_lock(&ioapic->lock);
+       for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
+               __clear_bit(irq_source_id, &ioapic->irq_states[i]);
+       spin_unlock(&ioapic->lock);
+}
+
+static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
+{
+       int i;
+       struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic,
+                                                eoi_inject.work);
+       spin_lock(&ioapic->lock);
+       for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+               union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
+
+               if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG)
+                       continue;
+
+               if (ioapic->irr & (1 << i) && !ent->fields.remote_irr)
+                       ioapic_service(ioapic, i, false);
+       }
+       spin_unlock(&ioapic->lock);
+}
+
+#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000
+
+static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
+                       struct kvm_ioapic *ioapic, int vector, int trigger_mode)
+{
+       int i;
+
+       for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+               union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
+
+               if (ent->fields.vector != vector)
+                       continue;
+
+               if (i == RTC_GSI)
+                       rtc_irq_eoi(ioapic, vcpu);
+               /*
+                * We drop the lock while calling the ack notifiers because ack
+                * notifier callbacks for assigned devices call back into the
+                * IOAPIC recursively. Since remote_irr is cleared only after the
+                * notifiers have run, if the same vector is delivered while the
+                * lock is dropped it will be latched in IRR and delivered once
+                * the ack notifier returns.
+                */
+               spin_unlock(&ioapic->lock);
+               kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
+               spin_lock(&ioapic->lock);
+
+               if (trigger_mode != IOAPIC_LEVEL_TRIG)
+                       continue;
+
+               ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
+               ent->fields.remote_irr = 0;
+               if (!ent->fields.mask && (ioapic->irr & (1 << i))) {
+                       ++ioapic->irq_eoi[i];
+                       if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) {
+                               /*
+                                * Real hardware does not deliver the interrupt
+                                * immediately during eoi broadcast, and this
+                                * lets a buggy guest make slow progress
+                                * even if it does not correctly handle a
+                                * level-triggered interrupt.  Emulate this
+                                * behavior if we detect an interrupt storm.
+                                */
+                               schedule_delayed_work(&ioapic->eoi_inject, HZ / 100);
+                               ioapic->irq_eoi[i] = 0;
+                               trace_kvm_ioapic_delayed_eoi_inj(ent->bits);
+                       } else {
+                               ioapic_service(ioapic, i, false);
+                       }
+               } else {
+                       ioapic->irq_eoi[i] = 0;
+               }
+       }
+}
+
+bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
+{
+       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+       smp_rmb();
+       return test_bit(vector, ioapic->handled_vectors);
+}
+
+void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
+{
+       struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+
+       spin_lock(&ioapic->lock);
+       __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode);
+       spin_unlock(&ioapic->lock);
+}
+
+static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
+{
+       return container_of(dev, struct kvm_ioapic, dev);
+}
+
+static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
+{
+       return ((addr >= ioapic->base_address &&
+                (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
+}
+
+static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
+                           void *val)
+{
+       struct kvm_ioapic *ioapic = to_ioapic(this);
+       u32 result;
+       if (!ioapic_in_range(ioapic, addr))
+               return -EOPNOTSUPP;
+
+       ioapic_debug("addr %lx\n", (unsigned long)addr);
+       ASSERT(!(addr & 0xf));  /* check alignment */
+
+       addr &= 0xff;
+       spin_lock(&ioapic->lock);
+       switch (addr) {
+       case IOAPIC_REG_SELECT:
+               result = ioapic->ioregsel;
+               break;
+
+       case IOAPIC_REG_WINDOW:
+               result = ioapic_read_indirect(ioapic, addr, len);
+               break;
+
+       default:
+               result = 0;
+               break;
+       }
+       spin_unlock(&ioapic->lock);
+
+       switch (len) {
+       case 8:
+               *(u64 *) val = result;
+               break;
+       case 1:
+       case 2:
+       case 4:
+               memcpy(val, (char *)&result, len);
+               break;
+       default:
+               printk(KERN_WARNING "ioapic: wrong length %d\n", len);
+       }
+       return 0;
+}
+
+static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
+                            const void *val)
+{
+       struct kvm_ioapic *ioapic = to_ioapic(this);
+       u32 data;
+       if (!ioapic_in_range(ioapic, addr))
+               return -EOPNOTSUPP;
+
+       ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
+                    (void*)addr, len, val);
+       ASSERT(!(addr & 0xf));  /* check alignment */
+
+       switch (len) {
+       case 8:
+       case 4:
+               data = *(u32 *) val;
+               break;
+       case 2:
+               data = *(u16 *) val;
+               break;
+       case 1:
+               data = *(u8  *) val;
+               break;
+       default:
+               printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
+               return 0;
+       }
+
+       addr &= 0xff;
+       spin_lock(&ioapic->lock);
+       switch (addr) {
+       case IOAPIC_REG_SELECT:
+               ioapic->ioregsel = data & 0xFF; /* 8-bit register */
+               break;
+
+       case IOAPIC_REG_WINDOW:
+               ioapic_write_indirect(ioapic, data);
+               break;
+
+       default:
+               break;
+       }
+       spin_unlock(&ioapic->lock);
+       return 0;
+}
+
+static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
+{
+       int i;
+
+       cancel_delayed_work_sync(&ioapic->eoi_inject);
+       for (i = 0; i < IOAPIC_NUM_PINS; i++)
+               ioapic->redirtbl[i].fields.mask = 1;
+       ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
+       ioapic->ioregsel = 0;
+       ioapic->irr = 0;
+       ioapic->id = 0;
+       memset(ioapic->irq_eoi, 0x00, sizeof(ioapic->irq_eoi));
+       rtc_irq_eoi_tracking_reset(ioapic);
+       update_handled_vectors(ioapic);
+}
+
+static const struct kvm_io_device_ops ioapic_mmio_ops = {
+       .read     = ioapic_mmio_read,
+       .write    = ioapic_mmio_write,
+};
+
+int kvm_ioapic_init(struct kvm *kvm)
+{
+       struct kvm_ioapic *ioapic;
+       int ret;
+
+       ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
+       if (!ioapic)
+               return -ENOMEM;
+       spin_lock_init(&ioapic->lock);
+       INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work);
+       kvm->arch.vioapic = ioapic;
+       kvm_ioapic_reset(ioapic);
+       kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
+       ioapic->kvm = kvm;
+       mutex_lock(&kvm->slots_lock);
+       ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address,
+                                     IOAPIC_MEM_LENGTH, &ioapic->dev);
+       mutex_unlock(&kvm->slots_lock);
+       if (ret < 0) {
+               kvm->arch.vioapic = NULL;
+               kfree(ioapic);
+       }
+
+       return ret;
+}
+
+void kvm_ioapic_destroy(struct kvm *kvm)
+{
+       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+
+       cancel_delayed_work_sync(&ioapic->eoi_inject);
+       if (ioapic) {
+               kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+               kvm->arch.vioapic = NULL;
+               kfree(ioapic);
+       }
+}
+
+int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+{
+       struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+       if (!ioapic)
+               return -EINVAL;
+
+       spin_lock(&ioapic->lock);
+       memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
+       spin_unlock(&ioapic->lock);
+       return 0;
+}
+
+int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+{
+       struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+       if (!ioapic)
+               return -EINVAL;
+
+       spin_lock(&ioapic->lock);
+       memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
+       ioapic->irr = 0;
+       update_handled_vectors(ioapic);
+       kvm_vcpu_request_scan_ioapic(kvm);
+       kvm_ioapic_inject_all(ioapic, state->irr);
+       spin_unlock(&ioapic->lock);
+       return 0;
+}
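
A minimal user-space sketch (not part of this patch) of the IOREGSEL/IOWIN indirection that ioapic_mmio_write() and ioapic_write_indirect() above emulate: the guest writes a register index to offset 0x00, then accesses the 32-bit window at offset 0x10, with redirection entry N occupying indirect registers 0x10 + 2N (low dword) and 0x10 + 2N + 1 (high dword). The toy_* names are invented for illustration.

#include <stdint.h>
#include <stdio.h>

struct toy_ioapic {
        uint32_t ioregsel;
        uint64_t redirtbl[24];
};

static void toy_mmio_write(struct toy_ioapic *io, uint32_t off, uint32_t val)
{
        if (off == 0x00) {                      /* IOAPIC_REG_SELECT */
                io->ioregsel = val & 0xff;
        } else if (off == 0x10) {               /* IOAPIC_REG_WINDOW */
                uint32_t idx = (io->ioregsel - 0x10) >> 1;

                if (idx >= 24)
                        return;
                if (io->ioregsel & 1) {         /* high dword */
                        io->redirtbl[idx] &= 0xffffffffULL;
                        io->redirtbl[idx] |= (uint64_t)val << 32;
                } else {                        /* low dword */
                        io->redirtbl[idx] &= ~0xffffffffULL;
                        io->redirtbl[idx] |= val;
                }
        }
}

int main(void)
{
        struct toy_ioapic io = { 0 };

        /* program pin 8 (the RTC GSI): select the low dword, write vector 0x28 */
        toy_mmio_write(&io, 0x00, 0x10 + 8 * 2);
        toy_mmio_write(&io, 0x10, 0x28);
        printf("entry 8 = %#llx\n", (unsigned long long)io.redirtbl[8]);
        return 0;
}
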
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
new file mode 100644 (file)
index 0000000..3c91955
--- /dev/null
@@ -0,0 +1,119 @@
+#ifndef __KVM_IO_APIC_H
+#define __KVM_IO_APIC_H
+
+#include <linux/kvm_host.h>
+
+#include "iodev.h"
+
+struct kvm;
+struct kvm_vcpu;
+
+#define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
+#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
+#define IOAPIC_EDGE_TRIG  0
+#define IOAPIC_LEVEL_TRIG 1
+
+#define IOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000
+#define IOAPIC_MEM_LENGTH            0x100
+
+/* Direct registers. */
+#define IOAPIC_REG_SELECT  0x00
+#define IOAPIC_REG_WINDOW  0x10
+
+/* Indirect registers. */
+#define IOAPIC_REG_APIC_ID 0x00        /* x86 IOAPIC only */
+#define IOAPIC_REG_VERSION 0x01
+#define IOAPIC_REG_ARB_ID  0x02        /* x86 IOAPIC only */
+
+/* ioapic delivery mode */
+#define        IOAPIC_FIXED                    0x0
+#define        IOAPIC_LOWEST_PRIORITY          0x1
+#define        IOAPIC_PMI                      0x2
+#define        IOAPIC_NMI                      0x4
+#define        IOAPIC_INIT                     0x5
+#define        IOAPIC_EXTINT                   0x7
+
+#ifdef CONFIG_X86
+#define RTC_GSI 8
+#else
+#define RTC_GSI -1U
+#endif
+
+struct rtc_status {
+       int pending_eoi;
+       DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
+};
+
+union kvm_ioapic_redirect_entry {
+       u64 bits;
+       struct {
+               u8 vector;
+               u8 delivery_mode:3;
+               u8 dest_mode:1;
+               u8 delivery_status:1;
+               u8 polarity:1;
+               u8 remote_irr:1;
+               u8 trig_mode:1;
+               u8 mask:1;
+               u8 reserve:7;
+               u8 reserved[4];
+               u8 dest_id;
+       } fields;
+};
+
+struct kvm_ioapic {
+       u64 base_address;
+       u32 ioregsel;
+       u32 id;
+       u32 irr;
+       u32 pad;
+       union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
+       unsigned long irq_states[IOAPIC_NUM_PINS];
+       struct kvm_io_device dev;
+       struct kvm *kvm;
+       void (*ack_notifier)(void *opaque, int irq);
+       spinlock_t lock;
+       DECLARE_BITMAP(handled_vectors, 256);
+       struct rtc_status rtc_status;
+       struct delayed_work eoi_inject;
+       u32 irq_eoi[IOAPIC_NUM_PINS];
+};
+
+#ifdef DEBUG
+#define ASSERT(x)                                                      \
+do {                                                                   \
+       if (!(x)) {                                                     \
+               printk(KERN_EMERG "assertion failed %s: %d: %s\n",      \
+                      __FILE__, __LINE__, #x);                         \
+               BUG();                                                  \
+       }                                                               \
+} while (0)
+#else
+#define ASSERT(x) do { } while (0)
+#endif
+
+static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
+{
+       return kvm->arch.vioapic;
+}
+
+void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
+int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
+               int short_hand, unsigned int dest, int dest_mode);
+int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
+void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
+                       int trigger_mode);
+bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
+int kvm_ioapic_init(struct kvm *kvm);
+void kvm_ioapic_destroy(struct kvm *kvm);
+int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
+                      int level, bool line_status);
+void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
+int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+               struct kvm_lapic_irq *irq, unsigned long *dest_map);
+int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
+                       u32 *tmr);
+
+#endif
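
As a rough illustration of how the bitfields in union kvm_ioapic_redirect_entry above line up with the raw 64-bit redirection register (vector in bits 0-7, trig_mode in bit 15, mask in bit 16, dest_id in bits 56-63), here is a small self-contained decode with made-up values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* vector 0x28, level-triggered, unmasked, physical destination 0x01 */
        uint64_t e = 0x28ULL | (1ULL << 15) | (0x01ULL << 56);

        printf("vector    = %#llx\n", (unsigned long long)(e & 0xff));
        printf("trig_mode = %llu\n", (unsigned long long)((e >> 15) & 1));
        printf("mask      = %llu\n", (unsigned long long)((e >> 16) & 1));
        printf("dest_id   = %#llx\n", (unsigned long long)(e >> 56));
        return 0;
}
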
diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c
new file mode 100644 (file)
index 0000000..17b73ee
--- /dev/null
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ *
+ * Author: Allen M. Kay <allen.m.kay@intel.com>
+ * Author: Weidong Han <weidong.han@intel.com>
+ * Author: Ben-Ami Yassour <benami@il.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/stat.h>
+#include <linux/dmar.h>
+#include <linux/iommu.h>
+#include <linux/intel-iommu.h>
+#include "assigned-dev.h"
+
+static bool allow_unsafe_assigned_interrupts;
+module_param_named(allow_unsafe_assigned_interrupts,
+                  allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(allow_unsafe_assigned_interrupts,
+ "Enable device assignment on platforms without interrupt remapping support.");
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+                               gfn_t base_gfn, unsigned long npages);
+
+static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
+                          unsigned long npages)
+{
+       gfn_t end_gfn;
+       pfn_t pfn;
+
+       pfn     = gfn_to_pfn_memslot(slot, gfn);
+       end_gfn = gfn + npages;
+       gfn    += 1;
+
+       if (is_error_noslot_pfn(pfn))
+               return pfn;
+
+       while (gfn < end_gfn)
+               gfn_to_pfn_memslot(slot, gfn++);
+
+       return pfn;
+}
+
+static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
+{
+       unsigned long i;
+
+       for (i = 0; i < npages; ++i)
+               kvm_release_pfn_clean(pfn + i);
+}
+
+int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+       gfn_t gfn, end_gfn;
+       pfn_t pfn;
+       int r = 0;
+       struct iommu_domain *domain = kvm->arch.iommu_domain;
+       int flags;
+
+       /* check if iommu exists and in use */
+       if (!domain)
+               return 0;
+
+       gfn     = slot->base_gfn;
+       end_gfn = gfn + slot->npages;
+
+       flags = IOMMU_READ;
+       if (!(slot->flags & KVM_MEM_READONLY))
+               flags |= IOMMU_WRITE;
+       if (!kvm->arch.iommu_noncoherent)
+               flags |= IOMMU_CACHE;
+
+       while (gfn < end_gfn) {
+               unsigned long page_size;
+
+               /* Check if already mapped */
+               if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) {
+                       gfn += 1;
+                       continue;
+               }
+
+               /* Get the page size we could use to map */
+               page_size = kvm_host_page_size(kvm, gfn);
+
+               /* Make sure the page_size does not exceed the memslot */
+               while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
+                       page_size >>= 1;
+
+               /* Make sure gfn is aligned to the page size we want to map */
+               while ((gfn << PAGE_SHIFT) & (page_size - 1))
+                       page_size >>= 1;
+
+               /* Make sure hva is aligned to the page size we want to map */
+               while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1))
+                       page_size >>= 1;
+
+               /*
+                * Pin all pages we are about to map in memory. This is
+                * important because we unmap and unpin in 4 KiB steps later.
+                */
+               pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
+               if (is_error_noslot_pfn(pfn)) {
+                       gfn += 1;
+                       continue;
+               }
+
+               /* Map into IO address space */
+               r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
+                             page_size, flags);
+               if (r) {
+                       printk(KERN_ERR "kvm_iommu_map_address: "
+                              "iommu failed to map pfn=%llx\n", pfn);
+                       kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
+                       goto unmap_pages;
+               }
+
+               gfn += page_size >> PAGE_SHIFT;
+
+       }
+
+       return 0;
+
+unmap_pages:
+       kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn);
+       return r;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+       int idx, r = 0;
+       struct kvm_memslots *slots;
+       struct kvm_memory_slot *memslot;
+
+       if (kvm->arch.iommu_noncoherent)
+               kvm_arch_register_noncoherent_dma(kvm);
+
+       idx = srcu_read_lock(&kvm->srcu);
+       slots = kvm_memslots(kvm);
+
+       kvm_for_each_memslot(memslot, slots) {
+               r = kvm_iommu_map_pages(kvm, memslot);
+               if (r)
+                       break;
+       }
+       srcu_read_unlock(&kvm->srcu, idx);
+
+       return r;
+}
+
+int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
+{
+       struct iommu_domain *domain = kvm->arch.iommu_domain;
+       int r;
+       bool noncoherent;
+
+       /* check if iommu exists and in use */
+       if (!domain)
+               return 0;
+
+       if (pdev == NULL)
+               return -ENODEV;
+
+       r = iommu_attach_device(domain, &pdev->dev);
+       if (r) {
+               dev_err(&pdev->dev, "kvm assign device failed ret %d", r);
+               return r;
+       }
+
+       noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY);
+
+       /* Check if need to update IOMMU page table for guest memory */
+       if (noncoherent != kvm->arch.iommu_noncoherent) {
+               kvm_iommu_unmap_memslots(kvm);
+               kvm->arch.iommu_noncoherent = noncoherent;
+               r = kvm_iommu_map_memslots(kvm);
+               if (r)
+                       goto out_unmap;
+       }
+
+       pci_set_dev_assigned(pdev);
+
+       dev_info(&pdev->dev, "kvm assign device\n");
+
+       return 0;
+out_unmap:
+       kvm_iommu_unmap_memslots(kvm);
+       return r;
+}
+
+int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
+{
+       struct iommu_domain *domain = kvm->arch.iommu_domain;
+
+       /* check if iommu exists and in use */
+       if (!domain)
+               return 0;
+
+       if (pdev == NULL)
+               return -ENODEV;
+
+       iommu_detach_device(domain, &pdev->dev);
+
+       pci_clear_dev_assigned(pdev);
+
+       dev_info(&pdev->dev, "kvm deassign device\n");
+
+       return 0;
+}
+
+int kvm_iommu_map_guest(struct kvm *kvm)
+{
+       int r;
+
+       if (!iommu_present(&pci_bus_type)) {
+               printk(KERN_ERR "%s: iommu not found\n", __func__);
+               return -ENODEV;
+       }
+
+       mutex_lock(&kvm->slots_lock);
+
+       kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type);
+       if (!kvm->arch.iommu_domain) {
+               r = -ENOMEM;
+               goto out_unlock;
+       }
+
+       if (!allow_unsafe_assigned_interrupts &&
+           !iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) {
+               printk(KERN_WARNING "%s: No interrupt remapping support,"
+                      " disallowing device assignment."
+                      " Re-enble with \"allow_unsafe_assigned_interrupts=1\""
+                      " module option.\n", __func__);
+               iommu_domain_free(kvm->arch.iommu_domain);
+               kvm->arch.iommu_domain = NULL;
+               r = -EPERM;
+               goto out_unlock;
+       }
+
+       r = kvm_iommu_map_memslots(kvm);
+       if (r)
+               kvm_iommu_unmap_memslots(kvm);
+
+out_unlock:
+       mutex_unlock(&kvm->slots_lock);
+       return r;
+}
+
+static void kvm_iommu_put_pages(struct kvm *kvm,
+                               gfn_t base_gfn, unsigned long npages)
+{
+       struct iommu_domain *domain;
+       gfn_t end_gfn, gfn;
+       pfn_t pfn;
+       u64 phys;
+
+       domain  = kvm->arch.iommu_domain;
+       end_gfn = base_gfn + npages;
+       gfn     = base_gfn;
+
+       /* check if iommu exists and in use */
+       if (!domain)
+               return;
+
+       while (gfn < end_gfn) {
+               unsigned long unmap_pages;
+               size_t size;
+
+               /* Get physical address */
+               phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
+
+               if (!phys) {
+                       gfn++;
+                       continue;
+               }
+
+               pfn  = phys >> PAGE_SHIFT;
+
+               /* Unmap address from IO address space */
+               size       = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
+               unmap_pages = 1ULL << get_order(size);
+
+               /* Unpin all pages we just unmapped to not leak any memory */
+               kvm_unpin_pages(kvm, pfn, unmap_pages);
+
+               gfn += unmap_pages;
+       }
+}
+
+void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+       kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages);
+}
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm)
+{
+       int idx;
+       struct kvm_memslots *slots;
+       struct kvm_memory_slot *memslot;
+
+       idx = srcu_read_lock(&kvm->srcu);
+       slots = kvm_memslots(kvm);
+
+       kvm_for_each_memslot(memslot, slots)
+               kvm_iommu_unmap_pages(kvm, memslot);
+
+       srcu_read_unlock(&kvm->srcu, idx);
+
+       if (kvm->arch.iommu_noncoherent)
+               kvm_arch_unregister_noncoherent_dma(kvm);
+
+       return 0;
+}
+
+int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+       struct iommu_domain *domain = kvm->arch.iommu_domain;
+
+       /* check if iommu exists and in use */
+       if (!domain)
+               return 0;
+
+       mutex_lock(&kvm->slots_lock);
+       kvm_iommu_unmap_memslots(kvm);
+       kvm->arch.iommu_domain = NULL;
+       kvm->arch.iommu_noncoherent = false;
+       mutex_unlock(&kvm->slots_lock);
+
+       iommu_domain_free(domain);
+       return 0;
+}
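
The three while loops in kvm_iommu_map_pages() above shrink the candidate mapping size until it both fits inside the memslot and is aligned at the guest-physical and host-virtual addresses. A standalone sketch of that logic, with invented example values (a mis-aligned gfn collapses a 2 MiB candidate down to 4 KiB):

#include <stdio.h>

#define PAGE_SHIFT 12

static unsigned long fit_page_size(unsigned long page_size, unsigned long gfn,
                                   unsigned long hva, unsigned long end_gfn)
{
        /* do not run past the end of the memslot */
        while (gfn + (page_size >> PAGE_SHIFT) > end_gfn)
                page_size >>= 1;
        /* guest-physical alignment */
        while ((gfn << PAGE_SHIFT) & (page_size - 1))
                page_size >>= 1;
        /* host-virtual alignment */
        while (hva & (page_size - 1))
                page_size >>= 1;
        return page_size;
}

int main(void)
{
        /* 2 MiB candidate, gfn 0x201 is not 2 MiB aligned -> prints 4096 */
        printf("%lu\n", fit_page_size(2UL << 20, 0x201, 0x201000, 0x400));
        return 0;
}
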
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
new file mode 100644 (file)
index 0000000..72298b3
--- /dev/null
@@ -0,0 +1,332 @@
+/*
+ * irq_comm.c: Common API for in kernel interrupt controller
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * Authors:
+ *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
+ *
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <trace/events/kvm.h>
+
+#include <asm/msidef.h>
+
+#include "irq.h"
+
+#include "ioapic.h"
+
+static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
+                          struct kvm *kvm, int irq_source_id, int level,
+                          bool line_status)
+{
+       struct kvm_pic *pic = pic_irqchip(kvm);
+       return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
+}
+
+static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
+                             struct kvm *kvm, int irq_source_id, int level,
+                             bool line_status)
+{
+       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+       return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level,
+                               line_status);
+}
+
+static inline bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
+{
+       return irq->delivery_mode == APIC_DM_LOWEST;
+}
+
+int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
+               struct kvm_lapic_irq *irq, unsigned long *dest_map)
+{
+       int i, r = -1;
+       struct kvm_vcpu *vcpu, *lowest = NULL;
+
+       if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
+                       kvm_is_dm_lowest_prio(irq)) {
+               printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
+               irq->delivery_mode = APIC_DM_FIXED;
+       }
+
+       if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
+               return r;
+
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               if (!kvm_apic_present(vcpu))
+                       continue;
+
+               if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
+                                       irq->dest_id, irq->dest_mode))
+                       continue;
+
+               if (!kvm_is_dm_lowest_prio(irq)) {
+                       if (r < 0)
+                               r = 0;
+                       r += kvm_apic_set_irq(vcpu, irq, dest_map);
+               } else if (kvm_lapic_enabled(vcpu)) {
+                       if (!lowest)
+                               lowest = vcpu;
+                       else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
+                               lowest = vcpu;
+               }
+       }
+
+       if (lowest)
+               r = kvm_apic_set_irq(lowest, irq, dest_map);
+
+       return r;
+}
+
+static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+                                  struct kvm_lapic_irq *irq)
+{
+       trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+
+       irq->dest_id = (e->msi.address_lo &
+                       MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+       irq->vector = (e->msi.data &
+                       MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
+       irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
+       irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
+       irq->delivery_mode = e->msi.data & 0x700;
+       irq->level = 1;
+       irq->shorthand = 0;
+       /* TODO Deal with RH bit of MSI message address */
+}
+
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+               struct kvm *kvm, int irq_source_id, int level, bool line_status)
+{
+       struct kvm_lapic_irq irq;
+
+       if (!level)
+               return -1;
+
+       kvm_set_msi_irq(e, &irq);
+
+       return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
+}
+
+static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
+                        struct kvm *kvm)
+{
+       struct kvm_lapic_irq irq;
+       int r;
+
+       kvm_set_msi_irq(e, &irq);
+
+       if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
+               return r;
+       else
+               return -EWOULDBLOCK;
+}
+
+/*
+ * Deliver an IRQ in an atomic context if we can, or return a failure so
+ * that the user can retry in a process context.
+ * Return value:
+ *  -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
+ *  Other values - No need to retry.
+ */
+int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
+{
+       struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
+       struct kvm_kernel_irq_routing_entry *e;
+       int ret = -EINVAL;
+       int idx;
+
+       trace_kvm_set_irq(irq, level, irq_source_id);
+
+       /*
+        * Injection into either PIC or IOAPIC might need to scan all CPUs,
+        * which would need to be retried from thread context; when the same
+        * GSI is connected to both PIC and IOAPIC, we'd have to report a
+        * partial failure here.
+        * Since there's no easy way to do this, we only support injecting MSI
+        * which is limited to 1:1 GSI mapping.
+        */
+       idx = srcu_read_lock(&kvm->irq_srcu);
+       if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
+               e = &entries[0];
+               if (likely(e->type == KVM_IRQ_ROUTING_MSI))
+                       ret = kvm_set_msi_inatomic(e, kvm);
+               else
+                       ret = -EWOULDBLOCK;
+       }
+       srcu_read_unlock(&kvm->irq_srcu, idx);
+       return ret;
+}
+
+int kvm_request_irq_source_id(struct kvm *kvm)
+{
+       unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
+       int irq_source_id;
+
+       mutex_lock(&kvm->irq_lock);
+       irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG);
+
+       if (irq_source_id >= BITS_PER_LONG) {
+               printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n");
+               irq_source_id = -EFAULT;
+               goto unlock;
+       }
+
+       ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
+       ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
+       set_bit(irq_source_id, bitmap);
+unlock:
+       mutex_unlock(&kvm->irq_lock);
+
+       return irq_source_id;
+}
+
+void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
+{
+       ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
+       ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
+
+       mutex_lock(&kvm->irq_lock);
+       if (irq_source_id < 0 ||
+           irq_source_id >= BITS_PER_LONG) {
+               printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
+               goto unlock;
+       }
+       clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
+       if (!irqchip_in_kernel(kvm))
+               goto unlock;
+
+       kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
+       kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id);
+unlock:
+       mutex_unlock(&kvm->irq_lock);
+}
+
+void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
+                                   struct kvm_irq_mask_notifier *kimn)
+{
+       mutex_lock(&kvm->irq_lock);
+       kimn->irq = irq;
+       hlist_add_head_rcu(&kimn->link, &kvm->arch.mask_notifier_list);
+       mutex_unlock(&kvm->irq_lock);
+}
+
+void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
+                                     struct kvm_irq_mask_notifier *kimn)
+{
+       mutex_lock(&kvm->irq_lock);
+       hlist_del_rcu(&kimn->link);
+       mutex_unlock(&kvm->irq_lock);
+       synchronize_srcu(&kvm->irq_srcu);
+}
+
+void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
+                            bool mask)
+{
+       struct kvm_irq_mask_notifier *kimn;
+       int idx, gsi;
+
+       idx = srcu_read_lock(&kvm->irq_srcu);
+       gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+       if (gsi != -1)
+               hlist_for_each_entry_rcu(kimn, &kvm->arch.mask_notifier_list, link)
+                       if (kimn->irq == gsi)
+                               kimn->func(kimn, mask);
+       srcu_read_unlock(&kvm->irq_srcu, idx);
+}
+
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+                         const struct kvm_irq_routing_entry *ue)
+{
+       int r = -EINVAL;
+       int delta;
+       unsigned max_pin;
+
+       switch (ue->type) {
+       case KVM_IRQ_ROUTING_IRQCHIP:
+               delta = 0;
+               switch (ue->u.irqchip.irqchip) {
+               case KVM_IRQCHIP_PIC_MASTER:
+                       e->set = kvm_set_pic_irq;
+                       max_pin = PIC_NUM_PINS;
+                       break;
+               case KVM_IRQCHIP_PIC_SLAVE:
+                       e->set = kvm_set_pic_irq;
+                       max_pin = PIC_NUM_PINS;
+                       delta = 8;
+                       break;
+               case KVM_IRQCHIP_IOAPIC:
+                       max_pin = KVM_IOAPIC_NUM_PINS;
+                       e->set = kvm_set_ioapic_irq;
+                       break;
+               default:
+                       goto out;
+               }
+               e->irqchip.irqchip = ue->u.irqchip.irqchip;
+               e->irqchip.pin = ue->u.irqchip.pin + delta;
+               if (e->irqchip.pin >= max_pin)
+                       goto out;
+               break;
+       case KVM_IRQ_ROUTING_MSI:
+               e->set = kvm_set_msi;
+               e->msi.address_lo = ue->u.msi.address_lo;
+               e->msi.address_hi = ue->u.msi.address_hi;
+               e->msi.data = ue->u.msi.data;
+               break;
+       default:
+               goto out;
+       }
+
+       r = 0;
+out:
+       return r;
+}
+
+#define IOAPIC_ROUTING_ENTRY(irq) \
+       { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
+         .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
+#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
+
+#define PIC_ROUTING_ENTRY(irq) \
+       { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
+         .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } }
+#define ROUTING_ENTRY2(irq) \
+       IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
+
+static const struct kvm_irq_routing_entry default_routing[] = {
+       ROUTING_ENTRY2(0), ROUTING_ENTRY2(1),
+       ROUTING_ENTRY2(2), ROUTING_ENTRY2(3),
+       ROUTING_ENTRY2(4), ROUTING_ENTRY2(5),
+       ROUTING_ENTRY2(6), ROUTING_ENTRY2(7),
+       ROUTING_ENTRY2(8), ROUTING_ENTRY2(9),
+       ROUTING_ENTRY2(10), ROUTING_ENTRY2(11),
+       ROUTING_ENTRY2(12), ROUTING_ENTRY2(13),
+       ROUTING_ENTRY2(14), ROUTING_ENTRY2(15),
+       ROUTING_ENTRY1(16), ROUTING_ENTRY1(17),
+       ROUTING_ENTRY1(18), ROUTING_ENTRY1(19),
+       ROUTING_ENTRY1(20), ROUTING_ENTRY1(21),
+       ROUTING_ENTRY1(22), ROUTING_ENTRY1(23),
+};
+
+int kvm_setup_default_irq_routing(struct kvm *kvm)
+{
+       return kvm_set_irq_routing(kvm, default_routing,
+                                  ARRAY_SIZE(default_routing), 0);
+}
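
kvm_set_msi_irq() above extracts the destination from the MSI address word and the vector, trigger and delivery mode from the MSI data word via the msidef.h masks. A rough standalone equivalent with the shifts written out as literals; the example message is made up:

#include <stdio.h>

int main(void)
{
        unsigned int address_lo = 0xfee01000;   /* dest id 0x01, physical mode */
        unsigned int data       = 0x00000031;   /* fixed delivery, vector 0x31 */

        unsigned int dest_id       = (address_lo >> 12) & 0xff;
        unsigned int dest_mode     = (address_lo >> 2) & 1;    /* 0 = physical */
        unsigned int vector        = data & 0xff;
        unsigned int trig_mode     = (data >> 15) & 1;         /* 0 = edge */
        unsigned int delivery_mode = data & 0x700;

        printf("dest=%#x mode=%u vec=%#x trig=%u dm=%#x\n",
               dest_id, dest_mode, vector, trig_mode, delivery_mode);
        return 0;
}
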
index b8345dd..4f0c0b9 100644 (file)
@@ -68,6 +68,9 @@
 #define MAX_APIC_VECTOR                        256
 #define APIC_VECTORS_PER_REG           32
 
+#define APIC_BROADCAST                 0xFF
+#define X2APIC_BROADCAST               0xFFFFFFFFul
+
 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)
 
@@ -129,8 +132,6 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
        return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
-#define KVM_X2APIC_CID_BITS 0
-
 static void recalculate_apic_map(struct kvm *kvm)
 {
        struct kvm_apic_map *new, *old = NULL;
@@ -149,42 +150,56 @@ static void recalculate_apic_map(struct kvm *kvm)
        new->cid_shift = 8;
        new->cid_mask = 0;
        new->lid_mask = 0xff;
+       new->broadcast = APIC_BROADCAST;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvm_lapic *apic = vcpu->arch.apic;
-               u16 cid, lid;
-               u32 ldr;
 
                if (!kvm_apic_present(vcpu))
                        continue;
 
+               if (apic_x2apic_mode(apic)) {
+                       new->ldr_bits = 32;
+                       new->cid_shift = 16;
+                       new->cid_mask = new->lid_mask = 0xffff;
+                       new->broadcast = X2APIC_BROADCAST;
+               } else if (kvm_apic_get_reg(apic, APIC_LDR)) {
+                       if (kvm_apic_get_reg(apic, APIC_DFR) ==
+                                                       APIC_DFR_CLUSTER) {
+                               new->cid_shift = 4;
+                               new->cid_mask = 0xf;
+                               new->lid_mask = 0xf;
+                       } else {
+                               new->cid_shift = 8;
+                               new->cid_mask = 0;
+                               new->lid_mask = 0xff;
+                       }
+               }
+
                /*
                 * All APICs have to be configured in the same mode by an OS.
                * We take advantage of this while building logical id lookup
-                * table. After reset APICs are in xapic/flat mode, so if we
-                * find apic with different setting we assume this is the mode
+                * table. After reset APICs are in software disabled mode, so if
+                * we find apic with different setting we assume this is the mode
                 * OS wants all apics to be in; build lookup table accordingly.
                 */
-               if (apic_x2apic_mode(apic)) {
-                       new->ldr_bits = 32;
-                       new->cid_shift = 16;
-                       new->cid_mask = (1 << KVM_X2APIC_CID_BITS) - 1;
-                       new->lid_mask = 0xffff;
-               } else if (kvm_apic_sw_enabled(apic) &&
-                               !new->cid_mask /* flat mode */ &&
-                               kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
-                       new->cid_shift = 4;
-                       new->cid_mask = 0xf;
-                       new->lid_mask = 0xf;
-               }
+               if (kvm_apic_sw_enabled(apic))
+                       break;
+       }
 
-               new->phys_map[kvm_apic_id(apic)] = apic;
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               struct kvm_lapic *apic = vcpu->arch.apic;
+               u16 cid, lid;
+               u32 ldr, aid;
 
+               aid = kvm_apic_id(apic);
                ldr = kvm_apic_get_reg(apic, APIC_LDR);
                cid = apic_cluster_id(new, ldr);
                lid = apic_logical_id(new, ldr);
 
-               if (lid)
+               if (aid < ARRAY_SIZE(new->phys_map))
+                       new->phys_map[aid] = apic;
+               if (lid && cid < ARRAY_SIZE(new->logical_map))
                        new->logical_map[cid][ffs(lid) - 1] = apic;
        }
 out:
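
For the xAPIC cluster mode handled above (cid_shift = 4, cid_mask = lid_mask = 0xf), the top byte of the LDR splits into a cluster number and a per-cluster member bitmask. A small sketch with an invented LDR value:

#include <stdio.h>

int main(void)
{
        unsigned int ldr = 0x23000000;          /* as written by the guest */
        unsigned int logical_id = ldr >> 24;    /* ldr_bits = 8 in xAPIC mode */

        unsigned int cid = (logical_id >> 4) & 0xf;     /* cluster 2 */
        unsigned int lid = logical_id & 0xf;            /* member bitmask 0x3 */

        printf("cluster=%u members=%#x\n", cid, lid);
        return 0;
}
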
@@ -201,11 +216,13 @@ out:
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 {
-       u32 prev = kvm_apic_get_reg(apic, APIC_SPIV);
+       bool enabled = val & APIC_SPIV_APIC_ENABLED;
 
        apic_set_reg(apic, APIC_SPIV, val);
-       if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) {
-               if (val & APIC_SPIV_APIC_ENABLED) {
+
+       if (enabled != apic->sw_enabled) {
+               apic->sw_enabled = enabled;
+               if (enabled) {
                        static_key_slow_dec_deferred(&apic_sw_disabled);
                        recalculate_apic_map(apic->vcpu->kvm);
                } else
@@ -237,21 +254,17 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
 
 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
 {
-       return ((kvm_apic_get_reg(apic, APIC_LVTT) &
-               apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
+       return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
 }
 
 static inline int apic_lvtt_period(struct kvm_lapic *apic)
 {
-       return ((kvm_apic_get_reg(apic, APIC_LVTT) &
-               apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
+       return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
 }
 
 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
 {
-       return ((kvm_apic_get_reg(apic, APIC_LVTT) &
-               apic->lapic_timer.timer_mode_mask) ==
-                       APIC_LVT_TIMER_TSCDEADLINE);
+       return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
 }
 
 static inline int apic_lvt_nmi_mode(u32 lvt_val)
@@ -326,8 +339,12 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 
 static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
 {
-       apic->irr_pending = true;
        apic_set_vector(vec, apic->regs + APIC_IRR);
+       /*
+        * irr_pending must be true if any interrupt is pending; set it after
+        * APIC_IRR to avoid a race with apic_clear_irr
+        */
+       apic->irr_pending = true;
 }
 
 static inline int apic_search_irr(struct kvm_lapic *apic)
@@ -359,13 +376,15 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
 
        vcpu = apic->vcpu;
 
-       apic_clear_vector(vec, apic->regs + APIC_IRR);
-       if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+       if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) {
                /* try to update RVI */
+               apic_clear_vector(vec, apic->regs + APIC_IRR);
                kvm_make_request(KVM_REQ_EVENT, vcpu);
-       else {
-               vec = apic_search_irr(apic);
-               apic->irr_pending = (vec != -1);
+       } else {
+               apic->irr_pending = false;
+               apic_clear_vector(vec, apic->regs + APIC_IRR);
+               if (apic_search_irr(apic) != -1)
+                       apic->irr_pending = true;
        }
 }
 
@@ -558,16 +577,25 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
        apic_update_ppr(apic);
 }
 
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
+static int kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
+{
+       return dest == (apic_x2apic_mode(apic) ?
+                       X2APIC_BROADCAST : APIC_BROADCAST);
+}
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
 {
-       return dest == 0xff || kvm_apic_id(apic) == dest;
+       return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest);
 }
 
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 {
        int result = 0;
        u32 logical_id;
 
+       if (kvm_apic_broadcast(apic, mda))
+               return 1;
+
        if (apic_x2apic_mode(apic)) {
                logical_id = kvm_apic_get_reg(apic, APIC_LDR);
                return logical_id & mda;
@@ -595,7 +623,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 }
 
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-                          int short_hand, int dest, int dest_mode)
+                          int short_hand, unsigned int dest, int dest_mode)
 {
        int result = 0;
        struct kvm_lapic *target = vcpu->arch.apic;
@@ -657,15 +685,24 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
        if (!map)
                goto out;
 
+       if (irq->dest_id == map->broadcast)
+               goto out;
+
+       ret = true;
+
        if (irq->dest_mode == 0) { /* physical mode */
-               if (irq->delivery_mode == APIC_DM_LOWEST ||
-                               irq->dest_id == 0xff)
+               if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
                        goto out;
-               dst = &map->phys_map[irq->dest_id & 0xff];
+
+               dst = &map->phys_map[irq->dest_id];
        } else {
                u32 mda = irq->dest_id << (32 - map->ldr_bits);
+               u16 cid = apic_cluster_id(map, mda);
+
+               if (cid >= ARRAY_SIZE(map->logical_map))
+                       goto out;
 
-               dst = map->logical_map[apic_cluster_id(map, mda)];
+               dst = map->logical_map[cid];
 
                bitmap = apic_logical_id(map, mda);
 
@@ -691,8 +728,6 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
                        *r = 0;
                *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
        }
-
-       ret = true;
 out:
        rcu_read_unlock();
        return ret;
@@ -1034,6 +1069,26 @@ static void update_divide_count(struct kvm_lapic *apic)
                                   apic->divide_count);
 }
 
+static void apic_timer_expired(struct kvm_lapic *apic)
+{
+       struct kvm_vcpu *vcpu = apic->vcpu;
+       wait_queue_head_t *q = &vcpu->wq;
+
+       /*
+        * Note: KVM_REQ_PENDING_TIMER is implicitly checked in
+        * vcpu_enter_guest.
+        */
+       if (atomic_read(&apic->lapic_timer.pending))
+               return;
+
+       atomic_inc(&apic->lapic_timer.pending);
+       /* FIXME: this code should not know anything about vcpus */
+       kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+
+       if (waitqueue_active(q))
+               wake_up_interruptible(q);
+}
+
 static void start_apic_timer(struct kvm_lapic *apic)
 {
        ktime_t now;
@@ -1096,9 +1151,10 @@ static void start_apic_timer(struct kvm_lapic *apic)
                if (likely(tscdeadline > guest_tsc)) {
                        ns = (tscdeadline - guest_tsc) * 1000000ULL;
                        do_div(ns, this_tsc_khz);
-               }
-               hrtimer_start(&apic->lapic_timer.timer,
-                       ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
+                       hrtimer_start(&apic->lapic_timer.timer,
+                               ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
+               } else
+                       apic_timer_expired(apic);
 
                local_irq_restore(flags);
        }
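
The tscdeadline branch above arms the hrtimer for (tscdeadline - guest_tsc) * 1000000 / this_tsc_khz nanoseconds. A quick worked example with invented numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t guest_tsc    = 1000000000ULL;  /* made-up current guest TSC */
        uint64_t tscdeadline  = 1005000000ULL;  /* 5,000,000 cycles ahead */
        uint64_t this_tsc_khz = 2500000ULL;     /* 2.5 GHz guest TSC */

        uint64_t ns = (tscdeadline - guest_tsc) * 1000000ULL / this_tsc_khz;

        printf("arm hrtimer for %llu ns\n", (unsigned long long)ns); /* 2000000 */
        return 0;
}
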
@@ -1203,17 +1259,20 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
                break;
 
-       case APIC_LVTT:
-               if ((kvm_apic_get_reg(apic, APIC_LVTT) &
-                   apic->lapic_timer.timer_mode_mask) !=
-                  (val & apic->lapic_timer.timer_mode_mask))
+       case APIC_LVTT: {
+               u32 timer_mode = val & apic->lapic_timer.timer_mode_mask;
+
+               if (apic->lapic_timer.timer_mode != timer_mode) {
+                       apic->lapic_timer.timer_mode = timer_mode;
                        hrtimer_cancel(&apic->lapic_timer.timer);
+               }
 
                if (!kvm_apic_sw_enabled(apic))
                        val |= APIC_LVT_MASKED;
                val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
                apic_set_reg(apic, APIC_LVTT, val);
                break;
+       }
 
        case APIC_TMICT:
                if (apic_lvtt_tscdeadline(apic))
@@ -1320,7 +1379,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
        if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
                static_key_slow_dec_deferred(&apic_hw_disabled);
 
-       if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
+       if (!apic->sw_enabled)
                static_key_slow_dec_deferred(&apic_sw_disabled);
 
        if (apic->regs)
@@ -1355,9 +1414,6 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
                return;
 
        hrtimer_cancel(&apic->lapic_timer.timer);
-       /* Inject here so clearing tscdeadline won't override new value */
-       if (apic_has_pending_timer(vcpu))
-               kvm_inject_apic_timer_irqs(vcpu);
        apic->lapic_timer.tscdeadline = data;
        start_apic_timer(apic);
 }
@@ -1422,6 +1478,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
        apic->base_address = apic->vcpu->arch.apic_base &
                             MSR_IA32_APICBASE_BASE;
 
+       if ((value & MSR_IA32_APICBASE_ENABLE) &&
+            apic->base_address != APIC_DEFAULT_PHYS_BASE)
+               pr_warn_once("APIC base relocation is unsupported by KVM");
+
        /* with FSB delivery interrupt, we can restart APIC functionality */
        apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
                   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
@@ -1447,6 +1507,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 
        for (i = 0; i < APIC_LVT_NUM; i++)
                apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
+       apic->lapic_timer.timer_mode = 0;
        apic_set_reg(apic, APIC_LVT0,
                     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
 
@@ -1538,23 +1599,8 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 {
        struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
        struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
-       struct kvm_vcpu *vcpu = apic->vcpu;
-       wait_queue_head_t *q = &vcpu->wq;
-
-       /*
-        * There is a race window between reading and incrementing, but we do
-        * not care about potentially losing timer events in the !reinject
-        * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
-        * in vcpu_enter_guest.
-        */
-       if (!atomic_read(&ktimer->pending)) {
-               atomic_inc(&ktimer->pending);
-               /* FIXME: this code should not know anything about vcpus */
-               kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
-       }
 
-       if (waitqueue_active(q))
-               wake_up_interruptible(q);
+       apic_timer_expired(apic);
 
        if (lapic_is_periodic(apic)) {
                hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
@@ -1693,6 +1739,9 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
        apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
                                1 : count_vectors(apic->regs + APIC_ISR);
        apic->highest_isr_cache = -1;
+       if (kvm_x86_ops->hwapic_irr_update)
+               kvm_x86_ops->hwapic_irr_update(vcpu,
+                               apic_find_highest_irr(apic));
        kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
        kvm_make_request(KVM_REQ_EVENT, vcpu);
        kvm_rtc_eoi_tracking_restore_one(vcpu);
@@ -1837,8 +1886,11 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
        if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
                return 1;
 
+       if (reg == APIC_ICR2)
+               return 1;
+
        /* if this is ICR write vector before command */
-       if (msr == 0x830)
+       if (reg == APIC_ICR)
                apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
        return apic_reg_write(apic, reg, (u32)data);
 }
@@ -1851,9 +1903,15 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
        if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
                return 1;
 
+       if (reg == APIC_DFR || reg == APIC_ICR2) {
+               apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
+                          reg);
+               return 1;
+       }
+
        if (apic_reg_read(apic, reg, 4, &low))
                return 1;
-       if (msr == 0x830)
+       if (reg == APIC_ICR)
                apic_reg_read(apic, APIC_ICR2, 4, &high);
 
        *data = (((u64)high) << 32) | low;
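/*
 * A small sketch of the x2APIC MSR-to-register mapping that the
 * reg == APIC_ICR / APIC_ICR2 checks above depend on; the constants are the
 * architectural values (MSR base 0x800, one MSR per 16-byte xAPIC register),
 * defined locally for the example rather than taken from a kernel header.
 */
#include <stdint.h>
#include <stdio.h>

#define APIC_BASE_MSR 0x800
#define APIC_ICR      0x300
#define APIC_ICR2     0x310

static uint32_t x2apic_msr_to_reg(uint32_t msr)
{
        return (msr - APIC_BASE_MSR) << 4;
}

int main(void)
{
        printf("MSR 0x830 -> reg 0x%x (ICR, 64 bits split over ICR/ICR2)\n",
               x2apic_msr_to_reg(0x830));
        printf("MSR 0x831 -> reg 0x%x (ICR2, reserved in x2APIC mode)\n",
               x2apic_msr_to_reg(0x831));
        return 0;
}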
@@ -1908,7 +1966,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
 void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
-       unsigned int sipi_vector;
+       u8 sipi_vector;
        unsigned long pe;
 
        if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events)
index 6a11845..c674fce 100644 (file)
@@ -11,6 +11,7 @@
 struct kvm_timer {
        struct hrtimer timer;
        s64 period;                             /* unit: ns */
+       u32 timer_mode;
        u32 timer_mode_mask;
        u64 tscdeadline;
        atomic_t pending;                       /* accumulated triggered timers */
@@ -22,6 +23,7 @@ struct kvm_lapic {
        struct kvm_timer lapic_timer;
        u32 divide_count;
        struct kvm_vcpu *vcpu;
+       bool sw_enabled;
        bool irr_pending;
        /* Number of bits set in ISR. */
        s16 isr_count;
@@ -55,8 +57,8 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
 void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
 void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
                unsigned long *dest_map);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
@@ -119,11 +121,11 @@ static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic)
 
 extern struct static_key_deferred apic_sw_disabled;
 
-static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic)
+static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic)
 {
        if (static_key_false(&apic_sw_disabled.key))
-               return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
-       return APIC_SPIV_APIC_ENABLED;
+               return apic->sw_enabled;
+       return true;
 }
 
 static inline bool kvm_apic_present(struct kvm_vcpu *vcpu)
@@ -152,8 +154,6 @@ static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
        ldr >>= 32 - map->ldr_bits;
        cid = (ldr >> map->cid_shift) & map->cid_mask;
 
-       BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
-
        return cid;
 }
 
index 978f402..10fbed1 100644 (file)
@@ -214,13 +214,12 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
 #define MMIO_GEN_LOW_SHIFT             10
 #define MMIO_GEN_LOW_MASK              ((1 << MMIO_GEN_LOW_SHIFT) - 2)
 #define MMIO_GEN_MASK                  ((1 << MMIO_GEN_SHIFT) - 1)
-#define MMIO_MAX_GEN                   ((1 << MMIO_GEN_SHIFT) - 1)
 
 static u64 generation_mmio_spte_mask(unsigned int gen)
 {
        u64 mask;
 
-       WARN_ON(gen > MMIO_MAX_GEN);
+       WARN_ON(gen & ~MMIO_GEN_MASK);
 
        mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
        mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
@@ -263,13 +262,13 @@ static bool is_mmio_spte(u64 spte)
 
 static gfn_t get_mmio_spte_gfn(u64 spte)
 {
-       u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
+       u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
        return (spte & ~mask) >> PAGE_SHIFT;
 }
 
 static unsigned get_mmio_spte_access(u64 spte)
 {
-       u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
+       u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
        return (spte & ~mask) & ~PAGE_MASK;
 }
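/*
 * A minimal sketch of how a generation number can be split across the low
 * and high free bits of an MMIO spte, as the masks above imply; the shift
 * and width values used here are placeholders for illustration, not the
 * constants from mmu.c.
 */
#include <stdint.h>
#include <stdio.h>

#define GEN_LOW_BITS   9
#define GEN_LOW_SHIFT  3
#define GEN_HIGH_SHIFT 52

static uint64_t pack_gen(uint64_t gen)
{
        uint64_t low  = gen & ((1u << GEN_LOW_BITS) - 1);
        uint64_t high = gen >> GEN_LOW_BITS;

        return (low << GEN_LOW_SHIFT) | (high << GEN_HIGH_SHIFT);
}

static uint64_t unpack_gen(uint64_t spte)
{
        uint64_t low  = (spte >> GEN_LOW_SHIFT) & ((1u << GEN_LOW_BITS) - 1);
        uint64_t high = spte >> GEN_HIGH_SHIFT;

        return (high << GEN_LOW_BITS) | low;
}

int main(void)
{
        uint64_t gen = 0x1abc;

        printf("roundtrip ok: %d\n", unpack_gen(pack_gen(gen)) == gen);
        return 0;
}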
 
index 7527cef..41dd038 100644 (file)
@@ -1056,9 +1056,11 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       WARN_ON(adjustment < 0);
-       if (host)
-               adjustment = svm_scale_tsc(vcpu, adjustment);
+       if (host) {
+               if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
+                       WARN_ON(adjustment < 0);
+               adjustment = svm_scale_tsc(vcpu, (u64)adjustment);
+       }
 
        svm->vmcb->control.tsc_offset += adjustment;
        if (is_guest_mode(vcpu))
@@ -2999,7 +3001,6 @@ static int dr_interception(struct vcpu_svm *svm)
 {
        int reg, dr;
        unsigned long val;
-       int err;
 
        if (svm->vcpu.guest_debug == 0) {
                /*
@@ -3019,12 +3020,15 @@ static int dr_interception(struct vcpu_svm *svm)
        dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
 
        if (dr >= 16) { /* mov to DRn */
+               if (!kvm_require_dr(&svm->vcpu, dr - 16))
+                       return 1;
                val = kvm_register_read(&svm->vcpu, reg);
                kvm_set_dr(&svm->vcpu, dr - 16, val);
        } else {
-               err = kvm_get_dr(&svm->vcpu, dr, &val);
-               if (!err)
-                       kvm_register_write(&svm->vcpu, reg, val);
+               if (!kvm_require_dr(&svm->vcpu, dr))
+                       return 1;
+               kvm_get_dr(&svm->vcpu, dr, &val);
+               kvm_register_write(&svm->vcpu, reg, val);
        }
 
        skip_emulated_instruction(&svm->vcpu);
@@ -4123,6 +4127,11 @@ static bool svm_mpx_supported(void)
        return false;
 }
 
+static bool svm_xsaves_supported(void)
+{
+       return false;
+}
+
 static bool svm_has_wbinvd_exit(void)
 {
        return true;
@@ -4410,6 +4419,7 @@ static struct kvm_x86_ops svm_x86_ops = {
        .rdtscp_supported = svm_rdtscp_supported,
        .invpcid_supported = svm_invpcid_supported,
        .mpx_supported = svm_mpx_supported,
+       .xsaves_supported = svm_xsaves_supported,
 
        .set_supported_cpuid = svm_set_supported_cpuid,
 
index 6b06ab8..c2a34bb 100644 (file)
@@ -5,6 +5,7 @@
 #include <asm/vmx.h>
 #include <asm/svm.h>
 #include <asm/clocksource.h>
+#include <asm/pvclock-abi.h>
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm
@@ -877,6 +878,42 @@ TRACE_EVENT(kvm_ple_window,
 #define trace_kvm_ple_window_shrink(vcpu_id, new, old) \
        trace_kvm_ple_window(false, vcpu_id, new, old)
 
+TRACE_EVENT(kvm_pvclock_update,
+       TP_PROTO(unsigned int vcpu_id, struct pvclock_vcpu_time_info *pvclock),
+       TP_ARGS(vcpu_id, pvclock),
+
+       TP_STRUCT__entry(
+               __field(        unsigned int,   vcpu_id                 )
+               __field(        __u32,          version                 )
+               __field(        __u64,          tsc_timestamp           )
+               __field(        __u64,          system_time             )
+               __field(        __u32,          tsc_to_system_mul       )
+               __field(        __s8,           tsc_shift               )
+               __field(        __u8,           flags                   )
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu_id           = vcpu_id;
+               __entry->version           = pvclock->version;
+               __entry->tsc_timestamp     = pvclock->tsc_timestamp;
+               __entry->system_time       = pvclock->system_time;
+               __entry->tsc_to_system_mul = pvclock->tsc_to_system_mul;
+               __entry->tsc_shift         = pvclock->tsc_shift;
+               __entry->flags             = pvclock->flags;
+       ),
+
+       TP_printk("vcpu_id %u, pvclock { version %u, tsc_timestamp 0x%llx, "
+                 "system_time 0x%llx, tsc_to_system_mul 0x%x, tsc_shift %d, "
+                 "flags 0x%x }",
+                 __entry->vcpu_id,
+                 __entry->version,
+                 __entry->tsc_timestamp,
+                 __entry->system_time,
+                 __entry->tsc_to_system_mul,
+                 __entry->tsc_shift,
+                 __entry->flags)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
index 3e556c6..feb852b 100644 (file)
@@ -99,13 +99,15 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
 static bool __read_mostly nested = 0;
 module_param(nested, bool, S_IRUGO);
 
+static u64 __read_mostly host_xss;
+
 #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
 #define KVM_VM_CR0_ALWAYS_ON                                           \
        (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS                                     \
        (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR      \
-        | X86_CR4_OSXMMEXCPT)
+        | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
 
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
@@ -214,6 +216,7 @@ struct __packed vmcs12 {
        u64 virtual_apic_page_addr;
        u64 apic_access_addr;
        u64 ept_pointer;
+       u64 xss_exit_bitmap;
        u64 guest_physical_address;
        u64 vmcs_link_pointer;
        u64 guest_ia32_debugctl;
@@ -616,6 +619,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
        FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
        FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
        FIELD64(EPT_POINTER, ept_pointer),
+       FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
        FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
        FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
        FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
@@ -720,12 +724,15 @@ static const unsigned short vmcs_field_to_offset_table[] = {
        FIELD(HOST_RSP, host_rsp),
        FIELD(HOST_RIP, host_rip),
 };
-static const int max_vmcs_field = ARRAY_SIZE(vmcs_field_to_offset_table);
 
 static inline short vmcs_field_to_offset(unsigned long field)
 {
-       if (field >= max_vmcs_field || vmcs_field_to_offset_table[field] == 0)
-               return -1;
+       BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
+
+       if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
+           vmcs_field_to_offset_table[field] == 0)
+               return -ENOENT;
+
        return vmcs_field_to_offset_table[field];
 }
 
@@ -758,6 +765,7 @@ static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static bool vmx_mpx_supported(void);
+static bool vmx_xsaves_supported(void);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
                            struct kvm_segment *var, int seg);
@@ -1098,6 +1106,12 @@ static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
        return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
 }
 
+static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
+{
+       return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES) &&
+               vmx_xsaves_supported();
+}
+
 static inline bool is_exception(u32 intr_info)
 {
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -1659,12 +1673,20 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
        vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
 
        clear_atomic_switch_msr(vmx, MSR_EFER);
-       /* On ept, can't emulate nx, and must switch nx atomically */
-       if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) {
+
+       /*
+        * On EPT, we can't emulate NX, so we must switch EFER atomically.
+        * On CPUs that support "load IA32_EFER", always switch EFER
+        * atomically, since it's faster than switching it manually.
+        */
+       if (cpu_has_load_ia32_efer ||
+           (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
                guest_efer = vmx->vcpu.arch.efer;
                if (!(guest_efer & EFER_LMA))
                        guest_efer &= ~EFER_LME;
-               add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer);
+               if (guest_efer != host_efer)
+                       add_atomic_switch_msr(vmx, MSR_EFER,
+                                             guest_efer, host_efer);
                return false;
        }
 
@@ -2377,12 +2399,13 @@ static __init void nested_vmx_setup_ctls_msrs(void)
        nested_vmx_secondary_ctls_low = 0;
        nested_vmx_secondary_ctls_high &=
                SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
-               SECONDARY_EXEC_UNRESTRICTED_GUEST |
-               SECONDARY_EXEC_WBINVD_EXITING;
+               SECONDARY_EXEC_WBINVD_EXITING |
+               SECONDARY_EXEC_XSAVES;
 
        if (enable_ept) {
                /* nested EPT: emulate EPT also to L1 */
-               nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
+               nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT |
+                       SECONDARY_EXEC_UNRESTRICTED_GUEST;
                nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
                         VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
                         VMX_EPT_INVEPT_BIT;
@@ -2558,6 +2581,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
                if (!nested_vmx_allowed(vcpu))
                        return 1;
                return vmx_get_vmx_msr(vcpu, msr_index, pdata);
+       case MSR_IA32_XSS:
+               if (!vmx_xsaves_supported())
+                       return 1;
+               data = vcpu->arch.ia32_xss;
+               break;
        case MSR_TSC_AUX:
                if (!to_vmx(vcpu)->rdtscp_enabled)
                        return 1;
@@ -2649,6 +2677,22 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                break;
        case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
                return 1; /* they are read-only */
+       case MSR_IA32_XSS:
+               if (!vmx_xsaves_supported())
+                       return 1;
+               /*
+                * The only supported bit as of Skylake is bit 8, but
+                * it is not supported by KVM.
+                */
+               if (data != 0)
+                       return 1;
+               vcpu->arch.ia32_xss = data;
+               if (vcpu->arch.ia32_xss != host_xss)
+                       add_atomic_switch_msr(vmx, MSR_IA32_XSS,
+                               vcpu->arch.ia32_xss, host_xss);
+               else
+                       clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
+               break;
        case MSR_TSC_AUX:
                if (!vmx->rdtscp_enabled)
                        return 1;
@@ -2884,7 +2928,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                        SECONDARY_EXEC_ENABLE_INVPCID |
                        SECONDARY_EXEC_APIC_REGISTER_VIRT |
                        SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-                       SECONDARY_EXEC_SHADOW_VMCS;
+                       SECONDARY_EXEC_SHADOW_VMCS |
+                       SECONDARY_EXEC_XSAVES;
                if (adjust_vmx_controls(min2, opt2,
                                        MSR_IA32_VMX_PROCBASED_CTLS2,
                                        &_cpu_based_2nd_exec_control) < 0)
@@ -3007,6 +3052,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                }
        }
 
+       if (cpu_has_xsaves)
+               rdmsrl(MSR_IA32_XSS, host_xss);
+
        return 0;
 }
 
@@ -3110,76 +3158,6 @@ static __init int alloc_kvm_area(void)
        return 0;
 }
 
-static __init int hardware_setup(void)
-{
-       if (setup_vmcs_config(&vmcs_config) < 0)
-               return -EIO;
-
-       if (boot_cpu_has(X86_FEATURE_NX))
-               kvm_enable_efer_bits(EFER_NX);
-
-       if (!cpu_has_vmx_vpid())
-               enable_vpid = 0;
-       if (!cpu_has_vmx_shadow_vmcs())
-               enable_shadow_vmcs = 0;
-       if (enable_shadow_vmcs)
-               init_vmcs_shadow_fields();
-
-       if (!cpu_has_vmx_ept() ||
-           !cpu_has_vmx_ept_4levels()) {
-               enable_ept = 0;
-               enable_unrestricted_guest = 0;
-               enable_ept_ad_bits = 0;
-       }
-
-       if (!cpu_has_vmx_ept_ad_bits())
-               enable_ept_ad_bits = 0;
-
-       if (!cpu_has_vmx_unrestricted_guest())
-               enable_unrestricted_guest = 0;
-
-       if (!cpu_has_vmx_flexpriority()) {
-               flexpriority_enabled = 0;
-
-               /*
-                * set_apic_access_page_addr() is used to reload apic access
-                * page upon invalidation.  No need to do anything if the
-                * processor does not have the APIC_ACCESS_ADDR VMCS field.
-                */
-               kvm_x86_ops->set_apic_access_page_addr = NULL;
-       }
-
-       if (!cpu_has_vmx_tpr_shadow())
-               kvm_x86_ops->update_cr8_intercept = NULL;
-
-       if (enable_ept && !cpu_has_vmx_ept_2m_page())
-               kvm_disable_largepages();
-
-       if (!cpu_has_vmx_ple())
-               ple_gap = 0;
-
-       if (!cpu_has_vmx_apicv())
-               enable_apicv = 0;
-
-       if (enable_apicv)
-               kvm_x86_ops->update_cr8_intercept = NULL;
-       else {
-               kvm_x86_ops->hwapic_irr_update = NULL;
-               kvm_x86_ops->deliver_posted_interrupt = NULL;
-               kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
-       }
-
-       if (nested)
-               nested_vmx_setup_ctls_msrs();
-
-       return alloc_kvm_area();
-}
-
-static __exit void hardware_unsetup(void)
-{
-       free_kvm_area();
-}
-
 static bool emulation_required(struct kvm_vcpu *vcpu)
 {
        return emulate_invalid_guest_state && !guest_state_valid(vcpu);
@@ -4396,6 +4374,7 @@ static void ept_set_mmio_spte_mask(void)
        kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
 }
 
+#define VMX_XSS_EXIT_BITMAP 0
 /*
  * Sets up the vmcs for emulated real mode.
  */
@@ -4505,6 +4484,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
        vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
        set_cr4_guest_host_mask(vmx);
 
+       if (vmx_xsaves_supported())
+               vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
+
        return 0;
 }
 
@@ -5163,13 +5145,20 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 static int handle_dr(struct kvm_vcpu *vcpu)
 {
        unsigned long exit_qualification;
-       int dr, reg;
+       int dr, dr7, reg;
+
+       exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+       dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
+
+       /* First, if DR does not exist, trigger UD */
+       if (!kvm_require_dr(vcpu, dr))
+               return 1;
 
        /* Do not handle if the CPL > 0, will trigger GP on re-entry */
        if (!kvm_require_cpl(vcpu, 0))
                return 1;
-       dr = vmcs_readl(GUEST_DR7);
-       if (dr & DR7_GD) {
+       dr7 = vmcs_readl(GUEST_DR7);
+       if (dr7 & DR7_GD) {
                /*
                 * As the vm-exit takes precedence over the debug trap, we
                 * need to emulate the latter, either for the host or the
@@ -5177,17 +5166,14 @@ static int handle_dr(struct kvm_vcpu *vcpu)
                 */
                if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
                        vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
-                       vcpu->run->debug.arch.dr7 = dr;
-                       vcpu->run->debug.arch.pc =
-                               vmcs_readl(GUEST_CS_BASE) +
-                               vmcs_readl(GUEST_RIP);
+                       vcpu->run->debug.arch.dr7 = dr7;
+                       vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
                        vcpu->run->debug.arch.exception = DB_VECTOR;
                        vcpu->run->exit_reason = KVM_EXIT_DEBUG;
                        return 0;
                } else {
-                       vcpu->arch.dr7 &= ~DR7_GD;
+                       vcpu->arch.dr6 &= ~15;
                        vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
-                       vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
                        kvm_queue_exception(vcpu, DB_VECTOR);
                        return 1;
                }
@@ -5209,8 +5195,6 @@ static int handle_dr(struct kvm_vcpu *vcpu)
                return 1;
        }
 
-       exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-       dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
        reg = DEBUG_REG_ACCESS_REG(exit_qualification);
        if (exit_qualification & TYPE_MOV_FROM_DR) {
                unsigned long val;
@@ -5391,6 +5375,20 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
        return 1;
 }
 
+static int handle_xsaves(struct kvm_vcpu *vcpu)
+{
+       skip_emulated_instruction(vcpu);
+       WARN(1, "this should never happen\n");
+       return 1;
+}
+
+static int handle_xrstors(struct kvm_vcpu *vcpu)
+{
+       skip_emulated_instruction(vcpu);
+       WARN(1, "this should never happen\n");
+       return 1;
+}
+
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
        if (likely(fasteoi)) {
@@ -5492,7 +5490,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
        }
 
        /* clear all local breakpoint enable flags */
-       vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55);
+       vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155);
 
        /*
         * TODO: What about debug traps on tss switch?
@@ -5539,11 +5537,11 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
        trace_kvm_page_fault(gpa, exit_qualification);
 
        /* Is it a write fault? */
-       error_code = exit_qualification & (1U << 1);
+       error_code = exit_qualification & PFERR_WRITE_MASK;
        /* Is it a fetch fault? */
-       error_code |= (exit_qualification & (1U << 2)) << 2;
+       error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
        /* Is the ept page table present? */
-       error_code |= (exit_qualification >> 3) & 0x1;
+       error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK;
 
        vcpu->arch.exit_qualification = exit_qualification;
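/*
 * A compact sketch of the bit remapping performed above: EPT-violation exit
 * qualification bits (1 = write, 2 = fetch, 3 = page readable) are moved into
 * x86 page-fault error-code positions.  The mask values mirror the
 * architectural #PF error code and are defined locally for the example.
 */
#include <stdint.h>
#include <stdio.h>

#define PFERR_PRESENT_MASK (1u << 0)
#define PFERR_WRITE_MASK   (1u << 1)
#define PFERR_FETCH_MASK   (1u << 4)

static uint32_t ept_exit_qual_to_error_code(uint64_t qual)
{
        uint32_t ec;

        ec  = qual & PFERR_WRITE_MASK;          /* bit 1 -> bit 1 */
        ec |= (qual << 2) & PFERR_FETCH_MASK;   /* bit 2 -> bit 4 */
        ec |= (qual >> 3) & PFERR_PRESENT_MASK; /* bit 3 -> bit 0 */
        return ec;
}

int main(void)
{
        /* write access (bit 1) to a present translation (bit 3) */
        printf("error_code = 0x%x\n", ept_exit_qual_to_error_code(0xa));
        return 0;
}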
 
@@ -5785,6 +5783,204 @@ static void update_ple_window_actual_max(void)
                                            ple_window_grow, INT_MIN);
 }
 
+static __init int hardware_setup(void)
+{
+       int r = -ENOMEM, i, msr;
+
+       rdmsrl_safe(MSR_EFER, &host_efer);
+
+       for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
+               kvm_define_shared_msr(i, vmx_msr_index[i]);
+
+       vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_io_bitmap_a)
+               return r;
+
+       vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_io_bitmap_b)
+               goto out;
+
+       vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_msr_bitmap_legacy)
+               goto out1;
+
+       vmx_msr_bitmap_legacy_x2apic =
+                               (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_msr_bitmap_legacy_x2apic)
+               goto out2;
+
+       vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_msr_bitmap_longmode)
+               goto out3;
+
+       vmx_msr_bitmap_longmode_x2apic =
+                               (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_msr_bitmap_longmode_x2apic)
+               goto out4;
+       vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_vmread_bitmap)
+               goto out5;
+
+       vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+       if (!vmx_vmwrite_bitmap)
+               goto out6;
+
+       memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
+       memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
+
+       /*
+        * Allow direct access to the PC debug port (it is often used for I/O
+        * delays, but the vmexits simply slow things down).
+        */
+       memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
+       clear_bit(0x80, vmx_io_bitmap_a);
+
+       memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
+
+       memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
+       memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
+
+       vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
+       vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
+       vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
+       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+       vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
+
+       memcpy(vmx_msr_bitmap_legacy_x2apic,
+                       vmx_msr_bitmap_legacy, PAGE_SIZE);
+       memcpy(vmx_msr_bitmap_longmode_x2apic,
+                       vmx_msr_bitmap_longmode, PAGE_SIZE);
+
+       if (enable_apicv) {
+               for (msr = 0x800; msr <= 0x8ff; msr++)
+                       vmx_disable_intercept_msr_read_x2apic(msr);
+
+               /* According to the SDM, in x2apic mode the whole ID register
+                * is used, but KVM only uses its highest eight bits, so reads
+                * of it must still be intercepted. */
+               vmx_enable_intercept_msr_read_x2apic(0x802);
+               /* TMCCT */
+               vmx_enable_intercept_msr_read_x2apic(0x839);
+               /* TPR */
+               vmx_disable_intercept_msr_write_x2apic(0x808);
+               /* EOI */
+               vmx_disable_intercept_msr_write_x2apic(0x80b);
+               /* SELF-IPI */
+               vmx_disable_intercept_msr_write_x2apic(0x83f);
+       }
+
+       if (enable_ept) {
+               kvm_mmu_set_mask_ptes(0ull,
+                       (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
+                       (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
+                       0ull, VMX_EPT_EXECUTABLE_MASK);
+               ept_set_mmio_spte_mask();
+               kvm_enable_tdp();
+       } else
+               kvm_disable_tdp();
+
+       update_ple_window_actual_max();
+
+       if (setup_vmcs_config(&vmcs_config) < 0) {
+               r = -EIO;
+               goto out7;
+       }
+
+       if (boot_cpu_has(X86_FEATURE_NX))
+               kvm_enable_efer_bits(EFER_NX);
+
+       if (!cpu_has_vmx_vpid())
+               enable_vpid = 0;
+       if (!cpu_has_vmx_shadow_vmcs())
+               enable_shadow_vmcs = 0;
+       if (enable_shadow_vmcs)
+               init_vmcs_shadow_fields();
+
+       if (!cpu_has_vmx_ept() ||
+           !cpu_has_vmx_ept_4levels()) {
+               enable_ept = 0;
+               enable_unrestricted_guest = 0;
+               enable_ept_ad_bits = 0;
+       }
+
+       if (!cpu_has_vmx_ept_ad_bits())
+               enable_ept_ad_bits = 0;
+
+       if (!cpu_has_vmx_unrestricted_guest())
+               enable_unrestricted_guest = 0;
+
+       if (!cpu_has_vmx_flexpriority()) {
+               flexpriority_enabled = 0;
+
+               /*
+                * set_apic_access_page_addr() is used to reload apic access
+                * page upon invalidation.  No need to do anything if the
+                * processor does not have the APIC_ACCESS_ADDR VMCS field.
+                */
+               kvm_x86_ops->set_apic_access_page_addr = NULL;
+       }
+
+       if (!cpu_has_vmx_tpr_shadow())
+               kvm_x86_ops->update_cr8_intercept = NULL;
+
+       if (enable_ept && !cpu_has_vmx_ept_2m_page())
+               kvm_disable_largepages();
+
+       if (!cpu_has_vmx_ple())
+               ple_gap = 0;
+
+       if (!cpu_has_vmx_apicv())
+               enable_apicv = 0;
+
+       if (enable_apicv)
+               kvm_x86_ops->update_cr8_intercept = NULL;
+       else {
+               kvm_x86_ops->hwapic_irr_update = NULL;
+               kvm_x86_ops->deliver_posted_interrupt = NULL;
+               kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
+       }
+
+       if (nested)
+               nested_vmx_setup_ctls_msrs();
+
+       return alloc_kvm_area();
+
+out7:
+       free_page((unsigned long)vmx_vmwrite_bitmap);
+out6:
+       free_page((unsigned long)vmx_vmread_bitmap);
+out5:
+       free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+out4:
+       free_page((unsigned long)vmx_msr_bitmap_longmode);
+out3:
+       free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+out2:
+       free_page((unsigned long)vmx_msr_bitmap_legacy);
+out1:
+       free_page((unsigned long)vmx_io_bitmap_b);
+out:
+       free_page((unsigned long)vmx_io_bitmap_a);
+
+       return r;
+}
+
+static __exit void hardware_unsetup(void)
+{
+       free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+       free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+       free_page((unsigned long)vmx_msr_bitmap_legacy);
+       free_page((unsigned long)vmx_msr_bitmap_longmode);
+       free_page((unsigned long)vmx_io_bitmap_b);
+       free_page((unsigned long)vmx_io_bitmap_a);
+       free_page((unsigned long)vmx_vmwrite_bitmap);
+       free_page((unsigned long)vmx_vmread_bitmap);
+
+       free_kvm_area();
+}
+
 /*
  * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
  * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
@@ -6361,58 +6557,60 @@ static inline int vmcs_field_readonly(unsigned long field)
  * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
  * 64-bit fields are to be returned).
  */
-static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
-                                       unsigned long field, u64 *ret)
+static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
+                                 unsigned long field, u64 *ret)
 {
        short offset = vmcs_field_to_offset(field);
        char *p;
 
        if (offset < 0)
-               return 0;
+               return offset;
 
        p = ((char *)(get_vmcs12(vcpu))) + offset;
 
        switch (vmcs_field_type(field)) {
        case VMCS_FIELD_TYPE_NATURAL_WIDTH:
                *ret = *((natural_width *)p);
-               return 1;
+               return 0;
        case VMCS_FIELD_TYPE_U16:
                *ret = *((u16 *)p);
-               return 1;
+               return 0;
        case VMCS_FIELD_TYPE_U32:
                *ret = *((u32 *)p);
-               return 1;
+               return 0;
        case VMCS_FIELD_TYPE_U64:
                *ret = *((u64 *)p);
-               return 1;
+               return 0;
        default:
-               return 0; /* can never happen. */
+               WARN_ON(1);
+               return -ENOENT;
        }
 }
 
 
-static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu,
-                                   unsigned long field, u64 field_value){
+static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
+                                  unsigned long field, u64 field_value){
        short offset = vmcs_field_to_offset(field);
        char *p = ((char *) get_vmcs12(vcpu)) + offset;
        if (offset < 0)
-               return false;
+               return offset;
 
        switch (vmcs_field_type(field)) {
        case VMCS_FIELD_TYPE_U16:
                *(u16 *)p = field_value;
-               return true;
+               return 0;
        case VMCS_FIELD_TYPE_U32:
                *(u32 *)p = field_value;
-               return true;
+               return 0;
        case VMCS_FIELD_TYPE_U64:
                *(u64 *)p = field_value;
-               return true;
+               return 0;
        case VMCS_FIELD_TYPE_NATURAL_WIDTH:
                *(natural_width *)p = field_value;
-               return true;
+               return 0;
        default:
-               return false; /* can never happen. */
+               WARN_ON(1);
+               return -ENOENT;
        }
 
 }
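/*
 * The switch above dispatches on the field width that is encoded in the VMCS
 * field number itself; a standalone sketch of that decoding (bits 14:13
 * select the width, bit 0 marks the "high" half of a 64-bit field).  The enum
 * names are chosen for the example and the field numbers in the comments
 * follow the SDM encodings.
 */
#include <stdio.h>

enum field_type { TYPE_U16, TYPE_U64, TYPE_U32, TYPE_NATURAL };

static enum field_type vmcs_field_width(unsigned long field)
{
        if (field & 1)                  /* *_HIGH accessors are 32-bit */
                return TYPE_U32;
        return (enum field_type)((field >> 13) & 0x3);
}

int main(void)
{
        printf("0x0000 -> %d (u16, VPID)\n", vmcs_field_width(0x0000));
        printf("0x2800 -> %d (u64, VMCS_LINK_POINTER)\n", vmcs_field_width(0x2800));
        printf("0x681e -> %d (natural width, GUEST_RIP)\n", vmcs_field_width(0x681e));
        return 0;
}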
@@ -6445,6 +6643,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
                case VMCS_FIELD_TYPE_NATURAL_WIDTH:
                        field_value = vmcs_readl(field);
                        break;
+               default:
+                       WARN_ON(1);
+                       continue;
                }
                vmcs12_write_any(&vmx->vcpu, field, field_value);
        }
@@ -6490,6 +6691,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
                        case VMCS_FIELD_TYPE_NATURAL_WIDTH:
                                vmcs_writel(field, (long)field_value);
                                break;
+                       default:
+                               WARN_ON(1);
+                               break;
                        }
                }
        }
@@ -6528,7 +6732,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
        /* Decode instruction info and find the field to read */
        field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
        /* Read the field, zero-extended to a u64 field_value */
-       if (!vmcs12_read_any(vcpu, field, &field_value)) {
+       if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
                nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
                skip_emulated_instruction(vcpu);
                return 1;
@@ -6598,7 +6802,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
                return 1;
        }
 
-       if (!vmcs12_write_any(vcpu, field, field_value)) {
+       if (vmcs12_write_any(vcpu, field, field_value) < 0) {
                nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
                skip_emulated_instruction(vcpu);
                return 1;
@@ -6802,6 +7006,8 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
        [EXIT_REASON_INVEPT]                  = handle_invept,
        [EXIT_REASON_INVVPID]                 = handle_invvpid,
+       [EXIT_REASON_XSAVES]                  = handle_xsaves,
+       [EXIT_REASON_XRSTORS]                 = handle_xrstors,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7089,6 +7295,14 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
        case EXIT_REASON_XSETBV:
                return 1;
+       case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
+               /*
+                * This should never happen, since it is not possible to
+                * set XSS to a non-zero value---neither in L1 nor in L2.
+                * If it were, XSS would have to be checked against
+                * the XSS exit bitmap in vmcs12.
+                */
+               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
        default:
                return 1;
        }
@@ -7277,6 +7491,9 @@ static void vmx_set_rvi(int vector)
        u16 status;
        u8 old;
 
+       if (vector == -1)
+               vector = 0;
+
        status = vmcs_read16(GUEST_INTR_STATUS);
        old = (u8)status & 0xff;
        if ((u8)vector != old) {
@@ -7288,22 +7505,23 @@ static void vmx_set_rvi(int vector)
 
 static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 {
+       if (!is_guest_mode(vcpu)) {
+               vmx_set_rvi(max_irr);
+               return;
+       }
+
        if (max_irr == -1)
                return;
 
        /*
-        * If a vmexit is needed, vmx_check_nested_events handles it.
+        * We are in guest mode: if a vmexit is needed,
+        * vmx_check_nested_events handles it.
         */
-       if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
+       if (nested_exit_on_intr(vcpu))
                return;
 
-       if (!is_guest_mode(vcpu)) {
-               vmx_set_rvi(max_irr);
-               return;
-       }
-
        /*
-        * Fall back to pre-APICv interrupt injection since L2
+        * Else, fall back to pre-APICv interrupt injection since L2
         * is run without virtual interrupt delivery.
         */
        if (!kvm_event_needs_reinjection(vcpu) &&
@@ -7400,6 +7618,12 @@ static bool vmx_mpx_supported(void)
                (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
 }
 
+static bool vmx_xsaves_supported(void)
+{
+       return vmcs_config.cpu_based_2nd_exec_ctrl &
+               SECONDARY_EXEC_XSAVES;
+}
+
 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 {
        u32 exit_intr_info;
@@ -8135,6 +8359,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
        vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
 
+       if (nested_cpu_has_xsaves(vmcs12))
+               vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
        vmcs_write64(VMCS_LINK_POINTER, -1ull);
 
        exec_control = vmcs12->pin_based_vm_exec_control;
@@ -8775,6 +9001,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
        vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
        if (vmx_mpx_supported())
                vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+       if (nested_cpu_has_xsaves(vmcs12))
+               vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
 
        /* update exit information fields: */
 
@@ -9176,6 +9404,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .check_intercept = vmx_check_intercept,
        .handle_external_intr = vmx_handle_external_intr,
        .mpx_supported = vmx_mpx_supported,
+       .xsaves_supported = vmx_xsaves_supported,
 
        .check_nested_events = vmx_check_nested_events,
 
@@ -9184,150 +9413,21 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 static int __init vmx_init(void)
 {
-       int r, i, msr;
-
-       rdmsrl_safe(MSR_EFER, &host_efer);
-
-       for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
-               kvm_define_shared_msr(i, vmx_msr_index[i]);
-
-       vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_io_bitmap_a)
-               return -ENOMEM;
-
-       r = -ENOMEM;
-
-       vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_io_bitmap_b)
-               goto out;
-
-       vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_legacy)
-               goto out1;
-
-       vmx_msr_bitmap_legacy_x2apic =
-                               (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_legacy_x2apic)
-               goto out2;
-
-       vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_longmode)
-               goto out3;
-
-       vmx_msr_bitmap_longmode_x2apic =
-                               (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_msr_bitmap_longmode_x2apic)
-               goto out4;
-       vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_vmread_bitmap)
-               goto out5;
-
-       vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-       if (!vmx_vmwrite_bitmap)
-               goto out6;
-
-       memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
-       memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
-
-       /*
-        * Allow direct access to the PC debug port (it is often used for I/O
-        * delays, but the vmexits simply slow things down).
-        */
-       memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
-       clear_bit(0x80, vmx_io_bitmap_a);
-
-       memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
-
-       memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
-       memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-
-       set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
-
-       r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
-                    __alignof__(struct vcpu_vmx), THIS_MODULE);
+       int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
+                     __alignof__(struct vcpu_vmx), THIS_MODULE);
        if (r)
-               goto out7;
+               return r;
 
 #ifdef CONFIG_KEXEC
        rcu_assign_pointer(crash_vmclear_loaded_vmcss,
                           crash_vmclear_local_loaded_vmcss);
 #endif
 
-       vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
-       vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
-       vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
-       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
-       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
-       vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-       vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
-
-       memcpy(vmx_msr_bitmap_legacy_x2apic,
-                       vmx_msr_bitmap_legacy, PAGE_SIZE);
-       memcpy(vmx_msr_bitmap_longmode_x2apic,
-                       vmx_msr_bitmap_longmode, PAGE_SIZE);
-
-       if (enable_apicv) {
-               for (msr = 0x800; msr <= 0x8ff; msr++)
-                       vmx_disable_intercept_msr_read_x2apic(msr);
-
-               /* According SDM, in x2apic mode, the whole id reg is used.
-                * But in KVM, it only use the highest eight bits. Need to
-                * intercept it */
-               vmx_enable_intercept_msr_read_x2apic(0x802);
-               /* TMCCT */
-               vmx_enable_intercept_msr_read_x2apic(0x839);
-               /* TPR */
-               vmx_disable_intercept_msr_write_x2apic(0x808);
-               /* EOI */
-               vmx_disable_intercept_msr_write_x2apic(0x80b);
-               /* SELF-IPI */
-               vmx_disable_intercept_msr_write_x2apic(0x83f);
-       }
-
-       if (enable_ept) {
-               kvm_mmu_set_mask_ptes(0ull,
-                       (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
-                       (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
-                       0ull, VMX_EPT_EXECUTABLE_MASK);
-               ept_set_mmio_spte_mask();
-               kvm_enable_tdp();
-       } else
-               kvm_disable_tdp();
-
-       update_ple_window_actual_max();
-
        return 0;
-
-out7:
-       free_page((unsigned long)vmx_vmwrite_bitmap);
-out6:
-       free_page((unsigned long)vmx_vmread_bitmap);
-out5:
-       free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out4:
-       free_page((unsigned long)vmx_msr_bitmap_longmode);
-out3:
-       free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-out2:
-       free_page((unsigned long)vmx_msr_bitmap_legacy);
-out1:
-       free_page((unsigned long)vmx_io_bitmap_b);
-out:
-       free_page((unsigned long)vmx_io_bitmap_a);
-       return r;
 }
 
 static void __exit vmx_exit(void)
 {
-       free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-       free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-       free_page((unsigned long)vmx_msr_bitmap_legacy);
-       free_page((unsigned long)vmx_msr_bitmap_longmode);
-       free_page((unsigned long)vmx_io_bitmap_b);
-       free_page((unsigned long)vmx_io_bitmap_a);
-       free_page((unsigned long)vmx_vmwrite_bitmap);
-       free_page((unsigned long)vmx_vmread_bitmap);
-
 #ifdef CONFIG_KEXEC
        RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
        synchronize_rcu();
index 0033df3..c259814 100644 (file)
@@ -27,6 +27,7 @@
 #include "kvm_cache_regs.h"
 #include "x86.h"
 #include "cpuid.h"
+#include "assigned-dev.h"
 
 #include <linux/clocksource.h>
 #include <linux/interrupt.h>
@@ -353,6 +354,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
 
        if (!vcpu->arch.exception.pending) {
        queue:
+               if (has_error && !is_protmode(vcpu))
+                       has_error = false;
                vcpu->arch.exception.pending = true;
                vcpu->arch.exception.has_error_code = has_error;
                vcpu->arch.exception.nr = nr;
@@ -455,6 +458,16 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
 }
 EXPORT_SYMBOL_GPL(kvm_require_cpl);
 
+bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
+{
+       if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+               return true;
+
+       kvm_queue_exception(vcpu, UD_VECTOR);
+       return false;
+}
+EXPORT_SYMBOL_GPL(kvm_require_dr);
+
 /*
  * This function will be used to read from the physical memory of the currently
  * running guest. The difference to kvm_read_guest_page is that this function
@@ -656,6 +669,12 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
        if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
                return 1;
 
+       if (xcr0 & XSTATE_AVX512) {
+               if (!(xcr0 & XSTATE_YMM))
+                       return 1;
+               if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512)
+                       return 1;
+       }
        kvm_put_guest_xcr0(vcpu);
        vcpu->arch.xcr0 = xcr0;
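/*
 * A minimal sketch of the AVX-512 consistency rule enforced above: the three
 * AVX-512 state bits must be set or cleared as a group, and only together
 * with YMM state.  The bit positions follow the architectural XSAVE layout
 * and are defined locally for the example.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define XSTATE_YMM      (1u << 2)
#define XSTATE_OPMASK   (1u << 5)
#define XSTATE_ZMM_HI   (1u << 6)
#define XSTATE_HI16_ZMM (1u << 7)
#define XSTATE_AVX512   (XSTATE_OPMASK | XSTATE_ZMM_HI | XSTATE_HI16_ZMM)

static bool xcr0_avx512_ok(uint64_t xcr0)
{
        if (!(xcr0 & XSTATE_AVX512))
                return true;                            /* no AVX-512 state requested */
        if (!(xcr0 & XSTATE_YMM))
                return false;                           /* AVX-512 needs YMM */
        return (xcr0 & XSTATE_AVX512) == XSTATE_AVX512; /* all or nothing */
}

int main(void)
{
        printf("0xe7 -> %d\n", xcr0_avx512_ok(0xe7)); /* FP|SSE|YMM|AVX512: accepted */
        printf("0x27 -> %d\n", xcr0_avx512_ok(0x27)); /* only OPMASK of AVX512: rejected */
        return 0;
}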
 
@@ -732,6 +751,10 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
 
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
+#ifdef CONFIG_X86_64
+       cr3 &= ~CR3_PCID_INVD;
+#endif
+
        if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
                kvm_mmu_sync_roots(vcpu);
                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -811,8 +834,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
                        vcpu->arch.eff_db[dr] = val;
                break;
        case 4:
-               if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-                       return 1; /* #UD */
                /* fall through */
        case 6:
                if (val & 0xffffffff00000000ULL)
@@ -821,8 +842,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
                kvm_update_dr6(vcpu);
                break;
        case 5:
-               if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-                       return 1; /* #UD */
                /* fall through */
        default: /* 7 */
                if (val & 0xffffffff00000000ULL)
@@ -837,27 +856,21 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 
 int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 {
-       int res;
-
-       res = __kvm_set_dr(vcpu, dr, val);
-       if (res > 0)
-               kvm_queue_exception(vcpu, UD_VECTOR);
-       else if (res < 0)
+       if (__kvm_set_dr(vcpu, dr, val)) {
                kvm_inject_gp(vcpu, 0);
-
-       return res;
+               return 1;
+       }
+       return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_dr);
 
-static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
+int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
 {
        switch (dr) {
        case 0 ... 3:
                *val = vcpu->arch.db[dr];
                break;
        case 4:
-               if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-                       return 1;
                /* fall through */
        case 6:
                if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
@@ -866,23 +879,11 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
                        *val = kvm_x86_ops->get_dr6(vcpu);
                break;
        case 5:
-               if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-                       return 1;
                /* fall through */
        default: /* 7 */
                *val = vcpu->arch.dr7;
                break;
        }
-
-       return 0;
-}
-
-int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
-{
-       if (_kvm_get_dr(vcpu, dr, val)) {
-               kvm_queue_exception(vcpu, UD_VECTOR);
-               return 1;
-       }
        return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_get_dr);
@@ -1237,21 +1238,22 @@ void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_X86_64
        bool vcpus_matched;
-       bool do_request = false;
        struct kvm_arch *ka = &vcpu->kvm->arch;
        struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
 
        vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
                         atomic_read(&vcpu->kvm->online_vcpus));
 
-       if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
-               if (!ka->use_master_clock)
-                       do_request = 1;
-
-       if (!vcpus_matched && ka->use_master_clock)
-                       do_request = 1;
-
-       if (do_request)
+       /*
+        * Once the masterclock is enabled, always perform the request so
+        * that it is kept up to date.
+        *
+        * To enable the masterclock, the host clocksource must be TSC and
+        * the vcpus need to have matched TSCs.  When that happens, perform
+        * the request to enable the masterclock.
+        */
+       if (ka->use_master_clock ||
+           (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched))
                kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
 
        trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
@@ -1637,16 +1639,16 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
        vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
        vcpu->last_guest_tsc = tsc_timestamp;
 
+       if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
+               &guest_hv_clock, sizeof(guest_hv_clock))))
+               return 0;
+
        /*
         * The interface expects us to write an even number signaling that the
         * update is finished. Since the guest won't see the intermediate
         * state, we just increase by 2 at the end.
         */
-       vcpu->hv_clock.version += 2;
-
-       if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
-               &guest_hv_clock, sizeof(guest_hv_clock))))
-               return 0;
+       vcpu->hv_clock.version = guest_hv_clock.version + 2;
 
        /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
        pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
@@ -1662,6 +1664,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 
        vcpu->hv_clock.flags = pvclock_flags;
 
+       trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
+
        kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
                                &vcpu->hv_clock,
                                sizeof(vcpu->hv_clock));
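/*
 * The even/odd version handling above pairs with a guest-side read loop; a
 * sketch of that consumer, using a simplified structure in place of
 * pvclock_vcpu_time_info.
 */
#include <stdint.h>

struct pv_time {
        volatile uint32_t version;
        volatile uint64_t system_time;
};

static uint64_t read_system_time(const struct pv_time *t)
{
        uint32_t v;
        uint64_t time;

        for (;;) {
                v = t->version;
                if (v & 1)              /* odd: host update in flight */
                        continue;
                __sync_synchronize();
                time = t->system_time;
                __sync_synchronize();
                if (v == t->version)    /* unchanged: consistent snapshot */
                        return time;
        }
}

int main(void)
{
        struct pv_time t = { .version = 2, .system_time = 123456789ULL };

        return read_system_time(&t) == 123456789ULL ? 0 : 1;
}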
@@ -2140,7 +2144,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_IA32_TSC_ADJUST:
                if (guest_cpuid_has_tsc_adjust(vcpu)) {
                        if (!msr_info->host_initiated) {
-                               u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
+                               s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
                                kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
                        }
                        vcpu->arch.ia32_tsc_adjust_msr = data;
@@ -3106,7 +3110,7 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
        unsigned long val;
 
        memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
-       _kvm_get_dr(vcpu, 6, &val);
+       kvm_get_dr(vcpu, 6, &val);
        dbgregs->dr6 = val;
        dbgregs->dr7 = vcpu->arch.dr7;
        dbgregs->flags = 0;
@@ -3128,15 +3132,89 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
        return 0;
 }
 
+#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
+
+static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
+{
+       struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
+       u64 xstate_bv = xsave->xsave_hdr.xstate_bv;
+       u64 valid;
+
+       /*
+        * Copy legacy XSAVE area, to avoid complications with CPUID
+        * leaves 0 and 1 in the loop below.
+        */
+       memcpy(dest, xsave, XSAVE_HDR_OFFSET);
+
+       /* Set XSTATE_BV */
+       *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
+
+       /*
+        * Copy each region from the possibly compacted offset to the
+        * non-compacted offset.
+        */
+       valid = xstate_bv & ~XSTATE_FPSSE;
+       while (valid) {
+               u64 feature = valid & -valid;
+               int index = fls64(feature) - 1;
+               void *src = get_xsave_addr(xsave, feature);
+
+               if (src) {
+                       u32 size, offset, ecx, edx;
+                       cpuid_count(XSTATE_CPUID, index,
+                                   &size, &offset, &ecx, &edx);
+                       memcpy(dest + offset, src, size);
+               }
+
+               valid -= feature;
+       }
+}
+
+static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
+{
+       struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
+       u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
+       u64 valid;
+
+       /*
+        * Copy legacy XSAVE area, to avoid complications with CPUID
+        * leaves 0 and 1 in the loop below.
+        */
+       memcpy(xsave, src, XSAVE_HDR_OFFSET);
+
+       /* Set XSTATE_BV and possibly XCOMP_BV.  */
+       xsave->xsave_hdr.xstate_bv = xstate_bv;
+       if (cpu_has_xsaves)
+               xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
+
+       /*
+        * Copy each region from the non-compacted offset to the
+        * possibly compacted offset.
+        */
+       valid = xstate_bv & ~XSTATE_FPSSE;
+       while (valid) {
+               u64 feature = valid & -valid;
+               int index = fls64(feature) - 1;
+               void *dest = get_xsave_addr(xsave, feature);
+
+               if (dest) {
+                       u32 size, offset, ecx, edx;
+                       cpuid_count(XSTATE_CPUID, index,
+                                   &size, &offset, &ecx, &edx);
+                       memcpy(dest, src + offset, size);
+               } else
+                       WARN_ON_ONCE(1);
+
+               valid -= feature;
+       }
+}
+
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
                                         struct kvm_xsave *guest_xsave)
 {
        if (cpu_has_xsave) {
-               memcpy(guest_xsave->region,
-                       &vcpu->arch.guest_fpu.state->xsave,
-                       vcpu->arch.guest_xstate_size);
-               *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
-                       vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
+               memset(guest_xsave, 0, sizeof(struct kvm_xsave));
+               fill_xsave((u8 *) guest_xsave->region, vcpu);
        } else {
                memcpy(guest_xsave->region,
                        &vcpu->arch.guest_fpu.state->fxsave,
@@ -3160,8 +3238,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
                 */
                if (xstate_bv & ~kvm_supported_xcr0())
                        return -EINVAL;
-               memcpy(&vcpu->arch.guest_fpu.state->xsave,
-                       guest_xsave->region, vcpu->arch.guest_xstate_size);
+               load_xsave(vcpu, (u8 *)guest_xsave->region);
        } else {
                if (xstate_bv & ~XSTATE_FPSSE)
                        return -EINVAL;
@@ -4004,7 +4081,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
        }
 
        default:
-               ;
+               r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
        }
 out:
        return r;
@@ -4667,7 +4744,7 @@ static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
 
 int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
 {
-       return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
+       return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
 }
 
 int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
@@ -5211,21 +5288,17 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag
 
 static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
 {
-       struct kvm_run *kvm_run = vcpu->run;
-       unsigned long eip = vcpu->arch.emulate_ctxt.eip;
-       u32 dr6 = 0;
-
        if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
            (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
-               dr6 = kvm_vcpu_check_hw_bp(eip, 0,
+               struct kvm_run *kvm_run = vcpu->run;
+               unsigned long eip = kvm_get_linear_rip(vcpu);
+               u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
                                           vcpu->arch.guest_debug_dr7,
                                           vcpu->arch.eff_db);
 
                if (dr6 != 0) {
                        kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
-                       kvm_run->debug.arch.pc = kvm_rip_read(vcpu) +
-                               get_segment_base(vcpu, VCPU_SREG_CS);
-
+                       kvm_run->debug.arch.pc = eip;
                        kvm_run->debug.arch.exception = DB_VECTOR;
                        kvm_run->exit_reason = KVM_EXIT_DEBUG;
                        *r = EMULATE_USER_EXIT;
@@ -5235,7 +5308,8 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
 
        if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
            !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
-               dr6 = kvm_vcpu_check_hw_bp(eip, 0,
+               unsigned long eip = kvm_get_linear_rip(vcpu);
+               u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
                                           vcpu->arch.dr7,
                                           vcpu->arch.db);
 
@@ -5365,7 +5439,9 @@ restart:
                kvm_rip_write(vcpu, ctxt->eip);
                if (r == EMULATE_DONE)
                        kvm_vcpu_check_singlestep(vcpu, rflags, &r);
-               __kvm_set_rflags(vcpu, ctxt->eflags);
+               if (!ctxt->have_exception ||
+                   exception_type(ctxt->exception.vector) == EXCPT_TRAP)
+                       __kvm_set_rflags(vcpu, ctxt->eflags);
 
                /*
                 * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
@@ -5965,6 +6041,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
                        __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
                                             X86_EFLAGS_RF);
 
+               if (vcpu->arch.exception.nr == DB_VECTOR &&
+                   (vcpu->arch.dr7 & DR7_GD)) {
+                       vcpu->arch.dr7 &= ~DR7_GD;
+                       kvm_update_dr7(vcpu);
+               }
+
                kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
                                          vcpu->arch.exception.has_error_code,
                                          vcpu->arch.exception.error_code,
@@ -6873,6 +6955,9 @@ int fx_init(struct kvm_vcpu *vcpu)
                return err;
 
        fpu_finit(&vcpu->arch.guest_fpu);
+       if (cpu_has_xsaves)
+               vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv =
+                       host_xcr0 | XSTATE_COMPACTION_ENABLED;
 
        /*
         * Ensure guest xcr0 is valid for loading
@@ -7024,7 +7109,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
        kvm_x86_ops->vcpu_reset(vcpu);
 }
 
-void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
+void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
 {
        struct kvm_segment cs;
 
@@ -7256,6 +7341,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        if (type)
                return -EINVAL;
 
+       INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
        INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
        INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
@@ -7536,12 +7622,18 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
        return kvm_x86_ops->interrupt_allowed(vcpu);
 }
 
-bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
+unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
 {
-       unsigned long current_rip = kvm_rip_read(vcpu) +
-               get_segment_base(vcpu, VCPU_SREG_CS);
+       if (is_64_bit_mode(vcpu))
+               return kvm_rip_read(vcpu);
+       return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
+                    kvm_rip_read(vcpu));
+}
+EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
 
-       return current_rip == linear_rip;
+bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
+{
+       return kvm_get_linear_rip(vcpu) == linear_rip;
 }
 EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
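The new kvm_get_linear_rip() lets callers such as the breakpoint check above compare addresses as linear addresses: in 64-bit mode RIP already is one, otherwise the CS base is added and the sum truncated to 32 bits. A standalone sketch of the same arithmetic, with example values made up for illustration:

    #include <stdio.h>
    #include <stdint.h>

    /* Mirrors the helper's arithmetic: the CS base is ignored in long mode,
     * otherwise the sum wraps at 32 bits. */
    static uint64_t linear_rip(int long_mode, uint64_t rip, uint32_t cs_base)
    {
            if (long_mode)
                    return rip;
            return (uint32_t)(cs_base + rip);
    }

    int main(void)
    {
            printf("%#llx\n", (unsigned long long)linear_rip(0, 0xfff0, 0xffff0000));
            printf("%#llx\n", (unsigned long long)linear_rip(1, 0xffffffff81000000ULL, 0));
            return 0;
    }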
 
index 7cb9c45..cc1d61a 100644 (file)
@@ -162,7 +162,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 
 #define KVM_SUPPORTED_XCR0     (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
-                               | XSTATE_BNDREGS | XSTATE_BNDCSR)
+                               | XSTATE_BNDREGS | XSTATE_BNDCSR \
+                               | XSTATE_AVX512)
 extern u64 host_xcr0;
 
 extern u64 kvm_supported_xcr0(void);
index ad9db60..b3f45a5 100644 (file)
@@ -60,7 +60,8 @@ struct arch_timer_cpu {
 
 #ifdef CONFIG_KVM_ARM_TIMER
 int kvm_timer_hyp_init(void);
-int kvm_timer_init(struct kvm *kvm);
+void kvm_timer_enable(struct kvm *kvm);
+void kvm_timer_init(struct kvm *kvm);
 void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
                          const struct kvm_irq_level *irq);
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
@@ -77,11 +78,8 @@ static inline int kvm_timer_hyp_init(void)
        return 0;
 };
 
-static inline int kvm_timer_init(struct kvm *kvm)
-{
-       return 0;
-}
-
+static inline void kvm_timer_enable(struct kvm *kvm) {}
+static inline void kvm_timer_init(struct kvm *kvm) {}
 static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
                                        const struct kvm_irq_level *irq) {}
 static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
index 206dcc3..ac4888d 100644 (file)
@@ -274,7 +274,7 @@ struct kvm_exit_mmio;
 #ifdef CONFIG_KVM_ARM_VGIC
 int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_hyp_init(void);
-int kvm_vgic_init(struct kvm *kvm);
+int kvm_vgic_map_resources(struct kvm *kvm);
 int kvm_vgic_create(struct kvm *kvm);
 void kvm_vgic_destroy(struct kvm *kvm);
 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
@@ -287,7 +287,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
                      struct kvm_exit_mmio *mmio);
 
 #define irqchip_in_kernel(k)   (!!((k)->arch.vgic.in_kernel))
-#define vgic_initialized(k)    ((k)->arch.vgic.ready)
+#define vgic_initialized(k)    (!!((k)->arch.vgic.nr_cpus))
+#define vgic_ready(k)          ((k)->arch.vgic.ready)
 
 int vgic_v2_probe(struct device_node *vgic_node,
                  const struct vgic_ops **ops,
@@ -321,7 +322,7 @@ static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr,
        return -ENXIO;
 }
 
-static inline int kvm_vgic_init(struct kvm *kvm)
+static inline int kvm_vgic_map_resources(struct kvm *kvm)
 {
        return 0;
 }
@@ -373,6 +374,11 @@ static inline bool vgic_initialized(struct kvm *kvm)
 {
        return true;
 }
+
+static inline bool vgic_ready(struct kvm *kvm)
+{
+       return true;
+}
 #endif
 
 #endif
index a6059bd..26f1060 100644 (file)
@@ -43,6 +43,7 @@
  * include/linux/kvm_h.
  */
 #define KVM_MEMSLOT_INVALID    (1UL << 16)
+#define KVM_MEMSLOT_INCOHERENT (1UL << 17)
 
 /* Two fragments for cross MMIO pages. */
 #define KVM_MAX_MMIO_FRAGMENTS 2
@@ -353,6 +354,8 @@ struct kvm_memslots {
        struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM];
        /* The mapping table from slot id to the index in memslots[]. */
        short id_to_index[KVM_MEM_SLOTS_NUM];
+       atomic_t lru_slot;
+       int used_slots;
 };
 
 struct kvm {
@@ -395,7 +398,6 @@ struct kvm {
         * Update side is protected by irq_lock.
         */
        struct kvm_irq_routing_table __rcu *irq_routing;
-       struct hlist_head mask_notifier_list;
 #endif
 #ifdef CONFIG_HAVE_KVM_IRQFD
        struct hlist_head irq_ack_notifier_list;
@@ -447,6 +449,14 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
 int __must_check vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
 
+#ifdef __KVM_HAVE_IOAPIC
+void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
+#else
+static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
+{
+}
+#endif
+
 #ifdef CONFIG_HAVE_KVM_IRQFD
 int kvm_irqfd_init(void);
 void kvm_irqfd_exit(void);
@@ -711,44 +721,6 @@ struct kvm_irq_ack_notifier {
        void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
 };
 
-struct kvm_assigned_dev_kernel {
-       struct kvm_irq_ack_notifier ack_notifier;
-       struct list_head list;
-       int assigned_dev_id;
-       int host_segnr;
-       int host_busnr;
-       int host_devfn;
-       unsigned int entries_nr;
-       int host_irq;
-       bool host_irq_disabled;
-       bool pci_2_3;
-       struct msix_entry *host_msix_entries;
-       int guest_irq;
-       struct msix_entry *guest_msix_entries;
-       unsigned long irq_requested_type;
-       int irq_source_id;
-       int flags;
-       struct pci_dev *dev;
-       struct kvm *kvm;
-       spinlock_t intx_lock;
-       spinlock_t intx_mask_lock;
-       char irq_name[32];
-       struct pci_saved_state *pci_saved_state;
-};
-
-struct kvm_irq_mask_notifier {
-       void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
-       int irq;
-       struct hlist_node link;
-};
-
-void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
-                                   struct kvm_irq_mask_notifier *kimn);
-void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
-                                     struct kvm_irq_mask_notifier *kimn);
-void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
-                            bool mask);
-
 int kvm_irq_map_gsi(struct kvm *kvm,
                    struct kvm_kernel_irq_routing_entry *entries, int gsi);
 int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin);
@@ -770,12 +742,6 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
 void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
-int kvm_iommu_map_guest(struct kvm *kvm);
-int kvm_iommu_unmap_guest(struct kvm *kvm);
-int kvm_assign_device(struct kvm *kvm,
-                     struct kvm_assigned_dev_kernel *assigned_dev);
-int kvm_deassign_device(struct kvm *kvm,
-                       struct kvm_assigned_dev_kernel *assigned_dev);
 #else
 static inline int kvm_iommu_map_pages(struct kvm *kvm,
                                      struct kvm_memory_slot *slot)
@@ -787,11 +753,6 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm,
                                         struct kvm_memory_slot *slot)
 {
 }
-
-static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
-{
-       return 0;
-}
 #endif
 
 static inline void kvm_guest_enter(void)
@@ -832,12 +793,28 @@ static inline void kvm_guest_exit(void)
 static inline struct kvm_memory_slot *
 search_memslots(struct kvm_memslots *slots, gfn_t gfn)
 {
-       struct kvm_memory_slot *memslot;
+       int start = 0, end = slots->used_slots;
+       int slot = atomic_read(&slots->lru_slot);
+       struct kvm_memory_slot *memslots = slots->memslots;
+
+       if (gfn >= memslots[slot].base_gfn &&
+           gfn < memslots[slot].base_gfn + memslots[slot].npages)
+               return &memslots[slot];
 
-       kvm_for_each_memslot(memslot, slots)
-               if (gfn >= memslot->base_gfn &&
-                     gfn < memslot->base_gfn + memslot->npages)
-                       return memslot;
+       while (start < end) {
+               slot = start + (end - start) / 2;
+
+               if (gfn >= memslots[slot].base_gfn)
+                       end = slot;
+               else
+                       start = slot + 1;
+       }
+
+       if (gfn >= memslots[start].base_gfn &&
+           gfn < memslots[start].base_gfn + memslots[start].npages) {
+               atomic_set(&slots->lru_slot, start);
+               return &memslots[start];
+       }
 
        return NULL;
 }
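The rewritten search_memslots() first probes the slot cached in lru_slot and otherwise binary-searches the array, which the memslot update path is assumed to keep sorted by base_gfn in descending order (that ordering is not shown in this hunk). A self-contained sketch of the lookup over a plain array with invented slot values:

    #include <stdio.h>
    #include <stdint.h>

    struct slot { uint64_t base_gfn, npages; };

    /* Example slots, sorted by base_gfn in descending order. */
    static struct slot slots[] = {
            { 0x100000, 0x800 },
            { 0x000100, 0x400 },
            { 0x000000, 0x080 },
    };

    static struct slot *find(uint64_t gfn, int used)
    {
            int start = 0, end = used;

            while (start < end) {
                    int mid = start + (end - start) / 2;

                    if (gfn >= slots[mid].base_gfn)
                            end = mid;      /* candidate or something above it */
                    else
                            start = mid + 1;
            }
            if (start < used && gfn >= slots[start].base_gfn &&
                gfn < slots[start].base_gfn + slots[start].npages)
                    return &slots[start];
            return NULL;
    }

    int main(void)
    {
            printf("%s\n", find(0x200, 3) ? "hit" : "miss");
            return 0;
    }

The kernel version additionally records the hit in lru_slot with atomic_set() so the next lookup for a nearby gfn skips the search entirely.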
@@ -1011,25 +988,6 @@ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }
 
 #endif
 
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
-
-long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
-                                 unsigned long arg);
-
-void kvm_free_all_assigned_devices(struct kvm *kvm);
-
-#else
-
-static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
-                                               unsigned long arg)
-{
-       return -ENOTTY;
-}
-
-static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
-
-#endif
-
 static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
 {
        set_bit(req, &vcpu->requests);
index b606bb6..931da7e 100644 (file)
@@ -54,33 +54,6 @@ typedef u64            hfn_t;
 
 typedef hfn_t pfn_t;
 
-union kvm_ioapic_redirect_entry {
-       u64 bits;
-       struct {
-               u8 vector;
-               u8 delivery_mode:3;
-               u8 dest_mode:1;
-               u8 delivery_status:1;
-               u8 polarity:1;
-               u8 remote_irr:1;
-               u8 trig_mode:1;
-               u8 mask:1;
-               u8 reserve:7;
-               u8 reserved[4];
-               u8 dest_id;
-       } fields;
-};
-
-struct kvm_lapic_irq {
-       u32 vector;
-       u32 delivery_mode;
-       u32 dest_mode;
-       u32 level;
-       u32 trig_mode;
-       u32 shorthand;
-       u32 dest_id;
-};
-
 struct gfn_to_hva_cache {
        u64 generation;
        gpa_t gpa;
index 6076882..a37fd12 100644 (file)
@@ -647,11 +647,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_MP_STATE 14
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
-#define KVM_CAP_DEVICE_ASSIGNMENT 17
 #define KVM_CAP_IOMMU 18
-#ifdef __KVM_HAVE_MSI
-#define KVM_CAP_DEVICE_MSI 20
-#endif
 /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
 #define KVM_CAP_USER_NMI 22
@@ -663,10 +659,6 @@ struct kvm_ppc_smmu_info {
 #endif
 #define KVM_CAP_IRQ_ROUTING 25
 #define KVM_CAP_IRQ_INJECT_STATUS 26
-#define KVM_CAP_DEVICE_DEASSIGNMENT 27
-#ifdef __KVM_HAVE_MSIX
-#define KVM_CAP_DEVICE_MSIX 28
-#endif
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
@@ -1107,9 +1099,6 @@ struct kvm_s390_ucas_mapping {
 #define KVM_X86_SETUP_MCE         _IOW(KVMIO,  0x9c, __u64)
 #define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO,  0x9d, __u64)
 #define KVM_X86_SET_MCE           _IOW(KVMIO,  0x9e, struct kvm_x86_mce)
-/* IA64 stack access */
-#define KVM_IA64_VCPU_GET_STACK   _IOR(KVMIO,  0x9a, void *)
-#define KVM_IA64_VCPU_SET_STACK   _IOW(KVMIO,  0x9b, void *)
 /* Available with KVM_CAP_VCPU_EVENTS */
 #define KVM_GET_VCPU_EVENTS       _IOR(KVMIO,  0x9f, struct kvm_vcpu_events)
 #define KVM_SET_VCPU_EVENTS       _IOW(KVMIO,  0xa0, struct kvm_vcpu_events)
index 22fa819..1c0772b 100644 (file)
@@ -61,12 +61,14 @@ static void timer_disarm(struct arch_timer_cpu *timer)
 
 static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
 {
+       int ret;
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
        timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
-       kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
-                           timer->irq->irq,
-                           timer->irq->level);
+       ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+                                 timer->irq->irq,
+                                 timer->irq->level);
+       WARN_ON(ret);
 }
 
 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
@@ -307,12 +309,24 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
        timer_disarm(timer);
 }
 
-int kvm_timer_init(struct kvm *kvm)
+void kvm_timer_enable(struct kvm *kvm)
 {
-       if (timecounter && wqueue) {
-               kvm->arch.timer.cntvoff = kvm_phys_timer_read();
+       if (kvm->arch.timer.enabled)
+               return;
+
+       /*
+        * There is a potential race here between VCPUs starting for the first
+        * time, which may be enabling the timer multiple times.  That doesn't
+        * hurt though, because we're just setting a variable to the same
+        * value that it already had.  The important thing is that all
+        * VCPUs have the enabled variable set, before entering the guest, if
+        * the arch timers are enabled.
+        */
+       if (timecounter && wqueue)
                kvm->arch.timer.enabled = 1;
-       }
+}
 
-       return 0;
+void kvm_timer_init(struct kvm *kvm)
+{
+       kvm->arch.timer.cntvoff = kvm_phys_timer_read();
 }
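The comment in kvm_timer_enable() leans on the write being idempotent: concurrent enables all store the same value, so ordering does not matter as long as every VCPU sees the flag set before entering the guest. A toy userspace illustration of that pattern (no real concurrency or memory-ordering concerns are modelled here):

    #include <stdio.h>

    struct timer_state {
            int enabled;
    };

    /* Several VCPUs may race through this on first run; every writer stores
     * the same value, so the outcome is the same whatever the ordering. */
    static void timer_enable(struct timer_state *t, int hw_usable)
    {
            if (t->enabled)
                    return;
            if (hw_usable)
                    t->enabled = 1;
    }

    int main(void)
    {
            struct timer_state t = { 0 };

            timer_enable(&t, 1);    /* first VCPU to enter the guest */
            timer_enable(&t, 1);    /* a racing VCPU: same outcome */
            printf("enabled = %d\n", t.enabled);
            return 0;
    }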
index aacdb59..03affc7 100644 (file)
@@ -91,6 +91,7 @@
 #define ACCESS_WRITE_VALUE     (3 << 1)
 #define ACCESS_WRITE_MASK(x)   ((x) & (3 << 1))
 
+static int vgic_init(struct kvm *kvm);
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
 static void vgic_update_state(struct kvm *kvm);
@@ -1607,7 +1608,7 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
        }
 }
 
-static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
+static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
                                  unsigned int irq_num, bool level)
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
@@ -1643,9 +1644,10 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
                        vgic_dist_irq_clear_level(vcpu, irq_num);
                        if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
                                vgic_dist_irq_clear_pending(vcpu, irq_num);
-               } else {
-                       vgic_dist_irq_clear_pending(vcpu, irq_num);
                }
+
+               ret = false;
+               goto out;
        }
 
        enabled = vgic_irq_is_enabled(vcpu, irq_num);
@@ -1672,7 +1674,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 out:
        spin_unlock(&dist->lock);
 
-       return ret;
+       return ret ? cpuid : -EINVAL;
 }
 
 /**
@@ -1692,11 +1694,26 @@ out:
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
                        bool level)
 {
-       if (likely(vgic_initialized(kvm)) &&
-           vgic_update_irq_pending(kvm, cpuid, irq_num, level))
-               vgic_kick_vcpus(kvm);
+       int ret = 0;
+       int vcpu_id;
 
-       return 0;
+       if (unlikely(!vgic_initialized(kvm))) {
+               mutex_lock(&kvm->lock);
+               ret = vgic_init(kvm);
+               mutex_unlock(&kvm->lock);
+
+               if (ret)
+                       goto out;
+       }
+
+       vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
+       if (vcpu_id >= 0) {
+               /* kick the specified vcpu */
+               kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
+       }
+
+out:
+       return ret;
 }
 
 static irqreturn_t vgic_maintenance_handler(int irq, void *data)
@@ -1726,39 +1743,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
 
        int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
        vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
-       vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL);
+       vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
 
        if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
                kvm_vgic_vcpu_destroy(vcpu);
                return -ENOMEM;
        }
 
-       return 0;
-}
-
-/**
- * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
- * @vcpu: pointer to the vcpu struct
- *
- * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
- * this vcpu and enable the VGIC for this VCPU
- */
-static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
-       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-       int i;
-
-       for (i = 0; i < dist->nr_irqs; i++) {
-               if (i < VGIC_NR_PPIS)
-                       vgic_bitmap_set_irq_val(&dist->irq_enabled,
-                                               vcpu->vcpu_id, i, 1);
-               if (i < VGIC_NR_PRIVATE_IRQS)
-                       vgic_bitmap_set_irq_val(&dist->irq_cfg,
-                                               vcpu->vcpu_id, i, VGIC_CFG_EDGE);
-
-               vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
-       }
+       memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
 
        /*
         * Store the number of LRs per vcpu, so we don't have to go
@@ -1767,7 +1759,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
         */
        vgic_cpu->nr_lr = vgic->nr_lr;
 
-       vgic_enable(vcpu);
+       return 0;
 }
 
 void kvm_vgic_destroy(struct kvm *kvm)
@@ -1798,20 +1790,21 @@ void kvm_vgic_destroy(struct kvm *kvm)
        dist->irq_spi_cpu = NULL;
        dist->irq_spi_target = NULL;
        dist->irq_pending_on_cpu = NULL;
+       dist->nr_cpus = 0;
 }
 
 /*
  * Allocate and initialize the various data structures. Must be called
  * with kvm->lock held!
  */
-static int vgic_init_maps(struct kvm *kvm)
+static int vgic_init(struct kvm *kvm)
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
        struct kvm_vcpu *vcpu;
        int nr_cpus, nr_irqs;
-       int ret, i;
+       int ret, i, vcpu_id;
 
-       if (dist->nr_cpus)      /* Already allocated */
+       if (vgic_initialized(kvm))
                return 0;
 
        nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
@@ -1859,16 +1852,28 @@ static int vgic_init_maps(struct kvm *kvm)
        if (ret)
                goto out;
 
-       kvm_for_each_vcpu(i, vcpu, kvm) {
+       for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
+               vgic_set_target_reg(kvm, 0, i);
+
+       kvm_for_each_vcpu(vcpu_id, vcpu, kvm) {
                ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
                if (ret) {
                        kvm_err("VGIC: Failed to allocate vcpu memory\n");
                        break;
                }
-       }
 
-       for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
-               vgic_set_target_reg(kvm, 0, i);
+               for (i = 0; i < dist->nr_irqs; i++) {
+                       if (i < VGIC_NR_PPIS)
+                               vgic_bitmap_set_irq_val(&dist->irq_enabled,
+                                                       vcpu->vcpu_id, i, 1);
+                       if (i < VGIC_NR_PRIVATE_IRQS)
+                               vgic_bitmap_set_irq_val(&dist->irq_cfg,
+                                                       vcpu->vcpu_id, i,
+                                                       VGIC_CFG_EDGE);
+               }
+
+               vgic_enable(vcpu);
+       }
 
 out:
        if (ret)
@@ -1878,25 +1883,23 @@ out:
 }
 
 /**
- * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
+ * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs
  * @kvm: pointer to the kvm struct
  *
  * Map the virtual CPU interface into the VM before running any VCPUs.  We
  * can't do this at creation time, because user space must first set the
- * virtual CPU interface address in the guest physical address space.  Also
- * initialize the ITARGETSRn regs to 0 on the emulated distributor.
+ * virtual CPU interface address in the guest physical address space.
  */
-int kvm_vgic_init(struct kvm *kvm)
+int kvm_vgic_map_resources(struct kvm *kvm)
 {
-       struct kvm_vcpu *vcpu;
-       int ret = 0, i;
+       int ret = 0;
 
        if (!irqchip_in_kernel(kvm))
                return 0;
 
        mutex_lock(&kvm->lock);
 
-       if (vgic_initialized(kvm))
+       if (vgic_ready(kvm))
                goto out;
 
        if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
@@ -1906,7 +1909,11 @@ int kvm_vgic_init(struct kvm *kvm)
                goto out;
        }
 
-       ret = vgic_init_maps(kvm);
+       /*
+        * Initialize the vgic if this hasn't already been done on demand by
+        * accessing the vgic state from userspace.
+        */
+       ret = vgic_init(kvm);
        if (ret) {
                kvm_err("Unable to allocate maps\n");
                goto out;
@@ -1920,9 +1927,6 @@ int kvm_vgic_init(struct kvm *kvm)
                goto out;
        }
 
-       kvm_for_each_vcpu(i, vcpu, kvm)
-               kvm_vgic_vcpu_init(vcpu);
-
        kvm->arch.vgic.ready = true;
 out:
        if (ret)
@@ -2167,7 +2171,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
 
        mutex_lock(&dev->kvm->lock);
 
-       ret = vgic_init_maps(dev->kvm);
+       ret = vgic_init(dev->kvm);
        if (ret)
                goto out;
 
@@ -2289,7 +2293,7 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 
                mutex_lock(&dev->kvm->lock);
 
-               if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
+               if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
                        ret = -EBUSY;
                else
                        dev->kvm->arch.vgic.nr_irqs = val;
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
deleted file mode 100644 (file)
index e05000e..0000000
+++ /dev/null
@@ -1,1026 +0,0 @@
-/*
- * Kernel-based Virtual Machine - device assignment support
- *
- * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-#include <linux/errno.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/namei.h>
-#include <linux/fs.h>
-#include "irq.h"
-
-static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
-                                                     int assigned_dev_id)
-{
-       struct list_head *ptr;
-       struct kvm_assigned_dev_kernel *match;
-
-       list_for_each(ptr, head) {
-               match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
-               if (match->assigned_dev_id == assigned_dev_id)
-                       return match;
-       }
-       return NULL;
-}
-
-static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
-                                   *assigned_dev, int irq)
-{
-       int i, index;
-       struct msix_entry *host_msix_entries;
-
-       host_msix_entries = assigned_dev->host_msix_entries;
-
-       index = -1;
-       for (i = 0; i < assigned_dev->entries_nr; i++)
-               if (irq == host_msix_entries[i].vector) {
-                       index = i;
-                       break;
-               }
-       if (index < 0)
-               printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
-
-       return index;
-}
-
-static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
-{
-       struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
-       int ret;
-
-       spin_lock(&assigned_dev->intx_lock);
-       if (pci_check_and_mask_intx(assigned_dev->dev)) {
-               assigned_dev->host_irq_disabled = true;
-               ret = IRQ_WAKE_THREAD;
-       } else
-               ret = IRQ_NONE;
-       spin_unlock(&assigned_dev->intx_lock);
-
-       return ret;
-}
-
-static void
-kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
-                                int vector)
-{
-       if (unlikely(assigned_dev->irq_requested_type &
-                    KVM_DEV_IRQ_GUEST_INTX)) {
-               spin_lock(&assigned_dev->intx_mask_lock);
-               if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
-                       kvm_set_irq(assigned_dev->kvm,
-                                   assigned_dev->irq_source_id, vector, 1,
-                                   false);
-               spin_unlock(&assigned_dev->intx_mask_lock);
-       } else
-               kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
-                           vector, 1, false);
-}
-
-static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
-{
-       struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
-
-       if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
-               spin_lock_irq(&assigned_dev->intx_lock);
-               disable_irq_nosync(irq);
-               assigned_dev->host_irq_disabled = true;
-               spin_unlock_irq(&assigned_dev->intx_lock);
-       }
-
-       kvm_assigned_dev_raise_guest_irq(assigned_dev,
-                                        assigned_dev->guest_irq);
-
-       return IRQ_HANDLED;
-}
-
-#ifdef __KVM_HAVE_MSI
-static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
-{
-       struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
-       int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
-                                      assigned_dev->irq_source_id,
-                                      assigned_dev->guest_irq, 1);
-       return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
-}
-
-static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
-{
-       struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
-
-       kvm_assigned_dev_raise_guest_irq(assigned_dev,
-                                        assigned_dev->guest_irq);
-
-       return IRQ_HANDLED;
-}
-#endif
-
-#ifdef __KVM_HAVE_MSIX
-static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
-{
-       struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
-       int index = find_index_from_host_irq(assigned_dev, irq);
-       u32 vector;
-       int ret = 0;
-
-       if (index >= 0) {
-               vector = assigned_dev->guest_msix_entries[index].vector;
-               ret = kvm_set_irq_inatomic(assigned_dev->kvm,
-                                          assigned_dev->irq_source_id,
-                                          vector, 1);
-       }
-
-       return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
-}
-
-static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
-{
-       struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
-       int index = find_index_from_host_irq(assigned_dev, irq);
-       u32 vector;
-
-       if (index >= 0) {
-               vector = assigned_dev->guest_msix_entries[index].vector;
-               kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
-       }
-
-       return IRQ_HANDLED;
-}
-#endif
-
-/* Ack the irq line for an assigned device */
-static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
-{
-       struct kvm_assigned_dev_kernel *dev =
-               container_of(kian, struct kvm_assigned_dev_kernel,
-                            ack_notifier);
-
-       kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);
-
-       spin_lock(&dev->intx_mask_lock);
-
-       if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
-               bool reassert = false;
-
-               spin_lock_irq(&dev->intx_lock);
-               /*
-                * The guest IRQ may be shared so this ack can come from an
-                * IRQ for another guest device.
-                */
-               if (dev->host_irq_disabled) {
-                       if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
-                               enable_irq(dev->host_irq);
-                       else if (!pci_check_and_unmask_intx(dev->dev))
-                               reassert = true;
-                       dev->host_irq_disabled = reassert;
-               }
-               spin_unlock_irq(&dev->intx_lock);
-
-               if (reassert)
-                       kvm_set_irq(dev->kvm, dev->irq_source_id,
-                                   dev->guest_irq, 1, false);
-       }
-
-       spin_unlock(&dev->intx_mask_lock);
-}
-
-static void deassign_guest_irq(struct kvm *kvm,
-                              struct kvm_assigned_dev_kernel *assigned_dev)
-{
-       if (assigned_dev->ack_notifier.gsi != -1)
-               kvm_unregister_irq_ack_notifier(kvm,
-                                               &assigned_dev->ack_notifier);
-
-       kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
-                   assigned_dev->guest_irq, 0, false);
-
-       if (assigned_dev->irq_source_id != -1)
-               kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
-       assigned_dev->irq_source_id = -1;
-       assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
-}
-
-/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
-static void deassign_host_irq(struct kvm *kvm,
-                             struct kvm_assigned_dev_kernel *assigned_dev)
-{
-       /*
-        * We disable irq here to prevent further events.
-        *
-        * Notice this maybe result in nested disable if the interrupt type is
-        * INTx, but it's OK for we are going to free it.
-        *
-        * If this function is a part of VM destroy, please ensure that till
-        * now, the kvm state is still legal for probably we also have to wait
-        * on a currently running IRQ handler.
-        */
-       if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
-               int i;
-               for (i = 0; i < assigned_dev->entries_nr; i++)
-                       disable_irq(assigned_dev->host_msix_entries[i].vector);
-
-               for (i = 0; i < assigned_dev->entries_nr; i++)
-                       free_irq(assigned_dev->host_msix_entries[i].vector,
-                                assigned_dev);
-
-               assigned_dev->entries_nr = 0;
-               kfree(assigned_dev->host_msix_entries);
-               kfree(assigned_dev->guest_msix_entries);
-               pci_disable_msix(assigned_dev->dev);
-       } else {
-               /* Deal with MSI and INTx */
-               if ((assigned_dev->irq_requested_type &
-                    KVM_DEV_IRQ_HOST_INTX) &&
-                   (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
-                       spin_lock_irq(&assigned_dev->intx_lock);
-                       pci_intx(assigned_dev->dev, false);
-                       spin_unlock_irq(&assigned_dev->intx_lock);
-                       synchronize_irq(assigned_dev->host_irq);
-               } else
-                       disable_irq(assigned_dev->host_irq);
-
-               free_irq(assigned_dev->host_irq, assigned_dev);
-
-               if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
-                       pci_disable_msi(assigned_dev->dev);
-       }
-
-       assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
-}
-
-static int kvm_deassign_irq(struct kvm *kvm,
-                           struct kvm_assigned_dev_kernel *assigned_dev,
-                           unsigned long irq_requested_type)
-{
-       unsigned long guest_irq_type, host_irq_type;
-
-       if (!irqchip_in_kernel(kvm))
-               return -EINVAL;
-       /* no irq assignment to deassign */
-       if (!assigned_dev->irq_requested_type)
-               return -ENXIO;
-
-       host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
-       guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
-
-       if (host_irq_type)
-               deassign_host_irq(kvm, assigned_dev);
-       if (guest_irq_type)
-               deassign_guest_irq(kvm, assigned_dev);
-
-       return 0;
-}
-
-static void kvm_free_assigned_irq(struct kvm *kvm,
-                                 struct kvm_assigned_dev_kernel *assigned_dev)
-{
-       kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
-}
-
-static void kvm_free_assigned_device(struct kvm *kvm,
-                                    struct kvm_assigned_dev_kernel
-                                    *assigned_dev)
-{
-       kvm_free_assigned_irq(kvm, assigned_dev);
-
-       pci_reset_function(assigned_dev->dev);
-       if (pci_load_and_free_saved_state(assigned_dev->dev,
-                                         &assigned_dev->pci_saved_state))
-               printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
-                      __func__, dev_name(&assigned_dev->dev->dev));
-       else
-               pci_restore_state(assigned_dev->dev);
-
-       pci_clear_dev_assigned(assigned_dev->dev);
-
-       pci_release_regions(assigned_dev->dev);
-       pci_disable_device(assigned_dev->dev);
-       pci_dev_put(assigned_dev->dev);
-
-       list_del(&assigned_dev->list);
-       kfree(assigned_dev);
-}
-
-void kvm_free_all_assigned_devices(struct kvm *kvm)
-{
-       struct list_head *ptr, *ptr2;
-       struct kvm_assigned_dev_kernel *assigned_dev;
-
-       list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
-               assigned_dev = list_entry(ptr,
-                                         struct kvm_assigned_dev_kernel,
-                                         list);
-
-               kvm_free_assigned_device(kvm, assigned_dev);
-       }
-}
-
-static int assigned_device_enable_host_intx(struct kvm *kvm,
-                                           struct kvm_assigned_dev_kernel *dev)
-{
-       irq_handler_t irq_handler;
-       unsigned long flags;
-
-       dev->host_irq = dev->dev->irq;
-
-       /*
-        * We can only share the IRQ line with other host devices if we are
-        * able to disable the IRQ source at device-level - independently of
-        * the guest driver. Otherwise host devices may suffer from unbounded
-        * IRQ latencies when the guest keeps the line asserted.
-        */
-       if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
-               irq_handler = kvm_assigned_dev_intx;
-               flags = IRQF_SHARED;
-       } else {
-               irq_handler = NULL;
-               flags = IRQF_ONESHOT;
-       }
-       if (request_threaded_irq(dev->host_irq, irq_handler,
-                                kvm_assigned_dev_thread_intx, flags,
-                                dev->irq_name, dev))
-               return -EIO;
-
-       if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
-               spin_lock_irq(&dev->intx_lock);
-               pci_intx(dev->dev, true);
-               spin_unlock_irq(&dev->intx_lock);
-       }
-       return 0;
-}
-
-#ifdef __KVM_HAVE_MSI
-static int assigned_device_enable_host_msi(struct kvm *kvm,
-                                          struct kvm_assigned_dev_kernel *dev)
-{
-       int r;
-
-       if (!dev->dev->msi_enabled) {
-               r = pci_enable_msi(dev->dev);
-               if (r)
-                       return r;
-       }
-
-       dev->host_irq = dev->dev->irq;
-       if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
-                                kvm_assigned_dev_thread_msi, 0,
-                                dev->irq_name, dev)) {
-               pci_disable_msi(dev->dev);
-               return -EIO;
-       }
-
-       return 0;
-}
-#endif
-
-#ifdef __KVM_HAVE_MSIX
-static int assigned_device_enable_host_msix(struct kvm *kvm,
-                                           struct kvm_assigned_dev_kernel *dev)
-{
-       int i, r = -EINVAL;
-
-       /* host_msix_entries and guest_msix_entries should have been
-        * initialized */
-       if (dev->entries_nr == 0)
-               return r;
-
-       r = pci_enable_msix_exact(dev->dev,
-                                 dev->host_msix_entries, dev->entries_nr);
-       if (r)
-               return r;
-
-       for (i = 0; i < dev->entries_nr; i++) {
-               r = request_threaded_irq(dev->host_msix_entries[i].vector,
-                                        kvm_assigned_dev_msix,
-                                        kvm_assigned_dev_thread_msix,
-                                        0, dev->irq_name, dev);
-               if (r)
-                       goto err;
-       }
-
-       return 0;
-err:
-       for (i -= 1; i >= 0; i--)
-               free_irq(dev->host_msix_entries[i].vector, dev);
-       pci_disable_msix(dev->dev);
-       return r;
-}
-
-#endif
-
-static int assigned_device_enable_guest_intx(struct kvm *kvm,
-                               struct kvm_assigned_dev_kernel *dev,
-                               struct kvm_assigned_irq *irq)
-{
-       dev->guest_irq = irq->guest_irq;
-       dev->ack_notifier.gsi = irq->guest_irq;
-       return 0;
-}
-
-#ifdef __KVM_HAVE_MSI
-static int assigned_device_enable_guest_msi(struct kvm *kvm,
-                       struct kvm_assigned_dev_kernel *dev,
-                       struct kvm_assigned_irq *irq)
-{
-       dev->guest_irq = irq->guest_irq;
-       dev->ack_notifier.gsi = -1;
-       return 0;
-}
-#endif
-
-#ifdef __KVM_HAVE_MSIX
-static int assigned_device_enable_guest_msix(struct kvm *kvm,
-                       struct kvm_assigned_dev_kernel *dev,
-                       struct kvm_assigned_irq *irq)
-{
-       dev->guest_irq = irq->guest_irq;
-       dev->ack_notifier.gsi = -1;
-       return 0;
-}
-#endif
-
-static int assign_host_irq(struct kvm *kvm,
-                          struct kvm_assigned_dev_kernel *dev,
-                          __u32 host_irq_type)
-{
-       int r = -EEXIST;
-
-       if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
-               return r;
-
-       snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
-                pci_name(dev->dev));
-
-       switch (host_irq_type) {
-       case KVM_DEV_IRQ_HOST_INTX:
-               r = assigned_device_enable_host_intx(kvm, dev);
-               break;
-#ifdef __KVM_HAVE_MSI
-       case KVM_DEV_IRQ_HOST_MSI:
-               r = assigned_device_enable_host_msi(kvm, dev);
-               break;
-#endif
-#ifdef __KVM_HAVE_MSIX
-       case KVM_DEV_IRQ_HOST_MSIX:
-               r = assigned_device_enable_host_msix(kvm, dev);
-               break;
-#endif
-       default:
-               r = -EINVAL;
-       }
-       dev->host_irq_disabled = false;
-
-       if (!r)
-               dev->irq_requested_type |= host_irq_type;
-
-       return r;
-}
-
-static int assign_guest_irq(struct kvm *kvm,
-                           struct kvm_assigned_dev_kernel *dev,
-                           struct kvm_assigned_irq *irq,
-                           unsigned long guest_irq_type)
-{
-       int id;
-       int r = -EEXIST;
-
-       if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
-               return r;
-
-       id = kvm_request_irq_source_id(kvm);
-       if (id < 0)
-               return id;
-
-       dev->irq_source_id = id;
-
-       switch (guest_irq_type) {
-       case KVM_DEV_IRQ_GUEST_INTX:
-               r = assigned_device_enable_guest_intx(kvm, dev, irq);
-               break;
-#ifdef __KVM_HAVE_MSI
-       case KVM_DEV_IRQ_GUEST_MSI:
-               r = assigned_device_enable_guest_msi(kvm, dev, irq);
-               break;
-#endif
-#ifdef __KVM_HAVE_MSIX
-       case KVM_DEV_IRQ_GUEST_MSIX:
-               r = assigned_device_enable_guest_msix(kvm, dev, irq);
-               break;
-#endif
-       default:
-               r = -EINVAL;
-       }
-
-       if (!r) {
-               dev->irq_requested_type |= guest_irq_type;
-               if (dev->ack_notifier.gsi != -1)
-                       kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
-       } else {
-               kvm_free_irq_source_id(kvm, dev->irq_source_id);
-               dev->irq_source_id = -1;
-       }
-
-       return r;
-}
-
-/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
-static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
-                                  struct kvm_assigned_irq *assigned_irq)
-{
-       int r = -EINVAL;
-       struct kvm_assigned_dev_kernel *match;
-       unsigned long host_irq_type, guest_irq_type;
-
-       if (!irqchip_in_kernel(kvm))
-               return r;
-
-       mutex_lock(&kvm->lock);
-       r = -ENODEV;
-       match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
-                                     assigned_irq->assigned_dev_id);
-       if (!match)
-               goto out;
-
-       host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
-       guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
-
-       r = -EINVAL;
-       /* can only assign one type at a time */
-       if (hweight_long(host_irq_type) > 1)
-               goto out;
-       if (hweight_long(guest_irq_type) > 1)
-               goto out;
-       if (host_irq_type == 0 && guest_irq_type == 0)
-               goto out;
-
-       r = 0;
-       if (host_irq_type)
-               r = assign_host_irq(kvm, match, host_irq_type);
-       if (r)
-               goto out;
-
-       if (guest_irq_type)
-               r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
-out:
-       mutex_unlock(&kvm->lock);
-       return r;
-}
-
-static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
-                                        struct kvm_assigned_irq
-                                        *assigned_irq)
-{
-       int r = -ENODEV;
-       struct kvm_assigned_dev_kernel *match;
-       unsigned long irq_type;
-
-       mutex_lock(&kvm->lock);
-
-       match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
-                                     assigned_irq->assigned_dev_id);
-       if (!match)
-               goto out;
-
-       irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
-                                         KVM_DEV_IRQ_GUEST_MASK);
-       r = kvm_deassign_irq(kvm, match, irq_type);
-out:
-       mutex_unlock(&kvm->lock);
-       return r;
-}
-
-/*
- * We want to test whether the caller has been granted permissions to
- * use this device.  To be able to configure and control the device,
- * the user needs access to PCI configuration space and BAR resources.
- * These are accessed through PCI sysfs.  PCI config space is often
- * passed to the process calling this ioctl via file descriptor, so we
- * can't rely on access to that file.  We can check for permissions
- * on each of the BAR resource files, which is a pretty clear
- * indicator that the user has been granted access to the device.
- */
-static int probe_sysfs_permissions(struct pci_dev *dev)
-{
-#ifdef CONFIG_SYSFS
-       int i;
-       bool bar_found = false;
-
-       for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
-               char *kpath, *syspath;
-               struct path path;
-               struct inode *inode;
-               int r;
-
-               if (!pci_resource_len(dev, i))
-                       continue;
-
-               kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
-               if (!kpath)
-                       return -ENOMEM;
-
-               /* Per sysfs-rules, sysfs is always at /sys */
-               syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
-               kfree(kpath);
-               if (!syspath)
-                       return -ENOMEM;
-
-               r = kern_path(syspath, LOOKUP_FOLLOW, &path);
-               kfree(syspath);
-               if (r)
-                       return r;
-
-               inode = path.dentry->d_inode;
-
-               r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
-               path_put(&path);
-               if (r)
-                       return r;
-
-               bar_found = true;
-       }
-
-       /* If no resources, probably something special */
-       if (!bar_found)
-               return -EPERM;
-
-       return 0;
-#else
-       return -EINVAL; /* No way to control the device without sysfs */
-#endif
-}
-
-static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
-                                     struct kvm_assigned_pci_dev *assigned_dev)
-{
-       int r = 0, idx;
-       struct kvm_assigned_dev_kernel *match;
-       struct pci_dev *dev;
-
-       if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
-               return -EINVAL;
-
-       mutex_lock(&kvm->lock);
-       idx = srcu_read_lock(&kvm->srcu);
-
-       match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
-                                     assigned_dev->assigned_dev_id);
-       if (match) {
-               /* device already assigned */
-               r = -EEXIST;
-               goto out;
-       }
-
-       match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
-       if (match == NULL) {
-               printk(KERN_INFO "%s: Couldn't allocate memory\n",
-                      __func__);
-               r = -ENOMEM;
-               goto out;
-       }
-       dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
-                                  assigned_dev->busnr,
-                                  assigned_dev->devfn);
-       if (!dev) {
-               printk(KERN_INFO "%s: host device not found\n", __func__);
-               r = -EINVAL;
-               goto out_free;
-       }
-
-       /* Don't allow bridges to be assigned */
-       if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
-               r = -EPERM;
-               goto out_put;
-       }
-
-       r = probe_sysfs_permissions(dev);
-       if (r)
-               goto out_put;
-
-       if (pci_enable_device(dev)) {
-               printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
-               r = -EBUSY;
-               goto out_put;
-       }
-       r = pci_request_regions(dev, "kvm_assigned_device");
-       if (r) {
-               printk(KERN_INFO "%s: Could not get access to device regions\n",
-                      __func__);
-               goto out_disable;
-       }
-
-       pci_reset_function(dev);
-       pci_save_state(dev);
-       match->pci_saved_state = pci_store_saved_state(dev);
-       if (!match->pci_saved_state)
-               printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
-                      __func__, dev_name(&dev->dev));
-
-       if (!pci_intx_mask_supported(dev))
-               assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;
-
-       match->assigned_dev_id = assigned_dev->assigned_dev_id;
-       match->host_segnr = assigned_dev->segnr;
-       match->host_busnr = assigned_dev->busnr;
-       match->host_devfn = assigned_dev->devfn;
-       match->flags = assigned_dev->flags;
-       match->dev = dev;
-       spin_lock_init(&match->intx_lock);
-       spin_lock_init(&match->intx_mask_lock);
-       match->irq_source_id = -1;
-       match->kvm = kvm;
-       match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
-
-       list_add(&match->list, &kvm->arch.assigned_dev_head);
-
-       if (!kvm->arch.iommu_domain) {
-               r = kvm_iommu_map_guest(kvm);
-               if (r)
-                       goto out_list_del;
-       }
-       r = kvm_assign_device(kvm, match);
-       if (r)
-               goto out_list_del;
-
-out:
-       srcu_read_unlock(&kvm->srcu, idx);
-       mutex_unlock(&kvm->lock);
-       return r;
-out_list_del:
-       if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
-               printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
-                      __func__, dev_name(&dev->dev));
-       list_del(&match->list);
-       pci_release_regions(dev);
-out_disable:
-       pci_disable_device(dev);
-out_put:
-       pci_dev_put(dev);
-out_free:
-       kfree(match);
-       srcu_read_unlock(&kvm->srcu, idx);
-       mutex_unlock(&kvm->lock);
-       return r;
-}
-
-static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
-               struct kvm_assigned_pci_dev *assigned_dev)
-{
-       int r = 0;
-       struct kvm_assigned_dev_kernel *match;
-
-       mutex_lock(&kvm->lock);
-
-       match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
-                                     assigned_dev->assigned_dev_id);
-       if (!match) {
-               printk(KERN_INFO "%s: device hasn't been assigned before, "
-                 "so cannot be deassigned\n", __func__);
-               r = -EINVAL;
-               goto out;
-       }
-
-       kvm_deassign_device(kvm, match);
-
-       kvm_free_assigned_device(kvm, match);
-
-out:
-       mutex_unlock(&kvm->lock);
-       return r;
-}
-
-
-#ifdef __KVM_HAVE_MSIX
-static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
-                                   struct kvm_assigned_msix_nr *entry_nr)
-{
-       int r = 0;
-       struct kvm_assigned_dev_kernel *adev;
-
-       mutex_lock(&kvm->lock);
-
-       adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
-                                     entry_nr->assigned_dev_id);
-       if (!adev) {
-               r = -EINVAL;
-               goto msix_nr_out;
-       }
-
-       if (adev->entries_nr == 0) {
-               adev->entries_nr = entry_nr->entry_nr;
-               if (adev->entries_nr == 0 ||
-                   adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
-                       r = -EINVAL;
-                       goto msix_nr_out;
-               }
-
-               adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
-                                               entry_nr->entry_nr,
-                                               GFP_KERNEL);
-               if (!adev->host_msix_entries) {
-                       r = -ENOMEM;
-                       goto msix_nr_out;
-               }
-               adev->guest_msix_entries =
-                       kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
-                               GFP_KERNEL);
-               if (!adev->guest_msix_entries) {
-                       kfree(adev->host_msix_entries);
-                       r = -ENOMEM;
-                       goto msix_nr_out;
-               }
-       } else /* Not allowed set MSI-X number twice */
-               r = -EINVAL;
-msix_nr_out:
-       mutex_unlock(&kvm->lock);
-       return r;
-}
-
-static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
-                                      struct kvm_assigned_msix_entry *entry)
-{
-       int r = 0, i;
-       struct kvm_assigned_dev_kernel *adev;
-
-       mutex_lock(&kvm->lock);
-
-       adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
-                                     entry->assigned_dev_id);
-
-       if (!adev) {
-               r = -EINVAL;
-               goto msix_entry_out;
-       }
-
-       for (i = 0; i < adev->entries_nr; i++)
-               if (adev->guest_msix_entries[i].vector == 0 ||
-                   adev->guest_msix_entries[i].entry == entry->entry) {
-                       adev->guest_msix_entries[i].entry = entry->entry;
-                       adev->guest_msix_entries[i].vector = entry->gsi;
-                       adev->host_msix_entries[i].entry = entry->entry;
-                       break;
-               }
-       if (i == adev->entries_nr) {
-               r = -ENOSPC;
-               goto msix_entry_out;
-       }
-
-msix_entry_out:
-       mutex_unlock(&kvm->lock);
-
-       return r;
-}
-#endif
-
-static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
-               struct kvm_assigned_pci_dev *assigned_dev)
-{
-       int r = 0;
-       struct kvm_assigned_dev_kernel *match;
-
-       mutex_lock(&kvm->lock);
-
-       match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
-                                     assigned_dev->assigned_dev_id);
-       if (!match) {
-               r = -ENODEV;
-               goto out;
-       }
-
-       spin_lock(&match->intx_mask_lock);
-
-       match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
-       match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;
-
-       if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
-               if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
-                       kvm_set_irq(match->kvm, match->irq_source_id,
-                                   match->guest_irq, 0, false);
-                       /*
-                        * Masking at hardware-level is performed on demand,
-                        * i.e. when an IRQ actually arrives at the host.
-                        */
-               } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
-                       /*
-                        * Unmask the IRQ line if required. Unmasking at
-                        * device level will be performed by user space.
-                        */
-                       spin_lock_irq(&match->intx_lock);
-                       if (match->host_irq_disabled) {
-                               enable_irq(match->host_irq);
-                               match->host_irq_disabled = false;
-                       }
-                       spin_unlock_irq(&match->intx_lock);
-               }
-       }
-
-       spin_unlock(&match->intx_mask_lock);
-
-out:
-       mutex_unlock(&kvm->lock);
-       return r;
-}
-
-long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
-                                 unsigned long arg)
-{
-       void __user *argp = (void __user *)arg;
-       int r;
-
-       switch (ioctl) {
-       case KVM_ASSIGN_PCI_DEVICE: {
-               struct kvm_assigned_pci_dev assigned_dev;
-
-               r = -EFAULT;
-               if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
-                       goto out;
-               r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
-               if (r)
-                       goto out;
-               break;
-       }
-       case KVM_ASSIGN_IRQ: {
-               r = -EOPNOTSUPP;
-               break;
-       }
-       case KVM_ASSIGN_DEV_IRQ: {
-               struct kvm_assigned_irq assigned_irq;
-
-               r = -EFAULT;
-               if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
-                       goto out;
-               r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
-               if (r)
-                       goto out;
-               break;
-       }
-       case KVM_DEASSIGN_DEV_IRQ: {
-               struct kvm_assigned_irq assigned_irq;
-
-               r = -EFAULT;
-               if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
-                       goto out;
-               r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
-               if (r)
-                       goto out;
-               break;
-       }
-       case KVM_DEASSIGN_PCI_DEVICE: {
-               struct kvm_assigned_pci_dev assigned_dev;
-
-               r = -EFAULT;
-               if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
-                       goto out;
-               r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
-               if (r)
-                       goto out;
-               break;
-       }
-#ifdef __KVM_HAVE_MSIX
-       case KVM_ASSIGN_SET_MSIX_NR: {
-               struct kvm_assigned_msix_nr entry_nr;
-               r = -EFAULT;
-               if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
-                       goto out;
-               r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
-               if (r)
-                       goto out;
-               break;
-       }
-       case KVM_ASSIGN_SET_MSIX_ENTRY: {
-               struct kvm_assigned_msix_entry entry;
-               r = -EFAULT;
-               if (copy_from_user(&entry, argp, sizeof entry))
-                       goto out;
-               r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
-               if (r)
-                       goto out;
-               break;
-       }
-#endif
-       case KVM_ASSIGN_SET_INTX_MASK: {
-               struct kvm_assigned_pci_dev assigned_dev;
-
-               r = -EFAULT;
-               if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
-                       goto out;
-               r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
-               break;
-       }
-       default:
-               r = -ENOTTY;
-               break;
-       }
-out:
-       return r;
-}
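The ioctl dispatcher above was the entry point for the legacy device-assignment API on the VM file descriptor. As a hedged illustration only, userspace might have driven KVM_ASSIGN_PCI_DEVICE roughly as sketched below; struct kvm_assigned_pci_dev and the KVM_DEV_ASSIGN_ENABLE_IOMMU flag come from the uapi header of that era, while the assigned_dev_id encoding shown is an assumption, not taken from this diff.

/*
 * Illustrative userspace sketch of the legacy assignment ioctl served by
 * the dispatcher above; the assigned_dev_id encoding is an assumption.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int assign_pci_device(int vm_fd, unsigned int bus, unsigned int devfn)
{
        struct kvm_assigned_pci_dev dev = {
                .assigned_dev_id = (bus << 8) | devfn,   /* assumed encoding */
                .busnr           = bus,
                .devfn           = devfn,
                .flags           = KVM_DEV_ASSIGN_ENABLE_IOMMU,
        };

        return ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &dev);
}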
index b0fb390..148b239 100644 (file)
@@ -36,9 +36,6 @@
 #include <linux/seqlock.h>
 #include <trace/events/kvm.h>
 
-#ifdef __KVM_HAVE_IOAPIC
-#include "ioapic.h"
-#endif
 #include "iodev.h"
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
@@ -492,9 +489,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
        mutex_lock(&kvm->irq_lock);
        hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
        mutex_unlock(&kvm->irq_lock);
-#ifdef __KVM_HAVE_IOAPIC
        kvm_vcpu_request_scan_ioapic(kvm);
-#endif
 }
 
 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
@@ -504,9 +499,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
        hlist_del_init_rcu(&kian->link);
        mutex_unlock(&kvm->irq_lock);
        synchronize_srcu(&kvm->irq_srcu);
-#ifdef __KVM_HAVE_IOAPIC
        kvm_vcpu_request_scan_ioapic(kvm);
-#endif
 }
 #endif
 
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
deleted file mode 100644 (file)
index 0ba4057..0000000
+++ /dev/null
@@ -1,687 +0,0 @@
-/*
- *  Copyright (C) 2001  MandrakeSoft S.A.
- *  Copyright 2010 Red Hat, Inc. and/or its affiliates.
- *
- *    MandrakeSoft S.A.
- *    43, rue d'Aboukir
- *    75002 Paris - France
- *    http://www.linux-mandrake.com/
- *    http://www.mandrakesoft.com/
- *
- *  This library is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU Lesser General Public
- *  License as published by the Free Software Foundation; either
- *  version 2 of the License, or (at your option) any later version.
- *
- *  This library is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  Lesser General Public License for more details.
- *
- *  You should have received a copy of the GNU Lesser General Public
- *  License along with this library; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- *
- *  Yunhong Jiang <yunhong.jiang@intel.com>
- *  Yaozu (Eddie) Dong <eddie.dong@intel.com>
- *  Based on Xen 3.1 code.
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/smp.h>
-#include <linux/hrtimer.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/current.h>
-#include <trace/events/kvm.h>
-
-#include "ioapic.h"
-#include "lapic.h"
-#include "irq.h"
-
-#if 0
-#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
-#else
-#define ioapic_debug(fmt, arg...)
-#endif
-static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
-               bool line_status);
-
-static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
-                                         unsigned long addr,
-                                         unsigned long length)
-{
-       unsigned long result = 0;
-
-       switch (ioapic->ioregsel) {
-       case IOAPIC_REG_VERSION:
-               result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
-                         | (IOAPIC_VERSION_ID & 0xff));
-               break;
-
-       case IOAPIC_REG_APIC_ID:
-       case IOAPIC_REG_ARB_ID:
-               result = ((ioapic->id & 0xf) << 24);
-               break;
-
-       default:
-               {
-                       u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
-                       u64 redir_content;
-
-                       if (redir_index < IOAPIC_NUM_PINS)
-                               redir_content =
-                                       ioapic->redirtbl[redir_index].bits;
-                       else
-                               redir_content = ~0ULL;
-
-                       result = (ioapic->ioregsel & 0x1) ?
-                           (redir_content >> 32) & 0xffffffff :
-                           redir_content & 0xffffffff;
-                       break;
-               }
-       }
-
-       return result;
-}
-
-static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
-{
-       ioapic->rtc_status.pending_eoi = 0;
-       bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
-}
-
-static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
-
-static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic)
-{
-       if (WARN_ON(ioapic->rtc_status.pending_eoi < 0))
-               kvm_rtc_eoi_tracking_restore_all(ioapic);
-}
-
-static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
-{
-       bool new_val, old_val;
-       struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
-       union kvm_ioapic_redirect_entry *e;
-
-       e = &ioapic->redirtbl[RTC_GSI];
-       if (!kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id,
-                               e->fields.dest_mode))
-               return;
-
-       new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
-       old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
-
-       if (new_val == old_val)
-               return;
-
-       if (new_val) {
-               __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
-               ioapic->rtc_status.pending_eoi++;
-       } else {
-               __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
-               ioapic->rtc_status.pending_eoi--;
-               rtc_status_pending_eoi_check_valid(ioapic);
-       }
-}
-
-void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
-{
-       struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
-
-       spin_lock(&ioapic->lock);
-       __rtc_irq_eoi_tracking_restore_one(vcpu);
-       spin_unlock(&ioapic->lock);
-}
-
-static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
-{
-       struct kvm_vcpu *vcpu;
-       int i;
-
-       if (RTC_GSI >= IOAPIC_NUM_PINS)
-               return;
-
-       rtc_irq_eoi_tracking_reset(ioapic);
-       kvm_for_each_vcpu(i, vcpu, ioapic->kvm)
-           __rtc_irq_eoi_tracking_restore_one(vcpu);
-}
-
-static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
-{
-       if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) {
-               --ioapic->rtc_status.pending_eoi;
-               rtc_status_pending_eoi_check_valid(ioapic);
-       }
-}
-
-static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
-{
-       if (ioapic->rtc_status.pending_eoi > 0)
-               return true; /* coalesced */
-
-       return false;
-}
-
-static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
-               int irq_level, bool line_status)
-{
-       union kvm_ioapic_redirect_entry entry;
-       u32 mask = 1 << irq;
-       u32 old_irr;
-       int edge, ret;
-
-       entry = ioapic->redirtbl[irq];
-       edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
-
-       if (!irq_level) {
-               ioapic->irr &= ~mask;
-               ret = 1;
-               goto out;
-       }
-
-       /*
-        * Return 0 for coalesced interrupts; for edge-triggered interrupts,
-        * this only happens if a previous edge has not been delivered due
-        * to masking.  For level interrupts, the remote_irr field tells
-        * us if the interrupt is waiting for an EOI.
-        *
-        * RTC is special: it is edge-triggered, but userspace likes to know
-        * if it has been already ack-ed via EOI because coalesced RTC
-        * interrupts lead to time drift in Windows guests.  So we track
-        * EOI manually for the RTC interrupt.
-        */
-       if (irq == RTC_GSI && line_status &&
-               rtc_irq_check_coalesced(ioapic)) {
-               ret = 0;
-               goto out;
-       }
-
-       old_irr = ioapic->irr;
-       ioapic->irr |= mask;
-       if ((edge && old_irr == ioapic->irr) ||
-           (!edge && entry.fields.remote_irr)) {
-               ret = 0;
-               goto out;
-       }
-
-       ret = ioapic_service(ioapic, irq, line_status);
-
-out:
-       trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
-       return ret;
-}
-
-static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
-{
-       u32 idx;
-
-       rtc_irq_eoi_tracking_reset(ioapic);
-       for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS)
-               ioapic_set_irq(ioapic, idx, 1, true);
-
-       kvm_rtc_eoi_tracking_restore_all(ioapic);
-}
-
-
-static void update_handled_vectors(struct kvm_ioapic *ioapic)
-{
-       DECLARE_BITMAP(handled_vectors, 256);
-       int i;
-
-       memset(handled_vectors, 0, sizeof(handled_vectors));
-       for (i = 0; i < IOAPIC_NUM_PINS; ++i)
-               __set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors);
-       memcpy(ioapic->handled_vectors, handled_vectors,
-              sizeof(handled_vectors));
-       smp_wmb();
-}
-
-void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
-                       u32 *tmr)
-{
-       struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
-       union kvm_ioapic_redirect_entry *e;
-       int index;
-
-       spin_lock(&ioapic->lock);
-       for (index = 0; index < IOAPIC_NUM_PINS; index++) {
-               e = &ioapic->redirtbl[index];
-               if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
-                   kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) ||
-                   index == RTC_GSI) {
-                       if (kvm_apic_match_dest(vcpu, NULL, 0,
-                               e->fields.dest_id, e->fields.dest_mode)) {
-                               __set_bit(e->fields.vector,
-                                       (unsigned long *)eoi_exit_bitmap);
-                               if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG)
-                                       __set_bit(e->fields.vector,
-                                               (unsigned long *)tmr);
-                       }
-               }
-       }
-       spin_unlock(&ioapic->lock);
-}
-
-#ifdef CONFIG_X86
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
-{
-       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-
-       if (!ioapic)
-               return;
-       kvm_make_scan_ioapic_request(kvm);
-}
-#else
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
-{
-       return;
-}
-#endif
-
-static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
-{
-       unsigned index;
-       bool mask_before, mask_after;
-       union kvm_ioapic_redirect_entry *e;
-
-       switch (ioapic->ioregsel) {
-       case IOAPIC_REG_VERSION:
-               /* Writes are ignored. */
-               break;
-
-       case IOAPIC_REG_APIC_ID:
-               ioapic->id = (val >> 24) & 0xf;
-               break;
-
-       case IOAPIC_REG_ARB_ID:
-               break;
-
-       default:
-               index = (ioapic->ioregsel - 0x10) >> 1;
-
-               ioapic_debug("change redir index %x val %x\n", index, val);
-               if (index >= IOAPIC_NUM_PINS)
-                       return;
-               e = &ioapic->redirtbl[index];
-               mask_before = e->fields.mask;
-               if (ioapic->ioregsel & 1) {
-                       e->bits &= 0xffffffff;
-                       e->bits |= (u64) val << 32;
-               } else {
-                       e->bits &= ~0xffffffffULL;
-                       e->bits |= (u32) val;
-                       e->fields.remote_irr = 0;
-               }
-               update_handled_vectors(ioapic);
-               mask_after = e->fields.mask;
-               if (mask_before != mask_after)
-                       kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
-               if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
-                   && ioapic->irr & (1 << index))
-                       ioapic_service(ioapic, index, false);
-               kvm_vcpu_request_scan_ioapic(ioapic->kvm);
-               break;
-       }
-}
-
-static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
-{
-       union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
-       struct kvm_lapic_irq irqe;
-       int ret;
-
-       if (entry->fields.mask)
-               return -1;
-
-       ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
-                    "vector=%x trig_mode=%x\n",
-                    entry->fields.dest_id, entry->fields.dest_mode,
-                    entry->fields.delivery_mode, entry->fields.vector,
-                    entry->fields.trig_mode);
-
-       irqe.dest_id = entry->fields.dest_id;
-       irqe.vector = entry->fields.vector;
-       irqe.dest_mode = entry->fields.dest_mode;
-       irqe.trig_mode = entry->fields.trig_mode;
-       irqe.delivery_mode = entry->fields.delivery_mode << 8;
-       irqe.level = 1;
-       irqe.shorthand = 0;
-
-       if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
-               ioapic->irr &= ~(1 << irq);
-
-       if (irq == RTC_GSI && line_status) {
-               /*
-                * pending_eoi cannot ever become negative (see
-                * rtc_status_pending_eoi_check_valid), and the caller
-                * ensures that it is only called if it is >= zero, namely
-                * if rtc_irq_check_coalesced returns false.
-                */
-               BUG_ON(ioapic->rtc_status.pending_eoi != 0);
-               ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
-                               ioapic->rtc_status.dest_map);
-               ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret);
-       } else
-               ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
-
-       if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
-               entry->fields.remote_irr = 1;
-
-       return ret;
-}
-
-int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
-                      int level, bool line_status)
-{
-       int ret, irq_level;
-
-       BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
-
-       spin_lock(&ioapic->lock);
-       irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
-                                        irq_source_id, level);
-       ret = ioapic_set_irq(ioapic, irq, irq_level, line_status);
-
-       spin_unlock(&ioapic->lock);
-
-       return ret;
-}
-
-void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id)
-{
-       int i;
-
-       spin_lock(&ioapic->lock);
-       for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
-               __clear_bit(irq_source_id, &ioapic->irq_states[i]);
-       spin_unlock(&ioapic->lock);
-}
-
-static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
-{
-       int i;
-       struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic,
-                                                eoi_inject.work);
-       spin_lock(&ioapic->lock);
-       for (i = 0; i < IOAPIC_NUM_PINS; i++) {
-               union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
-
-               if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG)
-                       continue;
-
-               if (ioapic->irr & (1 << i) && !ent->fields.remote_irr)
-                       ioapic_service(ioapic, i, false);
-       }
-       spin_unlock(&ioapic->lock);
-}
-
-#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000
-
-static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
-                       struct kvm_ioapic *ioapic, int vector, int trigger_mode)
-{
-       int i;
-
-       for (i = 0; i < IOAPIC_NUM_PINS; i++) {
-               union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
-
-               if (ent->fields.vector != vector)
-                       continue;
-
-               if (i == RTC_GSI)
-                       rtc_irq_eoi(ioapic, vcpu);
-               /*
-                * We drop the lock while calling the ack notifiers because
-                * ack notifier callbacks for assigned devices call back into
-                * the IOAPIC recursively. Since remote_irr is cleared only
-                * after the notifiers run, if the same vector is delivered
-                * while the lock is dropped it will be latched in irr and
-                * delivered once the ack notifier returns.
-                */
-               spin_unlock(&ioapic->lock);
-               kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
-               spin_lock(&ioapic->lock);
-
-               if (trigger_mode != IOAPIC_LEVEL_TRIG)
-                       continue;
-
-               ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
-               ent->fields.remote_irr = 0;
-               if (!ent->fields.mask && (ioapic->irr & (1 << i))) {
-                       ++ioapic->irq_eoi[i];
-                       if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) {
-                               /*
-                                * Real hardware does not deliver the interrupt
-                                * immediately during eoi broadcast, and this
-                                * lets a buggy guest make slow progress
-                                * even if it does not correctly handle a
-                                * level-triggered interrupt.  Emulate this
-                                * behavior if we detect an interrupt storm.
-                                */
-                               schedule_delayed_work(&ioapic->eoi_inject, HZ / 100);
-                               ioapic->irq_eoi[i] = 0;
-                               trace_kvm_ioapic_delayed_eoi_inj(ent->bits);
-                       } else {
-                               ioapic_service(ioapic, i, false);
-                       }
-               } else {
-                       ioapic->irq_eoi[i] = 0;
-               }
-       }
-}
-
-bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
-{
-       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-       smp_rmb();
-       return test_bit(vector, ioapic->handled_vectors);
-}
-
-void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
-{
-       struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
-
-       spin_lock(&ioapic->lock);
-       __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode);
-       spin_unlock(&ioapic->lock);
-}
-
-static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
-{
-       return container_of(dev, struct kvm_ioapic, dev);
-}
-
-static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
-{
-       return ((addr >= ioapic->base_address &&
-                (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
-}
-
-static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
-                           void *val)
-{
-       struct kvm_ioapic *ioapic = to_ioapic(this);
-       u32 result;
-       if (!ioapic_in_range(ioapic, addr))
-               return -EOPNOTSUPP;
-
-       ioapic_debug("addr %lx\n", (unsigned long)addr);
-       ASSERT(!(addr & 0xf));  /* check alignment */
-
-       addr &= 0xff;
-       spin_lock(&ioapic->lock);
-       switch (addr) {
-       case IOAPIC_REG_SELECT:
-               result = ioapic->ioregsel;
-               break;
-
-       case IOAPIC_REG_WINDOW:
-               result = ioapic_read_indirect(ioapic, addr, len);
-               break;
-
-       default:
-               result = 0;
-               break;
-       }
-       spin_unlock(&ioapic->lock);
-
-       switch (len) {
-       case 8:
-               *(u64 *) val = result;
-               break;
-       case 1:
-       case 2:
-       case 4:
-               memcpy(val, (char *)&result, len);
-               break;
-       default:
-               printk(KERN_WARNING "ioapic: wrong length %d\n", len);
-       }
-       return 0;
-}
-
-static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
-                            const void *val)
-{
-       struct kvm_ioapic *ioapic = to_ioapic(this);
-       u32 data;
-       if (!ioapic_in_range(ioapic, addr))
-               return -EOPNOTSUPP;
-
-       ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
-                    (void*)addr, len, val);
-       ASSERT(!(addr & 0xf));  /* check alignment */
-
-       switch (len) {
-       case 8:
-       case 4:
-               data = *(u32 *) val;
-               break;
-       case 2:
-               data = *(u16 *) val;
-               break;
-       case 1:
-               data = *(u8  *) val;
-               break;
-       default:
-               printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
-               return 0;
-       }
-
-       addr &= 0xff;
-       spin_lock(&ioapic->lock);
-       switch (addr) {
-       case IOAPIC_REG_SELECT:
-               ioapic->ioregsel = data & 0xFF; /* 8-bit register */
-               break;
-
-       case IOAPIC_REG_WINDOW:
-               ioapic_write_indirect(ioapic, data);
-               break;
-#ifdef CONFIG_IA64
-       case IOAPIC_REG_EOI:
-               __kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG);
-               break;
-#endif
-
-       default:
-               break;
-       }
-       spin_unlock(&ioapic->lock);
-       return 0;
-}
-
-static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
-{
-       int i;
-
-       cancel_delayed_work_sync(&ioapic->eoi_inject);
-       for (i = 0; i < IOAPIC_NUM_PINS; i++)
-               ioapic->redirtbl[i].fields.mask = 1;
-       ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
-       ioapic->ioregsel = 0;
-       ioapic->irr = 0;
-       ioapic->id = 0;
-       memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
-       rtc_irq_eoi_tracking_reset(ioapic);
-       update_handled_vectors(ioapic);
-}
-
-static const struct kvm_io_device_ops ioapic_mmio_ops = {
-       .read     = ioapic_mmio_read,
-       .write    = ioapic_mmio_write,
-};
-
-int kvm_ioapic_init(struct kvm *kvm)
-{
-       struct kvm_ioapic *ioapic;
-       int ret;
-
-       ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
-       if (!ioapic)
-               return -ENOMEM;
-       spin_lock_init(&ioapic->lock);
-       INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work);
-       kvm->arch.vioapic = ioapic;
-       kvm_ioapic_reset(ioapic);
-       kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
-       ioapic->kvm = kvm;
-       mutex_lock(&kvm->slots_lock);
-       ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address,
-                                     IOAPIC_MEM_LENGTH, &ioapic->dev);
-       mutex_unlock(&kvm->slots_lock);
-       if (ret < 0) {
-               kvm->arch.vioapic = NULL;
-               kfree(ioapic);
-       }
-
-       return ret;
-}
-
-void kvm_ioapic_destroy(struct kvm *kvm)
-{
-       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-
-       cancel_delayed_work_sync(&ioapic->eoi_inject);
-       if (ioapic) {
-               kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
-               kvm->arch.vioapic = NULL;
-               kfree(ioapic);
-       }
-}
-
-int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
-{
-       struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
-       if (!ioapic)
-               return -EINVAL;
-
-       spin_lock(&ioapic->lock);
-       memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
-       spin_unlock(&ioapic->lock);
-       return 0;
-}
-
-int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
-{
-       struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
-       if (!ioapic)
-               return -EINVAL;
-
-       spin_lock(&ioapic->lock);
-       memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
-       ioapic->irr = 0;
-       update_handled_vectors(ioapic);
-       kvm_vcpu_request_scan_ioapic(kvm);
-       kvm_ioapic_inject_all(ioapic, state->irr);
-       spin_unlock(&ioapic->lock);
-       return 0;
-}
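The MMIO read/write handlers above emulate the IOAPIC's indirect register protocol: the guest writes a register index to the select register at offset 0x00 and then accesses the selected register through the window at offset 0x10, with redirection entry N split across indirect indexes 0x10 + 2*N (low dword) and 0x10 + 2*N + 1 (high dword). A minimal guest-side sketch of that access pattern, assuming the IOAPIC window is mapped at its default base address:

/* Guest-side sketch of the select/window access pattern emulated above;
 * assumes the IOAPIC MMIO window is identity-mapped at the default base. */
#include <stdint.h>

#define IOAPIC_REGSEL_OFF 0x00
#define IOAPIC_IOWIN_OFF  0x10

static uint32_t ioapic_read(volatile uint8_t *base, uint32_t reg)
{
        *(volatile uint32_t *)(base + IOAPIC_REGSEL_OFF) = reg;
        return *(volatile uint32_t *)(base + IOAPIC_IOWIN_OFF);
}

/* The redirection entry for a pin lives at indirect indexes 0x10 + 2*pin. */
static uint64_t ioapic_read_rte(volatile uint8_t *base, unsigned int pin)
{
        uint32_t lo = ioapic_read(base, 0x10 + pin * 2);
        uint32_t hi = ioapic_read(base, 0x10 + pin * 2 + 1);

        return ((uint64_t)hi << 32) | lo;
}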
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
deleted file mode 100644 (file)
index e23b706..0000000
+++ /dev/null
@@ -1,104 +0,0 @@
-#ifndef __KVM_IO_APIC_H
-#define __KVM_IO_APIC_H
-
-#include <linux/kvm_host.h>
-
-#include "iodev.h"
-
-struct kvm;
-struct kvm_vcpu;
-
-#define IOAPIC_NUM_PINS  KVM_IOAPIC_NUM_PINS
-#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
-#define IOAPIC_EDGE_TRIG  0
-#define IOAPIC_LEVEL_TRIG 1
-
-#define IOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000
-#define IOAPIC_MEM_LENGTH            0x100
-
-/* Direct registers. */
-#define IOAPIC_REG_SELECT  0x00
-#define IOAPIC_REG_WINDOW  0x10
-#define IOAPIC_REG_EOI     0x40        /* IA64 IOSAPIC only */
-
-/* Indirect registers. */
-#define IOAPIC_REG_APIC_ID 0x00        /* x86 IOAPIC only */
-#define IOAPIC_REG_VERSION 0x01
-#define IOAPIC_REG_ARB_ID  0x02        /* x86 IOAPIC only */
-
-/*ioapic delivery mode*/
-#define        IOAPIC_FIXED                    0x0
-#define        IOAPIC_LOWEST_PRIORITY          0x1
-#define        IOAPIC_PMI                      0x2
-#define        IOAPIC_NMI                      0x4
-#define        IOAPIC_INIT                     0x5
-#define        IOAPIC_EXTINT                   0x7
-
-#ifdef CONFIG_X86
-#define RTC_GSI 8
-#else
-#define RTC_GSI -1U
-#endif
-
-struct rtc_status {
-       int pending_eoi;
-       DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
-};
-
-struct kvm_ioapic {
-       u64 base_address;
-       u32 ioregsel;
-       u32 id;
-       u32 irr;
-       u32 pad;
-       union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
-       unsigned long irq_states[IOAPIC_NUM_PINS];
-       struct kvm_io_device dev;
-       struct kvm *kvm;
-       void (*ack_notifier)(void *opaque, int irq);
-       spinlock_t lock;
-       DECLARE_BITMAP(handled_vectors, 256);
-       struct rtc_status rtc_status;
-       struct delayed_work eoi_inject;
-       u32 irq_eoi[IOAPIC_NUM_PINS];
-};
-
-#ifdef DEBUG
-#define ASSERT(x)                                                      \
-do {                                                                   \
-       if (!(x)) {                                                     \
-               printk(KERN_EMERG "assertion failed %s: %d: %s\n",      \
-                      __FILE__, __LINE__, #x);                         \
-               BUG();                                                  \
-       }                                                               \
-} while (0)
-#else
-#define ASSERT(x) do { } while (0)
-#endif
-
-static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
-{
-       return kvm->arch.vioapic;
-}
-
-void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
-int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-               int short_hand, int dest, int dest_mode);
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
-void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
-                       int trigger_mode);
-bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
-int kvm_ioapic_init(struct kvm *kvm);
-void kvm_ioapic_destroy(struct kvm *kvm);
-int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
-                      int level, bool line_status);
-void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
-int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
-               struct kvm_lapic_irq *irq, unsigned long *dest_map);
-int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
-int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
-void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
-                       u32 *tmr);
-
-#endif
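The union kvm_ioapic_redirect_entry used by the code above is defined elsewhere in the kernel headers; for orientation, a redirection entry follows the standard 64-bit IOAPIC layout. The bitfield below is an illustrative rendering of that layout per the IOAPIC spec, not a copy of the kernel's definition:

/* Illustrative layout of a 64-bit IOAPIC redirection entry (per the
 * IOAPIC spec); not copied from the kernel's definition. */
#include <stdint.h>

union ioapic_rte {
        uint64_t bits;
        struct {
                uint64_t vector          : 8;   /* interrupt vector */
                uint64_t delivery_mode   : 3;   /* fixed, lowest prio, ... */
                uint64_t dest_mode       : 1;   /* physical vs. logical */
                uint64_t delivery_status : 1;
                uint64_t polarity        : 1;
                uint64_t remote_irr      : 1;   /* level IRQ awaiting EOI */
                uint64_t trig_mode       : 1;   /* edge vs. level */
                uint64_t mask            : 1;
                uint64_t reserved        : 39;
                uint64_t dest_id         : 8;   /* destination APIC ID */
        } fields;
};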
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
deleted file mode 100644 (file)
index c1e6ae9..0000000
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- * Copyright (c) 2006, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Copyright (C) 2006-2008 Intel Corporation
- * Copyright IBM Corporation, 2008
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
- *
- * Author: Allen M. Kay <allen.m.kay@intel.com>
- * Author: Weidong Han <weidong.han@intel.com>
- * Author: Ben-Ami Yassour <benami@il.ibm.com>
- */
-
-#include <linux/list.h>
-#include <linux/kvm_host.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/stat.h>
-#include <linux/dmar.h>
-#include <linux/iommu.h>
-#include <linux/intel-iommu.h>
-
-static bool allow_unsafe_assigned_interrupts;
-module_param_named(allow_unsafe_assigned_interrupts,
-                  allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(allow_unsafe_assigned_interrupts,
- "Enable device assignment on platforms without interrupt remapping support.");
-
-static int kvm_iommu_unmap_memslots(struct kvm *kvm);
-static void kvm_iommu_put_pages(struct kvm *kvm,
-                               gfn_t base_gfn, unsigned long npages);
-
-static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
-                          unsigned long npages)
-{
-       gfn_t end_gfn;
-       pfn_t pfn;
-
-       pfn     = gfn_to_pfn_memslot(slot, gfn);
-       end_gfn = gfn + npages;
-       gfn    += 1;
-
-       if (is_error_noslot_pfn(pfn))
-               return pfn;
-
-       while (gfn < end_gfn)
-               gfn_to_pfn_memslot(slot, gfn++);
-
-       return pfn;
-}
-
-static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
-{
-       unsigned long i;
-
-       for (i = 0; i < npages; ++i)
-               kvm_release_pfn_clean(pfn + i);
-}
-
-int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
-       gfn_t gfn, end_gfn;
-       pfn_t pfn;
-       int r = 0;
-       struct iommu_domain *domain = kvm->arch.iommu_domain;
-       int flags;
-
-       /* check if iommu exists and in use */
-       if (!domain)
-               return 0;
-
-       gfn     = slot->base_gfn;
-       end_gfn = gfn + slot->npages;
-
-       flags = IOMMU_READ;
-       if (!(slot->flags & KVM_MEM_READONLY))
-               flags |= IOMMU_WRITE;
-       if (!kvm->arch.iommu_noncoherent)
-               flags |= IOMMU_CACHE;
-
-
-       while (gfn < end_gfn) {
-               unsigned long page_size;
-
-               /* Check if already mapped */
-               if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) {
-                       gfn += 1;
-                       continue;
-               }
-
-               /* Get the page size we could use to map */
-               page_size = kvm_host_page_size(kvm, gfn);
-
-               /* Make sure the page_size does not exceed the memslot */
-               while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
-                       page_size >>= 1;
-
-               /* Make sure gfn is aligned to the page size we want to map */
-               while ((gfn << PAGE_SHIFT) & (page_size - 1))
-                       page_size >>= 1;
-
-               /* Make sure hva is aligned to the page size we want to map */
-               while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1))
-                       page_size >>= 1;
-
-               /*
-                * Pin all pages we are about to map in memory. This is
-                * important because we unmap and unpin in 4kb steps later.
-                */
-               pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
-               if (is_error_noslot_pfn(pfn)) {
-                       gfn += 1;
-                       continue;
-               }
-
-               /* Map into IO address space */
-               r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
-                             page_size, flags);
-               if (r) {
-                       printk(KERN_ERR "kvm_iommu_map_address:"
-                              "iommu failed to map pfn=%llx\n", pfn);
-                       kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
-                       goto unmap_pages;
-               }
-
-               gfn += page_size >> PAGE_SHIFT;
-
-
-       }
-
-       return 0;
-
-unmap_pages:
-       kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn);
-       return r;
-}
-
-static int kvm_iommu_map_memslots(struct kvm *kvm)
-{
-       int idx, r = 0;
-       struct kvm_memslots *slots;
-       struct kvm_memory_slot *memslot;
-
-       if (kvm->arch.iommu_noncoherent)
-               kvm_arch_register_noncoherent_dma(kvm);
-
-       idx = srcu_read_lock(&kvm->srcu);
-       slots = kvm_memslots(kvm);
-
-       kvm_for_each_memslot(memslot, slots) {
-               r = kvm_iommu_map_pages(kvm, memslot);
-               if (r)
-                       break;
-       }
-       srcu_read_unlock(&kvm->srcu, idx);
-
-       return r;
-}
-
-int kvm_assign_device(struct kvm *kvm,
-                     struct kvm_assigned_dev_kernel *assigned_dev)
-{
-       struct pci_dev *pdev = NULL;
-       struct iommu_domain *domain = kvm->arch.iommu_domain;
-       int r;
-       bool noncoherent;
-
-       /* check if iommu exists and in use */
-       if (!domain)
-               return 0;
-
-       pdev = assigned_dev->dev;
-       if (pdev == NULL)
-               return -ENODEV;
-
-       r = iommu_attach_device(domain, &pdev->dev);
-       if (r) {
-               dev_err(&pdev->dev, "kvm assign device failed ret %d", r);
-               return r;
-       }
-
-       noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY);
-
-       /* Check if need to update IOMMU page table for guest memory */
-       if (noncoherent != kvm->arch.iommu_noncoherent) {
-               kvm_iommu_unmap_memslots(kvm);
-               kvm->arch.iommu_noncoherent = noncoherent;
-               r = kvm_iommu_map_memslots(kvm);
-               if (r)
-                       goto out_unmap;
-       }
-
-       pci_set_dev_assigned(pdev);
-
-       dev_info(&pdev->dev, "kvm assign device\n");
-
-       return 0;
-out_unmap:
-       kvm_iommu_unmap_memslots(kvm);
-       return r;
-}
-
-int kvm_deassign_device(struct kvm *kvm,
-                       struct kvm_assigned_dev_kernel *assigned_dev)
-{
-       struct iommu_domain *domain = kvm->arch.iommu_domain;
-       struct pci_dev *pdev = NULL;
-
-       /* check if iommu exists and in use */
-       if (!domain)
-               return 0;
-
-       pdev = assigned_dev->dev;
-       if (pdev == NULL)
-               return -ENODEV;
-
-       iommu_detach_device(domain, &pdev->dev);
-
-       pci_clear_dev_assigned(pdev);
-
-       dev_info(&pdev->dev, "kvm deassign device\n");
-
-       return 0;
-}
-
-int kvm_iommu_map_guest(struct kvm *kvm)
-{
-       int r;
-
-       if (!iommu_present(&pci_bus_type)) {
-               printk(KERN_ERR "%s: iommu not found\n", __func__);
-               return -ENODEV;
-       }
-
-       mutex_lock(&kvm->slots_lock);
-
-       kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type);
-       if (!kvm->arch.iommu_domain) {
-               r = -ENOMEM;
-               goto out_unlock;
-       }
-
-       if (!allow_unsafe_assigned_interrupts &&
-           !iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) {
-               printk(KERN_WARNING "%s: No interrupt remapping support,"
-                      " disallowing device assignment."
-                      " Re-enable with \"allow_unsafe_assigned_interrupts=1\""
-                      " module option.\n", __func__);
-               iommu_domain_free(kvm->arch.iommu_domain);
-               kvm->arch.iommu_domain = NULL;
-               r = -EPERM;
-               goto out_unlock;
-       }
-
-       r = kvm_iommu_map_memslots(kvm);
-       if (r)
-               kvm_iommu_unmap_memslots(kvm);
-
-out_unlock:
-       mutex_unlock(&kvm->slots_lock);
-       return r;
-}
-
-static void kvm_iommu_put_pages(struct kvm *kvm,
-                               gfn_t base_gfn, unsigned long npages)
-{
-       struct iommu_domain *domain;
-       gfn_t end_gfn, gfn;
-       pfn_t pfn;
-       u64 phys;
-
-       domain  = kvm->arch.iommu_domain;
-       end_gfn = base_gfn + npages;
-       gfn     = base_gfn;
-
-       /* check if iommu exists and in use */
-       if (!domain)
-               return;
-
-       while (gfn < end_gfn) {
-               unsigned long unmap_pages;
-               size_t size;
-
-               /* Get physical address */
-               phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
-
-               if (!phys) {
-                       gfn++;
-                       continue;
-               }
-
-               pfn  = phys >> PAGE_SHIFT;
-
-               /* Unmap address from IO address space */
-               size       = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
-               unmap_pages = 1ULL << get_order(size);
-
-               /* Unpin all pages we just unmapped to not leak any memory */
-               kvm_unpin_pages(kvm, pfn, unmap_pages);
-
-               gfn += unmap_pages;
-       }
-}
-
-void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
-       kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages);
-}
-
-static int kvm_iommu_unmap_memslots(struct kvm *kvm)
-{
-       int idx;
-       struct kvm_memslots *slots;
-       struct kvm_memory_slot *memslot;
-
-       idx = srcu_read_lock(&kvm->srcu);
-       slots = kvm_memslots(kvm);
-
-       kvm_for_each_memslot(memslot, slots)
-               kvm_iommu_unmap_pages(kvm, memslot);
-
-       srcu_read_unlock(&kvm->srcu, idx);
-
-       if (kvm->arch.iommu_noncoherent)
-               kvm_arch_unregister_noncoherent_dma(kvm);
-
-       return 0;
-}
-
-int kvm_iommu_unmap_guest(struct kvm *kvm)
-{
-       struct iommu_domain *domain = kvm->arch.iommu_domain;
-
-       /* check if iommu exists and in use */
-       if (!domain)
-               return 0;
-
-       mutex_lock(&kvm->slots_lock);
-       kvm_iommu_unmap_memslots(kvm);
-       kvm->arch.iommu_domain = NULL;
-       kvm->arch.iommu_noncoherent = false;
-       mutex_unlock(&kvm->slots_lock);
-
-       iommu_domain_free(domain);
-       return 0;
-}
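In kvm_iommu_map_pages above, the candidate mapping size is repeatedly halved until it fits inside the memslot and is aligned for both the guest frame number and the host virtual address. A standalone sketch of that size-selection step follows; the function name and the PAGE_SHIFT value are illustrative, not taken from this file.

/* Sketch of the page-size selection used when building IOMMU mappings
 * above; helper name and PAGE_SHIFT value are illustrative. */
#include <stdint.h>

#define PAGE_SHIFT 12

/* Largest power-of-two mapping size that stays inside the slot and is
 * aligned at both the guest-physical and the host-virtual address. */
static unsigned long pick_map_size(uint64_t gfn, uint64_t end_gfn,
                                   uint64_t hva, unsigned long max_size)
{
        unsigned long size = max_size;

        while (gfn + (size >> PAGE_SHIFT) > end_gfn)    /* fits in the slot */
                size >>= 1;
        while ((gfn << PAGE_SHIFT) & (size - 1))        /* gpa aligned */
                size >>= 1;
        while (hva & (size - 1))                        /* hva aligned */
                size >>= 1;

        return size;
}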
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
deleted file mode 100644 (file)
index 963b899..0000000
+++ /dev/null
@@ -1,369 +0,0 @@
-/*
- * irq_comm.c: Common API for in kernel interrupt controller
- * Copyright (c) 2007, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- * Authors:
- *   Yaozu (Eddie) Dong <Eddie.dong@intel.com>
- *
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
- */
-
-#include <linux/kvm_host.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <trace/events/kvm.h>
-
-#include <asm/msidef.h>
-#ifdef CONFIG_IA64
-#include <asm/iosapic.h>
-#endif
-
-#include "irq.h"
-
-#include "ioapic.h"
-
-static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
-                          struct kvm *kvm, int irq_source_id, int level,
-                          bool line_status)
-{
-#ifdef CONFIG_X86
-       struct kvm_pic *pic = pic_irqchip(kvm);
-       return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
-#else
-       return -1;
-#endif
-}
-
-static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
-                             struct kvm *kvm, int irq_source_id, int level,
-                             bool line_status)
-{
-       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-       return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level,
-                               line_status);
-}
-
-static inline bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
-{
-#ifdef CONFIG_IA64
-       return irq->delivery_mode ==
-               (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
-#else
-       return irq->delivery_mode == APIC_DM_LOWEST;
-#endif
-}
-
-int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
-               struct kvm_lapic_irq *irq, unsigned long *dest_map)
-{
-       int i, r = -1;
-       struct kvm_vcpu *vcpu, *lowest = NULL;
-
-       if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
-                       kvm_is_dm_lowest_prio(irq)) {
-               printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
-               irq->delivery_mode = APIC_DM_FIXED;
-       }
-
-       if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
-               return r;
-
-       kvm_for_each_vcpu(i, vcpu, kvm) {
-               if (!kvm_apic_present(vcpu))
-                       continue;
-
-               if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
-                                       irq->dest_id, irq->dest_mode))
-                       continue;
-
-               if (!kvm_is_dm_lowest_prio(irq)) {
-                       if (r < 0)
-                               r = 0;
-                       r += kvm_apic_set_irq(vcpu, irq, dest_map);
-               } else if (kvm_lapic_enabled(vcpu)) {
-                       if (!lowest)
-                               lowest = vcpu;
-                       else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
-                               lowest = vcpu;
-               }
-       }
-
-       if (lowest)
-               r = kvm_apic_set_irq(lowest, irq, dest_map);
-
-       return r;
-}
-
-static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
-                                  struct kvm_lapic_irq *irq)
-{
-       trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
-
-       irq->dest_id = (e->msi.address_lo &
-                       MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
-       irq->vector = (e->msi.data &
-                       MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
-       irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
-       irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data;
-       irq->delivery_mode = e->msi.data & 0x700;
-       irq->level = 1;
-       irq->shorthand = 0;
-       /* TODO Deal with RH bit of MSI message address */
-}
-
-int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
-               struct kvm *kvm, int irq_source_id, int level, bool line_status)
-{
-       struct kvm_lapic_irq irq;
-
-       if (!level)
-               return -1;
-
-       kvm_set_msi_irq(e, &irq);
-
-       return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
-}
-
-
-static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
-                        struct kvm *kvm)
-{
-       struct kvm_lapic_irq irq;
-       int r;
-
-       kvm_set_msi_irq(e, &irq);
-
-       if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
-               return r;
-       else
-               return -EWOULDBLOCK;
-}
-
-/*
- * Deliver an IRQ in an atomic context if we can, or return a failure,
- * user can retry in a process context.
- * Return value:
- *  -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
- *  Other values - No need to retry.
- */
-int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
-{
-       struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
-       struct kvm_kernel_irq_routing_entry *e;
-       int ret = -EINVAL;
-       int idx;
-
-       trace_kvm_set_irq(irq, level, irq_source_id);
-
-       /*
-        * Injection into either PIC or IOAPIC might need to scan all CPUs,
-        * which would need to be retried from thread context; when the same GSI
-        * is connected to both PIC and IOAPIC, we'd have to report a
-        * partial failure here.
-        * Since there's no easy way to do this, we only support injecting MSI
-        * which is limited to 1:1 GSI mapping.
-        */
-       idx = srcu_read_lock(&kvm->irq_srcu);
-       if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
-               e = &entries[0];
-               if (likely(e->type == KVM_IRQ_ROUTING_MSI))
-                       ret = kvm_set_msi_inatomic(e, kvm);
-               else
-                       ret = -EWOULDBLOCK;
-       }
-       srcu_read_unlock(&kvm->irq_srcu, idx);
-       return ret;
-}
-
-int kvm_request_irq_source_id(struct kvm *kvm)
-{
-       unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
-       int irq_source_id;
-
-       mutex_lock(&kvm->irq_lock);
-       irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG);
-
-       if (irq_source_id >= BITS_PER_LONG) {
-               printk(KERN_WARNING "kvm: exhausted allocatable IRQ sources!\n");
-               irq_source_id = -EFAULT;
-               goto unlock;
-       }
-
-       ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
-#ifdef CONFIG_X86
-       ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
-#endif
-       set_bit(irq_source_id, bitmap);
-unlock:
-       mutex_unlock(&kvm->irq_lock);
-
-       return irq_source_id;
-}
-
-void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
-{
-       ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
-#ifdef CONFIG_X86
-       ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
-#endif
-
-       mutex_lock(&kvm->irq_lock);
-       if (irq_source_id < 0 ||
-           irq_source_id >= BITS_PER_LONG) {
-               printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
-               goto unlock;
-       }
-       clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
-       if (!irqchip_in_kernel(kvm))
-               goto unlock;
-
-       kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
-#ifdef CONFIG_X86
-       kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id);
-#endif
-unlock:
-       mutex_unlock(&kvm->irq_lock);
-}
-
-void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
-                                   struct kvm_irq_mask_notifier *kimn)
-{
-       mutex_lock(&kvm->irq_lock);
-       kimn->irq = irq;
-       hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
-       mutex_unlock(&kvm->irq_lock);
-}
-
-void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
-                                     struct kvm_irq_mask_notifier *kimn)
-{
-       mutex_lock(&kvm->irq_lock);
-       hlist_del_rcu(&kimn->link);
-       mutex_unlock(&kvm->irq_lock);
-       synchronize_srcu(&kvm->irq_srcu);
-}
-
-void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
-                            bool mask)
-{
-       struct kvm_irq_mask_notifier *kimn;
-       int idx, gsi;
-
-       idx = srcu_read_lock(&kvm->irq_srcu);
-       gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
-       if (gsi != -1)
-               hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
-                       if (kimn->irq == gsi)
-                               kimn->func(kimn, mask);
-       srcu_read_unlock(&kvm->irq_srcu, idx);
-}
-
-int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
-                         const struct kvm_irq_routing_entry *ue)
-{
-       int r = -EINVAL;
-       int delta;
-       unsigned max_pin;
-
-       switch (ue->type) {
-       case KVM_IRQ_ROUTING_IRQCHIP:
-               delta = 0;
-               switch (ue->u.irqchip.irqchip) {
-               case KVM_IRQCHIP_PIC_MASTER:
-                       e->set = kvm_set_pic_irq;
-                       max_pin = PIC_NUM_PINS;
-                       break;
-               case KVM_IRQCHIP_PIC_SLAVE:
-                       e->set = kvm_set_pic_irq;
-                       max_pin = PIC_NUM_PINS;
-                       delta = 8;
-                       break;
-               case KVM_IRQCHIP_IOAPIC:
-                       max_pin = KVM_IOAPIC_NUM_PINS;
-                       e->set = kvm_set_ioapic_irq;
-                       break;
-               default:
-                       goto out;
-               }
-               e->irqchip.irqchip = ue->u.irqchip.irqchip;
-               e->irqchip.pin = ue->u.irqchip.pin + delta;
-               if (e->irqchip.pin >= max_pin)
-                       goto out;
-               break;
-       case KVM_IRQ_ROUTING_MSI:
-               e->set = kvm_set_msi;
-               e->msi.address_lo = ue->u.msi.address_lo;
-               e->msi.address_hi = ue->u.msi.address_hi;
-               e->msi.data = ue->u.msi.data;
-               break;
-       default:
-               goto out;
-       }
-
-       r = 0;
-out:
-       return r;
-}
-
-#define IOAPIC_ROUTING_ENTRY(irq) \
-       { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
-         .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
-#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
-
-#ifdef CONFIG_X86
-#  define PIC_ROUTING_ENTRY(irq) \
-       { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,  \
-         .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } }
-#  define ROUTING_ENTRY2(irq) \
-       IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
-#else
-#  define ROUTING_ENTRY2(irq) \
-       IOAPIC_ROUTING_ENTRY(irq)
-#endif
-
-static const struct kvm_irq_routing_entry default_routing[] = {
-       ROUTING_ENTRY2(0), ROUTING_ENTRY2(1),
-       ROUTING_ENTRY2(2), ROUTING_ENTRY2(3),
-       ROUTING_ENTRY2(4), ROUTING_ENTRY2(5),
-       ROUTING_ENTRY2(6), ROUTING_ENTRY2(7),
-       ROUTING_ENTRY2(8), ROUTING_ENTRY2(9),
-       ROUTING_ENTRY2(10), ROUTING_ENTRY2(11),
-       ROUTING_ENTRY2(12), ROUTING_ENTRY2(13),
-       ROUTING_ENTRY2(14), ROUTING_ENTRY2(15),
-       ROUTING_ENTRY1(16), ROUTING_ENTRY1(17),
-       ROUTING_ENTRY1(18), ROUTING_ENTRY1(19),
-       ROUTING_ENTRY1(20), ROUTING_ENTRY1(21),
-       ROUTING_ENTRY1(22), ROUTING_ENTRY1(23),
-#ifdef CONFIG_IA64
-       ROUTING_ENTRY1(24), ROUTING_ENTRY1(25),
-       ROUTING_ENTRY1(26), ROUTING_ENTRY1(27),
-       ROUTING_ENTRY1(28), ROUTING_ENTRY1(29),
-       ROUTING_ENTRY1(30), ROUTING_ENTRY1(31),
-       ROUTING_ENTRY1(32), ROUTING_ENTRY1(33),
-       ROUTING_ENTRY1(34), ROUTING_ENTRY1(35),
-       ROUTING_ENTRY1(36), ROUTING_ENTRY1(37),
-       ROUTING_ENTRY1(38), ROUTING_ENTRY1(39),
-       ROUTING_ENTRY1(40), ROUTING_ENTRY1(41),
-       ROUTING_ENTRY1(42), ROUTING_ENTRY1(43),
-       ROUTING_ENTRY1(44), ROUTING_ENTRY1(45),
-       ROUTING_ENTRY1(46), ROUTING_ENTRY1(47),
-#endif
-};
-
-int kvm_setup_default_irq_routing(struct kvm *kvm)
-{
-       return kvm_set_irq_routing(kvm, default_routing,
-                                  ARRAY_SIZE(default_routing), 0);
-}
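
For reference, the table removed above wires every legacy GSI to the IOAPIC pin of the same number and, on x86, additionally to an 8259 pin (GSI modulo 8 on the master or the slave). A standalone sketch of that mapping, assuming SELECT_PIC() (defined elsewhere in the removed code, not shown here) picks the master PIC for GSIs 0-7 and the slave for 8-15:

#include <stdio.h>

/* Illustrative only: mirrors IOAPIC_ROUTING_ENTRY/PIC_ROUTING_ENTRY above. */
int main(void)
{
	for (int gsi = 0; gsi < 24; gsi++) {
		/* Every GSI gets an IOAPIC route with pin == gsi. */
		printf("GSI %2d -> IOAPIC pin %d", gsi, gsi);
		/* GSIs 0-15 also get a PIC route: pin = gsi % 8. */
		if (gsi < 16)
			printf(", PIC %s pin %d",
			       gsi < 8 ? "master" : "slave", gsi % 8);
		printf("\n");
	}
	return 0;
}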
index 3cee7b1..f528343 100644
@@ -124,15 +124,6 @@ int vcpu_load(struct kvm_vcpu *vcpu)
 
        if (mutex_lock_killable(&vcpu->mutex))
                return -EINTR;
-       if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
-               /* The thread running this VCPU changed. */
-               struct pid *oldpid = vcpu->pid;
-               struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
-               rcu_assign_pointer(vcpu->pid, newpid);
-               if (oldpid)
-                       synchronize_rcu();
-               put_pid(oldpid);
-       }
        cpu = get_cpu();
        preempt_notifier_register(&vcpu->preempt_notifier);
        kvm_arch_vcpu_load(vcpu, cpu);
@@ -468,9 +459,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
        if (r)
                goto out_err_no_disable;
 
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
-       INIT_HLIST_HEAD(&kvm->mask_notifier_list);
-#endif
 #ifdef CONFIG_HAVE_KVM_IRQFD
        INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
@@ -668,48 +656,46 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
        return 0;
 }
 
-static int cmp_memslot(const void *slot1, const void *slot2)
-{
-       struct kvm_memory_slot *s1, *s2;
-
-       s1 = (struct kvm_memory_slot *)slot1;
-       s2 = (struct kvm_memory_slot *)slot2;
-
-       if (s1->npages < s2->npages)
-               return 1;
-       if (s1->npages > s2->npages)
-               return -1;
-
-       return 0;
-}
-
 /*
- * Sort the memslots base on its size, so the larger slots
- * will get better fit.
+ * Insert the memslot and re-sort the memslots based on their GFN,
+ * so that binary search can be used to look up a GFN.
+ * The sorting algorithm takes advantage of having an initially
+ * sorted array and knowing the position of the changed memslot.
  */
-static void sort_memslots(struct kvm_memslots *slots)
-{
-       int i;
-
-       sort(slots->memslots, KVM_MEM_SLOTS_NUM,
-             sizeof(struct kvm_memory_slot), cmp_memslot, NULL);
-
-       for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
-               slots->id_to_index[slots->memslots[i].id] = i;
-}
-
 static void update_memslots(struct kvm_memslots *slots,
                            struct kvm_memory_slot *new)
 {
-       if (new) {
-               int id = new->id;
-               struct kvm_memory_slot *old = id_to_memslot(slots, id);
-               unsigned long npages = old->npages;
+       int id = new->id;
+       int i = slots->id_to_index[id];
+       struct kvm_memory_slot *mslots = slots->memslots;
 
-               *old = *new;
-               if (new->npages != npages)
-                       sort_memslots(slots);
+       WARN_ON(mslots[i].id != id);
+       if (!new->npages) {
+               new->base_gfn = 0;
+               if (mslots[i].npages)
+                       slots->used_slots--;
+       } else {
+               if (!mslots[i].npages)
+                       slots->used_slots++;
        }
+
+       while (i < KVM_MEM_SLOTS_NUM - 1 &&
+              new->base_gfn <= mslots[i + 1].base_gfn) {
+               if (!mslots[i + 1].npages)
+                       break;
+               mslots[i] = mslots[i + 1];
+               slots->id_to_index[mslots[i].id] = i;
+               i++;
+       }
+       while (i > 0 &&
+              new->base_gfn > mslots[i - 1].base_gfn) {
+               mslots[i] = mslots[i - 1];
+               slots->id_to_index[mslots[i].id] = i;
+               i--;
+       }
+
+       mslots[i] = *new;
+       slots->id_to_index[mslots[i].id] = i;
 }
 
 static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
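
The rewritten update_memslots() keeps the array ordered by base_gfn, largest first, with deleted (zero-sized) slots pushed to the tail, which is what makes the binary-search GFN lookup mentioned in the new comment possible. A minimal, self-contained userspace sketch of such a lookup, with illustrative names rather than the kernel's actual helper:

#include <stddef.h>
#include <stdio.h>

typedef unsigned long long gfn_t;

struct slot {
	gfn_t base_gfn;
	unsigned long npages;
};

/* The array is sorted by base_gfn in descending order, as update_memslots maintains. */
static struct slot *find_slot(struct slot *slots, int used, gfn_t gfn)
{
	int start = 0, end = used;

	while (start < end) {
		int mid = start + (end - start) / 2;

		if (gfn >= slots[mid].base_gfn)
			end = mid;	/* candidate slot, keep looking left */
		else
			start = mid + 1;
	}

	if (start < used && gfn >= slots[start].base_gfn &&
	    gfn < slots[start].base_gfn + slots[start].npages)
		return &slots[start];
	return NULL;
}

int main(void)
{
	/* Descending by base_gfn, like the memslots array. */
	struct slot slots[] = {
		{ .base_gfn = 0x100000, .npages = 512 },
		{ .base_gfn = 0x1000,   .npages = 256 },
		{ .base_gfn = 0x0,      .npages = 16  },
	};

	printf("0x1010 -> %s\n", find_slot(slots, 3, 0x1010) ? "hit" : "miss");
	printf("0x2000 -> %s\n", find_slot(slots, 3, 0x2000) ? "hit" : "miss");
	return 0;
}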
@@ -727,7 +713,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
 }
 
 static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
-               struct kvm_memslots *slots, struct kvm_memory_slot *new)
+               struct kvm_memslots *slots)
 {
        struct kvm_memslots *old_memslots = kvm->memslots;
 
@@ -738,7 +724,6 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
        WARN_ON(old_memslots->generation & 1);
        slots->generation = old_memslots->generation + 1;
 
-       update_memslots(slots, new);
        rcu_assign_pointer(kvm->memslots, slots);
        synchronize_srcu_expedited(&kvm->srcu);
 
@@ -760,7 +745,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
  *
  * Discontiguous memory is allowed, mostly for framebuffers.
  *
- * Must be called holding mmap_sem for write.
+ * Must be called holding kvm->slots_lock for write.
  */
 int __kvm_set_memory_region(struct kvm *kvm,
                            struct kvm_userspace_memory_region *mem)
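
Callers normally satisfy the updated locking requirement through the kvm_set_memory_region() wrapper elsewhere in this file; roughly the following, reproduced from memory as a reminder rather than as part of this diff:

int kvm_set_memory_region(struct kvm *kvm,
			  struct kvm_userspace_memory_region *mem)
{
	int r;

	mutex_lock(&kvm->slots_lock);
	r = __kvm_set_memory_region(kvm, mem);
	mutex_unlock(&kvm->slots_lock);
	return r;
}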
@@ -866,15 +851,16 @@ int __kvm_set_memory_region(struct kvm *kvm,
                        goto out_free;
        }
 
+       slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
+                       GFP_KERNEL);
+       if (!slots)
+               goto out_free;
+
        if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
-               slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
-                               GFP_KERNEL);
-               if (!slots)
-                       goto out_free;
                slot = id_to_memslot(slots, mem->slot);
                slot->flags |= KVM_MEMSLOT_INVALID;
 
-               old_memslots = install_new_memslots(kvm, slots, NULL);
+               old_memslots = install_new_memslots(kvm, slots);
 
                /* slot was deleted or moved, clear iommu mapping */
                kvm_iommu_unmap_pages(kvm, &old);
@@ -886,6 +872,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
                 *      - kvm_is_visible_gfn (mmu_check_roots)
                 */
                kvm_arch_flush_shadow_memslot(kvm, slot);
+
+               /*
+                * We can re-use the old_memslots from above; the only difference
+                * from the currently installed memslots is the invalid flag.  This
+                * will get overwritten by update_memslots anyway.
+                */
                slots = old_memslots;
        }
 
@@ -893,26 +885,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
        if (r)
                goto out_slots;
 
-       r = -ENOMEM;
-       /*
-        * We can re-use the old_memslots from above, the only difference
-        * from the currently installed memslots is the invalid flag.  This
-        * will get overwritten by update_memslots anyway.
-        */
-       if (!slots) {
-               slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
-                               GFP_KERNEL);
-               if (!slots)
-                       goto out_free;
-       }
-
        /* actual memory is freed via old in kvm_free_physmem_slot below */
        if (change == KVM_MR_DELETE) {
                new.dirty_bitmap = NULL;
                memset(&new.arch, 0, sizeof(new.arch));
        }
 
-       old_memslots = install_new_memslots(kvm, slots, &new);
+       update_memslots(slots, &new);
+       old_memslots = install_new_memslots(kvm, slots);
 
        kvm_arch_commit_memory_region(kvm, mem, &old, change);
 
@@ -1799,10 +1779,6 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target)
        rcu_read_unlock();
        if (!task)
                return ret;
-       if (task->flags & PF_VCPU) {
-               put_task_struct(task);
-               return ret;
-       }
        ret = yield_to(task, 1);
        put_task_struct(task);
 
@@ -2065,6 +2041,15 @@ static long kvm_vcpu_ioctl(struct file *filp,
                r = -EINVAL;
                if (arg)
                        goto out;
+               if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
+                       /* The thread running this VCPU changed. */
+                       struct pid *oldpid = vcpu->pid;
+                       struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
+                       rcu_assign_pointer(vcpu->pid, newpid);
+                       if (oldpid)
+                               synchronize_rcu();
+                       put_pid(oldpid);
+               }
                r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
                trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
                break;
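
The vcpu->pid update moved here pairs with the RCU reader in kvm_vcpu_yield_to() seen in the earlier hunk: the reader dereferences vcpu->pid inside an RCU read-side section, so the writer publishes the new pid with rcu_assign_pointer() and waits for readers with synchronize_rcu() before dropping the old reference. The reader side, reconstructed for context and not part of this diff, looks roughly like:

	rcu_read_lock();
	pid = rcu_dereference(target->pid);
	if (pid)
		task = get_pid_task(pid, PIDTYPE_PID);
	rcu_read_unlock();
	if (!task)
		return ret;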
@@ -2599,8 +2584,6 @@ static long kvm_vm_ioctl(struct file *filp,
                break;
        default:
                r = kvm_arch_vm_ioctl(filp, ioctl, arg);
-               if (r == -ENOTTY)
-                       r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
        }
 out:
        return r;