Merge branch 'kvm-updates/2.6.32' of git://git.kernel.org/pub/scm/virt/kvm/kvm

author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 15 Sep 2009 00:43:43 +0000 (17:43 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 15 Sep 2009 00:43:43 +0000 (17:43 -0700)

diff --combined Documentation/ioctl/ioctl-number.txt

index 1c058b5,999a201..aafca0a
--- 1/Documentation/ioctl/ioctl-number.txt
--- 2/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@@ -121,7 -121,6 +121,7 @@@ Code       Seq#    Include File            Comment
   'c'   00-7F   linux/comstats.h        conflict!
   'c'   00-7F   linux/coda.h            conflict!
   'c'   80-9F   arch/s390/include/asm/chsc.h
+ +'c'   A0-AF   arch/x86/include/asm/msr.h
   'd'   00-FF   linux/char/drm/drm/h    conflict!
   'd'   F0-FF   linux/digi1.h
   'e'   all     linux/digi1.h           conflict!
@@@ -193,7 -192,7 +193,7 @@@
   0xAD  00      Netfilter device        in development:
                                         <mailto:rusty@rustcorp.com.au>  
   0xAE  all     linux/kvm.h             Kernel-based Virtual Machine
-                                       <mailto:kvm-devel@lists.sourceforge.net>
+                                       <mailto:kvm@vger.kernel.org>
   0xB0  all     RATIO devices           in development:
                                         <mailto:vgo@ratio.de>
   0xB1  00-1F   PPPoX                   <mailto:mostrows@styx.uwaterloo.ca>
diff --combined Documentation/kernel-parameters.txt

index cb3a169,877a02a..3a23864
--- 1/Documentation/kernel-parameters.txt
--- 2/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@@ -57,6 -57,7 +57,7 @@@ parameter is applicable
         ISAPNP  ISA PnP code is enabled.
         ISDN    Appropriate ISDN support is enabled.
         JOY     Appropriate joystick support is enabled.
+       KVM     Kernel Virtual Machine support is enabled.
         LIBATA  Libata driver is enabled
         LP      Printer support is enabled.
         LOOP    Loopback device support is enabled.
@@@ -1098,6 -1099,44 +1099,44 @@@ and is between 256 and 4096 characters
         kstack=N        [X86] Print N words from the kernel stack
                         in oops dumps.
   
+       kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
+                       Default is 0 (don't ignore, but inject #GP)
+ 
+       kvm.oos_shadow= [KVM] Disable out-of-sync shadow paging.
+                       Default is 1 (enabled)
+ 
+       kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
+                       Default is 0 (off)
+ 
+       kvm-amd.npt=    [KVM,AMD] Disable nested paging (virtualized MMU)
+                       for all guests.
+                       Default is 1 (enabled) if in 64bit or 32bit-PAE mode
+ 
+       kvm-intel.bypass_guest_pf=
+                       [KVM,Intel] Disables bypassing of guest page faults
+                       on Intel chips. Default is 1 (enabled)
+ 
+       kvm-intel.ept=  [KVM,Intel] Disable extended page tables
+                       (virtualized MMU) support on capable Intel chips.
+                       Default is 1 (enabled)
+ 
+       kvm-intel.emulate_invalid_guest_state=
+                       [KVM,Intel] Enable emulation of invalid guest states
+                       Default is 0 (disabled)
+ 
+       kvm-intel.flexpriority=
+                       [KVM,Intel] Disable FlexPriority feature (TPR shadow).
+                       Default is 1 (enabled)
+ 
+       kvm-intel.unrestricted_guest=
+                       [KVM,Intel] Disable unrestricted guest feature
+                       (virtualized real and unpaged mode) on capable
+                       Intel chips. Default is 1 (enabled)
+ 
+       kvm-intel.vpid= [KVM,Intel] Disable Virtual Processor Identification
+                       feature (tagged TLBs) on capable Intel chips.
+                       Default is 1 (enabled)
+ 
         l2cr=           [PPC]
   
         l3cr=           [PPC]
@@@ -1503,14 -1542,6 +1542,14 @@@
                         [NFS] set the TCP port on which the NFSv4 callback
                         channel should listen.
   
+ +      nfs.cache_getent=
+ +                      [NFS] sets the pathname to the program which is used
+ +                      to update the NFS client cache entries.
+ +
+ +      nfs.cache_getent_timeout=
+ +                      [NFS] sets the timeout after which an attempt to
+ +                      update a cache entry is deemed to have failed.
+ +
         nfs.idmap_cache_timeout=
                         [NFS] set the maximum lifetime for idmapper cache
                         entries.
@@@ -1543,11 -1574,6 +1582,11 @@@
                         symbolic names: lapic and ioapic
                         Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic
   
+ +      netpoll.carrier_timeout=
+ +                      [NET] Specifies amount of time (in seconds) that
+ +                      netpoll should wait for a carrier. By default netpoll
+ +                      waits 4 seconds.
+ +
         no387           [BUGS=X86-32] Tells the kernel to use the 387 maths
                         emulation library even if a 387 maths coprocessor
                         is present.
@@@ -2408,18 -2434,6 +2447,18 @@@
         stifb=          [HW]
                         Format: bpp:<bpp1>[:<bpp2>[:<bpp3>...]]
   
+ +      sunrpc.min_resvport=
+ +      sunrpc.max_resvport=
+ +                      [NFS,SUNRPC]
+ +                      SunRPC servers often require that client requests
+ +                      originate from a privileged port (i.e. a port in the
+ +                      range 0 < portnr < 1024).
+ +                      An administrator who wishes to reserve some of these
+ +                      ports for other uses may adjust the range that the
+ +                      kernel's sunrpc client considers to be privileged
+ +                      using these two parameters to set the minimum and
+ +                      maximum port values.
+ +
         sunrpc.pool_mode=
                         [NFS]
                         Control how the NFS server code allocates CPUs to
@@@ -2436,15 -2450,6 +2475,15 @@@
                         pernode     one pool for each NUMA node (equivalent
                                     to global on non-NUMA machines)
   
+ +      sunrpc.tcp_slot_table_entries=
+ +      sunrpc.udp_slot_table_entries=
+ +                      [NFS,SUNRPC]
+ +                      Sets the upper limit on the number of simultaneous
+ +                      RPC calls that can be sent from the client to a
+ +                      server. Increasing these values may allow you to
+ +                      improve throughput, but will also increase the
+ +                      amount of memory reserved for use by the client.
+ +
         swiotlb=        [IA-64] Number of I/O TLB slabs
   
         switches=       [HW,M68k]
@@@ -2514,11 -2519,6 +2553,11 @@@
         trace_buf_size=nn[KMG]
                         [FTRACE] will set tracing buffer size.
   
+ +      trace_event=[event-list]
+ +                      [FTRACE] Set and start specified trace events in order
+ +                      to facilitate early boot debugging.
+ +                      See also Documentation/trace/events.txt
+ +
         trix=           [HW,OSS] MediaTrix AudioTrix Pro
                         Format:
                         <io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq>
diff --combined MAINTAINERS

index e95cb77,24f2694..1516936
--- 1/MAINTAINERS
--- 2/MAINTAINERS
+++ b/MAINTAINERS
@@@ -439,7 -439,7 +439,7 @@@ F: drivers/hwmon/ams
   AMSO1100 RNIC DRIVER
   M:    Tom Tucker <tom@opengridcomputing.com>
   M:    Steve Wise <swise@opengridcomputing.com>
- -L:    general@lists.openfabrics.org
+ +L:    linux-rdma@vger.kernel.org
   S:    Maintained
   F:    drivers/infiniband/hw/amso1100/
   
@@@ -876,7 -876,6 +876,7 @@@ M: "Luis R. Rodriguez" <lrodriguez@athe
   M:    Bob Copeland <me@bobcopeland.com>
   L:    linux-wireless@vger.kernel.org
   L:    ath5k-devel@lists.ath5k.org
+ +W:    http://wireless.kernel.org/en/users/Drivers/ath5k
   S:    Maintained
   F:    drivers/net/wireless/ath/ath5k/
   
@@@ -888,7 -887,6 +888,7 @@@ M: Vasanthakumar Thiagarajan <vasanth@a
   M:    Senthil Balasubramanian <senthilkumar@atheros.com>
   L:    linux-wireless@vger.kernel.org
   L:    ath9k-devel@lists.ath9k.org
+ +W:    http://wireless.kernel.org/en/users/Drivers/ath9k
   S:    Supported
   F:    drivers/net/wireless/ath/ath9k/
   
@@@ -1496,7 -1494,7 +1496,7 @@@ F:      drivers/net/cxgb3
   
   CXGB3 IWARP RNIC DRIVER (IW_CXGB3)
   M:    Steve Wise <swise@chelsio.com>
- -L:    general@lists.openfabrics.org
+ +L:    linux-rdma@vger.kernel.org
   W:    http://www.openfabrics.org
   S:    Supported
   F:    drivers/infiniband/hw/cxgb3/
@@@ -1870,7 -1868,7 +1870,7 @@@ F:      fs/efs
   EHCA (IBM GX bus InfiniBand adapter) DRIVER
   M:    Hoang-Nam Nguyen <hnguyen@de.ibm.com>
   M:    Christoph Raisch <raisch@de.ibm.com>
- -L:    general@lists.openfabrics.org
+ +L:    linux-rdma@vger.kernel.org
   S:    Supported
   F:    drivers/infiniband/hw/ehca/
   
@@@ -2554,7 -2552,7 +2554,7 @@@ INFINIBAND SUBSYSTE
   M:    Roland Dreier <rolandd@cisco.com>
   M:    Sean Hefty <sean.hefty@intel.com>
   M:    Hal Rosenstock <hal.rosenstock@gmail.com>
- -L:    general@lists.openfabrics.org (moderated for non-subscribers)
+ +L:    linux-rdma@vger.kernel.org
   W:    http://www.openib.org/
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband.git
   S:    Supported
@@@ -2662,21 -2660,25 +2662,21 @@@ F:   drivers/net/ixgbe
   
   INTEL PRO/WIRELESS 2100 NETWORK CONNECTION SUPPORT
   M:    Zhu Yi <yi.zhu@intel.com>
- -M:    James Ketrenos <jketreno@linux.intel.com>
   M:    Reinette Chatre <reinette.chatre@intel.com>
+ +M:    Intel Linux Wireless <ilw@linux.intel.com>
   L:    linux-wireless@vger.kernel.org
- -L:    ipw2100-devel@lists.sourceforge.net
- -W:    http://lists.sourceforge.net/mailman/listinfo/ipw2100-devel
   W:    http://ipw2100.sourceforge.net
- -S:    Supported
+ +S:    Odd Fixes
   F:    Documentation/networking/README.ipw2100
   F:    drivers/net/wireless/ipw2x00/ipw2100.*
   
   INTEL PRO/WIRELESS 2915ABG NETWORK CONNECTION SUPPORT
   M:    Zhu Yi <yi.zhu@intel.com>
- -M:    James Ketrenos <jketreno@linux.intel.com>
   M:    Reinette Chatre <reinette.chatre@intel.com>
+ +M:    Intel Linux Wireless <ilw@linux.intel.com>
   L:    linux-wireless@vger.kernel.org
- -L:    ipw2100-devel@lists.sourceforge.net
- -W:    http://lists.sourceforge.net/mailman/listinfo/ipw2100-devel
   W:    http://ipw2200.sourceforge.net
- -S:    Supported
+ +S:    Odd Fixes
   F:    Documentation/networking/README.ipw2200
   F:    drivers/net/wireless/ipw2x00/ipw2200.*
   
@@@ -2693,8 -2695,8 +2693,8 @@@ F:      include/linux/wimax/i2400m.
   INTEL WIRELESS WIFI LINK (iwlwifi)
   M:    Zhu Yi <yi.zhu@intel.com>
   M:    Reinette Chatre <reinette.chatre@intel.com>
+ +M:    Intel Linux Wireless <ilw@linux.intel.com>
   L:    linux-wireless@vger.kernel.org
- -L:    ipw3945-devel@lists.sourceforge.net
   W:    http://intellinuxwireless.org
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-2.6.git
   S:    Supported
@@@ -2727,7 -2729,7 +2727,7 @@@ F:      drivers/net/ipg.
   
   IPATH DRIVER
   M:    Ralph Campbell <infinipath@qlogic.com>
- -L:    general@lists.openfabrics.org
+ +L:    linux-rdma@vger.kernel.org
   T:    git git://git.qlogic.com/ipath-linux-2.6
   S:    Supported
   F:    drivers/infiniband/hw/ipath/
@@@ -2926,6 -2928,7 +2926,7 @@@ F:      include/linux/sunrpc
   
   KERNEL VIRTUAL MACHINE (KVM)
   M:    Avi Kivity <avi@redhat.com>
+ M:    Marcelo Tosatti <mtosatti@redhat.com>
   L:    kvm@vger.kernel.org
   W:    http://kvm.qumranet.com
   S:    Supported
@@@ -3277,12 -3280,6 +3278,12 @@@ S:    Supporte
   F:    drivers/net/mv643xx_eth.*
   F:    include/linux/mv643xx.h
   
+ +MARVELL MWL8K WIRELESS DRIVER
+ +M:    Lennert Buytenhek <buytenh@marvell.com>
+ +L:    linux-wireless@vger.kernel.org
+ +S:    Supported
+ +F:    drivers/net/wireless/mwl8k.c
+ +
   MARVELL SOC MMC/SD/SDIO CONTROLLER DRIVER
   M:    Nicolas Pitre <nico@cam.org>
   S:    Maintained
@@@ -3489,7 -3486,7 +3490,7 @@@ F:      drivers/scsi/NCR_D700.
   NETEFFECT IWARP RNIC DRIVER (IW_NES)
   M:    Faisal Latif <faisal.latif@intel.com>
   M:    Chien Tung <chien.tin.tung@intel.com>
- -L:    general@lists.openfabrics.org
+ +L:    linux-rdma@vger.kernel.org
   W:    http://www.neteffect.com
   S:    Supported
   F:    drivers/infiniband/hw/nes/
@@@ -3595,12 -3592,9 +3596,12 @@@ M:    "John W. Linville" <linville@tuxdriv
   L:    linux-wireless@vger.kernel.org
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git
   S:    Maintained
+ +F:    net/mac80211/
+ +F:    net/rfkill/
   F:    net/wireless/
   F:    include/net/ieee80211*
   F:    include/linux/wireless.h
+ +F:    drivers/net/wireless/
   
   NETWORKING DRIVERS
   L:    netdev@vger.kernel.org
@@@ -4306,7 -4300,7 +4307,7 @@@ L:      linux-wireless@vger.kernel.or
   W:    http://linuxwireless.org/
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
   S:    Maintained
- -F:    drivers/net/wireless/rtl818*
+ +F:    drivers/net/wireless/rtl818x/rtl8180*
   
   RTL8187 WIRELESS DRIVER
   M:    Herton Ronaldo Krzesinski <herton@mandriva.com.br>
@@@ -4533,10 -4527,9 +4534,10 @@@ S:    Supporte
   F:    drivers/net/benet/
   
   SFC NETWORK DRIVER
- -P:    Steve Hodgson
- -P:    Ben Hutchings
- -M:    Robert Stonehouse <linux-net-drivers@solarflare.com>
+ +M:    Solarflare linux maintainers <linux-net-drivers@solarflare.com>
+ +M:    Steve Hodgson <shodgson@solarflare.com>
+ +M:    Ben Hutchings <bhutchings@solarflare.com>
+ +L:    netdev@vger.kernel.org
   S:    Supported
   F:    drivers/net/sfc/
   
@@@ -5586,24 -5579,6 +5587,24 @@@ M:    Miloslav Trmac <mitr@volny.cz
   S:    Maintained
   F:    drivers/input/misc/wistron_btns.c
   
+ +WL1251 WIRELESS DRIVER
+ +P:    Kalle Valo
+ +M:    kalle.valo@nokia.com
+ +L:    linux-wireless@vger.kernel.org
+ +W:    http://wireless.kernel.org
+ +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
+ +S:    Maintained
+ +F:    drivers/net/wireless/wl12xx/*
+ +X:    drivers/net/wireless/wl12xx/wl1271*
+ +
+ +WL1271 WIRELESS DRIVER
+ +M:    Luciano Coelho <luciano.coelho@nokia.com>
+ +L:    linux-wireless@vger.kernel.org
+ +W:    http://wireless.kernel.org
+ +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
+ +S:    Maintained
+ +F:    drivers/net/wireless/wl12xx/wl1271*
+ +
   WL3501 WIRELESS PCMCIA CARD DRIVER
   M:    Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
   L:    linux-wireless@vger.kernel.org
diff --combined arch/s390/include/asm/kvm_host.h

index 698988f,78e07a6..27605b6
--- 1/arch/s390/include/asm/kvm_host.h
--- 2/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@@ -1,7 -1,7 +1,7 @@@
   /*
    * asm-s390/kvm_host.h - definition for kernel virtual machines on s390
    *
-  * Copyright IBM Corp. 2008
+  * Copyright IBM Corp. 2008,2009
    *
    * This program is free software; you can redistribute it and/or modify
    * it under the terms of the GNU General Public License (version 2 only)
@@@ -17,7 -17,7 +17,7 @@@
   #include <linux/interrupt.h>
   #include <linux/kvm_host.h>
   #include <asm/debug.h>
- -#include <asm/cpuid.h>
+ +#include <asm/cpu.h>
   
   #define KVM_MAX_VCPUS 64
   #define KVM_MEMORY_SLOTS 32
@@@ -40,7 -40,11 +40,11 @@@ struct sca_block 
         struct sca_entry cpu[64];
   } __attribute__((packed));
   
- #define KVM_PAGES_PER_HPAGE 256
+ #define KVM_NR_PAGE_SIZES 2
+ #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + ((x) - 1) * 8)
+ #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x))
+ #define KVM_HPAGE_MASK(x)     (~(KVM_HPAGE_SIZE(x) - 1))
+ #define KVM_PAGES_PER_HPAGE(x)        (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
   
   #define CPUSTAT_HOST       0x80000000
   #define CPUSTAT_WAIT       0x10000000
@@@ -182,8 -186,9 +186,9 @@@ struct kvm_s390_interrupt_info 
   };
   
   /* for local_interrupt.action_flags */
- #define ACTION_STORE_ON_STOP 1
- #define ACTION_STOP_ON_STOP  2
+ #define ACTION_STORE_ON_STOP          (1<<0)
+ #define ACTION_STOP_ON_STOP           (1<<1)
+ #define ACTION_RELOADVCPU_ON_STOP     (1<<2)
   
   struct kvm_s390_local_interrupt {
         spinlock_t lock;
@@@ -217,8 -222,8 +222,8 @@@ struct kvm_vcpu_arch 
         struct hrtimer    ckc_timer;
         struct tasklet_struct tasklet;
         union  {
- -              cpuid_t   cpu_id;
- -              u64       stidp_data;
+ +              struct cpuid    cpu_id;
+ +              u64             stidp_data;
         };
   };
   
@@@ -227,8 -232,6 +232,6 @@@ struct kvm_vm_stat 
   };
   
   struct kvm_arch{
-       unsigned long guest_origin;
-       unsigned long guest_memsize;
         struct sca_block *sca;
         debug_info_t *dbf;
         struct kvm_s390_float_interrupt float_int;
diff --combined arch/x86/include/asm/apicdef.h

index 7386bfa,74ca38f..3b62da9
--- 1/arch/x86/include/asm/apicdef.h
--- 2/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@@ -8,13 -8,13 +8,14 @@@
    * Ingo Molnar <mingo@redhat.com>, 1999, 2000
    */
   
- -#define       APIC_DEFAULT_PHYS_BASE  0xfee00000
+ +#define IO_APIC_DEFAULT_PHYS_BASE     0xfec00000
+ +#define       APIC_DEFAULT_PHYS_BASE          0xfee00000
   
   #define       APIC_ID         0x20
   
   #define       APIC_LVR        0x30
   #define               APIC_LVR_MASK           0xFF00FF
+ #define               APIC_LVR_DIRECTED_EOI   (1 << 24)
   #define               GET_APIC_VERSION(x)     ((x) & 0xFFu)
   #define               GET_APIC_MAXLVT(x)      (((x) >> 16) & 0xFFu)
   #ifdef CONFIG_X86_32
@@@ -41,6 -41,7 +42,7 @@@
   #define               APIC_DFR_CLUSTER                0x0FFFFFFFul
   #define               APIC_DFR_FLAT                   0xFFFFFFFFul
   #define       APIC_SPIV       0xF0
+ #define               APIC_SPIV_DIRECTED_EOI          (1 << 12)
   #define               APIC_SPIV_FOCUS_DISABLED        (1 << 9)
   #define               APIC_SPIV_APIC_ENABLED          (1 << 8)
   #define       APIC_ISR        0x100
diff --combined arch/x86/kvm/x86.c

index 633ccc7,4137cc5..be451ee
--- 1/arch/x86/kvm/x86.c
--- 2/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@@ -37,11 -37,16 +37,16 @@@
   #include <linux/iommu.h>
   #include <linux/intel-iommu.h>
   #include <linux/cpufreq.h>
+ #include <trace/events/kvm.h>
+ #undef TRACE_INCLUDE_FILE
+ #define CREATE_TRACE_POINTS
+ #include "trace.h"
   
   #include <asm/uaccess.h>
   #include <asm/msr.h>
   #include <asm/desc.h>
   #include <asm/mtrr.h>
+ #include <asm/mce.h>
   
   #define MAX_IO_MSRS 256
   #define CR0_RESERVED_BITS                                             \
@@@ -55,6 -60,10 +60,10 @@@
                           | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
   
   #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
+ 
+ #define KVM_MAX_MCE_BANKS 32
+ #define KVM_MCE_CAP_SUPPORTED MCG_CTL_P
+ 
   /* EFER defaults:
    * - enable syscall per default because its emulated by KVM
    * - enable LME and LMA per default on 64 bit KVM
@@@ -68,14 -77,16 +77,16 @@@ static u64 __read_mostly efer_reserved_
   #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
   #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
   
+ static void update_cr8_intercept(struct kvm_vcpu *vcpu);
   static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
                                     struct kvm_cpuid_entry2 __user *entries);
- struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
-                                             u32 function, u32 index);
   
   struct kvm_x86_ops *kvm_x86_ops;
   EXPORT_SYMBOL_GPL(kvm_x86_ops);
   
+ int ignore_msrs = 0;
+ module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
+ 
   struct kvm_stats_debugfs_item debugfs_entries[] = {
         { "pf_fixed", VCPU_STAT(pf_fixed) },
         { "pf_guest", VCPU_STAT(pf_guest) },
@@@ -122,18 -133,16 +133,16 @@@ unsigned long segment_base(u16 selector
         if (selector == 0)
                 return 0;
   
-       asm("sgdt %0" : "=m"(gdt));
+       kvm_get_gdt(&gdt);
         table_base = gdt.base;
   
         if (selector & 4) {           /* from ldt */
-               u16 ldt_selector;
+               u16 ldt_selector = kvm_read_ldt();
   
-               asm("sldt %0" : "=g"(ldt_selector));
                 table_base = segment_base(ldt_selector);
         }
         d = (struct desc_struct *)(table_base + (selector & ~7));
-       v = d->base0 | ((unsigned long)d->base1 << 16) |
-               ((unsigned long)d->base2 << 24);
+       v = get_desc_base(d);
   #ifdef CONFIG_X86_64
         if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
                 v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
@@@ -176,16 -185,22 +185,22 @@@ void kvm_inject_page_fault(struct kvm_v
         ++vcpu->stat.pf_guest;
   
         if (vcpu->arch.exception.pending) {
-               if (vcpu->arch.exception.nr == PF_VECTOR) {
-                       printk(KERN_DEBUG "kvm: inject_page_fault:"
-                                       " double fault 0x%lx\n", addr);
-                       vcpu->arch.exception.nr = DF_VECTOR;
-                       vcpu->arch.exception.error_code = 0;
-               } else if (vcpu->arch.exception.nr == DF_VECTOR) {
+               switch(vcpu->arch.exception.nr) {
+               case DF_VECTOR:
                         /* triple fault -> shutdown */
                         set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+                       return;
+               case PF_VECTOR:
+                       vcpu->arch.exception.nr = DF_VECTOR;
+                       vcpu->arch.exception.error_code = 0;
+                       return;
+               default:
+                       /* replace previous exception with a new one in a hope
+                          that instruction re-execution will regenerate lost
+                          exception */
+                       vcpu->arch.exception.pending = false;
+                       break;
                 }
-               return;
         }
         vcpu->arch.cr2 = addr;
         kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
@@@ -207,12 -222,18 +222,18 @@@ void kvm_queue_exception_e(struct kvm_v
   }
   EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
   
- static void __queue_exception(struct kvm_vcpu *vcpu)
+ /*
+  * Checks if cpl <= required_cpl; if true, return true.  Otherwise queue
+  * a #GP and return false.
+  */
+ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
   {
-       kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
-                                    vcpu->arch.exception.has_error_code,
-                                    vcpu->arch.exception.error_code);
+       if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
+               return true;
+       kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+       return false;
   }
+ EXPORT_SYMBOL_GPL(kvm_require_cpl);
   
   /*
    * Load the pae pdptrs.  Return true is they are all valid.
@@@ -232,7 -253,7 +253,7 @@@ int load_pdptrs(struct kvm_vcpu *vcpu, 
                 goto out;
         }
         for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
-               if (is_present_pte(pdpte[i]) &&
+               if (is_present_gpte(pdpte[i]) &&
                     (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
                         ret = 0;
                         goto out;
@@@ -241,6 -262,10 +262,10 @@@
         ret = 1;
   
         memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
+       __set_bit(VCPU_EXREG_PDPTR,
+                 (unsigned long *)&vcpu->arch.regs_avail);
+       __set_bit(VCPU_EXREG_PDPTR,
+                 (unsigned long *)&vcpu->arch.regs_dirty);
   out:
   
         return ret;
@@@ -256,6 -281,10 +281,10 @@@ static bool pdptrs_changed(struct kvm_v
         if (is_long_mode(vcpu) || !is_pae(vcpu))
                 return false;
   
+       if (!test_bit(VCPU_EXREG_PDPTR,
+                     (unsigned long *)&vcpu->arch.regs_avail))
+               return true;
+ 
         r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
         if (r < 0)
                 goto out;
@@@ -328,9 -357,6 +357,6 @@@ EXPORT_SYMBOL_GPL(kvm_set_cr0)
   void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
   {
         kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
-       KVMTRACE_1D(LMSW, vcpu,
-                   (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)),
-                   handler);
   }
   EXPORT_SYMBOL_GPL(kvm_lmsw);
   
@@@ -466,7 -492,7 +492,7 @@@ static u32 msrs_to_save[] = 
   #ifdef CONFIG_X86_64
         MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
   #endif
-       MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+       MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
         MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
   };
   
@@@ -644,8 -670,7 +670,7 @@@ static void kvm_write_guest_time(struc
   
         /* Keep irq disabled to prevent changes to the clock */
         local_irq_save(flags);
-       kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
-                         &vcpu->hv_clock.tsc_timestamp);
+       kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
         ktime_get_ts(&ts);
         local_irq_restore(flags);
   
@@@ -778,23 -803,60 +803,60 @@@ static int set_msr_mtrr(struct kvm_vcp
         return 0;
   }
   
+ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+ {
+       u64 mcg_cap = vcpu->arch.mcg_cap;
+       unsigned bank_num = mcg_cap & 0xff;
+ 
+       switch (msr) {
+       case MSR_IA32_MCG_STATUS:
+               vcpu->arch.mcg_status = data;
+               break;
+       case MSR_IA32_MCG_CTL:
+               if (!(mcg_cap & MCG_CTL_P))
+                       return 1;
+               if (data != 0 && data != ~(u64)0)
+                       return -1;
+               vcpu->arch.mcg_ctl = data;
+               break;
+       default:
+               if (msr >= MSR_IA32_MC0_CTL &&
+                   msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+                       u32 offset = msr - MSR_IA32_MC0_CTL;
+                       /* only 0 or all 1s can be written to IA32_MCi_CTL */
+                       if ((offset & 0x3) == 0 &&
+                           data != 0 && data != ~(u64)0)
+                               return -1;
+                       vcpu->arch.mce_banks[offset] = data;
+                       break;
+               }
+               return 1;
+       }
+       return 0;
+ }
+ 
   int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
   {
         switch (msr) {
         case MSR_EFER:
                 set_efer(vcpu, data);
                 break;
-       case MSR_IA32_MC0_STATUS:
-               pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
-                      __func__, data);
+       case MSR_K7_HWCR:
+               data &= ~(u64)0x40;     /* ignore flush filter disable */
+               if (data != 0) {
+                       pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
+                               data);
+                       return 1;
+               }
                 break;
-       case MSR_IA32_MCG_STATUS:
-               pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
-                       __func__, data);
+       case MSR_FAM10H_MMIO_CONF_BASE:
+               if (data != 0) {
+                       pr_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
+                               "0x%llx\n", data);
+                       return 1;
+               }
                 break;
-       case MSR_IA32_MCG_CTL:
-               pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
-                       __func__, data);
+       case MSR_AMD64_NB_CFG:
                 break;
         case MSR_IA32_DEBUGCTLMSR:
                 if (!data) {
@@@ -811,12 -873,15 +873,15 @@@
         case MSR_IA32_UCODE_REV:
         case MSR_IA32_UCODE_WRITE:
         case MSR_VM_HSAVE_PA:
+       case MSR_AMD64_PATCH_LOADER:
                 break;
         case 0x200 ... 0x2ff:
                 return set_msr_mtrr(vcpu, msr, data);
         case MSR_IA32_APICBASE:
                 kvm_set_apic_base(vcpu, data);
                 break;
+       case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
+               return kvm_x2apic_msr_write(vcpu, msr, data);
         case MSR_IA32_MISC_ENABLE:
                 vcpu->arch.ia32_misc_enable_msr = data;
                 break;
@@@ -850,9 -915,50 +915,50 @@@
                 kvm_request_guest_time_update(vcpu);
                 break;
         }
+       case MSR_IA32_MCG_CTL:
+       case MSR_IA32_MCG_STATUS:
+       case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+               return set_msr_mce(vcpu, msr, data);
+ 
+       /* Performance counters are not protected by a CPUID bit,
+        * so we should check all of them in the generic path for the sake of
+        * cross vendor migration.
+        * Writing a zero into the event select MSRs disables them,
+        * which we perfectly emulate ;-). Any other value should be at least
+        * reported, some guests depend on them.
+        */
+       case MSR_P6_EVNTSEL0:
+       case MSR_P6_EVNTSEL1:
+       case MSR_K7_EVNTSEL0:
+       case MSR_K7_EVNTSEL1:
+       case MSR_K7_EVNTSEL2:
+       case MSR_K7_EVNTSEL3:
+               if (data != 0)
+                       pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
+                               "0x%x data 0x%llx\n", msr, data);
+               break;
+       /* at least RHEL 4 unconditionally writes to the perfctr registers,
+        * so we ignore writes to make it happy.
+        */
+       case MSR_P6_PERFCTR0:
+       case MSR_P6_PERFCTR1:
+       case MSR_K7_PERFCTR0:
+       case MSR_K7_PERFCTR1:
+       case MSR_K7_PERFCTR2:
+       case MSR_K7_PERFCTR3:
+               pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
+                       "0x%x data 0x%llx\n", msr, data);
+               break;
         default:
-               pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
-               return 1;
+               if (!ignore_msrs) {
+                       pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
+                               msr, data);
+                       return 1;
+               } else {
+                       pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
+                               msr, data);
+                       break;
+               }
         }
         return 0;
   }
@@@ -905,26 -1011,47 +1011,47 @@@ static int get_msr_mtrr(struct kvm_vcp
         return 0;
   }
   
- int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
   {
         u64 data;
+       u64 mcg_cap = vcpu->arch.mcg_cap;
+       unsigned bank_num = mcg_cap & 0xff;
   
         switch (msr) {
-       case 0xc0010010: /* SYSCFG */
-       case 0xc0010015: /* HWCR */
-       case MSR_IA32_PLATFORM_ID:
         case MSR_IA32_P5_MC_ADDR:
         case MSR_IA32_P5_MC_TYPE:
-       case MSR_IA32_MC0_CTL:
-       case MSR_IA32_MCG_STATUS:
+               data = 0;
+               break;
         case MSR_IA32_MCG_CAP:
+               data = vcpu->arch.mcg_cap;
+               break;
         case MSR_IA32_MCG_CTL:
-       case MSR_IA32_MC0_MISC:
-       case MSR_IA32_MC0_MISC+4:
-       case MSR_IA32_MC0_MISC+8:
-       case MSR_IA32_MC0_MISC+12:
-       case MSR_IA32_MC0_MISC+16:
-       case MSR_IA32_MC0_MISC+20:
+               if (!(mcg_cap & MCG_CTL_P))
+                       return 1;
+               data = vcpu->arch.mcg_ctl;
+               break;
+       case MSR_IA32_MCG_STATUS:
+               data = vcpu->arch.mcg_status;
+               break;
+       default:
+               if (msr >= MSR_IA32_MC0_CTL &&
+                   msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
+                       u32 offset = msr - MSR_IA32_MC0_CTL;
+                       data = vcpu->arch.mce_banks[offset];
+                       break;
+               }
+               return 1;
+       }
+       *pdata = data;
+       return 0;
+ }
+ 
+ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
+ {
+       u64 data;
+ 
+       switch (msr) {
+       case MSR_IA32_PLATFORM_ID:
         case MSR_IA32_UCODE_REV:
         case MSR_IA32_EBL_CR_POWERON:
         case MSR_IA32_DEBUGCTLMSR:
@@@ -932,10 -1059,18 +1059,18 @@@
         case MSR_IA32_LASTBRANCHTOIP:
         case MSR_IA32_LASTINTFROMIP:
         case MSR_IA32_LASTINTTOIP:
+       case MSR_K8_SYSCFG:
+       case MSR_K7_HWCR:
         case MSR_VM_HSAVE_PA:
+       case MSR_P6_PERFCTR0:
+       case MSR_P6_PERFCTR1:
         case MSR_P6_EVNTSEL0:
         case MSR_P6_EVNTSEL1:
         case MSR_K7_EVNTSEL0:
+       case MSR_K7_PERFCTR0:
+       case MSR_K8_INT_PENDING_MSG:
+       case MSR_AMD64_NB_CFG:
+       case MSR_FAM10H_MMIO_CONF_BASE:
                 data = 0;
                 break;
         case MSR_MTRRcap:
@@@ -949,6 -1084,9 +1084,9 @@@
         case MSR_IA32_APICBASE:
                 data = kvm_get_apic_base(vcpu);
                 break;
+       case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
+               return kvm_x2apic_msr_read(vcpu, msr, pdata);
+               break;
         case MSR_IA32_MISC_ENABLE:
                 data = vcpu->arch.ia32_misc_enable_msr;
                 break;
@@@ -967,9 -1105,22 +1105,22 @@@
         case MSR_KVM_SYSTEM_TIME:
                 data = vcpu->arch.time;
                 break;
+       case MSR_IA32_P5_MC_ADDR:
+       case MSR_IA32_P5_MC_TYPE:
+       case MSR_IA32_MCG_CAP:
+       case MSR_IA32_MCG_CTL:
+       case MSR_IA32_MCG_STATUS:
+       case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
+               return get_msr_mce(vcpu, msr, pdata);
         default:
-               pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
-               return 1;
+               if (!ignore_msrs) {
+                       pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
+                       return 1;
+               } else {
+                       pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
+                       data = 0;
+               }
+               break;
         }
         *pdata = data;
         return 0;
@@@ -1068,6 -1219,11 +1219,11 @@@ int kvm_dev_ioctl_check_extension(long 
         case KVM_CAP_REINJECT_CONTROL:
         case KVM_CAP_IRQ_INJECT_STATUS:
         case KVM_CAP_ASSIGN_DEV_IRQ:
+       case KVM_CAP_IRQFD:
+       case KVM_CAP_IOEVENTFD:
+       case KVM_CAP_PIT2:
+       case KVM_CAP_PIT_STATE2:
+       case KVM_CAP_SET_IDENTITY_MAP_ADDR:
                 r = 1;
                 break;
         case KVM_CAP_COALESCED_MMIO:
@@@ -1088,6 -1244,9 +1244,9 @@@
         case KVM_CAP_IOMMU:
                 r = iommu_found();
                 break;
+       case KVM_CAP_MCE:
+               r = KVM_MAX_MCE_BANKS;
+               break;
         default:
                 r = 0;
                 break;
@@@ -1147,6 -1306,16 +1306,16 @@@ long kvm_arch_dev_ioctl(struct file *fi
                 r = 0;
                 break;
         }
+       case KVM_X86_GET_MCE_CAP_SUPPORTED: {
+               u64 mce_cap;
+ 
+               mce_cap = KVM_MCE_CAP_SUPPORTED;
+               r = -EFAULT;
+               if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
+                       goto out;
+               r = 0;
+               break;
+       }
         default:
                 r = -EINVAL;
         }
@@@ -1227,6 -1396,7 +1396,7 @@@ static int kvm_vcpu_ioctl_set_cpuid(str
         vcpu->arch.cpuid_nent = cpuid->nent;
         cpuid_fix_nx_cap(vcpu);
         r = 0;
+       kvm_apic_set_version(vcpu);
   
   out_free:
         vfree(cpuid_entries);
@@@ -1248,6 -1418,7 +1418,7 @@@ static int kvm_vcpu_ioctl_set_cpuid2(st
                            cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
                 goto out;
         vcpu->arch.cpuid_nent = cpuid->nent;
+       kvm_apic_set_version(vcpu);
         return 0;
   
   out:
@@@ -1290,6 -1461,7 +1461,7 @@@ static void do_cpuid_ent(struct kvm_cpu
                          u32 index, int *nent, int maxnent)
   {
         unsigned f_nx = is_efer_nx() ? F(NX) : 0;
+       unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0;
   #ifdef CONFIG_X86_64
         unsigned f_lm = F(LM);
   #else
@@@ -1314,7 -1486,7 +1486,7 @@@
                 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
                 F(PAT) | F(PSE36) | 0 /* Reserved */ |
                 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
-               F(FXSR) | F(FXSR_OPT) | 0 /* GBPAGES */ | 0 /* RDTSCP */ |
+               F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ |
                 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
         /* cpuid 1.ecx */
         const u32 kvm_supported_word4_x86_features =
@@@ -1323,7 -1495,7 +1495,7 @@@
                 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
                 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
                 0 /* Reserved, DCA */ | F(XMM4_1) |
-               F(XMM4_2) | 0 /* x2APIC */ | F(MOVBE) | F(POPCNT) |
+               F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
                 0 /* Reserved, XSAVE, OSXSAVE */;
         /* cpuid 0x80000001.ecx */
         const u32 kvm_supported_word6_x86_features =
@@@ -1344,6 -1516,9 +1516,9 @@@
         case 1:
                 entry->edx &= kvm_supported_word0_x86_features;
                 entry->ecx &= kvm_supported_word4_x86_features;
+               /* we support x2apic emulation even if host does not support
+                * it since we emulate x2apic in software */
+               entry->ecx |= F(X2APIC);
                 break;
         /* function 2 entries are STATEFUL. That is, repeated cpuid commands
          * may return different values. This forces us to get_cpu() before
@@@ -1435,6 -1610,10 +1610,10 @@@ static int kvm_dev_ioctl_get_supported_
         for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
                 do_cpuid_ent(&cpuid_entries[nent], func, 0,
                              &nent, cpuid->nent);
+       r = -E2BIG;
+       if (nent >= cpuid->nent)
+               goto out_free;
+ 
         r = -EFAULT;
         if (copy_to_user(entries, cpuid_entries,
                          nent * sizeof(struct kvm_cpuid_entry2)))
@@@ -1464,6 -1643,7 +1643,7 @@@ static int kvm_vcpu_ioctl_set_lapic(str
         vcpu_load(vcpu);
         memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
         kvm_apic_post_state_restore(vcpu);
+       update_cr8_intercept(vcpu);
         vcpu_put(vcpu);
   
         return 0;
@@@ -1503,6 -1683,80 +1683,80 @@@ static int vcpu_ioctl_tpr_access_report
         return 0;
   }
   
+ /*
+  * KVM_X86_SETUP_MCE: install the guest-visible MCG_CAP value for this vcpu
+  * and reset the machine-check control registers to all 1s.
+  *
+  * The low 8 bits of @mcg_cap give the number of banks.  Returns -EINVAL when
+  * that count is zero or exceeds KVM_MAX_MCE_BANKS, or when @mcg_cap has bits
+  * set outside KVM_MCE_CAP_SUPPORTED, bits 0-7 and bits 16-23; returns 0
+  * otherwise.
+  */
+ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
+                                       u64 mcg_cap)
+ {
+       int r;
+       unsigned bank_num = mcg_cap & 0xff, bank;
+ 
+       r = -EINVAL;
+       /*
+        * bank_num comes straight from userspace and bounds the loop below;
+        * cap it at KVM_MAX_MCE_BANKS (the limit reported for KVM_CAP_MCE)
+        * so the mce_banks[] writes cannot run past the array.
+        * NOTE(review): assumes mce_banks[] holds 4 * KVM_MAX_MCE_BANKS
+        * entries -- confirm against its allocation.
+        */
+       if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
+               goto out;
+       if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
+               goto out;
+       r = 0;
+       vcpu->arch.mcg_cap = mcg_cap;
+       /* Init IA32_MCG_CTL to all 1s */
+       if (mcg_cap & MCG_CTL_P)
+               vcpu->arch.mcg_ctl = ~(u64)0;
+       /* Init IA32_MCi_CTL to all 1s (first of the four slots per bank) */
+       for (bank = 0; bank < bank_num; bank++)
+               vcpu->arch.mce_banks[bank*4] = ~(u64)0;
+ out:
+       return r;
+ }
+ 
+ /*
+  * KVM_X86_SET_MCE: record a machine-check event supplied by userspace in
+  * one of the vcpu's emulated MCE banks.
+  *
+  * Each bank occupies four consecutive u64 slots of vcpu->arch.mce_banks;
+  * as used below, slot 0 is IA32_MCi_CTL, slot 1 the status, slot 2 the
+  * address and slot 3 the misc value.
+  *
+  * Returns -EINVAL if mce->bank is not below the bank count in the low byte
+  * of mcg_cap or if mce->status lacks MCI_STATUS_VAL; returns 0 in every
+  * other case, including when the event is dropped because reporting is
+  * disabled or when a triple fault is requested instead of an injection.
+  */
+ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
+                                     struct kvm_x86_mce *mce)
+ {
+       u64 mcg_cap = vcpu->arch.mcg_cap;
+       unsigned bank_num = mcg_cap & 0xff;
+       u64 *banks = vcpu->arch.mce_banks;
+ 
+       if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
+               return -EINVAL;
+       /*
+        * if IA32_MCG_CTL is not all 1s, the uncorrected error
+        * reporting is disabled
+        */
+       if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
+           vcpu->arch.mcg_ctl != ~(u64)0)
+               return 0;
+       banks += 4 * mce->bank;
+       /*
+        * if IA32_MCi_CTL is not all 1s, the uncorrected error
+        * reporting is disabled for the bank
+        */
+       if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
+               return 0;
+       if (mce->status & MCI_STATUS_UC) {
+               if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
+                   !(vcpu->arch.cr4 & X86_CR4_MCE)) {
+                       /*
+                        * This path is also taken when CR4.MCE is clear,
+                        * although the message only mentions an exception
+                        * already in progress.
+                        */
+                       printk(KERN_DEBUG "kvm: set_mce: "
+                              "injects mce exception while "
+                              "previous one is in progress!\n");
+                       set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+                       return 0;
+               }
+               if (banks[1] & MCI_STATUS_VAL)
+                       mce->status |= MCI_STATUS_OVER;
+               banks[2] = mce->addr;
+               banks[3] = mce->misc;
+               vcpu->arch.mcg_status = mce->mcg_status;
+               banks[1] = mce->status;
+               kvm_queue_exception(vcpu, MC_VECTOR);
+       } else if (!(banks[1] & MCI_STATUS_VAL)
+                  || !(banks[1] & MCI_STATUS_UC)) {
+               /* bank is empty or holds a non-UC error: overwrite it */
+               if (banks[1] & MCI_STATUS_VAL)
+                       mce->status |= MCI_STATUS_OVER;
+               banks[2] = mce->addr;
+               banks[3] = mce->misc;
+               banks[1] = mce->status;
+       } else
+               /* keep the stored UC error, only flag the overflow */
+               banks[1] |= MCI_STATUS_OVER;
+       return 0;
+ }
+ 
   long kvm_arch_vcpu_ioctl(struct file *filp,
                          unsigned int ioctl, unsigned long arg)
   {
@@@ -1636,6 -1890,24 +1890,24 @@@
                 kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
                 break;
         }
+       case KVM_X86_SETUP_MCE: {
+               u64 mcg_cap;
+ 
+               r = -EFAULT;
+               if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
+                       goto out;
+               r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
+               break;
+       }
+       case KVM_X86_SET_MCE: {
+               struct kvm_x86_mce mce;
+ 
+               r = -EFAULT;
+               if (copy_from_user(&mce, argp, sizeof mce))
+                       goto out;
+               r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
+               break;
+       }
         default:
                 r = -EINVAL;
         }
@@@ -1654,6 -1926,13 +1926,13 @@@ static int kvm_vm_ioctl_set_tss_addr(st
         return ret;
   }
   
+ /*
+  * Store the userspace-chosen address in kvm->arch.ept_identity_map_addr.
+  * No validation is done here; always returns 0.
+  */
+ static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
+                                             u64 ident_addr)
+ {
+       kvm->arch.ept_identity_map_addr = ident_addr;
+       return 0;
+ }
+ 
   static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
                                           u32 kvm_nr_mmu_pages)
   {
@@@ -1775,19 -2054,25 +2054,25 @@@ static int kvm_vm_ioctl_set_irqchip(str
         r = 0;
         switch (chip->chip_id) {
         case KVM_IRQCHIP_PIC_MASTER:
+               spin_lock(&pic_irqchip(kvm)->lock);
                 memcpy(&pic_irqchip(kvm)->pics[0],
                         &chip->chip.pic,
                         sizeof(struct kvm_pic_state));
+               spin_unlock(&pic_irqchip(kvm)->lock);
                 break;
         case KVM_IRQCHIP_PIC_SLAVE:
+               spin_lock(&pic_irqchip(kvm)->lock);
                 memcpy(&pic_irqchip(kvm)->pics[1],
                         &chip->chip.pic,
                         sizeof(struct kvm_pic_state));
+               spin_unlock(&pic_irqchip(kvm)->lock);
                 break;
         case KVM_IRQCHIP_IOAPIC:
+               mutex_lock(&kvm->irq_lock);
                 memcpy(ioapic_irqchip(kvm),
                         &chip->chip.ioapic,
                         sizeof(struct kvm_ioapic_state));
+               mutex_unlock(&kvm->irq_lock);
                 break;
         default:
                 r = -EINVAL;
@@@ -1801,7 -2086,9 +2086,9 @@@ static int kvm_vm_ioctl_get_pit(struct 
   {
         int r = 0;
   
+       mutex_lock(&kvm->arch.vpit->pit_state.lock);
         memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
+       mutex_unlock(&kvm->arch.vpit->pit_state.lock);
         return r;
   }
   
@@@ -1809,8 -2096,39 +2096,39 @@@ static int kvm_vm_ioctl_set_pit(struct 
   {
         int r = 0;
   
+       mutex_lock(&kvm->arch.vpit->pit_state.lock);
         memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
-       kvm_pit_load_count(kvm, 0, ps->channels[0].count);
+       kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
+       mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+       return r;
+ }
+ 
+ /*
+  * Snapshot the in-kernel PIT's channel array and flags into @ps while
+  * holding pit_state.lock.  Always returns 0.
+  */
+ static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
+ {
+       mutex_lock(&kvm->arch.vpit->pit_state.lock);
+       ps->flags = kvm->arch.vpit->pit_state.flags;
+       memcpy(ps->channels, kvm->arch.vpit->pit_state.channels,
+              sizeof(ps->channels));
+       mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ 
+       return 0;
+ }
+ 
+ /*
+  * Load PIT channel state and flags from @ps into the in-kernel PIT while
+  * holding pit_state.lock, then reload channel 0's count.  The last argument
+  * to kvm_pit_load_count() is 1 only when KVM_PIT_FLAGS_HPET_LEGACY goes
+  * from clear to set with this call.  Always returns 0.
+  */
+ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
+ {
+       int r = 0, start = 0;
+       u32 prev_legacy, cur_legacy;
+       mutex_lock(&kvm->arch.vpit->pit_state.lock);
+       /* sample the old flag before pit_state.flags is overwritten below */
+       prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
+       cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
+       if (!prev_legacy && cur_legacy)
+               start = 1;
+       memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
+              sizeof(kvm->arch.vpit->pit_state.channels));
+       kvm->arch.vpit->pit_state.flags = ps->flags;
+       kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
+       mutex_unlock(&kvm->arch.vpit->pit_state.lock);
         return r;
   }
   
@@@ -1819,7 -2137,9 +2137,9 @@@ static int kvm_vm_ioctl_reinject(struc
   {
         if (!kvm->arch.vpit)
                 return -ENXIO;
+       mutex_lock(&kvm->arch.vpit->pit_state.lock);
         kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
+       mutex_unlock(&kvm->arch.vpit->pit_state.lock);
         return 0;
   }
   
@@@ -1845,7 -2165,6 +2165,6 @@@ int kvm_vm_ioctl_get_dirty_log(struct k
                 spin_lock(&kvm->mmu_lock);
                 kvm_mmu_slot_remove_write_access(kvm, log->slot);
                 spin_unlock(&kvm->mmu_lock);
-               kvm_flush_remote_tlbs(kvm);
                 memslot = &kvm->memslots[log->slot];
                 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
                 memset(memslot->dirty_bitmap, 0, n);
@@@ -1869,7 -2188,9 +2188,9 @@@ long kvm_arch_vm_ioctl(struct file *fil
          */
         union {
                 struct kvm_pit_state ps;
+               struct kvm_pit_state2 ps2;
                 struct kvm_memory_alias alias;
+               struct kvm_pit_config pit_config;
         } u;
   
         switch (ioctl) {
@@@ -1878,6 -2199,17 +2199,17 @@@
                 if (r < 0)
                         goto out;
                 break;
+       case KVM_SET_IDENTITY_MAP_ADDR: {
+               u64 ident_addr;
+ 
+               r = -EFAULT;
+               if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
+                       goto out;
+               r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
+               if (r < 0)
+                       goto out;
+               break;
+       }
         case KVM_SET_MEMORY_REGION: {
                 struct kvm_memory_region kvm_mem;
                 struct kvm_userspace_memory_region kvm_userspace_mem;
@@@ -1930,16 -2262,24 +2262,24 @@@
                 }
                 break;
         case KVM_CREATE_PIT:
-               mutex_lock(&kvm->lock);
+               u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
+               goto create_pit;
+       case KVM_CREATE_PIT2:
+               r = -EFAULT;
+               if (copy_from_user(&u.pit_config, argp,
+                                  sizeof(struct kvm_pit_config)))
+                       goto out;
+       create_pit:
+               down_write(&kvm->slots_lock);
                 r = -EEXIST;
                 if (kvm->arch.vpit)
                         goto create_pit_unlock;
                 r = -ENOMEM;
-               kvm->arch.vpit = kvm_create_pit(kvm);
+               kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
                 if (kvm->arch.vpit)
                         r = 0;
         create_pit_unlock:
-               mutex_unlock(&kvm->lock);
+               up_write(&kvm->slots_lock);
                 break;
         case KVM_IRQ_LINE_STATUS:
         case KVM_IRQ_LINE: {
@@@ -1950,10 -2290,10 +2290,10 @@@
                         goto out;
                 if (irqchip_in_kernel(kvm)) {
                         __s32 status;
-                       mutex_lock(&kvm->lock);
+                       mutex_lock(&kvm->irq_lock);
                         status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
                                         irq_event.irq, irq_event.level);
-                       mutex_unlock(&kvm->lock);
+                       mutex_unlock(&kvm->irq_lock);
                         if (ioctl == KVM_IRQ_LINE_STATUS) {
                                 irq_event.status = status;
                                 if (copy_to_user(argp, &irq_event,
@@@ -2042,6 -2382,32 +2382,32 @@@
                 r = 0;
                 break;
         }
+       case KVM_GET_PIT2: {
+               r = -ENXIO;
+               if (!kvm->arch.vpit)
+                       goto out;
+               r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
+               if (r)
+                       goto out;
+               r = -EFAULT;
+               if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
+                       goto out;
+               r = 0;
+               break;
+       }
+       case KVM_SET_PIT2: {
+               r = -EFAULT;
+               if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
+                       goto out;
+               r = -ENXIO;
+               if (!kvm->arch.vpit)
+                       goto out;
+               r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
+               if (r)
+                       goto out;
+               r = 0;
+               break;
+       }
         case KVM_REINJECT_CONTROL: {
                 struct kvm_reinject_control control;
                 r =  -EFAULT;
@@@ -2075,35 -2441,23 +2441,23 @@@ static void kvm_init_msr_list(void
         num_msrs_to_save = j;
   }
   
- /*
-  * Only apic need an MMIO device hook, so shortcut now..
-  */
- static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
-                                               gpa_t addr, int len,
-                                               int is_write)
+ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
+                          const void *v)
   {
-       struct kvm_io_device *dev;
+       if (vcpu->arch.apic &&
+           !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
+               return 0;
   
-       if (vcpu->arch.apic) {
-               dev = &vcpu->arch.apic->dev;
-               if (dev->in_range(dev, addr, len, is_write))
-                       return dev;
-       }
-       return NULL;
+       return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v);
   }
   
- 
- static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
-                                               gpa_t addr, int len,
-                                               int is_write)
+ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
   {
-       struct kvm_io_device *dev;
+       if (vcpu->arch.apic &&
+           !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
+               return 0;
   
-       dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write);
-       if (dev == NULL)
-               dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len,
-                                         is_write);
-       return dev;
+       return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v);
   }
   
   static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
@@@ -2172,11 -2526,12 +2526,12 @@@ static int emulator_read_emulated(unsig
                                   unsigned int bytes,
                                   struct kvm_vcpu *vcpu)
   {
-       struct kvm_io_device *mmio_dev;
         gpa_t                 gpa;
   
         if (vcpu->mmio_read_completed) {
                 memcpy(val, vcpu->mmio_data, bytes);
+               trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
+                              vcpu->mmio_phys_addr, *(u64 *)val);
                 vcpu->mmio_read_completed = 0;
                 return X86EMUL_CONTINUE;
         }
@@@ -2197,14 -2552,12 +2552,12 @@@ mmio
         /*
          * Is this MMIO handled locally?
          */
-       mutex_lock(&vcpu->kvm->lock);
-       mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
-       if (mmio_dev) {
-               kvm_iodevice_read(mmio_dev, gpa, bytes, val);
-               mutex_unlock(&vcpu->kvm->lock);
+       if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
+               trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
                 return X86EMUL_CONTINUE;
         }
-       mutex_unlock(&vcpu->kvm->lock);
+ 
+       trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
   
         vcpu->mmio_needed = 1;
         vcpu->mmio_phys_addr = gpa;
@@@ -2231,7 -2584,6 +2584,6 @@@ static int emulator_write_emulated_onep
                                            unsigned int bytes,
                                            struct kvm_vcpu *vcpu)
   {
-       struct kvm_io_device *mmio_dev;
         gpa_t                 gpa;
   
         gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
@@@ -2249,17 -2601,12 +2601,12 @@@
                 return X86EMUL_CONTINUE;
   
   mmio:
+       trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
         /*
          * Is this MMIO handled locally?
          */
-       mutex_lock(&vcpu->kvm->lock);
-       mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
-       if (mmio_dev) {
-               kvm_iodevice_write(mmio_dev, gpa, bytes, val);
-               mutex_unlock(&vcpu->kvm->lock);
+       if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
                 return X86EMUL_CONTINUE;
-       }
-       mutex_unlock(&vcpu->kvm->lock);
   
         vcpu->mmio_needed = 1;
         vcpu->mmio_phys_addr = gpa;
@@@ -2297,7 -2644,12 +2644,7 @@@ static int emulator_cmpxchg_emulated(un
                                      unsigned int bytes,
                                      struct kvm_vcpu *vcpu)
   {
- -      static int reported;
- -
- -      if (!reported) {
- -              reported = 1;
- -              printk(KERN_WARNING "kvm: emulating exchange as write\n");
- -      }
+ +      printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
   #ifndef CONFIG_X86_64
         /* guests cmpxchg8b have to be emulated atomically */
         if (bytes == 8) {
@@@ -2343,7 -2695,6 +2690,6 @@@ int emulate_invlpg(struct kvm_vcpu *vcp
   
   int emulate_clts(struct kvm_vcpu *vcpu)
   {
-       KVMTRACE_0D(CLTS, vcpu, handler);
         kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
         return X86EMUL_CONTINUE;
   }
@@@ -2420,7 -2771,7 +2766,7 @@@ int emulate_instruction(struct kvm_vcp
         kvm_clear_exception_queue(vcpu);
         vcpu->arch.mmio_fault_cr2 = cr2;
         /*
-        * TODO: fix x86_emulate.c to use guest_read/write_register
+        * TODO: fix emulate.c to use guest_read/write_register
          * instead of direct ->regs accesses, can save hundred cycles
          * on Intel for instructions that don't read/change RSP, for
          * for example.
@@@ -2444,14 -2795,33 +2790,33 @@@
   
                 r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
   
-               /* Reject the instructions other than VMCALL/VMMCALL when
-                * try to emulate invalid opcode */
+               /* Only allow emulation of specific instructions on #UD
+                * (namely VMMCALL, sysenter, sysexit, syscall)*/
                 c = &vcpu->arch.emulate_ctxt.decode;
-               if ((emulation_type & EMULTYPE_TRAP_UD) &&
-                   (!(c->twobyte && c->b == 0x01 &&
-                     (c->modrm_reg == 0 || c->modrm_reg == 3) &&
-                      c->modrm_mod == 3 && c->modrm_rm == 1)))
-                       return EMULATE_FAIL;
+               if (emulation_type & EMULTYPE_TRAP_UD) {
+                       if (!c->twobyte)
+                               return EMULATE_FAIL;
+                       switch (c->b) {
+                       case 0x01: /* VMMCALL */
+                               if (c->modrm_mod != 3 || c->modrm_rm != 1)
+                                       return EMULATE_FAIL;
+                               break;
+                       case 0x34: /* sysenter */
+                       case 0x35: /* sysexit */
+                               if (c->modrm_mod != 0 || c->modrm_rm != 0)
+                                       return EMULATE_FAIL;
+                               break;
+                       case 0x05: /* syscall */
+                               if (c->modrm_mod != 0 || c->modrm_rm != 0)
+                                       return EMULATE_FAIL;
+                               break;
+                       default:
+                               return EMULATE_FAIL;
+                       }
+ 
+                       if (!(c->modrm_reg == 0 || c->modrm_reg == 3))
+                               return EMULATE_FAIL;
+               }
   
                 ++vcpu->stat.insn_emulation;
                 if (r)  {
@@@ -2571,52 -2941,40 +2936,40 @@@ int complete_pio(struct kvm_vcpu *vcpu
         return 0;
   }
   
- static void kernel_pio(struct kvm_io_device *pio_dev,
-                      struct kvm_vcpu *vcpu,
-                      void *pd)
+ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
   {
         /* TODO: String I/O for in kernel device */
+       int r;
   
-       mutex_lock(&vcpu->kvm->lock);
         if (vcpu->arch.pio.in)
-               kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
-                                 vcpu->arch.pio.size,
-                                 pd);
+               r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
+                                   vcpu->arch.pio.size, pd);
         else
-               kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
-                                  vcpu->arch.pio.size,
-                                  pd);
-       mutex_unlock(&vcpu->kvm->lock);
+               r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port,
+                                    vcpu->arch.pio.size, pd);
+       return r;
   }
   
- static void pio_string_write(struct kvm_io_device *pio_dev,
-                            struct kvm_vcpu *vcpu)
+ static int pio_string_write(struct kvm_vcpu *vcpu)
   {
         struct kvm_pio_request *io = &vcpu->arch.pio;
         void *pd = vcpu->arch.pio_data;
-       int i;
+       int i, r = 0;
   
-       mutex_lock(&vcpu->kvm->lock);
         for (i = 0; i < io->cur_count; i++) {
-               kvm_iodevice_write(pio_dev, io->port,
-                                  io->size,
-                                  pd);
+               if (kvm_io_bus_write(&vcpu->kvm->pio_bus,
+                                    io->port, io->size, pd)) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
                 pd += io->size;
         }
-       mutex_unlock(&vcpu->kvm->lock);
- }
- 
- static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
-                                              gpa_t addr, int len,
-                                              int is_write)
- {
-       return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
+       return r;
   }
   
   int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
                   int size, unsigned port)
   {
-       struct kvm_io_device *pio_dev;
         unsigned long val;
   
         vcpu->run->exit_reason = KVM_EXIT_IO;
@@@ -2630,19 -2988,13 +2983,13 @@@
         vcpu->arch.pio.down = 0;
         vcpu->arch.pio.rep = 0;
   
-       if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
-               KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
-                           handler);
-       else
-               KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
-                           handler);
+       trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
+                     size, 1);
   
         val = kvm_register_read(vcpu, VCPU_REGS_RAX);
         memcpy(vcpu->arch.pio_data, &val, 4);
   
-       pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
-       if (pio_dev) {
-               kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
+       if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
                 complete_pio(vcpu);
                 return 1;
         }
@@@ -2656,7 -3008,6 +3003,6 @@@ int kvm_emulate_pio_string(struct kvm_v
   {
         unsigned now, in_page;
         int ret = 0;
-       struct kvm_io_device *pio_dev;
   
         vcpu->run->exit_reason = KVM_EXIT_IO;
         vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
@@@ -2669,12 -3020,8 +3015,8 @@@
         vcpu->arch.pio.down = down;
         vcpu->arch.pio.rep = rep;
   
-       if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
-               KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
-                           handler);
-       else
-               KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
-                           handler);
+       trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
+                     size, count);
   
         if (!count) {
                 kvm_x86_ops->skip_emulated_instruction(vcpu);
@@@ -2704,9 -3051,6 +3046,6 @@@
   
         vcpu->arch.pio.guest_gva = address;
   
-       pio_dev = vcpu_find_pio_dev(vcpu, port,
-                                   vcpu->arch.pio.cur_count,
-                                   !vcpu->arch.pio.in);
         if (!vcpu->arch.pio.in) {
                 /* string PIO write */
                 ret = pio_copy_data(vcpu);
@@@ -2714,16 -3058,13 +3053,13 @@@
                         kvm_inject_gp(vcpu, 0);
                         return 1;
                 }
-               if (ret == 0 && pio_dev) {
-                       pio_string_write(pio_dev, vcpu);
+               if (ret == 0 && !pio_string_write(vcpu)) {
                         complete_pio(vcpu);
                         if (vcpu->arch.pio.count == 0)
                                 ret = 1;
                 }
-       } else if (pio_dev)
-               pr_unimpl(vcpu, "no string pio read support yet, "
-                      "port %x size %d count %ld\n",
-                       port, size, count);
+       }
+       /* no string PIO read support yet */
   
         return ret;
   }
@@@ -2756,10 -3097,7 +3092,7 @@@ static int kvmclock_cpufreq_notifier(st
   
         spin_lock(&kvm_lock);
         list_for_each_entry(kvm, &vm_list, vm_list) {
-               for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-                       vcpu = kvm->vcpus[i];
-                       if (!vcpu)
-                               continue;
+               kvm_for_each_vcpu(i, vcpu, kvm) {
                         if (vcpu->cpu != freq->cpu)
                                 continue;
                         if (!kvm_request_guest_time_update(vcpu))
@@@ -2852,7 -3190,6 +3185,6 @@@ void kvm_arch_exit(void
   int kvm_emulate_halt(struct kvm_vcpu *vcpu)
   {
         ++vcpu->stat.halt_exits;
-       KVMTRACE_0D(HLT, vcpu, handler);
         if (irqchip_in_kernel(vcpu->kvm)) {
                 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
                 return 1;
@@@ -2883,7 -3220,7 +3215,7 @@@ int kvm_emulate_hypercall(struct kvm_vc
         a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
         a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
   
-       KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
+       trace_kvm_hypercall(nr, a0, a1, a2, a3);
   
         if (!is_long_mode(vcpu)) {
                 nr &= 0xFFFFFFFF;
@@@ -2893,6 -3230,11 +3225,11 @@@
                 a3 &= 0xFFFFFFFF;
         }
   
+       if (kvm_x86_ops->get_cpl(vcpu) != 0) {
+               ret = -KVM_EPERM;
+               goto out;
+       }
+ 
         switch (nr) {
         case KVM_HC_VAPIC_POLL_IRQ:
                 ret = 0;
@@@ -2904,6 -3246,7 +3241,7 @@@
                 ret = -KVM_ENOSYS;
                 break;
         }
+ out:
         kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
         ++vcpu->stat.hypercalls;
         return r;
@@@ -2983,8 -3326,6 +3321,6 @@@ unsigned long realmode_get_cr(struct kv
                 vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
                 return 0;
         }
-       KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value,
-                   (u32)((u64)value >> 32), handler);
   
         return value;
   }
@@@ -2992,9 -3333,6 +3328,6 @@@
   void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
                      unsigned long *rflags)
   {
-       KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val,
-                   (u32)((u64)val >> 32), handler);
- 
         switch (cr) {
         case 0:
                 kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
@@@ -3104,11 -3442,11 +3437,11 @@@ void kvm_emulate_cpuid(struct kvm_vcpu 
                 kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
         }
         kvm_x86_ops->skip_emulated_instruction(vcpu);
-       KVMTRACE_5D(CPUID, vcpu, function,
-                   (u32)kvm_register_read(vcpu, VCPU_REGS_RAX),
-                   (u32)kvm_register_read(vcpu, VCPU_REGS_RBX),
-                   (u32)kvm_register_read(vcpu, VCPU_REGS_RCX),
-                   (u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler);
+       trace_kvm_cpuid(function,
+                       kvm_register_read(vcpu, VCPU_REGS_RAX),
+                       kvm_register_read(vcpu, VCPU_REGS_RBX),
+                       kvm_register_read(vcpu, VCPU_REGS_RCX),
+                       kvm_register_read(vcpu, VCPU_REGS_RDX));
   }
   EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
   
@@@ -3174,6 -3512,9 +3507,9 @@@ static void update_cr8_intercept(struc
         if (!kvm_x86_ops->update_cr8_intercept)
                 return;
   
+       if (!vcpu->arch.apic)
+               return;
+ 
         if (!vcpu->arch.apic->vapic_addr)
                 max_irr = kvm_lapic_find_highest_irr(vcpu);
         else
@@@ -3187,12 -3528,16 +3523,16 @@@
         kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
   }
   
- static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+ static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
   {
-       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-               kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
- 
         /* try to reinject previous events if any */
+       if (vcpu->arch.exception.pending) {
+               kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
+                                         vcpu->arch.exception.has_error_code,
+                                         vcpu->arch.exception.error_code);
+               return;
+       }
+ 
         if (vcpu->arch.nmi_injected) {
                 kvm_x86_ops->set_nmi(vcpu);
                 return;
@@@ -3266,16 -3611,14 +3606,14 @@@ static int vcpu_enter_guest(struct kvm_
         smp_mb__after_clear_bit();
   
         if (vcpu->requests || need_resched() || signal_pending(current)) {
+               set_bit(KVM_REQ_KICK, &vcpu->requests);
                 local_irq_enable();
                 preempt_enable();
                 r = 1;
                 goto out;
         }
   
-       if (vcpu->arch.exception.pending)
-               __queue_exception(vcpu);
-       else
-               inject_pending_irq(vcpu, kvm_run);
+       inject_pending_event(vcpu, kvm_run);
   
         /* enable NMI/IRQ window open exits if needed */
         if (vcpu->arch.nmi_pending)
@@@ -3292,14 -3635,7 +3630,7 @@@
   
         kvm_guest_enter();
   
-       get_debugreg(vcpu->arch.host_dr6, 6);
-       get_debugreg(vcpu->arch.host_dr7, 7);
         if (unlikely(vcpu->arch.switch_db_regs)) {
-               get_debugreg(vcpu->arch.host_db[0], 0);
-               get_debugreg(vcpu->arch.host_db[1], 1);
-               get_debugreg(vcpu->arch.host_db[2], 2);
-               get_debugreg(vcpu->arch.host_db[3], 3);
- 
                 set_debugreg(0, 7);
                 set_debugreg(vcpu->arch.eff_db[0], 0);
                 set_debugreg(vcpu->arch.eff_db[1], 1);
@@@ -3307,18 -3643,17 +3638,17 @@@
                 set_debugreg(vcpu->arch.eff_db[3], 3);
         }
   
-       KVMTRACE_0D(VMENTRY, vcpu, entryexit);
+       trace_kvm_entry(vcpu->vcpu_id);
         kvm_x86_ops->run(vcpu, kvm_run);
   
-       if (unlikely(vcpu->arch.switch_db_regs)) {
-               set_debugreg(0, 7);
-               set_debugreg(vcpu->arch.host_db[0], 0);
-               set_debugreg(vcpu->arch.host_db[1], 1);
-               set_debugreg(vcpu->arch.host_db[2], 2);
-               set_debugreg(vcpu->arch.host_db[3], 3);
+       if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
+               set_debugreg(current->thread.debugreg0, 0);
+               set_debugreg(current->thread.debugreg1, 1);
+               set_debugreg(current->thread.debugreg2, 2);
+               set_debugreg(current->thread.debugreg3, 3);
+               set_debugreg(current->thread.debugreg6, 6);
+               set_debugreg(current->thread.debugreg7, 7);
         }
-       set_debugreg(vcpu->arch.host_dr6, 6);
-       set_debugreg(vcpu->arch.host_dr7, 7);
   
         set_bit(KVM_REQ_KICK, &vcpu->requests);
         local_irq_enable();
@@@ -3648,11 -3983,8 +3978,8 @@@ static void kvm_set_segment(struct kvm_
   static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
                                    struct kvm_segment *kvm_desct)
   {
-       kvm_desct->base = seg_desc->base0;
-       kvm_desct->base |= seg_desc->base1 << 16;
-       kvm_desct->base |= seg_desc->base2 << 24;
-       kvm_desct->limit = seg_desc->limit0;
-       kvm_desct->limit |= seg_desc->limit << 16;
+       kvm_desct->base = get_desc_base(seg_desc);
+       kvm_desct->limit = get_desc_limit(seg_desc);
         if (seg_desc->g) {
                 kvm_desct->limit <<= 12;
                 kvm_desct->limit |= 0xfff;
@@@ -3696,7 -4028,6 +4023,6 @@@ static void get_segment_descriptor_dtab
   static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
                                          struct desc_struct *seg_desc)
   {
-       gpa_t gpa;
         struct descriptor_table dtable;
         u16 index = selector >> 3;
   
@@@ -3706,16 -4037,13 +4032,13 @@@
                 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
                 return 1;
         }
-       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
-       gpa += index * 8;
-       return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8);
+       return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
   }
   
   /* allowed just for 8 bytes segments */
   static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
                                          struct desc_struct *seg_desc)
   {
-       gpa_t gpa;
         struct descriptor_table dtable;
         u16 index = selector >> 3;
   
@@@ -3723,19 -4051,13 +4046,13 @@@
   
         if (dtable.limit < index * 8 + 7)
                 return 1;
-       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
-       gpa += index * 8;
-       return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8);
+       return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
   }
   
   static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
                              struct desc_struct *seg_desc)
   {
-       u32 base_addr;
- 
-       base_addr = seg_desc->base0;
-       base_addr |= (seg_desc->base1 << 16);
-       base_addr |= (seg_desc->base2 << 24);
+       u32 base_addr = get_desc_base(seg_desc);
   
         return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr);
   }
@@@ -3780,12 -4102,19 +4097,19 @@@ static int kvm_load_realmode_segment(st
         return 0;
   }
   
+ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
+ {
+       return (seg != VCPU_SREG_LDTR) &&
+               (seg != VCPU_SREG_TR) &&
+               (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_VM);
+ }
+ 
   int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
                                 int type_bits, int seg)
   {
         struct kvm_segment kvm_seg;
   
-       if (!(vcpu->arch.cr0 & X86_CR0_PE))
+       if (is_vm86_segment(vcpu, seg) || !(vcpu->arch.cr0 & X86_CR0_PE))
                 return kvm_load_realmode_segment(vcpu, selector, seg);
         if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
                 return 1;
@@@ -4024,7 -4353,7 +4348,7 @@@ int kvm_task_switch(struct kvm_vcpu *vc
                 }
         }
   
-       if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) {
+       if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) {
                 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
                 return 1;
         }
@@@ -4094,13 -4423,7 +4418,7 @@@ int kvm_arch_vcpu_ioctl_set_sregs(struc
   
         vcpu->arch.cr2 = sregs->cr2;
         mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
- 
-       down_read(&vcpu->kvm->slots_lock);
-       if (gfn_to_memslot(vcpu->kvm, sregs->cr3 >> PAGE_SHIFT))
-               vcpu->arch.cr3 = sregs->cr3;
-       else
-               set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
-       up_read(&vcpu->kvm->slots_lock);
+       vcpu->arch.cr3 = sregs->cr3;
   
         kvm_set_cr8(vcpu, sregs->cr8);
   
@@@ -4142,8 -4465,10 +4460,10 @@@
         kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
         kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
   
+       update_cr8_intercept(vcpu);
+ 
         /* Older userspace won't unhalt the vcpu on reset. */
-       if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 &&
+       if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
             sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
             !(vcpu->arch.cr0 & X86_CR0_PE))
                 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@@ -4414,7 -4739,7 +4734,7 @@@ int kvm_arch_vcpu_init(struct kvm_vcpu 
         kvm = vcpu->kvm;
   
         vcpu->arch.mmu.root_hpa = INVALID_PAGE;
-       if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
+       if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
                 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
         else
                 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
@@@ -4436,6 -4761,14 +4756,14 @@@
                         goto fail_mmu_destroy;
         }
   
+       vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
+                                      GFP_KERNEL);
+       if (!vcpu->arch.mce_banks) {
+               r = -ENOMEM;
+               goto fail_mmu_destroy;
+       }
+       vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
+ 
         return 0;
   
   fail_mmu_destroy:
@@@ -4483,20 -4816,22 +4811,22 @@@ static void kvm_unload_vcpu_mmu(struct 
   static void kvm_free_vcpus(struct kvm *kvm)
   {
         unsigned int i;
+       struct kvm_vcpu *vcpu;
   
         /*
          * Unpin any mmu pages first.
          */
-       for (i = 0; i < KVM_MAX_VCPUS; ++i)
-               if (kvm->vcpus[i])
-                       kvm_unload_vcpu_mmu(kvm->vcpus[i]);
-       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-               if (kvm->vcpus[i]) {
-                       kvm_arch_vcpu_free(kvm->vcpus[i]);
-                       kvm->vcpus[i] = NULL;
-               }
-       }
+       kvm_for_each_vcpu(i, vcpu, kvm)
+               kvm_unload_vcpu_mmu(vcpu);
+       kvm_for_each_vcpu(i, vcpu, kvm)
+               kvm_arch_vcpu_free(vcpu);
+ 
+       mutex_lock(&kvm->lock);
+       for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
+               kvm->vcpus[i] = NULL;
   
+       atomic_set(&kvm->online_vcpus, 0);
+       mutex_unlock(&kvm->lock);
   }
   
   void kvm_arch_sync_events(struct kvm *kvm)
@@@ -4573,7 -4908,6 +4903,6 @@@ int kvm_arch_set_memory_region(struct k
   
         kvm_mmu_slot_remove_write_access(kvm, mem->slot);
         spin_unlock(&kvm->mmu_lock);
-       kvm_flush_remote_tlbs(kvm);
   
         return 0;
   }
@@@ -4587,8 -4921,10 +4916,10 @@@ void kvm_arch_flush_shadow(struct kvm *
   int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
   {
         return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
-              || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
-              || vcpu->arch.nmi_pending;
+               || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
+               || vcpu->arch.nmi_pending ||
+               (kvm_arch_interrupt_allowed(vcpu) &&
+                kvm_cpu_has_interrupt(vcpu));
   }
   
   void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
@@@ -4612,3 -4948,9 +4943,9 @@@ int kvm_arch_interrupt_allowed(struct k
   {
         return kvm_x86_ops->interrupt_allowed(vcpu);
   }
+ 
+ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
diff --combined arch/x86/mm/highmem_32.c

index 1617958,572f47c..63a6ba6
--- 1/arch/x86/mm/highmem_32.c
--- 2/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@@ -24,7 -24,7 +24,7 @@@ void kunmap(struct page *page
    * no global lock is needed and because the kmap code must perform a global TLB
    * invalidation when the kmap pool wraps.
    *
- - * However when holding an atomic kmap is is not legal to sleep, so atomic
+ + * However when holding an atomic kmap it is not legal to sleep, so atomic
    * kmaps are appropriate for short, tight code paths only.
    */
   void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
@@@ -104,6 -104,7 +104,7 @@@ EXPORT_SYMBOL(kunmap)
   EXPORT_SYMBOL(kmap_atomic);
   EXPORT_SYMBOL(kunmap_atomic);
   EXPORT_SYMBOL(kmap_atomic_prot);
+ EXPORT_SYMBOL(kmap_atomic_to_page);
   
   void __init set_highmem_pages_init(void)
   {
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 15 Sep 2009 00:43:43 +0000 (17:43 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 15 Sep 2009 00:43:43 +0000 (17:43 -0700)
		1	2
Documentation/ioctl/ioctl-number.txt	patch \|	diff1 \|	diff2 \|	blob \| history
Documentation/kernel-parameters.txt	patch \|	diff1 \|	diff2 \|	blob \| history
MAINTAINERS	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/include/asm/kvm_host.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/apicdef.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/x86.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/mm/highmem_32.c	patch \|	diff1 \|	diff2 \|	blob \| history