Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 15 Nov 2013 04:51:36 +0000 (13:51 +0900)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 15 Nov 2013 04:51:36 +0000 (13:51 +0900)
Pull KVM changes from Paolo Bonzini:
 "Here are the 3.13 KVM changes.  There was a lot of work on the PPC
  side: the HV and emulation flavors can now coexist in a single kernel
  is probably the most interesting change from a user point of view.

  On the x86 side there are nested virtualization improvements and a few
  bugfixes.

  ARM got transparent huge page support, improved overcommit, and
  support for big endian guests.

  Finally, there is a new interface to connect KVM with VFIO.  This
  helps with devices that use NoSnoop PCI transactions, letting the
  driver in the guest execute WBINVD instructions.  This includes some
  NVIDIA cards on Windows, which fail to start without these patches and
  the corresponding userspace changes"
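
As a minimal sketch of the new KVM-VFIO interface mentioned above (not
part of the merge text itself; the fds and error handling are simplified
assumptions): userspace creates the KVM-VFIO pseudo-device with
KVM_CREATE_DEVICE and registers each VFIO group fd through
KVM_SET_DEVICE_ATTR, which is what lets KVM notice non-coherent
(NoSnoop-capable) DMA and honor guest WBINVD.

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Attach an already-open VFIO group fd to an already-open VM fd.
     * Returns the KVM-VFIO device fd, or -1 on error (errno is set). */
    static int kvm_vfio_attach_group(int vm_fd, int vfio_group_fd)
    {
            struct kvm_create_device cd = { .type = KVM_DEV_TYPE_VFIO };
            struct kvm_device_attr attr = {
                    .group = KVM_DEV_VFIO_GROUP,
                    .attr  = KVM_DEV_VFIO_GROUP_ADD,
                    /* addr points to an int32_t holding the group fd */
                    .addr  = (__u64)(unsigned long)&vfio_group_fd,
            };

            if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
                    return -1;
            /* cd.fd now refers to the VM's KVM-VFIO device */
            if (ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
                    return -1;
            return cd.fd;
    }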

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (146 commits)
  kvm, vmx: Fix lazy FPU on nested guest
  arm/arm64: KVM: PSCI: propagate caller endianness to the incoming vcpu
  arm/arm64: KVM: MMIO support for BE guest
  kvm, cpuid: Fix sparse warning
  kvm: Delete prototype for non-existent function kvm_check_iopl
  kvm: Delete prototype for non-existent function complete_pio
  hung_task: add method to reset detector
  pvclock: detect watchdog reset at pvclock read
  kvm: optimize out smp_mb after srcu_read_unlock
  srcu: API for barrier after srcu read unlock
  KVM: remove vm mmap method
  KVM: IOMMU: hva align mapping page size
  KVM: x86: trace cpuid emulation when called from emulator
  KVM: emulator: cleanup decode_register_operand() a bit
  KVM: emulator: check rex prefix inside decode_register()
  KVM: x86: fix emulation of "movzbl %bpl, %eax"
  kvm_host: typo fix
  KVM: x86: emulate SAHF instruction
  MAINTAINERS: add tree for kvm.git
  Documentation/kvm: add a 00-INDEX file
  ...

15 files changed:
MAINTAINERS
arch/arm/include/asm/pgtable-3level.h
arch/arm/kvm/arm.c
arch/powerpc/include/asm/processor.h
arch/powerpc/include/asm/reg.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/traps.c
arch/powerpc/kvm/Kconfig
arch/powerpc/kvm/book3s_pr.c
arch/powerpc/kvm/booke.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
include/linux/sched.h
kernel/hung_task.c
virt/kvm/kvm_main.c

diff --combined MAINTAINERS
@@@ -253,20 -253,6 +253,20 @@@ F:       drivers/pci/*acpi
  F:    drivers/pci/*/*acpi*
  F:    drivers/pci/*/*/*acpi*
  
 +ACPI COMPONENT ARCHITECTURE (ACPICA)
 +M:    Robert Moore <robert.moore@intel.com>
 +M:    Lv Zheng <lv.zheng@intel.com>
 +M:    Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 +L:    linux-acpi@vger.kernel.org
 +L:    devel@acpica.org
 +W:    https://acpica.org/
 +W:    https://github.com/acpica/acpica/
 +Q:    https://patchwork.kernel.org/project/linux-acpi/list/
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
 +S:    Supported
 +F:    drivers/acpi/acpica/
 +F:    include/acpi/
 +
  ACPI FAN DRIVER
  M:    Zhang Rui <rui.zhang@intel.com>
  L:    linux-acpi@vger.kernel.org
@@@ -777,10 -763,6 +777,10 @@@ W:       http://maxim.org.za/at91_26.htm
  W:    http://www.linux4sam.org
  S:    Supported
  F:    arch/arm/mach-at91/
 +F:    arch/arm/boot/dts/at91*.dts
 +F:    arch/arm/boot/dts/at91*.dtsi
 +F:    arch/arm/boot/dts/sama*.dts
 +F:    arch/arm/boot/dts/sama*.dtsi
  
  ARM/CALXEDA HIGHBANK ARCHITECTURE
  M:    Rob Herring <rob.herring@calxeda.com>
@@@ -947,7 -929,7 +947,7 @@@ M: Javier Martinez Canillas <javier@dow
  L:    linux-omap@vger.kernel.org
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
 -F:    arch/arm/mach-omap2/board-igep0020.c
 +F:    arch/arm/boot/dts/omap3-igep*
  
  ARM/INCOME PXA270 SUPPORT
  M:    Marek Vasut <marek.vasut@gmail.com>
@@@ -1027,7 -1009,6 +1027,7 @@@ ARM/Marvell Armada 370 and Armada XP SO
  M:    Jason Cooper <jason@lakedaemon.net>
  M:    Andrew Lunn <andrew@lunn.ch>
  M:    Gregory Clement <gregory.clement@free-electrons.com>
 +M:    Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
  F:    arch/arm/mach-mvebu/
  ARM/Marvell Dove/Kirkwood/MV78xx0/Orion SOC support
  M:    Jason Cooper <jason@lakedaemon.net>
  M:    Andrew Lunn <andrew@lunn.ch>
 +M:    Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
  F:    arch/arm/mach-dove/
@@@ -1168,12 -1148,10 +1168,12 @@@ F:   drivers/net/ethernet/i825xx/ether1
  F:    drivers/net/ethernet/seeq/ether3*
  F:    drivers/scsi/arm/
  
 -ARM/SHARK MACHINE SUPPORT
 -M:    Alexander Schulz <alex@shark-linux.de>
 -W:    http://www.shark-linux.de/shark.html
 +ARM/Rockchip SoC support
 +M:    Heiko Stuebner <heiko@sntech.de>
 +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
 +F:    arch/arm/mach-rockchip/
 +F:    drivers/*/*rockchip*
  
  ARM/SAMSUNG ARM ARCHITECTURES
  M:    Ben Dooks <ben-linux@fluff.org>
@@@ -1182,8 -1160,6 +1182,8 @@@ L:      linux-arm-kernel@lists.infradead.or
  L:    linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
  W:    http://www.fluff.org/ben/linux/
  S:    Maintained
 +F:    arch/arm/boot/dts/s3c*
 +F:    arch/arm/boot/dts/exynos*
  F:    arch/arm/plat-samsung/
  F:    arch/arm/mach-s3c24*/
  F:    arch/arm/mach-s3c64xx/
@@@ -1675,15 -1651,16 +1675,15 @@@ S:   Maintaine
  F:    drivers/net/wireless/b43legacy/
  
  BACKLIGHT CLASS/SUBSYSTEM
 -M:    Richard Purdie <rpurdie@rpsys.net>
  M:    Jingoo Han <jg1.han@samsung.com>
  S:    Maintained
  F:    drivers/video/backlight/
  F:    include/linux/backlight.h
  
  BATMAN ADVANCED
 -M:    Marek Lindner <lindner_marek@yahoo.de>
 -M:    Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
 -M:    Antonio Quartulli <ordex@autistici.org>
 +M:    Marek Lindner <mareklindner@neomailbox.ch>
 +M:    Simon Wunderlich <sw@simonwunderlich.de>
 +M:    Antonio Quartulli <antonio@meshcoding.com>
  L:    b.a.t.m.a.n@lists.open-mesh.org
  W:    http://www.open-mesh.org/
  S:    Maintained
@@@ -1814,7 -1791,6 +1814,7 @@@ F:      include/net/bluetooth
  
  BONDING DRIVER
  M:    Jay Vosburgh <fubar@us.ibm.com>
 +M:    Veaceslav Falico <vfalico@redhat.com>
  M:    Andy Gospodarek <andy@greyhouse.net>
  L:    netdev@vger.kernel.org
  W:    http://sourceforge.net/projects/bonding/
@@@ -1836,7 -1812,7 +1836,7 @@@ F:      drivers/net/ethernet/broadcom/bnx2.
  F:    drivers/net/ethernet/broadcom/bnx2_*
  
  BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER
 -M:    Eilon Greenstein <eilong@broadcom.com>
 +M:    Ariel Elior <ariele@broadcom.com>
  L:    netdev@vger.kernel.org
  S:    Supported
  F:    drivers/net/ethernet/broadcom/bnx2x/
@@@ -1881,7 -1857,7 +1881,7 @@@ S:      Supporte
  F:    drivers/net/wireless/brcm80211/
  
  BROADCOM BNX2FC 10 GIGABIT FCOE DRIVER
 -M:    Bhanu Prakash Gollapudi <bprakash@broadcom.com>
 +M:    Eddie Wai <eddie.wai@broadcom.com>
  L:    linux-scsi@vger.kernel.org
  S:    Supported
  F:    drivers/scsi/bnx2fc/
@@@ -2386,7 -2362,7 +2386,7 @@@ F:      kernel/cpuset.
  
  CRAMFS FILESYSTEM
  W:    http://sourceforge.net/projects/cramfs/
 -S:    Orphan
 +S:    Orphan / Obsolete
  F:    Documentation/filesystems/cramfs.txt
  F:    fs/cramfs/
  
@@@ -2661,7 -2637,6 +2661,7 @@@ M:      dm-devel@redhat.co
  L:    dm-devel@redhat.com
  W:    http://sources.redhat.com/dm
  Q:    http://patchwork.kernel.org/project/dm-devel/list/
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git
  T:    quilt http://people.redhat.com/agk/patches/linux/editing/
  S:    Maintained
  F:    Documentation/device-mapper/
@@@ -2743,8 -2718,6 +2743,8 @@@ T:      git git://git.linaro.org/people/sumi
  DMA GENERIC OFFLOAD ENGINE SUBSYSTEM
  M:    Vinod Koul <vinod.koul@intel.com>
  M:    Dan Williams <dan.j.williams@intel.com>
 +L:    dmaengine@vger.kernel.org
 +Q:    https://patchwork.kernel.org/project/linux-dmaengine/list/
  S:    Supported
  F:    drivers/dma/
  F:    include/linux/dma*
@@@ -2848,7 -2821,7 +2848,7 @@@ M:      Terje Bergström <tbergstrom@nvidia.
  L:    dri-devel@lists.freedesktop.org
  L:    linux-tegra@vger.kernel.org
  T:    git git://anongit.freedesktop.org/tegra/linux.git
 -S:    Maintained
 +S:    Supported
  F:    drivers/gpu/host1x/
  F:    include/uapi/drm/tegra_drm.h
  F:    Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt
@@@ -3651,12 -3624,6 +3651,12 @@@ L:    linux-scsi@vger.kernel.or
  S:    Odd Fixes (e.g., new signatures)
  F:    drivers/scsi/fdomain.*
  
 +GCOV BASED KERNEL PROFILING
 +M:    Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
 +S:    Maintained
 +F:    kernel/gcov/
 +F:    Documentation/gcov.txt
 +
  GDT SCSI DISK ARRAY CONTROLLER DRIVER
  M:    Achim Leubner <achim_leubner@adaptec.com>
  L:    linux-scsi@vger.kernel.org
@@@ -3706,14 -3673,6 +3706,14 @@@ S:    Maintaine
  F:    include/asm-generic/
  F:    include/uapi/asm-generic/
  
 +GENERIC PHY FRAMEWORK
 +M:    Kishon Vijay Abraham I <kishon@ti.com>
 +L:    linux-kernel@vger.kernel.org
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/kishon/linux-phy.git
 +S:    Supported
 +F:    drivers/phy/
 +F:    include/linux/phy/
 +
  GENERIC UIO DRIVER FOR PCI DEVICES
  M:    "Michael S. Tsirkin" <mst@redhat.com>
  L:    kvm@vger.kernel.org
@@@ -4255,7 -4214,7 +4255,7 @@@ S:      Maintaine
  F:    drivers/media/rc/iguanair.c
  
  IIO SUBSYSTEM AND DRIVERS
 -M:    Jonathan Cameron <jic23@cam.ac.uk>
 +M:    Jonathan Cameron <jic23@kernel.org>
  L:    linux-iio@vger.kernel.org
  S:    Maintained
  F:    drivers/iio/
@@@ -4392,10 -4351,7 +4392,10 @@@ F:    arch/x86/kernel/microcode_intel.
  
  INTEL I/OAT DMA DRIVER
  M:    Dan Williams <dan.j.williams@intel.com>
 -S:    Maintained
 +M:    Dave Jiang <dave.jiang@intel.com>
 +L:    dmaengine@vger.kernel.org
 +Q:    https://patchwork.kernel.org/project/linux-dmaengine/list/
 +S:    Supported
  F:    drivers/dma/ioat*
  
  INTEL IOMMU (VT-d)
@@@ -4454,12 -4410,6 +4454,12 @@@ F:    Documentation/networking/ixgbevf.tx
  F:    Documentation/networking/i40e.txt
  F:    drivers/net/ethernet/intel/
  
 +INTEL-MID GPIO DRIVER
 +M:    David Cohen <david.a.cohen@linux.intel.com>
 +L:    linux-gpio@vger.kernel.org
 +S:    Maintained
 +F:    drivers/gpio/gpio-intel-mid.c
 +
  INTEL PRO/WIRELESS 2100, 2200BG, 2915ABG NETWORK CONNECTION SUPPORT
  M:    Stanislav Yakovlev <stas.yakovlev@gmail.com>
  L:    linux-wireless@vger.kernel.org
@@@ -4798,13 -4748,6 +4798,13 @@@ S:    Maintaine
  F:    Documentation/hwmon/k8temp
  F:    drivers/hwmon/k8temp.c
  
 +KTAP
 +M:    Jovi Zhangwei <jovi.zhangwei@gmail.com>
 +W:    http://www.ktap.org
 +L:    ktap@freelists.org
 +S:    Maintained
 +F:    drivers/staging/ktap/
 +
  KCONFIG
  M:    Michal Marek <mmarek@suse.cz>
  L:    linux-kbuild@vger.kernel.org
@@@ -4871,7 -4814,8 +4871,8 @@@ KERNEL VIRTUAL MACHINE (KVM
  M:    Gleb Natapov <gleb@redhat.com>
  M:    Paolo Bonzini <pbonzini@redhat.com>
  L:    kvm@vger.kernel.org
- W:    http://linux-kvm.org
+ W:    http://www.linux-kvm.org
+ T:    git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
  S:    Supported
  F:    Documentation/*/kvm*.txt
  F:    Documentation/virtual/kvm/
@@@ -5389,7 -5333,7 +5390,7 @@@ S:      Orpha
  F:    drivers/net/wireless/libertas/
  
  MARVELL MV643XX ETHERNET DRIVER
 -M:    Lennert Buytenhek <buytenh@wantstofly.org>
 +M:    Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
  L:    netdev@vger.kernel.org
  S:    Maintained
  F:    drivers/net/ethernet/marvell/mv643xx_eth.*
@@@ -6145,12 -6089,6 +6146,12 @@@ L:    linux-omap@vger.kernel.or
  S:    Maintained
  F:    drivers/gpio/gpio-omap.c
  
 +OMAP/NEWFLOW NANOBONE MACHINE SUPPORT
 +M:    Mark Jackson <mpfj@newflow.co.uk>
 +L:    linux-omap@vger.kernel.org
 +S:    Maintained
 +F:    arch/arm/boot/dts/am335x-nano.dts
 +
  OMFS FILESYSTEM
  M:    Bob Copeland <me@bobcopeland.com>
  L:    linux-karma-devel@lists.sourceforge.net
@@@ -6427,7 -6365,6 +6428,7 @@@ S:      Supporte
  F:    Documentation/PCI/
  F:    drivers/pci/
  F:    include/linux/pci*
 +F:    arch/x86/pci/
  
  PCI DRIVER FOR NVIDIA TEGRA
  M:    Thierry Reding <thierry.reding@gmail.com>
@@@ -6436,12 -6373,6 +6437,12 @@@ S:    Supporte
  F:    Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
  F:    drivers/pci/host/pci-tegra.c
  
 +PCI DRIVER FOR SAMSUNG EXYNOS
 +M:    Jingoo Han <jg1.han@samsung.com>
 +L:    linux-pci@vger.kernel.org
 +S:    Maintained
 +F:    drivers/pci/host/pci-exynos.c
 +
  PCMCIA SUBSYSTEM
  P:    Linux PCMCIA Team
  L:    linux-pcmcia@lists.infradead.org
@@@ -6912,14 -6843,6 +6913,14 @@@ L:    linux-hexagon@vger.kernel.or
  S:    Supported
  F:    arch/hexagon/
  
 +QUALCOMM WCN36XX WIRELESS DRIVER
 +M:    Eugene Krasnikov <k.eugene.e@gmail.com>
 +L:    wcn36xx@lists.infradead.org
 +W:    http://wireless.kernel.org/en/users/Drivers/wcn36xx
 +T:    git git://github.com/KrasnikovEugene/wcn36xx.git
 +S:    Supported
 +F:    drivers/net/wireless/ath/wcn36xx/
 +
  QUICKCAM PARALLEL PORT WEBCAMS
  M:    Hans Verkuil <hverkuil@xs4all.nl>
  L:    linux-media@vger.kernel.org
@@@ -7007,7 -6930,7 +7008,7 @@@ M:      "Paul E. McKenney" <paulmck@linux.vn
  S:    Supported
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
  F:    Documentation/RCU/torture.txt
 -F:    kernel/rcutorture.c
 +F:    kernel/rcu/torture.c
  
  RDC R-321X SoC
  M:    Florian Fainelli <florian@openwrt.org>
@@@ -7034,9 -6957,8 +7035,9 @@@ T:      git git://git.kernel.org/pub/scm/lin
  F:    Documentation/RCU/
  X:    Documentation/RCU/torture.txt
  F:    include/linux/rcu*
 -F:    kernel/rcu*
 -X:    kernel/rcutorture.c
 +X:    include/linux/srcu.h
 +F:    kernel/rcu/
 +X:    kernel/rcu/torture.c
  
  REAL TIME CLOCK (RTC) SUBSYSTEM
  M:    Alessandro Zummo <a.zummo@towertech.it>
@@@ -7349,7 -7271,7 +7350,7 @@@ S:      Odd Fixe
  F:    drivers/media/usb/tlg2300/
  
  SC1200 WDT DRIVER
 -M:    Zwane Mwaikambo <zwane@arm.linux.org.uk>
 +M:    Zwane Mwaikambo <zwanem@gmail.com>
  S:    Maintained
  F:    drivers/watchdog/sc1200wdt.c
  
@@@ -7361,8 -7283,6 +7362,8 @@@ S:      Maintaine
  F:    kernel/sched/
  F:    include/linux/sched.h
  F:    include/uapi/linux/sched.h
 +F:    kernel/wait.c
 +F:    include/linux/wait.h
  
  SCORE ARCHITECTURE
  M:    Chen Liqin <liqin.linux@gmail.com>
@@@ -7725,8 -7645,8 +7726,8 @@@ M:      "Paul E. McKenney" <paulmck@linux.vn
  W:    http://www.rdrop.com/users/paulmck/RCU/
  S:    Supported
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
 -F:    include/linux/srcu*
 -F:    kernel/srcu*
 +F:    include/linux/srcu.h
 +F:    kernel/rcu/srcu.c
  
  SMACK SECURITY MODULE
  M:    Casey Schaufler <casey@schaufler-ca.com>
@@@ -7897,13 -7817,6 +7898,13 @@@ F:    Documentation/sound/alsa/soc
  F:    sound/soc/
  F:    include/sound/soc*
  
 +SOUND - DMAENGINE HELPERS
 +M:    Lars-Peter Clausen <lars@metafoo.de>
 +S:    Supported
 +F:    include/sound/dmaengine_pcm.h
 +F:    sound/core/pcm_dmaengine.c
 +F:    sound/soc/soc-generic-dmaengine-pcm.c
 +
  SPARC + UltraSPARC (sparc/sparc64)
  M:    "David S. Miller" <davem@davemloft.net>
  L:    sparclinux@vger.kernel.org
@@@ -8061,7 -7974,7 +8062,7 @@@ S:      Maintaine
  F:    drivers/staging/media/go7007/
  
  STAGING - INDUSTRIAL IO
 -M:    Jonathan Cameron <jic23@cam.ac.uk>
 +M:    Jonathan Cameron <jic23@kernel.org>
  L:    linux-iio@vger.kernel.org
  S:    Odd Fixes
  F:    drivers/staging/iio/
@@@ -8376,72 -8289,14 +8377,72 @@@ L:   linux-media@vger.kernel.or
  S:    Maintained
  F:    drivers/media/rc/ttusbir.c
  
 -TEGRA SUPPORT
 +TEGRA ARCHITECTURE SUPPORT
  M:    Stephen Warren <swarren@wwwdotorg.org>
 +M:    Thierry Reding <thierry.reding@gmail.com>
  L:    linux-tegra@vger.kernel.org
  Q:    http://patchwork.ozlabs.org/project/linux-tegra/list/
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/swarren/linux-tegra.git
  S:    Supported
  N:    [^a-z]tegra
  
 +TEGRA ASOC DRIVER
 +M:    Stephen Warren <swarren@wwwdotorg.org>
 +S:    Supported
 +F:    sound/soc/tegra/
 +
 +TEGRA CLOCK DRIVER
 +M:    Peter De Schrijver <pdeschrijver@nvidia.com>
 +M:    Prashant Gaikwad <pgaikwad@nvidia.com>
 +S:    Supported
 +F:    drivers/clk/tegra/
 +
 +TEGRA DMA DRIVER
 +M:    Laxman Dewangan <ldewangan@nvidia.com>
 +S:    Supported
 +F:    drivers/dma/tegra20-apb-dma.c
 +
 +TEGRA GPIO DRIVER
 +M:    Stephen Warren <swarren@wwwdotorg.org>
 +S:    Supported
 +F:    drivers/gpio/gpio-tegra.c
 +
 +TEGRA I2C DRIVER
 +M:    Laxman Dewangan <ldewangan@nvidia.com>
 +S:    Supported
 +F:    drivers/i2c/busses/i2c-tegra.c
 +
 +TEGRA IOMMU DRIVERS
 +M:    Hiroshi Doyu <hdoyu@nvidia.com>
 +S:    Supported
 +F:    drivers/iommu/tegra*
 +
 +TEGRA KBC DRIVER
 +M:    Rakesh Iyer <riyer@nvidia.com>
 +M:    Laxman Dewangan <ldewangan@nvidia.com>
 +S:    Supported
 +F:    drivers/input/keyboard/tegra-kbc.c
 +
 +TEGRA PINCTRL DRIVER
 +M:    Stephen Warren <swarren@wwwdotorg.org>
 +S:    Supported
 +F:    drivers/pinctrl/pinctrl-tegra*
 +
 +TEGRA PWM DRIVER
 +M:    Thierry Reding <thierry.reding@gmail.com>
 +S:    Supported
 +F:    drivers/pwm/pwm-tegra.c
 +
 +TEGRA SERIAL DRIVER
 +M:    Laxman Dewangan <ldewangan@nvidia.com>
 +S:    Supported
 +F:    drivers/tty/serial/serial-tegra.c
 +
 +TEGRA SPI DRIVER
 +M:    Laxman Dewangan <ldewangan@nvidia.com>
 +S:    Supported
 +F:    drivers/spi/spi-tegra*
 +
  TEHUTI ETHERNET DRIVER
  M:    Andy Gospodarek <andy@greyhouse.net>
  L:    netdev@vger.kernel.org
@@@ -8742,6 -8597,14 +8743,6 @@@ S:     Maintaine
  F:    arch/m68k/*/*_no.*
  F:    arch/m68k/include/asm/*_no.*
  
 -UCLINUX FOR RENESAS H8/300 (H8300)
 -M:    Yoshinori Sato <ysato@users.sourceforge.jp>
 -W:    http://uclinux-h8.sourceforge.jp/
 -S:    Supported
 -F:    arch/h8300/
 -F:    drivers/ide/ide-h8300.c
 -F:    drivers/net/ethernet/8390/ne-h8300.c
 -
  UDF FILESYSTEM
  M:    Jan Kara <jack@suse.cz>
  S:    Maintained
@@@ -8969,14 -8832,61 +8970,14 @@@ W:   http://pegasus2.sourceforge.net
  S:    Maintained
  F:    drivers/net/usb/rtl8150.c
  
 -USB SERIAL BELKIN F5U103 DRIVER
 -M:    William Greathouse <wgreathouse@smva.com>
 -L:    linux-usb@vger.kernel.org
 -S:    Maintained
 -F:    drivers/usb/serial/belkin_sa.*
 -
 -USB SERIAL CYPRESS M8 DRIVER
 -M:    Lonnie Mendez <dignome@gmail.com>
 -L:    linux-usb@vger.kernel.org
 -S:    Maintained
 -W:    http://geocities.com/i0xox0i
 -W:    http://firstlight.net/cvs
 -F:    drivers/usb/serial/cypress_m8.*
 -
 -USB SERIAL CYBERJACK DRIVER
 -M:    Matthias Bruestle and Harald Welte <support@reiner-sct.com>
 -W:    http://www.reiner-sct.de/support/treiber_cyberjack.php
 -S:    Maintained
 -F:    drivers/usb/serial/cyberjack.c
 -
 -USB SERIAL DIGI ACCELEPORT DRIVER
 -M:    Peter Berger <pberger@brimson.com>
 -M:    Al Borchers <alborchers@steinerpoint.com>
 +USB SERIAL SUBSYSTEM
 +M:    Johan Hovold <jhovold@gmail.com>
  L:    linux-usb@vger.kernel.org
  S:    Maintained
 -F:    drivers/usb/serial/digi_acceleport.c
 -
 -USB SERIAL DRIVER
 -M:    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 -L:    linux-usb@vger.kernel.org
 -S:    Supported
  F:    Documentation/usb/usb-serial.txt
 -F:    drivers/usb/serial/generic.c
 -F:    drivers/usb/serial/usb-serial.c
 +F:    drivers/usb/serial/
  F:    include/linux/usb/serial.h
  
 -USB SERIAL EMPEG EMPEG-CAR MARK I/II DRIVER
 -M:    Gary Brubaker <xavyer@ix.netcom.com>
 -L:    linux-usb@vger.kernel.org
 -S:    Maintained
 -F:    drivers/usb/serial/empeg.c
 -
 -USB SERIAL KEYSPAN DRIVER
 -M:    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 -L:    linux-usb@vger.kernel.org
 -S:    Maintained
 -F:    drivers/usb/serial/*keyspan*
 -
 -USB SERIAL WHITEHEAT DRIVER
 -M:    Support Department <support@connecttech.com>
 -L:    linux-usb@vger.kernel.org
 -W:    http://www.connecttech.com
 -S:    Supported
 -F:    drivers/usb/serial/whiteheat*
 -
  USB SMSC75XX ETHERNET DRIVER
  M:    Steve Glendinning <steve.glendinning@shawell.net>
  L:    netdev@vger.kernel.org
diff --combined arch/arm/include/asm/pgtable-3level.h
  #define L_PTE_S2_RDONLY                (_AT(pteval_t, 1) << 6)   /* HAP[1]   */
  #define L_PTE_S2_RDWR          (_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
  
+ #define L_PMD_S2_RDWR          (_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
  /*
   * Hyp-mode PL2 PTE definitions for LPAE.
   */
@@@ -206,9 -208,6 +208,9 @@@ static inline pmd_t *pmd_offset(pud_t *
  #define __HAVE_ARCH_PMD_WRITE
  #define pmd_write(pmd)                (!(pmd_val(pmd) & PMD_SECT_RDONLY))
  
 +#define pmd_hugewillfault(pmd)        (!pmd_young(pmd) || !pmd_write(pmd))
 +#define pmd_thp_or_huge(pmd)  (pmd_huge(pmd) || pmd_trans_huge(pmd))
 +
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  #define pmd_trans_huge(pmd)   (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
  #define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
diff --combined arch/arm/kvm/arm.c
@@@ -65,7 -65,7 +65,7 @@@ static bool vgic_present
  static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
  {
        BUG_ON(preemptible());
 -      __get_cpu_var(kvm_arm_running_vcpu) = vcpu;
 +      __this_cpu_write(kvm_arm_running_vcpu, vcpu);
  }
  
  /**
@@@ -75,7 -75,7 +75,7 @@@
  struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
  {
        BUG_ON(preemptible());
 -      return __get_cpu_var(kvm_arm_running_vcpu);
 +      return __this_cpu_read(kvm_arm_running_vcpu);
  }
  
  /**
@@@ -152,12 -152,13 +152,13 @@@ int kvm_arch_vcpu_fault(struct kvm_vcp
        return VM_FAULT_SIGBUS;
  }
  
- void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
                           struct kvm_memory_slot *dont)
  {
  }
  
- int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+                           unsigned long npages)
  {
        return 0;
  }
@@@ -797,6 -798,19 +798,19 @@@ long kvm_arch_vm_ioctl(struct file *fil
                        return -EFAULT;
                return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
        }
+       case KVM_ARM_PREFERRED_TARGET: {
+               int err;
+               struct kvm_vcpu_init init;
+               err = kvm_vcpu_preferred_target(&init);
+               if (err)
+                       return err;
+               if (copy_to_user(argp, &init, sizeof(init)))
+                       return -EFAULT;
+               return 0;
+       }
        default:
                return -EINVAL;
        }
@@@ -815,7 -829,7 +829,7 @@@ static void cpu_init_hyp_mode(void *dum
  
        boot_pgd_ptr = kvm_mmu_get_boot_httbr();
        pgd_ptr = kvm_mmu_get_httbr();
 -      stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
 +      stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
        hyp_stack_ptr = stack_page + PAGE_SIZE;
        vector_ptr = (unsigned long)__kvm_hyp_vector;
  
  
diff --combined arch/powerpc/include/asm/processor.h
  #ifdef CONFIG_VSX
  #define TS_FPRWIDTH 2
 +
 +#ifdef __BIG_ENDIAN__
 +#define TS_FPROFFSET 0
 +#define TS_VSRLOWOFFSET 1
 +#else
 +#define TS_FPROFFSET 1
 +#define TS_VSRLOWOFFSET 0
 +#endif
 +
  #else
  #define TS_FPRWIDTH 1
 +#define TS_FPROFFSET 0
  #endif
  
  #ifdef CONFIG_PPC64
@@@ -152,20 -142,10 +152,20 @@@ typedef struct 
        unsigned long seg;
  } mm_segment_t;
  
 -#define TS_FPROFFSET 0
 -#define TS_VSRLOWOFFSET 1
 -#define TS_FPR(i) fpr[i][TS_FPROFFSET]
 -#define TS_TRANS_FPR(i) transact_fpr[i][TS_FPROFFSET]
 +#define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET]
 +#define TS_TRANS_FPR(i) transact_fp.fpr[i][TS_FPROFFSET]
 +
 +/* FP and VSX 0-31 register set */
 +struct thread_fp_state {
 +      u64     fpr[32][TS_FPRWIDTH] __attribute__((aligned(16)));
 +      u64     fpscr;          /* Floating point status */
 +};
 +
 +/* Complete AltiVec register set including VSCR */
 +struct thread_vr_state {
 +      vector128       vr[32] __attribute__((aligned(16)));
 +      vector128       vscr __attribute__((aligned(16)));
 +};
  
  struct debug_reg {
  #ifdef CONFIG_PPC_ADV_DEBUG_REGS
  
  struct thread_struct {
        unsigned long   ksp;            /* Kernel stack pointer */
  #ifdef CONFIG_PPC64
        unsigned long   ksp_vsid;
  #endif
        void            *pgdir;         /* root of page-table tree */
        unsigned long   ksp_limit;      /* if ksp <= ksp_limit stack overflow */
  #endif
+       /* Debug Registers */
        struct debug_reg debug;
 -
 -      /* FP and VSX 0-31 register set */
 -      double          fpr[32][TS_FPRWIDTH] __attribute__((aligned(16)));
 -      struct {
 -
 -              unsigned int pad;
 -              unsigned int val;       /* Floating point status */
 -      } fpscr;
 +      struct thread_fp_state  fp_state;
 +      struct thread_fp_state  *fp_save_area;
        int             fpexc_mode;     /* floating-point exception mode */
        unsigned int    align_ctl;      /* alignment handling control */
  #ifdef CONFIG_PPC64
        struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
        unsigned long   trap_nr;        /* last trap # on this thread */
  #ifdef CONFIG_ALTIVEC
 -      /* Complete AltiVec register set */
 -      vector128       vr[32] __attribute__((aligned(16)));
 -      /* AltiVec status */
 -      vector128       vscr __attribute__((aligned(16)));
 +      struct thread_vr_state vr_state;
 +      struct thread_vr_state *vr_save_area;
        unsigned long   vrsave;
        int             used_vr;        /* set if process has used altivec */
  #endif /* CONFIG_ALTIVEC */
         * transact_fpr[] is the new set of transactional values.
         * VRs work the same way.
         */
 -      double          transact_fpr[32][TS_FPRWIDTH];
 -      struct {
 -              unsigned int pad;
 -              unsigned int val;       /* Floating point status */
 -      } transact_fpscr;
 -      vector128       transact_vr[32] __attribute__((aligned(16)));
 -      vector128       transact_vscr __attribute__((aligned(16)));
 +      struct thread_fp_state transact_fp;
 +      struct thread_vr_state transact_vr;
        unsigned long   transact_vrsave;
  #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
  #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
        .ksp = INIT_SP, \
        .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
        .fs = KERNEL_DS, \
 -      .fpr = {{0}}, \
 -      .fpscr = { .val = 0, }, \
        .fpexc_mode = 0, \
        .ppr = INIT_PPR, \
  }
@@@ -371,11 -368,6 +373,11 @@@ extern int set_endian(struct task_struc
  extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr);
  extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
  
 +extern void load_fp_state(struct thread_fp_state *fp);
 +extern void store_fp_state(struct thread_fp_state *fp);
 +extern void load_vr_state(struct thread_vr_state *vr);
 +extern void store_vr_state(struct thread_vr_state *vr);
 +
  static inline unsigned int __unpack_fe01(unsigned long msr_bits)
  {
        return ((msr_bits & MSR_FE0) >> 10) | ((msr_bits & MSR_FE1) >> 8);
diff --combined arch/powerpc/include/asm/reg.h
  #define MSR_64BIT     MSR_SF
  
  /* Server variant */
 -#define MSR_          (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV)
 +#define __MSR         (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV)
 +#ifdef __BIG_ENDIAN__
 +#define MSR_          __MSR
 +#else
 +#define MSR_          (__MSR | MSR_LE)
 +#endif
  #define MSR_KERNEL    (MSR_ | MSR_64BIT)
  #define MSR_USER32    (MSR_ | MSR_PR | MSR_EE)
  #define MSR_USER64    (MSR_USER32 | MSR_64BIT)
  #define SPRN_TBRU     0x10D   /* Time Base Read Upper Register (user, R/O) */
  #define SPRN_TBWL     0x11C   /* Time Base Lower Register (super, R/W) */
  #define SPRN_TBWU     0x11D   /* Time Base Upper Register (super, R/W) */
+ #define SPRN_TBU40    0x11E   /* Timebase upper 40 bits (hyper, R/W) */
  #define SPRN_SPURR    0x134   /* Scaled PURR */
  #define SPRN_HSPRG0   0x130   /* Hypervisor Scratch 0 */
  #define SPRN_HSPRG1   0x131   /* Hypervisor Scratch 1 */
  #define   LPCR_ISL    (1ul << (63-2))
  #define   LPCR_VC_SH  (63-2)
  #define   LPCR_DPFD_SH        (63-11)
+ #define   LPCR_DPFD   (7ul << LPCR_DPFD_SH)
  #define   LPCR_VRMASD (0x1ful << (63-16))
  #define   LPCR_VRMA_L (1ul << (63-12))
  #define   LPCR_VRMA_LP0       (1ul << (63-15))
  #define     LPCR_PECE2        0x00001000      /* machine check etc can cause exit */
  #define   LPCR_MER    0x00000800      /* Mediated External Exception */
  #define   LPCR_MER_SH 11
+ #define   LPCR_TC      0x00000200     /* Translation control */
  #define   LPCR_LPES    0x0000000c
  #define   LPCR_LPES0   0x00000008      /* LPAR Env selector 0 */
  #define   LPCR_LPES1   0x00000004      /* LPAR Env selector 1 */
  #define   LPID_RSVD   0x3ff           /* Reserved LPID for partn switching */
  #define       SPRN_HMER       0x150   /* Hardware m? error recovery */
  #define       SPRN_HMEER      0x151   /* Hardware m? enable error recovery */
+ #define SPRN_PCR      0x152   /* Processor compatibility register */
+ #define   PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
+ #define   PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
+ #define   PCR_ARCH_205        0x2             /* Architecture 2.05 */
  #define       SPRN_HEIR       0x153   /* Hypervisor Emulated Instruction Register */
  #define SPRN_TLBINDEXR        0x154   /* P7 TLB control register */
  #define SPRN_TLBVPNR  0x155   /* P7 TLB control register */
  #define        HID4_RMLS2_SH   (63 - 2)       /* Real mode limit bottom 2 bits */
  #define        HID4_LPID5_SH   (63 - 6)       /* partition ID bottom 4 bits */
  #define        HID4_RMOR_SH    (63 - 22)      /* real mode offset (16 bits) */
+ #define  HID4_RMOR     (0xFFFFul << HID4_RMOR_SH)
  #define  HID4_LPES1    (1 << (63-57)) /* LPAR env. sel. bit 1 */
  #define  HID4_RMLS0_SH         (63 - 58)      /* Real mode limit top bit */
  #define        HID4_LPID1_SH   0              /* partition ID top 2 bits */
  #define PVR_BE                0x0070
  #define PVR_PA6T      0x0090
  
+ /* "Logical" PVR values defined in PAPR, representing architecture levels */
+ #define PVR_ARCH_204  0x0f000001
+ #define PVR_ARCH_205  0x0f000002
+ #define PVR_ARCH_206  0x0f000003
+ #define PVR_ARCH_206p 0x0f100003
+ #define PVR_ARCH_207  0x0f000004
  /* Macros for setting and retrieving special purpose registers */
  #ifndef __ASSEMBLY__
  #define mfmsr()               ({unsigned long rval; \
diff --combined arch/powerpc/kernel/asm-offsets.c
@@@ -90,17 -90,16 +90,17 @@@ int main(void
        DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0]));
  #endif
        DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode));
 -      DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0]));
 -      DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr));
 +      DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state));
 +      DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area));
 +      DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr));
  #ifdef CONFIG_ALTIVEC
 -      DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0]));
 +      DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state));
 +      DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area));
        DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
 -      DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr));
        DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
 +      DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr));
  #endif /* CONFIG_ALTIVEC */
  #ifdef CONFIG_VSX
 -      DEFINE(THREAD_VSR0, offsetof(struct thread_struct, fpr));
        DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr));
  #endif /* CONFIG_VSX */
  #ifdef CONFIG_PPC64
        DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr));
        DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr));
        DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs));
 -      DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct,
 -                                       transact_vr[0]));
 -      DEFINE(THREAD_TRANSACT_VSCR, offsetof(struct thread_struct,
 -                                        transact_vscr));
 +      DEFINE(THREAD_TRANSACT_VRSTATE, offsetof(struct thread_struct,
 +                                               transact_vr));
        DEFINE(THREAD_TRANSACT_VRSAVE, offsetof(struct thread_struct,
                                            transact_vrsave));
 -      DEFINE(THREAD_TRANSACT_FPR0, offsetof(struct thread_struct,
 -                                        transact_fpr[0]));
 -      DEFINE(THREAD_TRANSACT_FPSCR, offsetof(struct thread_struct,
 -                                         transact_fpscr));
 -#ifdef CONFIG_VSX
 -      DEFINE(THREAD_TRANSACT_VSR0, offsetof(struct thread_struct,
 -                                        transact_fpr[0]));
 -#endif
 +      DEFINE(THREAD_TRANSACT_FPSTATE, offsetof(struct thread_struct,
 +                                               transact_fp));
        /* Local pt_regs on stack for Transactional Memory funcs. */
        DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD +
               sizeof(struct pt_regs) + 16);
        DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
        DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
        DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
- #ifdef CONFIG_KVM_BOOK3S_64_HV
+ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr));
        DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0));
        DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1));
        DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
  
        /* book3s */
- #ifdef CONFIG_KVM_BOOK3S_64_HV
+ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
        DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
        DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
        DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
        DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
        DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
+       DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar));
+       DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar));
        DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
        DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
        DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
        DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
        DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid));
        DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
+       DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr));
+       DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
        DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
        DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
        DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
        DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
-       DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
-                          offsetof(struct kvmppc_vcpu_book3s, vcpu));
+       DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset));
+       DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr));
+       DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr));
        DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
        DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv));
        DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
  
  #ifdef CONFIG_PPC_BOOK3S_64
- #ifdef CONFIG_KVM_BOOK3S_PR
+ #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+       DEFINE(PACA_SVCPU, offsetof(struct paca_struct, shadow_vcpu));
  # define SVCPU_FIELD(x, f)    DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f))
  #else
  # define SVCPU_FIELD(x, f)
        HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
        HSTATE_FIELD(HSTATE_NAPPING, napping);
  
- #ifdef CONFIG_KVM_BOOK3S_64_HV
+ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req);
        HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
        HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
        HSTATE_FIELD(HSTATE_DABR, dabr);
        HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
        DEFINE(IPI_PRIORITY, IPI_PRIORITY);
- #endif /* CONFIG_KVM_BOOK3S_64_HV */
+ #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
  
  #ifdef CONFIG_PPC_BOOK3S_64
        HSTATE_FIELD(HSTATE_CFAR, cfar);
+       HSTATE_FIELD(HSTATE_PPR, ppr);
  #endif /* CONFIG_PPC_BOOK3S_64 */
  
  #else /* CONFIG_PPC_BOOK3S */
diff --combined arch/powerpc/kernel/traps.c
@@@ -816,7 -816,7 +816,7 @@@ static void parse_fpe(struct pt_regs *r
  
        flush_fp_to_thread(current);
  
 -      code = __parse_fpscr(current->thread.fpscr.val);
 +      code = __parse_fpscr(current->thread.fp_state.fpscr);
  
        _exception(SIGFPE, regs, code, regs->nip);
  }
@@@ -1018,13 -1018,6 +1018,13 @@@ static int emulate_instruction(struct p
                return emulate_isel(regs, instword);
        }
  
 +      /* Emulate sync instruction variants */
 +      if ((instword & PPC_INST_SYNC_MASK) == PPC_INST_SYNC) {
 +              PPC_WARN_EMULATED(sync, regs);
 +              asm volatile("sync");
 +              return 0;
 +      }
 +
  #ifdef CONFIG_PPC64
        /* Emulate the mfspr rD, DSCR. */
        if ((((instword & PPC_INST_MFSPR_DSCR_USER_MASK) ==
@@@ -1076,7 -1069,7 +1076,7 @@@ static int emulate_math(struct pt_regs 
                return 0;
        case 1: {
                        int code = 0;
 -                      code = __parse_fpscr(current->thread.fpscr.val);
 +                      code = __parse_fpscr(current->thread.fp_state.fpscr);
                        _exception(SIGFPE, regs, code, regs->nip);
                        return 0;
                }
@@@ -1378,6 -1371,8 +1378,6 @@@ void facility_unavailable_exception(str
  
  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
  
 -extern void do_load_up_fpu(struct pt_regs *regs);
 -
  void fp_unavailable_tm(struct pt_regs *regs)
  {
        /* Note:  This does not handle any kind of FP laziness. */
  }
  
  #ifdef CONFIG_ALTIVEC
 -extern void do_load_up_altivec(struct pt_regs *regs);
 -
  void altivec_unavailable_tm(struct pt_regs *regs)
  {
        /* See the comments in fp_unavailable_tm().  This function operates
@@@ -1468,8 -1465,7 +1468,8 @@@ void SoftwareEmulation(struct pt_regs *
  
        if (!user_mode(regs)) {
                debugger(regs);
 -              die("Kernel Mode Software FPU Emulation", regs, SIGFPE);
 +              die("Kernel Mode Unimplemented Instruction or SW FPU Emulation",
 +                      regs, SIGFPE);
        }
  
        if (!emulate_math(regs))
@@@ -1529,7 -1525,7 +1529,7 @@@ static void handle_debug(struct pt_reg
         * back on or not.
         */
        if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
-           current->thread.debug.dbcr1))
+                              current->thread.debug.dbcr1))
                regs->msr |= MSR_DE;
        else
                /* Make sure the IDM flag is off */
@@@ -1639,7 -1635,7 +1639,7 @@@ void altivec_assist_exception(struct pt
                /* XXX quick hack for now: set the non-Java bit in the VSCR */
                printk_ratelimited(KERN_ERR "Unrecognized altivec instruction "
                                   "in %s at %lx\n", current->comm, regs->nip);
 -              current->thread.vscr.u[3] |= 0x10000;
 +              current->thread.vr_state.vscr.u[3] |= 0x10000;
        }
  }
  #endif /* CONFIG_ALTIVEC */
@@@ -1820,7 -1816,6 +1820,7 @@@ struct ppc_emulated ppc_emulated = 
        WARN_EMULATED_SETUP(popcntb),
        WARN_EMULATED_SETUP(spe),
        WARN_EMULATED_SETUP(string),
 +      WARN_EMULATED_SETUP(sync),
        WARN_EMULATED_SETUP(unaligned),
  #ifdef CONFIG_MATH_EMULATION
        WARN_EMULATED_SETUP(math),
diff --combined arch/powerpc/kvm/Kconfig
@@@ -6,7 -6,6 +6,7 @@@ source "virt/kvm/Kconfig
  
  menuconfig VIRTUALIZATION
        bool "Virtualization"
 +      depends on !CPU_LITTLE_ENDIAN
        ---help---
          Say Y here to get to see options for using your Linux host to run
          other operating systems inside virtual machines (guests).
@@@ -35,17 -34,20 +35,20 @@@ config KVM_BOOK3S_64_HANDLE
        bool
        select KVM_BOOK3S_HANDLER
  
- config KVM_BOOK3S_PR
+ config KVM_BOOK3S_PR_POSSIBLE
        bool
        select KVM_MMIO
        select MMU_NOTIFIER
  
+ config KVM_BOOK3S_HV_POSSIBLE
+       bool
  config KVM_BOOK3S_32
        tristate "KVM support for PowerPC book3s_32 processors"
        depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT
        select KVM
        select KVM_BOOK3S_32_HANDLER
-       select KVM_BOOK3S_PR
+       select KVM_BOOK3S_PR_POSSIBLE
        ---help---
          Support running unmodified book3s_32 guest kernels
          in virtual machines on book3s_32 host processors.
@@@ -60,6 -62,7 +63,7 @@@ config KVM_BOOK3S_6
        depends on PPC_BOOK3S_64
        select KVM_BOOK3S_64_HANDLER
        select KVM
+       select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
        ---help---
          Support running unmodified book3s_64 and book3s_32 guest kernels
          in virtual machines on book3s_64 host processors.
@@@ -70,8 -73,9 +74,9 @@@
          If unsure, say N.
  
  config KVM_BOOK3S_64_HV
-       bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
+       tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
        depends on KVM_BOOK3S_64
+       select KVM_BOOK3S_HV_POSSIBLE
        select MMU_NOTIFIER
        select CMA
        ---help---
          If unsure, say N.
  
  config KVM_BOOK3S_64_PR
-       def_bool y
-       depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV
-       select KVM_BOOK3S_PR
+       tristate "KVM support without using hypervisor mode in host"
+       depends on KVM_BOOK3S_64
+       select KVM_BOOK3S_PR_POSSIBLE
+       ---help---
+         Support running guest kernels in virtual machines on processors
+         without using hypervisor mode in the host, by running the
+         guest in user mode (problem state) and emulating all
+         privileged instructions and registers.
+         This is not as fast as using hypervisor mode, but works on
+         machines where hypervisor mode is not available or not usable,
+         and can emulate processors that are different from the host
+         processor, including emulating 32-bit processors on a 64-bit
+         host.
  
  config KVM_BOOKE_HV
        bool
diff --combined arch/powerpc/kvm/book3s_pr.c
  #include <linux/sched.h>
  #include <linux/vmalloc.h>
  #include <linux/highmem.h>
+ #include <linux/module.h>
  
- #include "trace.h"
+ #include "book3s.h"
+ #define CREATE_TRACE_POINTS
+ #include "trace_pr.h"
  
  /* #define EXIT_DEBUG */
  /* #define DEBUG_EXT */
@@@ -56,29 -60,25 +60,25 @@@ static int kvmppc_handle_ext(struct kvm
  #define HW_PAGE_SIZE PAGE_SIZE
  #endif
  
- void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
  {
  #ifdef CONFIG_PPC_BOOK3S_64
        struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
        memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
-       memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
-              sizeof(get_paca()->shadow_vcpu));
        svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
        svcpu_put(svcpu);
  #endif
        vcpu->cpu = smp_processor_id();
  #ifdef CONFIG_PPC_BOOK3S_32
-       current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
+       current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu;
  #endif
  }
  
- void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
  {
  #ifdef CONFIG_PPC_BOOK3S_64
        struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
        memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
-       memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
-              sizeof(get_paca()->shadow_vcpu));
        to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
        svcpu_put(svcpu);
  #endif
        vcpu->cpu = -1;
  }
  
- int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
+ /* Copy data needed by real-mode code from vcpu to shadow vcpu */
+ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
+                         struct kvm_vcpu *vcpu)
+ {
+       svcpu->gpr[0] = vcpu->arch.gpr[0];
+       svcpu->gpr[1] = vcpu->arch.gpr[1];
+       svcpu->gpr[2] = vcpu->arch.gpr[2];
+       svcpu->gpr[3] = vcpu->arch.gpr[3];
+       svcpu->gpr[4] = vcpu->arch.gpr[4];
+       svcpu->gpr[5] = vcpu->arch.gpr[5];
+       svcpu->gpr[6] = vcpu->arch.gpr[6];
+       svcpu->gpr[7] = vcpu->arch.gpr[7];
+       svcpu->gpr[8] = vcpu->arch.gpr[8];
+       svcpu->gpr[9] = vcpu->arch.gpr[9];
+       svcpu->gpr[10] = vcpu->arch.gpr[10];
+       svcpu->gpr[11] = vcpu->arch.gpr[11];
+       svcpu->gpr[12] = vcpu->arch.gpr[12];
+       svcpu->gpr[13] = vcpu->arch.gpr[13];
+       svcpu->cr  = vcpu->arch.cr;
+       svcpu->xer = vcpu->arch.xer;
+       svcpu->ctr = vcpu->arch.ctr;
+       svcpu->lr  = vcpu->arch.lr;
+       svcpu->pc  = vcpu->arch.pc;
+ }
+ /* Copy data touched by real-mode code from shadow vcpu back to vcpu */
+ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
+                           struct kvmppc_book3s_shadow_vcpu *svcpu)
+ {
+       vcpu->arch.gpr[0] = svcpu->gpr[0];
+       vcpu->arch.gpr[1] = svcpu->gpr[1];
+       vcpu->arch.gpr[2] = svcpu->gpr[2];
+       vcpu->arch.gpr[3] = svcpu->gpr[3];
+       vcpu->arch.gpr[4] = svcpu->gpr[4];
+       vcpu->arch.gpr[5] = svcpu->gpr[5];
+       vcpu->arch.gpr[6] = svcpu->gpr[6];
+       vcpu->arch.gpr[7] = svcpu->gpr[7];
+       vcpu->arch.gpr[8] = svcpu->gpr[8];
+       vcpu->arch.gpr[9] = svcpu->gpr[9];
+       vcpu->arch.gpr[10] = svcpu->gpr[10];
+       vcpu->arch.gpr[11] = svcpu->gpr[11];
+       vcpu->arch.gpr[12] = svcpu->gpr[12];
+       vcpu->arch.gpr[13] = svcpu->gpr[13];
+       vcpu->arch.cr  = svcpu->cr;
+       vcpu->arch.xer = svcpu->xer;
+       vcpu->arch.ctr = svcpu->ctr;
+       vcpu->arch.lr  = svcpu->lr;
+       vcpu->arch.pc  = svcpu->pc;
+       vcpu->arch.shadow_srr1 = svcpu->shadow_srr1;
+       vcpu->arch.fault_dar   = svcpu->fault_dar;
+       vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
+       vcpu->arch.last_inst   = svcpu->last_inst;
+ }
+ static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
  {
        int r = 1; /* Indicate we want to get back into the guest */
  
  }
  
  /************* MMU Notifiers *************/
+ static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start,
+                            unsigned long end)
+ {
+       long i;
+       struct kvm_vcpu *vcpu;
+       struct kvm_memslots *slots;
+       struct kvm_memory_slot *memslot;
+       slots = kvm_memslots(kvm);
+       kvm_for_each_memslot(memslot, slots) {
+               unsigned long hva_start, hva_end;
+               gfn_t gfn, gfn_end;
+               hva_start = max(start, memslot->userspace_addr);
+               hva_end = min(end, memslot->userspace_addr +
+                                       (memslot->npages << PAGE_SHIFT));
+               if (hva_start >= hva_end)
+                       continue;
+               /*
+                * {gfn(page) | page intersects with [hva_start, hva_end)} =
+                * {gfn, gfn+1, ..., gfn_end-1}.
+                */
+               gfn = hva_to_gfn_memslot(hva_start, memslot);
+               gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+               kvm_for_each_vcpu(i, vcpu, kvm)
+                       kvmppc_mmu_pte_pflush(vcpu, gfn << PAGE_SHIFT,
+                                             gfn_end << PAGE_SHIFT);
+       }
+ }
  
- int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+ static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
  {
        trace_kvm_unmap_hva(hva);
  
-       /*
-        * Flush all shadow tlb entries everywhere. This is slow, but
-        * we are 100% sure that we catch the to be unmapped page
-        */
-       kvm_flush_remote_tlbs(kvm);
+       do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
  
        return 0;
  }
  
- int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+ static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
+                                 unsigned long end)
  {
-       /* kvm_unmap_hva flushes everything anyways */
-       kvm_unmap_hva(kvm, start);
+       do_kvm_unmap_hva(kvm, start, end);
  
        return 0;
  }
  
- int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+ static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva)
  {
        /* XXX could be more clever ;) */
        return 0;
  }
  
- int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+ static int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva)
  {
        /* XXX could be more clever ;) */
        return 0;
  }
  
- void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+ static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte)
  {
        /* The page will get remapped properly on its next fault */
-       kvm_unmap_hva(kvm, hva);
+       do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
  }
  
  /*****************************************/
@@@ -159,7 -238,7 +238,7 @@@ static void kvmppc_recalc_shadow_msr(st
        vcpu->arch.shadow_msr = smsr;
  }
  
- void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
  {
        ulong old_msr = vcpu->arch.shared->msr;
  
                kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
  }
  
- void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
+ void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
  {
        u32 host_pvr;
  
        if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
                to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
  
+       /*
+        * If they're asking for POWER6 or later, set the flag
+        * indicating that we can do multiple large page sizes
+        * and 1TB segments.
+        * Also set the flag that indicates that tlbie has the large
+        * page bit in the RB operand instead of the instruction.
+        */
+       switch (PVR_VER(pvr)) {
+       case PVR_POWER6:
+       case PVR_POWER7:
+       case PVR_POWER7p:
+       case PVR_POWER8:
+               vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
+                       BOOK3S_HFLAG_NEW_TLBIE;
+               break;
+       }
  #ifdef CONFIG_PPC_BOOK3S_32
        /* 32 bit Book3S always has 32 byte dcbz */
        vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
@@@ -334,6 -430,7 +430,7 @@@ int kvmppc_handle_pagefault(struct kvm_
                            ulong eaddr, int vec)
  {
        bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
+       bool iswrite = false;
        int r = RESUME_GUEST;
        int relocated;
        int page_found = 0;
        u64 vsid;
  
        relocated = data ? dr : ir;
+       if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE))
+               iswrite = true;
  
        /* Resolve real address if translation turned on */
        if (relocated) {
-               page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data);
+               page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite);
        } else {
                pte.may_execute = true;
                pte.may_read = true;
                pte.raddr = eaddr & KVM_PAM;
                pte.eaddr = eaddr;
                pte.vpage = eaddr >> 12;
+               pte.page_size = MMU_PAGE_64K;
        }
  
        switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
  
        if (page_found == -ENOENT) {
                /* Page not found in guest PTE entries */
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
                vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
-               vcpu->arch.shared->dsisr = svcpu->fault_dsisr;
+               vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr;
                vcpu->arch.shared->msr |=
-                       (svcpu->shadow_srr1 & 0x00000000f8000000ULL);
-               svcpu_put(svcpu);
+                       vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
                kvmppc_book3s_queue_irqprio(vcpu, vec);
        } else if (page_found == -EPERM) {
                /* Storage protection */
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
                vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
-               vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE;
+               vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
                vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
                vcpu->arch.shared->msr |=
-                       svcpu->shadow_srr1 & 0x00000000f8000000ULL;
-               svcpu_put(svcpu);
+                       vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
                kvmppc_book3s_queue_irqprio(vcpu, vec);
        } else if (page_found == -EINVAL) {
                /* Page not found in guest SLB */
                kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
        } else if (!is_mmio &&
                   kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
+               if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
+                       /*
+                        * There is already a host HPTE there, presumably
+                        * a read-only one for a page the guest thinks
+                        * is writable, so get rid of it first.
+                        */
+                       kvmppc_mmu_unmap_page(vcpu, &pte);
+               }
                /* The guest's PTE is not mapped yet. Map on the host */
-               kvmppc_mmu_map_page(vcpu, &pte);
+               kvmppc_mmu_map_page(vcpu, &pte, iswrite);
                if (data)
                        vcpu->stat.sp_storage++;
                else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
-                       (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
+                        (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
                        kvmppc_patch_dcbz(vcpu, &pte);
        } else {
                /* MMIO */
@@@ -444,7 -548,7 +548,7 @@@ void kvmppc_giveup_ext(struct kvm_vcpu 
  #ifdef CONFIG_VSX
        u64 *vcpu_vsx = vcpu->arch.vsr;
  #endif
 -      u64 *thread_fpr = (u64*)t->fpr;
 +      u64 *thread_fpr = &t->fp_state.fpr[0][0];
        int i;
  
        /*
                /*
                 * Note that on CPUs with VSX, giveup_fpu stores
                 * both the traditional FP registers and the added VSX
 -               * registers into thread.fpr[].
 +               * registers into thread.fp_state.fpr[].
                 */
                if (current->thread.regs->msr & MSR_FP)
                        giveup_fpu(current);
                for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
                        vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
  
 -              vcpu->arch.fpscr = t->fpscr.val;
 +              vcpu->arch.fpscr = t->fp_state.fpscr;
  
  #ifdef CONFIG_VSX
                if (cpu_has_feature(CPU_FTR_VSX))
        if (msr & MSR_VEC) {
                if (current->thread.regs->msr & MSR_VEC)
                        giveup_altivec(current);
 -              memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
 -              vcpu->arch.vscr = t->vscr;
 +              memcpy(vcpu->arch.vr, t->vr_state.vr, sizeof(vcpu->arch.vr));
 +              vcpu->arch.vscr = t->vr_state.vscr;
        }
  #endif
  
@@@ -539,7 -643,7 +643,7 @@@ static int kvmppc_handle_ext(struct kvm
  #ifdef CONFIG_VSX
        u64 *vcpu_vsx = vcpu->arch.vsr;
  #endif
 -      u64 *thread_fpr = (u64*)t->fpr;
 +      u64 *thread_fpr = &t->fp_state.fpr[0][0];
        int i;
  
        /* When we have paired singles, we emulate in software */
                for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
                        thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
  #endif
 -              t->fpscr.val = vcpu->arch.fpscr;
 +              t->fp_state.fpscr = vcpu->arch.fpscr;
                t->fpexc_mode = 0;
                kvmppc_load_up_fpu();
        }
  
        if (msr & MSR_VEC) {
  #ifdef CONFIG_ALTIVEC
 -              memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
 -              t->vscr = vcpu->arch.vscr;
 +              memcpy(t->vr_state.vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
 +              t->vr_state.vscr = vcpu->arch.vscr;
                t->vrsave = -1;
                kvmppc_load_up_altivec();
  #endif
@@@ -619,13 -723,15 +723,15 @@@ static void kvmppc_handle_lost_ext(stru
  
        if (lost_ext & MSR_FP)
                kvmppc_load_up_fpu();
+ #ifdef CONFIG_ALTIVEC
        if (lost_ext & MSR_VEC)
                kvmppc_load_up_altivec();
+ #endif
        current->thread.regs->msr |= lost_ext;
  }
  
- int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
-                        unsigned int exit_nr)
+ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                         unsigned int exit_nr)
  {
        int r = RESUME_HOST;
        int s;
        switch (exit_nr) {
        case BOOK3S_INTERRUPT_INST_STORAGE:
        {
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-               ulong shadow_srr1 = svcpu->shadow_srr1;
+               ulong shadow_srr1 = vcpu->arch.shadow_srr1;
                vcpu->stat.pf_instruc++;
  
  #ifdef CONFIG_PPC_BOOK3S_32
                /* We set segments as unused segments when invalidating them. So
                 * treat the respective fault as segment fault. */
-               if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) {
-                       kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
-                       r = RESUME_GUEST;
+               {
+                       struct kvmppc_book3s_shadow_vcpu *svcpu;
+                       u32 sr;
+                       svcpu = svcpu_get(vcpu);
+                       sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT];
                        svcpu_put(svcpu);
-                       break;
+                       if (sr == SR_INVALID) {
+                               kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+                               r = RESUME_GUEST;
+                               break;
+                       }
                }
  #endif
-               svcpu_put(svcpu);
  
                /* only care about PTEG not found errors, but leave NX alone */
                if (shadow_srr1 & 0x40000000) {
+                       int idx = srcu_read_lock(&vcpu->kvm->srcu);
                        r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
+                       srcu_read_unlock(&vcpu->kvm->srcu, idx);
                        vcpu->stat.sp_instruc++;
                } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
                          (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
        case BOOK3S_INTERRUPT_DATA_STORAGE:
        {
                ulong dar = kvmppc_get_fault_dar(vcpu);
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-               u32 fault_dsisr = svcpu->fault_dsisr;
+               u32 fault_dsisr = vcpu->arch.fault_dsisr;
                vcpu->stat.pf_storage++;
  
  #ifdef CONFIG_PPC_BOOK3S_32
                /* We set segments as unused segments when invalidating them. So
                 * treat the respective fault as segment fault. */
-               if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) {
-                       kvmppc_mmu_map_segment(vcpu, dar);
-                       r = RESUME_GUEST;
+               {
+                       struct kvmppc_book3s_shadow_vcpu *svcpu;
+                       u32 sr;
+                       svcpu = svcpu_get(vcpu);
+                       sr = svcpu->sr[dar >> SID_SHIFT];
                        svcpu_put(svcpu);
-                       break;
+                       if (sr == SR_INVALID) {
+                               kvmppc_mmu_map_segment(vcpu, dar);
+                               r = RESUME_GUEST;
+                               break;
+                       }
                }
  #endif
-               svcpu_put(svcpu);
  
-               /* The only case we need to handle is missing shadow PTEs */
-               if (fault_dsisr & DSISR_NOHPTE) {
+               /*
+                * We need to handle missing shadow PTEs, and
+                * protection faults due to us mapping a page read-only
+                * when the guest thinks it is writable.
+                */
+               if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) {
+                       int idx = srcu_read_lock(&vcpu->kvm->srcu);
                        r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
+                       srcu_read_unlock(&vcpu->kvm->srcu, idx);
                } else {
                        vcpu->arch.shared->dar = dar;
                        vcpu->arch.shared->dsisr = fault_dsisr;
        case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
        {
                enum emulation_result er;
-               struct kvmppc_book3s_shadow_vcpu *svcpu;
                ulong flags;
  
  program_interrupt:
-               svcpu = svcpu_get(vcpu);
-               flags = svcpu->shadow_srr1 & 0x1f0000ull;
-               svcpu_put(svcpu);
+               flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
  
                if (vcpu->arch.shared->msr & MSR_PR) {
  #ifdef EXIT_DEBUG
                        ulong cmd = kvmppc_get_gpr(vcpu, 3);
                        int i;
  
- #ifdef CONFIG_KVM_BOOK3S_64_PR
+ #ifdef CONFIG_PPC_BOOK3S_64
                        if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
                                r = RESUME_GUEST;
                                break;
                break;
        default:
        {
-               struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-               ulong shadow_srr1 = svcpu->shadow_srr1;
-               svcpu_put(svcpu);
+               ulong shadow_srr1 = vcpu->arch.shadow_srr1;
                /* Ugh - bork here! What did we get? */
                printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
                        exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
        return r;
  }
  
- int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-                                   struct kvm_sregs *sregs)
+ static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu,
+                                           struct kvm_sregs *sregs)
  {
        struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
        int i;
        return 0;
  }
  
- int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-                                   struct kvm_sregs *sregs)
+ static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu,
+                                           struct kvm_sregs *sregs)
  {
        struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
        int i;
  
-       kvmppc_set_pvr(vcpu, sregs->pvr);
+       kvmppc_set_pvr_pr(vcpu, sregs->pvr);
  
        vcpu3s->sdr1 = sregs->u.s.sdr1;
        if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
        return 0;
  }
  
- int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
+ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
+                                union kvmppc_one_reg *val)
  {
        int r = 0;
  
        return r;
  }
  
- int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
+ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
+                                union kvmppc_one_reg *val)
  {
        int r = 0;
  
        return r;
  }
  
- int kvmppc_core_check_processor_compat(void)
- {
-       return 0;
- }
- struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
+                                                  unsigned int id)
  {
        struct kvmppc_vcpu_book3s *vcpu_book3s;
        struct kvm_vcpu *vcpu;
        int err = -ENOMEM;
        unsigned long p;
  
-       vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
-       if (!vcpu_book3s)
+       vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+       if (!vcpu)
                goto out;
  
-       vcpu_book3s->shadow_vcpu =
-               kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
-       if (!vcpu_book3s->shadow_vcpu)
+       vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
+       if (!vcpu_book3s)
                goto free_vcpu;
+       vcpu->arch.book3s = vcpu_book3s;
+ #ifdef CONFIG_KVM_BOOK3S_32
+       vcpu->arch.shadow_vcpu =
+               kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL);
+       if (!vcpu->arch.shadow_vcpu)
+               goto free_vcpu3s;
+ #endif
  
-       vcpu = &vcpu_book3s->vcpu;
        err = kvm_vcpu_init(vcpu, kvm, id);
        if (err)
                goto free_shadow_vcpu;
        vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
  
  #ifdef CONFIG_PPC_BOOK3S_64
-       /* default to book3s_64 (970fx) */
+       /*
+        * Default to the same as the host if we're on sufficiently
+        * recent machine that we have 1TB segments;
+        * otherwise default to PPC970FX.
+        */
        vcpu->arch.pvr = 0x3C0301;
+       if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+               vcpu->arch.pvr = mfspr(SPRN_PVR);
  #else
        /* default to book3s_32 (750) */
        vcpu->arch.pvr = 0x84202;
  #endif
-       kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+       kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
        vcpu->arch.slb_nr = 64;
  
        vcpu->arch.shadow_msr = MSR_USER64;
  uninit_vcpu:
        kvm_vcpu_uninit(vcpu);
  free_shadow_vcpu:
-       kfree(vcpu_book3s->shadow_vcpu);
- free_vcpu:
+ #ifdef CONFIG_KVM_BOOK3S_32
+       kfree(vcpu->arch.shadow_vcpu);
+ free_vcpu3s:
+ #endif
        vfree(vcpu_book3s);
+ free_vcpu:
+       kmem_cache_free(kvm_vcpu_cache, vcpu);
  out:
        return ERR_PTR(err);
  }
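The rewritten constructor above allocates the vcpu container first, then the book3s state, then (on 32-bit hosts) the shadow vcpu, and the error path unwinds those allocations in reverse through the relabelled goto targets. A minimal userspace sketch of the same unwind idiom; every name in it (make_widget and friends) is illustrative, not a kernel API:

/* Sketch of goto-based error unwinding: one label per allocation,
 * freed in reverse order.  Illustrative names only. */
#include <stdio.h>
#include <stdlib.h>

struct widget { char *buf_a; char *buf_b; };

static struct widget *make_widget(void)
{
	struct widget *w;

	w = calloc(1, sizeof(*w));
	if (!w)
		goto out;

	w->buf_a = malloc(64);
	if (!w->buf_a)
		goto free_widget;

	w->buf_b = malloc(64);
	if (!w->buf_b)
		goto free_buf_a;	/* unwind in reverse allocation order */

	return w;

free_buf_a:
	free(w->buf_a);
free_widget:
	free(w);
out:
	return NULL;
}

int main(void)
{
	struct widget *w = make_widget();

	printf("widget %s\n", w ? "allocated" : "failed");
	if (w) {
		free(w->buf_b);
		free(w->buf_a);
		free(w);
	}
	return 0;
}

Keeping one label per allocation, in reverse order, is what lets a new allocation (like the optional shadow_vcpu here) slot in with a single extra label.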
  
- void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
  {
        struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
  
        free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
        kvm_vcpu_uninit(vcpu);
-       kfree(vcpu_book3s->shadow_vcpu);
+ #ifdef CONFIG_KVM_BOOK3S_32
+       kfree(vcpu->arch.shadow_vcpu);
+ #endif
        vfree(vcpu_book3s);
+       kmem_cache_free(kvm_vcpu_cache, vcpu);
  }
  
- int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
  {
        int ret;
 -      double fpr[32][TS_FPRWIDTH];
 -      unsigned int fpscr;
 +      struct thread_fp_state fp;
        int fpexc_mode;
  #ifdef CONFIG_ALTIVEC
 -      vector128 vr[32];
 -      vector128 vscr;
 +      struct thread_vr_state vr;
        unsigned long uninitialized_var(vrsave);
        int used_vr;
  #endif
        /* Save FPU state in stack */
        if (current->thread.regs->msr & MSR_FP)
                giveup_fpu(current);
 -      memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
 -      fpscr = current->thread.fpscr.val;
 +      fp = current->thread.fp_state;
        fpexc_mode = current->thread.fpexc_mode;
  
  #ifdef CONFIG_ALTIVEC
        if (used_vr) {
                if (current->thread.regs->msr & MSR_VEC)
                        giveup_altivec(current);
 -              memcpy(vr, current->thread.vr, sizeof(current->thread.vr));
 -              vscr = current->thread.vscr;
 +              vr = current->thread.vr_state;
                vrsave = current->thread.vrsave;
        }
  #endif
        current->thread.regs->msr = ext_msr;
  
        /* Restore FPU/VSX state from stack */
 -      memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
 -      current->thread.fpscr.val = fpscr;
 +      current->thread.fp_state = fp;
        current->thread.fpexc_mode = fpexc_mode;
  
  #ifdef CONFIG_ALTIVEC
        /* Restore Altivec state from stack */
        if (used_vr && current->thread.used_vr) {
 -              memcpy(current->thread.vr, vr, sizeof(current->thread.vr));
 -              current->thread.vscr = vscr;
 +              current->thread.vr_state = vr;
                current->thread.vrsave = vrsave;
        }
        current->thread.used_vr = used_vr;
@@@ -1216,8 -1358,8 +1352,8 @@@ out
  /*
   * Get (and clear) the dirty memory log for a memory slot.
   */
- int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
-                                     struct kvm_dirty_log *log)
+ static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
+                                        struct kvm_dirty_log *log)
  {
        struct kvm_memory_slot *memslot;
        struct kvm_vcpu *vcpu;
        return r;
  }
  
- #ifdef CONFIG_PPC64
- int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
+ static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
+                                        struct kvm_memory_slot *memslot)
  {
-       info->flags = KVM_PPC_1T_SEGMENTS;
-       /* SLB is always 64 entries */
-       info->slb_size = 64;
-       /* Standard 4k base page size segment */
-       info->sps[0].page_shift = 12;
-       info->sps[0].slb_enc = 0;
-       info->sps[0].enc[0].page_shift = 12;
-       info->sps[0].enc[0].pte_enc = 0;
-       /* Standard 16M large page size segment */
-       info->sps[1].page_shift = 24;
-       info->sps[1].slb_enc = SLB_VSID_L;
-       info->sps[1].enc[0].page_shift = 24;
-       info->sps[1].enc[0].pte_enc = 0;
+       return;
+ }
  
+ static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
+                                       struct kvm_memory_slot *memslot,
+                                       struct kvm_userspace_memory_region *mem)
+ {
        return 0;
  }
- #endif /* CONFIG_PPC64 */
  
- void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
-                             struct kvm_memory_slot *dont)
+ static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
+                               struct kvm_userspace_memory_region *mem,
+                               const struct kvm_memory_slot *old)
  {
+       return;
  }
  
- int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
-                              unsigned long npages)
+ static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free,
+                                       struct kvm_memory_slot *dont)
  {
-       return 0;
+       return;
  }
  
- int kvmppc_core_prepare_memory_region(struct kvm *kvm,
-                                     struct kvm_memory_slot *memslot,
-                                     struct kvm_userspace_memory_region *mem)
+ static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot,
+                                        unsigned long npages)
  {
        return 0;
  }
  
- void kvmppc_core_commit_memory_region(struct kvm *kvm,
-                               struct kvm_userspace_memory_region *mem,
-                               const struct kvm_memory_slot *old)
+ #ifdef CONFIG_PPC64
+ static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
+                                        struct kvm_ppc_smmu_info *info)
  {
- }
+       long int i;
+       struct kvm_vcpu *vcpu;
+       info->flags = 0;
+       /* SLB is always 64 entries */
+       info->slb_size = 64;
+       /* Standard 4k base page size segment */
+       info->sps[0].page_shift = 12;
+       info->sps[0].slb_enc = 0;
+       info->sps[0].enc[0].page_shift = 12;
+       info->sps[0].enc[0].pte_enc = 0;
+       /*
+        * 64k large page size.
+        * We only want to put this in if the CPUs we're emulating
+        * support it, but unfortunately we don't have a vcpu easily
+        * to hand here to test.  Just pick the first vcpu, and if
+        * that doesn't exist yet, report the minimum capability,
+        * i.e., no 64k pages.
+        * 1T segment support goes along with 64k pages.
+        */
+       i = 1;
+       vcpu = kvm_get_vcpu(kvm, 0);
+       if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
+               info->flags = KVM_PPC_1T_SEGMENTS;
+               info->sps[i].page_shift = 16;
+               info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01;
+               info->sps[i].enc[0].page_shift = 16;
+               info->sps[i].enc[0].pte_enc = 1;
+               ++i;
+       }
+       /* Standard 16M large page size segment */
+       info->sps[i].page_shift = 24;
+       info->sps[i].slb_enc = SLB_VSID_L;
+       info->sps[i].enc[0].page_shift = 24;
+       info->sps[i].enc[0].pte_enc = 0;
  
- void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+       return 0;
+ }
+ #else
+ static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
+                                        struct kvm_ppc_smmu_info *info)
  {
+       /* We should not get called */
+       BUG();
  }
+ #endif /* CONFIG_PPC64 */
  
  static unsigned int kvm_global_user_count = 0;
  static DEFINE_SPINLOCK(kvm_global_user_count_lock);
  
- int kvmppc_core_init_vm(struct kvm *kvm)
+ static int kvmppc_core_init_vm_pr(struct kvm *kvm)
  {
- #ifdef CONFIG_PPC64
-       INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
-       INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
- #endif
+       mutex_init(&kvm->arch.hpt_mutex);
  
        if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
                spin_lock(&kvm_global_user_count_lock);
        return 0;
  }
  
- void kvmppc_core_destroy_vm(struct kvm *kvm)
+ static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)
  {
  #ifdef CONFIG_PPC64
        WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
        }
  }
  
- static int kvmppc_book3s_init(void)
+ static int kvmppc_core_check_processor_compat_pr(void)
  {
-       int r;
+       /* we are always compatible */
+       return 0;
+ }
  
-       r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
-                    THIS_MODULE);
+ static long kvm_arch_vm_ioctl_pr(struct file *filp,
+                                unsigned int ioctl, unsigned long arg)
+ {
+       return -ENOTTY;
+ }
  
-       if (r)
+ static struct kvmppc_ops kvm_ops_pr = {
+       .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
+       .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
+       .get_one_reg = kvmppc_get_one_reg_pr,
+       .set_one_reg = kvmppc_set_one_reg_pr,
+       .vcpu_load   = kvmppc_core_vcpu_load_pr,
+       .vcpu_put    = kvmppc_core_vcpu_put_pr,
+       .set_msr     = kvmppc_set_msr_pr,
+       .vcpu_run    = kvmppc_vcpu_run_pr,
+       .vcpu_create = kvmppc_core_vcpu_create_pr,
+       .vcpu_free   = kvmppc_core_vcpu_free_pr,
+       .check_requests = kvmppc_core_check_requests_pr,
+       .get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr,
+       .flush_memslot = kvmppc_core_flush_memslot_pr,
+       .prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
+       .commit_memory_region = kvmppc_core_commit_memory_region_pr,
+       .unmap_hva = kvm_unmap_hva_pr,
+       .unmap_hva_range = kvm_unmap_hva_range_pr,
+       .age_hva  = kvm_age_hva_pr,
+       .test_age_hva = kvm_test_age_hva_pr,
+       .set_spte_hva = kvm_set_spte_hva_pr,
+       .mmu_destroy  = kvmppc_mmu_destroy_pr,
+       .free_memslot = kvmppc_core_free_memslot_pr,
+       .create_memslot = kvmppc_core_create_memslot_pr,
+       .init_vm = kvmppc_core_init_vm_pr,
+       .destroy_vm = kvmppc_core_destroy_vm_pr,
+       .get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
+       .emulate_op = kvmppc_core_emulate_op_pr,
+       .emulate_mtspr = kvmppc_core_emulate_mtspr_pr,
+       .emulate_mfspr = kvmppc_core_emulate_mfspr_pr,
+       .fast_vcpu_kick = kvm_vcpu_kick,
+       .arch_vm_ioctl  = kvm_arch_vm_ioctl_pr,
+ };
+ 
+ int kvmppc_book3s_init_pr(void)
+ {
+       int r;
+ 
+       r = kvmppc_core_check_processor_compat_pr();
+       if (r < 0)
                return r;
  
-       r = kvmppc_mmu_hpte_sysinit();
+       kvm_ops_pr.owner = THIS_MODULE;
+       kvmppc_pr_ops = &kvm_ops_pr;
  
+       r = kvmppc_mmu_hpte_sysinit();
        return r;
  }
  
- static void kvmppc_book3s_exit(void)
+ void kvmppc_book3s_exit_pr(void)
  {
+       kvmppc_pr_ops = NULL;
        kvmppc_mmu_hpte_sysexit();
-       kvm_exit();
  }
  
- module_init(kvmppc_book3s_init);
- module_exit(kvmppc_book3s_exit);
+ /*
+  * We only support separate modules for book3s 64
+  */
+ #ifdef CONFIG_PPC_BOOK3S_64
+ module_init(kvmppc_book3s_init_pr);
+ module_exit(kvmppc_book3s_exit_pr);
+ MODULE_LICENSE("GPL");
+ #endif
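With this hunk, book3s_pr.c no longer calls kvm_init()/kvm_exit() itself; it publishes a kvmppc_ops table (kvm_ops_pr above) and the generic entry points dispatch through a per-VM pointer, which is what lets the PR and HV flavours coexist in one kernel. A standalone model of that indirection; the types and names below (kvmppc_ops_model and friends) are simplified stand-ins, not the kernel's:

#include <stdio.h>

struct kvm;					/* opaque in this sketch */

struct kvmppc_ops_model {
	int  (*init_vm)(struct kvm *kvm);
	void (*destroy_vm)(struct kvm *kvm);
};

static int init_vm_pr(struct kvm *kvm)
{
	(void)kvm;
	puts("PR flavour: init_vm");
	return 0;
}

static void destroy_vm_pr(struct kvm *kvm)
{
	(void)kvm;
	puts("PR flavour: destroy_vm");
}

static struct kvmppc_ops_model kvm_ops_pr_model = {
	.init_vm    = init_vm_pr,
	.destroy_vm = destroy_vm_pr,
};

/* Generic code dispatches through the table instead of hard-coding a
 * flavour, mirroring what the new wrappers in booke.c/powerpc.c do. */
static int core_init_vm(struct kvmppc_ops_model *ops, struct kvm *kvm)
{
	return ops->init_vm(kvm);
}

int main(void)
{
	/* selected once, e.g. at module load: PR here, HV elsewhere */
	struct kvmppc_ops_model *ops = &kvm_ops_pr_model;

	core_init_vm(ops, NULL);
	ops->destroy_vm(NULL);
	return 0;
}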
diff --combined arch/powerpc/kvm/booke.c
@@@ -40,7 -40,9 +40,9 @@@
  
  #include "timing.h"
  #include "booke.h"
- #include "trace.h"
+ #define CREATE_TRACE_POINTS
+ #include "trace_booke.h"
  
  unsigned long kvmppc_booke_handlers;
  
@@@ -133,6 -135,29 +135,29 @@@ static void kvmppc_vcpu_sync_fpu(struc
  #endif
  }
  
+ static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
+ {
+       /* Synchronize guest's desire to get debug interrupts into shadow MSR */
+ #ifndef CONFIG_KVM_BOOKE_HV
+       vcpu->arch.shadow_msr &= ~MSR_DE;
+       vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE;
+ #endif
+       /* Force enable debug interrupts when user space wants to debug */
+       if (vcpu->guest_debug) {
+ #ifdef CONFIG_KVM_BOOKE_HV
+               /*
+                * Since there is no shadow MSR, sync MSR_DE into the guest
+                * visible MSR.
+                */
+               vcpu->arch.shared->msr |= MSR_DE;
+ #else
+               vcpu->arch.shadow_msr |= MSR_DE;
+               vcpu->arch.shared->msr &= ~MSR_DE;
+ #endif
+       }
+ }
+ 
  /*
   * Helper function for "full" MSR writes.  No need to call this if only
   * EE/CE/ME/DE/RI are changing.
@@@ -150,6 -175,7 +175,7 @@@ void kvmppc_set_msr(struct kvm_vcpu *vc
        kvmppc_mmu_msr_notify(vcpu, old_msr);
        kvmppc_vcpu_sync_spe(vcpu);
        kvmppc_vcpu_sync_fpu(vcpu);
+       kvmppc_vcpu_sync_debug(vcpu);
  }
  
  static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
@@@ -655,9 -681,11 +681,10 @@@ int kvmppc_core_check_requests(struct k
  int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
  {
        int ret, s;
+       struct thread_struct thread;
  #ifdef CONFIG_PPC_FPU
 -      unsigned int fpscr;
 +      struct thread_fp_state fp;
        int fpexc_mode;
 -      u64 fpr[32];
  #endif
  
        if (!vcpu->arch.sane) {
  #ifdef CONFIG_PPC_FPU
        /* Save userspace FPU state in stack */
        enable_kernel_fp();
 -      memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
 -      fpscr = current->thread.fpscr.val;
 +      fp = current->thread.fp_state;
        fpexc_mode = current->thread.fpexc_mode;
  
        /* Restore guest FPU state to thread */
 -      memcpy(current->thread.fpr, vcpu->arch.fpr, sizeof(vcpu->arch.fpr));
 -      current->thread.fpscr.val = vcpu->arch.fpscr;
 +      memcpy(current->thread.fp_state.fpr, vcpu->arch.fpr,
 +             sizeof(vcpu->arch.fpr));
 +      current->thread.fp_state.fpscr = vcpu->arch.fpscr;
  
        /*
         * Since we can't trap on MSR_FP in GS-mode, we consider the guest
        kvmppc_load_guest_fp(vcpu);
  #endif
  
+       /* Switch to guest debug context */
+       thread.debug = vcpu->arch.shadow_dbg_reg;
+       switch_booke_debug_regs(&thread);
+       thread.debug = current->thread.debug;
+       current->thread.debug = vcpu->arch.shadow_dbg_reg;
        kvmppc_fix_ee_before_entry();
  
        ret = __kvmppc_vcpu_run(kvm_run, vcpu);
        /* No need for kvm_guest_exit. It's done in handle_exit.
           We also get here with interrupts enabled. */
  
+       /* Switch back to user space debug context */
+       switch_booke_debug_regs(&thread);
+       current->thread.debug = thread.debug;
  #ifdef CONFIG_PPC_FPU
        kvmppc_save_guest_fp(vcpu);
  
        vcpu->fpu_active = 0;
  
        /* Save guest FPU state from thread */
 -      memcpy(vcpu->arch.fpr, current->thread.fpr, sizeof(vcpu->arch.fpr));
 -      vcpu->arch.fpscr = current->thread.fpscr.val;
 +      memcpy(vcpu->arch.fpr, current->thread.fp_state.fpr,
 +             sizeof(vcpu->arch.fpr));
 +      vcpu->arch.fpscr = current->thread.fp_state.fpscr;
  
        /* Restore userspace FPU state from stack */
 -      memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
 -      current->thread.fpscr.val = fpscr;
 +      current->thread.fp_state = fp;
        current->thread.fpexc_mode = fpexc_mode;
  #endif
  
@@@ -757,6 -795,30 +794,30 @@@ static int emulation_exit(struct kvm_ru
        }
  }
  
+ static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
+ {
+       struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+       u32 dbsr = vcpu->arch.dbsr;
+       run->debug.arch.status = 0;
+       run->debug.arch.address = vcpu->arch.pc;
+       if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
+               run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
+       } else {
+               if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
+                       run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
+               else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
+                       run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
+               if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
+                       run->debug.arch.address = dbg_reg->dac1;
+               else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
+                       run->debug.arch.address = dbg_reg->dac2;
+       }
+       return RESUME_HOST;
+ }
+ 
  static void kvmppc_fill_pt_regs(struct pt_regs *regs)
  {
        ulong r1, ip, msr, lr;
@@@ -817,6 -879,11 +878,11 @@@ static void kvmppc_restart_interrupt(st
        case BOOKE_INTERRUPT_CRITICAL:
                unknown_exception(&regs);
                break;
+       case BOOKE_INTERRUPT_DEBUG:
+               /* Save DBSR before preemption is enabled */
+               vcpu->arch.dbsr = mfspr(SPRN_DBSR);
+               kvmppc_clear_dbsr();
+               break;
        }
  }
  
@@@ -1134,18 -1201,10 +1200,10 @@@ int kvmppc_handle_exit(struct kvm_run *
        }
  
        case BOOKE_INTERRUPT_DEBUG: {
-               u32 dbsr;
-               vcpu->arch.pc = mfspr(SPRN_CSRR0);
-               /* clear IAC events in DBSR register */
-               dbsr = mfspr(SPRN_DBSR);
-               dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
-               mtspr(SPRN_DBSR, dbsr);
-               run->exit_reason = KVM_EXIT_DEBUG;
+               r = kvmppc_handle_debug(run, vcpu);
+               if (r == RESUME_HOST)
+                       run->exit_reason = KVM_EXIT_DEBUG;
                kvmppc_account_exit(vcpu, DEBUG_EXITS);
-               r = RESUME_HOST;
                break;
        }
  
@@@ -1196,7 -1255,7 +1254,7 @@@ int kvm_arch_vcpu_setup(struct kvm_vcp
        kvmppc_set_msr(vcpu, 0);
  
  #ifndef CONFIG_KVM_BOOKE_HV
-       vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
+       vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
        vcpu->arch.shadow_pid = 1;
        vcpu->arch.shared->msr = 0;
  #endif
@@@ -1358,7 -1417,7 +1416,7 @@@ static int set_sregs_arch206(struct kvm
        return 0;
  }
  
- void kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+ int kvmppc_get_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
  {
        sregs->u.e.features |= KVM_SREGS_E_IVOR;
  
        sregs->u.e.ivor_low[13] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
        sregs->u.e.ivor_low[14] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
        sregs->u.e.ivor_low[15] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+       return 0;
  }
  
  int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
@@@ -1412,8 -1472,7 +1471,7 @@@ int kvm_arch_vcpu_ioctl_get_sregs(struc
  
        get_sregs_base(vcpu, sregs);
        get_sregs_arch206(vcpu, sregs);
-       kvmppc_core_get_sregs(vcpu, sregs);
-       return 0;
+       return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
  }
  
  int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        if (ret < 0)
                return ret;
  
-       return kvmppc_core_set_sregs(vcpu, sregs);
+       return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
  }
  
  int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
        int r = 0;
        union kvmppc_one_reg val;
        int size;
-       long int i;
  
        size = one_reg_size(reg->id);
        if (size > sizeof(val))
  
        switch (reg->id) {
        case KVM_REG_PPC_IAC1:
+               val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1);
+               break;
        case KVM_REG_PPC_IAC2:
+               val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2);
+               break;
+ #if CONFIG_PPC_ADV_DEBUG_IACS > 2
        case KVM_REG_PPC_IAC3:
+               val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3);
+               break;
        case KVM_REG_PPC_IAC4:
-               i = reg->id - KVM_REG_PPC_IAC1;
-               val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac[i]);
+               val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4);
                break;
+ #endif
        case KVM_REG_PPC_DAC1:
+               val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1);
+               break;
        case KVM_REG_PPC_DAC2:
-               i = reg->id - KVM_REG_PPC_DAC1;
-               val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac[i]);
+               val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2);
                break;
        case KVM_REG_PPC_EPR: {
                u32 epr = get_guest_epr(vcpu);
                val = get_reg_val(reg->id, vcpu->arch.tsr);
                break;
        case KVM_REG_PPC_DEBUG_INST:
-               val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV);
+               val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG);
+               break;
+       case KVM_REG_PPC_VRSAVE:
+               val = get_reg_val(reg->id, vcpu->arch.vrsave);
                break;
        default:
-               r = kvmppc_get_one_reg(vcpu, reg->id, &val);
+               r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
                break;
        }
  
@@@ -1497,7 -1566,6 +1565,6 @@@ int kvm_vcpu_ioctl_set_one_reg(struct k
        int r = 0;
        union kvmppc_one_reg val;
        int size;
-       long int i;
  
        size = one_reg_size(reg->id);
        if (size > sizeof(val))
  
        switch (reg->id) {
        case KVM_REG_PPC_IAC1:
+               vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val);
+               break;
        case KVM_REG_PPC_IAC2:
+               vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val);
+               break;
+ #if CONFIG_PPC_ADV_DEBUG_IACS > 2
        case KVM_REG_PPC_IAC3:
+               vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val);
+               break;
        case KVM_REG_PPC_IAC4:
-               i = reg->id - KVM_REG_PPC_IAC1;
-               vcpu->arch.dbg_reg.iac[i] = set_reg_val(reg->id, val);
+               vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val);
                break;
+ #endif
        case KVM_REG_PPC_DAC1:
+               vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val);
+               break;
        case KVM_REG_PPC_DAC2:
-               i = reg->id - KVM_REG_PPC_DAC1;
-               vcpu->arch.dbg_reg.dac[i] = set_reg_val(reg->id, val);
+               vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val);
                break;
        case KVM_REG_PPC_EPR: {
                u32 new_epr = set_reg_val(reg->id, val);
                kvmppc_set_tcr(vcpu, tcr);
                break;
        }
+       case KVM_REG_PPC_VRSAVE:
+               vcpu->arch.vrsave = set_reg_val(reg->id, val);
+               break;
        default:
-               r = kvmppc_set_one_reg(vcpu, reg->id, &val);
+               r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
                break;
        }
  
        return r;
  }
  
- int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-                                        struct kvm_guest_debug *dbg)
- {
-       return -EINVAL;
- }
  int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
  {
        return -ENOTSUPP;
@@@ -1589,12 -1662,12 +1661,12 @@@ int kvm_vm_ioctl_get_dirty_log(struct k
        return -ENOTSUPP;
  }
  
- void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+ void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
                              struct kvm_memory_slot *dont)
  {
  }
  
- int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+ int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                               unsigned long npages)
  {
        return 0;
@@@ -1670,6 -1743,157 +1742,157 @@@ void kvmppc_decrementer_func(unsigned l
        kvmppc_set_tsr_bits(vcpu, TSR_DIS);
  }
  
+ static int kvmppc_booke_add_breakpoint(struct debug_reg *dbg_reg,
+                                      uint64_t addr, int index)
+ {
+       switch (index) {
+       case 0:
+               dbg_reg->dbcr0 |= DBCR0_IAC1;
+               dbg_reg->iac1 = addr;
+               break;
+       case 1:
+               dbg_reg->dbcr0 |= DBCR0_IAC2;
+               dbg_reg->iac2 = addr;
+               break;
+ #if CONFIG_PPC_ADV_DEBUG_IACS > 2
+       case 2:
+               dbg_reg->dbcr0 |= DBCR0_IAC3;
+               dbg_reg->iac3 = addr;
+               break;
+       case 3:
+               dbg_reg->dbcr0 |= DBCR0_IAC4;
+               dbg_reg->iac4 = addr;
+               break;
+ #endif
+       default:
+               return -EINVAL;
+       }
+       dbg_reg->dbcr0 |= DBCR0_IDM;
+       return 0;
+ }
+ 
+ static int kvmppc_booke_add_watchpoint(struct debug_reg *dbg_reg, uint64_t addr,
+                                      int type, int index)
+ {
+       switch (index) {
+       case 0:
+               if (type & KVMPPC_DEBUG_WATCH_READ)
+                       dbg_reg->dbcr0 |= DBCR0_DAC1R;
+               if (type & KVMPPC_DEBUG_WATCH_WRITE)
+                       dbg_reg->dbcr0 |= DBCR0_DAC1W;
+               dbg_reg->dac1 = addr;
+               break;
+       case 1:
+               if (type & KVMPPC_DEBUG_WATCH_READ)
+                       dbg_reg->dbcr0 |= DBCR0_DAC2R;
+               if (type & KVMPPC_DEBUG_WATCH_WRITE)
+                       dbg_reg->dbcr0 |= DBCR0_DAC2W;
+               dbg_reg->dac2 = addr;
+               break;
+       default:
+               return -EINVAL;
+       }
+       dbg_reg->dbcr0 |= DBCR0_IDM;
+       return 0;
+ }
+ 
+ void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set)
+ {
+       /* XXX: Add similar MSR protection for BookE-PR */
+ #ifdef CONFIG_KVM_BOOKE_HV
+       BUG_ON(prot_bitmap & ~(MSRP_UCLEP | MSRP_DEP | MSRP_PMMP));
+       if (set) {
+               if (prot_bitmap & MSR_UCLE)
+                       vcpu->arch.shadow_msrp |= MSRP_UCLEP;
+               if (prot_bitmap & MSR_DE)
+                       vcpu->arch.shadow_msrp |= MSRP_DEP;
+               if (prot_bitmap & MSR_PMM)
+                       vcpu->arch.shadow_msrp |= MSRP_PMMP;
+       } else {
+               if (prot_bitmap & MSR_UCLE)
+                       vcpu->arch.shadow_msrp &= ~MSRP_UCLEP;
+               if (prot_bitmap & MSR_DE)
+                       vcpu->arch.shadow_msrp &= ~MSRP_DEP;
+               if (prot_bitmap & MSR_PMM)
+                       vcpu->arch.shadow_msrp &= ~MSRP_PMMP;
+       }
+ #endif
+ }
+ 
+ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+                                        struct kvm_guest_debug *dbg)
+ {
+       struct debug_reg *dbg_reg;
+       int n, b = 0, w = 0;
+       if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
+               vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+               vcpu->guest_debug = 0;
+               kvm_guest_protect_msr(vcpu, MSR_DE, false);
+               return 0;
+       }
+       kvm_guest_protect_msr(vcpu, MSR_DE, true);
+       vcpu->guest_debug = dbg->control;
+       vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+       /* Set DBCR0_EDM in guest visible DBCR0 register. */
+       vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
+       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+               vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+       /* Code below handles only HW breakpoints */
+       dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+ #ifdef CONFIG_KVM_BOOKE_HV
+       /*
+        * On BookE-HV (e500mc) the guest is always executed with MSR.GS=1.
+        * DBCR1 and DBCR2 are set to trigger debug events when MSR.PR is 0.
+        */
+       dbg_reg->dbcr1 = 0;
+       dbg_reg->dbcr2 = 0;
+ #else
+       /*
+        * On BookE-PR (e500v2) the guest is always executed with MSR.PR=1
+        * We set DBCR1 and DBCR2 to only trigger debug events when MSR.PR
+        * is set.
+        */
+       dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
+                         DBCR1_IAC4US;
+       dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+ #endif
+       if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+               return 0;
+       for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
+               uint64_t addr = dbg->arch.bp[n].addr;
+               uint32_t type = dbg->arch.bp[n].type;
+               if (type == KVMPPC_DEBUG_NONE)
+                       continue;
+               if (type & ~(KVMPPC_DEBUG_WATCH_READ |
+                            KVMPPC_DEBUG_WATCH_WRITE |
+                            KVMPPC_DEBUG_BREAKPOINT))
+                       return -EINVAL;
+               if (type & KVMPPC_DEBUG_BREAKPOINT) {
+                       /* Setting H/W breakpoint */
+                       if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++))
+                               return -EINVAL;
+               } else {
+                       /* Setting H/W watchpoint */
+                       if (kvmppc_booke_add_watchpoint(dbg_reg, addr,
+                                                       type, w++))
+                               return -EINVAL;
+               }
+       }
+       return 0;
+ }
+ 
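A hedged sketch of the userspace half of the interface wired up above: arm one hardware breakpoint with KVM_SET_GUEST_DEBUG, then classify a KVM_EXIT_DEBUG via run->debug.arch.status. It assumes a PowerPC host whose headers carry the uapi fields this series relies on (dbg.arch.bp[], the KVMPPC_DEBUG_* flags); the vcpu fd is a placeholder for one created by a real KVM harness:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int arm_breakpoint(int vcpu_fd, __u64 addr)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
	dbg.arch.bp[0].addr = addr;		/* first IAC slot */
	dbg.arch.bp[0].type = KVMPPC_DEBUG_BREAKPOINT;
	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}

static const char *debug_exit_kind(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_DEBUG)
		return "not a debug exit";
	if (run->debug.arch.status & KVMPPC_DEBUG_BREAKPOINT)
		return "breakpoint";
	if (run->debug.arch.status & KVMPPC_DEBUG_WATCH_WRITE)
		return "write watchpoint";
	if (run->debug.arch.status & KVMPPC_DEBUG_WATCH_READ)
		return "read watchpoint";
	return "single-step or other";
}

int main(void)
{
	struct kvm_run run = { .exit_reason = KVM_EXIT_DEBUG };

	/* placeholder fd: a real harness passes its vcpu fd */
	arm_breakpoint(-1, 0x1000);
	return puts(debug_exit_kind(&run)) == EOF;
}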
  void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
  {
        vcpu->cpu = smp_processor_id();
@@@ -1680,6 -1904,44 +1903,44 @@@ void kvmppc_booke_vcpu_put(struct kvm_v
  {
        current->thread.kvm_vcpu = NULL;
        vcpu->cpu = -1;
+       /* Clear pending debug event in DBSR */
+       kvmppc_clear_dbsr();
+ }
+ 
+ void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+ {
+       vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
+ }
+ 
+ int kvmppc_core_init_vm(struct kvm *kvm)
+ {
+       return kvm->arch.kvm_ops->init_vm(kvm);
+ }
+ 
+ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+ {
+       return kvm->arch.kvm_ops->vcpu_create(kvm, id);
+ }
+ 
+ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+ {
+       vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
+ }
+ 
+ void kvmppc_core_destroy_vm(struct kvm *kvm)
+ {
+       kvm->arch.kvm_ops->destroy_vm(kvm);
+ }
+ 
+ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ {
+       vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu);
+ }
+ 
+ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+ {
+       vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu);
+ }
  
  int __init kvmppc_booke_init(void)
@@@ -385,7 -385,7 +385,7 @@@ static int kvm_cpu_has_interrupt(struc
        }
  
        if ((!rc) && (vcpu->arch.sie_block->ckc <
 -              get_tod_clock() + vcpu->arch.sie_block->epoch)) {
 +              get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) {
                if ((!psw_extint_disabled(vcpu)) &&
                        (vcpu->arch.sie_block->gcr[0] & 0x800ul))
                        rc = 1;
@@@ -425,7 -425,7 +425,7 @@@ int kvm_s390_handle_wait(struct kvm_vcp
                goto no_timer;
        }
  
 -      now = get_tod_clock() + vcpu->arch.sie_block->epoch;
 +      now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
        if (vcpu->arch.sie_block->ckc < now) {
                __unset_cpu_idle(vcpu);
                return 0;
        hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
        VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
  no_timer:
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        spin_lock(&vcpu->arch.local_int.float_int->lock);
        spin_lock_bh(&vcpu->arch.local_int.lock);
        add_wait_queue(&vcpu->wq, &wait);
        remove_wait_queue(&vcpu->wq, &wait);
        spin_unlock_bh(&vcpu->arch.local_int.lock);
        spin_unlock(&vcpu->arch.local_int.float_int->lock);
+       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
        hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
        return 0;
  }
@@@ -515,7 -518,7 +518,7 @@@ void kvm_s390_deliver_pending_interrupt
        }
  
        if ((vcpu->arch.sie_block->ckc <
 -              get_tod_clock() + vcpu->arch.sie_block->epoch))
 +              get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
                __try_deliver_ckc_interrupt(vcpu);
  
        if (atomic_read(&fi->active)) {
diff --combined arch/s390/kvm/kvm-s390.c
@@@ -343,11 -343,10 +343,11 @@@ void kvm_arch_vcpu_uninit(struct kvm_vc
  
  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
  {
 -      save_fp_regs(&vcpu->arch.host_fpregs);
 +      save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
 +      save_fp_regs(vcpu->arch.host_fpregs.fprs);
        save_access_regs(vcpu->arch.host_acrs);
 -      vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
 -      restore_fp_regs(&vcpu->arch.guest_fpregs);
 +      restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
 +      restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
        restore_access_regs(vcpu->run->s.regs.acrs);
        gmap_enable(vcpu->arch.gmap);
        atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@@ -357,11 -356,9 +357,11 @@@ void kvm_arch_vcpu_put(struct kvm_vcpu 
  {
        atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        gmap_disable(vcpu->arch.gmap);
 -      save_fp_regs(&vcpu->arch.guest_fpregs);
 +      save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
 +      save_fp_regs(vcpu->arch.guest_fpregs.fprs);
        save_access_regs(vcpu->run->s.regs.acrs);
 -      restore_fp_regs(&vcpu->arch.host_fpregs);
 +      restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
 +      restore_fp_regs(vcpu->arch.host_fpregs.fprs);
        restore_access_regs(vcpu->arch.host_acrs);
  }
  
@@@ -621,12 -618,9 +621,12 @@@ int kvm_arch_vcpu_ioctl_get_sregs(struc
  
  int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
  {
 +      if (test_fp_ctl(fpu->fpc))
 +              return -EINVAL;
        memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
 -      vcpu->arch.guest_fpregs.fpc = fpu->fpc & FPC_VALID_MASK;
 -      restore_fp_regs(&vcpu->arch.guest_fpregs);
 +      vcpu->arch.guest_fpregs.fpc = fpu->fpc;
 +      restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
 +      restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
        return 0;
  }
  
@@@ -695,9 -689,9 +695,9 @@@ static int kvm_s390_handle_requests(str
        return 0;
  }
  
- static int __vcpu_run(struct kvm_vcpu *vcpu)
+ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
  {
-       int rc;
+       int rc, cpuflags;
  
        memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
  
                return rc;
  
        vcpu->arch.sie_block->icptcode = 0;
-       VCPU_EVENT(vcpu, 6, "entering sie flags %x",
-                  atomic_read(&vcpu->arch.sie_block->cpuflags));
-       trace_kvm_s390_sie_enter(vcpu,
-                                atomic_read(&vcpu->arch.sie_block->cpuflags));
+       cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
+       VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
+       trace_kvm_s390_sie_enter(vcpu, cpuflags);
  
-       /*
-        * As PF_VCPU will be used in fault handler, between guest_enter
-        * and guest_exit should be no uaccess.
-        */
-       preempt_disable();
-       kvm_guest_enter();
-       preempt_enable();
-       rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
-       kvm_guest_exit();
+       return 0;
+ }
+ 
+ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
+ {
+       int rc;
  
        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
                   vcpu->arch.sie_block->icptcode);
        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
  
-       if (rc > 0)
+       if (exit_reason >= 0) {
                rc = 0;
-       if (rc < 0) {
+       } else {
                if (kvm_is_ucontrol(vcpu->kvm)) {
                        rc = SIE_INTERCEPT_UCONTROL;
                } else {
        }
  
        memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
+       if (rc == 0) {
+               if (kvm_is_ucontrol(vcpu->kvm))
+                       rc = -EOPNOTSUPP;
+               else
+                       rc = kvm_handle_sie_intercept(vcpu);
+       }
+       return rc;
+ }
+ 
+ static int __vcpu_run(struct kvm_vcpu *vcpu)
+ {
+       int rc, exit_reason;
+       /*
+        * We try to hold kvm->srcu during most of vcpu_run (except when
+        * running the guest), so that memslots (and other stuff) are protected.
+        */
+       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+       do {
+               rc = vcpu_pre_run(vcpu);
+               if (rc)
+                       break;
+               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+               /*
+                * As PF_VCPU will be used in the fault handler, there must
+                * be no uaccess between guest_enter and guest_exit.
+                */
+               preempt_disable();
+               kvm_guest_enter();
+               preempt_enable();
+               exit_reason = sie64a(vcpu->arch.sie_block,
+                                    vcpu->run->s.regs.gprs);
+               kvm_guest_exit();
+               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+               rc = vcpu_post_run(vcpu, exit_reason);
+       } while (!signal_pending(current) && !rc);
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        return rc;
  }
  
@@@ -755,7 -788,6 +794,6 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
        int rc;
        sigset_t sigsaved;
  
- rerun_vcpu:
        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
  
        }
  
        might_fault();
-       do {
-               rc = __vcpu_run(vcpu);
-               if (rc)
-                       break;
-               if (kvm_is_ucontrol(vcpu->kvm))
-                       rc = -EOPNOTSUPP;
-               else
-                       rc = kvm_handle_sie_intercept(vcpu);
-       } while (!signal_pending(current) && !rc);
-       if (rc == SIE_INTERCEPT_RERUNVCPU)
-               goto rerun_vcpu;
+       rc = __vcpu_run(vcpu);
  
        if (signal_pending(current) && !rc) {
                kvm_run->exit_reason = KVM_EXIT_INTR;
@@@ -882,8 -902,7 +908,8 @@@ int kvm_s390_vcpu_store_status(struct k
         * copying in vcpu load/put. Let's update our copies before we save
         * it into the save area
         */
 -      save_fp_regs(&vcpu->arch.guest_fpregs);
 +      save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
 +      save_fp_regs(vcpu->arch.guest_fpregs.fprs);
        save_access_regs(vcpu->run->s.regs.acrs);
  
        if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
@@@ -958,6 -977,7 +984,7 @@@ long kvm_arch_vcpu_ioctl(struct file *f
  {
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;
+       int idx;
        long r;
  
        switch (ioctl) {
                break;
        }
        case KVM_S390_STORE_STATUS:
+               idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = kvm_s390_vcpu_store_status(vcpu, arg);
+               srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
        case KVM_S390_SET_INITIAL_PSW: {
                psw_t psw;
@@@ -1067,12 -1089,13 +1096,13 @@@ int kvm_arch_vcpu_fault(struct kvm_vcp
        return VM_FAULT_SIGBUS;
  }
  
- void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
                           struct kvm_memory_slot *dont)
  {
  }
  
- int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+                           unsigned long npages)
  {
        return 0;
  }
diff --combined include/linux/sched.h
@@@ -22,7 -22,6 +22,7 @@@ struct sched_param 
  #include <linux/errno.h>
  #include <linux/nodemask.h>
  #include <linux/mm_types.h>
 +#include <linux/preempt.h>
  
  #include <asm/page.h>
  #include <asm/ptrace.h>
@@@ -286,6 -285,14 +286,14 @@@ static inline void lockup_detector_init
  }
  #endif
  
+ #ifdef CONFIG_DETECT_HUNG_TASK
+ void reset_hung_task_detector(void);
+ #else
+ static inline void reset_hung_task_detector(void)
+ {
+ }
+ #endif
+ 
  /* Attach to any functions which should be ignored in wchan output. */
  #define __sched               __attribute__((__section__(".sched.text")))
  
@@@ -323,10 -330,6 +331,10 @@@ static inline void arch_pick_mmap_layou
  extern void set_dumpable(struct mm_struct *mm, int value);
  extern int get_dumpable(struct mm_struct *mm);
  
 +#define SUID_DUMP_DISABLE     0       /* No setuid dumping */
 +#define SUID_DUMP_USER                1       /* Dump as user of process */
 +#define SUID_DUMP_ROOT                2       /* Dump as root */
 +
  /* mm flags */
  /* dumpable bits */
  #define MMF_DUMPABLE      0  /* core dump is permitted */
@@@ -432,14 -435,6 +440,14 @@@ struct task_cputime 
                .sum_exec_runtime = 0,                          \
        }
  
 +#define PREEMPT_ENABLED               (PREEMPT_NEED_RESCHED)
 +
 +#ifdef CONFIG_PREEMPT_COUNT
 +#define PREEMPT_DISABLED      (1 + PREEMPT_ENABLED)
 +#else
 +#define PREEMPT_DISABLED      PREEMPT_ENABLED
 +#endif
 +
  /*
   * Disable preemption until the scheduler is running.
   * Reset by start_kernel()->sched_init()->init_idle().
   * We include PREEMPT_ACTIVE to avoid cond_resched() from working
   * before the scheduler is active -- see should_resched().
   */
 -#define INIT_PREEMPT_COUNT    (1 + PREEMPT_ACTIVE)
 +#define INIT_PREEMPT_COUNT    (PREEMPT_DISABLED + PREEMPT_ACTIVE)
  
  /**
   * struct thread_group_cputimer - thread group interval timer counts
@@@ -781,7 -776,6 +789,7 @@@ enum cpu_idle_type 
  #define SD_ASYM_PACKING               0x0800  /* Place busy groups earlier in the domain */
  #define SD_PREFER_SIBLING     0x1000  /* Prefer to place tasks in a sibling domain */
  #define SD_OVERLAP            0x2000  /* sched_domains of this level overlap */
 +#define SD_NUMA                       0x4000  /* cross-node balancing */
  
  extern int __weak arch_sd_sibiling_asym_packing(void);
  
@@@ -825,10 -819,6 +833,10 @@@ struct sched_domain 
  
        u64 last_update;
  
 +      /* idle_balance() stats */
 +      u64 max_newidle_lb_cost;
 +      unsigned long next_decay_max_lb_cost;
 +
  #ifdef CONFIG_SCHEDSTATS
        /* load_balance() stats */
        unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@@ -1047,8 -1037,6 +1055,8 @@@ struct task_struct 
        struct task_struct *last_wakee;
        unsigned long wakee_flips;
        unsigned long wakee_flip_decay_ts;
 +
 +      int wake_cpu;
  #endif
        int on_rq;
  
        struct hlist_head preempt_notifiers;
  #endif
  
 -      /*
 -       * fpu_counter contains the number of consecutive context switches
 -       * that the FPU is used. If this is over a threshold, the lazy fpu
 -       * saving becomes unlazy to save the trap. This is an unsigned char
 -       * so that after 256 times the counter wraps and the behavior turns
 -       * lazy again; this to deal with bursty apps that only use FPU for
 -       * a short time
 -       */
 -      unsigned char fpu_counter;
  #ifdef CONFIG_BLK_DEV_IO_TRACE
        unsigned int btrace_seq;
  #endif
  #endif
  #ifdef CONFIG_NUMA_BALANCING
        int numa_scan_seq;
 -      int numa_migrate_seq;
        unsigned int numa_scan_period;
 +      unsigned int numa_scan_period_max;
 +      int numa_preferred_nid;
 +      int numa_migrate_deferred;
 +      unsigned long numa_migrate_retry;
        u64 node_stamp;                 /* migration stamp  */
        struct callback_head numa_work;
 +
 +      struct list_head numa_entry;
 +      struct numa_group *numa_group;
 +
 +      /*
 +       * Exponential decaying average of faults on a per-node basis.
 +       * Scheduling placement decisions are made based on these counts.
 +       * The values remain static for the duration of a PTE scan.
 +       */
 +      unsigned long *numa_faults;
 +      unsigned long total_numa_faults;
 +
 +      /*
 +       * numa_faults_buffer records faults per node during the current
 +       * scan window. When the scan completes, the counts in numa_faults
 +       * decay and these values are copied.
 +       */
 +      unsigned long *numa_faults_buffer;
 +
 +      /*
 +       * numa_faults_locality tracks if faults recorded during the last
 +       * scan window were remote/local. The task scan period is adapted
 +       * based on the locality of the faults with different weights
 +       * depending on whether they were shared or private faults.
 +       */
 +      unsigned long numa_faults_locality[2];
 +
 +      unsigned long numa_pages_migrated;
  #endif /* CONFIG_NUMA_BALANCING */
  
        struct rcu_head rcu;
        } memcg_batch;
        unsigned int memcg_kmem_skip_account;
        struct memcg_oom_info {
 +              struct mem_cgroup *memcg;
 +              gfp_t gfp_mask;
 +              int order;
                unsigned int may_oom:1;
 -              unsigned int in_memcg_oom:1;
 -              unsigned int oom_locked:1;
 -              int wakeups;
 -              struct mem_cgroup *wait_on_memcg;
        } memcg_oom;
  #endif
  #ifdef CONFIG_UPROBES
  /* Future-safe accessor for struct task_struct's cpus_allowed. */
  #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
  
 +#define TNF_MIGRATED  0x01
 +#define TNF_NO_GROUP  0x02
 +#define TNF_SHARED    0x04
 +#define TNF_FAULT_LOCAL       0x08
 +
  #ifdef CONFIG_NUMA_BALANCING
 -extern void task_numa_fault(int node, int pages, bool migrated);
 +extern void task_numa_fault(int last_node, int node, int pages, int flags);
 +extern pid_t task_numa_group_id(struct task_struct *p);
  extern void set_numabalancing_state(bool enabled);
 +extern void task_numa_free(struct task_struct *p);
 +
 +extern unsigned int sysctl_numa_balancing_migrate_deferred;
  #else
 -static inline void task_numa_fault(int node, int pages, bool migrated)
 +static inline void task_numa_fault(int last_node, int node, int pages,
 +                                 int flags)
 +{
 +}
 +static inline pid_t task_numa_group_id(struct task_struct *p)
  {
 +      return 0;
  }
  static inline void set_numabalancing_state(bool enabled)
  {
  }
 +static inline void task_numa_free(struct task_struct *p)
 +{
 +}
  #endif
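The numa_faults comments in the task_struct hunk above describe the end-of-window step: the long-term per-node counts decay and the finished window's buffer is folded in. A small runnable sketch of that arithmetic, assuming a halve-then-add decay; the function name and node count are illustrative:

#include <stdio.h>

#define NR_NODES 4

static void fold_scan_window(unsigned long faults[NR_NODES],
			     unsigned long buffer[NR_NODES])
{
	for (int n = 0; n < NR_NODES; n++) {
		faults[n] >>= 1;		/* exponential decay of history */
		faults[n] += buffer[n];		/* fold in the finished window */
		buffer[n] = 0;			/* restart the next window */
	}
}

int main(void)
{
	unsigned long faults[NR_NODES] = { 64, 8, 0, 0 };
	unsigned long buffer[NR_NODES] = { 0, 16, 4, 0 };

	fold_scan_window(faults, buffer);
	for (int n = 0; n < NR_NODES; n++)
		printf("node %d: %lu\n", n, faults[n]);	/* 32, 20, 4, 0 */
	return 0;
}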
  
  static inline struct pid *task_pid(struct task_struct *task)
@@@ -2033,7 -1983,7 +2041,7 @@@ extern void wake_up_new_task(struct tas
  #else
   static inline void kick_process(struct task_struct *tsk) { }
  #endif
 -extern void sched_fork(struct task_struct *p);
 +extern void sched_fork(unsigned long clone_flags, struct task_struct *p);
  extern void sched_dead(struct task_struct *p);
  
  extern void proc_caches_init(void);
@@@ -2460,6 -2410,11 +2468,6 @@@ static inline int signal_pending_state(
        return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
  }
  
 -static inline int need_resched(void)
 -{
 -      return unlikely(test_thread_flag(TIF_NEED_RESCHED));
 -}
 -
  /*
   * cond_resched() and cond_resched_lock(): latency reduction via
   * explicit rescheduling in places that are safe. The return
@@@ -2528,105 -2483,36 +2536,105 @@@ static inline int tsk_is_polling(struc
  {
        return task_thread_info(p)->status & TS_POLLING;
  }
 -static inline void current_set_polling(void)
 +static inline void __current_set_polling(void)
  {
        current_thread_info()->status |= TS_POLLING;
  }
  
 -static inline void current_clr_polling(void)
 +static inline bool __must_check current_set_polling_and_test(void)
 +{
 +      __current_set_polling();
 +
 +      /*
 +       * Polling state must be visible before we test NEED_RESCHED,
 +       * paired by resched_task()
 +       */
 +      smp_mb();
 +
 +      return unlikely(tif_need_resched());
 +}
 +
 +static inline void __current_clr_polling(void)
  {
        current_thread_info()->status &= ~TS_POLLING;
 -      smp_mb__after_clear_bit();
 +}
 +
 +static inline bool __must_check current_clr_polling_and_test(void)
 +{
 +      __current_clr_polling();
 +
 +      /*
 +       * Polling state must be visible before we test NEED_RESCHED,
 +       * paired by resched_task()
 +       */
 +      smp_mb();
 +
 +      return unlikely(tif_need_resched());
  }
  #elif defined(TIF_POLLING_NRFLAG)
  static inline int tsk_is_polling(struct task_struct *p)
  {
        return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
  }
 -static inline void current_set_polling(void)
 +
 +static inline void __current_set_polling(void)
  {
        set_thread_flag(TIF_POLLING_NRFLAG);
  }
  
 -static inline void current_clr_polling(void)
 +static inline bool __must_check current_set_polling_and_test(void)
 +{
 +      __current_set_polling();
 +
 +      /*
 +       * Polling state must be visible before we test NEED_RESCHED,
 +       * paired by resched_task()
 +       *
 +       * XXX: assumes set/clear bit are identical barrier wise.
 +       */
 +      smp_mb__after_clear_bit();
 +
 +      return unlikely(tif_need_resched());
 +}
 +
 +static inline void __current_clr_polling(void)
  {
        clear_thread_flag(TIF_POLLING_NRFLAG);
  }
 +
 +static inline bool __must_check current_clr_polling_and_test(void)
 +{
 +      __current_clr_polling();
 +
 +      /*
 +       * Polling state must be visible before we test NEED_RESCHED,
 +       * paired by resched_task()
 +       */
 +      smp_mb__after_clear_bit();
 +
 +      return unlikely(tif_need_resched());
 +}
 +
  #else
  static inline int tsk_is_polling(struct task_struct *p) { return 0; }
 -static inline void current_set_polling(void) { }
 -static inline void current_clr_polling(void) { }
 +static inline void __current_set_polling(void) { }
 +static inline void __current_clr_polling(void) { }
 +
 +static inline bool __must_check current_set_polling_and_test(void)
 +{
 +      return unlikely(tif_need_resched());
 +}
 +static inline bool __must_check current_clr_polling_and_test(void)
 +{
 +      return unlikely(tif_need_resched());
 +}
  #endif
  
 +static __always_inline bool need_resched(void)
 +{
 +      return unlikely(tif_need_resched());
 +}
 +
  /*
   * Thread group CPU time accounting.
   */
@@@ -2668,11 -2554,6 +2676,11 @@@ static inline unsigned int task_cpu(con
        return task_thread_info(p)->cpu;
  }
  
 +static inline int task_node(const struct task_struct *p)
 +{
 +      return cpu_to_node(task_cpu(p));
 +}
 +
  extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
  
  #else
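Note: the polling helpers above split into __current_{set,clr}_polling(), which only flip the state, and *_and_test() variants that additionally re-check TIF_NEED_RESCHED behind a full barrier. A sketch of the intended idle-loop shape, assuming a hypothetical poll_idle_sketch() rather than the real arch/cpuidle code:

	/* Advertise polling, then spin on need_resched().  The smp_mb() in
	 * current_set_polling_and_test() pairs with the barrier in
	 * resched_task(): either this CPU observes NEED_RESCHED here, or
	 * the waker observes the polling flag and can skip the resched IPI.
	 */
	static void poll_idle_sketch(void)
	{
		if (!current_set_polling_and_test()) {
			while (!need_resched())
				cpu_relax();	/* wait for a waker to set NEED_RESCHED */
		}
		__current_clr_polling();
	}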
diff --combined kernel/hung_task.c
  #include <linux/export.h>
  #include <linux/sysctl.h>
  #include <linux/utsname.h>
 +#include <trace/events/sched.h>
  
  /*
   * The number of tasks checked:
   */
 -unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
 +int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
  
  /*
   * Limit number of tasks checked in a batch.
@@@ -93,9 -92,6 +93,9 @@@ static void check_hung_task(struct task
                t->last_switch_count = switch_count;
                return;
        }
 +
 +      trace_sched_process_hang(t);
 +
        if (!sysctl_hung_task_warnings)
                return;
        sysctl_hung_task_warnings--;
@@@ -207,6 -203,14 +207,14 @@@ int proc_dohung_task_timeout_secs(struc
        return ret;
  }
  
+ static atomic_t reset_hung_task = ATOMIC_INIT(0);
+ 
+ void reset_hung_task_detector(void)
+ {
+       atomic_set(&reset_hung_task, 1);
+ }
+ EXPORT_SYMBOL_GPL(reset_hung_task_detector);
+ 
  /*
   * kthread which checks for tasks stuck in D state
   */
@@@ -220,6 -224,9 +228,9 @@@ static int watchdog(void *dummy
                while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
                        timeout = sysctl_hung_task_timeout_secs;
  
+               if (atomic_xchg(&reset_hung_task, 0))
+                       continue;
+ 
                check_hung_uninterruptible_tasks(timeout);
        }
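Note: reset_hung_task_detector() lets code that knows the machine legitimately stalled (for instance a paravirtualized clock source that notices the host paused the whole guest) discard the in-progress check interval. The watchdog consumes the request with atomic_xchg(), which reads and clears it in one step, so a reset posted at any point before the check cannot be lost. A hedged caller sketch; the function name is hypothetical and the declaration is assumed to live in <linux/sched.h>:

	/* Hypothetical caller: a clocksource driver that has just detected
	 * a host-side pause of the whole guest.  Tasks that sat in D state
	 * across the pause are not really hung, so discard this interval.
	 */
	static void guest_pause_noticed(void)
	{
		reset_hung_task_detector();
	}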
  
diff --combined virt/kvm/kvm_main.c
@@@ -70,7 -70,8 +70,8 @@@ MODULE_LICENSE("GPL")
   *            kvm->lock --> kvm->slots_lock --> kvm->irq_lock
   */
  
- DEFINE_RAW_SPINLOCK(kvm_lock);
+ DEFINE_SPINLOCK(kvm_lock);
+ static DEFINE_RAW_SPINLOCK(kvm_count_lock);
  LIST_HEAD(vm_list);
  
  static cpumask_var_t cpus_hardware_enabled;
@@@ -186,6 -187,7 +187,7 @@@ void kvm_flush_remote_tlbs(struct kvm *
                ++kvm->stat.remote_tlb_flush;
        cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
  }
+ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
  
  void kvm_reload_remote_mmus(struct kvm *kvm)
  {
@@@ -490,9 -492,9 +492,9 @@@ static struct kvm *kvm_create_vm(unsign
        if (r)
                goto out_err;
  
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_add(&kvm->vm_list, &vm_list);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
  
        return kvm;
  
@@@ -540,13 -542,13 +542,13 @@@ static void kvm_destroy_dirty_bitmap(st
  /*
   * Free any memory in @free but not in @dont.
   */
- static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
+ static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
                                  struct kvm_memory_slot *dont)
  {
        if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
                kvm_destroy_dirty_bitmap(free);
  
-       kvm_arch_free_memslot(free, dont);
+       kvm_arch_free_memslot(kvm, free, dont);
  
        free->npages = 0;
  }
@@@ -557,7 -559,7 +559,7 @@@ void kvm_free_physmem(struct kvm *kvm
        struct kvm_memory_slot *memslot;
  
        kvm_for_each_memslot(memslot, slots)
-               kvm_free_physmem_slot(memslot, NULL);
+               kvm_free_physmem_slot(kvm, memslot, NULL);
  
        kfree(kvm->memslots);
  }
@@@ -581,9 -583,9 +583,9 @@@ static void kvm_destroy_vm(struct kvm *
        struct mm_struct *mm = kvm->mm;
  
        kvm_arch_sync_events(kvm);
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_del(&kvm->vm_list);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        kvm_free_irq_routing(kvm);
        for (i = 0; i < KVM_NR_BUSES; i++)
                kvm_io_bus_destroy(kvm->buses[i]);
@@@ -821,7 -823,7 +823,7 @@@ int __kvm_set_memory_region(struct kvm 
        if (change == KVM_MR_CREATE) {
                new.userspace_addr = mem->userspace_addr;
  
-               if (kvm_arch_create_memslot(&new, npages))
+               if (kvm_arch_create_memslot(kvm, &new, npages))
                        goto out_free;
        }
  
                        goto out_free;
        }
  
+       /* actual memory is freed via old in kvm_free_physmem_slot below */
+       if (change == KVM_MR_DELETE) {
+               new.dirty_bitmap = NULL;
+               memset(&new.arch, 0, sizeof(new.arch));
+       }
+ 
+       old_memslots = install_new_memslots(kvm, slots, &new);
+ 
+       kvm_arch_commit_memory_region(kvm, mem, &old, change);
+ 
+       kvm_free_physmem_slot(kvm, &old, &new);
+       kfree(old_memslots);
+ 
        /*
         * IOMMU mapping:  New slots need to be mapped.  Old slots need to be
         * un-mapped and re-mapped if their base changes.  Since base change
         * unmapping is handled above with slot deletion, mapping alone is
         * needed here.  Anything else the iommu might care about for existing
         * slots (size changes, userspace addr changes and read-only flag
         * changes) is disallowed above, so any other attribute changes getting
         * here can be skipped.
         */
        if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
                r = kvm_iommu_map_pages(kvm, &new);
-               if (r)
-                       goto out_slots;
-       }
-       /* actual memory is freed via old in kvm_free_physmem_slot below */
-       if (change == KVM_MR_DELETE) {
-               new.dirty_bitmap = NULL;
-               memset(&new.arch, 0, sizeof(new.arch));
+               return r;
        }
  
-       old_memslots = install_new_memslots(kvm, slots, &new);
-       kvm_arch_commit_memory_region(kvm, mem, &old, change);
-       kvm_free_physmem_slot(&old, &new);
-       kfree(old_memslots);
        return 0;
  
  out_slots:
        kfree(slots);
  out_free:
-       kvm_free_physmem_slot(&new, &old);
+       kvm_free_physmem_slot(kvm, &new, &old);
  out:
        return r;
  }
@@@ -964,6 -965,7 +965,7 @@@ int kvm_get_dirty_log(struct kvm *kvm
  out:
        return r;
  }
+ EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
  
  bool kvm_largepages_enabled(void)
  {
@@@ -1654,6 -1656,7 +1656,7 @@@ void mark_page_dirty(struct kvm *kvm, g
        memslot = gfn_to_memslot(kvm, gfn);
        mark_page_dirty_in_slot(kvm, memslot, gfn);
  }
+ EXPORT_SYMBOL_GPL(mark_page_dirty);
  
  /*
   * The vCPU has executed a HLT instruction with in-kernel mode enabled.
@@@ -1679,6 -1682,7 +1682,7 @@@ void kvm_vcpu_block(struct kvm_vcpu *vc
  
        finish_wait(&vcpu->wq, &wait);
  }
+ EXPORT_SYMBOL_GPL(kvm_vcpu_block);
  
  #ifndef CONFIG_S390
  /*
@@@ -2270,6 -2274,11 +2274,11 @@@ static int kvm_ioctl_create_device(stru
        case KVM_DEV_TYPE_XICS:
                ops = &kvm_xics_ops;
                break;
+ #endif
+ #ifdef CONFIG_KVM_VFIO
+       case KVM_DEV_TYPE_VFIO:
+               ops = &kvm_vfio_ops;
+               break;
  #endif
        default:
                return -ENODEV;
  }
  #endif
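Note: with KVM_DEV_TYPE_VFIO accepted by kvm_ioctl_create_device(), userspace can instantiate the kvm-vfio pseudo device on a VM fd and then register VFIO group fds with it. A minimal userspace sketch; error handling is elided, and the KVM_DEV_VFIO_GROUP / KVM_DEV_VFIO_GROUP_ADD attribute names are assumptions from the new device API rather than anything shown in this hunk:

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* vm_fd: open VM descriptor; group_fd: open /dev/vfio/<N> descriptor.
	 * Both are assumed to have been set up already.
	 */
	static int attach_vfio_group(int vm_fd, int group_fd)
	{
		struct kvm_create_device cd = {
			.type = KVM_DEV_TYPE_VFIO,
		};
		struct kvm_device_attr attr = { 0 };

		if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
			return -1;

		/* hand the VFIO group over to the new kvm-vfio device */
		attr.group = KVM_DEV_VFIO_GROUP;
		attr.attr = KVM_DEV_VFIO_GROUP_ADD;
		attr.addr = (__u64)(unsigned long)&group_fd;

		return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
	}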
  
- static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
- {
-       struct page *page[1];
-       unsigned long addr;
-       int npages;
-       gfn_t gfn = vmf->pgoff;
-       struct kvm *kvm = vma->vm_file->private_data;
-       addr = gfn_to_hva(kvm, gfn);
-       if (kvm_is_error_hva(addr))
-               return VM_FAULT_SIGBUS;
-       npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
-                               NULL);
-       if (unlikely(npages != 1))
-               return VM_FAULT_SIGBUS;
-       vmf->page = page[0];
-       return 0;
- }
- }
- 
- static const struct vm_operations_struct kvm_vm_vm_ops = {
-       .fault = kvm_vm_fault,
- };
- 
- static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
-       vma->vm_ops = &kvm_vm_vm_ops;
-       return 0;
- }
  static struct file_operations kvm_vm_fops = {
        .release        = kvm_vm_release,
        .unlocked_ioctl = kvm_vm_ioctl,
  #ifdef CONFIG_COMPAT
        .compat_ioctl   = kvm_vm_compat_ioctl,
  #endif
-       .mmap           = kvm_vm_mmap,
        .llseek         = noop_llseek,
  };
  
@@@ -2683,11 -2660,12 +2660,12 @@@ static void hardware_enable_nolock(voi
        }
  }
  
- static void hardware_enable(void *junk)
+ static void hardware_enable(void)
  {
-       raw_spin_lock(&kvm_lock);
-       hardware_enable_nolock(junk);
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
+       if (kvm_usage_count)
+               hardware_enable_nolock(NULL);
+       raw_spin_unlock(&kvm_count_lock);
  }
  
  static void hardware_disable_nolock(void *junk)
        kvm_arch_hardware_disable(NULL);
  }
  
- static void hardware_disable(void *junk)
+ static void hardware_disable(void)
  {
-       raw_spin_lock(&kvm_lock);
-       hardware_disable_nolock(junk);
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
+       if (kvm_usage_count)
+               hardware_disable_nolock(NULL);
+       raw_spin_unlock(&kvm_count_lock);
  }
  
  static void hardware_disable_all_nolock(void)
  
  static void hardware_disable_all(void)
  {
-       raw_spin_lock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
        hardware_disable_all_nolock();
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_unlock(&kvm_count_lock);
  }
  
  static int hardware_enable_all(void)
  {
        int r = 0;
  
-       raw_spin_lock(&kvm_lock);
+       raw_spin_lock(&kvm_count_lock);
  
        kvm_usage_count++;
        if (kvm_usage_count == 1) {
                }
        }
  
-       raw_spin_unlock(&kvm_lock);
+       raw_spin_unlock(&kvm_count_lock);
  
        return r;
  }
@@@ -2750,20 -2729,17 +2729,17 @@@ static int kvm_cpu_hotplug(struct notif
  {
        int cpu = (long)v;
  
-       if (!kvm_usage_count)
-               return NOTIFY_OK;
        val &= ~CPU_TASKS_FROZEN;
        switch (val) {
        case CPU_DYING:
                printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
                       cpu);
-               hardware_disable(NULL);
+               hardware_disable();
                break;
        case CPU_STARTING:
                printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
                       cpu);
-               hardware_enable(NULL);
+               hardware_enable();
                break;
        }
        return NOTIFY_OK;
@@@ -3056,10 -3032,10 +3032,10 @@@ static int vm_stat_get(void *_offset, u
        struct kvm *kvm;
  
        *val = 0;
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
                *val += *(u32 *)((void *)kvm + offset);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        return 0;
  }
  
@@@ -3073,12 -3049,12 +3049,12 @@@ static int vcpu_stat_get(void *_offset
        int i;
  
        *val = 0;
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
                kvm_for_each_vcpu(i, vcpu, kvm)
                        *val += *(u32 *)((void *)vcpu + offset);
  
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
        return 0;
  }
  
@@@ -3091,7 -3067,7 +3067,7 @@@ static const struct file_operations *st
  
  static int kvm_init_debug(void)
  {
 -      int r = -EFAULT;
 +      int r = -EEXIST;
        struct kvm_stats_debugfs_item *p;
  
        kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
@@@ -3133,7 -3109,7 +3109,7 @@@ static int kvm_suspend(void
  static void kvm_resume(void)
  {
        if (kvm_usage_count) {
-               WARN_ON(raw_spin_is_locked(&kvm_lock));
+               WARN_ON(raw_spin_is_locked(&kvm_count_lock));
                hardware_enable_nolock(NULL);
        }
  }
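Note: taken together, the kvm_main.c hunks split the old raw kvm_lock in two. vm_list and the debugfs stat walkers stay under a plain spinlock_t kvm_lock (which may sleep on PREEMPT_RT), while kvm_usage_count and per-CPU hardware enabling move under the new raw kvm_count_lock, which stays a true spinning lock because hardware_enable()/hardware_disable() run from CPU-hotplug notifiers that cannot sleep. A sketch of the resulting convention; count_vms() is a hypothetical helper, not part of the patch:

	/* Walk vm_list under the ordinary kvm_lock; kvm_usage_count is
	 * never touched here, that is kvm_count_lock territory.
	 */
	static int count_vms(void)
	{
		struct kvm *kvm;
		int n = 0;

		spin_lock(&kvm_lock);
		list_for_each_entry(kvm, &vm_list, vm_list)
			n++;
		spin_unlock(&kvm_lock);

		return n;
	}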