From: Linus Torvalds Date: Tue, 2 Aug 2016 20:11:27 +0000 (-0400) Subject: Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm X-Git-Tag: v4.8-rc1~60 X-Git-Url: http://git.cascardo.info/?p=cascardo%2Flinux.git;a=commitdiff_plain;h=221bb8a46e230b9824204ae86537183d9991ff2a Merge tag 'for-linus' of git://git./virt/kvm/kvm Pull KVM updates from Paolo Bonzini: - ARM: GICv3 ITS emulation and various fixes. Removal of the old VGIC implementation. - s390: support for trapping software breakpoints, nested virtualization (vSIE), the STHYI opcode, initial extensions for CPU model support. - MIPS: support for MIPS64 hosts (32-bit guests only) and lots of cleanups, preliminary to this and the upcoming support for hardware virtualization extensions. - x86: support for execute-only mappings in nested EPT; reduced vmexit latency for TSC deadline timer (by about 30%) on Intel hosts; support for more than 255 vCPUs. - PPC: bugfixes. * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (302 commits) KVM: PPC: Introduce KVM_CAP_PPC_HTM MIPS: Select HAVE_KVM for MIPS64_R{2,6} MIPS: KVM: Reset CP0_PageMask during host TLB flush MIPS: KVM: Fix ptr->int cast via KVM_GUEST_KSEGX() MIPS: KVM: Sign extend MFC0/RDHWR results MIPS: KVM: Fix 64-bit big endian dynamic translation MIPS: KVM: Fail if ebase doesn't fit in CP0_EBase MIPS: KVM: Use 64-bit CP0_EBase when appropriate MIPS: KVM: Set CP0_Status.KX on MIPS64 MIPS: KVM: Make entry code MIPS64 friendly MIPS: KVM: Use kmap instead of CKSEG0ADDR() MIPS: KVM: Use virt_to_phys() to get commpage PFN MIPS: Fix definition of KSEGX() for 64-bit KVM: VMX: Add VMCS to CPU's loaded VMCSs before VMPTRLD kvm: x86: nVMX: maintain internal copy of current VMCS KVM: PPC: Book3S HV: Save/restore TM state in H_CEDE KVM: PPC: Book3S HV: Pull out TM state save/restore into separate procedures KVM: arm64: vgic-its: Simplify MAPI error handling KVM: arm64: vgic-its: Make vgic_its_cmd_handle_mapi similar to other handlers KVM: arm64: vgic-its: Turn device_id validation into generic ID validation ... --- 221bb8a46e230b9824204ae86537183d9991ff2a diff --cc arch/powerpc/include/asm/paca.h index ad171e979ab0,4b17bd058e01..148303e7771f --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@@ -25,7 -25,7 +25,8 @@@ #ifdef CONFIG_KVM_BOOK3S_64_HANDLER #include #endif +#include + #include register struct paca_struct *local_paca asm("r13"); diff --cc arch/powerpc/kernel/Makefile index fe4c075bcf50,6972a23433d3..b2027a5cf508 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@@ -41,7 -41,8 +41,7 @@@ obj-$(CONFIG_VDSO32) += vdso32 obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o - obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o + obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o hmi.o -obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o diff --cc arch/powerpc/kernel/exceptions-64s.S index 6200e4925d26,0eba47e074b9..694def6c9d61 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@@ -669,8 -680,8 +669,10 @@@ _GLOBAL(__replay_interrupt BEGIN_FTR_SECTION cmpwi r3,0xe80 beq h_doorbell_common + cmpwi r3,0xea0 + beq h_virt_irq_common + cmpwi r3,0xe60 + beq hmi_exception_common FTR_SECTION_ELSE cmpwi r3,0xa00 beq doorbell_super_common @@@ -1161,18 -1172,9 +1163,18 @@@ fwnmi_data_area . = 0x8000 #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ + STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception) + STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception) + +#ifdef CONFIG_CBE_RAS + STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception) + STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception) + STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception) +#endif /* CONFIG_CBE_RAS */ + .globl hmi_exception_early hmi_exception_early: - EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60) + EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, 0xe62) mr r10,r1 /* Save r1 */ ld r1,PACAEMERGSP(r13) /* Use emergency stack */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ diff --cc arch/powerpc/kernel/idle_book3s.S index 335eb6cedae5,000000000000..8a56a51fc0cb mode 100644,000000..100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@@ -1,662 -1,0 +1,664 @@@ +/* + * This file contains idle entry/exit functions for POWER7, + * POWER8 and POWER9 CPUs. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + +/* + * Use unused space in the interrupt stack to save and restore + * registers for winkle support. + */ +#define _SDR1 GPR3 +#define _RPR GPR4 +#define _SPURR GPR5 +#define _PURR GPR6 +#define _TSCR GPR7 +#define _DSCR GPR8 +#define _AMOR GPR9 +#define _WORT GPR10 +#define _WORC GPR11 +#define _PTCR GPR12 + +#define PSSCR_HV_TEMPLATE PSSCR_ESL | PSSCR_EC | \ + PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ + PSSCR_MTL_MASK + +/* Idle state entry routines */ + +#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ + /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ + std r0,0(r1); \ + ptesync; \ + ld r0,0(r1); \ +1: cmp cr0,r0,r0; \ + bne 1b; \ + IDLE_INST; \ + b . + + .text + +/* + * Used by threads before entering deep idle states. Saves SPRs + * in interrupt stack frame + */ +save_sprs_to_stack: + /* + * Note all register i.e per-core, per-subcore or per-thread is saved + * here since any thread in the core might wake up first + */ +BEGIN_FTR_SECTION + mfspr r3,SPRN_PTCR + std r3,_PTCR(r1) + /* + * Note - SDR1 is dropped in Power ISA v3. Hence not restoring + * SDR1 here + */ +FTR_SECTION_ELSE + mfspr r3,SPRN_SDR1 + std r3,_SDR1(r1) +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) + mfspr r3,SPRN_RPR + std r3,_RPR(r1) + mfspr r3,SPRN_SPURR + std r3,_SPURR(r1) + mfspr r3,SPRN_PURR + std r3,_PURR(r1) + mfspr r3,SPRN_TSCR + std r3,_TSCR(r1) + mfspr r3,SPRN_DSCR + std r3,_DSCR(r1) + mfspr r3,SPRN_AMOR + std r3,_AMOR(r1) + mfspr r3,SPRN_WORT + std r3,_WORT(r1) + mfspr r3,SPRN_WORC + std r3,_WORC(r1) + + blr + +/* + * Used by threads when the lock bit of core_idle_state is set. + * Threads will spin in HMT_LOW until the lock bit is cleared. + * r14 - pointer to core_idle_state + * r15 - used to load contents of core_idle_state + */ + +core_idle_lock_held: + HMT_LOW +3: lwz r15,0(r14) + andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT + bne 3b + HMT_MEDIUM + lwarx r15,0,r14 + blr + +/* + * Pass requested state in r3: + * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8 + * - Requested STOP state in POWER9 + * + * To check IRQ_HAPPENED in r4 + * 0 - don't check + * 1 - check + * + * Address to 'rfid' to in r5 + */ +_GLOBAL(pnv_powersave_common) + /* Use r3 to pass state nap/sleep/winkle */ + /* NAP is a state loss, we create a regs frame on the + * stack, fill it up with the state we care about and + * stick a pointer to it in PACAR1. We really only + * need to save PC, some CR bits and the NV GPRs, + * but for now an interrupt frame will do. + */ + mflr r0 + std r0,16(r1) + stdu r1,-INT_FRAME_SIZE(r1) + std r0,_LINK(r1) + std r0,_NIP(r1) + + /* Hard disable interrupts */ + mfmsr r9 + rldicl r9,r9,48,1 + rotldi r9,r9,16 + mtmsrd r9,1 /* hard-disable interrupts */ + + /* Check if something happened while soft-disabled */ + lbz r0,PACAIRQHAPPENED(r13) + andi. r0,r0,~PACA_IRQ_HARD_DIS@l + beq 1f + cmpwi cr0,r4,0 + beq 1f + addi r1,r1,INT_FRAME_SIZE + ld r0,16(r1) + li r3,0 /* Return 0 (no nap) */ + mtlr r0 + blr + +1: /* We mark irqs hard disabled as this is the state we'll + * be in when returning and we need to tell arch_local_irq_restore() + * about it + */ + li r0,PACA_IRQ_HARD_DIS + stb r0,PACAIRQHAPPENED(r13) + + /* We haven't lost state ... yet */ + li r0,0 + stb r0,PACA_NAPSTATELOST(r13) + + /* Continue saving state */ + SAVE_GPR(2, r1) + SAVE_NVGPRS(r1) + mfcr r4 + std r4,_CCR(r1) + std r9,_MSR(r1) + std r1,PACAR1(r13) + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle */ + li r4,KVM_HWTHREAD_IN_IDLE + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif + + /* + * Go to real mode to do the nap, as required by the architecture. + * Also, we need to be in real mode before setting hwthread_state, + * because as soon as we do that, another thread can switch + * the MMU context to the guest. + */ + LOAD_REG_IMMEDIATE(r7, MSR_IDLE) + li r6, MSR_RI + andc r6, r9, r6 + mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ + mtspr SPRN_SRR0, r5 + mtspr SPRN_SRR1, r7 + rfid + + .globl pnv_enter_arch207_idle_mode +pnv_enter_arch207_idle_mode: + stb r3,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr3,r3,PNV_THREAD_SLEEP + bge cr3,2f + IDLE_STATE_ENTER_SEQ(PPC_NAP) + /* No return */ +2: + /* Sleep or winkle */ + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) +lwarx_loop1: + lwarx r15,0,r14 + + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bnel core_idle_lock_held + + andc r15,r15,r7 /* Clear thread bit */ + + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS + +/* + * If cr0 = 0, then current thread is the last thread of the core entering + * sleep. Last thread needs to execute the hardware bug workaround code if + * required by the platform. + * Make the workaround call unconditionally here. The below branch call is + * patched out when the idle states are discovered if the platform does not + * require it. + */ +.global pnv_fastsleep_workaround_at_entry +pnv_fastsleep_workaround_at_entry: + beq fastsleep_workaround_at_entry + + stwcx. r15,0,r14 + bne- lwarx_loop1 + isync + +common_enter: /* common code for all the threads entering sleep or winkle */ + bgt cr3,enter_winkle + IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + +fastsleep_workaround_at_entry: + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT + stwcx. r15,0,r14 + bne- lwarx_loop1 + isync + + /* Fast sleep workaround */ + li r3,1 + li r4,1 + bl opal_rm_config_cpu_idle_state + + /* Clear Lock bit */ + li r0,0 + lwsync + stw r0,0(r14) + b common_enter + +enter_winkle: + bl save_sprs_to_stack + + IDLE_STATE_ENTER_SEQ(PPC_WINKLE) + +/* + * r3 - requested stop state + */ +power_enter_stop: +/* + * Check if the requested state is a deep idle state. + */ + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) + cmpd r3,r4 + bge 2f + IDLE_STATE_ENTER_SEQ(PPC_STOP) +2: +/* + * Entering deep idle state. + * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to + * stack and enter stop + */ + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) + +lwarx_loop_stop: + lwarx r15,0,r14 + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bnel core_idle_lock_held + andc r15,r15,r7 /* Clear thread bit */ + + stwcx. r15,0,r14 + bne- lwarx_loop_stop + isync + + bl save_sprs_to_stack + + IDLE_STATE_ENTER_SEQ(PPC_STOP) + +_GLOBAL(power7_idle) + /* Now check if user or arch enabled NAP mode */ + LOAD_REG_ADDRBASE(r3,powersave_nap) + lwz r4,ADDROFF(powersave_nap)(r3) + cmpwi 0,r4,0 + beqlr + li r3, 1 + /* fall through */ + +_GLOBAL(power7_nap) + mr r4,r3 + li r3,PNV_THREAD_NAP + LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) + b pnv_powersave_common + /* No return */ + +_GLOBAL(power7_sleep) + li r3,PNV_THREAD_SLEEP + li r4,1 + LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) + b pnv_powersave_common + /* No return */ + +_GLOBAL(power7_winkle) + li r3,PNV_THREAD_WINKLE + li r4,1 + LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) + b pnv_powersave_common + /* No return */ + +#define CHECK_HMI_INTERRUPT \ + mfspr r0,SPRN_SRR1; \ +BEGIN_FTR_SECTION_NESTED(66); \ + rlwinm r0,r0,45-31,0xf; /* extract wake reason field (P8) */ \ +FTR_SECTION_ELSE_NESTED(66); \ + rlwinm r0,r0,45-31,0xe; /* P7 wake reason field is 3 bits */ \ +ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ + cmpwi r0,0xa; /* Hypervisor maintenance ? */ \ + bne 20f; \ + /* Invoke opal call to handle hmi */ \ + ld r2,PACATOC(r13); \ + ld r1,PACAR1(r13); \ + std r3,ORIG_GPR3(r1); /* Save original r3 */ \ - bl opal_rm_handle_hmi; \ ++ li r3,0; /* NULL argument */ \ ++ bl hmi_exception_realmode; \ ++ nop; \ + ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ +20: nop; + + +/* + * r3 - requested stop state + */ +_GLOBAL(power9_idle_stop) + LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE) + or r4,r4,r3 + mtspr SPRN_PSSCR, r4 + li r4, 1 + LOAD_REG_ADDR(r5,power_enter_stop) + b pnv_powersave_common + /* No return */ +/* + * Called from reset vector. Check whether we have woken up with + * hypervisor state loss. If yes, restore hypervisor state and return + * back to reset vector. + * + * r13 - Contents of HSPRG0 + * cr3 - set to gt if waking up with partial/complete hypervisor state loss + */ +_GLOBAL(pnv_restore_hyp_resource) + ld r2,PACATOC(r13); +BEGIN_FTR_SECTION + /* + * POWER ISA 3. Use PSSCR to determine if we + * are waking up from deep idle state + */ + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) + + mfspr r5,SPRN_PSSCR + /* + * 0-3 bits correspond to Power-Saving Level Status + * which indicates the idle state we are waking up from + */ + rldicl r5,r5,4,60 + cmpd cr4,r5,r4 + bge cr4,pnv_wakeup_tb_loss + /* + * Waking up without hypervisor state loss. Return to + * reset vector + */ + blr + +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + + /* + * POWER ISA 2.07 or less. + * Check if last bit of HSPGR0 is set. This indicates whether we are + * waking up from winkle. + */ + clrldi r5,r13,63 + clrrdi r13,r13,1 + cmpwi cr4,r5,1 + mtspr SPRN_HSPRG0,r13 + + lbz r0,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr2,r0,PNV_THREAD_NAP + bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */ + + /* + * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking + * up from nap. At this stage CR3 shouldn't contains 'gt' since that + * indicates we are waking with hypervisor state loss from nap. + */ + bgt cr3,. + + blr /* Return back to System Reset vector from where + pnv_restore_hyp_resource was invoked */ + +/* + * Called if waking up from idle state which can cause either partial or + * complete hyp state loss. + * In POWER8, called if waking up from fastsleep or winkle + * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state + * + * r13 - PACA + * cr3 - gt if waking up with partial/complete hypervisor state loss + * cr4 - eq if waking up from complete hypervisor state loss. + */ +_GLOBAL(pnv_wakeup_tb_loss) + ld r1,PACAR1(r13) + /* + * Before entering any idle state, the NVGPRs are saved in the stack + * and they are restored before switching to the process context. Hence + * until they are restored, they are free to be used. + * + * Save SRR1 and LR in NVGPRs as they might be clobbered in + * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required + * to determine the wakeup reason if we branch to kvm_start_guest. LR + * is required to return back to reset vector after hypervisor state + * restore is complete. + */ + mflr r17 + mfspr r16,SPRN_SRR1 +BEGIN_FTR_SECTION + CHECK_HMI_INTERRUPT +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) + + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) +lwarx_loop2: + lwarx r15,0,r14 + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + /* + * Lock bit is set in one of the 2 cases- + * a. In the sleep/winkle enter path, the last thread is executing + * fastsleep workaround code. + * b. In the wake up path, another thread is executing fastsleep + * workaround undo code or resyncing timebase or restoring context + * In either case loop until the lock bit is cleared. + */ + bnel core_idle_lock_held + + cmpwi cr2,r15,0 + + /* + * At this stage + * cr2 - eq if first thread to wakeup in core + * cr3- gt if waking up with partial/complete hypervisor state loss + * cr4 - eq if waking up from complete hypervisor state loss. + */ + + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT + stwcx. r15,0,r14 + bne- lwarx_loop2 + isync + +BEGIN_FTR_SECTION + lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) + and r4,r4,r15 + cmpwi r4,0 /* Check if first in subcore */ + + or r15,r15,r7 /* Set thread bit */ + beq first_thread_in_subcore +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) + + or r15,r15,r7 /* Set thread bit */ + beq cr2,first_thread_in_core + + /* Not first thread in core or subcore to wake up */ + b clear_lock + +first_thread_in_subcore: + /* + * If waking up from sleep, subcore state is not lost. Hence + * skip subcore state restore + */ + bne cr4,subcore_state_restored + + /* Restore per-subcore state */ + ld r4,_SDR1(r1) + mtspr SPRN_SDR1,r4 + + ld r4,_RPR(r1) + mtspr SPRN_RPR,r4 + ld r4,_AMOR(r1) + mtspr SPRN_AMOR,r4 + +subcore_state_restored: + /* + * Check if the thread is also the first thread in the core. If not, + * skip to clear_lock. + */ + bne cr2,clear_lock + +first_thread_in_core: + + /* + * First thread in the core waking up from any state which can cause + * partial or complete hypervisor state loss. It needs to + * call the fastsleep workaround code if the platform requires it. + * Call it unconditionally here. The below branch instruction will + * be patched out if the platform does not have fastsleep or does not + * require the workaround. Patching will be performed during the + * discovery of idle-states. + */ +.global pnv_fastsleep_workaround_at_exit +pnv_fastsleep_workaround_at_exit: + b fastsleep_workaround_at_exit + +timebase_resync: + /* + * Use cr3 which indicates that we are waking up with atleast partial + * hypervisor state loss to determine if TIMEBASE RESYNC is needed. + */ + ble cr3,clear_lock + /* Time base re-sync */ + bl opal_rm_resync_timebase; + /* + * If waking up from sleep, per core state is not lost, skip to + * clear_lock. + */ + bne cr4,clear_lock + + /* + * First thread in the core to wake up and its waking up with + * complete hypervisor state loss. Restore per core hypervisor + * state. + */ +BEGIN_FTR_SECTION + ld r4,_PTCR(r1) + mtspr SPRN_PTCR,r4 + ld r4,_RPR(r1) + mtspr SPRN_RPR,r4 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + + ld r4,_TSCR(r1) + mtspr SPRN_TSCR,r4 + ld r4,_WORC(r1) + mtspr SPRN_WORC,r4 + +clear_lock: + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS + lwsync + stw r15,0(r14) + +common_exit: + /* + * Common to all threads. + * + * If waking up from sleep, hypervisor state is not lost. Hence + * skip hypervisor state restore. + */ + bne cr4,hypervisor_state_restored + + /* Waking up from winkle */ + +BEGIN_MMU_FTR_SECTION + b no_segments +END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX) + /* Restore SLB from PACA */ + ld r8,PACA_SLBSHADOWPTR(r13) + + .rept SLB_NUM_BOLTED + li r3, SLBSHADOW_SAVEAREA + LDX_BE r5, r8, r3 + addi r3, r3, 8 + LDX_BE r6, r8, r3 + andis. r7,r5,SLB_ESID_V@h + beq 1f + slbmte r6,r5 +1: addi r8,r8,16 + .endr +no_segments: + + /* Restore per thread state */ + + ld r4,_SPURR(r1) + mtspr SPRN_SPURR,r4 + ld r4,_PURR(r1) + mtspr SPRN_PURR,r4 + ld r4,_DSCR(r1) + mtspr SPRN_DSCR,r4 + ld r4,_WORT(r1) + mtspr SPRN_WORT,r4 + + /* Call cur_cpu_spec->cpu_restore() */ + LOAD_REG_ADDR(r4, cur_cpu_spec) + ld r4,0(r4) + ld r12,CPU_SPEC_RESTORE(r4) +#ifdef PPC64_ELF_ABI_v1 + ld r12,0(r12) +#endif + mtctr r12 + bctrl + +hypervisor_state_restored: + + mtspr SPRN_SRR1,r16 + mtlr r17 + blr /* Return back to System Reset vector from where + pnv_restore_hyp_resource was invoked */ + +fastsleep_workaround_at_exit: + li r3,1 + li r4,0 + bl opal_rm_config_cpu_idle_state + b timebase_resync + +/* + * R3 here contains the value that will be returned to the caller + * of power7_nap. + */ +_GLOBAL(pnv_wakeup_loss) + ld r1,PACAR1(r13) +BEGIN_FTR_SECTION + CHECK_HMI_INTERRUPT +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) + REST_NVGPRS(r1) + REST_GPR(2, r1) + ld r6,_CCR(r1) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtcr r6 + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid + +/* + * R3 here contains the value that will be returned to the caller + * of power7_nap. + */ +_GLOBAL(pnv_wakeup_noloss) + lbz r0,PACA_NAPSTATELOST(r13) + cmpwi r0,0 + bne pnv_wakeup_loss +BEGIN_FTR_SECTION + CHECK_HMI_INTERRUPT +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) + ld r1,PACAR1(r13) + ld r6,_CCR(r1) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtcr r6 + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid diff --cc arch/powerpc/kernel/traps.c index f7e2f2e318bd,9ec95daccad9..2cb589264cb7 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@@ -60,7 -60,7 +60,8 @@@ #include #include #include +#include + #include #include #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) diff --cc arch/s390/include/asm/mmu.h index 18226437a832,b941528cc49e..6d39329c894b --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@@ -6,10 -6,11 +6,11 @@@ typedef struct { cpumask_t cpu_attach_mask; - atomic_t attach_count; + atomic_t flush_count; unsigned int flush_mm; - spinlock_t list_lock; + spinlock_t pgtable_lock; struct list_head pgtable_list; + spinlock_t gmap_lock; struct list_head gmap_list; unsigned long asce; unsigned long asce_limit; diff --cc arch/s390/include/asm/mmu_context.h index f77c638bf397,3ce3854b7a41..c6a088c91aee --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@@ -15,11 -15,12 +15,12 @@@ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { - spin_lock_init(&mm->context.list_lock); + spin_lock_init(&mm->context.pgtable_lock); INIT_LIST_HEAD(&mm->context.pgtable_list); + spin_lock_init(&mm->context.gmap_lock); INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); - atomic_set(&mm->context.attach_count, 0); + atomic_set(&mm->context.flush_count, 0); mm->context.flush_mm = 0; #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste; diff --cc arch/s390/kernel/diag.c index 48b37b8357e6,a44faf4a0454..a97354c8c667 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@@ -162,6 -162,28 +162,30 @@@ int diag14(unsigned long rx, unsigned l } EXPORT_SYMBOL(diag14); -static inline int __diag204(unsigned long subcode, unsigned long size, void *addr) ++static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr) + { - register unsigned long _subcode asm("0") = subcode; ++ register unsigned long _subcode asm("0") = *subcode; + register unsigned long _size asm("1") = size; + + asm volatile( + " diag %2,%0,0x204\n" - "0:\n" ++ "0: nopr %%r7\n" + EX_TABLE(0b,0b) + : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory"); - if (_subcode) - return -1; ++ *subcode = _subcode; + return _size; + } + + int diag204(unsigned long subcode, unsigned long size, void *addr) + { + diag_stat_inc(DIAG_STAT_X204); - return __diag204(subcode, size, addr); ++ size = __diag204(&subcode, size, addr); ++ if (subcode) ++ return -1; ++ return size; + } + EXPORT_SYMBOL(diag204); + /* * Diagnose 210: Get information about a virtual device */ diff --cc arch/s390/kvm/kvm-s390.c index 6f5c344cd785,63ac7c1641a7..3f3ae4865d57 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@@ -26,15 -27,18 +27,18 @@@ #include #include #include + #include #include #include -#include +#include #include #include #include #include #include #include + #include -#include ++#include #include "kvm-s390.h" #include "gaccess.h" @@@ -61,9 -65,9 +65,10 @@@ struct kvm_stats_debugfs_item debugfs_e { "exit_external_request", VCPU_STAT(exit_external_request) }, { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, { "exit_instruction", VCPU_STAT(exit_instruction) }, + { "exit_pei", VCPU_STAT(exit_pei) }, { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, + { "exit_operation_exception", VCPU_STAT(exit_operation_exception) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, @@@ -188,6 -211,101 +212,103 @@@ void kvm_arch_hardware_unsetup(void &kvm_clock_notifier); } + static void allow_cpu_feat(unsigned long nr) + { + set_bit_inv(nr, kvm_s390_available_cpu_feat); + } + + static inline int plo_test_bit(unsigned char nr) + { + register unsigned long r0 asm("0") = (unsigned long) nr | 0x100; + int cc = 3; /* subfunction not available */ + + asm volatile( + /* Parameter registers are ignored for "test bit" */ + " plo 0,0,0,0(0)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) + : "d" (r0) + : "cc"); + return cc == 0; + } + + static void kvm_s390_cpu_feat_init(void) + { + int i; + + for (i = 0; i < 256; ++i) { + if (plo_test_bit(i)) + kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7); + } + + if (test_facility(28)) /* TOD-clock steering */ - etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF); ++ ptff(kvm_s390_available_subfunc.ptff, ++ sizeof(kvm_s390_available_subfunc.ptff), ++ PTFF_QAF); + + if (test_facility(17)) { /* MSA */ + __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac); + __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc); + __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km); + __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd); + __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd); + } + if (test_facility(76)) /* MSA3 */ + __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo); + if (test_facility(77)) { /* MSA4 */ + __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr); + __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf); + __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo); + __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc); + } + if (test_facility(57)) /* MSA5 */ + __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno); + + if (MACHINE_HAS_ESOP) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); + /* + * We need SIE support, ESOP (PROT_READ protection for gmap_shadow), + * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing). + */ + if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao || + !test_facility(3) || !nested) + return; + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2); + if (sclp.has_64bscao) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO); + if (sclp.has_siif) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF); + if (sclp.has_gpere) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE); + if (sclp.has_gsls) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS); + if (sclp.has_ib) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB); + if (sclp.has_cei) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI); + if (sclp.has_ibs) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS); + /* + * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make + * all skey handling functions read/set the skey from the PGSTE + * instead of the real storage key. + * + * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make + * pages being detected as preserved although they are resident. + * + * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will + * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY. + * + * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and + * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be + * correctly shadowed. We can do that for the PGSTE but not for PTE.I. + * + * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We + * cannot easily shadow the SCA because of the ipte lock. + */ + } + int kvm_arch_init(void *opaque) { kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); diff --cc arch/s390/mm/pgtable.c index b98d1a152d46,293130b5aee7..5f092015aaa7 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@@ -456,9 -415,92 +459,93 @@@ void ptep_set_notify(struct mm_struct * pgste = pgste_get_lock(ptep); pgste_val(pgste) |= PGSTE_IN_BIT; pgste_set_unlock(ptep, pgste); + preempt_enable(); } + /** + * ptep_force_prot - change access rights of a locked pte + * @mm: pointer to the process mm_struct + * @addr: virtual address in the guest address space + * @ptep: pointer to the page table entry + * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bit: pgste bit to set (e.g. for notification) + * + * Returns 0 if the access rights were changed and -EAGAIN if the current + * and requested access rights are incompatible. + */ + int ptep_force_prot(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, int prot, unsigned long bit) + { + pte_t entry; + pgste_t pgste; + int pte_i, pte_p; + + pgste = pgste_get_lock(ptep); + entry = *ptep; + /* Check pte entry after all locks have been acquired */ + pte_i = pte_val(entry) & _PAGE_INVALID; + pte_p = pte_val(entry) & _PAGE_PROTECT; + if ((pte_i && (prot != PROT_NONE)) || + (pte_p && (prot & PROT_WRITE))) { + pgste_set_unlock(ptep, pgste); + return -EAGAIN; + } + /* Change access rights and set pgste bit */ + if (prot == PROT_NONE && !pte_i) { + ptep_flush_direct(mm, addr, ptep); + pgste = pgste_update_all(entry, pgste, mm); + pte_val(entry) |= _PAGE_INVALID; + } + if (prot == PROT_READ && !pte_p) { + ptep_flush_direct(mm, addr, ptep); + pte_val(entry) &= ~_PAGE_INVALID; + pte_val(entry) |= _PAGE_PROTECT; + } + pgste_val(pgste) |= bit; + pgste = pgste_set_pte(ptep, pgste, entry); + pgste_set_unlock(ptep, pgste); + return 0; + } + + int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, + pte_t *sptep, pte_t *tptep, pte_t pte) + { + pgste_t spgste, tpgste; + pte_t spte, tpte; + int rc = -EAGAIN; + + if (!(pte_val(*tptep) & _PAGE_INVALID)) + return 0; /* already shadowed */ + spgste = pgste_get_lock(sptep); + spte = *sptep; + if (!(pte_val(spte) & _PAGE_INVALID) && + !((pte_val(spte) & _PAGE_PROTECT) && + !(pte_val(pte) & _PAGE_PROTECT))) { + pgste_val(spgste) |= PGSTE_VSIE_BIT; + tpgste = pgste_get_lock(tptep); + pte_val(tpte) = (pte_val(spte) & PAGE_MASK) | + (pte_val(pte) & _PAGE_PROTECT); + /* don't touch the storage key - it belongs to parent pgste */ + tpgste = pgste_set_pte(tptep, tpgste, tpte); + pgste_set_unlock(tptep, tpgste); + rc = 1; + } + pgste_set_unlock(sptep, spgste); + return rc; + } + + void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep) + { + pgste_t pgste; + + pgste = pgste_get_lock(ptep); + /* notifier is called by the caller */ + ptep_flush_direct(mm, saddr, ptep); + /* don't touch the storage key - it belongs to parent pgste */ + pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID)); + pgste_set_unlock(ptep, pgste); + } + static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) { if (!non_swap_entry(entry)) diff --cc arch/x86/kvm/iommu.c index 95e0e6481f07,4f2010c5feba..b181426f67b4 --- a/arch/x86/kvm/iommu.c +++ b/arch/x86/kvm/iommu.c @@@ -25,12 -25,10 +25,10 @@@ #include #include -#include +#include #include #include - #include #include - #include #include "assigned-dev.h" static bool allow_unsafe_assigned_interrupts; diff --cc arch/x86/kvm/lapic.c index 57549ed47ca5,6895fd28aae9..730cf174090a --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@@ -1310,10 -1313,111 +1313,112 @@@ void wait_lapic_expire(struct kvm_vcpu /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ if (guest_tsc < tsc_deadline) - __delay(tsc_deadline - guest_tsc); + __delay(min(tsc_deadline - guest_tsc, + nsec_to_cycles(vcpu, lapic_timer_advance_ns))); } + static void start_sw_tscdeadline(struct kvm_lapic *apic) + { + u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline; + u64 ns = 0; + ktime_t expire; + struct kvm_vcpu *vcpu = apic->vcpu; + unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz; + unsigned long flags; + ktime_t now; + + if (unlikely(!tscdeadline || !this_tsc_khz)) + return; + + local_irq_save(flags); + + now = apic->lapic_timer.timer.base->get_time(); + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); + if (likely(tscdeadline > guest_tsc)) { + ns = (tscdeadline - guest_tsc) * 1000000ULL; + do_div(ns, this_tsc_khz); + expire = ktime_add_ns(now, ns); + expire = ktime_sub_ns(expire, lapic_timer_advance_ns); + hrtimer_start(&apic->lapic_timer.timer, + expire, HRTIMER_MODE_ABS_PINNED); + } else + apic_timer_expired(apic); + + local_irq_restore(flags); + } + + bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) + { + return vcpu->arch.apic->lapic_timer.hv_timer_in_use; + } + EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); + + static void cancel_hv_tscdeadline(struct kvm_lapic *apic) + { + kvm_x86_ops->cancel_hv_timer(apic->vcpu); + apic->lapic_timer.hv_timer_in_use = false; + } + + void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) + { + struct kvm_lapic *apic = vcpu->arch.apic; + + WARN_ON(!apic->lapic_timer.hv_timer_in_use); + WARN_ON(swait_active(&vcpu->wq)); + cancel_hv_tscdeadline(apic); + apic_timer_expired(apic); + } + EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer); + + static bool start_hv_tscdeadline(struct kvm_lapic *apic) + { + u64 tscdeadline = apic->lapic_timer.tscdeadline; + + if (atomic_read(&apic->lapic_timer.pending) || + kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) { + if (apic->lapic_timer.hv_timer_in_use) + cancel_hv_tscdeadline(apic); + } else { + apic->lapic_timer.hv_timer_in_use = true; + hrtimer_cancel(&apic->lapic_timer.timer); + + /* In case the sw timer triggered in the window */ + if (atomic_read(&apic->lapic_timer.pending)) + cancel_hv_tscdeadline(apic); + } + trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, + apic->lapic_timer.hv_timer_in_use); + return apic->lapic_timer.hv_timer_in_use; + } + + void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu) + { + struct kvm_lapic *apic = vcpu->arch.apic; + + WARN_ON(apic->lapic_timer.hv_timer_in_use); + + if (apic_lvtt_tscdeadline(apic)) + start_hv_tscdeadline(apic); + } + EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer); + + void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu) + { + struct kvm_lapic *apic = vcpu->arch.apic; + + /* Possibly the TSC deadline timer is not enabled yet */ + if (!apic->lapic_timer.hv_timer_in_use) + return; + + cancel_hv_tscdeadline(apic); + + if (atomic_read(&apic->lapic_timer.pending)) + return; + + start_sw_tscdeadline(apic); + } + EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer); + static void start_apic_timer(struct kvm_lapic *apic) { ktime_t now; diff --cc arch/x86/kvm/vmx.c index df07a0a4611f,b2f559159f3a..bc354f003ce1 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@@ -7560,6 -7708,19 +7711,12 @@@ static int handle_pml_full(struct kvm_v return 1; } -static int handle_pcommit(struct kvm_vcpu *vcpu) -{ - /* we never catch pcommit instruct for L1 guest. */ - WARN_ON(1); - return 1; -} - + static int handle_preemption_timer(struct kvm_vcpu *vcpu) + { + kvm_lapic_expired_hv_timer(vcpu); + return 1; + } + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@@ -7610,6 -7771,8 +7767,7 @@@ static int (*const kvm_vmx_exit_handler [EXIT_REASON_XSAVES] = handle_xsaves, [EXIT_REASON_XRSTORS] = handle_xrstors, [EXIT_REASON_PML_FULL] = handle_pml_full, - [EXIT_REASON_PCOMMIT] = handle_pcommit, + [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, }; static const int kvm_vmx_max_exit_handlers = @@@ -7918,6 -8081,10 +8076,8 @@@ static bool nested_vmx_exit_handled(str * the XSS exit bitmap in vmcs12. */ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); - case EXIT_REASON_PCOMMIT: - return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT); + case EXIT_REASON_PREEMPTION_TIMER: + return false; default: return true; } @@@ -8940,6 -9115,20 +9120,8 @@@ static struct kvm_vcpu *vmx_create_vcpu vmx->nested.current_vmptr = -1ull; vmx->nested.current_vmcs12 = NULL; - /* - * If PML is turned on, failure on enabling PML just results in failure - * of creating the vcpu, therefore we can simplify PML logic (by - * avoiding dealing with cases, such as enabling PML partially on vcpus - * for the guest, etc. - */ - if (enable_pml) { - err = vmx_create_pml_buffer(vmx); - if (err) - goto free_vmcs; - } - + vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; + return &vmx->vcpu; free_vmcs: @@@ -9080,6 -9267,22 +9262,13 @@@ static void vmx_cpuid_update(struct kvm if (cpu_has_secondary_exec_ctrls()) vmcs_set_secondary_exec_control(secondary_exec_ctl); + - if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) { - if (guest_cpuid_has_pcommit(vcpu)) - vmx->nested.nested_vmx_secondary_ctls_high |= - SECONDARY_EXEC_PCOMMIT; - else - vmx->nested.nested_vmx_secondary_ctls_high &= - ~SECONDARY_EXEC_PCOMMIT; - } - + if (nested_vmx_allowed(vcpu)) + to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= + FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + else + to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= + ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; } static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) diff --cc arch/x86/kvm/x86.c index 9c496c7e8c00,a27b33033700..19f9f9e05c2a --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@@ -66,12 -68,10 +66,13 @@@ #include #include +#define CREATE_TRACE_POINTS +#include "trace.h" + #define MAX_IO_MSRS 256 #define KVM_MAX_MCE_BANKS 32 - #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) + u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P; + EXPORT_SYMBOL_GPL(kvm_mce_cap_supported); #define emul_to_vcpu(ctxt) \ container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt) diff --cc include/linux/irqchip/arm-gic-v3.h index 107eed475b94,700b4216c87a..56b0b7ec66aa --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@@ -229,7 -300,7 +300,8 @@@ #define GITS_BASER_PAGE_SIZE_64K (2UL << GITS_BASER_PAGE_SIZE_SHIFT) #define GITS_BASER_PAGE_SIZE_MASK (3UL << GITS_BASER_PAGE_SIZE_SHIFT) #define GITS_BASER_PAGES_MAX 256 +#define GITS_BASER_PAGES_SHIFT (0) + #define GITS_BASER_NR_PAGES(r) (((r) & 0xff) + 1) #define GITS_BASER_TYPE_NONE 0 #define GITS_BASER_TYPE_DEVICE 1 diff --cc virt/kvm/kvm_main.c index 2e791367c576,61b31a5f76c8..cc081ccfcaa3 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@@ -3487,6 -3545,34 +3543,30 @@@ int kvm_io_bus_unregister_dev(struct kv return r; } + struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, + gpa_t addr) + { + struct kvm_io_bus *bus; + int dev_idx, srcu_idx; + struct kvm_io_device *iodev = NULL; + + srcu_idx = srcu_read_lock(&kvm->srcu); + + bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); + + dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1); + if (dev_idx < 0) + goto out_unlock; + + iodev = bus->range[dev_idx].dev; + + out_unlock: + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return iodev; + } + EXPORT_SYMBOL_GPL(kvm_io_bus_get_dev); + -static struct notifier_block kvm_cpu_notifier = { - .notifier_call = kvm_cpu_hotplug, -}; - static int kvm_debugfs_open(struct inode *inode, struct file *file, int (*get)(void *, u64 *), int (*set)(void *, u64), const char *fmt)