From: Paul Mackerras Date: Fri, 9 Sep 2016 06:24:23 +0000 (+1000) Subject: Merge branch 'kvm-ppc-infrastructure' into kvm-ppc-next X-Git-Tag: v4.9-rc1~110^2~19^2~17 X-Git-Url: http://git.cascardo.info/?a=commitdiff_plain;h=99212c864e1f6d2800e62bc8c52d6335bfca029f;hp=2a27f514a47d39c50aaa5c07831ab35178955d47;p=cascardo%2Flinux.git Merge branch 'kvm-ppc-infrastructure' into kvm-ppc-next This merges the topic branch 'kvm-ppc-infrastructure' into kvm-ppc-next so that I can then apply further patches that need the changes in the kvm-ppc-infrastructure branch. Signed-off-by: Paul Mackerras --- diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 287a656ceb57..e407af2b7333 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -244,6 +244,43 @@ static inline int segment_shift(int ssize) return SID_SHIFT_1T; } +/* + * This array is indexed by the LP field of the HPTE second dword. + * Since this field may contain some RPN bits, some entries are + * replicated so that we get the same value irrespective of RPN. + * The top 4 bits are the page size index (MMU_PAGE_*) for the + * actual page size, the bottom 4 bits are the base page size. + */ +extern u8 hpte_page_sizes[1 << LP_BITS]; + +static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l, + bool is_base_size) +{ + unsigned int i, lp; + + if (!(h & HPTE_V_LARGE)) + return 1ul << 12; + + /* Look at the 8 bit LP value */ + lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1); + i = hpte_page_sizes[lp]; + if (!i) + return 0; + if (!is_base_size) + i >>= 4; + return 1ul << mmu_psize_defs[i & 0xf].shift; +} + +static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 0); +} + +static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 1); +} + /* * The current system page and segment sizes */ diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h index 88b4901ac4ee..85b7a1a21e22 100644 --- a/arch/powerpc/include/asm/hmi.h +++ b/arch/powerpc/include/asm/hmi.h @@ -21,7 +21,7 @@ #ifndef __ASM_PPC64_HMI_H__ #define __ASM_PPC64_HMI_H__ -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #define CORE_TB_RESYNC_REQ_BIT 63 #define MAX_SUBCORE_PER_CORE 4 diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 2fd1690b79d2..f6fda8482f60 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -241,6 +241,35 @@ static inline void out_be64(volatile u64 __iomem *addr, u64 val) #endif #endif /* __powerpc64__ */ + +/* + * Simple Cache inhibited accessors + * Unlike the DEF_MMIO_* macros, these don't include any h/w memory + * barriers, callers need to manage memory barriers on their own. + * These can only be used in hypervisor real mode. + */ + +static inline u32 _lwzcix(unsigned long addr) +{ + u32 ret; + + __asm__ __volatile__("lwzcix %0,0, %1" + : "=r" (ret) : "r" (addr) : "memory"); + return ret; +} + +static inline void _stbcix(u64 addr, u8 val) +{ + __asm__ __volatile__("stbcix %0,0,%1" + : : "r" (val), "r" (addr) : "memory"); +} + +static inline void _stwcix(u64 addr, u32 val) +{ + __asm__ __volatile__("stwcix %0,0,%1" + : : "r" (val), "r" (addr) : "memory"); +} + /* * Low level IO stream instructions are defined out of line for now */ diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 88d17b4ea9c8..4ffd5a1e788d 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -20,6 +20,8 @@ #ifndef __ASM_KVM_BOOK3S_64_H__ #define __ASM_KVM_BOOK3S_64_H__ +#include + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) { @@ -97,56 +99,20 @@ static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v) hpte[0] = cpu_to_be64(hpte_v); } -static inline int __hpte_actual_psize(unsigned int lp, int psize) -{ - int i, shift; - unsigned int mask; - - /* start from 1 ignoring MMU_PAGE_4K */ - for (i = 1; i < MMU_PAGE_COUNT; i++) { - - /* invalid penc */ - if (mmu_psize_defs[psize].penc[i] == -1) - continue; - /* - * encoding bits per actual page size - * PTE LP actual page size - * rrrr rrrz >=8KB - * rrrr rrzz >=16KB - * rrrr rzzz >=32KB - * rrrr zzzz >=64KB - * ....... - */ - shift = mmu_psize_defs[i].shift - LP_SHIFT; - if (shift > LP_BITS) - shift = LP_BITS; - mask = (1 << shift) - 1; - if ((lp & mask) == mmu_psize_defs[psize].penc[i]) - return i; - } - return -1; -} - static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, unsigned long pte_index) { - int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K; + int i, b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K; unsigned int penc; unsigned long rb = 0, va_low, sllp; unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1); if (v & HPTE_V_LARGE) { - for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) { - - /* valid entries have a shift value */ - if (!mmu_psize_defs[b_psize].shift) - continue; - - a_psize = __hpte_actual_psize(lp, b_psize); - if (a_psize != -1) - break; - } + i = hpte_page_sizes[lp]; + b_psize = i & 0xf; + a_psize = i >> 4; } + /* * Ignore the top 14 bits of va * v have top two bits covering segment size, hence move @@ -215,45 +181,6 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, return rb; } -static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l, - bool is_base_size) -{ - - int size, a_psize; - /* Look at the 8 bit LP value */ - unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1); - - /* only handle 4k, 64k and 16M pages for now */ - if (!(h & HPTE_V_LARGE)) - return 1ul << 12; - else { - for (size = 0; size < MMU_PAGE_COUNT; size++) { - /* valid entries have a shift value */ - if (!mmu_psize_defs[size].shift) - continue; - - a_psize = __hpte_actual_psize(lp, size); - if (a_psize != -1) { - if (is_base_size) - return 1ul << mmu_psize_defs[size].shift; - return 1ul << mmu_psize_defs[a_psize].shift; - } - } - - } - return 0; -} - -static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) -{ - return __hpte_page_size(h, l, 0); -} - -static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l) -{ - return __hpte_page_size(h, l, 1); -} - static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize) { return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index e2fb408f8398..b78e8d3377f6 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -271,6 +271,7 @@ static inline bool early_radix_enabled(void) #define MMU_PAGE_16G 13 #define MMU_PAGE_64G 14 +/* N.B. we need to change the type of hpte_page_sizes if this gets to be > 16 */ #define MMU_PAGE_COUNT 15 #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 148303e7771f..6a6792bb39fb 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -183,11 +183,6 @@ struct paca_struct { */ u16 in_mce; u8 hmi_event_available; /* HMI event is available */ - /* - * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for - * more details - */ - struct sibling_subcore_state *sibling_subcore_state; #endif /* Stuff for accurate time accounting */ @@ -202,6 +197,13 @@ struct paca_struct { struct kvmppc_book3s_shadow_vcpu shadow_vcpu; #endif struct kvmppc_host_state kvm_hstate; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* + * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for + * more details + */ + struct sibling_subcore_state *sibling_subcore_state; +#endif #endif }; diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index 0cbd8134ce81..1b46b52d3212 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -33,6 +34,8 @@ int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num); void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num); int pnv_cxl_get_irq_count(struct pci_dev *dev); struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev); +int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq); +bool is_pnv_opal_msi(struct irq_chip *chip); #ifdef CONFIG_CXL_BASE int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b2027a5cf508..fe4c075bcf50 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -41,7 +41,7 @@ obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o -obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o hmi.o +obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o diff --git a/arch/powerpc/kernel/hmi.c b/arch/powerpc/kernel/hmi.c deleted file mode 100644 index e3f738eb1cac..000000000000 --- a/arch/powerpc/kernel/hmi.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Hypervisor Maintenance Interrupt (HMI) handling. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. - * - * Copyright 2015 IBM Corporation - * Author: Mahesh Salgaonkar - */ - -#undef DEBUG - -#include -#include -#include -#include - -void wait_for_subcore_guest_exit(void) -{ - int i; - - /* - * NULL bitmap pointer indicates that KVM module hasn't - * been loaded yet and hence no guests are running. - * If no KVM is in use, no need to co-ordinate among threads - * as all of them will always be in host and no one is going - * to modify TB other than the opal hmi handler. - * Hence, just return from here. - */ - if (!local_paca->sibling_subcore_state) - return; - - for (i = 0; i < MAX_SUBCORE_PER_CORE; i++) - while (local_paca->sibling_subcore_state->in_guest[i]) - cpu_relax(); -} - -void wait_for_tb_resync(void) -{ - if (!local_paca->sibling_subcore_state) - return; - - while (test_bit(CORE_TB_RESYNC_REQ_BIT, - &local_paca->sibling_subcore_state->flags)) - cpu_relax(); -} diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index bd9b82f62f2f..7dd89b79d038 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -77,6 +77,7 @@ kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ + book3s_hv_hmi.o \ book3s_hv_rmhandlers.o \ book3s_hv_rm_mmu.o \ book3s_hv_ras.o \ diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c new file mode 100644 index 000000000000..e3f738eb1cac --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_hmi.c @@ -0,0 +1,56 @@ +/* + * Hypervisor Maintenance Interrupt (HMI) handling. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. + * + * Copyright 2015 IBM Corporation + * Author: Mahesh Salgaonkar + */ + +#undef DEBUG + +#include +#include +#include +#include + +void wait_for_subcore_guest_exit(void) +{ + int i; + + /* + * NULL bitmap pointer indicates that KVM module hasn't + * been loaded yet and hence no guests are running. + * If no KVM is in use, no need to co-ordinate among threads + * as all of them will always be in host and no one is going + * to modify TB other than the opal hmi handler. + * Hence, just return from here. + */ + if (!local_paca->sibling_subcore_state) + return; + + for (i = 0; i < MAX_SUBCORE_PER_CORE; i++) + while (local_paca->sibling_subcore_state->in_guest[i]) + cpu_relax(); +} + +void wait_for_tb_resync(void) +{ + if (!local_paca->sibling_subcore_state) + return; + + while (test_bit(CORE_TB_RESYNC_REQ_BIT, + &local_paca->sibling_subcore_state->flags)) + cpu_relax(); +} diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 0e4e9654bd2c..83ddc0e171b0 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -493,36 +493,6 @@ static void native_hugepage_invalidate(unsigned long vsid, } #endif -static inline int __hpte_actual_psize(unsigned int lp, int psize) -{ - int i, shift; - unsigned int mask; - - /* start from 1 ignoring MMU_PAGE_4K */ - for (i = 1; i < MMU_PAGE_COUNT; i++) { - - /* invalid penc */ - if (mmu_psize_defs[psize].penc[i] == -1) - continue; - /* - * encoding bits per actual page size - * PTE LP actual page size - * rrrr rrrz >=8KB - * rrrr rrzz >=16KB - * rrrr rzzz >=32KB - * rrrr zzzz >=64KB - * ....... - */ - shift = mmu_psize_defs[i].shift - LP_SHIFT; - if (shift > LP_BITS) - shift = LP_BITS; - mask = (1 << shift) - 1; - if ((lp & mask) == mmu_psize_defs[psize].penc[i]) - return i; - } - return -1; -} - static void hpte_decode(struct hash_pte *hpte, unsigned long slot, int *psize, int *apsize, int *ssize, unsigned long *vpn) { @@ -538,16 +508,8 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, size = MMU_PAGE_4K; a_size = MMU_PAGE_4K; } else { - for (size = 0; size < MMU_PAGE_COUNT; size++) { - - /* valid entries have a shift value */ - if (!mmu_psize_defs[size].shift) - continue; - - a_size = __hpte_actual_psize(lp, size); - if (a_size != -1) - break; - } + size = hpte_page_sizes[lp] & 0xf; + a_size = hpte_page_sizes[lp] >> 4; } /* This works for all page sizes, and for 256M and 1T segments */ if (cpu_has_feature(CPU_FTR_ARCH_300)) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 0821556e16f4..ef3ae891a3db 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -93,6 +93,9 @@ static unsigned long _SDR1; struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; EXPORT_SYMBOL_GPL(mmu_psize_defs); +u8 hpte_page_sizes[1 << LP_BITS]; +EXPORT_SYMBOL_GPL(hpte_page_sizes); + struct hash_pte *htab_address; unsigned long htab_size_bytes; unsigned long htab_hash_mask; @@ -564,8 +567,60 @@ static void __init htab_scan_page_sizes(void) #endif /* CONFIG_HUGETLB_PAGE */ } +/* + * Fill in the hpte_page_sizes[] array. + * We go through the mmu_psize_defs[] array looking for all the + * supported base/actual page size combinations. Each combination + * has a unique pagesize encoding (penc) value in the low bits of + * the LP field of the HPTE. For actual page sizes less than 1MB, + * some of the upper LP bits are used for RPN bits, meaning that + * we need to fill in several entries in hpte_page_sizes[]. + * + * In diagrammatic form, with r = RPN bits and z = page size bits: + * PTE LP actual page size + * rrrr rrrz >=8KB + * rrrr rrzz >=16KB + * rrrr rzzz >=32KB + * rrrr zzzz >=64KB + * ... + * + * The zzzz bits are implementation-specific but are chosen so that + * no encoding for a larger page size uses the same value in its + * low-order N bits as the encoding for the 2^(12+N) byte page size + * (if it exists). + */ +static void init_hpte_page_sizes(void) +{ + long int ap, bp; + long int shift, penc; + + for (bp = 0; bp < MMU_PAGE_COUNT; ++bp) { + if (!mmu_psize_defs[bp].shift) + continue; /* not a supported page size */ + for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) { + penc = mmu_psize_defs[bp].penc[ap]; + if (penc == -1) + continue; + shift = mmu_psize_defs[ap].shift - LP_SHIFT; + if (shift <= 0) + continue; /* should never happen */ + /* + * For page sizes less than 1MB, this loop + * replicates the entry for all possible values + * of the rrrr bits. + */ + while (penc < (1 << LP_BITS)) { + hpte_page_sizes[penc] = (ap << 4) | bp; + penc += 1 << shift; + } + } + } +} + static void __init htab_init_page_sizes(void) { + init_hpte_page_sizes(); + if (!debug_pagealloc_enabled()) { /* * Pick a size for the linear mapping. Currently, we only diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index fd9444f9fb0c..9ce48ae55062 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2710,15 +2710,21 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, } #ifdef CONFIG_PCI_MSI -static void pnv_ioda2_msi_eoi(struct irq_data *d) +int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq) { - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - struct irq_chip *chip = irq_data_get_irq_chip(d); struct pnv_phb *phb = container_of(chip, struct pnv_phb, ioda.irq_chip); + + return opal_pci_msi_eoi(phb->opal_id, hw_irq); +} + +static void pnv_ioda2_msi_eoi(struct irq_data *d) +{ int64_t rc; + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + struct irq_chip *chip = irq_data_get_irq_chip(d); - rc = opal_pci_msi_eoi(phb->opal_id, hw_irq); + rc = pnv_opal_pci_msi_eoi(chip, hw_irq); WARN_ON_ONCE(rc); icp_native_eoi(d); @@ -2748,6 +2754,16 @@ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) irq_set_chip(virq, &phb->ioda.irq_chip); } +/* + * Returns true iff chip is something that we could call + * pnv_opal_pci_msi_eoi for. + */ +bool is_pnv_opal_msi(struct irq_chip *chip) +{ + return chip->irq_eoi == pnv_ioda2_msi_eoi; +} +EXPORT_SYMBOL_GPL(is_pnv_opal_msi); + static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, unsigned int hwirq, unsigned int virq, unsigned int is_64, struct msi_msg *msg)