Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux
author    Linus Torvalds <torvalds@linux-foundation.org>
          Tue, 21 Oct 2014 14:48:56 +0000 (07:48 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Tue, 21 Oct 2014 14:48:56 +0000 (07:48 -0700)
Pull more powerpc updates from Michael Ellerman:
 "Here's some more updates for powerpc for 3.18.

  They are a bit late, I know, though most are actually bug fixes.  In my
  defence I nearly cut the top of my finger off last weekend in a
  gruesome bike maintenance accident, so I spent a good part of the week
  waiting around for doctors.  True story, I can send photos if you like :)

  Probably the most interesting fix is the sys_call_table one, which
  enables syscall tracing for powerpc.  There's a fix for HMI handling
  for old firmware, more endian fixes for firmware interfaces, more EEH
  fixes, Anton fixed our routine that gets the current stack pointer,
  and a few other misc bits"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux: (22 commits)
  powerpc: Only do dynamic DMA zone limits on platforms that need it
  powerpc: sync pseries_le_defconfig with pseries_defconfig
  powerpc: Add printk levels to setup_system output
  powerpc/vphn: NUMA node code expects big-endian
  powerpc/msi: Use WARN_ON() in msi bitmap selftests
  powerpc/msi: Fix the msi bitmap alignment tests
  powerpc/eeh: Block CFG upon frozen Shiner adapter
  powerpc/eeh: Don't collect logs on PE with blocked config space
  powerpc/eeh: Block PCI config access upon frozen PE
  powerpc/pseries: Drop config requests in EEH accessors
  powerpc/powernv: Drop config requests in EEH accessors
  powerpc/eeh: Rename flag EEH_PE_RESET to EEH_PE_CFG_BLOCKED
  powerpc/eeh: Fix condition for isolated state
  powerpc/pseries: Make CPU hotplug path endian safe
  powerpc/pseries: Use dump_stack instead of show_stack
  powerpc: Rename __get_SP() to current_stack_pointer()
  powerpc: Reimplement __get_SP() as a function not a define
  powerpc/numa: Add ability to disable and debug topology updates
  powerpc/numa: check error return from proc_create
  powerpc/powernv: Fallback to old HMI handling behavior for old firmware
  ...

28 files changed:
Documentation/kernel-parameters.txt
arch/powerpc/configs/pseries_le_defconfig
arch/powerpc/include/asm/eeh.h
arch/powerpc/include/asm/perf_event.h
arch/powerpc/include/asm/reg.h
arch/powerpc/include/asm/syscall.h
arch/powerpc/kernel/dma.c
arch/powerpc/kernel/eeh.c
arch/powerpc/kernel/eeh_driver.c
arch/powerpc/kernel/eeh_pe.c
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/irq.c
arch/powerpc/kernel/misc.S
arch/powerpc/kernel/ppc_ksyms.c
arch/powerpc/kernel/process.c
arch/powerpc/kernel/rtas_pci.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/stacktrace.c
arch/powerpc/mm/numa.c
arch/powerpc/platforms/powernv/eeh-ioda.c
arch/powerpc/platforms/powernv/eeh-powernv.c
arch/powerpc/platforms/powernv/opal.c
arch/powerpc/platforms/powernv/pci.c
arch/powerpc/platforms/pseries/dlpar.c
arch/powerpc/platforms/pseries/hotplug-cpu.c
arch/powerpc/platforms/pseries/iommu.c
arch/powerpc/platforms/pseries/pseries.h
arch/powerpc/sysdev/msi_bitmap.c

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 7dbe5ec..988160a 100644
@@ -3465,6 +3465,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        e.g. base its process migration decisions on it.
                        Default is on.
 
+       topology_updates= [KNL, PPC, NUMA]
+                       Format: {off}
+                       Specify if the kernel should ignore (off)
+                       topology updates sent by the hypervisor to this
+                       LPAR.
+
        tp720=          [HW,PS2]
 
        tpm_suspend_pcr=[HW,TPM]
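
In practice only "off" is recognised: booting the LPAR with topology_updates=off on the kernel command line makes the early_param handler added in the arch/powerpc/mm/numa.c hunk further down set topology_updates_enabled to false, so topology_update_init() never calls start_topology_update(); any other value, or omitting the parameter, leaves topology updates enabled (the default).
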
diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig
index 63392f4..d200888 100644
@@ -48,7 +48,6 @@ CONFIG_KEXEC=y
 CONFIG_IRQ_ALL_CPUS=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
-CONFIG_CMA=y
 CONFIG_PPC_64K_PAGES=y
 CONFIG_PPC_SUBPAGE_PROT=y
 CONFIG_SCHED_SMT=y
@@ -138,6 +137,7 @@ CONFIG_NETCONSOLE=y
 CONFIG_NETPOLL_TRAP=y
 CONFIG_TUN=m
 CONFIG_VIRTIO_NET=m
+CONFIG_VHOST_NET=m
 CONFIG_VORTEX=y
 CONFIG_ACENIC=m
 CONFIG_ACENIC_OMIT_TIGON_I=y
@@ -303,4 +303,9 @@ CONFIG_CRYPTO_LZO=m
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_DEV_NX=y
 CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM_BOOK3S_64=m
+CONFIG_KVM_BOOK3S_64_HV=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
 CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 3b260ef..ca07f9c 100644
@@ -71,9 +71,10 @@ struct device_node;
 
 #define EEH_PE_ISOLATED                (1 << 0)        /* Isolated PE          */
 #define EEH_PE_RECOVERING      (1 << 1)        /* Recovering PE        */
-#define EEH_PE_RESET           (1 << 2)        /* PE reset in progress */
+#define EEH_PE_CFG_BLOCKED     (1 << 2)        /* Block config access  */
 
 #define EEH_PE_KEEP            (1 << 8)        /* Keep PE on hotplug   */
+#define EEH_PE_CFG_RESTRICTED  (1 << 9)        /* Block config on error */
 
 struct eeh_pe {
        int type;                       /* PE type: PHB/Bus/Device      */
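
Taken together with the eeh_pe.c and config-accessor hunks further down: EEH_PE_CFG_RESTRICTED is a static property set at probe time for adapters whose config space must not be touched while frozen, while EEH_PE_CFG_BLOCKED is the dynamic state that actually gates accesses (set when the PE is isolated, cleared when its state is cleared). A condensed, hypothetical sketch of the check the accessors below add (the helper name is illustrative, not kernel code):

/* Sketch only: mirrors the test added to powernv_eeh_read_config() below;
 * the pseries RTAS accessors add the same pe->state check. */
static int eeh_cfg_read_sketch(struct eeh_pe *pe, int where, int size, u32 *val)
{
	if (pe && (pe->state & EEH_PE_CFG_BLOCKED)) {
		*val = 0xFFFFFFFF;	/* blocked reads surface as all-ones */
		return PCIBIOS_SET_FAILED;
	}

	/* ... otherwise perform the real platform config read ... */
	return PCIBIOS_SUCCESSFUL;
}
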
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index 0bb2372..8bf1b63 100644
@@ -34,7 +34,7 @@
        do {                                                    \
                (regs)->result = 0;                             \
                (regs)->nip = __ip;                             \
-               (regs)->gpr[1] = *(unsigned long *)__get_SP();  \
+               (regs)->gpr[1] = current_stack_pointer();       \
                asm volatile("mfmsr %0" : "=r" ((regs)->msr));  \
        } while (0)
 #endif
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index fe3f948..c998279 100644
@@ -1265,8 +1265,7 @@ static inline unsigned long mfvtb (void)
 
 #define proc_trap()    asm volatile("trap")
 
-#define __get_SP()     ({unsigned long sp; \
-                       asm volatile("mr %0,1": "=r" (sp)); sp;})
+extern unsigned long current_stack_pointer(void);
 
 extern unsigned long scom970_read(unsigned int address);
 extern void scom970_write(unsigned int address, unsigned long value);
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 6fa2708..6240698 100644
@@ -19,7 +19,7 @@
 
 /* ftrace syscalls requires exporting the sys_call_table */
 #ifdef CONFIG_FTRACE_SYSCALLS
-extern const unsigned long *sys_call_table;
+extern const unsigned long sys_call_table[];
 #endif /* CONFIG_FTRACE_SYSCALLS */
 
 static inline long syscall_get_nr(struct task_struct *task,
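
This one-line change is the fix the pull message highlights as enabling syscall tracing: sys_call_table is defined in assembly as an array of handler addresses, and the ftrace syscalls code indexes it. With the old pointer declaration, sys_call_table[nr] first loads the table's first entry as though it were the table's base address and then indexes through that garbage; with the array declaration it indexes the table directly. A stand-alone user-space illustration of the difference (hypothetical names, not kernel code):

#include <stdio.h>

/* Stand-in for a table that is really defined elsewhere (in assembly). */
unsigned long table[3] = { 0x1000, 0x2000, 0x3000 };

int main(void)
{
	/* "extern const unsigned long table[];" view: entry 1 is read directly. */
	const unsigned long *as_array = table;
	printf("array view, entry 1:      %#lx\n", as_array[1]);	/* prints 0x2000 */

	/* "extern const unsigned long *table;" view: the first word is
	 * misread as the table's base, so entry N would be fetched from
	 * 0x1000 + N * sizeof(long) instead of from the real table. */
	const unsigned long *bogus_base = *(const unsigned long **)table;
	printf("pointer view, bogus base: %p\n", (void *)bogus_base);

	return 0;
}
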
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index adac9dc..484b2d4 100644
@@ -53,9 +53,16 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size,
 #else
        struct page *page;
        int node = dev_to_node(dev);
+#ifdef CONFIG_FSL_SOC
        u64 pfn = get_pfn_limit(dev);
        int zone;
 
+       /*
+        * This code should be OK on other platforms, but we have drivers that
+        * don't set coherent_dma_mask. As a workaround we just ifdef it. This
+        * whole routine needs some serious cleanup.
+        */
+
        zone = dma_pfn_limit_to_zone(pfn);
        if (zone < 0) {
                dev_err(dev, "%s: No suitable zone for pfn %#llx\n",
@@ -73,6 +80,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size,
                break;
 #endif
        };
+#endif /* CONFIG_FSL_SOC */
 
        /* ignore region specifiers */
        flag  &= ~(__GFP_HIGHMEM);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index d543e41..2248a19 100644
@@ -257,6 +257,13 @@ static void *eeh_dump_pe_log(void *data, void *flag)
        struct eeh_dev *edev, *tmp;
        size_t *plen = flag;
 
+       /* If the PE's config space is blocked, 0xFF's will be
+        * returned. It's pointless to collect the log in this
+        * case.
+        */
+       if (pe->state & EEH_PE_CFG_BLOCKED)
+               return NULL;
+
        eeh_pe_for_each_dev(pe, edev, tmp)
                *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
                                          EEH_PCI_REGS_LOG_LEN - *plen);
@@ -673,18 +680,18 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
        switch (state) {
        case pcie_deassert_reset:
                eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
-               eeh_pe_state_clear(pe, EEH_PE_RESET);
+               eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
                break;
        case pcie_hot_reset:
-               eeh_pe_state_mark(pe, EEH_PE_RESET);
+               eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
                eeh_ops->reset(pe, EEH_RESET_HOT);
                break;
        case pcie_warm_reset:
-               eeh_pe_state_mark(pe, EEH_PE_RESET);
+               eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
                eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
                break;
        default:
-               eeh_pe_state_clear(pe, EEH_PE_RESET);
+               eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
                return -EINVAL;
        };
 
@@ -1523,7 +1530,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option)
        switch (option) {
        case EEH_RESET_DEACTIVATE:
                ret = eeh_ops->reset(pe, option);
-               eeh_pe_state_clear(pe, EEH_PE_RESET);
+               eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
                if (ret)
                        break;
 
@@ -1538,7 +1545,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option)
                 */
                eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
 
-               eeh_pe_state_mark(pe, EEH_PE_RESET);
+               eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
                ret = eeh_ops->reset(pe, option);
                break;
        default:
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 3fd514f..6535936 100644
@@ -528,13 +528,13 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
        eeh_pe_dev_traverse(pe, eeh_report_error, &result);
 
        /* Issue reset */
-       eeh_pe_state_mark(pe, EEH_PE_RESET);
+       eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
        ret = eeh_reset_pe(pe);
        if (ret) {
-               eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_RESET);
+               eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_CFG_BLOCKED);
                return ret;
        }
-       eeh_pe_state_clear(pe, EEH_PE_RESET);
+       eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
 
        /* Unfreeze the PE */
        ret = eeh_clear_pe_frozen_state(pe, true);
@@ -601,10 +601,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
         * config accesses. So we prefer to block them. However, controlled
         * PCI config accesses initiated from EEH itself are allowed.
         */
-       eeh_pe_state_mark(pe, EEH_PE_RESET);
+       eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
        rc = eeh_reset_pe(pe);
        if (rc) {
-               eeh_pe_state_clear(pe, EEH_PE_RESET);
+               eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
                return rc;
        }
 
@@ -613,7 +613,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
        /* Restore PE */
        eeh_ops->configure_bridge(pe);
        eeh_pe_restore_bars(pe);
-       eeh_pe_state_clear(pe, EEH_PE_RESET);
+       eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
 
        /* Clear frozen state */
        rc = eeh_clear_pe_frozen_state(pe, false);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 53dd091..5a63e2b 100644
@@ -525,7 +525,7 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
        pe->state |= state;
 
        /* Offline PCI devices if applicable */
-       if (state != EEH_PE_ISOLATED)
+       if (!(state & EEH_PE_ISOLATED))
                return NULL;
 
        eeh_pe_for_each_dev(pe, edev, tmp) {
@@ -534,6 +534,10 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
                        pdev->error_state = pci_channel_io_frozen;
        }
 
+       /* Block PCI config access if required */
+       if (pe->state & EEH_PE_CFG_RESTRICTED)
+               pe->state |= EEH_PE_CFG_BLOCKED;
+
        return NULL;
 }
 
@@ -611,6 +615,10 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
                pdev->error_state = pci_channel_io_normal;
        }
 
+       /* Unblock PCI config access if required */
+       if (pe->state & EEH_PE_CFG_RESTRICTED)
+               pe->state &= ~EEH_PE_CFG_BLOCKED;
+
        return NULL;
 }
 
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 050f79a..72e783e 100644
@@ -1270,11 +1270,6 @@ hmi_exception_early:
        addi    r3,r1,STACK_FRAME_OVERHEAD
        bl      hmi_exception_realmode
        /* Windup the stack. */
-       /* Clear MSR_RI before setting SRR0 and SRR1. */
-       li      r0,MSR_RI
-       mfmsr   r9                      /* get MSR value */
-       andc    r9,r9,r0
-       mtmsrd  r9,1                    /* Clear MSR_RI */
        /* Move original HSRR0 and HSRR1 into the respective regs */
        ld      r9,_MSR(r1)
        mtspr   SPRN_HSRR1,r9
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 8eb857f..c143835 100644
@@ -466,7 +466,7 @@ static inline void check_stack_overflow(void)
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
        long sp;
 
-       sp = __get_SP() & (THREAD_SIZE-1);
+       sp = current_stack_pointer() & (THREAD_SIZE-1);
 
        /* check for stack overflow: is there less than 2KB free? */
        if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 7ce26d4..0d43219 100644
@@ -114,3 +114,7 @@ _GLOBAL(longjmp)
        mtlr    r0
        mr      r3,r4
        blr
+
+_GLOBAL(current_stack_pointer)
+       PPC_LL  r3,0(r1)
+       blr
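
The new helper returns the saved back-chain word at 0(r1): the powerpc ABI stores the previous frame's stack pointer at offset 0 of every stack frame, which is exactly the anchor the show_stack() and save_stack_trace() callers below feed into their frame walkers. Making it a real function rather than the old inline #define also gives it a single definition that can be exported, as the ppc_ksyms.c hunk that follows does. A simplified, hypothetical sketch of a walker built on the same back-chain convention:

/* Illustration only: each frame's word at offset 0 points to the next
 * older frame, so starting from the value the helper returns a walker
 * just keeps following that chain (bounds/validity checks omitted). */
static void walk_back_chain(unsigned long sp)
{
	while (sp) {
		unsigned long *frame = (unsigned long *)sp;

		/* frame[0] is the saved back chain: the previous frame's SP */
		sp = frame[0];
	}
}
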
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index c4dfff6..202963e 100644
@@ -41,3 +41,5 @@ EXPORT_SYMBOL(giveup_spe);
 #ifdef CONFIG_EPAPR_PARAVIRT
 EXPORT_SYMBOL(epapr_hypercall_start);
 #endif
+
+EXPORT_SYMBOL(current_stack_pointer);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index aa1df89..923cd2d 100644
@@ -1545,7 +1545,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
                tsk = current;
        if (sp == 0) {
                if (tsk == current)
-                       asm("mr %0,1" : "=r" (sp));
+                       sp = current_stack_pointer();
                else
                        sp = tsk->thread.ksp;
        }
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index c168337..7c55b86 100644
@@ -66,6 +66,11 @@ int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
                return PCIBIOS_DEVICE_NOT_FOUND;
        if (!config_access_valid(pdn, where))
                return PCIBIOS_BAD_REGISTER_NUMBER;
+#ifdef CONFIG_EEH
+       if (pdn->edev && pdn->edev->pe &&
+           (pdn->edev->pe->state & EEH_PE_CFG_BLOCKED))
+               return PCIBIOS_SET_FAILED;
+#endif
 
        addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
        buid = pdn->phb->buid;
@@ -90,9 +95,6 @@ static int rtas_pci_read_config(struct pci_bus *bus,
        struct device_node *busdn, *dn;
        struct pci_dn *pdn;
        bool found = false;
-#ifdef CONFIG_EEH
-       struct eeh_dev *edev;
-#endif
        int ret;
 
        /* Search only direct children of the bus */
@@ -109,11 +111,6 @@ static int rtas_pci_read_config(struct pci_bus *bus,
 
        if (!found)
                return PCIBIOS_DEVICE_NOT_FOUND;
-#ifdef CONFIG_EEH
-       edev = of_node_to_eeh_dev(dn);
-       if (edev && edev->pe && edev->pe->state & EEH_PE_RESET)
-               return PCIBIOS_DEVICE_NOT_FOUND;
-#endif
 
        ret = rtas_read_config(pdn, where, size, val);
        if (*val == EEH_IO_ERROR_VALUE(size) &&
@@ -132,6 +129,11 @@ int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
                return PCIBIOS_DEVICE_NOT_FOUND;
        if (!config_access_valid(pdn, where))
                return PCIBIOS_BAD_REGISTER_NUMBER;
+#ifdef CONFIG_EEH
+       if (pdn->edev && pdn->edev->pe &&
+           (pdn->edev->pe->state & EEH_PE_CFG_BLOCKED))
+               return PCIBIOS_SET_FAILED;
+#endif
 
        addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
        buid = pdn->phb->buid;
@@ -155,10 +157,6 @@ static int rtas_pci_write_config(struct pci_bus *bus,
        struct device_node *busdn, *dn;
        struct pci_dn *pdn;
        bool found = false;
-#ifdef CONFIG_EEH
-       struct eeh_dev *edev;
-#endif
-       int ret;
 
        /* Search only direct children of the bus */
        busdn = pci_bus_to_OF_node(bus);
@@ -173,14 +171,8 @@ static int rtas_pci_write_config(struct pci_bus *bus,
 
        if (!found)
                return PCIBIOS_DEVICE_NOT_FOUND;
-#ifdef CONFIG_EEH
-       edev = of_node_to_eeh_dev(dn);
-       if (edev && edev->pe && (edev->pe->state & EEH_PE_RESET))
-               return PCIBIOS_DEVICE_NOT_FOUND;
-#endif
-       ret = rtas_write_config(pdn, where, size, val);
 
-       return ret;
+       return rtas_write_config(pdn, where, size, val);
 }
 
 static struct pci_ops rtas_pci_ops = {
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index cd07d79..4f3cfe1 100644
@@ -522,36 +522,36 @@ void __init setup_system(void)
        smp_release_cpus();
 #endif
 
-       printk("Starting Linux PPC64 %s\n", init_utsname()->version);
+       pr_info("Starting Linux PPC64 %s\n", init_utsname()->version);
 
-       printk("-----------------------------------------------------\n");
-       printk("ppc64_pft_size    = 0x%llx\n", ppc64_pft_size);
-       printk("phys_mem_size     = 0x%llx\n", memblock_phys_mem_size());
+       pr_info("-----------------------------------------------------\n");
+       pr_info("ppc64_pft_size    = 0x%llx\n", ppc64_pft_size);
+       pr_info("phys_mem_size     = 0x%llx\n", memblock_phys_mem_size());
 
        if (ppc64_caches.dline_size != 0x80)
-               printk("dcache_line_size  = 0x%x\n", ppc64_caches.dline_size);
+               pr_info("dcache_line_size  = 0x%x\n", ppc64_caches.dline_size);
        if (ppc64_caches.iline_size != 0x80)
-               printk("icache_line_size  = 0x%x\n", ppc64_caches.iline_size);
+               pr_info("icache_line_size  = 0x%x\n", ppc64_caches.iline_size);
 
-       printk("cpu_features      = 0x%016lx\n", cur_cpu_spec->cpu_features);
-       printk("  possible        = 0x%016lx\n", CPU_FTRS_POSSIBLE);
-       printk("  always          = 0x%016lx\n", CPU_FTRS_ALWAYS);
-       printk("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features,
+       pr_info("cpu_features      = 0x%016lx\n", cur_cpu_spec->cpu_features);
+       pr_info("  possible        = 0x%016lx\n", CPU_FTRS_POSSIBLE);
+       pr_info("  always          = 0x%016lx\n", CPU_FTRS_ALWAYS);
+       pr_info("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features,
                cur_cpu_spec->cpu_user_features2);
-       printk("mmu_features      = 0x%08x\n", cur_cpu_spec->mmu_features);
-       printk("firmware_features = 0x%016lx\n", powerpc_firmware_features);
+       pr_info("mmu_features      = 0x%08x\n", cur_cpu_spec->mmu_features);
+       pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
 
 #ifdef CONFIG_PPC_STD_MMU_64
        if (htab_address)
-               printk("htab_address      = 0x%p\n", htab_address);
+               pr_info("htab_address      = 0x%p\n", htab_address);
 
-       printk("htab_hash_mask    = 0x%lx\n", htab_hash_mask);
+       pr_info("htab_hash_mask    = 0x%lx\n", htab_hash_mask);
 #endif
 
        if (PHYSICAL_START > 0)
-               printk("physical_start    = 0x%llx\n",
+               pr_info("physical_start    = 0x%llx\n",
                       (unsigned long long)PHYSICAL_START);
-       printk("-----------------------------------------------------\n");
+       pr_info("-----------------------------------------------------\n");
 
        DBG(" <- setup_system()\n");
 }
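
For reference, pr_info() is printk() with an explicit KERN_INFO loglevel (plus the file's pr_fmt() prefix when one is defined), so this boot banner now logs at a well-defined level instead of the default message loglevel. Essentially:

/* include/linux/printk.h, in essence: */
#define pr_info(fmt, ...) \
	printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
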
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 3d30ef1..ea43a34 100644
@@ -50,7 +50,7 @@ void save_stack_trace(struct stack_trace *trace)
 {
        unsigned long sp;
 
-       asm("mr %0,1" : "=r" (sp));
+       sp = current_stack_pointer();
 
        save_context_stack(trace, sp, current, 1);
 }
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 649666d..e5236c2 100644
@@ -8,6 +8,8 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+#define pr_fmt(fmt) "numa: " fmt
+
 #include <linux/threads.h>
 #include <linux/bootmem.h>
 #include <linux/init.h>
@@ -1153,6 +1155,22 @@ static int __init early_numa(char *p)
 }
 early_param("numa", early_numa);
 
+static bool topology_updates_enabled = true;
+
+static int __init early_topology_updates(char *p)
+{
+       if (!p)
+               return 0;
+
+       if (!strcmp(p, "off")) {
+               pr_info("Disabling topology updates\n");
+               topology_updates_enabled = false;
+       }
+
+       return 0;
+}
+early_param("topology_updates", early_topology_updates);
+
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
  * Find the node associated with a hot added memory section for
@@ -1442,8 +1460,11 @@ static long hcall_vphn(unsigned long cpu, __be32 *associativity)
        long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
        u64 flags = 1;
        int hwcpu = get_hard_smp_processor_id(cpu);
+       int i;
 
        rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
+       for (i = 0; i < 6; i++)
+               retbuf[i] = cpu_to_be64(retbuf[i]);
        vphn_unpack_associativity(retbuf, associativity);
 
        return rc;
@@ -1539,6 +1560,9 @@ int arch_update_cpu_topology(void)
        struct device *dev;
        int weight, new_nid, i = 0;
 
+       if (!prrn_enabled && !vphn_enabled)
+               return 0;
+
        weight = cpumask_weight(&cpu_associativity_changes_mask);
        if (!weight)
                return 0;
@@ -1592,6 +1616,15 @@ int arch_update_cpu_topology(void)
                cpu = cpu_last_thread_sibling(cpu);
        }
 
+       pr_debug("Topology update for the following CPUs:\n");
+       if (cpumask_weight(&updated_cpus)) {
+               for (ud = &updates[0]; ud; ud = ud->next) {
+                       pr_debug("cpu %d moving from node %d "
+                                         "to %d\n", ud->cpu,
+                                         ud->old_nid, ud->new_nid);
+               }
+       }
+
        /*
         * In cases where we have nothing to update (because the updates list
         * is too short or because the new topology is same as the old one),
@@ -1800,8 +1833,12 @@ static const struct file_operations topology_ops = {
 
 static int topology_update_init(void)
 {
-       start_topology_update();
-       proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops);
+       /* Do not poll for changes if disabled at boot */
+       if (topology_updates_enabled)
+               start_topology_update();
+
+       if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
+               return -ENOMEM;
 
        return 0;
 }
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 426814a..eba9cb1 100644
@@ -373,7 +373,7 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe)
         * moving forward, we have to return operational
         * state during PE reset.
         */
-       if (pe->state & EEH_PE_RESET) {
+       if (pe->state & EEH_PE_CFG_BLOCKED) {
                result = (EEH_STATE_MMIO_ACTIVE  |
                          EEH_STATE_DMA_ACTIVE   |
                          EEH_STATE_MMIO_ENABLED |
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 3e89cbf..1d19e79 100644
@@ -168,6 +168,26 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
                return ret;
        }
 
+       /*
+        * If the PE contains any one of following adapters, the
+        * PCI config space can't be accessed when dumping EEH log.
+        * Otherwise, we will run into fenced PHB caused by shortage
+        * of outbound credits in the adapter. The PCI config access
+        * should be blocked until PE reset. MMIO access is dropped
+        * by hardware certainly. In order to drop PCI config requests,
+        * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which
+        * will be checked in the backend for PE state retrival. If
+        * the PE becomes frozen for the first time and the flag has
+        * been set for the PE, we will set EEH_PE_CFG_BLOCKED for
+        * that PE to block its config space.
+        *
+        * Broadcom Austin 4-ports NICs (14e4:1657)
+        * Broadcom Shiner 2-ports 10G NICs (14e4:168e)
+        */
+       if ((dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x1657) ||
+           (dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x168e))
+               edev->pe->state |= EEH_PE_CFG_RESTRICTED;
+
        /*
         * Cache the PE primary bus, which can't be fetched when
         * full hotplug is in progress. In that case, all child
@@ -383,6 +403,39 @@ static int powernv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
        return ret;
 }
 
+static inline bool powernv_eeh_cfg_blocked(struct device_node *dn)
+{
+       struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+
+       if (!edev || !edev->pe)
+               return false;
+
+       if (edev->pe->state & EEH_PE_CFG_BLOCKED)
+               return true;
+
+       return false;
+}
+
+static int powernv_eeh_read_config(struct device_node *dn,
+                                  int where, int size, u32 *val)
+{
+       if (powernv_eeh_cfg_blocked(dn)) {
+               *val = 0xFFFFFFFF;
+               return PCIBIOS_SET_FAILED;
+       }
+
+       return pnv_pci_cfg_read(dn, where, size, val);
+}
+
+static int powernv_eeh_write_config(struct device_node *dn,
+                                   int where, int size, u32 val)
+{
+       if (powernv_eeh_cfg_blocked(dn))
+               return PCIBIOS_SET_FAILED;
+
+       return pnv_pci_cfg_write(dn, where, size, val);
+}
+
 /**
  * powernv_eeh_next_error - Retrieve next EEH error to handle
  * @pe: Affected PE
@@ -440,8 +493,8 @@ static struct eeh_ops powernv_eeh_ops = {
        .get_log                = powernv_eeh_get_log,
        .configure_bridge       = powernv_eeh_configure_bridge,
        .err_inject             = powernv_eeh_err_inject,
-       .read_config            = pnv_pci_cfg_read,
-       .write_config           = pnv_pci_cfg_write,
+       .read_config            = powernv_eeh_read_config,
+       .write_config           = powernv_eeh_write_config,
        .next_error             = powernv_eeh_next_error,
        .restore_config         = powernv_eeh_restore_config
 };
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index b642b05..d019b08 100644
@@ -194,6 +194,27 @@ static int __init opal_register_exception_handlers(void)
         * fwnmi area at 0x7000 to provide the glue space to OPAL
         */
        glue = 0x7000;
+
+       /*
+        * Check if we are running on newer firmware that exports
+        * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch
+        * the HMI interrupt and we catch it directly in Linux.
+        *
+        * For older firmware (i.e currently released POWER8 System Firmware
+        * as of today <= SV810_087), we fallback to old behavior and let OPAL
+        * patch the HMI vector and handle it inside OPAL firmware.
+        *
+        * For newer firmware (in development/yet to be released) we will
+        * start catching/handling HMI directly in Linux.
+        */
+       if (!opal_check_token(OPAL_HANDLE_HMI)) {
+               pr_info("opal: Old firmware detected, OPAL handles HMIs.\n");
+               opal_register_exception_handler(
+                               OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
+                               0, glue);
+               glue += 128;
+       }
+
        opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
 #endif
 
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index b3ca77d..b2187d0 100644
@@ -505,7 +505,7 @@ static bool pnv_pci_cfg_check(struct pci_controller *hose,
        edev = of_node_to_eeh_dev(dn);
        if (edev) {
                if (edev->pe &&
-                   (edev->pe->state & EEH_PE_RESET))
+                   (edev->pe->state & EEH_PE_CFG_BLOCKED))
                        return false;
 
                if (edev->mode & EEH_DEV_REMOVED)
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index fdf01b6..6ad83bd 100644
 #include <asm/rtas.h>
 
 struct cc_workarea {
-       u32     drc_index;
-       u32     zero;
-       u32     name_offset;
-       u32     prop_length;
-       u32     prop_offset;
+       __be32  drc_index;
+       __be32  zero;
+       __be32  name_offset;
+       __be32  prop_length;
+       __be32  prop_offset;
 };
 
 void dlpar_free_cc_property(struct property *prop)
@@ -49,11 +49,11 @@ static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa)
        if (!prop)
                return NULL;
 
-       name = (char *)ccwa + ccwa->name_offset;
+       name = (char *)ccwa + be32_to_cpu(ccwa->name_offset);
        prop->name = kstrdup(name, GFP_KERNEL);
 
-       prop->length = ccwa->prop_length;
-       value = (char *)ccwa + ccwa->prop_offset;
+       prop->length = be32_to_cpu(ccwa->prop_length);
+       value = (char *)ccwa + be32_to_cpu(ccwa->prop_offset);
        prop->value = kmemdup(value, prop->length, GFP_KERNEL);
        if (!prop->value) {
                dlpar_free_cc_property(prop);
@@ -79,7 +79,7 @@ static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa,
        if (!dn)
                return NULL;
 
-       name = (char *)ccwa + ccwa->name_offset;
+       name = (char *)ccwa + be32_to_cpu(ccwa->name_offset);
        dn->full_name = kasprintf(GFP_KERNEL, "%s/%s", path, name);
        if (!dn->full_name) {
                kfree(dn);
@@ -126,7 +126,7 @@ void dlpar_free_cc_nodes(struct device_node *dn)
 #define CALL_AGAIN     -2
 #define ERR_CFG_USE     -9003
 
-struct device_node *dlpar_configure_connector(u32 drc_index,
+struct device_node *dlpar_configure_connector(__be32 drc_index,
                                              struct device_node *parent)
 {
        struct device_node *dn;
@@ -414,7 +414,7 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
        if (!parent)
                return -ENODEV;
 
-       dn = dlpar_configure_connector(drc_index, parent);
+       dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
        if (!dn)
                return -EINVAL;
 
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index b174fa7..5c375f9 100644
@@ -247,7 +247,7 @@ static int pseries_add_processor(struct device_node *np)
        unsigned int cpu;
        cpumask_var_t candidate_mask, tmp;
        int err = -ENOSPC, len, nthreads, i;
-       const u32 *intserv;
+       const __be32 *intserv;
 
        intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
        if (!intserv)
@@ -293,7 +293,7 @@ static int pseries_add_processor(struct device_node *np)
        for_each_cpu(cpu, tmp) {
                BUG_ON(cpu_present(cpu));
                set_cpu_present(cpu, true);
-               set_hard_smp_processor_id(cpu, *intserv++);
+               set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++));
        }
        err = 0;
 out_unlock:
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index de1ec54..e32e009 100644
@@ -30,7 +30,6 @@
 #include <linux/mm.h>
 #include <linux/memblock.h>
 #include <linux/spinlock.h>
-#include <linux/sched.h>       /* for show_stack */
 #include <linux/string.h>
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
@@ -168,7 +167,7 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
                        printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
                        printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
                        printk("\ttce val = 0x%llx\n", tce );
-                       show_stack(current, (unsigned long *)__get_SP());
+                       dump_stack();
                }
 
                tcenum++;
@@ -257,7 +256,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
                printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
                printk("\tnpages  = 0x%llx\n", (u64)npages);
                printk("\ttce[0] val = 0x%llx\n", tcep[0]);
-               show_stack(current, (unsigned long *)__get_SP());
+               dump_stack();
        }
        return ret;
 }
@@ -273,7 +272,7 @@ static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages
                        printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
                        printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
                        printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
-                       show_stack(current, (unsigned long *)__get_SP());
+                       dump_stack();
                }
 
                tcenum++;
@@ -292,7 +291,7 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n
                printk("\trc      = %lld\n", rc);
                printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
                printk("\tnpages  = 0x%llx\n", (u64)npages);
-               show_stack(current, (unsigned long *)__get_SP());
+               dump_stack();
        }
 }
 
@@ -307,7 +306,7 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
                printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc);
                printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
                printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
-               show_stack(current, (unsigned long *)__get_SP());
+               dump_stack();
        }
 
        return tce_ret;
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 361add6..1796c54 100644
@@ -56,7 +56,8 @@ extern void hvc_vio_init_early(void);
 /* Dynamic logical Partitioning/Mobility */
 extern void dlpar_free_cc_nodes(struct device_node *);
 extern void dlpar_free_cc_property(struct property *);
-extern struct device_node *dlpar_configure_connector(u32, struct device_node *);
+extern struct device_node *dlpar_configure_connector(__be32,
+                                               struct device_node *);
 extern int dlpar_attach_node(struct device_node *);
 extern int dlpar_detach_node(struct device_node *);
 
diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
index 0c75214..73b64c7 100644
@@ -145,59 +145,64 @@ void msi_bitmap_free(struct msi_bitmap *bmp)
 
 #ifdef CONFIG_MSI_BITMAP_SELFTEST
 
-#define check(x)       \
-       if (!(x)) printk("msi_bitmap: test failed at line %d\n", __LINE__);
-
 static void __init test_basics(void)
 {
        struct msi_bitmap bmp;
-       int i, size = 512;
+       int rc, i, size = 512;
 
        /* Can't allocate a bitmap of 0 irqs */
-       check(msi_bitmap_alloc(&bmp, 0, NULL) != 0);
+       WARN_ON(msi_bitmap_alloc(&bmp, 0, NULL) == 0);
 
        /* of_node may be NULL */
-       check(0 == msi_bitmap_alloc(&bmp, size, NULL));
+       WARN_ON(msi_bitmap_alloc(&bmp, size, NULL));
 
        /* Should all be free by default */
-       check(0 == bitmap_find_free_region(bmp.bitmap, size,
-                                          get_count_order(size)));
+       WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size)));
        bitmap_release_region(bmp.bitmap, 0, get_count_order(size));
 
        /* With no node, there's no msi-available-ranges, so expect > 0 */
-       check(msi_bitmap_reserve_dt_hwirqs(&bmp) > 0);
+       WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp) <= 0);
 
        /* Should all still be free */
-       check(0 == bitmap_find_free_region(bmp.bitmap, size,
-                                          get_count_order(size)));
+       WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size)));
        bitmap_release_region(bmp.bitmap, 0, get_count_order(size));
 
        /* Check we can fill it up and then no more */
        for (i = 0; i < size; i++)
-               check(msi_bitmap_alloc_hwirqs(&bmp, 1) >= 0);
+               WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) < 0);
 
-       check(msi_bitmap_alloc_hwirqs(&bmp, 1) < 0);
+       WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) >= 0);
 
        /* Should all be allocated */
-       check(bitmap_find_free_region(bmp.bitmap, size, 0) < 0);
+       WARN_ON(bitmap_find_free_region(bmp.bitmap, size, 0) >= 0);
 
        /* And if we free one we can then allocate another */
        msi_bitmap_free_hwirqs(&bmp, size / 2, 1);
-       check(msi_bitmap_alloc_hwirqs(&bmp, 1) == size / 2);
+       WARN_ON(msi_bitmap_alloc_hwirqs(&bmp, 1) != size / 2);
+
+       /* Free most of them for the alignment tests */
+       msi_bitmap_free_hwirqs(&bmp, 3, size - 3);
 
        /* Check we get a naturally aligned offset */
-       check(msi_bitmap_alloc_hwirqs(&bmp, 2) % 2 == 0);
-       check(msi_bitmap_alloc_hwirqs(&bmp, 4) % 4 == 0);
-       check(msi_bitmap_alloc_hwirqs(&bmp, 8) % 8 == 0);
-       check(msi_bitmap_alloc_hwirqs(&bmp, 9) % 16 == 0);
-       check(msi_bitmap_alloc_hwirqs(&bmp, 3) % 4 == 0);
-       check(msi_bitmap_alloc_hwirqs(&bmp, 7) % 8 == 0);
-       check(msi_bitmap_alloc_hwirqs(&bmp, 121) % 128 == 0);
+       rc = msi_bitmap_alloc_hwirqs(&bmp, 2);
+       WARN_ON(rc < 0 && rc % 2 != 0);
+       rc = msi_bitmap_alloc_hwirqs(&bmp, 4);
+       WARN_ON(rc < 0 && rc % 4 != 0);
+       rc = msi_bitmap_alloc_hwirqs(&bmp, 8);
+       WARN_ON(rc < 0 && rc % 8 != 0);
+       rc = msi_bitmap_alloc_hwirqs(&bmp, 9);
+       WARN_ON(rc < 0 && rc % 16 != 0);
+       rc = msi_bitmap_alloc_hwirqs(&bmp, 3);
+       WARN_ON(rc < 0 && rc % 4 != 0);
+       rc = msi_bitmap_alloc_hwirqs(&bmp, 7);
+       WARN_ON(rc < 0 && rc % 8 != 0);
+       rc = msi_bitmap_alloc_hwirqs(&bmp, 121);
+       WARN_ON(rc < 0 && rc % 128 != 0);
 
        msi_bitmap_free(&bmp);
 
-       /* Clients may check bitmap == NULL for "not-allocated" */
-       check(bmp.bitmap == NULL);
+       /* Clients may WARN_ON bitmap == NULL for "not-allocated" */
+       WARN_ON(bmp.bitmap != NULL);
 
        kfree(bmp.bitmap);
 }
@@ -219,14 +224,13 @@ static void __init test_of_node(void)
        of_node_init(&of_node);
        of_node.full_name = node_name;
 
-       check(0 == msi_bitmap_alloc(&bmp, size, &of_node));
+       WARN_ON(msi_bitmap_alloc(&bmp, size, &of_node));
 
        /* No msi-available-ranges, so expect > 0 */
-       check(msi_bitmap_reserve_dt_hwirqs(&bmp) > 0);
+       WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp) <= 0);
 
        /* Should all still be free */
-       check(0 == bitmap_find_free_region(bmp.bitmap, size,
-                                          get_count_order(size)));
+       WARN_ON(bitmap_find_free_region(bmp.bitmap, size, get_count_order(size)));
        bitmap_release_region(bmp.bitmap, 0, get_count_order(size));
 
        /* Now create a fake msi-available-ranges property */
@@ -240,11 +244,11 @@ static void __init test_of_node(void)
        of_node.properties = &prop;
 
        /* msi-available-ranges, so expect == 0 */
-       check(msi_bitmap_reserve_dt_hwirqs(&bmp) == 0);
+       WARN_ON(msi_bitmap_reserve_dt_hwirqs(&bmp));
 
        /* Check we got the expected result */
-       check(0 == bitmap_parselist(expected_str, expected, size));
-       check(bitmap_equal(expected, bmp.bitmap, size));
+       WARN_ON(bitmap_parselist(expected_str, expected, size));
+       WARN_ON(!bitmap_equal(expected, bmp.bitmap, size));
 
        msi_bitmap_free(&bmp);
        kfree(bmp.bitmap);