Merge branch 'smp-hotplug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 4 Oct 2016 02:43:08 +0000 (19:43 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 4 Oct 2016 02:43:08 +0000 (19:43 -0700)
Pull CPU hotplug updates from Thomas Gleixner:
 "Yet another batch of cpu hotplug core updates and conversions:

   - Provide core infrastructure for multi-instance drivers so that
     drivers do not have to keep custom lists.

   - Convert custom lists to the new infrastructure. The block-mq custom
     list conversion comes through the block tree and makes the diffstat
     tip over to more lines removed than added.

   - Handle unbalanced hotplug enable/disable calls more gracefully.

   - Remove the obsolete CPU_STARTING/DYING notifier support.

   - Convert another batch of notifier users.

   The relayfs changes which conflicted with the conversion have been
   shipped to me by Andrew.

   The remaining lot is targeted for 4.10 so that we finally can remove
   the rest of the notifiers"

* 'smp-hotplug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (46 commits)
  cpufreq: Fix up conversion to hotplug state machine
  blk/mq: Reserve hotplug states for block multiqueue
  x86/apic/uv: Convert to hotplug state machine
  s390/mm/pfault: Convert to hotplug state machine
  mips/loongson/smp: Convert to hotplug state machine
  mips/octeon/smp: Convert to hotplug state machine
  fault-injection/cpu: Convert to hotplug state machine
  padata: Convert to hotplug state machine
  cpufreq: Convert to hotplug state machine
  ACPI/processor: Convert to hotplug state machine
  virtio scsi: Convert to hotplug state machine
  oprofile/timer: Convert to hotplug state machine
  block/softirq: Convert to hotplug state machine
  lib/irq_poll: Convert to hotplug state machine
  x86/microcode: Convert to hotplug state machine
  sh/SH-X3 SMP: Convert to hotplug state machine
  ia64/mca: Convert to hotplug state machine
  ARM/OMAP/wakeupgen: Convert to hotplug state machine
  ARM/shmobile: Convert to hotplug state machine
  arm64/FP/SIMD: Convert to hotplug state machine
  ...

15 files changed:
arch/ia64/kernel/mca.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/kvm.c
arch/x86/kernel/smpboot.c
drivers/acpi/processor_driver.c
drivers/bus/arm-cci.c
drivers/bus/arm-ccn.c
drivers/cpufreq/cpufreq.c
drivers/md/raid5.c
drivers/perf/arm_pmu.c
include/linux/cpu.h
include/linux/cpuhotplug.h
include/linux/perf/arm_pmu.h
kernel/cpu.c
kernel/softirq.c

diff --combined arch/ia64/kernel/mca.c
@@@ -986,7 -986,7 +986,7 @@@ ia64_mca_modify_original_stack(struct p
        int cpu = smp_processor_id();
  
        previous_current = curr_task(cpu);
 -      set_curr_task(cpu, current);
 +      ia64_set_curr_task(cpu, current);
        if ((p = strchr(current->comm, ' ')))
                *p = '\0';
  
@@@ -1360,14 -1360,14 +1360,14 @@@ ia64_mca_handler(struct pt_regs *regs, 
                                cpumask_clear_cpu(i, &mca_cpu); /* wake next cpu */
                                while (monarch_cpu != -1)
                                        cpu_relax();    /* spin until last cpu leaves */
 -                              set_curr_task(cpu, previous_current);
 +                              ia64_set_curr_task(cpu, previous_current);
                                ia64_mc_info.imi_rendez_checkin[cpu]
                                                = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
                                return;
                        }
                }
        }
 -      set_curr_task(cpu, previous_current);
 +      ia64_set_curr_task(cpu, previous_current);
        ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
        monarch_cpu = -1;       /* This frees the slaves and previous monarchs */
  }
@@@ -1729,7 -1729,7 +1729,7 @@@ ia64_init_handler(struct pt_regs *regs
                NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1);
  
                mprintk("Slave on cpu %d returning to normal service.\n", cpu);
 -              set_curr_task(cpu, previous_current);
 +              ia64_set_curr_task(cpu, previous_current);
                ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
                atomic_dec(&slaves);
                return;
  
        mprintk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
        atomic_dec(&monarchs);
 -      set_curr_task(cpu, previous_current);
 +      ia64_set_curr_task(cpu, previous_current);
        monarch_cpu = -1;
        return;
  }
@@@ -1890,7 -1890,7 +1890,7 @@@ ia64_mca_cpu_init(void *cpu_data
                                                              PAGE_KERNEL)));
  }
  
- static void ia64_mca_cmc_vector_adjust(void *dummy)
+ static int ia64_mca_cpu_online(unsigned int cpu)
  {
        unsigned long flags;
  
        if (!cmc_polling_enabled)
                ia64_mca_cmc_vector_enable(NULL);
        local_irq_restore(flags);
+       return 0;
  }
  
- static int mca_cpu_callback(struct notifier_block *nfb,
-                                     unsigned long action,
-                                     void *hcpu)
- {
-       switch (action) {
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-               ia64_mca_cmc_vector_adjust(NULL);
-               break;
-       }
-       return NOTIFY_OK;
- }
- static struct notifier_block mca_cpu_notifier = {
-       .notifier_call = mca_cpu_callback
- };
  /*
   * ia64_mca_init
   *
@@@ -2111,15 -2095,13 +2095,13 @@@ ia64_mca_late_init(void
        if (!mca_init)
                return 0;
  
-       register_hotcpu_notifier(&mca_cpu_notifier);
        /* Setup the CMCI/P vector and handler */
        setup_timer(&cmc_poll_timer, ia64_mca_cmc_poll, 0UL);
  
        /* Unmask/enable the vector */
        cmc_polling_enabled = 0;
-       schedule_work(&cmc_enable_work);
+       cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/mca:online",
+                         ia64_mca_cpu_online, NULL);
        IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __func__);
  
  #ifdef CONFIG_ACPI
@@@ -533,8 -533,11 +533,8 @@@ static unsigned int x2apic_get_apic_id(
  
  static unsigned long set_apic_id(unsigned int id)
  {
 -      unsigned long x;
 -
 -      /* maskout x2apic_extra_bits ? */
 -      x = id;
 -      return x;
 +      /* CHECKME: Do we need to mask out the xapic extra bits? */
 +      return id;
  }
  
  static unsigned int uv_read_apic_id(void)
@@@ -557,7 -560,7 +557,7 @@@ static int uv_probe(void
        return apic == &apic_x2apic_uv_x;
  }
  
 -static struct apic __refdata apic_x2apic_uv_x = {
 +static struct apic apic_x2apic_uv_x __ro_after_init = {
  
        .name                           = "UV large system",
        .probe                          = uv_probe,
@@@ -924,7 -927,7 +924,7 @@@ static void uv_heartbeat(unsigned long 
        mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
  }
  
- static void uv_heartbeat_enable(int cpu)
+ static int uv_heartbeat_enable(unsigned int cpu)
  {
        while (!uv_cpu_scir_info(cpu)->enabled) {
                struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
                /* also ensure that boot cpu is enabled */
                cpu = 0;
        }
+       return 0;
  }
  
  #ifdef CONFIG_HOTPLUG_CPU
- static void uv_heartbeat_disable(int cpu)
+ static int uv_heartbeat_disable(unsigned int cpu)
  {
        if (uv_cpu_scir_info(cpu)->enabled) {
                uv_cpu_scir_info(cpu)->enabled = 0;
                del_timer(&uv_cpu_scir_info(cpu)->timer);
        }
        uv_set_cpu_scir_bits(cpu, 0xff);
- }
- /*
-  * cpu hotplug notifier
-  */
- static int uv_scir_cpu_notify(struct notifier_block *self, unsigned long action,
-                             void *hcpu)
- {
-       long cpu = (long)hcpu;
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_DOWN_FAILED:
-       case CPU_ONLINE:
-               uv_heartbeat_enable(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               uv_heartbeat_disable(cpu);
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
+       return 0;
  }
  
  static __init void uv_scir_register_cpu_notifier(void)
  {
-       hotcpu_notifier(uv_scir_cpu_notify, 0);
+       cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/x2apic-uvx:online",
+                                 uv_heartbeat_enable, uv_heartbeat_disable);
  }
  
  #else /* !CONFIG_HOTPLUG_CPU */
diff --combined arch/x86/kernel/kvm.c
@@@ -423,12 -423,7 +423,7 @@@ static void __init kvm_smp_prepare_boot
        kvm_spinlock_init();
  }
  
- static void kvm_guest_cpu_online(void *dummy)
- {
-       kvm_guest_cpu_init();
- }
- static void kvm_guest_cpu_offline(void *dummy)
+ static void kvm_guest_cpu_offline(void)
  {
        kvm_disable_steal_time();
        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
        apf_task_wake_all();
  }
  
- static int kvm_cpu_notify(struct notifier_block *self, unsigned long action,
-                         void *hcpu)
+ static int kvm_cpu_online(unsigned int cpu)
  {
-       int cpu = (unsigned long)hcpu;
-       switch (action) {
-       case CPU_ONLINE:
-       case CPU_DOWN_FAILED:
-       case CPU_ONLINE_FROZEN:
-               smp_call_function_single(cpu, kvm_guest_cpu_online, NULL, 0);
-               break;
-       case CPU_DOWN_PREPARE:
-       case CPU_DOWN_PREPARE_FROZEN:
-               smp_call_function_single(cpu, kvm_guest_cpu_offline, NULL, 1);
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
+       local_irq_disable();
+       kvm_guest_cpu_init();
+       local_irq_enable();
+       return 0;
  }
  
- static struct notifier_block kvm_cpu_notifier = {
-         .notifier_call  = kvm_cpu_notify,
- };
+ static int kvm_cpu_down_prepare(unsigned int cpu)
+ {
+       local_irq_disable();
+       kvm_guest_cpu_offline();
+       local_irq_enable();
+       return 0;
+ }
  #endif
  
  static void __init kvm_apf_trap_init(void)
@@@ -494,7 -481,9 +481,9 @@@ void __init kvm_guest_init(void
  
  #ifdef CONFIG_SMP
        smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
-       register_cpu_notifier(&kvm_cpu_notifier);
+       if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
+                                     kvm_cpu_online, kvm_cpu_down_prepare) < 0)
+               pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");
  #else
        kvm_guest_cpu_init();
  #endif
@@@ -575,6 -564,9 +564,6 @@@ static void kvm_kick_cpu(int cpu
        kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
  }
  
 -
 -#ifdef CONFIG_QUEUED_SPINLOCKS
 -
  #include <asm/qspinlock.h>
  
  static void kvm_wait(u8 *ptr, u8 val)
@@@ -603,6 -595,243 +592,6 @@@ out
        local_irq_restore(flags);
  }
  
 -#else /* !CONFIG_QUEUED_SPINLOCKS */
 -
 -enum kvm_contention_stat {
 -      TAKEN_SLOW,
 -      TAKEN_SLOW_PICKUP,
 -      RELEASED_SLOW,
 -      RELEASED_SLOW_KICKED,
 -      NR_CONTENTION_STATS
 -};
 -
 -#ifdef CONFIG_KVM_DEBUG_FS
 -#define HISTO_BUCKETS 30
 -
 -static struct kvm_spinlock_stats
 -{
 -      u32 contention_stats[NR_CONTENTION_STATS];
 -      u32 histo_spin_blocked[HISTO_BUCKETS+1];
 -      u64 time_blocked;
 -} spinlock_stats;
 -
 -static u8 zero_stats;
 -
 -static inline void check_zero(void)
 -{
 -      u8 ret;
 -      u8 old;
 -
 -      old = READ_ONCE(zero_stats);
 -      if (unlikely(old)) {
 -              ret = cmpxchg(&zero_stats, old, 0);
 -              /* This ensures only one fellow resets the stat */
 -              if (ret == old)
 -                      memset(&spinlock_stats, 0, sizeof(spinlock_stats));
 -      }
 -}
 -
 -static inline void add_stats(enum kvm_contention_stat var, u32 val)
 -{
 -      check_zero();
 -      spinlock_stats.contention_stats[var] += val;
 -}
 -
 -
 -static inline u64 spin_time_start(void)
 -{
 -      return sched_clock();
 -}
 -
 -static void __spin_time_accum(u64 delta, u32 *array)
 -{
 -      unsigned index;
 -
 -      index = ilog2(delta);
 -      check_zero();
 -
 -      if (index < HISTO_BUCKETS)
 -              array[index]++;
 -      else
 -              array[HISTO_BUCKETS]++;
 -}
 -
 -static inline void spin_time_accum_blocked(u64 start)
 -{
 -      u32 delta;
 -
 -      delta = sched_clock() - start;
 -      __spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
 -      spinlock_stats.time_blocked += delta;
 -}
 -
 -static struct dentry *d_spin_debug;
 -static struct dentry *d_kvm_debug;
 -
 -static struct dentry *kvm_init_debugfs(void)
 -{
 -      d_kvm_debug = debugfs_create_dir("kvm-guest", NULL);
 -      if (!d_kvm_debug)
 -              printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
 -
 -      return d_kvm_debug;
 -}
 -
 -static int __init kvm_spinlock_debugfs(void)
 -{
 -      struct dentry *d_kvm;
 -
 -      d_kvm = kvm_init_debugfs();
 -      if (d_kvm == NULL)
 -              return -ENOMEM;
 -
 -      d_spin_debug = debugfs_create_dir("spinlocks", d_kvm);
 -
 -      debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
 -
 -      debugfs_create_u32("taken_slow", 0444, d_spin_debug,
 -                 &spinlock_stats.contention_stats[TAKEN_SLOW]);
 -      debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
 -                 &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
 -
 -      debugfs_create_u32("released_slow", 0444, d_spin_debug,
 -                 &spinlock_stats.contention_stats[RELEASED_SLOW]);
 -      debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
 -                 &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
 -
 -      debugfs_create_u64("time_blocked", 0444, d_spin_debug,
 -                         &spinlock_stats.time_blocked);
 -
 -      debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
 -                   spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
 -
 -      return 0;
 -}
 -fs_initcall(kvm_spinlock_debugfs);
 -#else  /* !CONFIG_KVM_DEBUG_FS */
 -static inline void add_stats(enum kvm_contention_stat var, u32 val)
 -{
 -}
 -
 -static inline u64 spin_time_start(void)
 -{
 -      return 0;
 -}
 -
 -static inline void spin_time_accum_blocked(u64 start)
 -{
 -}
 -#endif  /* CONFIG_KVM_DEBUG_FS */
 -
 -struct kvm_lock_waiting {
 -      struct arch_spinlock *lock;
 -      __ticket_t want;
 -};
 -
 -/* cpus 'waiting' on a spinlock to become available */
 -static cpumask_t waiting_cpus;
 -
 -/* Track spinlock on which a cpu is waiting */
 -static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting);
 -
 -__visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 -{
 -      struct kvm_lock_waiting *w;
 -      int cpu;
 -      u64 start;
 -      unsigned long flags;
 -      __ticket_t head;
 -
 -      if (in_nmi())
 -              return;
 -
 -      w = this_cpu_ptr(&klock_waiting);
 -      cpu = smp_processor_id();
 -      start = spin_time_start();
 -
 -      /*
 -       * Make sure an interrupt handler can't upset things in a
 -       * partially setup state.
 -       */
 -      local_irq_save(flags);
 -
 -      /*
 -       * The ordering protocol on this is that the "lock" pointer
 -       * may only be set non-NULL if the "want" ticket is correct.
 -       * If we're updating "want", we must first clear "lock".
 -       */
 -      w->lock = NULL;
 -      smp_wmb();
 -      w->want = want;
 -      smp_wmb();
 -      w->lock = lock;
 -
 -      add_stats(TAKEN_SLOW, 1);
 -
 -      /*
 -       * This uses set_bit, which is atomic but we should not rely on its
 -       * reordering gurantees. So barrier is needed after this call.
 -       */
 -      cpumask_set_cpu(cpu, &waiting_cpus);
 -
 -      barrier();
 -
 -      /*
 -       * Mark entry to slowpath before doing the pickup test to make
 -       * sure we don't deadlock with an unlocker.
 -       */
 -      __ticket_enter_slowpath(lock);
 -
 -      /* make sure enter_slowpath, which is atomic does not cross the read */
 -      smp_mb__after_atomic();
 -
 -      /*
 -       * check again make sure it didn't become free while
 -       * we weren't looking.
 -       */
 -      head = READ_ONCE(lock->tickets.head);
 -      if (__tickets_equal(head, want)) {
 -              add_stats(TAKEN_SLOW_PICKUP, 1);
 -              goto out;
 -      }
 -
 -      /*
 -       * halt until it's our turn and kicked. Note that we do safe halt
 -       * for irq enabled case to avoid hang when lock info is overwritten
 -       * in irq spinlock slowpath and no spurious interrupt occur to save us.
 -       */
 -      if (arch_irqs_disabled_flags(flags))
 -              halt();
 -      else
 -              safe_halt();
 -
 -out:
 -      cpumask_clear_cpu(cpu, &waiting_cpus);
 -      w->lock = NULL;
 -      local_irq_restore(flags);
 -      spin_time_accum_blocked(start);
 -}
 -PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning);
 -
 -/* Kick vcpu waiting on @lock->head to reach value @ticket */
 -static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
 -{
 -      int cpu;
 -
 -      add_stats(RELEASED_SLOW, 1);
 -      for_each_cpu(cpu, &waiting_cpus) {
 -              const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
 -              if (READ_ONCE(w->lock) == lock &&
 -                  READ_ONCE(w->want) == ticket) {
 -                      add_stats(RELEASED_SLOW_KICKED, 1);
 -                      kvm_kick_cpu(cpu);
 -                      break;
 -              }
 -      }
 -}
 -
 -#endif /* !CONFIG_QUEUED_SPINLOCKS */
 -
  /*
   * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
   */
@@@ -614,11 -843,16 +603,11 @@@ void __init kvm_spinlock_init(void
        if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
                return;
  
 -#ifdef CONFIG_QUEUED_SPINLOCKS
        __pv_init_lock_hash();
        pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
        pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
        pv_lock_ops.wait = kvm_wait;
        pv_lock_ops.kick = kvm_kick_cpu;
 -#else /* !CONFIG_QUEUED_SPINLOCKS */
 -      pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
 -      pv_lock_ops.unlock_kick = kvm_unlock_kick;
 -#endif
  }
  
  static __init int kvm_spinlock_init_jump(void)
@@@ -471,7 -471,7 +471,7 @@@ static bool match_die(struct cpuinfo_x8
        return false;
  }
  
 -static struct sched_domain_topology_level numa_inside_package_topology[] = {
 +static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
  #ifdef CONFIG_SCHED_SMT
        { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
  #endif
  #endif
        { NULL, },
  };
 +
 +static struct sched_domain_topology_level x86_topology[] = {
 +#ifdef CONFIG_SCHED_SMT
 +      { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
 +#endif
 +#ifdef CONFIG_SCHED_MC
 +      { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
 +#endif
 +      { cpu_cpu_mask, SD_INIT_NAME(DIE) },
 +      { NULL, },
 +};
 +
  /*
 - * set_sched_topology() sets the topology internal to a CPU.  The
 - * NUMA topologies are layered on top of it to build the full
 - * system topology.
 - *
 - * If NUMA nodes are observed to occur within a CPU package, this
 - * function should be called.  It forces the sched domain code to
 - * only use the SMT level for the CPU portion of the topology.
 - * This essentially falls back to relying on NUMA information
 - * from the SRAT table to describe the entire system topology
 - * (except for hyperthreads).
 + * Set if a package/die has multiple NUMA nodes inside.
 + * AMD Magny-Cours and Intel Cluster-on-Die have this.
   */
 -static void primarily_use_numa_for_topology(void)
 -{
 -      set_sched_topology(numa_inside_package_topology);
 -}
 +static bool x86_has_numa_in_package;
  
  void set_cpu_sibling_map(int cpu)
  {
                                c->booted_cores = cpu_data(i).booted_cores;
                }
                if (match_die(c, o) && !topology_same_node(c, o))
 -                      primarily_use_numa_for_topology();
 +                      x86_has_numa_in_package = true;
        }
  
        threads = cpumask_weight(topology_sibling_cpumask(cpu));
@@@ -691,7 -690,7 +691,7 @@@ wakeup_secondary_cpu_via_nmi(int apicid
         * Give the other CPU some time to accept the IPI.
         */
        udelay(200);
 -      if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
 +      if (APIC_INTEGRATED(boot_cpu_apic_version)) {
                maxlvt = lapic_get_maxlvt();
                if (maxlvt > 3)                 /* Due to the Pentium erratum 3AP.  */
                        apic_write(APIC_ESR, 0);
@@@ -718,7 -717,7 +718,7 @@@ wakeup_secondary_cpu_via_init(int phys_
        /*
         * Be paranoid about clearing APIC errors.
         */
 -      if (APIC_INTEGRATED(apic_version[phys_apicid])) {
 +      if (APIC_INTEGRATED(boot_cpu_apic_version)) {
                if (maxlvt > 3)         /* Due to the Pentium erratum 3AP.  */
                        apic_write(APIC_ESR, 0);
                apic_read(APIC_ESR);
         * Determine this based on the APIC version.
         * If we don't have an integrated APIC, don't send the STARTUP IPIs.
         */
 -      if (APIC_INTEGRATED(apic_version[phys_apicid]))
 +      if (APIC_INTEGRATED(boot_cpu_apic_version))
                num_starts = 2;
        else
                num_starts = 0;
@@@ -943,6 -942,7 +943,6 @@@ void common_cpu_up(unsigned int cpu, st
        per_cpu(cpu_current_top_of_stack, cpu) =
                (unsigned long)task_stack_page(idle) + THREAD_SIZE;
  #else
 -      clear_tsk_thread_flag(idle, TIF_FORK);
        initial_gs = per_cpu_offset(cpu);
  #endif
  }
@@@ -969,7 -969,7 +969,7 @@@ static int do_boot_cpu(int apicid, int 
  
        early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
        initial_code = (unsigned long)start_secondary;
 -      stack_start  = idle->thread.sp;
 +      initial_stack  = idle->thread.sp;
  
        /*
         * Enable the espfix hack for this CPU
                /*
                 * Be paranoid about clearing APIC errors.
                */
 -              if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
 +              if (APIC_INTEGRATED(boot_cpu_apic_version)) {
                        apic_write(APIC_ESR, 0);
                        apic_read(APIC_ESR);
                }
@@@ -1115,17 -1115,8 +1115,8 @@@ int native_cpu_up(unsigned int cpu, str
  
        common_cpu_up(cpu, tidle);
  
-       /*
-        * We have to walk the irq descriptors to setup the vector
-        * space for the cpu which comes online.  Prevent irq
-        * alloc/free across the bringup.
-        */
-       irq_lock_sparse();
        err = do_boot_cpu(apicid, cpu, tidle);
        if (err) {
-               irq_unlock_sparse();
                pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
                return -EIO;
        }
                touch_nmi_watchdog();
        }
  
-       irq_unlock_sparse();
        return 0;
  }
  
@@@ -1249,7 -1238,7 +1238,7 @@@ static int __init smp_sanity_check(unsi
        /*
         * If we couldn't find a local APIC, then get out of here now!
         */
 -      if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
 +      if (APIC_INTEGRATED(boot_cpu_apic_version) &&
            !boot_cpu_has(X86_FEATURE_APIC)) {
                if (!disable_apic) {
                        pr_err("BIOS bug, local APIC #%d not detected!...\n",
@@@ -1304,16 -1293,6 +1293,16 @@@ void __init native_smp_prepare_cpus(uns
                zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
                zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
        }
 +
 +      /*
 +       * Set 'default' x86 topology, this matches default_topology() in that
 +       * it has NUMA nodes as a topology level. See also
 +       * native_smp_cpus_done().
 +       *
 +       * Must be done before set_cpus_sibling_map() is ran.
 +       */
 +      set_sched_topology(x86_topology);
 +
        set_cpu_sibling_map(0);
  
        switch (smp_sanity_check(max_cpus)) {
                break;
        }
  
 -      default_setup_apic_routing();
 -
        if (read_apic_id() != boot_cpu_physical_apicid) {
                panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
                     read_apic_id(), boot_cpu_physical_apicid);
                /* Or can we switch back to PIC here? */
        }
  
 +      default_setup_apic_routing();
        cpu0_logical_apicid = apic_bsp_setup(false);
  
        pr_info("CPU%d: ", 0);
@@@ -1379,9 -1359,6 +1368,9 @@@ void __init native_smp_cpus_done(unsign
  {
        pr_debug("Boot done\n");
  
 +      if (x86_has_numa_in_package)
 +              set_sched_topology(x86_numa_in_package_topology);
 +
        nmi_selftest();
        impress_friends();
        setup_ioapic_dest();
@@@ -110,55 -110,46 +110,46 @@@ static void acpi_processor_notify(acpi_
  
  static int __acpi_processor_start(struct acpi_device *device);
  
- static int acpi_cpu_soft_notify(struct notifier_block *nfb,
-                                         unsigned long action, void *hcpu)
+ static int acpi_soft_cpu_online(unsigned int cpu)
  {
-       unsigned int cpu = (unsigned long)hcpu;
        struct acpi_processor *pr = per_cpu(processors, cpu);
        struct acpi_device *device;
-       action &= ~CPU_TASKS_FROZEN;
-       switch (action) {
-       case CPU_ONLINE:
-       case CPU_DEAD:
-               break;
-       default:
-               return NOTIFY_DONE;
-       }
  
        if (!pr || acpi_bus_get_device(pr->handle, &device))
-               return NOTIFY_DONE;
-       if (action == CPU_ONLINE) {
-               /*
-                * CPU got physically hotplugged and onlined for the first time:
-                * Initialize missing things.
-                */
-               if (pr->flags.need_hotplug_init) {
-                       int ret;
-                       pr_info("Will online and init hotplugged CPU: %d\n",
-                               pr->id);
-                       pr->flags.need_hotplug_init = 0;
-                       ret = __acpi_processor_start(device);
-                       WARN(ret, "Failed to start CPU: %d\n", pr->id);
-               } else {
-                       /* Normal CPU soft online event. */
-                       acpi_processor_ppc_has_changed(pr, 0);
-                       acpi_processor_hotplug(pr);
-                       acpi_processor_reevaluate_tstate(pr, action);
-                       acpi_processor_tstate_has_changed(pr);
-               }
-       } else if (action == CPU_DEAD) {
-               /* Invalidate flag.throttling after the CPU is offline. */
-               acpi_processor_reevaluate_tstate(pr, action);
+               return 0;
+       /*
+        * CPU got physically hotplugged and onlined for the first time:
+        * Initialize missing things.
+        */
+       if (pr->flags.need_hotplug_init) {
+               int ret;
+               pr_info("Will online and init hotplugged CPU: %d\n",
+                       pr->id);
+               pr->flags.need_hotplug_init = 0;
+               ret = __acpi_processor_start(device);
+               WARN(ret, "Failed to start CPU: %d\n", pr->id);
+       } else {
+               /* Normal CPU soft online event. */
+               acpi_processor_ppc_has_changed(pr, 0);
+               acpi_processor_hotplug(pr);
+               acpi_processor_reevaluate_tstate(pr, false);
+               acpi_processor_tstate_has_changed(pr);
        }
-       return NOTIFY_OK;
+       return 0;
  }
  
- static struct notifier_block acpi_cpu_notifier = {
-           .notifier_call = acpi_cpu_soft_notify,
- };
+ static int acpi_soft_cpu_dead(unsigned int cpu)
+ {
+       struct acpi_processor *pr = per_cpu(processors, cpu);
+       struct acpi_device *device;
+       if (!pr || acpi_bus_get_device(pr->handle, &device))
+               return 0;
+       acpi_processor_reevaluate_tstate(pr, true);
+       return 0;
+ }
  
  #ifdef CONFIG_ACPI_CPU_FREQ_PSS
  static int acpi_pss_perf_init(struct acpi_processor *pr,
@@@ -245,8 -236,8 +236,8 @@@ static int __acpi_processor_start(struc
                return 0;
  
        result = acpi_cppc_processor_probe(pr);
 -      if (result)
 -              return -ENODEV;
 +      if (result && !IS_ENABLED(CONFIG_ACPI_CPU_FREQ_PSS))
 +              dev_warn(&device->dev, "CPPC data invalid or not present\n");
  
        if (!cpuidle_get_driver() || cpuidle_get_driver() == &acpi_idle_driver)
                acpi_processor_power_init(pr);
@@@ -303,7 -294,7 +294,7 @@@ static int acpi_processor_stop(struct d
   * This is needed for the powernow-k8 driver, that works even without
   * ACPI, but needs symbols from this driver
   */
+ static enum cpuhp_state hp_online;
  static int __init acpi_processor_driver_init(void)
  {
        int result = 0;
        if (result < 0)
                return result;
  
-       register_hotcpu_notifier(&acpi_cpu_notifier);
+       result = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+                                          "acpi/cpu-drv:online",
+                                          acpi_soft_cpu_online, NULL);
+       if (result < 0)
+               goto err;
+       hp_online = result;
+       cpuhp_setup_state_nocalls(CPUHP_ACPI_CPUDRV_DEAD, "acpi/cpu-drv:dead",
+                                 NULL, acpi_soft_cpu_dead);
        acpi_thermal_cpufreq_init();
        acpi_processor_ppc_init();
        acpi_processor_throttling_init();
        return 0;
+ err:
+       driver_unregister(&acpi_processor_driver);
+       return result;
  }
  
  static void __exit acpi_processor_driver_exit(void)
  
        acpi_processor_ppc_exit();
        acpi_thermal_cpufreq_exit();
-       unregister_hotcpu_notifier(&acpi_cpu_notifier);
+       cpuhp_remove_state_nocalls(hp_online);
+       cpuhp_remove_state_nocalls(CPUHP_ACPI_CPUDRV_DEAD);
        driver_unregister(&acpi_processor_driver);
  }
  
diff --combined drivers/bus/arm-cci.c
@@@ -144,15 -144,12 +144,12 @@@ struct cci_pmu 
        int num_cntrs;
        atomic_t active_events;
        struct mutex reserve_mutex;
-       struct list_head entry;
+       struct hlist_node node;
        cpumask_t cpus;
  };
  
  #define to_cci_pmu(c) (container_of(c, struct cci_pmu, pmu))
  
- static DEFINE_MUTEX(cci_pmu_mutex);
- static LIST_HEAD(cci_pmu_list);
  enum cci_models {
  #ifdef CONFIG_ARM_CCI400_PMU
        CCI400_R0,
@@@ -551,7 -548,7 +548,7 @@@ static struct attribute *cci5xx_pmu_eve
        CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB),
        CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC),
        CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD),
 -      CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snopp_rq_stall_tt_full, 0xE),
 +      CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_stall_tt_full, 0xE),
        CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF),
        NULL
  };
@@@ -1506,25 -1503,21 +1503,21 @@@ static int cci_pmu_init(struct cci_pmu 
        return perf_pmu_register(&cci_pmu->pmu, name, -1);
  }
  
- static int cci_pmu_offline_cpu(unsigned int cpu)
+ static int cci_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
  {
-       struct cci_pmu *cci_pmu;
+       struct cci_pmu *cci_pmu = hlist_entry_safe(node, struct cci_pmu, node);
        unsigned int target;
  
-       mutex_lock(&cci_pmu_mutex);
-       list_for_each_entry(cci_pmu, &cci_pmu_list, entry) {
-               if (!cpumask_test_and_clear_cpu(cpu, &cci_pmu->cpus))
-                       continue;
-               target = cpumask_any_but(cpu_online_mask, cpu);
-               if (target >= nr_cpu_ids)
-                       continue;
-               /*
-                * TODO: migrate context once core races on event->ctx have
-                * been fixed.
-                */
-               cpumask_set_cpu(target, &cci_pmu->cpus);
-       }
-       mutex_unlock(&cci_pmu_mutex);
+       if (!cpumask_test_and_clear_cpu(cpu, &cci_pmu->cpus))
+               return 0;
+       target = cpumask_any_but(cpu_online_mask, cpu);
+       if (target >= nr_cpu_ids)
+               return 0;
+       /*
+        * TODO: migrate context once core races on event->ctx have
+        * been fixed.
+        */
+       cpumask_set_cpu(target, &cci_pmu->cpus);
        return 0;
  }
  
@@@ -1768,10 -1761,8 +1761,8 @@@ static int cci_pmu_probe(struct platfor
        if (ret)
                return ret;
  
-       mutex_lock(&cci_pmu_mutex);
-       list_add(&cci_pmu->entry, &cci_pmu_list);
-       mutex_unlock(&cci_pmu_mutex);
+       cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE,
+                                        &cci_pmu->node);
        pr_info("ARM %s PMU driver probed", cci_pmu->model->name);
        return 0;
  }
@@@ -1804,9 -1795,9 +1795,9 @@@ static int __init cci_platform_init(voi
  {
        int ret;
  
-       ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE,
-                                       "AP_PERF_ARM_CCI_ONLINE", NULL,
-                                       cci_pmu_offline_cpu);
+       ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCI_ONLINE,
+                                     "AP_PERF_ARM_CCI_ONLINE", NULL,
+                                     cci_pmu_offline_cpu);
        if (ret)
                return ret;
  
diff --combined drivers/bus/arm-ccn.c
@@@ -167,7 -167,7 +167,7 @@@ struct arm_ccn_dt 
        struct hrtimer hrtimer;
  
        cpumask_t cpu;
-       struct list_head entry;
+       struct hlist_node node;
  
        struct pmu pmu;
  };
@@@ -187,12 -187,8 +187,9 @@@ struct arm_ccn 
        struct arm_ccn_component *xp;
  
        struct arm_ccn_dt dt;
 +      int mn_id;
  };
  
- static DEFINE_MUTEX(arm_ccn_mutex);
- static LIST_HEAD(arm_ccn_list);
  static int arm_ccn_node_to_xp(int node)
  {
        return node / CCN_NUM_XP_PORTS;
@@@ -213,7 -209,6 +210,7 @@@ static int arm_ccn_node_to_xp_port(int 
  #define CCN_CONFIG_TYPE(_config)      (((_config) >> 8) & 0xff)
  #define CCN_CONFIG_EVENT(_config)     (((_config) >> 16) & 0xff)
  #define CCN_CONFIG_PORT(_config)      (((_config) >> 24) & 0x3)
 +#define CCN_CONFIG_BUS(_config)               (((_config) >> 24) & 0x3)
  #define CCN_CONFIG_VC(_config)                (((_config) >> 26) & 0x7)
  #define CCN_CONFIG_DIR(_config)               (((_config) >> 29) & 0x1)
  #define CCN_CONFIG_MASK(_config)      (((_config) >> 30) & 0xf)
@@@ -243,7 -238,6 +240,7 @@@ static CCN_FORMAT_ATTR(xp, "config:0-7"
  static CCN_FORMAT_ATTR(type, "config:8-15");
  static CCN_FORMAT_ATTR(event, "config:16-23");
  static CCN_FORMAT_ATTR(port, "config:24-25");
 +static CCN_FORMAT_ATTR(bus, "config:24-25");
  static CCN_FORMAT_ATTR(vc, "config:26-28");
  static CCN_FORMAT_ATTR(dir, "config:29-29");
  static CCN_FORMAT_ATTR(mask, "config:30-33");
@@@ -256,7 -250,6 +253,7 @@@ static struct attribute *arm_ccn_pmu_fo
        &arm_ccn_pmu_format_attr_type.attr.attr,
        &arm_ccn_pmu_format_attr_event.attr.attr,
        &arm_ccn_pmu_format_attr_port.attr.attr,
 +      &arm_ccn_pmu_format_attr_bus.attr.attr,
        &arm_ccn_pmu_format_attr_vc.attr.attr,
        &arm_ccn_pmu_format_attr_dir.attr.attr,
        &arm_ccn_pmu_format_attr_mask.attr.attr,
@@@ -332,7 -325,6 +329,7 @@@ struct arm_ccn_pmu_event 
  static ssize_t arm_ccn_pmu_event_show(struct device *dev,
                struct device_attribute *attr, char *buf)
  {
 +      struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev));
        struct arm_ccn_pmu_event *event = container_of(attr,
                        struct arm_ccn_pmu_event, attr);
        ssize_t res;
                break;
        case CCN_TYPE_XP:
                res += snprintf(buf + res, PAGE_SIZE - res,
 -                              ",xp=?,port=?,vc=?,dir=?");
 +                              ",xp=?,vc=?");
                if (event->event == CCN_EVENT_WATCHPOINT)
                        res += snprintf(buf + res, PAGE_SIZE - res,
 -                                      ",cmp_l=?,cmp_h=?,mask=?");
 +                                      ",port=?,dir=?,cmp_l=?,cmp_h=?,mask=?");
 +              else
 +                      res += snprintf(buf + res, PAGE_SIZE - res,
 +                                      ",bus=?");
 +
 +              break;
 +      case CCN_TYPE_MN:
 +              res += snprintf(buf + res, PAGE_SIZE - res, ",node=%d", ccn->mn_id);
                break;
        default:
                res += snprintf(buf + res, PAGE_SIZE - res, ",node=?");
@@@ -395,9 -380,9 +392,9 @@@ static umode_t arm_ccn_pmu_events_is_vi
  }
  
  static struct arm_ccn_pmu_event arm_ccn_pmu_events[] = {
 -      CCN_EVENT_MN(eobarrier, "dir=0,vc=0,cmp_h=0x1c00", CCN_IDX_MASK_OPCODE),
 -      CCN_EVENT_MN(ecbarrier, "dir=0,vc=0,cmp_h=0x1e00", CCN_IDX_MASK_OPCODE),
 -      CCN_EVENT_MN(dvmop, "dir=0,vc=0,cmp_h=0x2800", CCN_IDX_MASK_OPCODE),
 +      CCN_EVENT_MN(eobarrier, "dir=1,vc=0,cmp_h=0x1c00", CCN_IDX_MASK_OPCODE),
 +      CCN_EVENT_MN(ecbarrier, "dir=1,vc=0,cmp_h=0x1e00", CCN_IDX_MASK_OPCODE),
 +      CCN_EVENT_MN(dvmop, "dir=1,vc=0,cmp_h=0x2800", CCN_IDX_MASK_OPCODE),
        CCN_EVENT_HNI(txdatflits, "dir=1,vc=3", CCN_IDX_MASK_ANY),
        CCN_EVENT_HNI(rxdatflits, "dir=0,vc=3", CCN_IDX_MASK_ANY),
        CCN_EVENT_HNI(txreqflits, "dir=1,vc=0", CCN_IDX_MASK_ANY),
@@@ -745,10 -730,9 +742,10 @@@ static int arm_ccn_pmu_event_init(struc
  
        if (has_branch_stack(event) || event->attr.exclude_user ||
                        event->attr.exclude_kernel || event->attr.exclude_hv ||
 -                      event->attr.exclude_idle) {
 +                      event->attr.exclude_idle || event->attr.exclude_host ||
 +                      event->attr.exclude_guest) {
                dev_warn(ccn->dev, "Can't exclude execution levels!\n");
 -              return -EOPNOTSUPP;
 +              return -EINVAL;
        }
  
        if (event->cpu < 0) {
  
        /* Validate node/xp vs topology */
        switch (type) {
 +      case CCN_TYPE_MN:
 +              if (node_xp != ccn->mn_id) {
 +                      dev_warn(ccn->dev, "Invalid MN ID %d!\n", node_xp);
 +                      return -EINVAL;
 +              }
 +              break;
        case CCN_TYPE_XP:
                if (node_xp >= ccn->num_xps) {
                        dev_warn(ccn->dev, "Invalid XP ID %d!\n", node_xp);
@@@ -905,10 -883,6 +902,10 @@@ static void arm_ccn_pmu_xp_dt_config(st
        struct arm_ccn_component *xp;
        u32 val, dt_cfg;
  
 +      /* Nothing to do for cycle counter */
 +      if (hw->idx == CCN_IDX_PMU_CYCLE_COUNTER)
 +              return;
 +
        if (CCN_CONFIG_TYPE(event->attr.config) == CCN_TYPE_XP)
                xp = &ccn->xp[CCN_CONFIG_XP(event->attr.config)];
        else
@@@ -940,17 -914,38 +937,17 @@@ static void arm_ccn_pmu_event_start(str
                        arm_ccn_pmu_read_counter(ccn, hw->idx));
        hw->state = 0;
  
 -      /*
 -       * Pin the timer, so that the overflows are handled by the chosen
 -       * event->cpu (this is the same one as presented in "cpumask"
 -       * attribute).
 -       */
 -      if (!ccn->irq)
 -              hrtimer_start(&ccn->dt.hrtimer, arm_ccn_pmu_timer_period(),
 -                              HRTIMER_MODE_REL_PINNED);
 -
        /* Set the DT bus input, engaging the counter */
        arm_ccn_pmu_xp_dt_config(event, 1);
  }
  
  static void arm_ccn_pmu_event_stop(struct perf_event *event, int flags)
  {
 -      struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
        struct hw_perf_event *hw = &event->hw;
 -      u64 timeout;
  
        /* Disable counting, setting the DT bus to pass-through mode */
        arm_ccn_pmu_xp_dt_config(event, 0);
  
 -      if (!ccn->irq)
 -              hrtimer_cancel(&ccn->dt.hrtimer);
 -
 -      /* Let the DT bus drain */
 -      timeout = arm_ccn_pmu_read_counter(ccn, CCN_IDX_PMU_CYCLE_COUNTER) +
 -                      ccn->num_xps;
 -      while (arm_ccn_pmu_read_counter(ccn, CCN_IDX_PMU_CYCLE_COUNTER) <
 -                      timeout)
 -              cpu_relax();
 -
        if (flags & PERF_EF_UPDATE)
                arm_ccn_pmu_event_update(event);
  
@@@ -990,7 -985,7 +987,7 @@@ static void arm_ccn_pmu_xp_watchpoint_c
  
        /* Comparison values */
        writel(cmp_l & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_L(wp));
 -      writel((cmp_l >> 32) & 0xefffffff,
 +      writel((cmp_l >> 32) & 0x7fffffff,
                        source->base + CCN_XP_DT_CMP_VAL_L(wp) + 4);
        writel(cmp_h & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_H(wp));
        writel((cmp_h >> 32) & 0x0fffffff,
  
        /* Mask */
        writel(mask_l & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_L(wp));
 -      writel((mask_l >> 32) & 0xefffffff,
 +      writel((mask_l >> 32) & 0x7fffffff,
                        source->base + CCN_XP_DT_CMP_MASK_L(wp) + 4);
        writel(mask_h & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_H(wp));
        writel((mask_h >> 32) & 0x0fffffff,
@@@ -1016,7 -1011,7 +1013,7 @@@ static void arm_ccn_pmu_xp_event_config
        hw->event_base = CCN_XP_DT_CONFIG__DT_CFG__XP_PMU_EVENT(hw->config_base);
  
        id = (CCN_CONFIG_VC(event->attr.config) << 4) |
 -                      (CCN_CONFIG_PORT(event->attr.config) << 3) |
 +                      (CCN_CONFIG_BUS(event->attr.config) << 3) |
                        (CCN_CONFIG_EVENT(event->attr.config) << 0);
  
        val = readl(source->base + CCN_XP_PMU_EVENT_SEL);
@@@ -1101,31 -1096,15 +1098,31 @@@ static void arm_ccn_pmu_event_config(st
        spin_unlock(&ccn->dt.config_lock);
  }
  
 +static int arm_ccn_pmu_active_counters(struct arm_ccn *ccn)
 +{
 +      return bitmap_weight(ccn->dt.pmu_counters_mask,
 +                           CCN_NUM_PMU_EVENT_COUNTERS + 1);
 +}
 +
  static int arm_ccn_pmu_event_add(struct perf_event *event, int flags)
  {
        int err;
        struct hw_perf_event *hw = &event->hw;
 +      struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
  
        err = arm_ccn_pmu_event_alloc(event);
        if (err)
                return err;
  
 +      /*
 +       * Pin the timer, so that the overflows are handled by the chosen
 +       * event->cpu (this is the same one as presented in "cpumask"
 +       * attribute).
 +       */
 +      if (!ccn->irq && arm_ccn_pmu_active_counters(ccn) == 1)
 +              hrtimer_start(&ccn->dt.hrtimer, arm_ccn_pmu_timer_period(),
 +                            HRTIMER_MODE_REL_PINNED);
 +
        arm_ccn_pmu_event_config(event);
  
        hw->state = PERF_HES_STOPPED;
  
  static void arm_ccn_pmu_event_del(struct perf_event *event, int flags)
  {
 +      struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
 +
        arm_ccn_pmu_event_stop(event, PERF_EF_UPDATE);
  
        arm_ccn_pmu_event_release(event);
 +
 +      if (!ccn->irq && arm_ccn_pmu_active_counters(ccn) == 0)
 +              hrtimer_cancel(&ccn->dt.hrtimer);
  }
  
  static void arm_ccn_pmu_event_read(struct perf_event *event)
        arm_ccn_pmu_event_update(event);
  }
  
 +static void arm_ccn_pmu_enable(struct pmu *pmu)
 +{
 +      struct arm_ccn *ccn = pmu_to_arm_ccn(pmu);
 +
 +      u32 val = readl(ccn->dt.base + CCN_DT_PMCR);
 +      val |= CCN_DT_PMCR__PMU_EN;
 +      writel(val, ccn->dt.base + CCN_DT_PMCR);
 +}
 +
 +static void arm_ccn_pmu_disable(struct pmu *pmu)
 +{
 +      struct arm_ccn *ccn = pmu_to_arm_ccn(pmu);
 +
 +      u32 val = readl(ccn->dt.base + CCN_DT_PMCR);
 +      val &= ~CCN_DT_PMCR__PMU_EN;
 +      writel(val, ccn->dt.base + CCN_DT_PMCR);
 +}
 +
  static irqreturn_t arm_ccn_pmu_overflow_handler(struct arm_ccn_dt *dt)
  {
        u32 pmovsr = readl(dt->base + CCN_DT_PMOVSR);
@@@ -1214,30 -1170,24 +1211,24 @@@ static enum hrtimer_restart arm_ccn_pmu
  }
  
  
- static int arm_ccn_pmu_offline_cpu(unsigned int cpu)
+ static int arm_ccn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
  {
-       struct arm_ccn_dt *dt;
+       struct arm_ccn_dt *dt = hlist_entry_safe(node, struct arm_ccn_dt, node);
+       struct arm_ccn *ccn = container_of(dt, struct arm_ccn, dt);
        unsigned int target;
  
-       mutex_lock(&arm_ccn_mutex);
-       list_for_each_entry(dt, &arm_ccn_list, entry) {
-               struct arm_ccn *ccn = container_of(dt, struct arm_ccn, dt);
-               if (!cpumask_test_and_clear_cpu(cpu, &dt->cpu))
-                       continue;
-               target = cpumask_any_but(cpu_online_mask, cpu);
-               if (target >= nr_cpu_ids)
-                       continue;
-               perf_pmu_migrate_context(&dt->pmu, cpu, target);
-               cpumask_set_cpu(target, &dt->cpu);
-               if (ccn->irq)
-                       WARN_ON(irq_set_affinity_hint(ccn->irq, &dt->cpu) != 0);
-       }
-       mutex_unlock(&arm_ccn_mutex);
+       if (!cpumask_test_and_clear_cpu(cpu, &dt->cpu))
+               return 0;
+       target = cpumask_any_but(cpu_online_mask, cpu);
+       if (target >= nr_cpu_ids)
+               return 0;
+       perf_pmu_migrate_context(&dt->pmu, cpu, target);
+       cpumask_set_cpu(target, &dt->cpu);
+       if (ccn->irq)
+               WARN_ON(irq_set_affinity_hint(ccn->irq, &dt->cpu) != 0);
        return 0;
  }
  
  static DEFINE_IDA(arm_ccn_pmu_ida);
  
  static int arm_ccn_pmu_init(struct arm_ccn *ccn)
                .start = arm_ccn_pmu_event_start,
                .stop = arm_ccn_pmu_event_stop,
                .read = arm_ccn_pmu_event_read,
 +              .pmu_enable = arm_ccn_pmu_enable,
 +              .pmu_disable = arm_ccn_pmu_disable,
        };
  
        /* No overflow interrupt? Have to use a timer instead. */
        if (err)
                goto error_pmu_register;
  
-       mutex_lock(&arm_ccn_mutex);
-       list_add(&ccn->dt.entry, &arm_ccn_list);
-       mutex_unlock(&arm_ccn_mutex);
+       cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
+                                        &ccn->dt.node);
        return 0;
  
  error_pmu_register:
@@@ -1339,10 -1286,8 +1329,8 @@@ static void arm_ccn_pmu_cleanup(struct 
  {
        int i;
  
-       mutex_lock(&arm_ccn_mutex);
-       list_del(&ccn->dt.entry);
-       mutex_unlock(&arm_ccn_mutex);
+       cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
+                                           &ccn->dt.node);
        if (ccn->irq)
                irq_set_affinity_hint(ccn->irq, NULL);
        for (i = 0; i < ccn->num_xps; i++)
@@@ -1404,8 -1349,6 +1392,8 @@@ static int arm_ccn_init_nodes(struct ar
  
        switch (type) {
        case CCN_TYPE_MN:
 +              ccn->mn_id = id;
 +              return 0;
        case CCN_TYPE_DT:
                return 0;
        case CCN_TYPE_XP:
@@@ -1516,9 -1459,8 +1504,9 @@@ static int arm_ccn_probe(struct platfor
                /* Can set 'disable' bits, so can acknowledge interrupts */
                writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__ENABLE,
                                ccn->base + CCN_MN_ERRINT_STATUS);
 -              err = devm_request_irq(ccn->dev, irq, arm_ccn_irq_handler, 0,
 -                              dev_name(ccn->dev), ccn);
 +              err = devm_request_irq(ccn->dev, irq, arm_ccn_irq_handler,
 +                                     IRQF_NOBALANCING | IRQF_NO_THREAD,
 +                                     dev_name(ccn->dev), ccn);
                if (err)
                        return err;
  
@@@ -1573,9 -1515,9 +1561,9 @@@ static int __init arm_ccn_init(void
  {
        int i, ret;
  
-       ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
-                                       "AP_PERF_ARM_CCN_ONLINE", NULL,
-                                       arm_ccn_pmu_offline_cpu);
+       ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCN_ONLINE,
+                                     "AP_PERF_ARM_CCN_ONLINE", NULL,
+                                     arm_ccn_pmu_offline_cpu);
        if (ret)
                return ret;
  
  
  static void __exit arm_ccn_exit(void)
  {
-       cpuhp_remove_state_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE);
+       cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE);
        platform_driver_unregister(&arm_ccn_driver);
  }
  
@@@ -916,18 -916,58 +916,18 @@@ static struct kobj_type ktype_cpufreq 
        .release        = cpufreq_sysfs_release,
  };
  
 -static int add_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu)
 +static int add_cpu_dev_symlink(struct cpufreq_policy *policy,
 +                             struct device *dev)
  {
 -      struct device *cpu_dev;
 -
 -      pr_debug("%s: Adding symlink for CPU: %u\n", __func__, cpu);
 -
 -      if (!policy)
 -              return 0;
 -
 -      cpu_dev = get_cpu_device(cpu);
 -      if (WARN_ON(!cpu_dev))
 -              return 0;
 -
 -      return sysfs_create_link(&cpu_dev->kobj, &policy->kobj, "cpufreq");
 -}
 -
 -static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu)
 -{
 -      struct device *cpu_dev;
 -
 -      pr_debug("%s: Removing symlink for CPU: %u\n", __func__, cpu);
 -
 -      cpu_dev = get_cpu_device(cpu);
 -      if (WARN_ON(!cpu_dev))
 -              return;
 -
 -      sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
 +      dev_dbg(dev, "%s: Adding symlink\n", __func__);
 +      return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
  }
  
 -/* Add/remove symlinks for all related CPUs */
 -static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
 +static void remove_cpu_dev_symlink(struct cpufreq_policy *policy,
 +                                 struct device *dev)
  {
 -      unsigned int j;
 -      int ret = 0;
 -
 -      /* Some related CPUs might not be present (physically hotplugged) */
 -      for_each_cpu(j, policy->real_cpus) {
 -              ret = add_cpu_dev_symlink(policy, j);
 -              if (ret)
 -                      break;
 -      }
 -
 -      return ret;
 -}
 -
 -static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy)
 -{
 -      unsigned int j;
 -
 -      /* Some related CPUs might not be present (physically hotplugged) */
 -      for_each_cpu(j, policy->real_cpus)
 -              remove_cpu_dev_symlink(policy, j);
 +      dev_dbg(dev, "%s: Removing symlink\n", __func__);
 +      sysfs_remove_link(&dev->kobj, "cpufreq");
  }
  
  static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
                        return ret;
        }
  
 -      return cpufreq_add_dev_symlink(policy);
 +      return 0;
  }
  
  __weak struct cpufreq_governor *cpufreq_default_governor(void)
@@@ -1033,9 -1073,13 +1033,9 @@@ static void handle_update(struct work_s
  
  static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
  {
 -      struct device *dev = get_cpu_device(cpu);
        struct cpufreq_policy *policy;
        int ret;
  
 -      if (WARN_ON(!dev))
 -              return NULL;
 -
        policy = kzalloc(sizeof(*policy), GFP_KERNEL);
        if (!policy)
                return NULL;
@@@ -1089,6 -1133,7 +1089,6 @@@ static void cpufreq_policy_put_kobj(str
  
        down_write(&policy->rwsem);
        cpufreq_stats_free_table(policy);
 -      cpufreq_remove_dev_symlink(policy);
        kobj = &policy->kobj;
        cmp = &policy->kobj_unregister;
        up_write(&policy->rwsem);
@@@ -1170,8 -1215,8 +1170,8 @@@ static int cpufreq_online(unsigned int 
        if (new_policy) {
                /* related_cpus should at least include policy->cpus. */
                cpumask_copy(policy->related_cpus, policy->cpus);
 -              /* Remember CPUs present at the policy creation time. */
 -              cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask);
 +              /* Clear mask of registered CPUs */
 +              cpumask_clear(policy->real_cpus);
        }
  
        /*
@@@ -1286,8 -1331,6 +1286,8 @@@ out_free_policy
        return ret;
  }
  
- static void cpufreq_offline(unsigned int cpu);
++static int cpufreq_offline(unsigned int cpu);
 +
  /**
   * cpufreq_add_dev - the cpufreq interface for a CPU device.
   * @dev: CPU device.
@@@ -1297,31 -1340,25 +1297,31 @@@ static int cpufreq_add_dev(struct devic
  {
        struct cpufreq_policy *policy;
        unsigned cpu = dev->id;
 +      int ret;
  
        dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu);
  
 -      if (cpu_online(cpu))
 -              return cpufreq_online(cpu);
 +      if (cpu_online(cpu)) {
 +              ret = cpufreq_online(cpu);
 +              if (ret)
 +                      return ret;
 +      }
  
 -      /*
 -       * A hotplug notifier will follow and we will handle it as CPU online
 -       * then.  For now, just create the sysfs link, unless there is no policy
 -       * or the link is already present.
 -       */
 +      /* Create sysfs link on CPU registration */
        policy = per_cpu(cpufreq_cpu_data, cpu);
        if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus))
                return 0;
  
 -      return add_cpu_dev_symlink(policy, cpu);
 +      ret = add_cpu_dev_symlink(policy, dev);
 +      if (ret) {
 +              cpumask_clear_cpu(cpu, policy->real_cpus);
 +              cpufreq_offline(cpu);
 +      }
 +
 +      return ret;
  }
  
- static void cpufreq_offline(unsigned int cpu)
+ static int cpufreq_offline(unsigned int cpu)
  {
        struct cpufreq_policy *policy;
        int ret;
        policy = cpufreq_cpu_get_raw(cpu);
        if (!policy) {
                pr_debug("%s: No cpu_data found\n", __func__);
-               return;
+               return 0;
        }
  
        down_write(&policy->rwsem);
  
  unlock:
        up_write(&policy->rwsem);
+       return 0;
  }
  
  /**
@@@ -1399,7 -1437,7 +1400,7 @@@ static void cpufreq_remove_dev(struct d
                cpufreq_offline(cpu);
  
        cpumask_clear_cpu(cpu, policy->real_cpus);
 -      remove_cpu_dev_symlink(policy, cpu);
 +      remove_cpu_dev_symlink(policy, dev);
  
        if (cpumask_empty(policy->real_cpus))
                cpufreq_policy_free(policy, true);
@@@ -2295,28 -2333,6 +2296,6 @@@ unlock
  }
  EXPORT_SYMBOL(cpufreq_update_policy);
  
- static int cpufreq_cpu_callback(struct notifier_block *nfb,
-                                       unsigned long action, void *hcpu)
- {
-       unsigned int cpu = (unsigned long)hcpu;
-       switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_ONLINE:
-       case CPU_DOWN_FAILED:
-               cpufreq_online(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               cpufreq_offline(cpu);
-               break;
-       }
-       return NOTIFY_OK;
- }
- static struct notifier_block __refdata cpufreq_cpu_notifier = {
-       .notifier_call = cpufreq_cpu_callback,
- };
  /*********************************************************************
   *               BOOST                                                     *
   *********************************************************************/
@@@ -2418,6 -2434,7 +2397,7 @@@ EXPORT_SYMBOL_GPL(cpufreq_boost_enabled
  /*********************************************************************
   *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
   *********************************************************************/
+ static enum cpuhp_state hp_online;
  
  /**
   * cpufreq_register_driver - register a CPU Frequency driver
@@@ -2480,7 -2497,14 +2460,14 @@@ int cpufreq_register_driver(struct cpuf
                goto err_if_unreg;
        }
  
-       register_hotcpu_notifier(&cpufreq_cpu_notifier);
+       ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "cpufreq:online",
+                                       cpufreq_online,
+                                       cpufreq_offline);
+       if (ret < 0)
+               goto err_if_unreg;
+       hp_online = ret;
+       ret = 0;
        pr_debug("driver %s up and running\n", driver_data->name);
        goto out;
  
@@@ -2519,7 -2543,7 +2506,7 @@@ int cpufreq_unregister_driver(struct cp
        get_online_cpus();
        subsys_interface_unregister(&cpufreq_interface);
        remove_boost_sysfs_file();
-       unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
+       cpuhp_remove_state_nocalls(hp_online);
  
        write_lock_irqsave(&cpufreq_driver_lock, flags);
  
diff --combined drivers/md/raid5.c
@@@ -659,7 -659,6 +659,7 @@@ raid5_get_active_stripe(struct r5conf *
  {
        struct stripe_head *sh;
        int hash = stripe_hash_locks_hash(sector);
 +      int inc_empty_inactive_list_flag;
  
        pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
  
                                        atomic_inc(&conf->active_stripes);
                                BUG_ON(list_empty(&sh->lru) &&
                                       !test_bit(STRIPE_EXPANDING, &sh->state));
 +                              inc_empty_inactive_list_flag = 0;
 +                              if (!list_empty(conf->inactive_list + hash))
 +                                      inc_empty_inactive_list_flag = 1;
                                list_del_init(&sh->lru);
 +                              if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
 +                                      atomic_inc(&conf->empty_inactive_list_nr);
                                if (sh->group) {
                                        sh->group->stripes_cnt--;
                                        sh->group = NULL;
@@@ -768,7 -762,6 +768,7 @@@ static void stripe_add_to_batch_list(st
        sector_t head_sector, tmp_sec;
        int hash;
        int dd_idx;
 +      int inc_empty_inactive_list_flag;
  
        /* Don't cross chunks, so stripe pd_idx/qd_idx is the same */
        tmp_sec = sh->sector;
                                atomic_inc(&conf->active_stripes);
                        BUG_ON(list_empty(&head->lru) &&
                               !test_bit(STRIPE_EXPANDING, &head->state));
 +                      inc_empty_inactive_list_flag = 0;
 +                      if (!list_empty(conf->inactive_list + hash))
 +                              inc_empty_inactive_list_flag = 1;
                        list_del_init(&head->lru);
 +                      if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
 +                              atomic_inc(&conf->empty_inactive_list_nr);
                        if (head->group) {
                                head->group->stripes_cnt--;
                                head->group = NULL;
@@@ -1005,6 -993,7 +1005,6 @@@ again
  
                        set_bit(STRIPE_IO_STARTED, &sh->state);
  
 -                      bio_reset(bi);
                        bi->bi_bdev = rdev->bdev;
                        bio_set_op_attrs(bi, op, op_flags);
                        bi->bi_end_io = op_is_write(op)
  
                        set_bit(STRIPE_IO_STARTED, &sh->state);
  
 -                      bio_reset(rbi);
                        rbi->bi_bdev = rrdev->bdev;
                        bio_set_op_attrs(rbi, op, op_flags);
                        BUG_ON(!op_is_write(op));
@@@ -1988,11 -1978,9 +1988,11 @@@ static void raid_run_ops(struct stripe_
        put_cpu();
  }
  
 -static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
 +static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
 +      int disks)
  {
        struct stripe_head *sh;
 +      int i;
  
        sh = kmem_cache_zalloc(sc, gfp);
        if (sh) {
                INIT_LIST_HEAD(&sh->batch_list);
                INIT_LIST_HEAD(&sh->lru);
                atomic_set(&sh->count, 1);
 +              for (i = 0; i < disks; i++) {
 +                      struct r5dev *dev = &sh->dev[i];
 +
 +                      bio_init(&dev->req);
 +                      dev->req.bi_io_vec = &dev->vec;
 +                      dev->req.bi_max_vecs = 1;
 +
 +                      bio_init(&dev->rreq);
 +                      dev->rreq.bi_io_vec = &dev->rvec;
 +                      dev->rreq.bi_max_vecs = 1;
 +              }
        }
        return sh;
  }
@@@ -2019,7 -1996,7 +2019,7 @@@ static int grow_one_stripe(struct r5con
  {
        struct stripe_head *sh;
  
 -      sh = alloc_stripe(conf->slab_cache, gfp);
 +      sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size);
        if (!sh)
                return 0;
  
@@@ -2190,7 -2167,7 +2190,7 @@@ static int resize_stripes(struct r5con
        mutex_lock(&conf->cache_size_mutex);
  
        for (i = conf->max_nr_stripes; i; i--) {
 -              nsh = alloc_stripe(sc, GFP_KERNEL);
 +              nsh = alloc_stripe(sc, GFP_KERNEL, newsize);
                if (!nsh)
                        break;
  
@@@ -2322,7 -2299,6 +2322,7 @@@ static void raid5_end_read_request(stru
                (unsigned long long)sh->sector, i, atomic_read(&sh->count),
                bi->bi_error);
        if (i == disks) {
 +              bio_reset(bi);
                BUG();
                return;
        }
                }
        }
        rdev_dec_pending(rdev, conf->mddev);
 +      bio_reset(bi);
        clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
        raid5_release_stripe(sh);
@@@ -2461,7 -2436,6 +2461,7 @@@ static void raid5_end_write_request(str
                (unsigned long long)sh->sector, i, atomic_read(&sh->count),
                bi->bi_error);
        if (i == disks) {
 +              bio_reset(bi);
                BUG();
                return;
        }
        if (sh->batch_head && bi->bi_error && !replacement)
                set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
  
 +      bio_reset(bi);
        if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
                clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
@@@ -2512,6 -2485,16 +2512,6 @@@ static void raid5_build_block(struct st
  {
        struct r5dev *dev = &sh->dev[i];
  
 -      bio_init(&dev->req);
 -      dev->req.bi_io_vec = &dev->vec;
 -      dev->req.bi_max_vecs = 1;
 -      dev->req.bi_private = sh;
 -
 -      bio_init(&dev->rreq);
 -      dev->rreq.bi_io_vec = &dev->rvec;
 -      dev->rreq.bi_max_vecs = 1;
 -      dev->rreq.bi_private = sh;
 -
        dev->flags = 0;
        dev->sector = raid5_compute_blocknr(sh, i, previous);
  }
@@@ -4645,9 -4628,7 +4645,9 @@@ finish
        }
  
        if (!bio_list_empty(&s.return_bi)) {
 -              if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) {
 +              if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags) &&
 +                              (s.failed <= conf->max_degraded ||
 +                                      conf->mddev->external == 0)) {
                        spin_lock_irq(&conf->device_lock);
                        bio_list_merge(&conf->return_bi, &s.return_bi);
                        spin_unlock_irq(&conf->device_lock);
@@@ -6349,22 -6330,20 +6349,20 @@@ static int alloc_scratch_buffer(struct 
        return 0;
  }
  
- static void raid5_free_percpu(struct r5conf *conf)
+ static int raid456_cpu_dead(unsigned int cpu, struct hlist_node *node)
  {
-       unsigned long cpu;
+       struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node);
+       free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
+       return 0;
+ }
  
+ static void raid5_free_percpu(struct r5conf *conf)
+ {
        if (!conf->percpu)
                return;
  
- #ifdef CONFIG_HOTPLUG_CPU
-       unregister_cpu_notifier(&conf->cpu_notify);
- #endif
-       get_online_cpus();
-       for_each_possible_cpu(cpu)
-               free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
-       put_online_cpus();
+       cpuhp_state_remove_instance(CPUHP_MD_RAID5_PREPARE, &conf->node);
        free_percpu(conf->percpu);
  }
  
@@@ -6383,64 -6362,28 +6381,28 @@@ static void free_conf(struct r5conf *co
        kfree(conf);
  }
  
- #ifdef CONFIG_HOTPLUG_CPU
- static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
-                             void *hcpu)
+ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
  {
-       struct r5conf *conf = container_of(nfb, struct r5conf, cpu_notify);
-       long cpu = (long)hcpu;
+       struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node);
        struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
  
-       switch (action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               if (alloc_scratch_buffer(conf, percpu)) {
-                       pr_err("%s: failed memory allocation for cpu%ld\n",
-                              __func__, cpu);
-                       return notifier_from_errno(-ENOMEM);
-               }
-               break;
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-       case CPU_UP_CANCELED:
-       case CPU_UP_CANCELED_FROZEN:
-               free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
-               break;
-       default:
-               break;
+       if (alloc_scratch_buffer(conf, percpu)) {
+               pr_err("%s: failed memory allocation for cpu%u\n",
+                      __func__, cpu);
+               return -ENOMEM;
        }
-       return NOTIFY_OK;
+       return 0;
  }
- #endif
  
  static int raid5_alloc_percpu(struct r5conf *conf)
  {
-       unsigned long cpu;
        int err = 0;
  
        conf->percpu = alloc_percpu(struct raid5_percpu);
        if (!conf->percpu)
                return -ENOMEM;
  
- #ifdef CONFIG_HOTPLUG_CPU
-       conf->cpu_notify.notifier_call = raid456_cpu_notify;
-       conf->cpu_notify.priority = 0;
-       err = register_cpu_notifier(&conf->cpu_notify);
-       if (err)
-               return err;
- #endif
-       get_online_cpus();
-       for_each_present_cpu(cpu) {
-               err = alloc_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
-               if (err) {
-                       pr_err("%s: failed memory allocation for cpu%ld\n",
-                              __func__, cpu);
-                       break;
-               }
-       }
-       put_online_cpus();
+       err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node);
        if (!err) {
                conf->scribble_disks = max(conf->raid_disks,
                        conf->previous_raid_disks);
@@@ -6639,16 -6582,6 +6601,16 @@@ static struct r5conf *setup_conf(struc
        }
  
        conf->min_nr_stripes = NR_STRIPES;
 +      if (mddev->reshape_position != MaxSector) {
 +              int stripes = max_t(int,
 +                      ((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4,
 +                      ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4);
 +              conf->min_nr_stripes = max(NR_STRIPES, stripes);
 +              if (conf->min_nr_stripes != NR_STRIPES)
 +                      printk(KERN_INFO
 +                              "md/raid:%s: force stripe size %d for reshape\n",
 +                              mdname(mddev), conf->min_nr_stripes);
 +      }
        memory = conf->min_nr_stripes * (sizeof(struct stripe_head) +
                 max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
        atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
@@@ -6855,14 -6788,11 +6817,14 @@@ static int raid5_run(struct mddev *mdde
        if (IS_ERR(conf))
                return PTR_ERR(conf);
  
 -      if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !journal_dev) {
 -              printk(KERN_ERR "md/raid:%s: journal disk is missing, force array readonly\n",
 -                     mdname(mddev));
 -              mddev->ro = 1;
 -              set_disk_ro(mddev->gendisk, 1);
 +      if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
 +              if (!journal_dev) {
 +                      pr_err("md/raid:%s: journal disk is missing, force array readonly\n",
 +                             mdname(mddev));
 +                      mddev->ro = 1;
 +                      set_disk_ro(mddev->gendisk, 1);
 +              } else if (mddev->recovery_cp == MaxSector)
 +                      set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
        }
  
        conf->min_offset_diff = min_offset_diff;
@@@ -7985,10 -7915,21 +7947,21 @@@ static struct md_personality raid4_pers
  
  static int __init raid5_init(void)
  {
+       int ret;
        raid5_wq = alloc_workqueue("raid5wq",
                WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_SYSFS, 0);
        if (!raid5_wq)
                return -ENOMEM;
+       ret = cpuhp_setup_state_multi(CPUHP_MD_RAID5_PREPARE,
+                                     "md/raid5:prepare",
+                                     raid456_cpu_up_prepare,
+                                     raid456_cpu_dead);
+       if (ret) {
+               destroy_workqueue(raid5_wq);
+               return ret;
+       }
        register_md_personality(&raid6_personality);
        register_md_personality(&raid5_personality);
        register_md_personality(&raid4_personality);
@@@ -8000,6 -7941,7 +7973,7 @@@ static void raid5_exit(void
        unregister_md_personality(&raid6_personality);
        unregister_md_personality(&raid5_personality);
        unregister_md_personality(&raid4_personality);
+       cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE);
        destroy_workqueue(raid5_wq);
  }
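To see the shape of the new multi-instance API without the surrounding raid5 logic: one state is registered at module init, one hlist_node per r5conf is added and removed with the array, and the state itself is removed at module exit. A condensed sketch of just those calls, with made-up wrapper names; the real raid5_init(), raid5_alloc_percpu(), raid5_free_percpu() and raid5_exit() above do considerably more:

/* Condensed from the raid5 hunks above; error handling elided. */
static int raid5_hp_setup(void)
{
	return cpuhp_setup_state_multi(CPUHP_MD_RAID5_PREPARE,
				       "md/raid5:prepare",
				       raid456_cpu_up_prepare,
				       raid456_cpu_dead);
}

static int raid5_hp_add_conf(struct r5conf *conf)
{
	/* Runs raid456_cpu_up_prepare() for this conf on CPUs that reached the state. */
	return cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node);
}

static void raid5_hp_remove_conf(struct r5conf *conf)
{
	/* Runs raid456_cpu_dead() for this conf on CPUs that reached the state. */
	cpuhp_state_remove_instance(CPUHP_MD_RAID5_PREPARE, &conf->node);
}

static void raid5_hp_teardown(void)
{
	/* Only after all instances have been removed. */
	cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE);
}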
  
diff --combined drivers/perf/arm_pmu.c
@@@ -534,24 -534,6 +534,24 @@@ static int armpmu_filter_match(struct p
        return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
  }
  
 +static ssize_t armpmu_cpumask_show(struct device *dev,
 +                                 struct device_attribute *attr, char *buf)
 +{
 +      struct arm_pmu *armpmu = to_arm_pmu(dev_get_drvdata(dev));
 +      return cpumap_print_to_pagebuf(true, buf, &armpmu->supported_cpus);
 +}
 +
 +static DEVICE_ATTR(cpus, S_IRUGO, armpmu_cpumask_show, NULL);
 +
 +static struct attribute *armpmu_common_attrs[] = {
 +      &dev_attr_cpus.attr,
 +      NULL,
 +};
 +
 +static struct attribute_group armpmu_common_attr_group = {
 +      .attrs = armpmu_common_attrs,
 +};
 +
  static void armpmu_init(struct arm_pmu *armpmu)
  {
        atomic_set(&armpmu->active_events, 0);
                .stop           = armpmu_stop,
                .read           = armpmu_read,
                .filter_match   = armpmu_filter_match,
 +              .attr_groups    = armpmu->attr_groups,
        };
 +      armpmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
 +              &armpmu_common_attr_group;
  }
  
  /* Set at runtime when we know what CPU type we are. */
@@@ -623,7 -602,7 +623,7 @@@ static void cpu_pmu_free_irq(struct arm
        irqs = min(pmu_device->num_resources, num_possible_cpus());
  
        irq = platform_get_irq(pmu_device, 0);
 -      if (irq >= 0 && irq_is_percpu(irq)) {
 +      if (irq > 0 && irq_is_percpu(irq)) {
                on_each_cpu_mask(&cpu_pmu->supported_cpus,
                                 cpu_pmu_disable_percpu_irq, &irq, 1);
                free_percpu_irq(irq, &hw_events->percpu_pmu);
                        if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
                                continue;
                        irq = platform_get_irq(pmu_device, i);
 -                      if (irq >= 0)
 +                      if (irq > 0)
                                free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
                }
        }
@@@ -659,7 -638,7 +659,7 @@@ static int cpu_pmu_request_irq(struct a
        }
  
        irq = platform_get_irq(pmu_device, 0);
 -      if (irq >= 0 && irq_is_percpu(irq)) {
 +      if (irq > 0 && irq_is_percpu(irq)) {
                err = request_percpu_irq(irq, handler, "arm-pmu",
                                         &hw_events->percpu_pmu);
                if (err) {
        return 0;
  }
  
- static DEFINE_SPINLOCK(arm_pmu_lock);
- static LIST_HEAD(arm_pmu_list);
  /*
   * PMU hardware loses all context when a CPU goes offline.
   * When a CPU is hotplugged back in, since some hardware registers are
   * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
   * junk values out of them.
   */
- static int arm_perf_starting_cpu(unsigned int cpu)
+ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
  {
-       struct arm_pmu *pmu;
-       spin_lock(&arm_pmu_lock);
-       list_for_each_entry(pmu, &arm_pmu_list, entry) {
+       struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
  
-               if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
-                       continue;
-               if (pmu->reset)
-                       pmu->reset(pmu);
-       }
-       spin_unlock(&arm_pmu_lock);
+       if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
+               return 0;
+       if (pmu->reset)
+               pmu->reset(pmu);
        return 0;
  }
  
@@@ -842,9 -813,10 +834,10 @@@ static int cpu_pmu_init(struct arm_pmu 
        if (!cpu_hw_events)
                return -ENOMEM;
  
-       spin_lock(&arm_pmu_lock);
-       list_add_tail(&cpu_pmu->entry, &arm_pmu_list);
-       spin_unlock(&arm_pmu_lock);
+       err = cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+                                              &cpu_pmu->node);
+       if (err)
+               goto out_free;
  
        err = cpu_pm_pmu_register(cpu_pmu);
        if (err)
        return 0;
  
  out_unregister:
-       spin_lock(&arm_pmu_lock);
-       list_del(&cpu_pmu->entry);
-       spin_unlock(&arm_pmu_lock);
+       cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+                                           &cpu_pmu->node);
+ out_free:
        free_percpu(cpu_hw_events);
        return err;
  }
  static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
  {
        cpu_pm_pmu_unregister(cpu_pmu);
-       spin_lock(&arm_pmu_lock);
-       list_del(&cpu_pmu->entry);
-       spin_unlock(&arm_pmu_lock);
+       cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+                                           &cpu_pmu->node);
        free_percpu(cpu_pmu->hw_events);
  }
  
@@@ -940,13 -911,12 +932,13 @@@ static int of_pmu_irq_cfg(struct arm_pm
  
                /* Check the IRQ type and prohibit a mix of PPIs and SPIs */
                irq = platform_get_irq(pdev, i);
 -              if (irq >= 0) {
 +              if (irq > 0) {
                        bool spi = !irq_is_percpu(irq);
  
                        if (i > 0 && spi != using_spi) {
                                pr_err("PPI/SPI IRQ type mismatch for %s!\n",
                                        dn->name);
 +                              of_node_put(dn);
                                kfree(irqs);
                                return -EINVAL;
                        }
        if (cpumask_weight(&pmu->supported_cpus) == 0) {
                int irq = platform_get_irq(pdev, 0);
  
 -              if (irq_is_percpu(irq)) {
 +              if (irq > 0 && irq_is_percpu(irq)) {
                        /* If using PPIs, check the affinity of the partition */
                        int ret;
  
@@@ -1050,7 -1020,7 +1042,7 @@@ int arm_pmu_device_probe(struct platfor
                ret = of_pmu_irq_cfg(pmu);
                if (!ret)
                        ret = init_fn(pmu);
 -      } else {
 +      } else if (probe_table) {
                cpumask_setall(&pmu->supported_cpus);
                ret = probe_current_pmu(pmu, probe_table);
        }
                goto out_free;
        }
  
 +
        ret = cpu_pmu_init(pmu);
        if (ret)
                goto out_free;
@@@ -1091,9 -1060,9 +1083,9 @@@ static int arm_pmu_hp_init(void
  {
        int ret;
  
-       ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_STARTING,
-                                       "AP_PERF_ARM_STARTING",
-                                       arm_perf_starting_cpu, NULL);
+       ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_STARTING,
+                                     "AP_PERF_ARM_STARTING",
+                                     arm_perf_starting_cpu, NULL);
        if (ret)
                pr_err("CPU hotplug notifier for ARM PMU could not be registered: %d\n",
                       ret);
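The ARM PMU conversion uses the same multi-instance machinery, but at a fixed STARTING state: arm_perf_starting_cpu() runs on the CPU being brought up, with interrupts disabled, so it must not sleep, and each PMU instance is added with the _nocalls variant because there is nothing to invoke at registration time. Reduced to the hotplug calls, with hypothetical wrapper names around the real state and callback:

/* Sketch: one registration for the driver class ... */
static int arm_pmu_hp_setup(void)
{
	return cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_STARTING,
				       "AP_PERF_ARM_STARTING",
				       arm_perf_starting_cpu, NULL);
}

/* ... then one instance per PMU, without invoking the callback now. */
static int arm_pmu_hp_add(struct arm_pmu *cpu_pmu)
{
	return cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
						&cpu_pmu->node);
}

static void arm_pmu_hp_del(struct arm_pmu *cpu_pmu)
{
	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
					    &cpu_pmu->node);
}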
diff --combined include/linux/cpu.h
@@@ -61,17 -61,8 +61,8 @@@ struct notifier_block
  #define CPU_DOWN_PREPARE      0x0005 /* CPU (unsigned)v going down */
  #define CPU_DOWN_FAILED               0x0006 /* CPU (unsigned)v NOT going down */
  #define CPU_DEAD              0x0007 /* CPU (unsigned)v dead */
- #define CPU_DYING             0x0008 /* CPU (unsigned)v not running any task,
-                                       * not handling interrupts, soon dead.
-                                       * Called on the dying cpu, interrupts
-                                       * are already disabled. Must not
-                                       * sleep, must not fail */
  #define CPU_POST_DEAD         0x0009 /* CPU (unsigned)v dead, cpu_hotplug
                                        * lock is dropped */
- #define CPU_STARTING          0x000A /* CPU (unsigned)v soon running.
-                                       * Called on the new cpu, just before
-                                       * enabling interrupts. Must not sleep,
-                                       * must not fail */
  #define CPU_BROKEN            0x000B /* CPU (unsigned)v did not die properly,
                                        * perhaps due to preemption. */
  
@@@ -86,9 -77,6 +77,6 @@@
  #define CPU_DOWN_PREPARE_FROZEN       (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
  #define CPU_DOWN_FAILED_FROZEN        (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
  #define CPU_DEAD_FROZEN               (CPU_DEAD | CPU_TASKS_FROZEN)
- #define CPU_DYING_FROZEN      (CPU_DYING | CPU_TASKS_FROZEN)
- #define CPU_STARTING_FROZEN   (CPU_STARTING | CPU_TASKS_FROZEN)
  
  #ifdef CONFIG_SMP
  extern bool cpuhp_tasks_frozen;
@@@ -228,11 -216,7 +216,11 @@@ static inline void cpu_hotplug_done(voi
  #endif                /* CONFIG_HOTPLUG_CPU */
  
  #ifdef CONFIG_PM_SLEEP_SMP
 -extern int disable_nonboot_cpus(void);
 +extern int freeze_secondary_cpus(int primary);
 +static inline int disable_nonboot_cpus(void)
 +{
 +      return freeze_secondary_cpus(0);
 +}
  extern void enable_nonboot_cpus(void);
  #else /* !CONFIG_PM_SLEEP_SMP */
  static inline int disable_nonboot_cpus(void) { return 0; }
diff --combined include/linux/cpuhotplug.h
@@@ -1,8 -1,6 +1,8 @@@
  #ifndef __CPUHOTPLUG_H
  #define __CPUHOTPLUG_H
  
 +#include <linux/types.h>
 +
  enum cpuhp_state {
        CPUHP_OFFLINE,
        CPUHP_CREATE_THREADS,
        CPUHP_PERF_SUPERH,
        CPUHP_X86_HPET_DEAD,
        CPUHP_X86_APB_DEAD,
+       CPUHP_VIRT_NET_DEAD,
+       CPUHP_SLUB_DEAD,
+       CPUHP_MM_WRITEBACK_DEAD,
+       CPUHP_SOFTIRQ_DEAD,
+       CPUHP_NET_MVNETA_DEAD,
+       CPUHP_CPUIDLE_DEAD,
+       CPUHP_ARM64_FPSIMD_DEAD,
+       CPUHP_ARM_OMAP_WAKE_DEAD,
+       CPUHP_IRQ_POLL_DEAD,
+       CPUHP_BLOCK_SOFTIRQ_DEAD,
+       CPUHP_VIRT_SCSI_DEAD,
+       CPUHP_ACPI_CPUDRV_DEAD,
+       CPUHP_S390_PFAULT_DEAD,
+       CPUHP_BLK_MQ_DEAD,
        CPUHP_WORKQUEUE_PREP,
        CPUHP_POWER_NUMA_PREPARE,
        CPUHP_HRTIMERS_PREPARE,
        CPUHP_PROFILE_PREPARE,
        CPUHP_X2APIC_PREPARE,
        CPUHP_SMPCFD_PREPARE,
+       CPUHP_RELAY_PREPARE,
+       CPUHP_SLAB_PREPARE,
+       CPUHP_MD_RAID5_PREPARE,
        CPUHP_RCUTREE_PREP,
+       CPUHP_CPUIDLE_COUPLED_PREPARE,
+       CPUHP_POWERPC_PMAC_PREPARE,
+       CPUHP_POWERPC_MMU_CTX_PREPARE,
        CPUHP_NOTIFY_PREPARE,
+       CPUHP_ARM_SHMOBILE_SCU_PREPARE,
+       CPUHP_SH_SH3X_PREPARE,
+       CPUHP_BLK_MQ_PREPARE,
        CPUHP_TIMERS_DEAD,
+       CPUHP_NOTF_ERR_INJ_PREPARE,
+       CPUHP_MIPS_SOC_PREPARE,
        CPUHP_BRINGUP_CPU,
        CPUHP_AP_IDLE_DEAD,
        CPUHP_AP_OFFLINE,
@@@ -47,8 -70,6 +72,8 @@@
        CPUHP_AP_PERF_METAG_STARTING,
        CPUHP_AP_MIPS_OP_LOONGSON3_STARTING,
        CPUHP_AP_ARM_VFP_STARTING,
 +      CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
 +      CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
        CPUHP_AP_PERF_ARM_STARTING,
        CPUHP_AP_ARM_L2X0_STARTING,
        CPUHP_AP_ARM_ARCH_TIMER_STARTING,
@@@ -72,7 -93,6 +97,6 @@@
        CPUHP_AP_ARM64_ISNDEP_STARTING,
        CPUHP_AP_SMPCFD_DYING,
        CPUHP_AP_X86_TBOOT_DYING,
-       CPUHP_AP_NOTIFY_STARTING,
        CPUHP_AP_ONLINE,
        CPUHP_TEARDOWN_CPU,
        CPUHP_AP_ONLINE_IDLE,
  
  int __cpuhp_setup_state(enum cpuhp_state state,       const char *name, bool invoke,
                        int (*startup)(unsigned int cpu),
-                       int (*teardown)(unsigned int cpu));
+                       int (*teardown)(unsigned int cpu), bool multi_instance);
  
  /**
   * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
@@@ -120,7 -140,7 +144,7 @@@ static inline int cpuhp_setup_state(enu
                                    int (*startup)(unsigned int cpu),
                                    int (*teardown)(unsigned int cpu))
  {
-       return __cpuhp_setup_state(state, name, true, startup, teardown);
+       return __cpuhp_setup_state(state, name, true, startup, teardown, false);
  }
  
  /**
@@@ -139,7 -159,66 +163,66 @@@ static inline int cpuhp_setup_state_noc
                                            int (*startup)(unsigned int cpu),
                                            int (*teardown)(unsigned int cpu))
  {
-       return __cpuhp_setup_state(state, name, false, startup, teardown);
+       return __cpuhp_setup_state(state, name, false, startup, teardown,
+                                  false);
+ }
+ /**
+  * cpuhp_setup_state_multi - Add callbacks for multi state
+  * @state:    The state for which the calls are installed
+  * @name:     Name of the callback.
+  * @startup:  startup callback function
+  * @teardown: teardown callback function
+  *
+  * Sets the internal multi_instance flag and prepares a state to work as a multi
+  * instance callback. No callbacks are invoked at this point. The callbacks are
+  * invoked once an instance for this state is registered via
+  * @cpuhp_state_add_instance or @cpuhp_state_add_instance_nocalls.
+  */
+ static inline int cpuhp_setup_state_multi(enum cpuhp_state state,
+                                         const char *name,
+                                         int (*startup)(unsigned int cpu,
+                                                        struct hlist_node *node),
+                                         int (*teardown)(unsigned int cpu,
+                                                         struct hlist_node *node))
+ {
+       return __cpuhp_setup_state(state, name, false,
+                                  (void *) startup,
+                                  (void *) teardown, true);
+ }
+ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
+                              bool invoke);
+ /**
+  * cpuhp_state_add_instance - Add an instance for a state and invoke startup
+  *                            callback.
+  * @state:    The state for which the instance is installed
+  * @node:     The node for this individual state.
+  *
+  * Installs the instance for the @state and invokes the startup callback on
+  * the present cpus which have already reached the @state. The @state must have
+  * been earlier marked as multi-instance by @cpuhp_setup_state_multi.
+  */
+ static inline int cpuhp_state_add_instance(enum cpuhp_state state,
+                                          struct hlist_node *node)
+ {
+       return __cpuhp_state_add_instance(state, node, true);
+ }
+ /**
+  * cpuhp_state_add_instance_nocalls - Add an instance for a state without
+  *                                    invoking the startup callback.
+  * @state:    The state for which the instance is installed
+  * @node:     The node for this individual state.
+  *
+  * Installs the instance for the @state. The @state must have been earlier
+  * marked as multi-instance by @cpuhp_setup_state_multi.
+  */
+ static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state,
+                                                  struct hlist_node *node)
+ {
+       return __cpuhp_state_add_instance(state, node, false);
  }
  
  void __cpuhp_remove_state(enum cpuhp_state state, bool invoke);
@@@ -166,6 -245,51 +249,51 @@@ static inline void cpuhp_remove_state_n
        __cpuhp_remove_state(state, false);
  }
  
+ /**
+  * cpuhp_remove_multi_state - Remove hotplug multi state callback
+  * @state:    The state for which the calls are removed
+  *
+  * Removes the callback functions from a multi state. This is the reverse of
+  * cpuhp_setup_state_multi(). All instances should have been removed before
+  * invoking this function.
+  */
+ static inline void cpuhp_remove_multi_state(enum cpuhp_state state)
+ {
+       __cpuhp_remove_state(state, false);
+ }
+ int __cpuhp_state_remove_instance(enum cpuhp_state state,
+                                 struct hlist_node *node, bool invoke);
+ /**
+  * cpuhp_state_remove_instance - Remove hotplug instance from state and invoke
+  *                               the teardown callback
+  * @state:    The state from which the instance is removed
+  * @node:     The node for this individual state.
+  *
+  * Removes the instance and invokes the teardown callback on the present cpus
+  * which have already reached the @state.
+  */
+ static inline int cpuhp_state_remove_instance(enum cpuhp_state state,
+                                             struct hlist_node *node)
+ {
+       return __cpuhp_state_remove_instance(state, node, true);
+ }
+ /**
+  * cpuhp_state_remove_instance_nocalls - Remove hotplug instance from state
+  *                                     without invoking the teardown callback
+  * @state:    The state from which the instance is removed
+  * @node:     The node for this individual state.
+  *
+  * Removes the instance without invoking the teardown callback.
+  */
+ static inline int cpuhp_state_remove_instance_nocalls(enum cpuhp_state state,
+                                                     struct hlist_node *node)
+ {
+       return __cpuhp_state_remove_instance(state, node, false);
+ }
  #ifdef CONFIG_SMP
  void cpuhp_online_idle(enum cpuhp_state state);
  #else
diff --combined include/linux/perf/arm_pmu.h
@@@ -14,7 -14,7 +14,7 @@@
  
  #include <linux/interrupt.h>
  #include <linux/perf_event.h>
 -
 +#include <linux/sysfs.h>
  #include <asm/cputype.h>
  
  /*
@@@ -77,13 -77,6 +77,13 @@@ struct pmu_hw_events 
        struct arm_pmu          *percpu_pmu;
  };
  
 +enum armpmu_attr_groups {
 +      ARMPMU_ATTR_GROUP_COMMON,
 +      ARMPMU_ATTR_GROUP_EVENTS,
 +      ARMPMU_ATTR_GROUP_FORMATS,
 +      ARMPMU_NR_ATTR_GROUPS
 +};
 +
  struct arm_pmu {
        struct pmu      pmu;
        cpumask_t       active_irqs;
        DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
        struct platform_device  *plat_device;
        struct pmu_hw_events    __percpu *hw_events;
-       struct list_head        entry;
+       struct hlist_node       node;
        struct notifier_block   cpu_pm_nb;
 +      /* the attr_groups array must be NULL-terminated */
 +      const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1];
  };
  
  #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
@@@ -160,8 -151,6 +160,8 @@@ int arm_pmu_device_probe(struct platfor
                         const struct of_device_id *of_table,
                         const struct pmu_probe_info *probe_table);
  
 +#define ARMV8_PMU_PDEV_NAME "armv8-pmu"
 +
  #endif /* CONFIG_ARM_PMU */
  
  #endif /* __ARM_PMU_H__ */
diff --combined kernel/cpu.c
@@@ -23,6 -23,8 +23,8 @@@
  #include <linux/tick.h>
  #include <linux/irq.h>
  #include <linux/smpboot.h>
+ #include <linux/relay.h>
+ #include <linux/slab.h>
  
  #include <trace/events/power.h>
  #define CREATE_TRACE_POINTS
@@@ -37,8 -39,9 +39,9 @@@
   * @thread:   Pointer to the hotplug thread
   * @should_run:       Thread should execute
   * @rollback: Perform a rollback
-  * @cb_stat:  The state for a single callback (install/uninstall)
-  * @cb:               Single callback function (install/uninstall)
+  * @single:   Single callback invocation
+  * @bringup:  Single callback bringup or teardown selector
+  * @cb_state: The state for a single callback (install/uninstall)
   * @result:   Result of the operation
   * @done:     Signal completion to the issuer of the task
   */
@@@ -49,8 -52,10 +52,10 @@@ struct cpuhp_cpu_state 
        struct task_struct      *thread;
        bool                    should_run;
        bool                    rollback;
+       bool                    single;
+       bool                    bringup;
+       struct hlist_node       *node;
        enum cpuhp_state        cb_state;
-       int                     (*cb)(unsigned int cpu);
        int                     result;
        struct completion       done;
  #endif
@@@ -68,35 -73,103 +73,103 @@@ static DEFINE_PER_CPU(struct cpuhp_cpu_
   * @cant_stop:        Bringup/teardown can't be stopped at this step
   */
  struct cpuhp_step {
-       const char      *name;
-       int             (*startup)(unsigned int cpu);
-       int             (*teardown)(unsigned int cpu);
-       bool            skip_onerr;
-       bool            cant_stop;
+       const char              *name;
+       union {
+               int             (*single)(unsigned int cpu);
+               int             (*multi)(unsigned int cpu,
+                                        struct hlist_node *node);
+       } startup;
+       union {
+               int             (*single)(unsigned int cpu);
+               int             (*multi)(unsigned int cpu,
+                                        struct hlist_node *node);
+       } teardown;
+       struct hlist_head       list;
+       bool                    skip_onerr;
+       bool                    cant_stop;
+       bool                    multi_instance;
  };
  
  static DEFINE_MUTEX(cpuhp_state_mutex);
  static struct cpuhp_step cpuhp_bp_states[];
  static struct cpuhp_step cpuhp_ap_states[];
  
+ static bool cpuhp_is_ap_state(enum cpuhp_state state)
+ {
+       /*
+        * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
+        * purposes as that state is handled explicitly in cpu_down.
+        */
+       return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
+ }
+ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
+ {
+       struct cpuhp_step *sp;
+       sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
+       return sp + state;
+ }
  /**
   * cpuhp_invoke_callback - Invoke the callbacks for a given state
   * @cpu:      The cpu for which the callback should be invoked
   * @step:     The step in the state machine
-  * @cb:               The callback function to invoke
+  * @bringup:  True if the bringup callback should be invoked
   *
-  * Called from cpu hotplug and from the state register machinery
+  * Called from cpu hotplug and from the state register machinery.
   */
- static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state step,
-                                int (*cb)(unsigned int))
+ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
+                                bool bringup, struct hlist_node *node)
  {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
-       int ret = 0;
-       if (cb) {
-               trace_cpuhp_enter(cpu, st->target, step, cb);
+       struct cpuhp_step *step = cpuhp_get_step(state);
+       int (*cbm)(unsigned int cpu, struct hlist_node *node);
+       int (*cb)(unsigned int cpu);
+       int ret, cnt;
+       if (!step->multi_instance) {
+               cb = bringup ? step->startup.single : step->teardown.single;
+               if (!cb)
+                       return 0;
+               trace_cpuhp_enter(cpu, st->target, state, cb);
                ret = cb(cpu);
-               trace_cpuhp_exit(cpu, st->state, step, ret);
+               trace_cpuhp_exit(cpu, st->state, state, ret);
+               return ret;
+       }
+       cbm = bringup ? step->startup.multi : step->teardown.multi;
+       if (!cbm)
+               return 0;
+       /* Single invocation for instance add/remove */
+       if (node) {
+               trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
+               ret = cbm(cpu, node);
+               trace_cpuhp_exit(cpu, st->state, state, ret);
+               return ret;
+       }
+       /* State transition. Invoke on all instances */
+       cnt = 0;
+       hlist_for_each(node, &step->list) {
+               trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
+               ret = cbm(cpu, node);
+               trace_cpuhp_exit(cpu, st->state, state, ret);
+               if (ret)
+                       goto err;
+               cnt++;
+       }
+       return 0;
+ err:
+       /* Rollback the instances if one failed */
+       cbm = !bringup ? step->startup.multi : step->teardown.multi;
+       if (!cbm)
+               return ret;
+       hlist_for_each(node, &step->list) {
+               if (!cnt--)
+                       break;
+               cbm(cpu, node);
        }
        return ret;
  }
@@@ -260,10 -333,17 +333,17 @@@ void cpu_hotplug_disable(void
  }
  EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
  
+ static void __cpu_hotplug_enable(void)
+ {
+       if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
+               return;
+       cpu_hotplug_disabled--;
+ }
  void cpu_hotplug_enable(void)
  {
        cpu_maps_update_begin();
-       WARN_ON(--cpu_hotplug_disabled < 0);
+       __cpu_hotplug_enable();
        cpu_maps_update_done();
  }
  EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
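With __cpu_hotplug_enable() warning once instead of letting the counter go negative, an unbalanced caller is caught rather than silently re-enabling hotplug for everyone else. The expected usage is still a strict pair around the hotplug-sensitive region; a minimal sketch, where foo_do_work() is a placeholder:

	cpu_hotplug_disable();		/* increments cpu_hotplug_disabled */
	foo_do_work();			/* no CPU can be plugged or unplugged here */
	cpu_hotplug_enable();		/* must balance the disable above */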
@@@ -330,12 -410,6 +410,6 @@@ static int notify_online(unsigned int c
        return 0;
  }
  
- static int notify_starting(unsigned int cpu)
- {
-       cpu_notify(CPU_STARTING, cpu);
-       return 0;
- }
  static int bringup_wait_for_ap(unsigned int cpu)
  {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
@@@ -349,8 -423,16 +423,16 @@@ static int bringup_cpu(unsigned int cpu
        struct task_struct *idle = idle_thread_get(cpu);
        int ret;
  
+       /*
+        * Some architectures have to walk the irq descriptors to
+        * setup the vector space for the cpu which comes online.
+        * Prevent irq alloc/free across the bringup.
+        */
+       irq_lock_sparse();
        /* Arch-specific enabling code. */
        ret = __cpu_up(cpu, idle);
+       irq_unlock_sparse();
        if (ret) {
                cpu_notify(CPU_UP_CANCELED, cpu);
                return ret;
  /*
   * Hotplug state machine related functions
   */
- static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st,
-                         struct cpuhp_step *steps)
+ static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
  {
        for (st->state++; st->state < st->target; st->state++) {
-               struct cpuhp_step *step = steps + st->state;
+               struct cpuhp_step *step = cpuhp_get_step(st->state);
  
                if (!step->skip_onerr)
-                       cpuhp_invoke_callback(cpu, st->state, step->startup);
+                       cpuhp_invoke_callback(cpu, st->state, true, NULL);
        }
  }
  
  static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
-                               struct cpuhp_step *steps, enum cpuhp_state target)
+                               enum cpuhp_state target)
  {
        enum cpuhp_state prev_state = st->state;
        int ret = 0;
  
        for (; st->state > target; st->state--) {
-               struct cpuhp_step *step = steps + st->state;
-               ret = cpuhp_invoke_callback(cpu, st->state, step->teardown);
+               ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
                if (ret) {
                        st->target = prev_state;
-                       undo_cpu_down(cpu, st, steps);
+                       undo_cpu_down(cpu, st);
                        break;
                }
        }
        return ret;
  }
  
- static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st,
-                       struct cpuhp_step *steps)
+ static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
  {
        for (st->state--; st->state > st->target; st->state--) {
-               struct cpuhp_step *step = steps + st->state;
+               struct cpuhp_step *step = cpuhp_get_step(st->state);
  
                if (!step->skip_onerr)
-                       cpuhp_invoke_callback(cpu, st->state, step->teardown);
+                       cpuhp_invoke_callback(cpu, st->state, false, NULL);
        }
  }
  
  static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
-                             struct cpuhp_step *steps, enum cpuhp_state target)
+                             enum cpuhp_state target)
  {
        enum cpuhp_state prev_state = st->state;
        int ret = 0;
  
        while (st->state < target) {
-               struct cpuhp_step *step;
                st->state++;
-               step = steps + st->state;
-               ret = cpuhp_invoke_callback(cpu, st->state, step->startup);
+               ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
                if (ret) {
                        st->target = prev_state;
-                       undo_cpu_up(cpu, st, steps);
+                       undo_cpu_up(cpu, st);
                        break;
                }
        }
@@@ -447,13 -522,13 +522,13 @@@ static int cpuhp_ap_offline(unsigned in
  {
        enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
  
-       return cpuhp_down_callbacks(cpu, st, cpuhp_ap_states, target);
+       return cpuhp_down_callbacks(cpu, st, target);
  }
  
  /* Execute the online startup callbacks. Used to be CPU_ONLINE */
  static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
  {
-       return cpuhp_up_callbacks(cpu, st, cpuhp_ap_states, st->target);
+       return cpuhp_up_callbacks(cpu, st, st->target);
  }
  
  /*
@@@ -476,18 -551,20 +551,20 @@@ static void cpuhp_thread_fun(unsigned i
        st->should_run = false;
  
        /* Single callback invocation for [un]install ? */
-       if (st->cb) {
+       if (st->single) {
                if (st->cb_state < CPUHP_AP_ONLINE) {
                        local_irq_disable();
-                       ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+                       ret = cpuhp_invoke_callback(cpu, st->cb_state,
+                                                   st->bringup, st->node);
                        local_irq_enable();
                } else {
-                       ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+                       ret = cpuhp_invoke_callback(cpu, st->cb_state,
+                                                   st->bringup, st->node);
                }
        } else if (st->rollback) {
                BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
  
-               undo_cpu_down(cpu, st, cpuhp_ap_states);
+               undo_cpu_down(cpu, st);
                /*
                 * This is a momentary workaround to keep the notifier users
                 * happy. Will go away once we got rid of the notifiers.
  }
  
  /* Invoke a single callback on a remote cpu */
- static int cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state,
-                                   int (*cb)(unsigned int))
+ static int
+ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
+                        struct hlist_node *node)
  {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
  
         * we invoke the thread function directly.
         */
        if (!st->thread)
-               return cpuhp_invoke_callback(cpu, state, cb);
+               return cpuhp_invoke_callback(cpu, state, bringup, node);
  
        st->cb_state = state;
-       st->cb = cb;
+       st->single = true;
+       st->bringup = bringup;
+       st->node = node;
        /*
         * Make sure the above stores are visible before should_run becomes
         * true. Paired with the mb() above in cpuhp_thread_fun()
  static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
  {
        st->result = 0;
-       st->cb = NULL;
+       st->single = false;
        /*
         * Make sure the above stores are visible before should_run becomes
         * true. Paired with the mb() above in cpuhp_thread_fun()
@@@ -674,12 -755,6 +755,6 @@@ static int notify_down_prepare(unsigne
        return err;
  }
  
- static int notify_dying(unsigned int cpu)
- {
-       cpu_notify(CPU_DYING, cpu);
-       return 0;
- }
  /* Take this CPU down. */
  static int take_cpu_down(void *_param)
  {
        if (err < 0)
                return err;
  
+       /*
+        * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
+        * do this step again.
+        */
+       WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
+       st->state--;
        /* Invoke the former CPU_DYING callbacks */
-       for (; st->state > target; st->state--) {
-               struct cpuhp_step *step = cpuhp_ap_states + st->state;
+       for (; st->state > target; st->state--)
+               cpuhp_invoke_callback(cpu, st->state, false, NULL);
  
-               cpuhp_invoke_callback(cpu, st->state, step->teardown);
-       }
        /* Give up timekeeping duties */
        tick_handover_do_timer();
        /* Park the stopper thread */
@@@ -734,7 -813,7 +813,7 @@@ static int takedown_cpu(unsigned int cp
        BUG_ON(cpu_online(cpu));
  
        /*
-        * The migration_call() CPU_DYING callback will have removed all
+        * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all
         * runnable tasks from the cpu, there's only the idle task left now
         * that the migration thread is done doing the stop_machine thing.
         *
@@@ -787,7 -866,6 +866,6 @@@ void cpuhp_report_idle_dead(void
  #define notify_down_prepare   NULL
  #define takedown_cpu          NULL
  #define notify_dead           NULL
- #define notify_dying          NULL
  #endif
  
  #ifdef CONFIG_HOTPLUG_CPU
@@@ -836,7 -914,7 +914,7 @@@ static int __ref _cpu_down(unsigned in
         * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
         * to do the further cleanups.
         */
-       ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target);
+       ret = cpuhp_down_callbacks(cpu, st, target);
        if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
                st->target = prev_state;
                st->rollback = true;
@@@ -877,10 -955,9 +955,9 @@@ EXPORT_SYMBOL(cpu_down)
  #endif /*CONFIG_HOTPLUG_CPU*/
  
  /**
-  * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
+  * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
   * @cpu: cpu that just started
   *
-  * This function calls the cpu_chain notifiers with CPU_STARTING.
   * It must be called by the arch code on the new cpu, before the new cpu
   * enables interrupts and before the "boot" cpu returns from __cpu_up().
   */
@@@ -889,13 -966,9 +966,10 @@@ void notify_cpu_starting(unsigned int c
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
        enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
  
 +      rcu_cpu_starting(cpu);  /* Enables RCU usage on this CPU. */
        while (st->state < target) {
-               struct cpuhp_step *step;
                st->state++;
-               step = cpuhp_ap_states + st->state;
-               cpuhp_invoke_callback(cpu, st->state, step->startup);
+               cpuhp_invoke_callback(cpu, st->state, true, NULL);
        }
  }
  
@@@ -980,7 -1053,7 +1054,7 @@@ static int _cpu_up(unsigned int cpu, in
         * responsible for bringing it up to the target state.
         */
        target = min((int)target, CPUHP_BRINGUP_CPU);
-       ret = cpuhp_up_callbacks(cpu, st, cpuhp_bp_states, target);
+       ret = cpuhp_up_callbacks(cpu, st, target);
  out:
        cpu_hotplug_done();
        return ret;
@@@ -1025,13 -1098,12 +1099,13 @@@ EXPORT_SYMBOL_GPL(cpu_up)
  #ifdef CONFIG_PM_SLEEP_SMP
  static cpumask_var_t frozen_cpus;
  
 -int disable_nonboot_cpus(void)
 +int freeze_secondary_cpus(int primary)
  {
 -      int cpu, first_cpu, error = 0;
 +      int cpu, error = 0;
  
        cpu_maps_update_begin();
 -      first_cpu = cpumask_first(cpu_online_mask);
 +      if (!cpu_online(primary))
 +              primary = cpumask_first(cpu_online_mask);
        /*
         * We take down all of the non-boot CPUs in one shot to avoid races
         * with the userspace trying to use the CPU hotplug at the same time
  
        pr_info("Disabling non-boot CPUs ...\n");
        for_each_online_cpu(cpu) {
 -              if (cpu == first_cpu)
 +              if (cpu == primary)
                        continue;
                trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
                error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
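freeze_secondary_cpus(primary) generalizes the old disable_nonboot_cpus(): the caller names which CPU stays online, and the function falls back to the first online CPU if that one is already down; disable_nonboot_cpus() is now simply freeze_secondary_cpus(0), as the cpu.h hunk earlier shows. A sketch under the assumption that suspending from CPU 2 is wanted; the function name and CPU number are illustrative only:

static int foo_suspend_on_cpu2(void)
{
	int error;

	error = freeze_secondary_cpus(2);	/* keep CPU 2, take the rest down */
	if (error)
		return error;

	/* ... suspend work runs on the remaining online CPU ... */

	enable_nonboot_cpus();			/* bring the frozen CPUs back up */
	return 0;
}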
@@@ -1083,7 -1155,7 +1157,7 @@@ void enable_nonboot_cpus(void
  
        /* Allow everyone to use the CPU hotplug again */
        cpu_maps_update_begin();
-       WARN_ON(--cpu_hotplug_disabled < 0);
+       __cpu_hotplug_enable();
        if (cpumask_empty(frozen_cpus))
                goto out;
  
@@@ -1172,40 -1244,50 +1246,50 @@@ core_initcall(cpu_hotplug_pm_sync_init)
  static struct cpuhp_step cpuhp_bp_states[] = {
        [CPUHP_OFFLINE] = {
                .name                   = "offline",
-               .startup                = NULL,
-               .teardown               = NULL,
+               .startup.single         = NULL,
+               .teardown.single        = NULL,
        },
  #ifdef CONFIG_SMP
        [CPUHP_CREATE_THREADS]= {
-               .name                   = "threads:create",
-               .startup                = smpboot_create_threads,
-               .teardown               = NULL,
+               .name                   = "threads:prepare",
+               .startup.single         = smpboot_create_threads,
+               .teardown.single        = NULL,
                .cant_stop              = true,
        },
        [CPUHP_PERF_PREPARE] = {
-               .name = "perf prepare",
-               .startup = perf_event_init_cpu,
-               .teardown = perf_event_exit_cpu,
+               .name                   = "perf:prepare",
+               .startup.single         = perf_event_init_cpu,
+               .teardown.single        = perf_event_exit_cpu,
        },
        [CPUHP_WORKQUEUE_PREP] = {
-               .name = "workqueue prepare",
-               .startup = workqueue_prepare_cpu,
-               .teardown = NULL,
+               .name                   = "workqueue:prepare",
+               .startup.single         = workqueue_prepare_cpu,
+               .teardown.single        = NULL,
        },
        [CPUHP_HRTIMERS_PREPARE] = {
-               .name = "hrtimers prepare",
-               .startup = hrtimers_prepare_cpu,
-               .teardown = hrtimers_dead_cpu,
+               .name                   = "hrtimers:prepare",
+               .startup.single         = hrtimers_prepare_cpu,
+               .teardown.single        = hrtimers_dead_cpu,
        },
        [CPUHP_SMPCFD_PREPARE] = {
-               .name = "SMPCFD prepare",
-               .startup = smpcfd_prepare_cpu,
-               .teardown = smpcfd_dead_cpu,
+               .name                   = "smpcfd:prepare",
+               .startup.single         = smpcfd_prepare_cpu,
+               .teardown.single        = smpcfd_dead_cpu,
+       },
+       [CPUHP_RELAY_PREPARE] = {
+               .name                   = "relay:prepare",
+               .startup.single         = relay_prepare_cpu,
+               .teardown.single        = NULL,
+       },
+       [CPUHP_SLAB_PREPARE] = {
+               .name                   = "slab:prepare",
+               .startup.single         = slab_prepare_cpu,
+               .teardown.single        = slab_dead_cpu,
        },
        [CPUHP_RCUTREE_PREP] = {
-               .name = "RCU-tree prepare",
-               .startup = rcutree_prepare_cpu,
-               .teardown = rcutree_dead_cpu,
+               .name                   = "RCU/tree:prepare",
+               .startup.single         = rcutree_prepare_cpu,
+               .teardown.single        = rcutree_dead_cpu,
        },
        /*
         * Preparatory and dead notifiers. Will be replaced once the notifiers
         */
        [CPUHP_NOTIFY_PREPARE] = {
                .name                   = "notify:prepare",
-               .startup                = notify_prepare,
-               .teardown               = notify_dead,
+               .startup.single         = notify_prepare,
+               .teardown.single        = notify_dead,
                .skip_onerr             = true,
                .cant_stop              = true,
        },
         * otherwise a RCU stall occurs.
         */
        [CPUHP_TIMERS_DEAD] = {
-               .name = "timers dead",
-               .startup = NULL,
-               .teardown = timers_dead_cpu,
+               .name                   = "timers:dead",
+               .startup.single         = NULL,
+               .teardown.single        = timers_dead_cpu,
        },
        /* Kicks the plugged cpu into life */
        [CPUHP_BRINGUP_CPU] = {
                .name                   = "cpu:bringup",
-               .startup                = bringup_cpu,
-               .teardown               = NULL,
+               .startup.single         = bringup_cpu,
+               .teardown.single        = NULL,
                .cant_stop              = true,
        },
        [CPUHP_AP_SMPCFD_DYING] = {
-               .startup = NULL,
-               .teardown = smpcfd_dying_cpu,
+               .name                   = "smpcfd:dying",
+               .startup.single         = NULL,
+               .teardown.single        = smpcfd_dying_cpu,
        },
        /*
         * Handled on control processor until the plugged processor manages
         */
        [CPUHP_TEARDOWN_CPU] = {
                .name                   = "cpu:teardown",
-               .startup                = NULL,
-               .teardown               = takedown_cpu,
+               .startup.single         = NULL,
+               .teardown.single        = takedown_cpu,
                .cant_stop              = true,
        },
  #else
@@@ -1272,24 -1355,13 +1357,13 @@@ static struct cpuhp_step cpuhp_ap_state
        /* First state is scheduler control. Interrupts are disabled */
        [CPUHP_AP_SCHED_STARTING] = {
                .name                   = "sched:starting",
-               .startup                = sched_cpu_starting,
-               .teardown               = sched_cpu_dying,
+               .startup.single         = sched_cpu_starting,
+               .teardown.single        = sched_cpu_dying,
        },
        [CPUHP_AP_RCUTREE_DYING] = {
-               .startup = NULL,
-               .teardown = rcutree_dying_cpu,
-       },
-       /*
-        * Low level startup/teardown notifiers. Run with interrupts
-        * disabled. Will be removed once the notifiers are converted to
-        * states.
-        */
-       [CPUHP_AP_NOTIFY_STARTING] = {
-               .name                   = "notify:starting",
-               .startup                = notify_starting,
-               .teardown               = notify_dying,
-               .skip_onerr             = true,
-               .cant_stop              = true,
+               .name                   = "RCU/tree:dying",
+               .startup.single         = NULL,
+               .teardown.single        = rcutree_dying_cpu,
        },
        /* Entry state on starting. Interrupts enabled from here on. Transient
        * state for synchronization */
        },
        /* Handle smpboot threads park/unpark */
        [CPUHP_AP_SMPBOOT_THREADS] = {
-               .name                   = "smpboot:threads",
-               .startup                = smpboot_unpark_threads,
-               .teardown               = NULL,
+               .name                   = "smpboot/threads:online",
+               .startup.single         = smpboot_unpark_threads,
+               .teardown.single        = NULL,
        },
        [CPUHP_AP_PERF_ONLINE] = {
-               .name = "perf online",
-               .startup = perf_event_init_cpu,
-               .teardown = perf_event_exit_cpu,
+               .name                   = "perf:online",
+               .startup.single         = perf_event_init_cpu,
+               .teardown.single        = perf_event_exit_cpu,
        },
        [CPUHP_AP_WORKQUEUE_ONLINE] = {
-               .name = "workqueue online",
-               .startup = workqueue_online_cpu,
-               .teardown = workqueue_offline_cpu,
+               .name                   = "workqueue:online",
+               .startup.single         = workqueue_online_cpu,
+               .teardown.single        = workqueue_offline_cpu,
        },
        [CPUHP_AP_RCUTREE_ONLINE] = {
-               .name = "RCU-tree online",
-               .startup = rcutree_online_cpu,
-               .teardown = rcutree_offline_cpu,
+               .name                   = "RCU/tree:online",
+               .startup.single         = rcutree_online_cpu,
+               .teardown.single        = rcutree_offline_cpu,
        },
  
        /*
         */
        [CPUHP_AP_NOTIFY_ONLINE] = {
                .name                   = "notify:online",
-               .startup                = notify_online,
-               .teardown               = notify_down_prepare,
+               .startup.single         = notify_online,
+               .teardown.single        = notify_down_prepare,
                .skip_onerr             = true,
        },
  #endif
        /* Last state is scheduler control setting the cpu active */
        [CPUHP_AP_ACTIVE] = {
                .name                   = "sched:active",
-               .startup                = sched_cpu_activate,
-               .teardown               = sched_cpu_deactivate,
+               .startup.single         = sched_cpu_activate,
+               .teardown.single        = sched_cpu_deactivate,
        },
  #endif
  
        /* CPU is fully up and running. */
        [CPUHP_ONLINE] = {
                .name                   = "online",
-               .startup                = NULL,
-               .teardown               = NULL,
+               .startup.single         = NULL,
+               .teardown.single        = NULL,
        },
  };
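The .startup.single/.teardown.single initializers used in these tables imply that each
hotplug step now carries a union per direction, so one state can hold either a plain
per-cpu callback or a multi-instance callback taking an hlist_node. A rough sketch of
the structure behind the initializers (field names follow the converted kernel/cpu.c;
the exact layout may differ):

struct cpuhp_step {
	const char	*name;
	union {
		int	(*single)(unsigned int cpu);
		int	(*multi)(unsigned int cpu, struct hlist_node *node);
	} startup;
	union {
		int	(*single)(unsigned int cpu);
		int	(*multi)(unsigned int cpu, struct hlist_node *node);
	} teardown;
	/* List of registered instances for multi_instance states */
	struct hlist_head	list;
	bool			skip_onerr;
	bool			cant_stop;
	bool			multi_instance;
};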
  
@@@ -1358,54 -1430,42 +1432,42 @@@ static int cpuhp_cb_check(enum cpuhp_st
        return 0;
  }
  
- static bool cpuhp_is_ap_state(enum cpuhp_state state)
- {
-       /*
-        * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
-        * purposes as that state is handled explicitely in cpu_down.
-        */
-       return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
- }
- static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
- {
-       struct cpuhp_step *sp;
-       sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
-       return sp + state;
- }
  static void cpuhp_store_callbacks(enum cpuhp_state state,
                                  const char *name,
                                  int (*startup)(unsigned int cpu),
-                                 int (*teardown)(unsigned int cpu))
+                                 int (*teardown)(unsigned int cpu),
+                                 bool multi_instance)
  {
        /* (Un)Install the callbacks for further cpu hotplug operations */
        struct cpuhp_step *sp;
  
        mutex_lock(&cpuhp_state_mutex);
        sp = cpuhp_get_step(state);
-       sp->startup = startup;
-       sp->teardown = teardown;
+       sp->startup.single = startup;
+       sp->teardown.single = teardown;
        sp->name = name;
+       sp->multi_instance = multi_instance;
+       INIT_HLIST_HEAD(&sp->list);
        mutex_unlock(&cpuhp_state_mutex);
  }
  
  static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
  {
-       return cpuhp_get_step(state)->teardown;
+       return cpuhp_get_step(state)->teardown.single;
  }
  
  /*
   * Call the startup/teardown function for a step either on the AP or
   * on the current CPU.
   */
- static int cpuhp_issue_call(int cpu, enum cpuhp_state state,
-                           int (*cb)(unsigned int), bool bringup)
+ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
+                           struct hlist_node *node)
  {
+       struct cpuhp_step *sp = cpuhp_get_step(state);
        int ret;
  
-       if (!cb)
+       if ((bringup && !sp->startup.single) ||
+           (!bringup && !sp->teardown.single))
                return 0;
        /*
         * The non AP bound callbacks can fail on bringup. On teardown
         */
  #ifdef CONFIG_SMP
        if (cpuhp_is_ap_state(state))
-               ret = cpuhp_invoke_ap_callback(cpu, state, cb);
+               ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
        else
-               ret = cpuhp_invoke_callback(cpu, state, cb);
+               ret = cpuhp_invoke_callback(cpu, state, bringup, node);
  #else
-       ret = cpuhp_invoke_callback(cpu, state, cb);
+       ret = cpuhp_invoke_callback(cpu, state, bringup, node);
  #endif
        BUG_ON(ret && !bringup);
        return ret;
   * Note: The teardown callbacks for rollback are not allowed to fail!
   */
  static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
-                                  int (*teardown)(unsigned int cpu))
+                                  struct hlist_node *node)
  {
        int cpu;
  
-       if (!teardown)
-               return;
        /* Roll back the already executed steps on the other cpus */
        for_each_present_cpu(cpu) {
                struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
  
                /* Did we invoke the startup call on that cpu ? */
                if (cpustate >= state)
-                       cpuhp_issue_call(cpu, state, teardown, false);
+                       cpuhp_issue_call(cpu, state, false, node);
        }
  }
  
@@@ -1473,6 -1530,52 +1532,52 @@@ static int cpuhp_reserve_state(enum cpu
        return -ENOSPC;
  }
  
+ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
+                              bool invoke)
+ {
+       struct cpuhp_step *sp;
+       int cpu;
+       int ret;
+       sp = cpuhp_get_step(state);
+       if (sp->multi_instance == false)
+               return -EINVAL;
+       get_online_cpus();
+       if (!invoke || !sp->startup.multi)
+               goto add_node;
+       /*
+        * Try to call the startup callback for each present cpu
+        * depending on the hotplug state of the cpu.
+        */
+       for_each_present_cpu(cpu) {
+               struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+               int cpustate = st->state;
+               if (cpustate < state)
+                       continue;
+               ret = cpuhp_issue_call(cpu, state, true, node);
+               if (ret) {
+                       if (sp->teardown.multi)
+                               cpuhp_rollback_install(cpu, state, node);
+                       goto err;
+               }
+       }
+ add_node:
+       ret = 0;
+       mutex_lock(&cpuhp_state_mutex);
+       hlist_add_head(node, &sp->list);
+       mutex_unlock(&cpuhp_state_mutex);
+ err:
+       put_online_cpus();
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
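Drivers normally reach __cpuhp_state_add_instance() through the
cpuhp_setup_state_multi()/cpuhp_state_add_instance() wrappers. A minimal usage sketch,
assuming a dynamically allocated AP online state; the my_drv_* names and struct my_inst
are illustrative only:

#include <linux/cpuhotplug.h>
#include <linux/list.h>

/* Hypothetical per-device instance; the core only tracks the hlist_node. */
struct my_inst {
	struct hlist_node	node;
	/* per-device state ... */
};

static enum cpuhp_state my_drv_state;

static int my_drv_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	/* hlist_entry(node, struct my_inst, node) recovers the instance */
	/* bring this instance up for @cpu */
	return 0;
}

static int my_drv_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	/* undo the per-cpu setup for this instance */
	return 0;
}

static int my_drv_register(struct my_inst *inst)
{
	int ret;

	/* One state per driver replaces the driver private CPU/instance list. */
	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "mydrv:online",
				      my_drv_cpu_online, my_drv_cpu_offline);
	if (ret < 0)
		return ret;
	/* Dynamic states return the allocated state number on success. */
	my_drv_state = ret;

	/* Runs my_drv_cpu_online() for @inst on each already online CPU. */
	return cpuhp_state_add_instance(my_drv_state, &inst->node);
}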
  /**
   * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
   * @state:    The state to setup
  int __cpuhp_setup_state(enum cpuhp_state state,
                        const char *name, bool invoke,
                        int (*startup)(unsigned int cpu),
-                       int (*teardown)(unsigned int cpu))
+                       int (*teardown)(unsigned int cpu),
+                       bool multi_instance)
  {
        int cpu, ret = 0;
        int dyn_state = 0;
                state = ret;
        }
  
-       cpuhp_store_callbacks(state, name, startup, teardown);
+       cpuhp_store_callbacks(state, name, startup, teardown, multi_instance);
  
        if (!invoke || !startup)
                goto out;
                if (cpustate < state)
                        continue;
  
-               ret = cpuhp_issue_call(cpu, state, startup, true);
+               ret = cpuhp_issue_call(cpu, state, true, NULL);
                if (ret) {
-                       cpuhp_rollback_install(cpu, state, teardown);
-                       cpuhp_store_callbacks(state, NULL, NULL, NULL);
+                       if (teardown)
+                               cpuhp_rollback_install(cpu, state, NULL);
+                       cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
                        goto out;
                }
        }
  }
  EXPORT_SYMBOL(__cpuhp_setup_state);
  
+ int __cpuhp_state_remove_instance(enum cpuhp_state state,
+                                 struct hlist_node *node, bool invoke)
+ {
+       struct cpuhp_step *sp = cpuhp_get_step(state);
+       int cpu;
+       BUG_ON(cpuhp_cb_check(state));
+       if (!sp->multi_instance)
+               return -EINVAL;
+       get_online_cpus();
+       if (!invoke || !cpuhp_get_teardown_cb(state))
+               goto remove;
+       /*
+        * Call the teardown callback for each present cpu depending
+        * on the hotplug state of the cpu. This function is not
+        * allowed to fail currently!
+        */
+       for_each_present_cpu(cpu) {
+               struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+               int cpustate = st->state;
+               if (cpustate >= state)
+                       cpuhp_issue_call(cpu, state, false, node);
+       }
+ remove:
+       mutex_lock(&cpuhp_state_mutex);
+       hlist_del(node);
+       mutex_unlock(&cpuhp_state_mutex);
+       put_online_cpus();
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
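The removal side is symmetric: an instance is taken out with
cpuhp_state_remove_instance(), which invokes the teardown callback on the online CPUs
before unlinking it, and only once the last instance is gone may the state itself be
removed. Continuing the hypothetical my_drv sketch (names are illustrative):

static void my_drv_unregister(struct my_inst *inst)
{
	/* Runs my_drv_cpu_offline() for @inst on each online CPU, then unlinks it. */
	cpuhp_state_remove_instance(my_drv_state, &inst->node);
}

static void my_drv_exit(void)
{
	/* Legal only when no instances are left, per the WARN in __cpuhp_remove_state(). */
	cpuhp_remove_multi_state(my_drv_state);
}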
  /**
   * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
   * @state:    The state to remove
   */
  void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
  {
-       int (*teardown)(unsigned int cpu) = cpuhp_get_teardown_cb(state);
+       struct cpuhp_step *sp = cpuhp_get_step(state);
        int cpu;
  
        BUG_ON(cpuhp_cb_check(state));
  
        get_online_cpus();
  
-       if (!invoke || !teardown)
+       if (sp->multi_instance) {
+               WARN(!hlist_empty(&sp->list),
+                    "Error: Removing state %d which has instances left.\n",
+                    state);
+               goto remove;
+       }
+       if (!invoke || !cpuhp_get_teardown_cb(state))
                goto remove;
  
        /*
                int cpustate = st->state;
  
                if (cpustate >= state)
-                       cpuhp_issue_call(cpu, state, teardown, false);
+                       cpuhp_issue_call(cpu, state, false, NULL);
        }
  remove:
-       cpuhp_store_callbacks(state, NULL, NULL, NULL);
+       cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
        put_online_cpus();
  }
  EXPORT_SYMBOL(__cpuhp_remove_state);
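Most of the per-subsystem conversions in this series use the single-instance form of
this interface: one cpuhp_setup_state() call naming the state and supplying symmetric
online/offline callbacks replaces the old CPU notifier. A minimal sketch with
illustrative names ("mysubsys:online" is not a real state):

#include <linux/cpuhotplug.h>
#include <linux/init.h>

static int my_subsys_online(unsigned int cpu)
{
	/* allocate/initialize per-cpu resources */
	return 0;
}

static int my_subsys_offline(unsigned int cpu)
{
	/* release per-cpu resources before the CPU goes away */
	return 0;
}

static int __init my_subsys_init(void)
{
	int ret;

	/*
	 * Installs the callbacks and immediately invokes my_subsys_online()
	 * on every CPU that is already online.
	 */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mysubsys:online",
				my_subsys_online, my_subsys_offline);
	/* Dynamic states return the state number on success. */
	return ret < 0 ? ret : 0;
}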
diff --combined kernel/softirq.c
@@@ -77,17 -77,6 +77,17 @@@ static void wakeup_softirqd(void
                wake_up_process(tsk);
  }
  
 +/*
 + * If ksoftirqd is scheduled, we do not want to process pending softirqs
 + * right now. Let ksoftirqd handle this at its own rate, to get fairness.
 + */
 +static bool ksoftirqd_running(void)
 +{
 +      struct task_struct *tsk = __this_cpu_read(ksoftirqd);
 +
 +      return tsk && (tsk->state == TASK_RUNNING);
 +}
 +
  /*
   * preempt_count and SOFTIRQ_OFFSET usage:
   * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
@@@ -324,7 -313,7 +324,7 @@@ asmlinkage __visible void do_softirq(vo
  
        pending = local_softirq_pending();
  
 -      if (pending)
 +      if (pending && !ksoftirqd_running())
                do_softirq_own_stack();
  
        local_irq_restore(flags);
@@@ -351,9 -340,6 +351,9 @@@ void irq_enter(void
  
  static inline void invoke_softirq(void)
  {
 +      if (ksoftirqd_running())
 +              return;
 +
        if (!force_irqthreads) {
  #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
                /*
@@@ -714,7 -700,7 +714,7 @@@ void tasklet_kill_immediate(struct task
        BUG();
  }
  
- static void takeover_tasklets(unsigned int cpu)
+ static int takeover_tasklets(unsigned int cpu)
  {
        /* CPU is dead, so no lock needed. */
        local_irq_disable();
        raise_softirq_irqoff(HI_SOFTIRQ);
  
        local_irq_enable();
+       return 0;
  }
+ #else
+ #define takeover_tasklets     NULL
  #endif /* CONFIG_HOTPLUG_CPU */
  
- static int cpu_callback(struct notifier_block *nfb, unsigned long action,
-                       void *hcpu)
- {
-       switch (action) {
- #ifdef CONFIG_HOTPLUG_CPU
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               takeover_tasklets((unsigned long)hcpu);
-               break;
- #endif /* CONFIG_HOTPLUG_CPU */
-       }
-       return NOTIFY_OK;
- }
- static struct notifier_block cpu_nfb = {
-       .notifier_call = cpu_callback
- };
  static struct smp_hotplug_thread softirq_threads = {
        .store                  = &ksoftirqd,
        .thread_should_run      = ksoftirqd_should_run,
  
  static __init int spawn_ksoftirqd(void)
  {
-       register_cpu_notifier(&cpu_nfb);
+       cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
+                                 takeover_tasklets);
        BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
  
        return 0;
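The softirq conversion above is teardown-only: CPUHP_SOFTIRQ_DEAD sits in the prepare
(control-CPU) section, so takeover_tasklets() runs on the control processor after the
CPU is gone, matching the old CPU_DEAD notifier, and the _nocalls variant installs the
callbacks without invoking anything for CPUs that are already online. A subsystem with
its own dead-CPU cleanup would follow the same pattern; CPUHP_MYSUBSYS_DEAD below is a
hypothetical entry that would have to be reserved in enum cpuhp_state:

#include <linux/cpuhotplug.h>
#include <linux/init.h>

/* Runs on the control CPU after @cpu went down, like the old CPU_DEAD. */
static int my_subsys_dead(unsigned int cpu)
{
	/* migrate or free whatever the dead CPU left behind */
	return 0;
}

static int __init my_subsys_init(void)
{
	/* No startup callback and no invocation for already online CPUs. */
	return cpuhp_setup_state_nocalls(CPUHP_MYSUBSYS_DEAD, "mysubsys:dead",
					 NULL, my_subsys_dead);
}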