cpu/hotplug: Add multi instance support
[cascardo/linux.git] / kernel / cpu.c
1 /* CPU control.
2  * (C) 2001, 2002, 2003, 2004 Rusty Russell
3  *
4  * This code is licenced under the GPL.
5  */
6 #include <linux/proc_fs.h>
7 #include <linux/smp.h>
8 #include <linux/init.h>
9 #include <linux/notifier.h>
10 #include <linux/sched.h>
11 #include <linux/unistd.h>
12 #include <linux/cpu.h>
13 #include <linux/oom.h>
14 #include <linux/rcupdate.h>
15 #include <linux/export.h>
16 #include <linux/bug.h>
17 #include <linux/kthread.h>
18 #include <linux/stop_machine.h>
19 #include <linux/mutex.h>
20 #include <linux/gfp.h>
21 #include <linux/suspend.h>
22 #include <linux/lockdep.h>
23 #include <linux/tick.h>
24 #include <linux/irq.h>
25 #include <linux/smpboot.h>
26
27 #include <trace/events/power.h>
28 #define CREATE_TRACE_POINTS
29 #include <trace/events/cpuhp.h>
30
31 #include "smpboot.h"
32
33 /**
34  * cpuhp_cpu_state - Per cpu hotplug state storage
35  * @state:      The current cpu state
36  * @target:     The target state
37  * @thread:     Pointer to the hotplug thread
38  * @should_run: Thread should execute
39  * @rollback:   Perform a rollback
40  * @single:     Single callback invocation
41  * @bringup:    Single callback bringup or teardown selector
    * @node:       Instance node handed to the callback on a single
    *              callback invocation (stored by cpuhp_invoke_ap_callback())
42  * @cb_state:   The state for a single callback (install/uninstall)
43  * @result:     Result of the operation
44  * @done:       Signal completion to the issuer of the task
    *
    * All members from @thread onwards exist only when CONFIG_SMP is set.
45  */
46 struct cpuhp_cpu_state {
47         enum cpuhp_state        state;
48         enum cpuhp_state        target;
49 #ifdef CONFIG_SMP
50         struct task_struct      *thread;
51         bool                    should_run;
52         bool                    rollback;
53         bool                    single;
54         bool                    bringup;
55         struct hlist_node       *node;
56         enum cpuhp_state        cb_state;
57         int                     result;
58         struct completion       done;
59 #endif
60 };
61
   /* One hotplug state record per cpu, accessed via per_cpu_ptr()/this_cpu_ptr() */
62 static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
63
64 /**
65  * cpuhp_step - Hotplug state machine step
66  * @name:       Name of the step
67  * @startup:    Startup function of the step
    * @startup_multi: Startup function taking an instance node; shares
    *              storage with @startup and is used when @multi_instance is set
68  * @teardown:   Teardown function of the step
    * @teardown_multi: Teardown function taking an instance node; shares
    *              storage with @teardown and is used when @multi_instance is set
    * @list:       Instance list walked by cpuhp_invoke_callback() for
    *              multi-instance steps
69  * @skip_onerr: Do not invoke the functions on error rollback
70  *              Will go away once the notifiers are gone
71  * @cant_stop:  Bringup/teardown can't be stopped at this step
    * @multi_instance: Selects the *_multi callbacks and per-instance
    *              invocation in cpuhp_invoke_callback()
72  */
73 struct cpuhp_step {
74         const char              *name;
75         union {
76                 int             (*startup)(unsigned int cpu);
77                 int             (*startup_multi)(unsigned int cpu,
78                                                  struct hlist_node *node);
79         };
80         union {
81                 int             (*teardown)(unsigned int cpu);
82                 int             (*teardown_multi)(unsigned int cpu,
83                                                   struct hlist_node *node);
84         };
85         struct hlist_head       list;
86         bool                    skip_onerr;
87         bool                    cant_stop;
88         bool                    multi_instance;
89 };
90
   /* NOTE(review): presumably serializes state (un)registration; users are outside this view - confirm */
91 static DEFINE_MUTEX(cpuhp_state_mutex);
   /*
    * Forward declarations of the step tables. cpuhp_get_step() indexes
    * them directly with the enum cpuhp_state value.
    */
92 static struct cpuhp_step cpuhp_bp_states[];
93 static struct cpuhp_step cpuhp_ap_states[];
94
95 static bool cpuhp_is_ap_state(enum cpuhp_state state)
96 {
97         /*
98          * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
99          * purposes as that state is handled explicitly in cpu_down.
100          */
101         return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
102 }
103
104 static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
105 {
106         struct cpuhp_step *sp;
107
108         sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
109         return sp + state;
110 }
111
112 /**
113  * cpuhp_invoke_callback _ Invoke the callbacks for a given state
114  * @cpu:        The cpu for which the callback should be invoked
115  * @step:       The step in the state machine
116  * @bringup:    True if the bringup callback should be invoked
117  *
118  * Called from cpu hotplug and from the state register machinery.
119  */
120 static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
121                                  bool bringup, struct hlist_node *node)
122 {
123         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
124         struct cpuhp_step *step = cpuhp_get_step(state);
125         int (*cbm)(unsigned int cpu, struct hlist_node *node);
126         int (*cb)(unsigned int cpu);
127         int ret, cnt;
128
129         if (!step->multi_instance) {
130                 cb = bringup ? step->startup : step->teardown;
131                 if (!cb)
132                         return 0;
133                 trace_cpuhp_enter(cpu, st->target, state, cb);
134                 ret = cb(cpu);
135                 trace_cpuhp_exit(cpu, st->state, state, ret);
136                 return ret;
137         }
138         cbm = bringup ? step->startup_multi : step->teardown_multi;
139         if (!cbm)
140                 return 0;
141
142         /* Single invocation for instance add/remove */
143         if (node) {
144                 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
145                 ret = cbm(cpu, node);
146                 trace_cpuhp_exit(cpu, st->state, state, ret);
147                 return ret;
148         }
149
150         /* State transition. Invoke on all instances */
151         cnt = 0;
152         hlist_for_each(node, &step->list) {
153                 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
154                 ret = cbm(cpu, node);
155                 trace_cpuhp_exit(cpu, st->state, state, ret);
156                 if (ret)
157                         goto err;
158                 cnt++;
159         }
160         return 0;
161 err:
162         /* Rollback the instances if one failed */
163         cbm = !bringup ? step->startup_multi : step->teardown_multi;
164         if (!cbm)
165                 return ret;
166
167         hlist_for_each(node, &step->list) {
168                 if (!cnt--)
169                         break;
170                 cbm(cpu, node);
171         }
172         return ret;
173 }
174
175 #ifdef CONFIG_SMP
176 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
177 static DEFINE_MUTEX(cpu_add_remove_lock);
    /*
     * Set by _cpu_down() from its tasks_frozen argument; __cpu_notify() ORs
     * CPU_TASKS_FROZEN into the notifier event while it is set.
     */
178 bool cpuhp_tasks_frozen;
179 EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
180
181 /*
182  * The following two APIs (cpu_maps_update_begin/done) must be used when
183  * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
184  * The APIs cpu_notifier_register_begin/done() must be used to protect CPU
185  * hotplug callback (un)registration performed using __register_cpu_notifier()
186  * or __unregister_cpu_notifier().
187  */
188 void cpu_maps_update_begin(void)
189 {
190         mutex_lock(&cpu_add_remove_lock);
191 }
192 EXPORT_SYMBOL(cpu_notifier_register_begin);
193
194 void cpu_maps_update_done(void)
195 {
196         mutex_unlock(&cpu_add_remove_lock);
197 }
198 EXPORT_SYMBOL(cpu_notifier_register_done);
199
    /* Notifier chain invoked by __cpu_notify() for cpu hotplug events */
200 static RAW_NOTIFIER_HEAD(cpu_chain);
201
202 /* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
203  * Should always be manipulated under cpu_add_remove_lock
    * Used as a counter: cpu_hotplug_disable() increments it and
    * cpu_hotplug_enable() decrements it.
204  */
205 static int cpu_hotplug_disabled;
206
207 #ifdef CONFIG_HOTPLUG_CPU
208
    /*
     * Reader/writer bookkeeping for cpu hotplug: get_online_cpus() and
     * put_online_cpus() are the reader side, cpu_hotplug_begin() and
     * cpu_hotplug_done() the writer side.
     */
209 static struct {
    /* task currently between cpu_hotplug_begin() and cpu_hotplug_done() */
210         struct task_struct *active_writer;
211         /* wait queue to wake up the active_writer */
212         wait_queue_head_t wq;
213         /* verifies that no writer will get active while readers are active */
214         struct mutex lock;
215         /*
216          * Also blocks the new readers during
217          * an ongoing cpu hotplug operation.
218          */
219         atomic_t refcount;
220
221 #ifdef CONFIG_DEBUG_LOCK_ALLOC
    /* lockdep map used by the cpuhp_lock_*() annotation macros */
222         struct lockdep_map dep_map;
223 #endif
224 } cpu_hotplug = {
225         .active_writer = NULL,
226         .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
227         .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
228 #ifdef CONFIG_DEBUG_LOCK_ALLOC
229         .dep_map = {.name = "cpu_hotplug.lock" },
230 #endif
231 };
232
233 /* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
    /* All four operate on cpu_hotplug.dep_map. */
234 #define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
235 #define cpuhp_lock_acquire_tryread() \
236                                   lock_map_acquire_tryread(&cpu_hotplug.dep_map)
237 #define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
238 #define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)
239
240
241 void get_online_cpus(void)
242 {
243         might_sleep();
244         if (cpu_hotplug.active_writer == current)
245                 return;
246         cpuhp_lock_acquire_read();
247         mutex_lock(&cpu_hotplug.lock);
248         atomic_inc(&cpu_hotplug.refcount);
249         mutex_unlock(&cpu_hotplug.lock);
250 }
251 EXPORT_SYMBOL_GPL(get_online_cpus);
252
253 void put_online_cpus(void)
254 {
255         int refcount;
256
257         if (cpu_hotplug.active_writer == current)
258                 return;
259
260         refcount = atomic_dec_return(&cpu_hotplug.refcount);
261         if (WARN_ON(refcount < 0)) /* try to fix things up */
262                 atomic_inc(&cpu_hotplug.refcount);
263
264         if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
265                 wake_up(&cpu_hotplug.wq);
266
267         cpuhp_lock_release();
268
269 }
270 EXPORT_SYMBOL_GPL(put_online_cpus);
271
272 /*
273  * This ensures that the hotplug operation can begin only when the
274  * refcount goes to zero.
275  *
276  * Note that during a cpu-hotplug operation, the new readers, if any,
277  * will be blocked by the cpu_hotplug.lock
278  *
279  * Since cpu_hotplug_begin() is always called after invoking
280  * cpu_maps_update_begin(), we can be sure that only one writer is active.
281  *
282  * Note that theoretically, there is a possibility of a livelock:
283  * - Refcount goes to zero, last reader wakes up the sleeping
284  *   writer.
285  * - Last reader unlocks the cpu_hotplug.lock.
286  * - A new reader arrives at this moment, bumps up the refcount.
287  * - The writer acquires the cpu_hotplug.lock finds the refcount
288  *   non zero and goes to sleep again.
289  *
290  * However, this is very difficult to achieve in practice since
291  * get_online_cpus() is not an API which is called all that often.
292  *
293  */
294 void cpu_hotplug_begin(void)
295 {
296         DEFINE_WAIT(wait);
297
298         cpu_hotplug.active_writer = current;
299         cpuhp_lock_acquire();
300
301         for (;;) {
302                 mutex_lock(&cpu_hotplug.lock);
303                 prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
304                 if (likely(!atomic_read(&cpu_hotplug.refcount)))
305                                 break;
306                 mutex_unlock(&cpu_hotplug.lock);
307                 schedule();
308         }
309         finish_wait(&cpu_hotplug.wq, &wait);
310 }
311
312 void cpu_hotplug_done(void)
313 {
314         cpu_hotplug.active_writer = NULL;
315         mutex_unlock(&cpu_hotplug.lock);
316         cpuhp_lock_release();
317 }
318
319 /*
320  * Wait for currently running CPU hotplug operations to complete (if any) and
321  * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
322  * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
323  * hotplug path before performing hotplug operations. So acquiring that lock
324  * guarantees mutual exclusion from any currently running hotplug operations.
325  */
326 void cpu_hotplug_disable(void)
327 {
328         cpu_maps_update_begin();
329         cpu_hotplug_disabled++;
330         cpu_maps_update_done();
331 }
332 EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
333
334 void cpu_hotplug_enable(void)
335 {
336         cpu_maps_update_begin();
337         WARN_ON(--cpu_hotplug_disabled < 0);
338         cpu_maps_update_done();
339 }
340 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
341 #endif  /* CONFIG_HOTPLUG_CPU */
342
343 /* Need to know about CPUs going up/down? */
344 int register_cpu_notifier(struct notifier_block *nb)
345 {
346         int ret;
347         cpu_maps_update_begin();
348         ret = raw_notifier_chain_register(&cpu_chain, nb);
349         cpu_maps_update_done();
350         return ret;
351 }
352
353 int __register_cpu_notifier(struct notifier_block *nb)
354 {
355         return raw_notifier_chain_register(&cpu_chain, nb);
356 }
357
358 static int __cpu_notify(unsigned long val, unsigned int cpu, int nr_to_call,
359                         int *nr_calls)
360 {
361         unsigned long mod = cpuhp_tasks_frozen ? CPU_TASKS_FROZEN : 0;
362         void *hcpu = (void *)(long)cpu;
363
364         int ret;
365
366         ret = __raw_notifier_call_chain(&cpu_chain, val | mod, hcpu, nr_to_call,
367                                         nr_calls);
368
369         return notifier_to_errno(ret);
370 }
371
372 static int cpu_notify(unsigned long val, unsigned int cpu)
373 {
374         return __cpu_notify(val, cpu, -1, NULL);
375 }
376
/* Like cpu_notify(), but any error from the notifiers is a BUG. */
static void cpu_notify_nofail(unsigned long val, unsigned int cpu)
{
	int err = cpu_notify(val, cpu);

	BUG_ON(err);
}
381
382 /* Notifier wrappers for transitioning to state machine */
383 static int notify_prepare(unsigned int cpu)
384 {
385         int nr_calls = 0;
386         int ret;
387
388         ret = __cpu_notify(CPU_UP_PREPARE, cpu, -1, &nr_calls);
389         if (ret) {
390                 nr_calls--;
391                 printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
392                                 __func__, cpu);
393                 __cpu_notify(CPU_UP_CANCELED, cpu, nr_calls, NULL);
394         }
395         return ret;
396 }
397
398 static int notify_online(unsigned int cpu)
399 {
400         cpu_notify(CPU_ONLINE, cpu);
401         return 0;
402 }
403
404 static int notify_starting(unsigned int cpu)
405 {
406         cpu_notify(CPU_STARTING, cpu);
407         return 0;
408 }
409
410 static int bringup_wait_for_ap(unsigned int cpu)
411 {
412         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
413
414         wait_for_completion(&st->done);
415         return st->result;
416 }
417
418 static int bringup_cpu(unsigned int cpu)
419 {
420         struct task_struct *idle = idle_thread_get(cpu);
421         int ret;
422
423         /*
424          * Some architectures have to walk the irq descriptors to
425          * setup the vector space for the cpu which comes online.
426          * Prevent irq alloc/free across the bringup.
427          */
428         irq_lock_sparse();
429
430         /* Arch-specific enabling code. */
431         ret = __cpu_up(cpu, idle);
432         irq_unlock_sparse();
433         if (ret) {
434                 cpu_notify(CPU_UP_CANCELED, cpu);
435                 return ret;
436         }
437         ret = bringup_wait_for_ap(cpu);
438         BUG_ON(!cpu_online(cpu));
439         return ret;
440 }
441
442 /*
443  * Hotplug state machine related functions
444  */
445 static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
446 {
447         for (st->state++; st->state < st->target; st->state++) {
448                 struct cpuhp_step *step = cpuhp_get_step(st->state);
449
450                 if (!step->skip_onerr)
451                         cpuhp_invoke_callback(cpu, st->state, true, NULL);
452         }
453 }
454
455 static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
456                                 enum cpuhp_state target)
457 {
458         enum cpuhp_state prev_state = st->state;
459         int ret = 0;
460
461         for (; st->state > target; st->state--) {
462                 ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
463                 if (ret) {
464                         st->target = prev_state;
465                         undo_cpu_down(cpu, st);
466                         break;
467                 }
468         }
469         return ret;
470 }
471
472 static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
473 {
474         for (st->state--; st->state > st->target; st->state--) {
475                 struct cpuhp_step *step = cpuhp_get_step(st->state);
476
477                 if (!step->skip_onerr)
478                         cpuhp_invoke_callback(cpu, st->state, false, NULL);
479         }
480 }
481
482 static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
483                               enum cpuhp_state target)
484 {
485         enum cpuhp_state prev_state = st->state;
486         int ret = 0;
487
488         while (st->state < target) {
489                 st->state++;
490                 ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
491                 if (ret) {
492                         st->target = prev_state;
493                         undo_cpu_up(cpu, st);
494                         break;
495                 }
496         }
497         return ret;
498 }
499
500 /*
501  * The cpu hotplug threads manage the bringup and teardown of the cpus
502  */
503 static void cpuhp_create(unsigned int cpu)
504 {
505         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
506
507         init_completion(&st->done);
508 }
509
510 static int cpuhp_should_run(unsigned int cpu)
511 {
512         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
513
514         return st->should_run;
515 }
516
517 /* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */
518 static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st)
519 {
520         enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
521
522         return cpuhp_down_callbacks(cpu, st, target);
523 }
524
525 /* Execute the online startup callbacks. Used to be CPU_ONLINE */
526 static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
527 {
528         return cpuhp_up_callbacks(cpu, st, st->target);
529 }
530
531 /*
532  * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
533  * callbacks when a state gets [un]installed at runtime.
534  */
535 static void cpuhp_thread_fun(unsigned int cpu)
536 {
537         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
538         int ret = 0;
539
540         /*
541          * Paired with the mb() in cpuhp_kick_ap_work and
542          * cpuhp_invoke_ap_callback, so the work set is consistently visible.
543          */
544         smp_mb();
            /* Nothing was queued for this thread */
545         if (!st->should_run)
546                 return;
547
            /* Consume the request before doing the work */
548         st->should_run = false;
549
550         /* Single callback invocation for [un]install ? */
551         if (st->single) {
                    /* States below CPUHP_AP_ONLINE get their callback with interrupts off */
552                 if (st->cb_state < CPUHP_AP_ONLINE) {
553                         local_irq_disable();
554                         ret = cpuhp_invoke_callback(cpu, st->cb_state,
555                                                     st->bringup, st->node);
556                         local_irq_enable();
557                 } else {
558                         ret = cpuhp_invoke_callback(cpu, st->cb_state,
559                                                     st->bringup, st->node);
560                 }
561         } else if (st->rollback) {
                    /* Rollback requested by _cpu_down() after a failed teardown */
562                 BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
563
564                 undo_cpu_down(cpu, st);
565                 /*
566                  * This is a momentary workaround to keep the notifier users
567                  * happy. Will go away once we got rid of the notifiers.
568                  */
569                 cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
570                 st->rollback = false;
571         } else {
572                 /* Cannot happen .... */
573                 BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
574
575                 /* Regular hotplug work */
576                 if (st->state < st->target)
577                         ret = cpuhp_ap_online(cpu, st);
578                 else if (st->state > st->target)
579                         ret = cpuhp_ap_offline(cpu, st);
580         }
            /* Publish the outcome and wake the task waiting on st->done */
581         st->result = ret;
582         complete(&st->done);
583 }
584
585 /* Invoke a single callback on a remote cpu */
586 static int
587 cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
588                          struct hlist_node *node)
589 {
590         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
591
592         if (!cpu_online(cpu))
593                 return 0;
594
595         /*
596          * If we are up and running, use the hotplug thread. For early calls
597          * we invoke the thread function directly.
598          */
599         if (!st->thread)
600                 return cpuhp_invoke_callback(cpu, state, bringup, node);
601
602         st->cb_state = state;
603         st->single = true;
604         st->bringup = bringup;
605         st->node = node;
606
607         /*
608          * Make sure the above stores are visible before should_run becomes
609          * true. Paired with the mb() above in cpuhp_thread_fun()
610          */
611         smp_mb();
612         st->should_run = true;
613         wake_up_process(st->thread);
614         wait_for_completion(&st->done);
615         return st->result;
616 }
617
618 /* Regular hotplug invocation of the AP hotplug thread */
619 static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
620 {
621         st->result = 0;
622         st->single = false;
623         /*
624          * Make sure the above stores are visible before should_run becomes
625          * true. Paired with the mb() above in cpuhp_thread_fun()
626          */
627         smp_mb();
628         st->should_run = true;
629         wake_up_process(st->thread);
630 }
631
632 static int cpuhp_kick_ap_work(unsigned int cpu)
633 {
634         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
635         enum cpuhp_state state = st->state;
636
637         trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
638         __cpuhp_kick_ap_work(st);
639         wait_for_completion(&st->done);
640         trace_cpuhp_exit(cpu, st->state, state, st->result);
641         return st->result;
642 }
643
    /*
     * Descriptor of the per-cpu "cpuhp/%u" hotplug threads, registered in
     * cpuhp_threads_init(). The thread pointer is stored in the per-cpu
     * cpuhp_state.thread member.
     */
644 static struct smp_hotplug_thread cpuhp_threads = {
645         .store                  = &cpuhp_state.thread,
646         .create                 = &cpuhp_create,
647         .thread_should_run      = cpuhp_should_run,
648         .thread_fn              = cpuhp_thread_fun,
649         .thread_comm            = "cpuhp/%u",
650         .selfparking            = true,
651 };
652
653 void __init cpuhp_threads_init(void)
654 {
655         BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
656         kthread_unpark(this_cpu_read(cpuhp_state.thread));
657 }
658
659 #ifdef CONFIG_HOTPLUG_CPU
660 EXPORT_SYMBOL(register_cpu_notifier);
661 EXPORT_SYMBOL(__register_cpu_notifier);
662 void unregister_cpu_notifier(struct notifier_block *nb)
663 {
664         cpu_maps_update_begin();
665         raw_notifier_chain_unregister(&cpu_chain, nb);
666         cpu_maps_update_done();
667 }
668 EXPORT_SYMBOL(unregister_cpu_notifier);
669
670 void __unregister_cpu_notifier(struct notifier_block *nb)
671 {
672         raw_notifier_chain_unregister(&cpu_chain, nb);
673 }
674 EXPORT_SYMBOL(__unregister_cpu_notifier);
675
676 /**
677  * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
678  * @cpu: a CPU id
679  *
680  * This function walks all processes, finds a valid mm struct for each one and
681  * then clears a corresponding bit in mm's cpumask.  While this all sounds
682  * trivial, there are various non-obvious corner cases, which this function
683  * tries to solve in a safe manner.
684  *
685  * Also note that the function uses a somewhat relaxed locking scheme, so it may
686  * be called only for an already offlined CPU.
687  */
688 void clear_tasks_mm_cpumask(int cpu)
689 {
690         struct task_struct *p;
691
692         /*
693          * This function is called after the cpu is taken down and marked
694          * offline, so its not like new tasks will ever get this cpu set in
695          * their mm mask. -- Peter Zijlstra
696          * Thus, we may use rcu_read_lock() here, instead of grabbing
697          * full-fledged tasklist_lock.
698          */
699         WARN_ON(cpu_online(cpu));
700         rcu_read_lock();
701         for_each_process(p) {
702                 struct task_struct *t;
703
704                 /*
705                  * Main thread might exit, but other threads may still have
706                  * a valid mm. Find one.
707                  */
708                 t = find_lock_task_mm(p);
709                 if (!t)
710                         continue;
711                 cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
712                 task_unlock(t);
713         }
714         rcu_read_unlock();
715 }
716
717 static inline void check_for_tasks(int dead_cpu)
718 {
719         struct task_struct *g, *p;
720
721         read_lock(&tasklist_lock);
722         for_each_process_thread(g, p) {
723                 if (!p->on_rq)
724                         continue;
725                 /*
726                  * We do the check with unlocked task_rq(p)->lock.
727                  * Order the reading to do not warn about a task,
728                  * which was running on this cpu in the past, and
729                  * it's just been woken on another cpu.
730                  */
731                 rmb();
732                 if (task_cpu(p) != dead_cpu)
733                         continue;
734
735                 pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
736                         p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
737         }
738         read_unlock(&tasklist_lock);
739 }
740
741 static int notify_down_prepare(unsigned int cpu)
742 {
743         int err, nr_calls = 0;
744
745         err = __cpu_notify(CPU_DOWN_PREPARE, cpu, -1, &nr_calls);
746         if (err) {
747                 nr_calls--;
748                 __cpu_notify(CPU_DOWN_FAILED, cpu, nr_calls, NULL);
749                 pr_warn("%s: attempt to take down CPU %u failed\n",
750                                 __func__, cpu);
751         }
752         return err;
753 }
754
755 static int notify_dying(unsigned int cpu)
756 {
757         cpu_notify(CPU_DYING, cpu);
758         return 0;
759 }
760
761 /* Take this CPU down. */
762 static int take_cpu_down(void *_param)
763 {
764         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
765         enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
766         int err, cpu = smp_processor_id();
767
768         /* Ensure this CPU doesn't handle any more interrupts. */
769         err = __cpu_disable();
770         if (err < 0)
771                 return err;
772
773         /*
774          * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
775          * do this step again.
776          */
777         WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
778         st->state--;
779         /* Invoke the former CPU_DYING callbacks */
780         for (; st->state > target; st->state--)
781                 cpuhp_invoke_callback(cpu, st->state, false, NULL);
782
783         /* Give up timekeeping duties */
784         tick_handover_do_timer();
785         /* Park the stopper thread */
786         stop_machine_park(cpu);
787         return 0;
788 }
789
    /*
     * Take @cpu offline: park its threads, run take_cpu_down() on it via
     * stop_machine(), wait until it reports CPUHP_AP_IDLE_DEAD and let the
     * architecture code kill it.
     *
     * Returns the stop_machine() error if the cpu refused to die (the
     * hotplug thread is unparked again in that case), 0 otherwise.
     */
790 static int takedown_cpu(unsigned int cpu)
791 {
792         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
793         int err;
794
795         /* Park the smpboot threads */
796         kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
797         smpboot_park_threads(cpu);
798
799         /*
800          * Prevent irq alloc/free while the dying cpu reorganizes the
801          * interrupt affinities.
802          */
803         irq_lock_sparse();
804
805         /*
806          * So now all preempt/rcu users must observe !cpu_active().
807          */
808         err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
809         if (err) {
810                 /* CPU refused to die */
811                 irq_unlock_sparse();
812                 /* Unpark the hotplug thread so we can rollback there */
813                 kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
814                 return err;
815         }
816         BUG_ON(cpu_online(cpu));
817
818         /*
819          * The migration_call() CPU_DYING callback will have removed all
820          * runnable tasks from the cpu, there's only the idle task left now
821          * that the migration thread is done doing the stop_machine thing.
822          *
823          * Wait for the stop thread to go away.
            *
            * The completion is signalled through cpuhp_complete_idle_dead(),
            * which cpuhp_report_idle_dead() schedules after setting the
            * state to CPUHP_AP_IDLE_DEAD.
824          */
825         wait_for_completion(&st->done);
826         BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
827
828         /* Interrupts are moved away from the dying cpu, reenable alloc/free */
829         irq_unlock_sparse();
830
831         hotplug_cpu__broadcast_tick_pull(cpu);
832         /* This actually kills the CPU. */
833         __cpu_die(cpu);
834
835         tick_cleanup_dead_cpu(cpu);
836         return 0;
837 }
838
839 static int notify_dead(unsigned int cpu)
840 {
841         cpu_notify_nofail(CPU_DEAD, cpu);
842         check_for_tasks(cpu);
843         return 0;
844 }
845
846 static void cpuhp_complete_idle_dead(void *arg)
847 {
848         struct cpuhp_cpu_state *st = arg;
849
850         complete(&st->done);
851 }
852
853 void cpuhp_report_idle_dead(void)
854 {
855         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
856
857         BUG_ON(st->state != CPUHP_AP_OFFLINE);
858         rcu_report_dead(smp_processor_id());
859         st->state = CPUHP_AP_IDLE_DEAD;
860         /*
861          * We cannot call complete after rcu_report_dead() so we delegate it
862          * to an online cpu.
863          */
864         smp_call_function_single(cpumask_first(cpu_online_mask),
865                                  cpuhp_complete_idle_dead, st, 0);
866 }
867
868 #else
    /*
     * Without CONFIG_HOTPLUG_CPU the teardown functions above are not
     * built, so their names expand to NULL.
     * NOTE(review): presumably consumed as teardown pointers by the state
     * tables defined later in the file - confirm.
     */
869 #define notify_down_prepare     NULL
870 #define takedown_cpu            NULL
871 #define notify_dead             NULL
872 #define notify_dying            NULL
873 #endif
874
875 #ifdef CONFIG_HOTPLUG_CPU
876
877 /* Requires cpu_add_remove_lock to be held */
    /*
     * Bring @cpu down to hotplug state @target.
     *
     * @tasks_frozen is copied to cpuhp_tasks_frozen for the duration of
     * the operation. The whole transition runs inside
     * cpu_hotplug_begin()/cpu_hotplug_done().
     *
     * Returns -EBUSY if @cpu is the only online cpu, -EINVAL if @cpu is not
     * present, the error of a failed teardown callback, or 0 on success.
     */
878 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
879                            enum cpuhp_state target)
880 {
881         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
882         int prev_state, ret = 0;
883         bool hasdied = false;
884
885         if (num_online_cpus() == 1)
886                 return -EBUSY;
887
888         if (!cpu_present(cpu))
889                 return -EINVAL;
890
891         cpu_hotplug_begin();
892
893         cpuhp_tasks_frozen = tasks_frozen;
894
            /* Remember where we started for the rollback and hasdied checks */
895         prev_state = st->state;
896         st->target = target;
897         /*
898          * If the current CPU state is in the range of the AP hotplug thread,
899          * then we need to kick the thread.
900          */
901         if (st->state > CPUHP_TEARDOWN_CPU) {
902                 ret = cpuhp_kick_ap_work(cpu);
903                 /*
904                  * The AP side has done the error rollback already. Just
905                  * return the error code..
906                  */
907                 if (ret)
908                         goto out;
909
910                 /*
911                  * We might have stopped still in the range of the AP hotplug
912                  * thread. Nothing to do anymore.
913                  */
914                 if (st->state > CPUHP_TEARDOWN_CPU)
915                         goto out;
916         }
917         /*
918          * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
919          * to do the further cleanups.
920          */
921         ret = cpuhp_down_callbacks(cpu, st, target);
            /*
             * The teardown failed while the cpu is still in the AP range and
             * below the state it started from: let the hotplug thread roll
             * back up to the previous state.
             */
922         if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
923                 st->target = prev_state;
924                 st->rollback = true;
925                 cpuhp_kick_ap_work(cpu);
926         }
927
            /* True only if this call moved the cpu all the way to CPUHP_OFFLINE */
928         hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
929 out:
930         cpu_hotplug_done();
931         /* This post dead nonsense must die */
932         if (!ret && hasdied)
933                 cpu_notify_nofail(CPU_POST_DEAD, cpu);
934         return ret;
935 }
936
937 static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
938 {
939         int err;
940
941         cpu_maps_update_begin();
942
943         if (cpu_hotplug_disabled) {
944                 err = -EBUSY;
945                 goto out;
946         }
947
948         err = _cpu_down(cpu, 0, target);
949
950 out:
951         cpu_maps_update_done();
952         return err;
953 }
954 int cpu_down(unsigned int cpu)
955 {
956         return do_cpu_down(cpu, CPUHP_OFFLINE);
957 }
958 EXPORT_SYMBOL(cpu_down);
959 #endif /*CONFIG_HOTPLUG_CPU*/
960
961 /**
962  * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
963  * @cpu: cpu that just started
964  *
965  * This function calls the cpu_chain notifiers with CPU_STARTING.
966  * It must be called by the arch code on the new cpu, before the new cpu
967  * enables interrupts and before the "boot" cpu returns from __cpu_up().
968  */
969 void notify_cpu_starting(unsigned int cpu)
970 {
971         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
972         enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
973
974         while (st->state < target) {
975                 st->state++;
976                 cpuhp_invoke_callback(cpu, st->state, true, NULL);
977         }
978 }
979
980 /*
981  * Called from the idle task. We need to set active here, so we can kick off
982  * the stopper thread and unpark the smpboot threads. If the target state is
983  * beyond CPUHP_AP_ONLINE_IDLE we kick cpuhp thread and let it bring up the
984  * cpu further.
985  */
986 void cpuhp_online_idle(enum cpuhp_state state)
987 {
988         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
989         unsigned int cpu = smp_processor_id();
990
991         /* Happens for the boot cpu */
992         if (state != CPUHP_AP_ONLINE_IDLE)
993                 return;
994
995         st->state = CPUHP_AP_ONLINE_IDLE;
996
997         /* Unpark the stopper thread and the hotplug thread of this cpu */
998         stop_machine_unpark(cpu);
999         kthread_unpark(st->thread);
1000
1001         /* Should we go further up ? */
1002         if (st->target > CPUHP_AP_ONLINE_IDLE)
1003                 __cpuhp_kick_ap_work(st);
1004         else
1005                 complete(&st->done);
1006 }
1007
1008 /* Requires cpu_add_remove_lock to be held */
1009 static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1010 {
1011         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1012         struct task_struct *idle;
1013         int ret = 0;
1014
1015         cpu_hotplug_begin();
1016
1017         if (!cpu_present(cpu)) {
1018                 ret = -EINVAL;
1019                 goto out;
1020         }
1021
1022         /*
1023          * The caller of do_cpu_up might have raced with another
1024          * caller. Ignore it for now.
1025          */
1026         if (st->state >= target)
1027                 goto out;
1028
1029         if (st->state == CPUHP_OFFLINE) {
1030                 /* Let it fail before we try to bring the cpu up */
1031                 idle = idle_thread_get(cpu);
1032                 if (IS_ERR(idle)) {
1033                         ret = PTR_ERR(idle);
1034                         goto out;
1035                 }
1036         }
1037
1038         cpuhp_tasks_frozen = tasks_frozen;
1039
1040         st->target = target;
1041         /*
1042          * If the current CPU state is in the range of the AP hotplug thread,
1043          * then we need to kick the thread once more.
1044          */
1045         if (st->state > CPUHP_BRINGUP_CPU) {
1046                 ret = cpuhp_kick_ap_work(cpu);
1047                 /*
1048                  * The AP side has done the error rollback already. Just
1049                  * return the error code..
1050                  */
1051                 if (ret)
1052                         goto out;
1053         }
1054
1055         /*
1056          * Try to reach the target state. We max out on the BP at
1057          * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
1058          * responsible for bringing it up to the target state.
1059          */
1060         target = min((int)target, CPUHP_BRINGUP_CPU);
1061         ret = cpuhp_up_callbacks(cpu, st, target);
1062 out:
1063         cpu_hotplug_done();
1064         return ret;
1065 }
1066
1067 static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
1068 {
1069         int err = 0;
1070
1071         if (!cpu_possible(cpu)) {
1072                 pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
1073                        cpu);
1074 #if defined(CONFIG_IA64)
1075                 pr_err("please check additional_cpus= boot parameter\n");
1076 #endif
1077                 return -EINVAL;
1078         }
1079
1080         err = try_online_node(cpu_to_node(cpu));
1081         if (err)
1082                 return err;
1083
1084         cpu_maps_update_begin();
1085
1086         if (cpu_hotplug_disabled) {
1087                 err = -EBUSY;
1088                 goto out;
1089         }
1090
1091         err = _cpu_up(cpu, 0, target);
1092 out:
1093         cpu_maps_update_done();
1094         return err;
1095 }
1096
1097 int cpu_up(unsigned int cpu)
1098 {
1099         return do_cpu_up(cpu, CPUHP_ONLINE);
1100 }
1101 EXPORT_SYMBOL_GPL(cpu_up);
1102
1103 #ifdef CONFIG_PM_SLEEP_SMP
1104 static cpumask_var_t frozen_cpus;
1105
1106 int disable_nonboot_cpus(void)
1107 {
1108         int cpu, first_cpu, error = 0;
1109
1110         cpu_maps_update_begin();
1111         first_cpu = cpumask_first(cpu_online_mask);
1112         /*
1113          * We take down all of the non-boot CPUs in one shot to avoid races
1114          * with the userspace trying to use the CPU hotplug at the same time
1115          */
1116         cpumask_clear(frozen_cpus);
1117
1118         pr_info("Disabling non-boot CPUs ...\n");
1119         for_each_online_cpu(cpu) {
1120                 if (cpu == first_cpu)
1121                         continue;
1122                 trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1123                 error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1124                 trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
1125                 if (!error)
1126                         cpumask_set_cpu(cpu, frozen_cpus);
1127                 else {
1128                         pr_err("Error taking CPU%d down: %d\n", cpu, error);
1129                         break;
1130                 }
1131         }
1132
1133         if (!error)
1134                 BUG_ON(num_online_cpus() > 1);
1135         else
1136                 pr_err("Non-boot CPUs are not disabled\n");
1137
1138         /*
1139          * Make sure the CPUs won't be enabled by someone else. We need to do
1140          * this even in case of failure as all disable_nonboot_cpus() users are
1141          * supposed to do enable_nonboot_cpus() on the failure path.
1142          */
1143         cpu_hotplug_disabled++;
1144
1145         cpu_maps_update_done();
1146         return error;
1147 }
1148
1149 void __weak arch_enable_nonboot_cpus_begin(void)
1150 {
1151 }
1152
1153 void __weak arch_enable_nonboot_cpus_end(void)
1154 {
1155 }
1156
1157 void enable_nonboot_cpus(void)
1158 {
1159         int cpu, error;
1160
1161         /* Allow everyone to use the CPU hotplug again */
1162         cpu_maps_update_begin();
1163         WARN_ON(--cpu_hotplug_disabled < 0);
1164         if (cpumask_empty(frozen_cpus))
1165                 goto out;
1166
1167         pr_info("Enabling non-boot CPUs ...\n");
1168
1169         arch_enable_nonboot_cpus_begin();
1170
1171         for_each_cpu(cpu, frozen_cpus) {
1172                 trace_suspend_resume(TPS("CPU_ON"), cpu, true);
1173                 error = _cpu_up(cpu, 1, CPUHP_ONLINE);
1174                 trace_suspend_resume(TPS("CPU_ON"), cpu, false);
1175                 if (!error) {
1176                         pr_info("CPU%d is up\n", cpu);
1177                         continue;
1178                 }
1179                 pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1180         }
1181
1182         arch_enable_nonboot_cpus_end();
1183
1184         cpumask_clear(frozen_cpus);
1185 out:
1186         cpu_maps_update_done();
1187 }
1188
1189 static int __init alloc_frozen_cpus(void)
1190 {
1191         if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
1192                 return -ENOMEM;
1193         return 0;
1194 }
1195 core_initcall(alloc_frozen_cpus);
1196
1197 /*
1198  * When callbacks for CPU hotplug notifications are being executed, we must
1199  * ensure that the state of the system with respect to the tasks being frozen
1200  * or not, as reported by the notification, remains unchanged *throughout the
1201  * duration* of the execution of the callbacks.
1202  * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1203  *
1204  * This synchronization is implemented by mutually excluding regular CPU
1205  * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1206  * Hibernate notifications.
1207  */
1208 static int
1209 cpu_hotplug_pm_callback(struct notifier_block *nb,
1210                         unsigned long action, void *ptr)
1211 {
1212         switch (action) {
1213
1214         case PM_SUSPEND_PREPARE:
1215         case PM_HIBERNATION_PREPARE:
1216                 cpu_hotplug_disable();
1217                 break;
1218
1219         case PM_POST_SUSPEND:
1220         case PM_POST_HIBERNATION:
1221                 cpu_hotplug_enable();
1222                 break;
1223
1224         default:
1225                 return NOTIFY_DONE;
1226         }
1227
1228         return NOTIFY_OK;
1229 }
1230
1231
1232 static int __init cpu_hotplug_pm_sync_init(void)
1233 {
1234         /*
1235          * cpu_hotplug_pm_callback has higher priority than x86
1236          * bsp_pm_callback which depends on cpu_hotplug_pm_callback
1237          * to disable cpu hotplug to avoid cpu hotplug race.
1238          */
1239         pm_notifier(cpu_hotplug_pm_callback, 0);
1240         return 0;
1241 }
1242 core_initcall(cpu_hotplug_pm_sync_init);
1243
1244 #endif /* CONFIG_PM_SLEEP_SMP */
1245
1246 #endif /* CONFIG_SMP */
1247
1248 /* Boot processor state steps */
1249 static struct cpuhp_step cpuhp_bp_states[] = {
1250         [CPUHP_OFFLINE] = {
1251                 .name                   = "offline",
1252                 .startup                = NULL,
1253                 .teardown               = NULL,
1254         },
1255 #ifdef CONFIG_SMP
1256         [CPUHP_CREATE_THREADS]= {
1257                 .name                   = "threads:create",
1258                 .startup                = smpboot_create_threads,
1259                 .teardown               = NULL,
1260                 .cant_stop              = true,
1261         },
1262         [CPUHP_PERF_PREPARE] = {
1263                 .name = "perf prepare",
1264                 .startup = perf_event_init_cpu,
1265                 .teardown = perf_event_exit_cpu,
1266         },
1267         [CPUHP_WORKQUEUE_PREP] = {
1268                 .name = "workqueue prepare",
1269                 .startup = workqueue_prepare_cpu,
1270                 .teardown = NULL,
1271         },
1272         [CPUHP_HRTIMERS_PREPARE] = {
1273                 .name = "hrtimers prepare",
1274                 .startup = hrtimers_prepare_cpu,
1275                 .teardown = hrtimers_dead_cpu,
1276         },
1277         [CPUHP_SMPCFD_PREPARE] = {
1278                 .name = "SMPCFD prepare",
1279                 .startup = smpcfd_prepare_cpu,
1280                 .teardown = smpcfd_dead_cpu,
1281         },
1282         [CPUHP_RCUTREE_PREP] = {
1283                 .name = "RCU-tree prepare",
1284                 .startup = rcutree_prepare_cpu,
1285                 .teardown = rcutree_dead_cpu,
1286         },
1287         /*
1288          * Preparatory and dead notifiers. Will be replaced once the notifiers
1289          * are converted to states.
1290          */
1291         [CPUHP_NOTIFY_PREPARE] = {
1292                 .name                   = "notify:prepare",
1293                 .startup                = notify_prepare,
1294                 .teardown               = notify_dead,
1295                 .skip_onerr             = true,
1296                 .cant_stop              = true,
1297         },
1298         /*
1299          * On the tear-down path, timers_dead_cpu() must be invoked
1300          * before blk_mq_queue_reinit_notify() from notify_dead(),
1301          * otherwise a RCU stall occurs.
1302          */
1303         [CPUHP_TIMERS_DEAD] = {
1304                 .name = "timers dead",
1305                 .startup = NULL,
1306                 .teardown = timers_dead_cpu,
1307         },
1308         /* Kicks the plugged cpu into life */
1309         [CPUHP_BRINGUP_CPU] = {
1310                 .name                   = "cpu:bringup",
1311                 .startup                = bringup_cpu,
1312                 .teardown               = NULL,
1313                 .cant_stop              = true,
1314         },
1315         [CPUHP_AP_SMPCFD_DYING] = {
1316                 .startup = NULL,
1317                 .teardown = smpcfd_dying_cpu,
1318         },
1319         /*
1320          * Handled on controll processor until the plugged processor manages
1321          * this itself.
1322          */
1323         [CPUHP_TEARDOWN_CPU] = {
1324                 .name                   = "cpu:teardown",
1325                 .startup                = NULL,
1326                 .teardown               = takedown_cpu,
1327                 .cant_stop              = true,
1328         },
1329 #else
1330         [CPUHP_BRINGUP_CPU] = { },
1331 #endif
1332 };
1333
1334 /* Application processor state steps */
1335 static struct cpuhp_step cpuhp_ap_states[] = {
1336 #ifdef CONFIG_SMP
1337         /* Final state before CPU kills itself */
1338         [CPUHP_AP_IDLE_DEAD] = {
1339                 .name                   = "idle:dead",
1340         },
1341         /*
1342          * Last state before CPU enters the idle loop to die. Transient state
1343          * for synchronization.
1344          */
1345         [CPUHP_AP_OFFLINE] = {
1346                 .name                   = "ap:offline",
1347                 .cant_stop              = true,
1348         },
1349         /* First state is scheduler control. Interrupts are disabled */
1350         [CPUHP_AP_SCHED_STARTING] = {
1351                 .name                   = "sched:starting",
1352                 .startup                = sched_cpu_starting,
1353                 .teardown               = sched_cpu_dying,
1354         },
1355         [CPUHP_AP_RCUTREE_DYING] = {
1356                 .startup = NULL,
1357                 .teardown = rcutree_dying_cpu,
1358         },
1359         /*
1360          * Low level startup/teardown notifiers. Run with interrupts
1361          * disabled. Will be removed once the notifiers are converted to
1362          * states.
1363          */
1364         [CPUHP_AP_NOTIFY_STARTING] = {
1365                 .name                   = "notify:starting",
1366                 .startup                = notify_starting,
1367                 .teardown               = notify_dying,
1368                 .skip_onerr             = true,
1369                 .cant_stop              = true,
1370         },
1371         /* Entry state on starting. Interrupts enabled from here on. Transient
1372          * state for synchronsization */
1373         [CPUHP_AP_ONLINE] = {
1374                 .name                   = "ap:online",
1375         },
1376         /* Handle smpboot threads park/unpark */
1377         [CPUHP_AP_SMPBOOT_THREADS] = {
1378                 .name                   = "smpboot:threads",
1379                 .startup                = smpboot_unpark_threads,
1380                 .teardown               = NULL,
1381         },
1382         [CPUHP_AP_PERF_ONLINE] = {
1383                 .name = "perf online",
1384                 .startup = perf_event_init_cpu,
1385                 .teardown = perf_event_exit_cpu,
1386         },
1387         [CPUHP_AP_WORKQUEUE_ONLINE] = {
1388                 .name = "workqueue online",
1389                 .startup = workqueue_online_cpu,
1390                 .teardown = workqueue_offline_cpu,
1391         },
1392         [CPUHP_AP_RCUTREE_ONLINE] = {
1393                 .name = "RCU-tree online",
1394                 .startup = rcutree_online_cpu,
1395                 .teardown = rcutree_offline_cpu,
1396         },
1397
1398         /*
1399          * Online/down_prepare notifiers. Will be removed once the notifiers
1400          * are converted to states.
1401          */
1402         [CPUHP_AP_NOTIFY_ONLINE] = {
1403                 .name                   = "notify:online",
1404                 .startup                = notify_online,
1405                 .teardown               = notify_down_prepare,
1406                 .skip_onerr             = true,
1407         },
1408 #endif
1409         /*
1410          * The dynamically registered state space is here
1411          */
1412
1413 #ifdef CONFIG_SMP
1414         /* Last state is scheduler control setting the cpu active */
1415         [CPUHP_AP_ACTIVE] = {
1416                 .name                   = "sched:active",
1417                 .startup                = sched_cpu_activate,
1418                 .teardown               = sched_cpu_deactivate,
1419         },
1420 #endif
1421
1422         /* CPU is fully up and running. */
1423         [CPUHP_ONLINE] = {
1424                 .name                   = "online",
1425                 .startup                = NULL,
1426                 .teardown               = NULL,
1427         },
1428 };
1429
1430 /* Sanity check for callbacks */
1431 static int cpuhp_cb_check(enum cpuhp_state state)
1432 {
1433         if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
1434                 return -EINVAL;
1435         return 0;
1436 }
1437
1438 static void cpuhp_store_callbacks(enum cpuhp_state state,
1439                                   const char *name,
1440                                   int (*startup)(unsigned int cpu),
1441                                   int (*teardown)(unsigned int cpu),
1442                                   bool multi_instance)
1443 {
1444         /* (Un)Install the callbacks for further cpu hotplug operations */
1445         struct cpuhp_step *sp;
1446
1447         mutex_lock(&cpuhp_state_mutex);
1448         sp = cpuhp_get_step(state);
1449         sp->startup = startup;
1450         sp->teardown = teardown;
1451         sp->name = name;
1452         sp->multi_instance = multi_instance;
1453         INIT_HLIST_HEAD(&sp->list);
1454         mutex_unlock(&cpuhp_state_mutex);
1455 }
1456
1457 static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
1458 {
1459         return cpuhp_get_step(state)->teardown;
1460 }
1461
1462 /*
1463  * Call the startup/teardown function for a step either on the AP or
1464  * on the current CPU.
1465  */
1466 static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
1467                             struct hlist_node *node)
1468 {
1469         struct cpuhp_step *sp = cpuhp_get_step(state);
1470         int ret;
1471
1472         if ((bringup && !sp->startup) || (!bringup && !sp->teardown))
1473                 return 0;
1474         /*
1475          * The non AP bound callbacks can fail on bringup. On teardown
1476          * e.g. module removal we crash for now.
1477          */
1478 #ifdef CONFIG_SMP
1479         if (cpuhp_is_ap_state(state))
1480                 ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
1481         else
1482                 ret = cpuhp_invoke_callback(cpu, state, bringup, node);
1483 #else
1484         ret = cpuhp_invoke_callback(cpu, state, bringup, node);
1485 #endif
1486         BUG_ON(ret && !bringup);
1487         return ret;
1488 }
1489
1490 /*
1491  * Called from __cpuhp_setup_state on a recoverable failure.
1492  *
1493  * Note: The teardown callbacks for rollback are not allowed to fail!
1494  */
1495 static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
1496                                    struct hlist_node *node)
1497 {
1498         int cpu;
1499
1500         /* Roll back the already executed steps on the other cpus */
1501         for_each_present_cpu(cpu) {
1502                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1503                 int cpustate = st->state;
1504
1505                 if (cpu >= failedcpu)
1506                         break;
1507
1508                 /* Did we invoke the startup call on that cpu ? */
1509                 if (cpustate >= state)
1510                         cpuhp_issue_call(cpu, state, false, node);
1511         }
1512 }
1513
1514 /*
1515  * Returns a free for dynamic slot assignment of the Online state. The states
1516  * are protected by the cpuhp_slot_states mutex and an empty slot is identified
1517  * by having no name assigned.
1518  */
1519 static int cpuhp_reserve_state(enum cpuhp_state state)
1520 {
1521         enum cpuhp_state i;
1522
1523         mutex_lock(&cpuhp_state_mutex);
1524         for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) {
1525                 if (cpuhp_ap_states[i].name)
1526                         continue;
1527
1528                 cpuhp_ap_states[i].name = "Reserved";
1529                 mutex_unlock(&cpuhp_state_mutex);
1530                 return i;
1531         }
1532         mutex_unlock(&cpuhp_state_mutex);
1533         WARN(1, "No more dynamic states available for CPU hotplug\n");
1534         return -ENOSPC;
1535 }
1536
1537 int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
1538                                bool invoke)
1539 {
1540         struct cpuhp_step *sp;
1541         int cpu;
1542         int ret;
1543
1544         sp = cpuhp_get_step(state);
1545         if (sp->multi_instance == false)
1546                 return -EINVAL;
1547
1548         get_online_cpus();
1549
1550         if (!invoke || !sp->startup_multi)
1551                 goto add_node;
1552
1553         /*
1554          * Try to call the startup callback for each present cpu
1555          * depending on the hotplug state of the cpu.
1556          */
1557         for_each_present_cpu(cpu) {
1558                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1559                 int cpustate = st->state;
1560
1561                 if (cpustate < state)
1562                         continue;
1563
1564                 ret = cpuhp_issue_call(cpu, state, true, node);
1565                 if (ret) {
1566                         if (sp->teardown_multi)
1567                                 cpuhp_rollback_install(cpu, state, node);
1568                         goto err;
1569                 }
1570         }
1571 add_node:
1572         ret = 0;
1573         mutex_lock(&cpuhp_state_mutex);
1574         hlist_add_head(node, &sp->list);
1575         mutex_unlock(&cpuhp_state_mutex);
1576
1577 err:
1578         put_online_cpus();
1579         return ret;
1580 }
1581 EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
1582
1583 /**
1584  * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
1585  * @state:      The state to setup
1586  * @invoke:     If true, the startup function is invoked for cpus where
1587  *              cpu state >= @state
1588  * @startup:    startup callback function
1589  * @teardown:   teardown callback function
1590  *
1591  * Returns 0 if successful, otherwise a proper error code
1592  */
1593 int __cpuhp_setup_state(enum cpuhp_state state,
1594                         const char *name, bool invoke,
1595                         int (*startup)(unsigned int cpu),
1596                         int (*teardown)(unsigned int cpu),
1597                         bool multi_instance)
1598 {
1599         int cpu, ret = 0;
1600         int dyn_state = 0;
1601
1602         if (cpuhp_cb_check(state) || !name)
1603                 return -EINVAL;
1604
1605         get_online_cpus();
1606
1607         /* currently assignments for the ONLINE state are possible */
1608         if (state == CPUHP_AP_ONLINE_DYN) {
1609                 dyn_state = 1;
1610                 ret = cpuhp_reserve_state(state);
1611                 if (ret < 0)
1612                         goto out;
1613                 state = ret;
1614         }
1615
1616         cpuhp_store_callbacks(state, name, startup, teardown, multi_instance);
1617
1618         if (!invoke || !startup)
1619                 goto out;
1620
1621         /*
1622          * Try to call the startup callback for each present cpu
1623          * depending on the hotplug state of the cpu.
1624          */
1625         for_each_present_cpu(cpu) {
1626                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1627                 int cpustate = st->state;
1628
1629                 if (cpustate < state)
1630                         continue;
1631
1632                 ret = cpuhp_issue_call(cpu, state, true, NULL);
1633                 if (ret) {
1634                         if (teardown)
1635                                 cpuhp_rollback_install(cpu, state, NULL);
1636                         cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1637                         goto out;
1638                 }
1639         }
1640 out:
1641         put_online_cpus();
1642         if (!ret && dyn_state)
1643                 return state;
1644         return ret;
1645 }
1646 EXPORT_SYMBOL(__cpuhp_setup_state);
1647
1648 int __cpuhp_state_remove_instance(enum cpuhp_state state,
1649                                   struct hlist_node *node, bool invoke)
1650 {
1651         struct cpuhp_step *sp = cpuhp_get_step(state);
1652         int cpu;
1653
1654         BUG_ON(cpuhp_cb_check(state));
1655
1656         if (!sp->multi_instance)
1657                 return -EINVAL;
1658
1659         get_online_cpus();
1660         if (!invoke || !cpuhp_get_teardown_cb(state))
1661                 goto remove;
1662         /*
1663          * Call the teardown callback for each present cpu depending
1664          * on the hotplug state of the cpu. This function is not
1665          * allowed to fail currently!
1666          */
1667         for_each_present_cpu(cpu) {
1668                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1669                 int cpustate = st->state;
1670
1671                 if (cpustate >= state)
1672                         cpuhp_issue_call(cpu, state, false, node);
1673         }
1674
1675 remove:
1676         mutex_lock(&cpuhp_state_mutex);
1677         hlist_del(node);
1678         mutex_unlock(&cpuhp_state_mutex);
1679         put_online_cpus();
1680
1681         return 0;
1682 }
1683 EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
1684 /**
1685  * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
1686  * @state:      The state to remove
1687  * @invoke:     If true, the teardown function is invoked for cpus where
1688  *              cpu state >= @state
1689  *
1690  * The teardown callback is currently not allowed to fail. Think
1691  * about module removal!
1692  */
1693 void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
1694 {
1695         struct cpuhp_step *sp = cpuhp_get_step(state);
1696         int cpu;
1697
1698         BUG_ON(cpuhp_cb_check(state));
1699
1700         get_online_cpus();
1701
1702         if (sp->multi_instance) {
1703                 WARN(!hlist_empty(&sp->list),
1704                      "Error: Removing state %d which has instances left.\n",
1705                      state);
1706                 goto remove;
1707         }
1708
1709         if (!invoke || !cpuhp_get_teardown_cb(state))
1710                 goto remove;
1711
1712         /*
1713          * Call the teardown callback for each present cpu depending
1714          * on the hotplug state of the cpu. This function is not
1715          * allowed to fail currently!
1716          */
1717         for_each_present_cpu(cpu) {
1718                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1719                 int cpustate = st->state;
1720
1721                 if (cpustate >= state)
1722                         cpuhp_issue_call(cpu, state, false, NULL);
1723         }
1724 remove:
1725         cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
1726         put_online_cpus();
1727 }
1728 EXPORT_SYMBOL(__cpuhp_remove_state);
1729
1730 #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
1731 static ssize_t show_cpuhp_state(struct device *dev,
1732                                 struct device_attribute *attr, char *buf)
1733 {
1734         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1735
1736         return sprintf(buf, "%d\n", st->state);
1737 }
1738 static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
1739
1740 static ssize_t write_cpuhp_target(struct device *dev,
1741                                   struct device_attribute *attr,
1742                                   const char *buf, size_t count)
1743 {
1744         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1745         struct cpuhp_step *sp;
1746         int target, ret;
1747
1748         ret = kstrtoint(buf, 10, &target);
1749         if (ret)
1750                 return ret;
1751
1752 #ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
1753         if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
1754                 return -EINVAL;
1755 #else
1756         if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
1757                 return -EINVAL;
1758 #endif
1759
1760         ret = lock_device_hotplug_sysfs();
1761         if (ret)
1762                 return ret;
1763
1764         mutex_lock(&cpuhp_state_mutex);
1765         sp = cpuhp_get_step(target);
1766         ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
1767         mutex_unlock(&cpuhp_state_mutex);
1768         if (ret)
1769                 return ret;
1770
1771         if (st->state < target)
1772                 ret = do_cpu_up(dev->id, target);
1773         else
1774                 ret = do_cpu_down(dev->id, target);
1775
1776         unlock_device_hotplug();
1777         return ret ? ret : count;
1778 }
1779
1780 static ssize_t show_cpuhp_target(struct device *dev,
1781                                  struct device_attribute *attr, char *buf)
1782 {
1783         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
1784
1785         return sprintf(buf, "%d\n", st->target);
1786 }
1787 static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
1788
1789 static struct attribute *cpuhp_cpu_attrs[] = {
1790         &dev_attr_state.attr,
1791         &dev_attr_target.attr,
1792         NULL
1793 };
1794
1795 static struct attribute_group cpuhp_cpu_attr_group = {
1796         .attrs = cpuhp_cpu_attrs,
1797         .name = "hotplug",
1798         NULL
1799 };
1800
1801 static ssize_t show_cpuhp_states(struct device *dev,
1802                                  struct device_attribute *attr, char *buf)
1803 {
1804         ssize_t cur, res = 0;
1805         int i;
1806
1807         mutex_lock(&cpuhp_state_mutex);
1808         for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
1809                 struct cpuhp_step *sp = cpuhp_get_step(i);
1810
1811                 if (sp->name) {
1812                         cur = sprintf(buf, "%3d: %s\n", i, sp->name);
1813                         buf += cur;
1814                         res += cur;
1815                 }
1816         }
1817         mutex_unlock(&cpuhp_state_mutex);
1818         return res;
1819 }
1820 static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
1821
1822 static struct attribute *cpuhp_cpu_root_attrs[] = {
1823         &dev_attr_states.attr,
1824         NULL
1825 };
1826
1827 static struct attribute_group cpuhp_cpu_root_attr_group = {
1828         .attrs = cpuhp_cpu_root_attrs,
1829         .name = "hotplug",
1830         NULL
1831 };
1832
1833 static int __init cpuhp_sysfs_init(void)
1834 {
1835         int cpu, ret;
1836
1837         ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
1838                                  &cpuhp_cpu_root_attr_group);
1839         if (ret)
1840                 return ret;
1841
1842         for_each_possible_cpu(cpu) {
1843                 struct device *dev = get_cpu_device(cpu);
1844
1845                 if (!dev)
1846                         continue;
1847                 ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
1848                 if (ret)
1849                         return ret;
1850         }
1851         return 0;
1852 }
1853 device_initcall(cpuhp_sysfs_init);
1854 #endif
1855
1856 /*
1857  * cpu_bit_bitmap[] is a special, "compressed" data structure that
1858  * represents all NR_CPUS bits binary values of 1<<nr.
1859  *
1860  * It is used by cpumask_of() to get a constant address to a CPU
1861  * mask value that has a single bit set only.
1862  */
1863
1864 /* cpu_bit_bitmap[0] is empty - so we can back into it */
1865 #define MASK_DECLARE_1(x)       [x+1][0] = (1UL << (x))
1866 #define MASK_DECLARE_2(x)       MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
1867 #define MASK_DECLARE_4(x)       MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
1868 #define MASK_DECLARE_8(x)       MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
1869
1870 const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
1871
1872         MASK_DECLARE_8(0),      MASK_DECLARE_8(8),
1873         MASK_DECLARE_8(16),     MASK_DECLARE_8(24),
1874 #if BITS_PER_LONG > 32
1875         MASK_DECLARE_8(32),     MASK_DECLARE_8(40),
1876         MASK_DECLARE_8(48),     MASK_DECLARE_8(56),
1877 #endif
1878 };
1879 EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
1880
1881 const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
1882 EXPORT_SYMBOL(cpu_all_bits);
1883
1884 #ifdef CONFIG_INIT_ALL_POSSIBLE
1885 struct cpumask __cpu_possible_mask __read_mostly
1886         = {CPU_BITS_ALL};
1887 #else
1888 struct cpumask __cpu_possible_mask __read_mostly;
1889 #endif
1890 EXPORT_SYMBOL(__cpu_possible_mask);
1891
1892 struct cpumask __cpu_online_mask __read_mostly;
1893 EXPORT_SYMBOL(__cpu_online_mask);
1894
1895 struct cpumask __cpu_present_mask __read_mostly;
1896 EXPORT_SYMBOL(__cpu_present_mask);
1897
1898 struct cpumask __cpu_active_mask __read_mostly;
1899 EXPORT_SYMBOL(__cpu_active_mask);
1900
1901 void init_cpu_present(const struct cpumask *src)
1902 {
1903         cpumask_copy(&__cpu_present_mask, src);
1904 }
1905
1906 void init_cpu_possible(const struct cpumask *src)
1907 {
1908         cpumask_copy(&__cpu_possible_mask, src);
1909 }
1910
1911 void init_cpu_online(const struct cpumask *src)
1912 {
1913         cpumask_copy(&__cpu_online_mask, src);
1914 }
1915
1916 /*
1917  * Activate the first processor.
1918  */
1919 void __init boot_cpu_init(void)
1920 {
1921         int cpu = smp_processor_id();
1922
1923         /* Mark the boot cpu "present", "online" etc for SMP and UP case */
1924         set_cpu_online(cpu, true);
1925         set_cpu_active(cpu, true);
1926         set_cpu_present(cpu, true);
1927         set_cpu_possible(cpu, true);
1928 }
1929
1930 /*
1931  * Must be called _AFTER_ setting up the per_cpu areas
1932  */
1933 void __init boot_cpu_state_init(void)
1934 {
1935         per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
1936 }