include/linux/sched.h

   1 #ifndef _LINUX_SCHED_H
   2 #define _LINUX_SCHED_H
   3
   4 #include <uapi/linux/sched.h>
   5
   6 #include <linux/sched/prio.h>
   7
   8
   9 struct sched_param {    /* legacy priority-only form; struct sched_attr is the extended one */
  10         int sched_priority;     /* the single parameter this form carries */
  11 };
  12
  13 #include <asm/param.h>  /* for HZ */
  14
  15 #include <linux/capability.h>
  16 #include <linux/threads.h>
  17 #include <linux/kernel.h>
  18 #include <linux/types.h>
  19 #include <linux/timex.h>
  20 #include <linux/jiffies.h>
  21 #include <linux/plist.h>
  22 #include <linux/rbtree.h>
  23 #include <linux/thread_info.h>
  24 #include <linux/cpumask.h>
  25 #include <linux/errno.h>
  26 #include <linux/nodemask.h>
  27 #include <linux/mm_types.h>
  28 #include <linux/preempt.h>
  29
  30 #include <asm/page.h>
  31 #include <asm/ptrace.h>
  32 #include <linux/cputime.h>
  33
  34 #include <linux/smp.h>
  35 #include <linux/sem.h>
  36 #include <linux/shm.h>
  37 #include <linux/signal.h>
  38 #include <linux/compiler.h>
  39 #include <linux/completion.h>
  40 #include <linux/pid.h>
  41 #include <linux/percpu.h>
  42 #include <linux/topology.h>
  43 #include <linux/proportions.h>
  44 #include <linux/seccomp.h>
  45 #include <linux/rcupdate.h>
  46 #include <linux/rculist.h>
  47 #include <linux/rtmutex.h>
  48
  49 #include <linux/time.h>
  50 #include <linux/param.h>
  51 #include <linux/resource.h>
  52 #include <linux/timer.h>
  53 #include <linux/hrtimer.h>
  54 #include <linux/kcov.h>
  55 #include <linux/task_io_accounting.h>
  56 #include <linux/latencytop.h>
  57 #include <linux/cred.h>
  58 #include <linux/llist.h>
  59 #include <linux/uidgid.h>
  60 #include <linux/gfp.h>
  61 #include <linux/magic.h>
  62 #include <linux/cgroup-defs.h>
  63
  64 #include <asm/processor.h>
  65
  66 #define SCHED_ATTR_SIZE_VER0    48      /* sizeof first published struct */
  67
  68 /*
  69  * Extended scheduling parameters data structure.
  70  *
  71  * This is needed because the original struct sched_param can not be
  72  * altered without introducing ABI issues with legacy applications
  73  * (e.g., in sched_getparam()).
  74  *
  75  * However, the possibility of specifying more than just a priority for
  76  * the tasks may be useful for a wide variety of application fields, e.g.,
  77  * multimedia, streaming, automation and control, and many others.
  78  *
  79  * This variant (sched_attr) is meant to describe a so-called
  80  * sporadic time-constrained task. In such a model a task is specified by:
  81  *  - the activation period or minimum instance inter-arrival time;
  82  *  - the maximum (or average, depending on the actual scheduling
  83  *    discipline) computation time of all instances, a.k.a. runtime;
  84  *  - the deadline (relative to the actual activation time) of each
  85  *    instance.
  86  * Very briefly, a periodic (sporadic) task asks for the execution of
  87  * some specific computation --which is typically called an instance--
  88  * (at most) every period. Moreover, each instance typically lasts no more
  89  * than the runtime and must be completed by time instant t equal to
  90  * the instance activation time + the deadline.
  91  *
  92  * This is reflected by the actual fields of the sched_attr structure:
  93  *
  94  *  @size               size of the structure, for fwd/bwd compat.
  95  *
  96  *  @sched_policy       task's scheduling policy
  97  *  @sched_flags        for customizing the scheduler behaviour
  98  *  @sched_nice         task's nice value      (SCHED_NORMAL/BATCH)
  99  *  @sched_priority     task's static priority (SCHED_FIFO/RR)
 100  *  @sched_deadline     representative of the task's deadline
 101  *  @sched_runtime      representative of the task's runtime
 102  *  @sched_period       representative of the task's period
 103  *
 104  * Given this task model, there is a multiplicity of scheduling algorithms
 105  * and policies that can be used to ensure all the tasks will meet their
 106  * timing constraints.
 107  *
 108  * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the
 109  * only user of this new interface. More information about the algorithm is
 110  * available in the scheduling class file or in Documentation/.
 111  */
 112 struct sched_attr {     /* field widths add up to 48 bytes, the value of SCHED_ATTR_SIZE_VER0 */
 113         u32 size;               /* size of the structure, for fwd/bwd compat. */
 114
 115         u32 sched_policy;       /* task's scheduling policy */
 116         u64 sched_flags;        /* for customizing the scheduler behaviour */
 117
 118         /* SCHED_NORMAL, SCHED_BATCH */
 119         s32 sched_nice;         /* task's nice value */
 120
 121         /* SCHED_FIFO, SCHED_RR */
 122         u32 sched_priority;     /* task's static priority */
 123
 124         /* SCHED_DEADLINE */
 125         u64 sched_runtime;      /* representative of the task's runtime */
 126         u64 sched_deadline;     /* representative of the task's deadline */
 127         u64 sched_period;       /* representative of the task's period */
 128 };
 129
 130 struct futex_pi_state;
 131 struct robust_list_head;
 132 struct bio_list;
 133 struct fs_struct;
 134 struct perf_event_context;
 135 struct blk_plug;
 136 struct filename;
 137 struct nameidata;
 138
 139 #define VMACACHE_BITS 2
 140 #define VMACACHE_SIZE (1U << VMACACHE_BITS)
 141 #define VMACACHE_MASK (VMACACHE_SIZE - 1)
 142
 143 /*
 144  * These are the constants used to fake the fixed-point load-average
 145  * counting. Some notes:
 146  *  - 11 bit fractions expand to 22 bits by the multiplies: this gives
 147  *    a load-average precision of 10 bits integer + 11 bits fractional
 148  *  - if you want to count load-averages more often, you need more
 149  *    precision, or rounding will get you. With 2-second counting freq,
 150  *    the EXP_n values would be 1981, 2034 and 2043 if still using only
 151  *    11 bit fractions.
 152  */
 153 extern unsigned long avenrun[];         /* Load averages */
 154 extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift);
 155
 156 #define FSHIFT          11              /* nr of bits of precision */
 157 #define FIXED_1         (1<<FSHIFT)     /* 1.0 as fixed-point */
 158 #define LOAD_FREQ       (5*HZ+1)        /* 5 sec intervals */
 159 #define EXP_1           1884            /* 1/exp(5sec/1min) as fixed-point */
 160 #define EXP_5           2014            /* 1/exp(5sec/5min) */
 161 #define EXP_15          2037            /* 1/exp(5sec/15min) */
 162
 163 #define CALC_LOAD(load,exp,n) \
 164         load *= exp; \
 165         load += n*(FIXED_1-exp); \
 166         load >>= FSHIFT;
 167
 168 extern unsigned long total_forks;
 169 extern int nr_threads;
 170 DECLARE_PER_CPU(unsigned long, process_counts);
 171 extern int nr_processes(void);
 172 extern unsigned long nr_running(void);
 173 extern bool single_task_running(void);
 174 extern unsigned long nr_iowait(void);
 175 extern unsigned long nr_iowait_cpu(int cpu);
 176 extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 177
 178 extern void calc_global_load(unsigned long ticks);
 179
 180 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 181 extern void update_cpu_load_nohz(int active);
 182 #else
 183 static inline void update_cpu_load_nohz(int active) { }
 184 #endif
 185
 186 extern void dump_cpu_task(int cpu);
 187
 188 struct seq_file;
 189 struct cfs_rq;
 190 struct task_group;
 191 #ifdef CONFIG_SCHED_DEBUG
 192 extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
 193 extern void proc_sched_set_task(struct task_struct *p);
 194 #endif
 195
 196 /*
 197  * Task state bitmask. NOTE! These bits are also
 198  * encoded in fs/proc/array.c: get_task_state().
 199  *
 200  * We have two separate sets of flags: task->state
 201  * is about runnability, while task->exit_state are
 202  * about the task exiting. Confusing, but this way
 203  * modifying one set can't modify the other one by
 204  * mistake.
 205  */
 206 #define TASK_RUNNING            0
 207 #define TASK_INTERRUPTIBLE      1
 208 #define TASK_UNINTERRUPTIBLE    2
 209 #define __TASK_STOPPED          4
 210 #define __TASK_TRACED           8
 211 /* in tsk->exit_state */
 212 #define EXIT_DEAD               16
 213 #define EXIT_ZOMBIE             32
 214 #define EXIT_TRACE              (EXIT_ZOMBIE | EXIT_DEAD)
 215 /* in tsk->state again */
 216 #define TASK_DEAD               64
 217 #define TASK_WAKEKILL           128
 218 #define TASK_WAKING             256
 219 #define TASK_PARKED             512
 220 #define TASK_NOLOAD             1024
 221 #define TASK_STATE_MAX          2048
 222
 223 #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN"
 224
 225 extern char ___assert_task_state[1 - 2*!!(
 226                 sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
 227
 228 /* Convenience macros for the sake of set_task_state */
 229 #define TASK_KILLABLE           (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
 230 #define TASK_STOPPED            (TASK_WAKEKILL | __TASK_STOPPED)
 231 #define TASK_TRACED             (TASK_WAKEKILL | __TASK_TRACED)
 232
 233 #define TASK_IDLE               (TASK_UNINTERRUPTIBLE | TASK_NOLOAD)
 234
 235 /* Convenience macros for the sake of wake_up */
 236 #define TASK_NORMAL             (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
 237 #define TASK_ALL                (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
 238
 239 /* get_task_state() */
 240 #define TASK_REPORT             (TASK_RUNNING | TASK_INTERRUPTIBLE | \
 241                                  TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
 242                                  __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
 243 /* task->state/flags predicates; @task may be any pointer expression (task_contributes_to_load() reads it three times) */
 244 #define task_is_traced(task)    (((task)->state & __TASK_TRACED) != 0)
 245 #define task_is_stopped(task)   (((task)->state & __TASK_STOPPED) != 0)
 246 #define task_is_stopped_or_traced(task) \
 247                         (((task)->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
 248 #define task_contributes_to_load(task)  \
 249                                 (((task)->state & TASK_UNINTERRUPTIBLE) != 0 && \
 250                                  ((task)->flags & PF_FROZEN) == 0 && \
 251                                  ((task)->state & TASK_NOLOAD) == 0)
 252
 253 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 254 /* Debug variants: each state change also records its call site (_THIS_IP_) in ->task_state_change. */
 255 #define __set_task_state(tsk, state_value)                      \
 256         do {                                                    \
 257                 (tsk)->task_state_change = _THIS_IP_;           \
 258                 (tsk)->state = (state_value);                   \
 259         } while (0)
 260 #define set_task_state(tsk, state_value)                        \
 261         do {                                                    \
 262                 (tsk)->task_state_change = _THIS_IP_;           \
 263                 smp_store_mb((tsk)->state, (state_value));              \
 264         } while (0)
 265
 266 /*
 267  * set_current_state() includes a barrier so that the write of current->state
 268  * is correctly serialised wrt the caller's subsequent test of whether to
 269  * actually sleep:
 270  *
 271  *      set_current_state(TASK_UNINTERRUPTIBLE);
 272  *      if (do_i_need_to_sleep())
 273  *              schedule();
 274  *
 275  * If the caller does not need such serialisation then use __set_current_state()
 276  */
 277 #define __set_current_state(state_value)                        \
 278         do {                                                    \
 279                 current->task_state_change = _THIS_IP_;         \
 280                 current->state = (state_value);                 \
 281         } while (0)
 282 #define set_current_state(state_value)                          \
 283         do {                                                    \
 284                 current->task_state_change = _THIS_IP_;         \
 285                 smp_store_mb(current->state, (state_value));            \
 286         } while (0)
 287
 288 #else /* !CONFIG_DEBUG_ATOMIC_SLEEP */
 289 /* Plain variants: only the store to ->state is performed; the __ forms use a plain assignment. */
 290 #define __set_task_state(tsk, state_value)              \
 291         do { (tsk)->state = (state_value); } while (0)
 292 #define set_task_state(tsk, state_value)                \
 293         smp_store_mb((tsk)->state, (state_value))
 294
 295 /*
 296  * set_current_state() includes a barrier so that the write of current->state
 297  * is correctly serialised wrt the caller's subsequent test of whether to
 298  * actually sleep:
 299  *
 300  *      set_current_state(TASK_UNINTERRUPTIBLE);
 301  *      if (do_i_need_to_sleep())
 302  *              schedule();
 303  *
 304  * If the caller does not need such serialisation then use __set_current_state()
 305  */
 306 #define __set_current_state(state_value)                \
 307         do { current->state = (state_value); } while (0)
 308 #define set_current_state(state_value)                  \
 309         smp_store_mb(current->state, (state_value))
 310
 311 #endif /* CONFIG_DEBUG_ATOMIC_SLEEP */
 312
 313 /* Task command name length */
 314 #define TASK_COMM_LEN 16
 315
 316 #include <linux/spinlock.h>
 317
 318 /*
 319  * This serializes "schedule()" and also protects
 320  * the run-queue from deletions/modifications (but
 321  * _adding_ to the beginning of the run-queue has
 322  * a separate lock).
 323  */
 324 extern rwlock_t tasklist_lock;
 325 extern spinlock_t mmlist_lock;
 326
 327 struct task_struct;
 328
 329 #ifdef CONFIG_PROVE_RCU
 330 extern int lockdep_tasklist_lock_is_held(void);
 331 #endif /* #ifdef CONFIG_PROVE_RCU */
 332
 333 extern void sched_init(void);
 334 extern void sched_init_smp(void);
 335 extern asmlinkage void schedule_tail(struct task_struct *prev);
 336 extern void init_idle(struct task_struct *idle, int cpu);
 337 extern void init_idle_bootup_task(struct task_struct *idle);
 338
 339 extern cpumask_var_t cpu_isolated_map;
 340
 341 extern int runqueue_is_locked(int cpu);
 342
 343 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 344 extern void nohz_balance_enter_idle(int cpu);
 345 extern void set_cpu_sd_state_idle(void);
 346 extern int get_nohz_timer_target(void);
 347 #else
 348 static inline void nohz_balance_enter_idle(int cpu) { }
 349 static inline void set_cpu_sd_state_idle(void) { }
 350 #endif
 351
 352 /*
 353  * Only dump TASK_* tasks. (0 for all tasks)
 354  */
 355 extern void show_state_filter(unsigned long state_filter);
 356
 357 static inline void show_state(void)     /* dump every task: a filter of 0 means "all tasks" */
 358 {
 359         show_state_filter(0);
 360 }
 361
 362 extern void show_regs(struct pt_regs *);
 363
 364 /*
 365  * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
 366  * task), SP is the stack pointer of the first frame that should be shown in the back
 367  * trace (or NULL if the entire call-chain of the task should be shown).
 368  */
 369 extern void show_stack(struct task_struct *task, unsigned long *sp);
 370
 371 extern void cpu_init (void);
 372 extern void trap_init(void);
 373 extern void update_process_times(int user);
 374 extern void scheduler_tick(void);
 375 extern int sched_cpu_starting(unsigned int cpu);
 376 extern int sched_cpu_activate(unsigned int cpu);
 377 extern int sched_cpu_deactivate(unsigned int cpu);
 378
 379 #ifdef CONFIG_HOTPLUG_CPU
 380 extern int sched_cpu_dying(unsigned int cpu);
 381 #else
 382 # define sched_cpu_dying        NULL
 383 #endif
 384
 385 extern void sched_show_task(struct task_struct *p);
 386
 387 #ifdef CONFIG_LOCKUP_DETECTOR
 388 extern void touch_softlockup_watchdog_sched(void);
 389 extern void touch_softlockup_watchdog(void);
 390 extern void touch_softlockup_watchdog_sync(void);
 391 extern void touch_all_softlockup_watchdogs(void);
 392 extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
 393                                   void __user *buffer,
 394                                   size_t *lenp, loff_t *ppos);
 395 extern unsigned int  softlockup_panic;
 396 extern unsigned int  hardlockup_panic;
 397 void lockup_detector_init(void);
 398 #else
 399 static inline void touch_softlockup_watchdog_sched(void)
 400 {
 401 }
 402 static inline void touch_softlockup_watchdog(void)
 403 {
 404 }
 405 static inline void touch_softlockup_watchdog_sync(void)
 406 {
 407 }
 408 static inline void touch_all_softlockup_watchdogs(void)
 409 {
 410 }
 411 static inline void lockup_detector_init(void)
 412 {
 413 }
 414 #endif
 415
 416 #ifdef CONFIG_DETECT_HUNG_TASK
 417 void reset_hung_task_detector(void);
 418 #else
 419 static inline void reset_hung_task_detector(void)
 420 {
 421 }
 422 #endif
 423
 424 /* Attach to any functions which should be ignored in wchan output. */
 425 #define __sched         __attribute__((__section__(".sched.text")))
 426
 427 /* Linker adds these: start and end of __sched functions */
 428 extern char __sched_text_start[], __sched_text_end[];
 429
 430 /* Is this address in the __sched functions? */
 431 extern int in_sched_functions(unsigned long addr);
 432
 433 #define MAX_SCHEDULE_TIMEOUT    LONG_MAX
 434 extern signed long schedule_timeout(signed long timeout);
 435 extern signed long schedule_timeout_interruptible(signed long timeout);
 436 extern signed long schedule_timeout_killable(signed long timeout);
 437 extern signed long schedule_timeout_uninterruptible(signed long timeout);
 438 extern signed long schedule_timeout_idle(signed long timeout);
 439 asmlinkage void schedule(void);
 440 extern void schedule_preempt_disabled(void);
 441
 442 extern long io_schedule_timeout(long timeout);
 443
 444 static inline void io_schedule(void)    /* io_schedule_timeout() with the largest timeout, LONG_MAX */
 445 {
 446         io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);      /* the returned value is not used */
 447 }
 448
 449 struct nsproxy;
 450 struct user_namespace;
 451
 452 #ifdef CONFIG_MMU
 453 extern void arch_pick_mmap_layout(struct mm_struct *mm);
 454 extern unsigned long
 455 arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
 456                        unsigned long, unsigned long);
 457 extern unsigned long
 458 arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 459                           unsigned long len, unsigned long pgoff,
 460                           unsigned long flags);
 461 #else
 462 static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
 463 #endif
 464
 465 #define SUID_DUMP_DISABLE       0       /* No setuid dumping */
 466 #define SUID_DUMP_USER          1       /* Dump as user of process */
 467 #define SUID_DUMP_ROOT          2       /* Dump as root */
 468
 469 /* mm flags */
 470
 471 /* for SUID_DUMP_* above */
 472 #define MMF_DUMPABLE_BITS 2
 473 #define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1)
 474
 475 extern void set_dumpable(struct mm_struct *mm, int value);
 476 /*
 477  * This returns the actual value of the suid_dumpable flag. For things
 478  * that are using this for checking for privilege transitions, it must
 479  * test against SUID_DUMP_USER rather than treating it as a boolean
 480  * value.
 481  */
 482 static inline int __get_dumpable(unsigned long mm_flags)
 483 {
 484         return mm_flags & MMF_DUMPABLE_MASK;    /* keep only the low MMF_DUMPABLE_BITS bits */
 485 }
 486
 487 static inline int get_dumpable(struct mm_struct *mm)    /* dumpable bits (for SUID_DUMP_*) of @mm->flags */
 488 {
 489         return __get_dumpable(mm->flags);
 490 }
 491
 492 /* coredump filter bits */
 493 #define MMF_DUMP_ANON_PRIVATE   2
 494 #define MMF_DUMP_ANON_SHARED    3
 495 #define MMF_DUMP_MAPPED_PRIVATE 4
 496 #define MMF_DUMP_MAPPED_SHARED  5
 497 #define MMF_DUMP_ELF_HEADERS    6
 498 #define MMF_DUMP_HUGETLB_PRIVATE 7
 499 #define MMF_DUMP_HUGETLB_SHARED  8
 500 #define MMF_DUMP_DAX_PRIVATE    9
 501 #define MMF_DUMP_DAX_SHARED     10
 502
 503 #define MMF_DUMP_FILTER_SHIFT   MMF_DUMPABLE_BITS
 504 #define MMF_DUMP_FILTER_BITS    9
 505 #define MMF_DUMP_FILTER_MASK \
 506         (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
 507 #define MMF_DUMP_FILTER_DEFAULT \
 508         ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED) |\
 509          (1 << MMF_DUMP_HUGETLB_PRIVATE) | MMF_DUMP_MASK_DEFAULT_ELF)
 510
 511 #ifdef CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS
 512 # define MMF_DUMP_MASK_DEFAULT_ELF      (1 << MMF_DUMP_ELF_HEADERS)
 513 #else
 514 # define MMF_DUMP_MASK_DEFAULT_ELF      0
 515 #endif
 516                                         /* leave room for more dump flags */
 517 #define MMF_VM_MERGEABLE        16      /* KSM may merge identical pages */
 518 #define MMF_VM_HUGEPAGE         17      /* set when VM_HUGEPAGE is set on vma */
 519 #define MMF_EXE_FILE_CHANGED    18      /* see prctl_set_mm_exe_file() */
 520
 521 #define MMF_HAS_UPROBES         19      /* has uprobes */
 522 #define MMF_RECALC_UPROBES      20      /* MMF_HAS_UPROBES can be wrong */
 523
 524 #define MMF_INIT_MASK           (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
 525
 526 struct sighand_struct {         /* can be shared; see the NOTE above struct signal_struct */
 527         atomic_t                count;          /* NOTE(review): presumably a reference count - confirm */
 528         struct k_sigaction      action[_NSIG];  /* NOTE(review): presumably one entry per signal number - confirm */
 529         spinlock_t              siglock;        /* per that NOTE, locking sighand_struct also covers signal_struct */
 530         wait_queue_head_t       signalfd_wqh;
 531 };
 532
 533 struct pacct_struct {           /* per-process accounting information (signal_struct.pacct) */
 534         int                     ac_flag;
 535         long                    ac_exitcode;
 536         unsigned long           ac_mem;
 537         cputime_t               ac_utime, ac_stime;     /* NOTE(review): presumably user/system CPU time - confirm */
 538         unsigned long           ac_minflt, ac_majflt;   /* NOTE(review): presumably minor/major fault counts - confirm */
 539 };
 540
 541 struct cpu_itimer {     /* one ITIMER_PROF or ITIMER_VIRTUAL timer; used as signal_struct.it[] */
 542         cputime_t expires;      /* NOTE(review): presumably the next expiry time - confirm */
 543         cputime_t incr;         /* NOTE(review): presumably the reload interval - confirm */
 544         u32 error;
 545         u32 incr_error;
 546 };
 547
 548 /**
 549  * struct prev_cputime - snapshot of system and user cputime
 550  * @utime: time spent in user mode
 551  * @stime: time spent in system mode
 552  * @lock: protects the above two fields
 553  *
 554  * Stores previous user/system time values such that we can guarantee
 555  * monotonicity.
 556  */
 557 struct prev_cputime {
 558 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 559         cputime_t utime;
 560         cputime_t stime;
 561         raw_spinlock_t lock;
 562 #endif  /* with CONFIG_VIRT_CPU_ACCOUNTING_NATIVE the struct has no members */
 563 };
 564
 565 static inline void prev_cputime_init(struct prev_cputime *prev) /* put @prev into its initial state */
 566 {
 567 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 568         prev->utime = prev->stime = 0;          /* both snapshots start at zero */
 569         raw_spin_lock_init(&prev->lock);
 570 #endif  /* otherwise there is nothing to initialise */
 571 }
 572
 573 /**
 574  * struct task_cputime - collected CPU time counts
 575  * @utime:              time spent in user mode, in &cputime_t units
 576  * @stime:              time spent in kernel mode, in &cputime_t units
 577  * @sum_exec_runtime:   total time spent on the CPU, in nanoseconds
 578  *
 579  * This structure groups together three kinds of CPU time that are tracked for
 580  * threads and thread groups.  Most things considering CPU time want to group
 581  * these counts together and treat all three of them in parallel.
 582  */
 583 struct task_cputime {
 584         cputime_t utime;                        /* named virt_exp when caching expirations */
 585         cputime_t stime;                        /* named prof_exp when caching expirations */
 586         unsigned long long sum_exec_runtime;    /* named sched_exp when caching expirations */
 587 };
 588
 589 /* Alternate field names when used to cache expirations. */
 590 #define virt_exp        utime                   /* cputime_t units */
 591 #define prof_exp        stime                   /* cputime_t units */
 592 #define sched_exp       sum_exec_runtime        /* nanoseconds */
 593 /* INIT_CPUTIME: a struct task_cputime compound literal with all three counts zero. */
 594 #define INIT_CPUTIME    \
 595         (struct task_cputime) {                                 \
 596                 .utime = 0,                                     \
 597                 .stime = 0,                                     \
 598                 .sum_exec_runtime = 0,                          \
 599         }
 600
 601 /*
 602  * This is the atomic variant of task_cputime, which can be used for
 603  * storing and updating task_cputime statistics without locking.
 604  */
 605 struct task_cputime_atomic {
 606         atomic64_t utime;               /* atomic counterpart of task_cputime.utime */
 607         atomic64_t stime;               /* atomic counterpart of task_cputime.stime */
 608         atomic64_t sum_exec_runtime;    /* atomic counterpart of task_cputime.sum_exec_runtime */
 609 };
 610 /* INIT_CPUTIME_ATOMIC: a compound literal with every counter set to ATOMIC64_INIT(0). */
 611 #define INIT_CPUTIME_ATOMIC \
 612         (struct task_cputime_atomic) {                          \
 613                 .utime = ATOMIC64_INIT(0),                      \
 614                 .stime = ATOMIC64_INIT(0),                      \
 615                 .sum_exec_runtime = ATOMIC64_INIT(0),           \
 616         }
 617
 618 #define PREEMPT_DISABLED        (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
 619
 620 /*
 621  * Disable preemption until the scheduler is running -- use an unconditional
 622  * value so that it also works on !PREEMPT_COUNT kernels.
 623  *
 624  * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count().
 625  */
 626 #define INIT_PREEMPT_COUNT      PREEMPT_OFFSET
 627
 628 /*
 629  * Initial preempt_count value; reflects the preempt_count schedule invariant
 630  * which states that during context switches:
 631  *
 632  *    preempt_count() == 2*PREEMPT_DISABLE_OFFSET
 633  *
 634  * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels.
 635  * Note: See finish_task_switch().
 636  */
 637 #define FORK_PREEMPT_COUNT      (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
 638
 639 /**
 640  * struct thread_group_cputimer - thread group interval timer counts
 641  * @cputime_atomic:     atomic thread group interval timers.
 642  * @running:            true when there are timers running and
 643  *                      @cputime_atomic receives updates.
 644  * @checking_timer:     true when a thread in the group is in the
 645  *                      process of checking for thread group timers.
 646  *
 647  * This structure holds the atomic version of task_cputime (struct
 648  * task_cputime_atomic, above) used for thread group CPU timer calculations.
 649  */
 650 struct thread_group_cputimer {
 651         struct task_cputime_atomic cputime_atomic;
 652         bool running;
 653         bool checking_timer;
 654 };
 655
 656 #include <linux/rwsem.h>
 657 struct autogroup;
 658
 659 /*
 660  * NOTE! "signal_struct" does not have its own
 661  * locking, because a shared signal_struct always
 662  * implies a shared sighand_struct, so locking
 663  * sighand_struct is always a proper superset of
 664  * the locking of signal_struct.
 665  */
 666 struct signal_struct {
 667         atomic_t                sigcnt;
 668         atomic_t                live;
 669         int                     nr_threads;
 670         struct list_head        thread_head;
 671
 672         wait_queue_head_t       wait_chldexit;  /* for wait4() */
 673
 674         /* current thread group signal load-balancing target: */
 675         struct task_struct      *curr_target;
 676
 677         /* shared signal handling: */
 678         struct sigpending       shared_pending;
 679
 680         /* thread group exit support */
 681         int                     group_exit_code;
 682         /* overloaded:
 683          * - notify group_exit_task when ->count is equal to notify_count
 684          * - everyone except group_exit_task is stopped during signal delivery
 685          *   of fatal signals, group_exit_task processes the signal.
 686          */
 687         int                     notify_count;
 688         struct task_struct      *group_exit_task;
 689
 690         /* thread group stop support, overloads group_exit_code too */
 691         int                     group_stop_count;
 692         unsigned int            flags; /* see SIGNAL_* flags below */
 693
 694         /*
 695          * PR_SET_CHILD_SUBREAPER marks a process, like a service
 696          * manager, to re-parent orphan (double-forking) child processes
 697          * to this process instead of 'init'. The service manager is
 698          * able to receive SIGCHLD signals and is able to investigate
 699          * the process until it calls wait(). All children of this
 700          * process will inherit a flag if they should look for a
 701          * child_subreaper process at exit.
 702          */
 703         unsigned int            is_child_subreaper:1;
 704         unsigned int            has_child_subreaper:1;
 705
 706         /* POSIX.1b Interval Timers */
 707         int                     posix_timer_id;
 708         struct list_head        posix_timers;
 709
 710         /* ITIMER_REAL timer for the process */
 711         struct hrtimer real_timer;
 712         struct pid *leader_pid;
 713         ktime_t it_real_incr;
 714
 715         /*
 716          * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use
 717          * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these
 718          * values are defined to 0 and 1 respectively
 719          */
 720         struct cpu_itimer it[2];
 721
 722         /*
 723          * Thread group totals for process CPU timers.
 724          * See thread_group_cputimer(), et al, for details.
 725          */
 726         struct thread_group_cputimer cputimer;
 727
 728         /* Earliest-expiration cache. */
 729         struct task_cputime cputime_expires;
 730
 731 #ifdef CONFIG_NO_HZ_FULL
 732         atomic_t tick_dep_mask;
 733 #endif /* CONFIG_NO_HZ_FULL */
 734
 735         struct list_head cpu_timers[3];
 736
 737         struct pid *tty_old_pgrp;
 738
 739         /* boolean value for session group leader */
 740         int leader;
 741
 742         struct tty_struct *tty; /* NULL if no tty */
 743
 744 #ifdef CONFIG_SCHED_AUTOGROUP
 745         struct autogroup *autogroup;
 746 #endif /* CONFIG_SCHED_AUTOGROUP */
 747         /*
 748          * Cumulative resource counters for dead threads in the group,
 749          * and for reaped dead child processes forked by this group.
 750          * Live threads maintain their own counters and add to these
 751          * in __exit_signal, except for the group leader.
 752          */
 753         seqlock_t stats_lock;
 754         cputime_t utime, stime, cutime, cstime;
 755         cputime_t gtime;
 756         cputime_t cgtime;
 757         struct prev_cputime prev_cputime;
 758         unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
 759         unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
 760         unsigned long inblock, oublock, cinblock, coublock;
 761         unsigned long maxrss, cmaxrss;
 762         struct task_io_accounting ioac;
 763
 764         /*
 765          * Cumulative ns of scheduled CPU time of dead threads in the
 766          * group, not including a zombie group leader. (This only differs
 767          * from jiffies_to_ns(utime + stime) if sched_clock uses something
 768          * other than jiffies.)
 769          */
 770         unsigned long long sum_sched_runtime;
 771
 772         /*
 773          * We don't bother to synchronize most readers of this at all,
 774          * because there is no reader checking a limit that actually needs
 775          * to get both rlim_cur and rlim_max atomically, and either one
 776          * alone is a single word that can safely be read normally.
 777          * getrlimit/setrlimit use task_lock(current->group_leader) to
 778          * protect this instead of the siglock, because they really
 779          * have no need to disable irqs.
 780          */
 781         struct rlimit rlim[RLIM_NLIMITS];
 782
 783 #ifdef CONFIG_BSD_PROCESS_ACCT
 784         struct pacct_struct pacct;      /* per-process accounting information */
 785 #endif /* CONFIG_BSD_PROCESS_ACCT */
 786 #ifdef CONFIG_TASKSTATS
 787         struct taskstats *stats;
 788 #endif /* CONFIG_TASKSTATS */
 789 #ifdef CONFIG_AUDIT
 790         unsigned audit_tty;
 791         struct tty_audit_buf *tty_audit_buf;
 792 #endif /* CONFIG_AUDIT */
 793
 794         oom_flags_t oom_flags;
 795         short oom_score_adj;            /* OOM kill score adjustment */
 796         short oom_score_adj_min;        /* OOM kill score adjustment min value.
 797                                          * Only settable by CAP_SYS_RESOURCE. */
 798
 799         struct mutex cred_guard_mutex;  /* guard against foreign influences on
 800                                          * credential calculations
 801                                          * (notably, ptrace) */
 802 };
 803
 804 /*
 805  * Bits in flags field of signal_struct.
 806  */
 807 #define SIGNAL_STOP_STOPPED     0x00000001 /* job control stop in effect */
 808 #define SIGNAL_STOP_CONTINUED   0x00000002 /* SIGCONT since WCONTINUED reap */
 809 #define SIGNAL_GROUP_EXIT       0x00000004 /* group exit in progress */
 810 #define SIGNAL_GROUP_COREDUMP   0x00000008 /* coredump in progress */
 811 /*
 812  * Pending notifications to parent.
 813  */
 814 #define SIGNAL_CLD_STOPPED      0x00000010
 815 #define SIGNAL_CLD_CONTINUED    0x00000020
 816 #define SIGNAL_CLD_MASK         (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED)
 817
 818 #define SIGNAL_UNKILLABLE       0x00000040 /* for init: ignore fatal signals */
 819
 820 /* Nonzero means every thread except ->group_exit_task has SIGKILL pending */
 821 static inline int signal_group_exit(const struct signal_struct *sig)
 822 {
 823         return sig->group_exit_task ? 1 :
 824                 !!(sig->flags & SIGNAL_GROUP_EXIT);
 825 }
 826
 827 /*
 828  * Some day this will be a full-fledged user tracking system..
 829  */
 830 struct user_struct {
 831         atomic_t __count;       /* reference count */
 832         atomic_t processes;     /* How many processes does this user have? */
 833         atomic_t sigpending;    /* How many pending signals does this user have? */
 834 #ifdef CONFIG_INOTIFY_USER
 835         atomic_t inotify_watches; /* How many inotify watches does this user have? */
 836         atomic_t inotify_devs;  /* How many inotify devs does this user have opened? */
 837 #endif
 838 #ifdef CONFIG_FANOTIFY
 839         atomic_t fanotify_listeners;    /* presumably this user's fanotify listeners - confirm */
 840 #endif
 841 #ifdef CONFIG_EPOLL
 842         atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
 843 #endif
 844 #ifdef CONFIG_POSIX_MQUEUE
 845         /* protected by mq_lock */
 846         unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */
 847 #endif
 848         unsigned long locked_shm; /* How many pages of mlocked shm ? */
 849         unsigned long unix_inflight;    /* How many files in flight in unix sockets */
 850         atomic_long_t pipe_bufs;  /* how many pages are allocated in pipe buffers */
 851
 852 #ifdef CONFIG_KEYS
 853         struct key *uid_keyring;        /* UID specific keyring */
 854         struct key *session_keyring;    /* UID's default session keyring */
 855 #endif
 856
 857         /* Hash table maintenance information */
 858         struct hlist_node uidhash_node; /* this entry's hash-list linkage */
 859         kuid_t uid;                     /* presumably the hash key - confirm against find_user() */
 860
 861 #if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL)
 862         atomic_long_t locked_vm;        /* present only with perf events or the bpf syscall */
 863 #endif
 864 };
 865
 866 extern int uids_sysfs_init(void);
 867
 868 extern struct user_struct *find_user(kuid_t);
 869
 870 extern struct user_struct root_user;
 871 #define INIT_USER (&root_user)  /* address of root_user */
 872
 873
 874 struct backing_dev_info;        /* forward declaration; task_struct only holds a pointer */
 875 struct reclaim_state;           /* forward declaration; task_struct only holds a pointer */
 876
 877 #ifdef CONFIG_SCHED_INFO
 878 struct sched_info {     /* embedded in task_struct as ->sched_info */
 879         /* cumulative counters */
 880         unsigned long pcount;         /* # of times run on this cpu */
 881         unsigned long long run_delay; /* time spent waiting on a runqueue */
 882
 883         /* timestamps */
 884         unsigned long long last_arrival,/* when we last ran on a cpu */
 885                            last_queued; /* when we were last queued to run */
 886 };
 887 #endif /* CONFIG_SCHED_INFO */
 888
 889 #ifdef CONFIG_TASK_DELAY_ACCT
 890 struct task_delay_info {
 891         spinlock_t      lock;   /* the "single lock" the notes below refer to */
 892         unsigned int    flags;  /* Private per-task flags */
 893
 894         /* For each stat XXX, add following, aligned appropriately
 895          *
 896          * struct timespec XXX_start, XXX_end;
 897          * u64 XXX_delay;
 898          * u32 XXX_count;
 899          *
 900          * Atomicity of updates to XXX_delay, XXX_count protected by
 901          * single lock above (split into XXX_lock if contention is an issue).
 902          */
 903
 904         /*
 905          * XXX_count is incremented on every XXX operation, the delay
 906          * associated with the operation is added to XXX_delay.
 907          * XXX_delay contains the accumulated delay time in nanoseconds.
 908          */
 909         u64 blkio_start;        /* Shared by blkio, swapin */
 910         u64 blkio_delay;        /* wait for sync block io completion */
 911         u64 swapin_delay;       /* wait for swapin block io completion */
 912         u32 blkio_count;        /* total count of the number of sync block */
 913                                 /* io operations performed */
 914         u32 swapin_count;       /* total count of the number of swapin block */
 915                                 /* io operations performed */
 916
 917         u64 freepages_start;    /* presumably start stamp of a reclaim wait - confirm */
 918         u64 freepages_delay;    /* wait for memory reclaim */
 919         u32 freepages_count;    /* total count of memory reclaim */
 920 };
 921 #endif  /* CONFIG_TASK_DELAY_ACCT */
 922
 923 static inline int sched_info_on(void)
 924 {
 925 #if defined(CONFIG_SCHEDSTATS)
 926         return 1;               /* schedstats compiled in */
 927 #elif !defined(CONFIG_TASK_DELAY_ACCT)
 928         return 0;               /* neither option compiled in */
 929 #else
 930         extern int delayacct_on;
 931         return delayacct_on;    /* value of the external delayacct_on flag */
 932 #endif
 933 }
 934
 935 #ifdef CONFIG_SCHEDSTATS
 936 void force_schedstat_enabled(void);
 937 #endif
 938
 939 enum cpu_idle_type {
 940         CPU_IDLE,               /* = 0 */
 941         CPU_NOT_IDLE,           /* = 1 */
 942         CPU_NEWLY_IDLE,         /* = 2 */
 943         CPU_MAX_IDLE_TYPES      /* = 3, count of the above; sizes the lb_* arrays in sched_domain */
 944 };
 945
 946 /*
 947  * Increase resolution of cpu_capacity calculations
 948  */
 949 #define SCHED_CAPACITY_SHIFT    10
 950 #define SCHED_CAPACITY_SCALE    (1L << SCHED_CAPACITY_SHIFT)    /* 1024, typed long */
 951
 952 /*
 953  * Wake-queues are lists of tasks with a pending wakeup, whose
 954  * callers have already marked the task as woken internally,
 955  * and can thus carry on. A common use case is being able to
 956  * do the wakeups once the corresponding user lock has been
 957  * released.
 958  *
 959  * We hold reference to each task in the list across the wakeup,
 960  * thus guaranteeing that the memory is still valid by the time
 961  * the actual wakeups are performed in wake_up_q().
 962  *
 963  * One per task suffices, because there's never a need for a task to be
 964  * in two wake queues simultaneously; it is forbidden to abandon a task
 965  * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is
 966  * already in a wake queue, the wakeup will happen soon and the second
 967  * waker can just skip it.
 968  *
 969  * The WAKE_Q macro declares and initializes the list head.
 970  * wake_up_q() does NOT reinitialize the list; it's expected to be
 971  * called near the end of a function, where the fact that the queue is
 972  * not used again will be easy to see by inspection.
 973  *
 974  * Note that this can cause spurious wakeups. schedule() callers
 975  * must ensure the call is done inside a loop, confirming that the
 976  * wakeup condition has in fact occurred.
 977  */
 978 struct wake_q_node {
 979         struct wake_q_node *next;       /* following node in the queue */
 980 };
 981
 982 struct wake_q_head {
 983         struct wake_q_node *first;      /* WAKE_Q() starts this at WAKE_Q_TAIL */
 984         struct wake_q_node **lastp;     /* WAKE_Q() starts this at &first */
 985 };
 986
 987 #define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)       /* non-NULL marker value */
 988
 989 #define WAKE_Q(name)                                    \
 990         struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
 991
 992 extern void wake_q_add(struct wake_q_head *head,
 993                        struct task_struct *task);
 994 extern void wake_up_q(struct wake_q_head *head);
 995
 996 /*
 997  * sched-domains (multiprocessor balancing) declarations:
 998  */
 999 #ifdef CONFIG_SMP
1000 #define SD_LOAD_BALANCE         0x0001  /* Do load balancing on this domain. */
1001 #define SD_BALANCE_NEWIDLE      0x0002  /* Balance when about to become idle */
1002 #define SD_BALANCE_EXEC         0x0004  /* Balance on exec */
1003 #define SD_BALANCE_FORK         0x0008  /* Balance on fork, clone */
1004 #define SD_BALANCE_WAKE         0x0010  /* Balance on wakeup */
1005 #define SD_WAKE_AFFINE          0x0020  /* Wake task to waking CPU */
1006 #define SD_SHARE_CPUCAPACITY    0x0080  /* Domain members share cpu capacity (0x0040 is unused here) */
1007 #define SD_SHARE_POWERDOMAIN    0x0100  /* Domain members share power domain */
1008 #define SD_SHARE_PKG_RESOURCES  0x0200  /* Domain members share cpu pkg resources */
1009 #define SD_SERIALIZE            0x0400  /* Only a single load balancing instance */
1010 #define SD_ASYM_PACKING         0x0800  /* Place busy groups earlier in the domain */
1011 #define SD_PREFER_SIBLING       0x1000  /* Prefer to place tasks in a sibling domain */
1012 #define SD_OVERLAP              0x2000  /* sched_domains of this level overlap */
1013 #define SD_NUMA                 0x4000  /* cross-node balancing */
1014
1015 #if defined(CONFIG_SCHED_SMT)
1016 static inline int cpu_smt_flags(void)
1017 {
1018         return SD_SHARE_PKG_RESOURCES | SD_SHARE_CPUCAPACITY;   /* both sharing bits */
1019 }
1020 #endif
1021
1022 #if defined(CONFIG_SCHED_MC)
1023 static inline int cpu_core_flags(void)
1024 {
1025         return (SD_SHARE_PKG_RESOURCES);        /* package-resource sharing only */
1026 }
1027 #endif
1028
1029 #if defined(CONFIG_NUMA)
1030 static inline int cpu_numa_flags(void)
1031 {
1032         return (SD_NUMA);                       /* the cross-node balancing bit */
1033 }
1034 #endif
1035
1036 struct sched_domain_attr {
1037         int relax_domain_level;         /* SD_ATTR_INIT sets this to -1 */
1038 };
1039
1040 #define SD_ATTR_INIT    (struct sched_domain_attr) {    \
1041         .relax_domain_level = -1,                       \
1042 }
1043
1044 extern int sched_domain_level_max;
1045
1046 struct sched_group;     /* forward declaration; only pointers to it are used below */
1047
1048 struct sched_domain {
1049         /* These fields must be setup */
1050         struct sched_domain *parent;    /* top domain must be null terminated */
1051         struct sched_domain *child;     /* bottom domain must be null terminated */
1052         struct sched_group *groups;     /* the balancing groups of the domain */
1053         unsigned long min_interval;     /* Minimum balance interval ms */
1054         unsigned long max_interval;     /* Maximum balance interval ms */
1055         unsigned int busy_factor;       /* less balancing by factor if busy */
1056         unsigned int imbalance_pct;     /* No balance until over watermark */
1057         unsigned int cache_nice_tries;  /* Leave cache hot tasks for # tries */
1058         unsigned int busy_idx;
1059         unsigned int idle_idx;
1060         unsigned int newidle_idx;
1061         unsigned int wake_idx;
1062         unsigned int forkexec_idx;
1063         unsigned int smt_gain;
1064
1065         int nohz_idle;                  /* NOHZ IDLE status */
1066         int flags;                      /* See SD_* */
1067         int level;
1068
1069         /* Runtime fields. */
1070         unsigned long last_balance;     /* init to jiffies. units in jiffies */
1071         unsigned int balance_interval;  /* initialise to 1. units in ms. */
1072         unsigned int nr_balance_failed; /* initialise to 0 */
1073
1074         /* idle_balance() stats */
1075         u64 max_newidle_lb_cost;
1076         unsigned long next_decay_max_lb_cost;
1077
1078 #ifdef CONFIG_SCHEDSTATS
1079         /* load_balance() stats */
1080         unsigned int lb_count[CPU_MAX_IDLE_TYPES];
1081         unsigned int lb_failed[CPU_MAX_IDLE_TYPES];
1082         unsigned int lb_balanced[CPU_MAX_IDLE_TYPES];
1083         unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES];
1084         unsigned int lb_gained[CPU_MAX_IDLE_TYPES];
1085         unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES];
1086         unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES];
1087         unsigned int lb_nobusyq[CPU_MAX_IDLE_TYPES];
1088
1089         /* Active load balancing */
1090         unsigned int alb_count;
1091         unsigned int alb_failed;
1092         unsigned int alb_pushed;
1093
1094         /* SD_BALANCE_EXEC stats */
1095         unsigned int sbe_count;
1096         unsigned int sbe_balanced;
1097         unsigned int sbe_pushed;
1098
1099         /* SD_BALANCE_FORK stats */
1100         unsigned int sbf_count;
1101         unsigned int sbf_balanced;
1102         unsigned int sbf_pushed;
1103
1104         /* try_to_wake_up() stats */
1105         unsigned int ttwu_wake_remote;
1106         unsigned int ttwu_move_affine;
1107         unsigned int ttwu_move_balance;
1108 #endif
1109 #ifdef CONFIG_SCHED_DEBUG
1110         char *name;
1111 #endif
1112         union {
1113                 void *private;          /* used during construction */
1114                 struct rcu_head rcu;    /* used during destruction */
1115         };
1116
1117         unsigned int span_weight;
1118         /*
1119          * Span of all CPUs in this domain.
1120          *
1121          * NOTE: this field is variable length. (Allocated dynamically
1122          * by attaching extra space to the end of the structure,
1123          * depending on how many CPUs the kernel has booted up with)
1124          */
1125         unsigned long span[0];
1126 };
1127
1128 static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
1129 {
1130         return to_cpumask(sd->span);    /* hand back the trailing span words as a cpumask */
1131 }
1132
1133 extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
1134                                     struct sched_domain_attr *dattr_new);
1135
1136 /* Allocate an array of sched domains, for partition_sched_domains(). */
1137 cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
1138 void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
1139
1140 bool cpus_share_cache(int this_cpu, int that_cpu);
1141
1142 typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);  /* cpu -> cpumask callback */
1143 typedef int (*sched_domain_flags_f)(void);      /* presumably returns SD_* bits - confirm */
1144
1145 #define SDTL_OVERLAP    0x01
1146
1147 struct sd_data {        /* __percpu sits between the stars so it qualifies the pointed-to slot */
1148         struct sched_domain *__percpu *sd;
1149         struct sched_group *__percpu *sg;
1150         struct sched_group_capacity *__percpu *sgc;
1151 };
1152
1153 struct sched_domain_topology_level {
1154         sched_domain_mask_f mask;       /* callback: int cpu -> const struct cpumask * */
1155         sched_domain_flags_f sd_flags;  /* callback: void -> int */
1156         int                 flags;      /* presumably SDTL_* bits - confirm */
1157         int                 numa_level;
1158         struct sd_data      data;
1159 #ifdef CONFIG_SCHED_DEBUG
1160         char                *name;      /* the member SD_INIT_NAME() initializes */
1161 #endif
1162 };
1163
1164 extern void set_sched_topology(struct sched_domain_topology_level *tl);
1165 extern void wake_up_if_idle(int cpu);
1166
1167 #ifdef CONFIG_SCHED_DEBUG
1168 # define SD_INIT_NAME(type)             .name = #type   /* stringified type as a .name initializer */
1169 #else
1170 # define SD_INIT_NAME(type)             /* expands to nothing without CONFIG_SCHED_DEBUG */
1171 #endif
1172
1173 #else /* CONFIG_SMP */
1174
1175 struct sched_domain_attr;
1176
1177 static inline void
1178 partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
1179                         struct sched_domain_attr *dattr_new)
1180 {       /* !CONFIG_SMP stub: intentionally empty */
1181 }
1182
1183 static inline bool cpus_share_cache(int this_cpu, int that_cpu)
1184 {
1185         return true;    /* !CONFIG_SMP stub: always true, arguments unused */
1186 }
1187
1188 #endif  /* !CONFIG_SMP */
1189
1190
1191 struct io_context;                      /* See blkdev.h */
1192
1193
1194 #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
1195 extern void prefetch_stack(struct task_struct *t);      /* supplied outside this header */
1196 #else
1197 static inline void prefetch_stack(struct task_struct *t) { }    /* default: no-op */
1198 #endif
1199
1200 struct audit_context;           /* See audit.c */
1201 struct mempolicy;
1202 struct pipe_inode_info;
1203 struct uts_namespace;
1204
1205 struct load_weight {    /* used as sched_entity's ->load member */
1206         unsigned long weight;
1207         u32 inv_weight;         /* presumably a precomputed inverse of weight - confirm */
1208 };
1209
1210 /*
1211  * The load_avg/util_avg accumulates an infinite geometric series.
1212  * 1) load_avg factors frequency scaling into the amount of time that a
1213  * sched_entity is runnable on a rq into its weight. For cfs_rq, it is the
1214  * aggregated such weights of all runnable and blocked sched_entities.
1215  * 2) util_avg factors frequency and cpu scaling into the amount of time
1216  * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE].
1217  * For cfs_rq, it is the aggregated such times of all runnable and
1218  * blocked sched_entities.
1219  * The 64 bit load_sum can:
1220  * 1) for cfs_rq, afford 4353082796 (=2^64/47742/88761) entities with
1221  * the highest weight (=88761) always runnable, we should not overflow
1222  * 2) for entity, support any load.weight always runnable
1223  */
1224 struct sched_avg {
1225         u64 last_update_time, load_sum;         /* load_sum: the 64 bit sum discussed above */
1226         u32 util_sum, period_contrib;
1227         unsigned long load_avg, util_avg;       /* the two averages described above */
1228 };
1229
1230 #ifdef CONFIG_SCHEDSTATS
1231 struct sched_statistics {       /* embedded in struct sched_entity as ->statistics */
1232         u64                     wait_start;
1233         u64                     wait_max;
1234         u64                     wait_count;
1235         u64                     wait_sum;
1236         u64                     iowait_count;
1237         u64                     iowait_sum;
1238
1239         u64                     sleep_start;
1240         u64                     sleep_max;
1241         s64                     sum_sleep_runtime;      /* the only signed member */
1242
1243         u64                     block_start;
1244         u64                     block_max;
1245         u64                     exec_max;
1246         u64                     slice_max;
1247
1248         u64                     nr_migrations_cold;
1249         u64                     nr_failed_migrations_affine;
1250         u64                     nr_failed_migrations_running;
1251         u64                     nr_failed_migrations_hot;
1252         u64                     nr_forced_migrations;
1253
1254         u64                     nr_wakeups;
1255         u64                     nr_wakeups_sync;
1256         u64                     nr_wakeups_migrate;
1257         u64                     nr_wakeups_local;
1258         u64                     nr_wakeups_remote;
1259         u64                     nr_wakeups_affine;
1260         u64                     nr_wakeups_affine_attempts;
1261         u64                     nr_wakeups_passive;
1262         u64                     nr_wakeups_idle;
1263 };
1264 #endif
1265
1266 struct sched_entity {   /* embedded in task_struct as ->se */
1267         struct load_weight      load;           /* for load-balancing */
1268         struct rb_node          run_node;       /* rb-tree linkage */
1269         struct list_head        group_node;     /* list linkage */
1270         unsigned int            on_rq;
1271
1272         u64                     exec_start;
1273         u64                     sum_exec_runtime;
1274         u64                     vruntime;
1275         u64                     prev_sum_exec_runtime;
1276
1277         u64                     nr_migrations;
1278
1279 #ifdef CONFIG_SCHEDSTATS
1280         struct sched_statistics statistics;     /* only with CONFIG_SCHEDSTATS */
1281 #endif
1282
1283 #ifdef CONFIG_FAIR_GROUP_SCHED
1284         int                     depth;
1285         struct sched_entity     *parent;
1286         /* rq on which this entity is (to be) queued: */
1287         struct cfs_rq           *cfs_rq;
1288         /* rq "owned" by this entity/group: */
1289         struct cfs_rq           *my_q;
1290 #endif
1291
1292 #ifdef CONFIG_SMP
1293         /*
1294          * Per entity load average tracking.
1295          *
1296          * Put into separate cache line so it does not
1297          * collide with read-mostly values above.
1298          */
1299         struct sched_avg        avg ____cacheline_aligned_in_smp;
1300 #endif
1301 };
1302
1303 struct sched_rt_entity {        /* embedded in task_struct as ->rt */
1304         struct list_head run_list;      /* list linkage */
1305         unsigned long timeout;
1306         unsigned long watchdog_stamp;
1307         unsigned int time_slice;
1308         unsigned short on_rq;
1309         unsigned short on_list;
1310
1311         struct sched_rt_entity *back;
1312 #ifdef CONFIG_RT_GROUP_SCHED
1313         struct sched_rt_entity  *parent;
1314         /* rq on which this entity is (to be) queued: */
1315         struct rt_rq            *rt_rq;
1316         /* rq "owned" by this entity/group: */
1317         struct rt_rq            *my_q;
1318 #endif
1319 };
1320
1321 struct sched_dl_entity {        /* embedded in task_struct as ->dl */
1322         struct rb_node  rb_node;
1323
1324         /*
1325          * Original scheduling parameters. Copied here from sched_attr
1326          * during sched_setattr(), they will remain the same until
1327          * the next sched_setattr().
1328          */
1329         u64 dl_runtime;         /* maximum runtime for each instance    */
1330         u64 dl_deadline;        /* relative deadline of each instance   */
1331         u64 dl_period;          /* separation of two instances (period) */
1332         u64 dl_bw;              /* dl_runtime / dl_deadline             */
1333
1334         /*
1335          * Actual scheduling parameters. Initialized with the values above,
1336          * they are continuously updated during task execution. Note that
1337          * the remaining runtime could be < 0 in case we are in overrun.
1338          */
1339         s64 runtime;            /* remaining runtime for this instance  */
1340         u64 deadline;           /* absolute deadline for this instance  */
1341         unsigned int flags;     /* specifying the scheduler behaviour   */
1342
1343         /*
1344          * Some bool flags:
1345          *
1346          * @dl_throttled tells if we exhausted the runtime. If so, the
1347          * task has to wait for a replenishment to be performed at the
1348          * next firing of dl_timer.
1349          *
1350          * @dl_boosted tells if we are boosted due to DI. If so we are
1351          * outside bandwidth enforcement mechanism (but only until we
1352          * exit the critical section);
1353          *
1354          * @dl_yielded tells if task gave up the cpu before consuming
1355          * all its available runtime during the last job.
1356          */
1357         int dl_throttled, dl_boosted, dl_yielded;
1358
1359         /*
1360          * Bandwidth enforcement timer. Each -deadline task has its
1361          * own bandwidth to be enforced, thus we need one timer per task.
1362          */
1363         struct hrtimer dl_timer;
1364 };
1365
1366 union rcu_special {
1367         struct {
1368                 u8 blocked;
1369                 u8 need_qs;
1370                 u8 exp_need_qs;
1371                 u8 pad; /* Otherwise the compiler can store garbage here. */
1372         } b; /* Bits. */
1373         u32 s; /* Set of bits: overlays the four u8 members of b. */
1374 };
1375 struct rcu_node;
1376
1377 enum perf_event_task_context {
1378         perf_invalid_context = -1,
1379         perf_hw_context = 0,
1380         perf_sw_context,        /* = 1 */
1381         perf_nr_task_contexts,  /* = 2, count of the non-negative values above */
1382 };
1383
1384 /* Track pages that require TLB flushes: a CPU mask plus two flags */
1385 struct tlbflush_unmap_batch {
1386         /*
1387          * Each bit set is a CPU that potentially has a TLB entry for one of
1388          * the PFNs being flushed. See set_tlb_ubc_flush_pending().
1389          */
1390         struct cpumask cpumask;
1391
1392         /* True if any bit in cpumask is set */
1393         bool flush_required;
1394
1395         /*
1396          * If true then the PTE was dirty when unmapped. The entry must be
1397          * flushed before IO is initiated or a stale TLB entry potentially
1398          * allows an update without redirtying the page.
1399          */
1400         bool writable;
1401 };
1402
1403 struct task_struct {
1404         volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
1405         void *stack;
1406         atomic_t usage;
1407         unsigned int flags;     /* per process flags, defined below */
1408         unsigned int ptrace;
1409
1410 #ifdef CONFIG_SMP
1411         struct llist_node wake_entry;
1412         int on_cpu;
1413         unsigned int wakee_flips;
1414         unsigned long wakee_flip_decay_ts;
1415         struct task_struct *last_wakee;
1416
1417         int wake_cpu;
1418 #endif
1419         int on_rq;
1420
1421         int prio, static_prio, normal_prio;
1422         unsigned int rt_priority;
1423         const struct sched_class *sched_class;
1424         struct sched_entity se;
1425         struct sched_rt_entity rt;
1426 #ifdef CONFIG_CGROUP_SCHED
1427         struct task_group *sched_task_group;
1428 #endif
1429         struct sched_dl_entity dl;
1430
1431 #ifdef CONFIG_PREEMPT_NOTIFIERS
1432         /* list of struct preempt_notifier: */
1433         struct hlist_head preempt_notifiers;
1434 #endif
1435
1436 #ifdef CONFIG_BLK_DEV_IO_TRACE
1437         unsigned int btrace_seq;
1438 #endif
1439
1440         unsigned int policy;
1441         int nr_cpus_allowed;
1442         cpumask_t cpus_allowed;
1443
1444 #ifdef CONFIG_PREEMPT_RCU
1445         int rcu_read_lock_nesting;
1446         union rcu_special rcu_read_unlock_special;
1447         struct list_head rcu_node_entry;
1448         struct rcu_node *rcu_blocked_node;
1449 #endif /* #ifdef CONFIG_PREEMPT_RCU */
1450 #ifdef CONFIG_TASKS_RCU
1451         unsigned long rcu_tasks_nvcsw;
1452         bool rcu_tasks_holdout;
1453         struct list_head rcu_tasks_holdout_list;
1454         int rcu_tasks_idle_cpu;
1455 #endif /* #ifdef CONFIG_TASKS_RCU */
1456
1457 #ifdef CONFIG_SCHED_INFO
1458         struct sched_info sched_info;
1459 #endif
1460
1461         struct list_head tasks;
1462 #ifdef CONFIG_SMP
1463         struct plist_node pushable_tasks;
1464         struct rb_node pushable_dl_tasks;
1465 #endif
1466
1467         struct mm_struct *mm, *active_mm;
1468         /* per-thread vma caching */
1469         u32 vmacache_seqnum;
1470         struct vm_area_struct *vmacache[VMACACHE_SIZE];
1471 #if defined(SPLIT_RSS_COUNTING)
1472         struct task_rss_stat    rss_stat;
1473 #endif
1474 /* task state */
1475         int exit_state;
1476         int exit_code, exit_signal;
1477         int pdeath_signal;  /*  The signal sent when the parent dies  */
1478         unsigned long jobctl;   /* JOBCTL_*, siglock protected */
1479
1480         /* Used for emulating ABI behavior of previous Linux versions */
1481         unsigned int personality;
1482
1483         /* scheduler bits, serialized by scheduler locks */
1484         unsigned sched_reset_on_fork:1;
1485         unsigned sched_contributes_to_load:1;
1486         unsigned sched_migrated:1;
1487         unsigned :0; /* force alignment to the next boundary */
1488
1489         /* unserialized, strictly 'current' */
1490         unsigned in_execve:1; /* bit to tell LSMs we're in execve */
1491         unsigned in_iowait:1;
1492 #ifdef CONFIG_MEMCG
1493         unsigned memcg_may_oom:1;
1494 #ifndef CONFIG_SLOB
1495         unsigned memcg_kmem_skip_account:1;
1496 #endif
1497 #endif
1498 #ifdef CONFIG_COMPAT_BRK
1499         unsigned brk_randomized:1;
1500 #endif
1501
1502         unsigned long atomic_flags; /* Flags needing atomic access. */
1503
1504         struct restart_block restart_block;
1505
1506         pid_t pid;
1507         pid_t tgid;
1508
1509 #ifdef CONFIG_CC_STACKPROTECTOR
1510         /* Canary value for the -fstack-protector gcc feature */
1511         unsigned long stack_canary;
1512 #endif
1513         /*
1514          * pointers to (original) parent process, youngest child, younger sibling,
1515          * older sibling, respectively.  (p->father can be replaced with
1516          * p->real_parent->pid)
1517          */
1518         struct task_struct __rcu *real_parent; /* real parent process */
1519         struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
1520         /*
1521          * children/sibling forms the list of my natural children
1522          */
1523         struct list_head children;      /* list of my children */
1524         struct list_head sibling;       /* linkage in my parent's children list */
1525         struct task_struct *group_leader;       /* threadgroup leader */
1526
1527         /*
1528          * ptraced is the list of tasks this task is using ptrace on.
1529          * This includes both natural children and PTRACE_ATTACH targets.
1530          * p->ptrace_entry is p's link on the p->parent->ptraced list.
1531          */
1532         struct list_head ptraced;
1533         struct list_head ptrace_entry;
1534
1535         /* PID/PID hash table linkage. */
1536         struct pid_link pids[PIDTYPE_MAX];
1537         struct list_head thread_group;
1538         struct list_head thread_node;
1539
1540         struct completion *vfork_done;          /* for vfork() */
1541         int __user *set_child_tid;              /* CLONE_CHILD_SETTID */
1542         int __user *clear_child_tid;            /* CLONE_CHILD_CLEARTID */
1543
1544         cputime_t utime, stime, utimescaled, stimescaled;
1545         cputime_t gtime;
1546         struct prev_cputime prev_cputime;
1547 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
1548         seqcount_t vtime_seqcount;
1549         unsigned long long vtime_snap;
1550         enum {
1551                 /* Task is sleeping or running in a CPU with VTIME inactive */
1552                 VTIME_INACTIVE = 0,
1553                 /* Task runs in userspace in a CPU with VTIME active */
1554                 VTIME_USER,
1555                 /* Task runs in kernelspace in a CPU with VTIME active */
1556                 VTIME_SYS,
1557         } vtime_snap_whence;
1558 #endif
1559
1560 #ifdef CONFIG_NO_HZ_FULL
1561         atomic_t tick_dep_mask;
1562 #endif
1563         unsigned long nvcsw, nivcsw; /* context switch counts */
1564         u64 start_time;         /* monotonic time in nsec */
1565         u64 real_start_time;    /* boot based time in nsec */
1566 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
1567         unsigned long min_flt, maj_flt;
1568
1569         struct task_cputime cputime_expires;
1570         struct list_head cpu_timers[3];
1571
1572 /* process credentials */
1573         const struct cred __rcu *real_cred; /* objective and real subjective task
1574                                          * credentials (COW) */
1575         const struct cred __rcu *cred;  /* effective (overridable) subjective task
1576                                          * credentials (COW) */
1577         char comm[TASK_COMM_LEN]; /* executable name excluding path
1578                                      - access with [gs]et_task_comm (which lock
1579                                        it with task_lock())
1580                                      - initialized normally by setup_new_exec */
1581 /* file system info */
1582         struct nameidata *nameidata;
1583 #ifdef CONFIG_SYSVIPC
1584 /* ipc stuff */
1585         struct sysv_sem sysvsem;
1586         struct sysv_shm sysvshm;
1587 #endif
1588 #ifdef CONFIG_DETECT_HUNG_TASK
1589 /* hung task detection */
1590         unsigned long last_switch_count;
1591 #endif
1592 /* filesystem information */
1593         struct fs_struct *fs;
1594 /* open file information */
1595         struct files_struct *files;
1596 /* namespaces */
1597         struct nsproxy *nsproxy;
1598 /* signal handlers */
1599         struct signal_struct *signal;
1600         struct sighand_struct *sighand;
1601
1602         sigset_t blocked, real_blocked;
1603         sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
1604         struct sigpending pending;
1605
1606         unsigned long sas_ss_sp;
1607         size_t sas_ss_size;
1608
1609         struct callback_head *task_works;
1610
1611         struct audit_context *audit_context;
1612 #ifdef CONFIG_AUDITSYSCALL
1613         kuid_t loginuid;
1614         unsigned int sessionid;
1615 #endif
1616         struct seccomp seccomp;
1617
1618 /* Thread group tracking */
1619         u32 parent_exec_id;
1620         u32 self_exec_id;
1621 /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
1622  * mempolicy */
1623         spinlock_t alloc_lock;
1624
1625         /* Protection of the PI data structures: */
1626         raw_spinlock_t pi_lock;
1627
1628         struct wake_q_node wake_q;
1629
1630 #ifdef CONFIG_RT_MUTEXES
1631         /* PI waiters blocked on a rt_mutex held by this task */
1632         struct rb_root pi_waiters;
1633         struct rb_node *pi_waiters_leftmost;
1634         /* Deadlock detection and priority inheritance handling */
1635         struct rt_mutex_waiter *pi_blocked_on;
1636 #endif
1637
1638 #ifdef CONFIG_DEBUG_MUTEXES
1639         /* mutex deadlock detection */
1640         struct mutex_waiter *blocked_on;
1641 #endif
1642 #ifdef CONFIG_TRACE_IRQFLAGS
1643         unsigned int irq_events;
1644         unsigned long hardirq_enable_ip;
1645         unsigned long hardirq_disable_ip;
1646         unsigned int hardirq_enable_event;
1647         unsigned int hardirq_disable_event;
1648         int hardirqs_enabled;
1649         int hardirq_context;
1650         unsigned long softirq_disable_ip;
1651         unsigned long softirq_enable_ip;
1652         unsigned int softirq_disable_event;
1653         unsigned int softirq_enable_event;
1654         int softirqs_enabled;
1655         int softirq_context;
1656 #endif
1657 #ifdef CONFIG_LOCKDEP
1658 # define MAX_LOCK_DEPTH 48UL
1659         u64 curr_chain_key;
1660         int lockdep_depth;
1661         unsigned int lockdep_recursion;
1662         struct held_lock held_locks[MAX_LOCK_DEPTH];
1663         gfp_t lockdep_reclaim_gfp;
1664 #endif
1665 #ifdef CONFIG_UBSAN
1666         unsigned int in_ubsan;
1667 #endif
1668
1669 /* journalling filesystem info */
1670         void *journal_info;
1671
1672 /* stacked block device info */
1673         struct bio_list *bio_list;
1674
1675 #ifdef CONFIG_BLOCK
1676 /* stack plugging */
1677         struct blk_plug *plug;
1678 #endif
1679
1680 /* VM state */
1681         struct reclaim_state *reclaim_state;
1682
1683         struct backing_dev_info *backing_dev_info;
1684
1685         struct io_context *io_context;
1686
1687         unsigned long ptrace_message;
1688         siginfo_t *last_siginfo; /* For ptrace use.  */
1689         struct task_io_accounting ioac;
1690 #if defined(CONFIG_TASK_XACCT)
1691         u64 acct_rss_mem1;      /* accumulated rss usage */
1692         u64 acct_vm_mem1;       /* accumulated virtual memory usage */
1693         cputime_t acct_timexpd; /* stime + utime since last update */
1694 #endif
1695 #ifdef CONFIG_CPUSETS
1696         nodemask_t mems_allowed;        /* Protected by alloc_lock */
1697         seqcount_t mems_allowed_seq;    /* Seqence no to catch updates */
1698         int cpuset_mem_spread_rotor;
1699         int cpuset_slab_spread_rotor;
1700 #endif
1701 #ifdef CONFIG_CGROUPS
1702         /* Control Group info protected by css_set_lock */
1703         struct css_set __rcu *cgroups;
1704         /* cg_list protected by css_set_lock and tsk->alloc_lock */
1705         struct list_head cg_list;
1706 #endif
1707 #ifdef CONFIG_FUTEX
1708         struct robust_list_head __user *robust_list;
1709 #ifdef CONFIG_COMPAT
1710         struct compat_robust_list_head __user *compat_robust_list;
1711 #endif
1712         struct list_head pi_state_list;
1713         struct futex_pi_state *pi_state_cache;
1714 #endif
1715 #ifdef CONFIG_PERF_EVENTS
1716         struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
1717         struct mutex perf_event_mutex;
1718         struct list_head perf_event_list;
1719 #endif
1720 #ifdef CONFIG_DEBUG_PREEMPT
1721         unsigned long preempt_disable_ip;
1722 #endif
1723 #ifdef CONFIG_NUMA
1724         struct mempolicy *mempolicy;    /* Protected by alloc_lock */
1725         short il_next;
1726         short pref_node_fork;
1727 #endif
1728 #ifdef CONFIG_NUMA_BALANCING
1729         int numa_scan_seq;
1730         unsigned int numa_scan_period;
1731         unsigned int numa_scan_period_max;
1732         int numa_preferred_nid;
1733         unsigned long numa_migrate_retry;
1734         u64 node_stamp;                 /* migration stamp  */
1735         u64 last_task_numa_placement;
1736         u64 last_sum_exec_runtime;
1737         struct callback_head numa_work;
1738
1739         struct list_head numa_entry;
1740         struct numa_group *numa_group;
1741
1742         /*
1743          * numa_faults is an array split into four regions:
1744          * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
1745          * in this precise order.
1746          *
1747          * faults_memory: Exponential decaying average of faults on a per-node
1748          * basis. Scheduling placement decisions are made based on these
1749          * counts. The values remain static for the duration of a PTE scan.
1750          * faults_cpu: Track the nodes the process was running on when a NUMA
1751          * hinting fault was incurred.
1752          * faults_memory_buffer and faults_cpu_buffer: Record faults per node
1753          * during the current scan window. When the scan completes, the counts
1754          * in faults_memory and faults_cpu decay and these values are copied.
1755          */
1756         unsigned long *numa_faults;
1757         unsigned long total_numa_faults;
1758
1759         /*
1760          * numa_faults_locality tracks if faults recorded during the last
1761          * scan window were remote/local or failed to migrate. The task scan
1762          * period is adapted based on the locality of the faults with different
1763          * weights depending on whether they were shared or private faults
1764          */
1765         unsigned long numa_faults_locality[3];
1766
1767         unsigned long numa_pages_migrated;
1768 #endif /* CONFIG_NUMA_BALANCING */
1769
1770 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
1771         struct tlbflush_unmap_batch tlb_ubc;
1772 #endif
1773
1774         struct rcu_head rcu;
1775
1776         /*
1777          * cache last used pipe for splice
1778          */
1779         struct pipe_inode_info *splice_pipe;
1780
1781         struct page_frag task_frag;
1782
1783 #ifdef  CONFIG_TASK_DELAY_ACCT
1784         struct task_delay_info *delays;
1785 #endif
1786 #ifdef CONFIG_FAULT_INJECTION
1787         int make_it_fail;
1788 #endif
1789         /*
1790          * when (nr_dirtied >= nr_dirtied_pause), it's time to call
1791          * balance_dirty_pages() for some dirty throttling pause
1792          */
1793         int nr_dirtied;
1794         int nr_dirtied_pause;
1795         unsigned long dirty_paused_when; /* start of a write-and-pause period */
1796
1797 #ifdef CONFIG_LATENCYTOP
1798         int latency_record_count;
1799         struct latency_record latency_record[LT_SAVECOUNT];
1800 #endif
1801         /*
1802          * time slack values; these are used to round up poll() and
1803          * select() etc timeout values. These are in nanoseconds.
1804          */
1805         u64 timer_slack_ns;
1806         u64 default_timer_slack_ns;
1807
1808 #ifdef CONFIG_KASAN
1809         unsigned int kasan_depth;
1810 #endif
1811 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
1812         /* Index of current stored address in ret_stack */
1813         int curr_ret_stack;
1814         /* Stack of return addresses for return function tracing */
1815         struct ftrace_ret_stack *ret_stack;
1816         /* time stamp for last schedule */
1817         unsigned long long ftrace_timestamp;
1818         /*
1819          * Number of functions that haven't been traced
1820          * because of depth overrun.
1821          */
1822         atomic_t trace_overrun;
1823         /* Pause for the tracing */
1824         atomic_t tracing_graph_pause;
1825 #endif
1826 #ifdef CONFIG_TRACING
1827         /* state flags for use by tracers */
1828         unsigned long trace;
1829         /* bitmask and counter of trace recursion */
1830         unsigned long trace_recursion;
1831 #endif /* CONFIG_TRACING */
1832 #ifdef CONFIG_KCOV
1833         /* Coverage collection mode enabled for this task (0 if disabled). */
1834         enum kcov_mode kcov_mode;
1835         /* Size of the kcov_area. */
1836         unsigned        kcov_size;
1837         /* Buffer for coverage collection. */
1838         void            *kcov_area;
1839         /* kcov descriptor wired with this task or NULL. */
1840         struct kcov     *kcov;
1841 #endif
1842 #ifdef CONFIG_MEMCG
1843         struct mem_cgroup *memcg_in_oom;
1844         gfp_t memcg_oom_gfp_mask;
1845         int memcg_oom_order;
1846
1847         /* number of pages to reclaim on returning to userland */
1848         unsigned int memcg_nr_pages_over_high;
1849 #endif
1850 #ifdef CONFIG_UPROBES
1851         struct uprobe_task *utask;
1852 #endif
1853 #if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
1854         unsigned int    sequential_io;
1855         unsigned int    sequential_io_avg;
1856 #endif
1857 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
1858         unsigned long   task_state_change;
1859 #endif
1860         int pagefault_disabled;
1861 #ifdef CONFIG_MMU
1862         struct task_struct *oom_reaper_list;
1863 #endif
1864 /* CPU-specific state of this task */
1865         struct thread_struct thread;
1866 /*
1867  * WARNING: on x86, 'thread_struct' contains a variable-sized
1868  * structure.  It *MUST* be at the end of 'task_struct'.
1869  *
1870  * Do not put anything below here!
1871  */
1872 };
1873
/*
 * arch_task_struct_size: a variable declared here when the architecture
 * selects CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT, otherwise simply
 * sizeof(struct task_struct).
 */
#ifndef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
# define arch_task_struct_size (sizeof(struct task_struct))
#else
extern int arch_task_struct_size __read_mostly;
#endif
1879
/*
 * Future-safe accessor: yields the address of the cpus_allowed member of
 * the task @p points to, so users need not name the field themselves.
 */
#define tsk_cpus_allowed(p) (&(p)->cpus_allowed)
1882
/*
 * TNF_* flag bits; each value is a distinct single bit so they can be OR-ed.
 * NOTE(review): presumably these form the 'flags' argument of
 * task_numa_fault() -- confirm against the callers.
 */
#define TNF_MIGRATED            0x01
#define TNF_NO_GROUP            0x02
#define TNF_SHARED              0x04
#define TNF_FAULT_LOCAL         0x08
#define TNF_MIGRATE_FAIL        0x10
1888
/*
 * NUMA balancing entry points.  With CONFIG_NUMA_BALANCING the functions are
 * only declared here; without it they collapse to the inline stubs below.
 */
#ifdef CONFIG_NUMA_BALANCING
void task_numa_fault(int last_node, int node, int pages, int flags);
pid_t task_numa_group_id(struct task_struct *p);
void set_numabalancing_state(bool enabled);
void task_numa_free(struct task_struct *p);
bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
                                int src_nid, int dst_cpu);
#else
/* Stub: does nothing. */
static inline void task_numa_fault(int last_node, int node, int pages,
                                   int flags) { }

/* Stub: always reports group id 0. */
static inline pid_t task_numa_group_id(struct task_struct *p)
{
        return 0;
}

/* Stub: does nothing. */
static inline void set_numabalancing_state(bool enabled) { }

/* Stub: does nothing. */
static inline void task_numa_free(struct task_struct *p) { }

/* Stub: always answers true. */
static inline bool should_numa_migrate_memory(struct task_struct *p,
                                struct page *page, int src_nid, int dst_cpu)
{
        return true;
}
#endif
1917
1918 static inline struct pid *task_pid(struct task_struct *task)
1919 {
1920         return task->pids[PIDTYPE_PID].pid;
1921 }
1922
1923 static inline struct pid *task_tgid(struct task_struct *task)
1924 {
1925         return task->group_leader->pids[PIDTYPE_PID].pid;
1926 }
1927
1928 /*
1929  * Without tasklist or rcu lock it is not safe to dereference
1930  * the result of task_pgrp/task_session even if task == current,
1931  * we can race with another thread doing sys_setsid/sys_setpgid.
1932  */
1933 static inline struct pid *task_pgrp(struct task_struct *task)
1934 {
1935         return task->group_leader->pids[PIDTYPE_PGID].pid;
1936 }
1937
1938 static inline struct pid *task_session(struct task_struct *task)
1939 {
1940         return task->group_leader->pids[PIDTYPE_SID].pid;
1941 }
1942
1943 struct pid_namespace;
1944
/*
 * the helpers to get the task's different pids as they are seen
 * from various namespaces
 *
 * task_xid_nr()     : global id, i.e. the id seen from the init namespace;
 * task_xid_vnr()    : virtual id, i.e. the id seen from the pid namespace of
 *                     current.
 * task_xid_nr_ns()  : id seen from the ns specified;
 *
 * set_task_vxid()   : assigns a virtual id to a task;
 *
 * see also pid_nr() etc in include/linux/pid.h
 */
/*
 * Common back end of the task_*_nr_ns() and task_*_vnr() wrappers in this
 * header: they pass the task, a PIDTYPE_* selector and either a caller
 * supplied namespace or NULL.
 * NOTE(review): NULL presumably selects current's pid namespace, matching
 * the _vnr description above -- confirm in the definition.
 */
pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
                        struct pid_namespace *ns);
1960
1961 static inline pid_t task_pid_nr(struct task_struct *tsk)
1962 {
1963         return tsk->pid;
1964 }
1965
1966 static inline pid_t task_pid_nr_ns(struct task_struct *tsk,
1967                                         struct pid_namespace *ns)
1968 {
1969         return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
1970 }
1971
1972 static inline pid_t task_pid_vnr(struct task_struct *tsk)
1973 {
1974         return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
1975 }
1976
1977
1978 static inline pid_t task_tgid_nr(struct task_struct *tsk)
1979 {
1980         return tsk->tgid;
1981 }
1982
1983 pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
1984
1985 static inline pid_t task_tgid_vnr(struct task_struct *tsk)
1986 {
1987         return pid_vnr(task_tgid(tsk));
1988 }
1989
1990
1991 static inline int pid_alive(const struct task_struct *p);
1992 static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
1993 {
1994         pid_t pid = 0;
1995
1996         rcu_read_lock();
1997         if (pid_alive(tsk))
1998                 pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns);
1999         rcu_read_unlock();
2000
2001         return pid;
2002 }
2003
2004 static inline pid_t task_ppid_nr(const struct task_struct *tsk)
2005 {
2006         return task_ppid_nr_ns(tsk, &init_pid_ns);
2007 }
2008
2009 static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk,
2010                                         struct pid_namespace *ns)
2011 {
2012         return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns);
2013 }
2014
2015 static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
2016 {
2017         return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL);
2018 }
2019
2020
2021 static inline pid_t task_session_nr_ns(struct task_struct *tsk,
2022                                         struct pid_namespace *ns)
2023 {
2024         return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns);
2025 }
2026
2027 static inline pid_t task_session_vnr(struct task_struct *tsk)
2028 {
2029         return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL);
2030 }
2031
2032 /* obsolete, do not use */
2033 static inline pid_t task_pgrp_nr(struct task_struct *tsk)
2034 {
2035         return task_pgrp_nr_ns(tsk, &init_pid_ns);
2036 }
2037
2038 /**
2039  * pid_alive - check that a task structure is not stale
2040  * @p: Task structure to be checked.
2041  *
2042  * Test if a process is not yet dead (at most zombie state)
2043  * If pid_alive fails, then pointers within the task structure
2044  * can be stale and must not be dereferenced.
2045  *
2046  * Return: 1 if the process is alive. 0 otherwise.
2047  */
2048 static inline int pid_alive(const struct task_struct *p)
2049 {
2050         return p->pids[PIDTYPE_PID].pid != NULL;
2051 }
2052
2053 /**
2054  * is_global_init - check if a task structure is init. Since init
2055  * is free to have sub-threads we need to check tgid.
2056  * @tsk: Task structure to be checked.
2057  *
2058  * Check if a task structure is the first user space task the kernel created.
2059  *
2060  * Return: 1 if the task structure is init. 0 otherwise.
2061  */
2062 static inline int is_global_init(struct task_struct *tsk)
2063 {
2064         return task_tgid_nr(tsk) == 1;
2065 }
2066
extern struct pid *cad_pid;

void free_task(struct task_struct *tsk);

/* Take a reference: atomically increments the usage counter of @tsk. */
#define get_task_struct(tsk)                    \
        do {                                    \
                atomic_inc(&(tsk)->usage);      \
        } while (0)
2071
2072 extern void __put_task_struct(struct task_struct *t);
2073
2074 static inline void put_task_struct(struct task_struct *t)
2075 {
2076         if (atomic_dec_and_test(&t->usage))
2077                 __put_task_struct(t);
2078 }
2079
2080 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
2081 extern void task_cputime(struct task_struct *t,
2082                          cputime_t *utime, cputime_t *stime);
2083 extern void task_cputime_scaled(struct task_struct *t,
2084                                 cputime_t *utimescaled, cputime_t *stimescaled);
2085 extern cputime_t task_gtime(struct task_struct *t);
2086 #else
2087 static inline void task_cputime(struct task_struct *t,
2088                                 cputime_t *utime, cputime_t *stime)
2089 {
2090         if (utime)
2091                 *utime = t->utime;
2092         if (stime)
2093                 *stime = t->stime;
2094 }
2095
2096 static inline void task_cputime_scaled(struct task_struct *t,
2097                                        cputime_t *utimescaled,
2098                                        cputime_t *stimescaled)
2099 {
2100         if (utimescaled)
2101                 *utimescaled = t->utimescaled;
2102         if (stimescaled)
2103                 *stimescaled = t->stimescaled;
2104 }
2105
2106 static inline cputime_t task_gtime(struct task_struct *t)
2107 {
2108         return t->gtime;
2109 }
2110 #endif
2111 extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
2112 extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
2113
/*
 * Per process flags (bit values for task_struct::flags)
 */
#define PF_EXITING              0x00000004      /* task is being shut down */
#define PF_EXITPIDONE           0x00000008      /* pi exit done on shut down */
#define PF_VCPU                 0x00000010      /* task is a virtual CPU */
#define PF_WQ_WORKER            0x00000020      /* task is a workqueue worker */
#define PF_FORKNOEXEC           0x00000040      /* forked but has not exec'ed */
#define PF_MCE_PROCESS          0x00000080      /* process policy on mce errors */
#define PF_SUPERPRIV            0x00000100      /* used super-user privileges */
#define PF_DUMPCORE             0x00000200      /* dumped core */
#define PF_SIGNALED             0x00000400      /* killed by a signal */
#define PF_MEMALLOC             0x00000800      /* allocating memory */
#define PF_NPROC_EXCEEDED       0x00001000      /* set_user noticed that RLIMIT_NPROC was exceeded */
#define PF_USED_MATH            0x00002000      /* if unset the fpu must be initialized before use */
#define PF_USED_ASYNC           0x00004000      /* used async_schedule*(), used by module init */
#define PF_NOFREEZE             0x00008000      /* this thread should not be frozen */
#define PF_FROZEN               0x00010000      /* frozen for system suspend */
#define PF_FSTRANS              0x00020000      /* inside a filesystem transaction */
#define PF_KSWAPD               0x00040000      /* task is kswapd */
#define PF_MEMALLOC_NOIO        0x00080000      /* allocating memory without IO involved */
#define PF_LESS_THROTTLE        0x00100000      /* throttle less: this task cleans memory */
#define PF_KTHREAD              0x00200000      /* task is a kernel thread */
#define PF_RANDOMIZE            0x00400000      /* randomize virtual address space */
#define PF_SWAPWRITE            0x00800000      /* allowed to write to swap */
#define PF_NO_SETAFFINITY       0x04000000      /* userland is not allowed to meddle with cpus_allowed */
#define PF_MCE_EARLY            0x08000000      /* early kill for mce process policy */
#define PF_MUTEX_TESTER         0x20000000      /* thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP         0x40000000      /* freezer should not count it as freezable */
#define PF_SUSPEND_TASK         0x80000000      /* this thread called freeze_processes and should not be frozen */
2144
/*
 * tsk->flags may only be read and written by the _current_ task; other
 * tasks may merely read it (e.g. through tsk_used_math during threaded
 * core dumping).  Two exceptions exist: a ptracer may write the flags of
 * its traced child, and a forking parent may write the flags of its child.
 * In both cases the child is guaranteed not to be running and therefore
 * cannot be changing child->flags at the same time.
 */
#define clear_stopped_child_used_math(child)            \
        do {                                            \
                (child)->flags &= ~PF_USED_MATH;        \
        } while (0)
#define set_stopped_child_used_math(child)              \
        do {                                            \
                (child)->flags |= PF_USED_MATH;         \
        } while (0)
#define clear_used_math() clear_stopped_child_used_math(current)
#define set_used_math() set_stopped_child_used_math(current)
/* Clear PF_USED_MATH in @child, then set it again iff @condition holds. */
#define conditional_stopped_child_used_math(condition, child)           \
        do {                                                            \
                (child)->flags &= ~PF_USED_MATH;                        \
                (child)->flags |= (condition) ? PF_USED_MATH : 0;       \
        } while (0)
#define conditional_used_math(condition) \
        conditional_stopped_child_used_math(condition, current)
/* Make @child's PF_USED_MATH bit equal to that of current. */
#define copy_to_stopped_child_used_math(child)                          \
        do {                                                            \
                (child)->flags &= ~PF_USED_MATH;                        \
                (child)->flags |= current->flags & PF_USED_MATH;        \
        } while (0)
/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */
#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
#define used_math() tsk_used_math(current)
2169
2170 /* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags
2171  * __GFP_FS is also cleared as it implies __GFP_IO.
2172  */
2173 static inline gfp_t memalloc_noio_flags(gfp_t flags)
2174 {
2175         if (unlikely(current->flags & PF_MEMALLOC_NOIO))
2176                 flags &= ~(__GFP_IO | __GFP_FS);
2177         return flags;
2178 }
2179
2180 static inline unsigned int memalloc_noio_save(void)
2181 {
2182         unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
2183         current->flags |= PF_MEMALLOC_NOIO;
2184         return flags;
2185 }
2186
2187 static inline void memalloc_noio_restore(unsigned int flags)
2188 {
2189         current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
2190 }
2191
/* Per-process atomic flags: bit numbers within task_struct::atomic_flags. */
#define PFA_NO_NEW_PRIVS        0       /* May not gain new privileges. */
#define PFA_SPREAD_PAGE         1       /* Spread page cache over cpuset */
#define PFA_SPREAD_SLAB         2       /* Spread some slab caches over cpuset */
2196
2197
2198 #define TASK_PFA_TEST(name, func)                                       \
2199         static inline bool task_##func(struct task_struct *p)           \
2200         { return test_bit(PFA_##name, &p->atomic_flags); }
2201 #define TASK_PFA_SET(name, func)                                        \
2202         static inline void task_set_##func(struct task_struct *p)       \
2203         { set_bit(PFA_##name, &p->atomic_flags); }
2204 #define TASK_PFA_CLEAR(name, func)                                      \
2205         static inline void task_clear_##func(struct task_struct *p)     \
2206         { clear_bit(PFA_##name, &p->atomic_flags); }
2207
2208 TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
2209 TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)
2210
2211 TASK_PFA_TEST(SPREAD_PAGE, spread_page)
2212 TASK_PFA_SET(SPREAD_PAGE, spread_page)
2213 TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)
2214
2215 TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
2216 TASK_PFA_SET(SPREAD_SLAB, spread_slab)
2217 TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
2218
/*
 * task->jobctl flags
 *
 * The low 16 bits hold a signal number; bits 16..22 are individual flags,
 * each defined first as a bit index and then as the matching mask.
 */
#define JOBCTL_STOP_SIGMASK             0xffff  /* signr of the last group stop */

#define JOBCTL_STOP_DEQUEUED_BIT        16      /* stop signal dequeued */
#define JOBCTL_STOP_PENDING_BIT         17      /* task should stop for group stop */
#define JOBCTL_STOP_CONSUME_BIT         18      /* consume group stop count */
#define JOBCTL_TRAP_STOP_BIT            19      /* trap for STOP */
#define JOBCTL_TRAP_NOTIFY_BIT          20      /* trap for NOTIFY */
#define JOBCTL_TRAPPING_BIT             21      /* switching to TRACED */
#define JOBCTL_LISTENING_BIT            22      /* ptracer is listening for events */

#define JOBCTL_STOP_DEQUEUED            (1UL << JOBCTL_STOP_DEQUEUED_BIT)
#define JOBCTL_STOP_PENDING             (1UL << JOBCTL_STOP_PENDING_BIT)
#define JOBCTL_STOP_CONSUME             (1UL << JOBCTL_STOP_CONSUME_BIT)
#define JOBCTL_TRAP_STOP                (1UL << JOBCTL_TRAP_STOP_BIT)
#define JOBCTL_TRAP_NOTIFY              (1UL << JOBCTL_TRAP_NOTIFY_BIT)
#define JOBCTL_TRAPPING                 (1UL << JOBCTL_TRAPPING_BIT)
#define JOBCTL_LISTENING                (1UL << JOBCTL_LISTENING_BIT)

/* Composite masks built from the single-bit masks above. */
#define JOBCTL_TRAP_MASK                (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
#define JOBCTL_PENDING_MASK             (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)

bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask);
void task_clear_jobctl_trapping(struct task_struct *task);
void task_clear_jobctl_pending(struct task_struct *task, unsigned long mask);
2248
/*
 * Reset the RCU-related fields of task @p to their initial values.  Only
 * the fields that exist under the enabled RCU options are touched.
 */
static inline void rcu_copy_process(struct task_struct *p)
{
#ifdef CONFIG_PREEMPT_RCU
        /* No read-side nesting, nothing special pending, not queued. */
        INIT_LIST_HEAD(&p->rcu_node_entry);
        p->rcu_blocked_node = NULL;
        p->rcu_read_unlock_special.s = 0;
        p->rcu_read_lock_nesting = 0;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TASKS_RCU
        /* Not a holdout, empty holdout list, idle cpu recorded as -1. */
        INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
        p->rcu_tasks_holdout = false;
        p->rcu_tasks_idle_cpu = -1;
#endif /* #ifdef CONFIG_TASKS_RCU */
}
2263
2264 static inline void tsk_restore_flags(struct task_struct *task,
2265                                 unsigned long orig_flags, unsigned long flags)
2266 {
2267         task->flags &= ~flags;
2268         task->flags |= orig_flags & flags;
2269 }
2270
2271 extern int cpuset_cpumask_can_shrink(const struct cpumask *cur,
2272                                      const struct cpumask *trial);
2273 extern int task_can_attach(struct task_struct *p,
2274                            const struct cpumask *cs_cpus_allowed);
2275 #ifdef CONFIG_SMP
2276 extern void do_set_cpus_allowed(struct task_struct *p,
2277                                const struct cpumask *new_mask);
2278
2279 extern int set_cpus_allowed_ptr(struct task_struct *p,
2280                                 const struct cpumask *new_mask);
2281 #else
2282 static inline void do_set_cpus_allowed(struct task_struct *p,
2283                                       const struct cpumask *new_mask)
2284 {
2285 }
2286 static inline int set_cpus_allowed_ptr(struct task_struct *p,
2287                                        const struct cpumask *new_mask)
2288 {
2289         if (!cpumask_test_cpu(0, new_mask))
2290                 return -EINVAL;
2291         return 0;
2292 }
2293 #endif
2294
/* Idle entry/exit hooks: empty stubs unless CONFIG_NO_HZ_COMMON is set. */
#ifndef CONFIG_NO_HZ_COMMON
static inline void calc_load_enter_idle(void)
{
}

static inline void calc_load_exit_idle(void)
{
}
#else
void calc_load_enter_idle(void);
void calc_load_exit_idle(void);
#endif /* CONFIG_NO_HZ_COMMON */
2302
2303 /*
2304  * Do not use outside of architecture code which knows its limitations.
2305  *
2306  * sched_clock() has no promise of monotonicity or bounded drift between
2307  * CPUs, use (which you should not) requires disabling IRQs.
2308  *
2309  * Please use one of the three interfaces below.
2310  */
2311 extern unsigned long long notrace sched_clock(void);
2312 /*
2313  * See the comment in kernel/sched/clock.c
2314  */
2315 extern u64 cpu_clock(int cpu);
2316 extern u64 local_clock(void);
2317 extern u64 running_clock(void);
2318 extern u64 sched_clock_cpu(int cpu);
2319
2320
2321 extern void sched_clock_init(void);
2322
2323 #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
2324 static inline void sched_clock_tick(void)
2325 {
2326 }
2327
2328 static inline void sched_clock_idle_sleep_event(void)
2329 {
2330 }
2331
2332 static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
2333 {
2334 }
2335 #else
2336 /*
2337  * Architectures can set this to 1 if they have specified
2338  * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
2339  * but then during bootup it turns out that sched_clock()
2340  * is reliable after all:
2341  */
2342 extern int sched_clock_stable(void);
2343 extern void set_sched_clock_stable(void);
2344 extern void clear_sched_clock_stable(void);
2345
2346 extern void sched_clock_tick(void);
2347 extern void sched_clock_idle_sleep_event(void);
2348 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
2349 #endif
2350
#ifndef CONFIG_IRQ_TIME_ACCOUNTING
/* Empty stubs when irq time accounting is not configured. */
static inline void enable_sched_clock_irqtime(void)
{
}

static inline void disable_sched_clock_irqtime(void)
{
}
#else
/*
 * Interface for opting in, at run time, to irq time accounting based off
 * of sched_clock.  The opt-in is explicit so that slow sched_clocks do not
 * incur a performance penalty.
 */
void enable_sched_clock_irqtime(void);
void disable_sched_clock_irqtime(void);
#endif
2363
/* Declared here, defined out of line. */
unsigned long long task_sched_runtime(struct task_struct *task);
2366
/* sched_exec is called by processes performing an exec */
#ifdef CONFIG_SMP
extern void sched_exec(void);
#else
/*
 * No-op without CONFIG_SMP.  The expansion is an empty do-while rather than
 * a bare "{}" so that "sched_exec();" is exactly one statement and remains
 * valid in an unbraced if/else.
 */
#define sched_exec()   do { } while (0)
#endif
2373
/*
 * Unconditional re-declaration of the idle hooks.  Both arms of the
 * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK conditional earlier in this header
 * already provide these two names (as inline stubs or as externs).
 * NOTE(review): looks redundant -- confirm nothing relies on it before
 * dropping.
 */
extern void sched_clock_idle_sleep_event(void);
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
2376
/* Empty stub unless CONFIG_HOTPLUG_CPU is set. */
#ifndef CONFIG_HOTPLUG_CPU
static inline void idle_task_exit(void)
{
}
#else
void idle_task_exit(void);
#endif
2382
/* Real implementation only with both CONFIG_NO_HZ_COMMON and CONFIG_SMP. */
#if !defined(CONFIG_NO_HZ_COMMON) || !defined(CONFIG_SMP)
static inline void wake_up_nohz_cpu(int cpu)
{
}
#else
void wake_up_nohz_cpu(int cpu);
#endif
2388
/* Only declared when CONFIG_NO_HZ_FULL is set; there is no fallback stub. */
#if defined(CONFIG_NO_HZ_FULL)
u64 scheduler_tick_max_deferment(void);
#endif
2392
/*
 * Autogroup hooks: empty stubs without CONFIG_SCHED_AUTOGROUP.  The two
 * proc helpers exist only with CONFIG_SCHED_AUTOGROUP and CONFIG_PROC_FS
 * and have no stub.
 */
#ifndef CONFIG_SCHED_AUTOGROUP
static inline void sched_autogroup_create_attach(struct task_struct *p)
{
}

static inline void sched_autogroup_detach(struct task_struct *p)
{
}

static inline void sched_autogroup_fork(struct signal_struct *sig)
{
}

static inline void sched_autogroup_exit(struct signal_struct *sig)
{
}
#else
void sched_autogroup_create_attach(struct task_struct *p);
void sched_autogroup_detach(struct task_struct *p);
void sched_autogroup_fork(struct signal_struct *sig);
void sched_autogroup_exit(struct signal_struct *sig);
#ifdef CONFIG_PROC_FS
void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
int proc_sched_autogroup_set_nice(struct task_struct *p, int nice);
#endif
#endif
2408
/* Scheduler entry points declared here and defined out of line. */
int yield_to(struct task_struct *p, bool preempt);
void set_user_nice(struct task_struct *p, long nice);
int task_prio(const struct task_struct *p);
2412 /**
2413  * task_nice - return the nice value of a given task.
2414  * @p: the task in question.
2415  *
2416  * Return: The nice value [ -20 ... 0 ... 19 ].
2417  */
2418 static inline int task_nice(const struct task_struct *p)
2419 {
2420         return PRIO_TO_NICE((p)->static_prio);
2421 }
/* Scheduler entry points declared here and defined out of line. */
int can_nice(const struct task_struct *p, const int nice);
int task_curr(const struct task_struct *p);
int idle_cpu(int cpu);
int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
int sched_setscheduler_nocheck(struct task_struct *, int,
                               const struct sched_param *);
int sched_setattr(struct task_struct *, const struct sched_attr *);
struct task_struct *idle_task(int cpu);
2432 /**
2433  * is_idle_task - is the specified task an idle task?
2434  * @p: the task in question.
2435  *
2436  * Return: 1 if @p is an idle task. 0 otherwise.
2437  */
2438 static inline bool is_idle_task(const struct task_struct *p)
2439 {
2440         return p->pid == 0;
2441 }
/* Per-cpu current-task accessors and yield(), all defined out of line. */
struct task_struct *curr_task(int cpu);
void set_curr_task(int cpu, struct task_struct *p);

void yield(void);
2446
2447 union thread_union {
2448         struct thread_info thread_info;
2449         unsigned long stack[THREAD_SIZE/sizeof(long)];
2450 };
2451
2452 #ifndef __HAVE_ARCH_KSTACK_END
2453 static inline int kstack_end(void *addr)
2454 {
2455         /* Reliable end of stack detection:
2456          * Some APM bios versions misalign the stack
2457          */
2458         return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*)));
2459 }
2460 #endif
2461
/* Objects defined elsewhere and only declared here. */
extern union thread_union init_thread_union;
extern struct task_struct init_task;

extern struct   mm_struct init_mm;

/* Namespace handed to the *_nr_ns() helpers by task_ppid_nr()/task_pgrp_nr(). */
extern struct pid_namespace init_pid_ns;
2468
/*
 * Look a task up by one of its numerical ids:
 *
 * find_task_by_pid_ns():
 *      finds a task by its pid in the specified namespace
 * find_task_by_vpid():
 *      finds a task by its virtual pid
 *
 * see also find_vpid() etc in include/linux/pid.h
 */

struct task_struct *find_task_by_vpid(pid_t nr);
struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns);
2483
2484 /* per-UID process charging. */
2485 extern struct user_struct * alloc_uid(kuid_t);
2486 static inline struct user_struct *get_uid(struct user_struct *u)
2487 {
2488         atomic_inc(&u->__count);
2489         return u;
2490 }
2491 extern void free_uid(struct user_struct *);
2492
2493 #include <asm/current.h>
2494
2495 extern void xtime_update(unsigned long ticks);
2496
2497 extern int wake_up_state(struct task_struct *tsk, unsigned int state);
2498 extern int wake_up_process(struct task_struct *tsk);
2499 extern void wake_up_new_task(struct task_struct *tsk);
#ifdef CONFIG_SMP
extern void kick_process(struct task_struct *tsk);
#else
/* Without CONFIG_SMP kick_process() is an empty stub. */
static inline void kick_process(struct task_struct *tsk)
{
}
#endif
2505 extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
2506 extern void sched_dead(struct task_struct *p);
2507
2508 extern void proc_caches_init(void);
2509 extern void flush_signals(struct task_struct *);
2510 extern void ignore_signals(struct task_struct *);
2511 extern void flush_signal_handlers(struct task_struct *, int force_default);
2512 extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
2513
2514 static inline int kernel_dequeue_signal(siginfo_t *info)
2515 {
2516         struct task_struct *tsk = current;
2517         siginfo_t __info;
2518         int ret;
2519
2520         spin_lock_irq(&tsk->sighand->siglock);
2521         ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info);
2522         spin_unlock_irq(&tsk->sighand->siglock);
2523
2524         return ret;
2525 }
2526
2527 static inline void kernel_signal_stop(void)
2528 {
2529         spin_lock_irq(&current->sighand->siglock);
2530         if (current->jobctl & JOBCTL_STOP_DEQUEUED)
2531                 __set_current_state(TASK_STOPPED);
2532         spin_unlock_irq(&current->sighand->siglock);
2533
2534         schedule();
2535 }
2536
2537 extern void release_task(struct task_struct * p);
2538 extern int send_sig_info(int, struct siginfo *, struct task_struct *);
2539 extern int force_sigsegv(int, struct task_struct *);
2540 extern int force_sig_info(int, struct siginfo *, struct task_struct *);
2541 extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
2542 extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
2543 extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *,
2544                                 const struct cred *, u32);
2545 extern int kill_pgrp(struct pid *pid, int sig, int priv);
2546 extern int kill_pid(struct pid *pid, int sig, int priv);
2547 extern int kill_proc_info(int, struct siginfo *, pid_t);
2548 extern __must_check bool do_notify_parent(struct task_struct *, int);
2549 extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
2550 extern void force_sig(int, struct task_struct *);
2551 extern int send_sig(int, struct task_struct *, int);
2552 extern int zap_other_threads(struct task_struct *p);
2553 extern struct sigqueue *sigqueue_alloc(void);
2554 extern void sigqueue_free(struct sigqueue *);
2555 extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
2556 extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
2557
2558 static inline void restore_saved_sigmask(void)
2559 {
2560         if (test_and_clear_restore_sigmask())
2561                 __set_current_blocked(&current->saved_sigmask);
2562 }
2563
2564 static inline sigset_t *sigmask_to_save(void)
2565 {
2566         sigset_t *res = &current->blocked;
2567         if (unlikely(test_restore_sigmask()))
2568                 res = &current->saved_sigmask;
2569         return res;
2570 }
2571
2572 static inline int kill_cad_pid(int sig, int priv)
2573 {
2574         return kill_pid(cad_pid, sig, priv);
2575 }
2576
2577 /* These can be the second arg to send_sig_info/send_group_sig_info.  */
2578 #define SEND_SIG_NOINFO ((struct siginfo *) 0)
2579 #define SEND_SIG_PRIV   ((struct siginfo *) 1)
2580 #define SEND_SIG_FORCED ((struct siginfo *) 2)
2581
2582 /*
2583  * True if we are on the alternate signal stack.
2584  */
2585 static inline int on_sig_stack(unsigned long sp)
2586 {
2587 #ifdef CONFIG_STACK_GROWSUP
2588         return sp >= current->sas_ss_sp &&
2589                 sp - current->sas_ss_sp < current->sas_ss_size;
2590 #else
2591         return sp > current->sas_ss_sp &&
2592                 sp - current->sas_ss_sp <= current->sas_ss_size;
2593 #endif
2594 }
2595
2596 static inline int sas_ss_flags(unsigned long sp)
2597 {
2598         if (!current->sas_ss_size)
2599                 return SS_DISABLE;
2600
2601         return on_sig_stack(sp) ? SS_ONSTACK : 0;
2602 }
2603
2604 static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig)
2605 {
2606         if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp))
2607 #ifdef CONFIG_STACK_GROWSUP
2608                 return current->sas_ss_sp;
2609 #else
2610                 return current->sas_ss_sp + current->sas_ss_size;
2611 #endif
2612         return sp;
2613 }
2614
2615 /*
2616  * Routines for handling mm_structs
2617  */
2618 extern struct mm_struct * mm_alloc(void);
2619
2620 /* mmdrop drops the mm and the page tables */
2621 extern void __mmdrop(struct mm_struct *);
2622 static inline void mmdrop(struct mm_struct * mm)
2623 {
2624         if (unlikely(atomic_dec_and_test(&mm->mm_count)))
2625                 __mmdrop(mm);
2626 }
2627
2628 /* mmput gets rid of the mappings and all user-space */
2629 extern void mmput(struct mm_struct *);
2630 /* Grab a reference to a task's mm, if it is not already going away */
2631 extern struct mm_struct *get_task_mm(struct task_struct *task);
2632 /*
2633  * Grab a reference to a task's mm, if it is not already going away
2634  * and ptrace_may_access with the mode parameter passed to it
2635  * succeeds.
2636  */
2637 extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
2638 /* Remove the current tasks stale references to the old mm_struct */
2639 extern void mm_release(struct task_struct *, struct mm_struct *);
2640
#ifdef CONFIG_HAVE_COPY_THREAD_TLS
extern int copy_thread_tls(unsigned long, unsigned long, unsigned long,
                        struct task_struct *, unsigned long);
#else
extern int copy_thread(unsigned long, unsigned long, unsigned long,
                        struct task_struct *);

/*
 * Architectures that haven't opted into copy_thread_tls get the tls
 * argument via pt_regs, so the tls argument passed via C is ignored and
 * the remaining arguments are forwarded to copy_thread().
 */
static inline int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
                                  unsigned long arg, struct task_struct *p,
                                  unsigned long tls)
{
        return copy_thread(clone_flags, sp, arg, p);
}
#endif
2657 extern void flush_thread(void);
2658 extern void exit_thread(void);
2659
2660 extern void exit_files(struct task_struct *);
2661 extern void __cleanup_sighand(struct sighand_struct *);
2662
2663 extern void exit_itimers(struct signal_struct *);
2664 extern void flush_itimer_signals(void);
2665
2666 extern void do_group_exit(int);
2667
2668 extern int do_execve(struct filename *,
2669                      const char __user * const __user *,
2670                      const char __user * const __user *);
2671 extern int do_execveat(int, struct filename *,
2672                        const char __user * const __user *,
2673                        const char __user * const __user *,
2674                        int);
2675 extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long);
2676 extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
2677 struct task_struct *fork_idle(int);
2678 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
2679
2680 extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
/* Calls __set_task_comm() for @tsk and @from with the exec flag false. */
static inline void set_task_comm(struct task_struct *tsk, const char *from)
{
        const bool exec = false;

        __set_task_comm(tsk, from, exec);
}
2685 extern char *get_task_comm(char *to, struct task_struct *tsk);
2686
#ifdef CONFIG_SMP
void scheduler_ipi(void);
extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
#else
/*
 * Without CONFIG_SMP: scheduler_ipi() is empty and wait_task_inactive()
 * always returns 1.
 */
static inline void scheduler_ipi(void)
{
}

static inline unsigned long wait_task_inactive(struct task_struct *p,
                                               long match_state)
{
        return 1UL;
}
#endif
2698
/* True when init_task's ->tasks list has no other entries. */
#define tasklist_empty()        list_empty(&init_task.tasks)
2701
/* The task_struct that follows @p on the ->tasks list (RCU list entry). */
#define next_task(p)                                                    \
        list_entry_rcu((p)->tasks.next, struct task_struct, tasks)
2704
/*
 * Iterate @p over every task on the ->tasks list, starting with the one
 * after init_task and stopping when the walk returns to init_task
 * (init_task itself is never visited).
 *
 * @p is parenthesized in the expansion so that any lvalue expression can
 * be passed safely.
 */
#define for_each_process(p) \
        for ((p) = &init_task ; ((p) = next_task(p)) != &init_task ; )
2707
2708 extern bool current_is_single_threaded(void);
2709
/*
 * Careful: do_each_thread/while_each_thread is a double loop so
 *          'break' will not work as expected - use goto instead.
 *
 * do_each_thread() opens the outer loop over the ->tasks list (@g) and
 * begins a do-block that must be closed by while_each_thread(), which
 * advances @t.  The macro arguments are parenthesized in the expansion.
 */
#define do_each_thread(g, t) \
        for ((g) = (t) = &init_task ; ((g) = (t) = next_task(g)) != &init_task ; ) do
2716
/*
 * Advance @t with next_thread() and keep looping until it is @g again.
 * The macro arguments are parenthesized in the expansion.
 */
#define while_each_thread(g, t) \
        while (((t) = next_thread(t)) != (g))
2719
/* Walk @t over @signal's ->thread_head list, linked via ->thread_node. */
#define __for_each_thread(signal, t)                                    \
        list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node)
2722
/* Walk @t over the threads recorded in @p's signal struct. */
#define for_each_thread(p, t)   __for_each_thread((p)->signal, t)
2725
/*
 * Visit every thread @t of every process @p.
 * Careful: this is a double loop, 'break' won't work as expected.
 */
#define for_each_process_thread(p, t)                   \
        for_each_process(p) for_each_thread(p, t)
2729
2730 static inline int get_nr_threads(struct task_struct *tsk)
2731 {
2732         return tsk->signal->nr_threads;
2733 }
2734
2735 static inline bool thread_group_leader(struct task_struct *p)
2736 {
2737         return p->exit_signal >= 0;
2738 }
2739
2740 /* Do to the insanities of de_thread it is possible for a process
2741  * to have the pid of the thread group leader without actually being
2742  * the thread group leader.  For iteration through the pids in proc
2743  * all we care about is that we have a task with the appropriate
2744  * pid, we don't actually care if we have the right task.
2745  */
2746 static inline bool has_group_leader_pid(struct task_struct *p)
2747 {
2748         return task_pid(p) == p->signal->leader_pid;
2749 }
2750
2751 static inline
2752 bool same_thread_group(struct task_struct *p1, struct task_struct *p2)
2753 {
2754         return p1->signal == p2->signal;
2755 }
2756
2757 static inline struct task_struct *next_thread(const struct task_struct *p)
2758 {
2759         return list_entry_rcu(p->thread_group.next,
2760                               struct task_struct, thread_group);
2761 }
2762
2763 static inline int thread_group_empty(struct task_struct *p)
2764 {
2765         return list_empty(&p->thread_group);
2766 }
2767
/*
 * True for a thread group leader whose ->thread_group list is not empty.
 * Note that @p appears twice in the expansion.
 */
#define delay_group_leader(p)                                   \
        (thread_group_leader(p) && !thread_group_empty(p))
2770
2771 /*
2772  * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
2773  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
2774  * pins the final release of task.io_context.  Also protects ->cpuset and
2775  * ->cgroup.subsys[]. And ->vfork_done.
2776  *
2777  * Nests both inside and outside of read_lock(&tasklist_lock).
2778  * It must not be nested with write_lock_irq(&tasklist_lock),
2779  * neither inside nor outside.
2780  */
2781 static inline void task_lock(struct task_struct *p)
2782 {
2783         spin_lock(&p->alloc_lock);
2784 }
2785
2786 static inline void task_unlock(struct task_struct *p)
2787 {
2788         spin_unlock(&p->alloc_lock);
2789 }
2790
2791 extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
2792                                                         unsigned long *flags);
2793
2794 static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk,
2795                                                        unsigned long *flags)
2796 {
2797         struct sighand_struct *ret;
2798
2799         ret = __lock_task_sighand(tsk, flags);
2800         (void)__cond_lock(&tsk->sighand->siglock, ret);
2801         return ret;
2802 }
2803
2804 static inline void unlock_task_sighand(struct task_struct *tsk,
2805                                                 unsigned long *flags)
2806 {
2807         spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
2808 }
2809
/**
 * threadgroup_change_begin - open a section that modifies a threadgroup
 * @tsk: task causing the changes
 *
 * Every operation that modifies a threadgroup is bracketed by
 * threadgroup_change_{begin|end}(): a new thread joining the group, the
 * death of a member thread (the assertion of PF_EXITING), and exec(2)
 * dethreading the process and replacing the leader.  The pair gives
 * subsystems that need threadgroup stability a place to hook into for
 * synchronization.
 *
 * Calls might_sleep() before handing over to
 * cgroup_threadgroup_change_begin().
 */
static inline void threadgroup_change_begin(struct task_struct *tsk)
{
        might_sleep();

        cgroup_threadgroup_change_begin(tsk);
}
2826
/**
 * threadgroup_change_end - close a section that modifies a threadgroup
 * @tsk: task causing the changes
 *
 * Counterpart of threadgroup_change_begin(); forwards to
 * cgroup_threadgroup_change_end().
 */
static inline void threadgroup_change_end(struct task_struct *tsk)
{
        cgroup_threadgroup_change_end(tsk);
}
2837
2838 #ifndef __HAVE_THREAD_FUNCTIONS
2839
/* task->stack viewed as a struct thread_info pointer. */
#define task_thread_info(task)  ((struct thread_info *)(task)->stack)
/* task->stack as stored. */
#define task_stack_page(task)   ((task)->stack)
2842
2843 static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
2844 {
2845         *task_thread_info(p) = *task_thread_info(org);
2846         task_thread_info(p)->task = p;
2847 }
2848
2849 /*
2850  * Return the address of the last usable long on the stack.
2851  *
2852  * When the stack grows down, this is just above the thread
2853  * info struct. Going any lower will corrupt the threadinfo.
2854  *
2855  * When the stack grows up, this is the highest address.
2856  * Beyond that position, we corrupt data on the next page.
2857  */
2858 static inline unsigned long *end_of_stack(struct task_struct *p)
2859 {
2860 #ifdef CONFIG_STACK_GROWSUP
2861         return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1;
2862 #else
2863         return (unsigned long *)(task_thread_info(p) + 1);
2864 #endif
2865 }
2866
2867 #endif
/* True when the long at end_of_stack(task) no longer holds STACK_END_MAGIC. */
#define task_stack_end_corrupted(task)                          \
        (*(end_of_stack(task)) != STACK_END_MAGIC)
2870
2871 static inline int object_is_on_stack(void *obj)
2872 {
2873         void *stack = task_stack_page(current);
2874
2875         return (obj >= stack) && (obj < (stack + THREAD_SIZE));
2876 }
2877
2878 extern void thread_info_cache_init(void);
2879
#ifdef CONFIG_DEBUG_STACK_USAGE
/*
 * Byte distance between end_of_stack(@p) and the first non-zero long
 * found when walking away from it (the walk starts one long past the
 * end-of-stack word, so the canary stored there is skipped).
 */
static inline unsigned long stack_not_used(struct task_struct *p)
{
        unsigned long *end = end_of_stack(p);
        unsigned long *n = end;

# ifdef CONFIG_STACK_GROWSUP
        do {    /* Skip over canary */
                n--;
        } while (!*n);

        return (unsigned long)end - (unsigned long)n;
# else
        do {    /* Skip over canary */
                n++;
        } while (!*n);

        return (unsigned long)n - (unsigned long)end;
# endif
}
#endif
2900 extern void set_task_stack_end_magic(struct task_struct *tsk);
2901
/*
 * set thread flags in other task's structures
 * - see asm/thread_info.h for TIF_xxxx flags available
 *
 * Sets @flag in @tsk's thread_info.
 */
static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
        struct thread_info *ti = task_thread_info(tsk);

        set_ti_thread_flag(ti, flag);
}
2909
/* Clears @flag in @tsk's thread_info. */
static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
        struct thread_info *ti = task_thread_info(tsk);

        clear_ti_thread_flag(ti, flag);
}
2914
/* Returns test_and_set_ti_thread_flag() for @flag on @tsk's thread_info. */
static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
        struct thread_info *ti = task_thread_info(tsk);

        return test_and_set_ti_thread_flag(ti, flag);
}
2919
/* Returns test_and_clear_ti_thread_flag() for @flag on @tsk's thread_info. */
static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
        struct thread_info *ti = task_thread_info(tsk);

        return test_and_clear_ti_thread_flag(ti, flag);
}
2924
/* Returns test_ti_thread_flag() for @flag on @tsk's thread_info. */
static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
{
        struct thread_info *ti = task_thread_info(tsk);

        return test_ti_thread_flag(ti, flag);
}
2929
2930 static inline void set_tsk_need_resched(struct task_struct *tsk)
2931 {
2932         set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
2933 }
2934
2935 static inline void clear_tsk_need_resched(struct task_struct *tsk)
2936 {
2937         clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
2938 }
2939
2940 static inline int test_tsk_need_resched(struct task_struct *tsk)
2941 {
2942         return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
2943 }
2944
2945 static inline int restart_syscall(void)
2946 {
2947         set_tsk_thread_flag(current, TIF_SIGPENDING);
2948         return -ERESTARTNOINTR;
2949 }
2950
2951 static inline int signal_pending(struct task_struct *p)
2952 {
2953         return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
2954 }
2955
2956 static inline int __fatal_signal_pending(struct task_struct *p)
2957 {
2958         return unlikely(sigismember(&p->pending.signal, SIGKILL));
2959 }
2960
/*
 * 1 when @p has a signal pending and SIGKILL is among its pending
 * signals, 0 otherwise.
 */
static inline int fatal_signal_pending(struct task_struct *p)
{
        if (!signal_pending(p))
                return 0;

        return __fatal_signal_pending(p) != 0;
}
2965
2966 static inline int signal_pending_state(long state, struct task_struct *p)
2967 {
2968         if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
2969                 return 0;
2970         if (!signal_pending(p))
2971                 return 0;
2972
2973         return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
2974 }
2975
2976 /*
2977  * cond_resched() and cond_resched_lock(): latency reduction via
2978  * explicit rescheduling in places that are safe. The return
2979  * value indicates whether a reschedule was done in fact.
2980  * cond_resched_lock() will drop the spinlock before scheduling,
2981  * cond_resched_softirq() will enable bhs before scheduling.
2982  */
2983 extern int _cond_resched(void);
2984
/* ___might_sleep() check with offset 0, then the value of _cond_resched(). */
#define cond_resched()                                  \
({                                                      \
        ___might_sleep(__FILE__, __LINE__, 0);          \
        _cond_resched();                                \
})
2989
2990 extern int __cond_resched_lock(spinlock_t *lock);
2991
/*
 * ___might_sleep() check with PREEMPT_LOCK_OFFSET, then the value of
 * __cond_resched_lock(lock).
 */
#define cond_resched_lock(lock)                                         \
({                                                                      \
        ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);        \
        __cond_resched_lock(lock);                                      \
})
2996
2997 extern int __cond_resched_softirq(void);
2998
/*
 * ___might_sleep() check with SOFTIRQ_DISABLE_OFFSET, then the value of
 * __cond_resched_softirq().
 */
#define cond_resched_softirq()                                          \
({                                                                      \
        ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET);     \
        __cond_resched_softirq();                                       \
})
3003
/*
 * Leave the RCU read-side section, call cond_resched(), and re-enter it.
 * Compiles to nothing when CONFIG_PREEMPT_RCU is set and
 * CONFIG_DEBUG_ATOMIC_SLEEP is not.
 */
static inline void cond_resched_rcu(void)
{
#if !defined(CONFIG_DEBUG_ATOMIC_SLEEP) && defined(CONFIG_PREEMPT_RCU)
        /* nothing to do */
#else
        rcu_read_unlock();
        cond_resched();
        rcu_read_lock();
#endif
}
3012
3013 /*
3014  * Does a critical section need to be broken due to another
3015  * task waiting?: (technically does not depend on CONFIG_PREEMPT,
3016  * but a general need for low latency)
3017  */
3018 static inline int spin_needbreak(spinlock_t *lock)
3019 {
3020 #ifdef CONFIG_PREEMPT
3021         return spin_is_contended(lock);
3022 #else
3023         return 0;
3024 #endif
3025 }
3026
3027 /*
3028  * Idle thread specific functions to determine the need_resched
3029  * polling state.
3030  */
3031 #ifdef TIF_POLLING_NRFLAG
3032 static inline int tsk_is_polling(struct task_struct *p)
3033 {
3034         return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
3035 }
3036
3037 static inline void __current_set_polling(void)
3038 {
3039         set_thread_flag(TIF_POLLING_NRFLAG);
3040 }
3041
3042 static inline bool __must_check current_set_polling_and_test(void)
3043 {
3044         __current_set_polling();
3045
3046         /*
3047          * Polling state must be visible before we test NEED_RESCHED,
3048          * paired by resched_curr()
3049          */
3050         smp_mb__after_atomic();
3051
3052         return unlikely(tif_need_resched());
3053 }
3054
3055 static inline void __current_clr_polling(void)
3056 {
3057         clear_thread_flag(TIF_POLLING_NRFLAG);
3058 }
3059
3060 static inline bool __must_check current_clr_polling_and_test(void)
3061 {
3062         __current_clr_polling();
3063
3064         /*
3065          * Polling state must be visible before we test NEED_RESCHED,
3066          * paired by resched_curr()
3067          */
3068         smp_mb__after_atomic();
3069
3070         return unlikely(tif_need_resched());
3071 }
3072
3073 #else
3074 static inline int tsk_is_polling(struct task_struct *p) { return 0; }
3075 static inline void __current_set_polling(void) { }
3076 static inline void __current_clr_polling(void) { }
3077
3078 static inline bool __must_check current_set_polling_and_test(void)
3079 {
3080         return unlikely(tif_need_resched());
3081 }
3082 static inline bool __must_check current_clr_polling_and_test(void)
3083 {
3084         return unlikely(tif_need_resched());
3085 }
3086 #endif
3087
/*
 * Clear the polling flag and fold any pending need-resched state.
 *
 * The full barrier ensures we check TIF_NEED_RESCHED after we clear the
 * polling bit.  Once the bit is cleared, we'll get IPIs with every new
 * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also fold.
 */
static inline void current_clr_polling(void)
{
        __current_clr_polling();
        smp_mb(); /* paired with resched_curr() */
        preempt_fold_need_resched();
}
3102
3103 static __always_inline bool need_resched(void)
3104 {
3105         return unlikely(tif_need_resched());
3106 }
3107
3108 /*
3109  * Thread group CPU time accounting.
3110  */
3111 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
3112 void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
3113
3114 /*
3115  * Reevaluate whether the task has signals pending delivery.
3116  * Wake the task if so.
3117  * This is required every time the blocked sigset_t changes.
3118  * callers must hold sighand->siglock.
3119  */
3120 extern void recalc_sigpending_and_wake(struct task_struct *t);
3121 extern void recalc_sigpending(void);
3122
3123 extern void signal_wake_up_state(struct task_struct *t, unsigned int state);
3124
3125 static inline void signal_wake_up(struct task_struct *t, bool resume)
3126 {
3127         signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0);
3128 }
3129 static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
3130 {
3131         signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
3132 }
3133
3134 /*
3135  * Wrappers for p->thread_info->cpu access. No-op on UP.
3136  */
3137 #ifdef CONFIG_SMP
3138
3139 static inline unsigned int task_cpu(const struct task_struct *p)
3140 {
3141         return task_thread_info(p)->cpu;
3142 }
3143
/* Map task_cpu(@p) to a node with cpu_to_node(). */
static inline int task_node(const struct task_struct *p)
{
        unsigned int cpu = task_cpu(p);

        return cpu_to_node(cpu);
}
3148
3149 extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
3150
3151 #else
3152
/* Without CONFIG_SMP the answer is always 0. */
static inline unsigned int task_cpu(const struct task_struct *p)
{
        return 0U;
}
3157
/* Without CONFIG_SMP this is an empty stub. */
static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) { }
3161
3162 #endif /* CONFIG_SMP */
3163
3164 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
3165 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
3166
3167 #ifdef CONFIG_CGROUP_SCHED
3168 extern struct task_group root_task_group;
3169 #endif /* CONFIG_CGROUP_SCHED */
3170
3171 extern int task_can_switch_user(struct user_struct *up,
3172                                         struct task_struct *tsk);
3173
3174 #ifdef CONFIG_TASK_XACCT
3175 static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
3176 {
3177         tsk->ioac.rchar += amt;
3178 }
3179
3180 static inline void add_wchar(struct task_struct *tsk, ssize_t amt)
3181 {
3182         tsk->ioac.wchar += amt;
3183 }
3184
3185 static inline void inc_syscr(struct task_struct *tsk)
3186 {
3187         tsk->ioac.syscr++;
3188 }
3189
3190 static inline void inc_syscw(struct task_struct *tsk)
3191 {
3192         tsk->ioac.syscw++;
3193 }
3194 #else
3195 static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
3196 {
3197 }
3198
3199 static inline void add_wchar(struct task_struct *tsk, ssize_t amt)
3200 {
3201 }
3202
3203 static inline void inc_syscr(struct task_struct *tsk)
3204 {
3205 }
3206
3207 static inline void inc_syscw(struct task_struct *tsk)
3208 {
3209 }
3210 #endif
3211
/* Fallback when TASK_SIZE_OF is not already defined: TASK_SIZE for any @tsk. */
#ifndef TASK_SIZE_OF
# define TASK_SIZE_OF(tsk)      TASK_SIZE
#endif
3215
#ifdef CONFIG_MEMCG
extern void mm_update_next_owner(struct mm_struct *mm);
#else
/* Without CONFIG_MEMCG this is an empty stub. */
static inline void mm_update_next_owner(struct mm_struct *mm) { }
#endif /* CONFIG_MEMCG */
3223
3224 static inline unsigned long task_rlimit(const struct task_struct *tsk,
3225                 unsigned int limit)
3226 {
3227         return READ_ONCE(tsk->signal->rlim[limit].rlim_cur);
3228 }
3229
3230 static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
3231                 unsigned int limit)
3232 {
3233         return READ_ONCE(tsk->signal->rlim[limit].rlim_max);
3234 }
3235
3236 static inline unsigned long rlimit(unsigned int limit)
3237 {
3238         return task_rlimit(current, limit);
3239 }
3240
3241 static inline unsigned long rlimit_max(unsigned int limit)
3242 {
3243         return task_rlimit_max(current, limit);
3244 }
3245
3246 #ifdef CONFIG_CPU_FREQ
3247 struct update_util_data {
3248         void (*func)(struct update_util_data *data,
3249                      u64 time, unsigned long util, unsigned long max);
3250 };
3251
3252 void cpufreq_set_update_util_data(int cpu, struct update_util_data *data);
3253 #endif /* CONFIG_CPU_FREQ */
3254
3255 #endif