Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <torvalds@linux-foundation.org>

Tue, 13 Aug 2013 23:58:17 +0000 (16:58 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 13 Aug 2013 23:58:17 +0000 (16:58 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 13 Aug 2013 23:58:17 +0000 (16:58 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 13 Aug 2013 23:58:17 +0000 (16:58 -0700)
diff --combined include/linux/sched.h

index d722490,8230024..078066d
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -1532,6 -1532,8 +1532,8 @@@ static inline pid_t task_pgrp_nr(struc
    * Test if a process is not yet dead (at most zombie state)
    * If pid_alive fails, then pointers within the task structure
    * can be stale and must not be dereferenced.
+  *
+  * Return: 1 if the process is alive. 0 otherwise.
    */
   static inline int pid_alive(struct task_struct *p)
   {
@@@ -1543,6 -1545,8 +1545,8 @@@
    * @tsk: Task structure to be checked.
    *
    * Check if a task structure is the first user space task the kernel created.
+  *
+  * Return: 1 if the task structure is init. 0 otherwise.
    */
   static inline int is_global_init(struct task_struct *tsk)
   {
@@@ -1628,7 -1632,6 +1632,7 @@@ extern void thread_group_cputime_adjust
   #define PF_MEMPOLICY  0x10000000      /* Non-default NUMA mempolicy */
   #define PF_MUTEX_TESTER       0x20000000      /* Thread belongs to the rt mutex tester */
   #define PF_FREEZER_SKIP       0x40000000      /* Freezer should not count it as freezable */
+ +#define PF_SUSPEND_TASK 0x80000000      /* this thread called freeze_processes and should not be frozen */
   
   /*
    * Only the _current_ task can read/write to tsk->flags, but other
@@@ -1894,6 -1897,8 +1898,8 @@@ extern struct task_struct *idle_task(in
   /**
    * is_idle_task - is the specified task an idle task?
    * @p: the task in question.
+  *
+  * Return: 1 if @p is an idle task. 0 otherwise.
    */
   static inline bool is_idle_task(const struct task_struct *p)
   {
diff --combined kernel/sched/core.c

index ef51b0e,4c3967f..05c39f0
--- 1/kernel/sched/core.c
--- 2/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@@ -933,6 -933,8 +933,8 @@@ static int effective_prio(struct task_s
   /**
    * task_curr - is this task currently executing on a CPU?
    * @p: the task in question.
+  *
+  * Return: 1 if the task is currently executing. 0 otherwise.
    */
   inline int task_curr(const struct task_struct *p)
   {
@@@ -1482,7 -1484,7 +1484,7 @@@ static void ttwu_queue(struct task_stru
    * the simpler "current->state = TASK_RUNNING" to mark yourself
    * runnable without the overhead of this.
    *
-  * Returns %true if @p was woken up, %false if it was already running
+  * Return: %true if @p was woken up, %false if it was already running
    * or @state didn't match @p's state.
    */
   static int
@@@ -1491,13 -1493,7 +1493,13 @@@ try_to_wake_up(struct task_struct *p, u
         unsigned long flags;
         int cpu, success = 0;
   
- -      smp_wmb();
+ +      /*
+ +       * If we are going to wake up a thread waiting for CONDITION we
+ +       * need to ensure that CONDITION=1 done by the caller can not be
+ +       * reordered with p->state check below. This pairs with mb() in
+ +       * set_current_state() the waiting thread does.
+ +       */
+ +      smp_mb__before_spinlock();
         raw_spin_lock_irqsave(&p->pi_lock, flags);
         if (!(p->state & state))
                 goto out;
@@@ -1583,8 -1579,9 +1585,9 @@@ out
    * @p: The process to be woken up.
    *
    * Attempt to wake up the nominated process and move it to the set of runnable
-  * processes.  Returns 1 if the process was woken up, 0 if it was already
-  * running.
+  * processes.
+  *
+  * Return: 1 if the process was woken up, 0 if it was already running.
    *
    * It may be assumed that this function implies a write memory barrier before
    * changing the task state if and only if any tasks are woken up.
@@@ -2197,6 -2194,8 +2200,8 @@@ void scheduler_tick(void
    * This makes sure that uptime, CFS vruntime, load
    * balancing, etc... continue to move forward, even
    * with a very low granularity.
+  *
+  * Return: Maximum deferment in nanoseconds.
    */
   u64 scheduler_tick_max_deferment(void)
   {
@@@ -2400,12 -2399,6 +2405,12 @@@ need_resched
         if (sched_feat(HRTICK))
                 hrtick_clear(rq);
   
+ +      /*
+ +       * Make sure that signal_pending_state()->signal_pending() below
+ +       * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
+ +       * done by the caller to avoid the race with signal_wake_up().
+ +       */
+ +      smp_mb__before_spinlock();
         raw_spin_lock_irq(&rq->lock);
   
         switch_count = &prev->nivcsw;
@@@ -2808,8 -2801,8 +2813,8 @@@ EXPORT_SYMBOL(wait_for_completion)
    * specified timeout to expire. The timeout is in jiffies. It is not
    * interruptible.
    *
-  * The return value is 0 if timed out, and positive (at least 1, or number of
-  * jiffies left till timeout) if completed.
+  * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
+  * till timeout) if completed.
    */
   unsigned long __sched
   wait_for_completion_timeout(struct completion *x, unsigned long timeout)
@@@ -2841,8 -2834,8 +2846,8 @@@ EXPORT_SYMBOL(wait_for_completion_io)
    * specified timeout to expire. The timeout is in jiffies. It is not
    * interruptible. The caller is accounted as waiting for IO.
    *
-  * The return value is 0 if timed out, and positive (at least 1, or number of
-  * jiffies left till timeout) if completed.
+  * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
+  * till timeout) if completed.
    */
   unsigned long __sched
   wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
@@@ -2858,7 -2851,7 +2863,7 @@@ EXPORT_SYMBOL(wait_for_completion_io_ti
    * This waits for completion of a specific task to be signaled. It is
    * interruptible.
    *
-  * The return value is -ERESTARTSYS if interrupted, 0 if completed.
+  * Return: -ERESTARTSYS if interrupted, 0 if completed.
    */
   int __sched wait_for_completion_interruptible(struct completion *x)
   {
@@@ -2877,8 -2870,8 +2882,8 @@@ EXPORT_SYMBOL(wait_for_completion_inter
    * This waits for either a completion of a specific task to be signaled or for a
    * specified timeout to expire. It is interruptible. The timeout is in jiffies.
    *
-  * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
-  * positive (at least 1, or number of jiffies left till timeout) if completed.
+  * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
+  * or number of jiffies left till timeout) if completed.
    */
   long __sched
   wait_for_completion_interruptible_timeout(struct completion *x,
@@@ -2895,7 -2888,7 +2900,7 @@@ EXPORT_SYMBOL(wait_for_completion_inter
    * This waits to be signaled for completion of a specific task. It can be
    * interrupted by a kill signal.
    *
-  * The return value is -ERESTARTSYS if interrupted, 0 if completed.
+  * Return: -ERESTARTSYS if interrupted, 0 if completed.
    */
   int __sched wait_for_completion_killable(struct completion *x)
   {
@@@ -2915,8 -2908,8 +2920,8 @@@ EXPORT_SYMBOL(wait_for_completion_killa
    * signaled or for a specified timeout to expire. It can be
    * interrupted by a kill signal. The timeout is in jiffies.
    *
-  * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
-  * positive (at least 1, or number of jiffies left till timeout) if completed.
+  * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
+  * or number of jiffies left till timeout) if completed.
    */
   long __sched
   wait_for_completion_killable_timeout(struct completion *x,
@@@ -2930,7 -2923,7 +2935,7 @@@ EXPORT_SYMBOL(wait_for_completion_killa
    *    try_wait_for_completion - try to decrement a completion without blocking
    *    @x:     completion structure
    *
-  *    Returns: 0 if a decrement cannot be done without blocking
+  *    Return: 0 if a decrement cannot be done without blocking
    *             1 if a decrement succeeded.
    *
    *    If a completion is being used as a counting completion,
@@@ -2957,7 -2950,7 +2962,7 @@@ EXPORT_SYMBOL(try_wait_for_completion)
    *    completion_done - Test to see if a completion has any waiters
    *    @x:     completion structure
    *
-  *    Returns: 0 if there are waiters (wait_for_completion() in progress)
+  *    Return: 0 if there are waiters (wait_for_completion() in progress)
    *             1 if there are no waiters.
    *
    */
@@@ -3194,7 -3187,7 +3199,7 @@@ SYSCALL_DEFINE1(nice, int, increment
    * task_prio - return the priority value of a given task.
    * @p: the task in question.
    *
-  * This is the priority value as seen by users in /proc.
+  * Return: The priority value as seen by users in /proc.
    * RT tasks are offset by -200. Normal tasks are centered
    * around 0, value goes from -16 to +15.
    */
@@@ -3206,6 -3199,8 +3211,8 @@@ int task_prio(const struct task_struct 
   /**
    * task_nice - return the nice value of a given task.
    * @p: the task in question.
+  *
+  * Return: The nice value [ -20 ... 0 ... 19 ].
    */
   int task_nice(const struct task_struct *p)
   {
@@@ -3216,6 -3211,8 +3223,8 @@@ EXPORT_SYMBOL(task_nice)
   /**
    * idle_cpu - is a given cpu idle currently?
    * @cpu: the processor in question.
+  *
+  * Return: 1 if the CPU is currently idle. 0 otherwise.
    */
   int idle_cpu(int cpu)
   {
@@@ -3238,6 -3235,8 +3247,8 @@@
   /**
    * idle_task - return the idle task for a given cpu.
    * @cpu: the processor in question.
+  *
+  * Return: The idle task for the cpu @cpu.
    */
   struct task_struct *idle_task(int cpu)
   {
@@@ -3247,6 -3246,8 +3258,8 @@@
   /**
    * find_process_by_pid - find a process with a matching PID value.
    * @pid: the pid in question.
+  *
+  * Return: The task of @pid, if found. %NULL otherwise.
    */
   static struct task_struct *find_process_by_pid(pid_t pid)
   {
@@@ -3444,6 -3445,8 +3457,8 @@@ recheck
    * @policy: new policy.
    * @param: structure containing the new RT priority.
    *
+  * Return: 0 on success. An error code otherwise.
+  *
    * NOTE that the task may be already dead.
    */
   int sched_setscheduler(struct task_struct *p, int policy,
@@@ -3463,6 -3466,8 +3478,8 @@@ EXPORT_SYMBOL_GPL(sched_setscheduler)
    * current context has permission.  For example, this is needed in
    * stop_machine(): we create temporary high priority worker threads,
    * but our caller might not have that capability.
+  *
+  * Return: 0 on success. An error code otherwise.
    */
   int sched_setscheduler_nocheck(struct task_struct *p, int policy,
                                const struct sched_param *param)
@@@ -3497,6 -3502,8 +3514,8 @@@ do_sched_setscheduler(pid_t pid, int po
    * @pid: the pid in question.
    * @policy: new policy.
    * @param: structure containing the new RT priority.
+  *
+  * Return: 0 on success. An error code otherwise.
    */
   SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
                 struct sched_param __user *, param)
@@@ -3512,6 -3519,8 +3531,8 @@@
    * sys_sched_setparam - set/change the RT priority of a thread
    * @pid: the pid in question.
    * @param: structure containing the new RT priority.
+  *
+  * Return: 0 on success. An error code otherwise.
    */
   SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
   {
@@@ -3521,6 -3530,9 +3542,9 @@@
   /**
    * sys_sched_getscheduler - get the policy (scheduling class) of a thread
    * @pid: the pid in question.
+  *
+  * Return: On success, the policy of the thread. Otherwise, a negative error
+  * code.
    */
   SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
   {
@@@ -3547,6 -3559,9 +3571,9 @@@
    * sys_sched_getparam - get the RT priority of a thread
    * @pid: the pid in question.
    * @param: structure containing the RT priority.
+  *
+  * Return: On success, 0 and the RT priority is in @param. Otherwise, an error
+  * code.
    */
   SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
   {
@@@ -3671,6 -3686,8 +3698,8 @@@ static int get_user_cpu_mask(unsigned l
    * @pid: pid of the process
    * @len: length in bytes of the bitmask pointed to by user_mask_ptr
    * @user_mask_ptr: user-space pointer to the new cpu mask
+  *
+  * Return: 0 on success. An error code otherwise.
    */
   SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
                 unsigned long __user *, user_mask_ptr)
@@@ -3722,6 -3739,8 +3751,8 @@@ out_unlock
    * @pid: pid of the process
    * @len: length in bytes of the bitmask pointed to by user_mask_ptr
    * @user_mask_ptr: user-space pointer to hold the current cpu mask
+  *
+  * Return: 0 on success. An error code otherwise.
    */
   SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
                 unsigned long __user *, user_mask_ptr)
@@@ -3756,6 -3775,8 +3787,8 @@@
    *
    * This function yields the current CPU to other tasks. If there are no
    * other threads running on this CPU then this function will return.
+  *
+  * Return: 0.
    */
   SYSCALL_DEFINE0(sched_yield)
   {
@@@ -3881,7 -3902,7 +3914,7 @@@ EXPORT_SYMBOL(yield)
    * It's the caller's job to ensure that the target task struct
    * can't go away on us before we can do any checks.
    *
-  * Returns:
+  * Return:
    *    true (>0) if we indeed boosted the target task.
    *    false (0) if we failed to boost the target.
    *    -ESRCH if there's no task to yield to.
@@@ -3984,8 -4005,9 +4017,9 @@@ long __sched io_schedule_timeout(long t
    * sys_sched_get_priority_max - return maximum RT priority.
    * @policy: scheduling class.
    *
-  * this syscall returns the maximum rt_priority that can be used
-  * by a given scheduling class.
+  * Return: On success, this syscall returns the maximum
+  * rt_priority that can be used by a given scheduling class.
+  * On failure, a negative error code is returned.
    */
   SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
   {
@@@ -4009,8 -4031,9 +4043,9 @@@
    * sys_sched_get_priority_min - return minimum RT priority.
    * @policy: scheduling class.
    *
-  * this syscall returns the minimum rt_priority that can be used
-  * by a given scheduling class.
+  * Return: On success, this syscall returns the minimum
+  * rt_priority that can be used by a given scheduling class.
+  * On failure, a negative error code is returned.
    */
   SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
   {
@@@ -4036,6 -4059,9 +4071,9 @@@
    *
    * this syscall writes the default timeslice value of a given process
    * into the user-space timespec buffer. A value of '0' means infinity.
+  *
+  * Return: On success, 0 and the timeslice is in @interval. Otherwise,
+  * an error code.
    */
   SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
                 struct timespec __user *, interval)
@@@ -4145,7 -4171,7 +4183,7 @@@ void show_state_filter(unsigned long st
                 debug_show_all_locks();
   }
   
- -void __cpuinit init_idle_bootup_task(struct task_struct *idle)
+ +void init_idle_bootup_task(struct task_struct *idle)
   {
         idle->sched_class = &idle_sched_class;
   }
@@@ -4158,7 -4184,7 +4196,7 @@@
    * NOTE: this function does not set the idle thread's NEED_RESCHED
    * flag, to make booting more robust.
    */
- -void __cpuinit init_idle(struct task_struct *idle, int cpu)
+ +void init_idle(struct task_struct *idle, int cpu)
   {
         struct rq *rq = cpu_rq(cpu);
         unsigned long flags;
@@@ -4642,7 -4668,7 +4680,7 @@@ static void set_rq_offline(struct rq *r
    * migration_call - callback that gets triggered when a CPU is added.
    * Here we can start up the necessary migration thread for the new CPU.
    */
- -static int __cpuinit
+ +static int
   migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
   {
         int cpu = (long)hcpu;
@@@ -4696,12 -4722,12 +4734,12 @@@
    * happens before everything else.  This has to be lower priority than
    * the notifier in the perf_event subsystem, though.
    */
- -static struct notifier_block __cpuinitdata migration_notifier = {
+ +static struct notifier_block migration_notifier = {
         .notifier_call = migration_call,
         .priority = CPU_PRI_MIGRATION,
   };
   
- -static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
+ +static int sched_cpu_active(struct notifier_block *nfb,
                                       unsigned long action, void *hcpu)
   {
         switch (action & ~CPU_TASKS_FROZEN) {
@@@ -4714,7 -4740,7 +4752,7 @@@
         }
   }
   
- -static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb,
+ +static int sched_cpu_inactive(struct notifier_block *nfb,
                                         unsigned long action, void *hcpu)
   {
         switch (action & ~CPU_TASKS_FROZEN) {
@@@ -6644,6 -6670,8 +6682,8 @@@ void normalize_rt_tasks(void
    * @cpu: the processor in question.
    *
    * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
+  *
+  * Return: The current task for @cpu.
    */
   struct task_struct *curr_task(int cpu)
   {
diff --combined kernel/sched/fair.c

index 9565645,06db94b..68f1609
--- 1/kernel/sched/fair.c
--- 2/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@@ -851,7 -851,7 +851,7 @@@ void task_numa_fault(int node, int page
   {
         struct task_struct *p = current;
   
- -      if (!sched_feat_numa(NUMA))
+ +      if (!numabalancing_enabled)
                 return;
   
         /* FIXME: Allocate task-specific structure for placement policy here */
@@@ -2032,6 -2032,7 +2032,7 @@@ entity_tick(struct cfs_rq *cfs_rq, stru
          */
         update_entity_load_avg(curr, 1);
         update_cfs_rq_blocked_load(cfs_rq, 1);
+       update_cfs_shares(cfs_rq);
   
   #ifdef CONFIG_SCHED_HRTICK
         /*
@@@ -4280,6 -4281,8 +4281,8 @@@ struct sg_lb_stats 
    * get_sd_load_idx - Obtain the load index for a given sched domain.
    * @sd: The sched_domain whose load_idx is to be obtained.
    * @idle: The Idle status of the CPU for whose sd load_idx is obtained.
+  *
+  * Return: The load index.
    */
   static inline int get_sd_load_idx(struct sched_domain *sd,
                                         enum cpu_idle_type idle)
@@@ -4574,6 -4577,9 +4577,9 @@@ static inline void update_sg_lb_stats(s
    *
    * Determine if @sg is a busier group than the previously selected
    * busiest group.
+  *
+  * Return: %true if @sg is a busier group than the previously selected
+  * busiest group. %false otherwise.
    */
   static bool update_sd_pick_busiest(struct lb_env *env,
                                    struct sd_lb_stats *sds,
@@@ -4691,7 -4697,7 +4697,7 @@@ static inline void update_sd_lb_stats(s
    * assuming lower CPU number will be equivalent to a lower SMT thread
    * number.
    *
-  * Returns 1 when packing is required and a task should be moved to
+  * Return: 1 when packing is required and a task should be moved to
    * this CPU.  The amount of the imbalance is returned in *imbalance.
    *
    * @env: The load balancing environment.
@@@ -4869,7 -4875,7 +4875,7 @@@ static inline void calculate_imbalance(
    * @balance: Pointer to a variable indicating if this_cpu
    *    is the appropriate cpu to perform load balancing at this_level.
    *
-  * Returns:   - the busiest group if imbalance exists.
+  * Return:    - The busiest group if imbalance exists.
    *            - If no imbalance and user has opted for power-savings balance,
    *               return the least loaded group whose CPUs can be
    *               put to idle by rebalancing its tasks onto our group.
@@@ -5506,7 -5512,7 +5512,7 @@@ void nohz_balance_enter_idle(int cpu
         set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
   }
   
- -static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
+ +static int sched_ilb_notifier(struct notifier_block *nfb,
                                         unsigned long action, void *hcpu)
   {
         switch (action & ~CPU_TASKS_FROZEN) {
@@@ -5786,7 -5792,7 +5792,7 @@@ static void task_tick_fair(struct rq *r
                 entity_tick(cfs_rq, se, queued);
         }
   
- -      if (sched_feat_numa(NUMA))
+ +      if (numabalancing_enabled)
                 task_tick_numa(rq, curr);
   
         update_rq_runnable_avg(rq, 1);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 13 Aug 2013 23:58:17 +0000 (16:58 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 13 Aug 2013 23:58:17 +0000 (16:58 -0700)
		1	2
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched/core.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched/fair.c	patch \|	diff1 \|	diff2 \|	blob \| history