Merge branch 'kvm-ppc-next' of git://github.com/agraf/linux-2.6 into kvm-queue
[cascardo/linux.git] / kernel / posix-cpu-timers.c
index c5d1ef5..3b89464 100644 (file)
@@ -233,7 +233,8 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 
 /*
  * Sample a process (thread group) clock for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
+ * Must be called with task sighand lock held for safe while_each_thread()
+ * traversal.
  */
 static int cpu_clock_sample_group(const clockid_t which_clock,
                                  struct task_struct *p,
@@ -260,30 +261,53 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
        return 0;
 }
 
+static int posix_cpu_clock_get_task(struct task_struct *tsk,
+                                   const clockid_t which_clock,
+                                   struct timespec *tp)
+{
+       int err = -EINVAL;
+       unsigned long long rtn;
+
+       if (CPUCLOCK_PERTHREAD(which_clock)) {
+               if (same_thread_group(tsk, current))
+                       err = cpu_clock_sample(which_clock, tsk, &rtn);
+       } else {
+               unsigned long flags;
+               struct sighand_struct *sighand;
+
+               /*
+                * while_each_thread() is not yet entirely RCU safe,
+                * keep locking the group while sampling process
+                * clock for now.
+                */
+               sighand = lock_task_sighand(tsk, &flags);
+               if (!sighand)
+                       return err;
+
+               if (tsk == current || thread_group_leader(tsk))
+                       err = cpu_clock_sample_group(which_clock, tsk, &rtn);
+
+               unlock_task_sighand(tsk, &flags);
+       }
+
+       if (!err)
+               sample_to_timespec(which_clock, rtn, tp);
+
+       return err;
+}
+
 
 static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 {
        const pid_t pid = CPUCLOCK_PID(which_clock);
-       int error = -EINVAL;
-       unsigned long long rtn;
+       int err = -EINVAL;
 
        if (pid == 0) {
                /*
                 * Special case constant value for our own clocks.
                 * We don't have to do any lookup to find ourselves.
                 */
-               if (CPUCLOCK_PERTHREAD(which_clock)) {
-                       /*
-                        * Sampling just ourselves we can do with no locking.
-                        */
-                       error = cpu_clock_sample(which_clock,
-                                                current, &rtn);
-               } else {
-                       read_lock(&tasklist_lock);
-                       error = cpu_clock_sample_group(which_clock,
-                                                      current, &rtn);
-                       read_unlock(&tasklist_lock);
-               }
+               err = posix_cpu_clock_get_task(current, which_clock, tp);
        } else {
                /*
                 * Find the given PID, and validate that the caller
@@ -292,29 +316,12 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
                struct task_struct *p;
                rcu_read_lock();
                p = find_task_by_vpid(pid);
-               if (p) {
-                       if (CPUCLOCK_PERTHREAD(which_clock)) {
-                               if (same_thread_group(p, current)) {
-                                       error = cpu_clock_sample(which_clock,
-                                                                p, &rtn);
-                               }
-                       } else {
-                               read_lock(&tasklist_lock);
-                               if (thread_group_leader(p) && p->sighand) {
-                                       error =
-                                           cpu_clock_sample_group(which_clock,
-                                                                  p, &rtn);
-                               }
-                               read_unlock(&tasklist_lock);
-                       }
-               }
+               if (p)
+                       err = posix_cpu_clock_get_task(p, which_clock, tp);
                rcu_read_unlock();
        }
 
-       if (error)
-               return error;
-       sample_to_timespec(which_clock, rtn, tp);
-       return 0;
+       return err;
 }
 
 
@@ -371,36 +378,40 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
  */
 static int posix_cpu_timer_del(struct k_itimer *timer)
 {
-       struct task_struct *p = timer->it.cpu.task;
        int ret = 0;
+       unsigned long flags;
+       struct sighand_struct *sighand;
+       struct task_struct *p = timer->it.cpu.task;
 
-       if (likely(p != NULL)) {
-               read_lock(&tasklist_lock);
-               if (unlikely(p->sighand == NULL)) {
-                       /*
-                        * We raced with the reaping of the task.
-                        * The deletion should have cleared us off the list.
-                        */
-                       BUG_ON(!list_empty(&timer->it.cpu.entry));
-               } else {
-                       spin_lock(&p->sighand->siglock);
-                       if (timer->it.cpu.firing)
-                               ret = TIMER_RETRY;
-                       else
-                               list_del(&timer->it.cpu.entry);
-                       spin_unlock(&p->sighand->siglock);
-               }
-               read_unlock(&tasklist_lock);
+       WARN_ON_ONCE(p == NULL);
 
-               if (!ret)
-                       put_task_struct(p);
+       /*
+        * Protect against sighand release/switch in exit/exec and process/
+        * thread timer list entry concurrent read/writes.
+        */
+       sighand = lock_task_sighand(p, &flags);
+       if (unlikely(sighand == NULL)) {
+               /*
+                * We raced with the reaping of the task.
+                * The deletion should have cleared us off the list.
+                */
+               WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry));
+       } else {
+               if (timer->it.cpu.firing)
+                       ret = TIMER_RETRY;
+               else
+                       list_del(&timer->it.cpu.entry);
+
+               unlock_task_sighand(p, &flags);
        }
 
+       if (!ret)
+               put_task_struct(p);
+
        return ret;
 }
 
-static void cleanup_timers_list(struct list_head *head,
-                               unsigned long long curr)
+static void cleanup_timers_list(struct list_head *head)
 {
        struct cpu_timer_list *timer, *next;
 
@@ -414,16 +425,11 @@ static void cleanup_timers_list(struct list_head *head,
  * time for later timer_gettime calls to return.
  * This must be called with the siglock held.
  */
-static void cleanup_timers(struct list_head *head,
-                          cputime_t utime, cputime_t stime,
-                          unsigned long long sum_exec_runtime)
+static void cleanup_timers(struct list_head *head)
 {
-
-       cputime_t ptime = utime + stime;
-
-       cleanup_timers_list(head, cputime_to_expires(ptime));
-       cleanup_timers_list(++head, cputime_to_expires(utime));
-       cleanup_timers_list(++head, sum_exec_runtime);
+       cleanup_timers_list(head);
+       cleanup_timers_list(++head);
+       cleanup_timers_list(++head);
 }
 
 /*
@@ -433,24 +439,14 @@ static void cleanup_timers(struct list_head *head,
  */
 void posix_cpu_timers_exit(struct task_struct *tsk)
 {
-       cputime_t utime, stime;
-
        add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
                                                sizeof(unsigned long long));
-       task_cputime(tsk, &utime, &stime);
-       cleanup_timers(tsk->cpu_timers,
-                      utime, stime, tsk->se.sum_exec_runtime);
+       cleanup_timers(tsk->cpu_timers);
 
 }
 void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
-       struct signal_struct *const sig = tsk->signal;
-       cputime_t utime, stime;
-
-       task_cputime(tsk, &utime, &stime);
-       cleanup_timers(tsk->signal->cpu_timers,
-                      utime + sig->utime, stime + sig->stime,
-                      tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
+       cleanup_timers(tsk->signal->cpu_timers);
 }
 
 static inline int expires_gt(cputime_t expires, cputime_t new_exp)
@@ -460,8 +456,7 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp)
 
 /*
  * Insert the timer on the appropriate list before any timers that
- * expire later.  This must be called with the tasklist_lock held
- * for reading, interrupts disabled and p->sighand->siglock taken.
+ * expire later.  This must be called with the sighand lock held.
  */
 static void arm_timer(struct k_itimer *timer)
 {
@@ -552,7 +547,8 @@ static void cpu_timer_fire(struct k_itimer *timer)
 
 /*
  * Sample a process (thread group) timer for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
+ * Must be called with task sighand lock held for safe while_each_thread()
+ * traversal.
  */
 static int cpu_timer_sample_group(const clockid_t which_clock,
                                  struct task_struct *p,
@@ -615,43 +611,39 @@ static inline void posix_cpu_timer_kick_nohz(void) { }
  * If we return TIMER_RETRY, it's necessary to release the timer's lock
  * and try again.  (This happens when the timer is in the middle of firing.)
  */
-static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
+static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
                               struct itimerspec *new, struct itimerspec *old)
 {
+       unsigned long flags;
+       struct sighand_struct *sighand;
        struct task_struct *p = timer->it.cpu.task;
        unsigned long long old_expires, new_expires, old_incr, val;
        int ret;
 
-       if (unlikely(p == NULL)) {
-               /*
-                * Timer refers to a dead task's clock.
-                */
-               return -ESRCH;
-       }
+       WARN_ON_ONCE(p == NULL);
 
        new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
 
-       read_lock(&tasklist_lock);
        /*
-        * We need the tasklist_lock to protect against reaping that
-        * clears p->sighand.  If p has just been reaped, we can no
+        * Protect against sighand release/switch in exit/exec and p->cpu_timers
+        * and p->signal->cpu_timers read/write in arm_timer()
+        */
+       sighand = lock_task_sighand(p, &flags);
+       /*
+        * If p has just been reaped, we can no
         * longer get any information about it at all.
         */
-       if (unlikely(p->sighand == NULL)) {
-               read_unlock(&tasklist_lock);
-               put_task_struct(p);
-               timer->it.cpu.task = NULL;
+       if (unlikely(sighand == NULL)) {
                return -ESRCH;
        }
 
        /*
         * Disarm any old timer after extracting its expiry time.
         */
-       BUG_ON(!irqs_disabled());
+       WARN_ON_ONCE(!irqs_disabled());
 
        ret = 0;
        old_incr = timer->it.cpu.incr;
-       spin_lock(&p->sighand->siglock);
        old_expires = timer->it.cpu.expires;
        if (unlikely(timer->it.cpu.firing)) {
                timer->it.cpu.firing = -1;
@@ -708,12 +700,11 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
                 * disable this firing since we are already reporting
                 * it as an overrun (thanks to bump_cpu_timer above).
                 */
-               spin_unlock(&p->sighand->siglock);
-               read_unlock(&tasklist_lock);
+               unlock_task_sighand(p, &flags);
                goto out;
        }
 
-       if (new_expires != 0 && !(flags & TIMER_ABSTIME)) {
+       if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
                new_expires += val;
        }
 
@@ -727,9 +718,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
                arm_timer(timer);
        }
 
-       spin_unlock(&p->sighand->siglock);
-       read_unlock(&tasklist_lock);
-
+       unlock_task_sighand(p, &flags);
        /*
         * Install the new reload setting, and
         * set up the signal and overrun bookkeeping.
@@ -772,6 +761,8 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
        unsigned long long now;
        struct task_struct *p = timer->it.cpu.task;
 
+       WARN_ON_ONCE(p == NULL);
+
        /*
         * Easy part: convert the reload time.
         */
@@ -783,40 +774,34 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
                return;
        }
 
-       if (unlikely(p == NULL)) {
-               WARN_ON_ONCE(CPUCLOCK_PERTHREAD(timer->it_clock));
-               /*
-                * This task already died and the timer will never fire.
-                * In this case, expires is actually the dead value.
-                */
-       dead:
-               sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
-                                  &itp->it_value);
-               return;
-       }
-
        /*
         * Sample the clock to take the difference with the expiry time.
         */
        if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
                cpu_clock_sample(timer->it_clock, p, &now);
        } else {
-               read_lock(&tasklist_lock);
-               if (unlikely(p->sighand == NULL)) {
+               struct sighand_struct *sighand;
+               unsigned long flags;
+
+               /*
+                * Protect against sighand release/switch in exit/exec and
+                * also make timer sampling safe if it ends up calling
+                * thread_group_cputime().
+                */
+               sighand = lock_task_sighand(p, &flags);
+               if (unlikely(sighand == NULL)) {
                        /*
                         * The process has been reaped.
                         * We can't even collect a sample any more.
                         * Call the timer disarmed, nothing else to do.
                         */
-                       put_task_struct(p);
-                       timer->it.cpu.task = NULL;
                        timer->it.cpu.expires = 0;
-                       read_unlock(&tasklist_lock);
-                       goto dead;
+                       sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
+                                          &itp->it_value);
                } else {
                        cpu_timer_sample_group(timer->it_clock, p, &now);
+                       unlock_task_sighand(p, &flags);
                }
-               read_unlock(&tasklist_lock);
        }
 
        if (now < timer->it.cpu.expires) {
@@ -1030,16 +1015,12 @@ static void check_process_timers(struct task_struct *tsk,
  */
 void posix_cpu_timer_schedule(struct k_itimer *timer)
 {
+       struct sighand_struct *sighand;
+       unsigned long flags;
        struct task_struct *p = timer->it.cpu.task;
        unsigned long long now;
 
-       if (unlikely(p == NULL)) {
-               WARN_ON_ONCE(CPUCLOCK_PERTHREAD(timer->it_clock));
-               /*
-                * The task was cleaned up already, no future firings.
-                */
-               goto out;
-       }
+       WARN_ON_ONCE(p == NULL);
 
        /*
         * Fetch the current sample and update the timer's expiry time.
@@ -1050,42 +1031,42 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
                if (unlikely(p->exit_state))
                        goto out;
 
-               read_lock(&tasklist_lock); /* arm_timer needs it.  */
-               spin_lock(&p->sighand->siglock);
+               /* Protect timer list r/w in arm_timer() */
+               sighand = lock_task_sighand(p, &flags);
+               if (!sighand)
+                       goto out;
        } else {
-               read_lock(&tasklist_lock);
-               if (unlikely(p->sighand == NULL)) {
+               /*
+                * Protect arm_timer() and timer sampling in case of call to
+                * thread_group_cputime().
+                */
+               sighand = lock_task_sighand(p, &flags);
+               if (unlikely(sighand == NULL)) {
                        /*
                         * The process has been reaped.
                         * We can't even collect a sample any more.
                         */
-                       put_task_struct(p);
-                       timer->it.cpu.task = p = NULL;
                        timer->it.cpu.expires = 0;
-                       read_unlock(&tasklist_lock);
                        goto out;
                } else if (unlikely(p->exit_state) && thread_group_empty(p)) {
-                       read_unlock(&tasklist_lock);
+                       unlock_task_sighand(p, &flags);
                        /* Optimizations: if the process is dying, no need to rearm */
                        goto out;
                }
-               spin_lock(&p->sighand->siglock);
                cpu_timer_sample_group(timer->it_clock, p, &now);
                bump_cpu_timer(timer, now);
-               /* Leave the tasklist_lock locked for the call below.  */
+               /* Leave the sighand locked for the call below.  */
        }
 
        /*
         * Now re-arm for the new expiry time.
         */
-       BUG_ON(!irqs_disabled());
+       WARN_ON_ONCE(!irqs_disabled());
        arm_timer(timer);
-       spin_unlock(&p->sighand->siglock);
-       read_unlock(&tasklist_lock);
+       unlock_task_sighand(p, &flags);
 
        /* Kick full dynticks CPUs in case they need to tick on the new timer */
        posix_cpu_timer_kick_nohz();
-
 out:
        timer->it_overrun_last = timer->it_overrun;
        timer->it_overrun = -1;
@@ -1169,7 +1150,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
        struct k_itimer *timer, *next;
        unsigned long flags;
 
-       BUG_ON(!irqs_disabled());
+       WARN_ON_ONCE(!irqs_disabled());
 
        /*
         * The fast path checks that there are no expired thread or thread
@@ -1236,7 +1217,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 {
        unsigned long long now;
 
-       BUG_ON(clock_idx == CPUCLOCK_SCHED);
+       WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
        cpu_timer_sample_group(clock_idx, tsk, &now);
 
        if (oldval) {