exit: wait: drop tasklist_lock before psig->c* accounting
[cascardo/linux.git] / kernel / exit.c
index 5d30019..6297eb0 100644 (file)
@@ -529,15 +529,7 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
 static void reparent_leader(struct task_struct *father, struct task_struct *p,
                                struct list_head *dead)
 {
-       list_move_tail(&p->sibling, &p->real_parent->children);
-
-       if (p->exit_state == EXIT_DEAD)
-               return;
-       /*
-        * If this is a threaded reparent there is no need to
-        * notify anyone anything has happened.
-        */
-       if (same_thread_group(p->real_parent, father))
+       if (unlikely(p->exit_state == EXIT_DEAD))
                return;
 
        /* We don't want people slaying init. */
@@ -548,7 +540,7 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p,
            p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
                if (do_notify_parent(p, p->exit_signal)) {
                        p->exit_state = EXIT_DEAD;
-                       list_move_tail(&p->sibling, dead);
+                       list_add(&p->ptrace_entry, dead);
                }
        }
 
@@ -557,38 +549,37 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p,
 
 static void forget_original_parent(struct task_struct *father)
 {
-       struct task_struct *p, *n, *reaper;
+       struct task_struct *p, *t, *n, *reaper;
        LIST_HEAD(dead_children);
 
        write_lock_irq(&tasklist_lock);
-       /*
-        * Note that exit_ptrace() and find_new_reaper() might
-        * drop tasklist_lock and reacquire it.
-        */
-       exit_ptrace(father);
-       reaper = find_new_reaper(father);
-
-       list_for_each_entry_safe(p, n, &father->children, sibling) {
-               struct task_struct *t = p;
+       if (unlikely(!list_empty(&father->ptraced)))
+               exit_ptrace(father, &dead_children);
 
-               do {
+       /* Can drop and reacquire tasklist_lock */
+       reaper = find_new_reaper(father);
+       list_for_each_entry(p, &father->children, sibling) {
+               for_each_thread(p, t) {
                        t->real_parent = reaper;
-                       if (t->parent == father) {
-                               BUG_ON(t->ptrace);
+                       BUG_ON((!t->ptrace) != (t->parent == father));
+                       if (likely(!t->ptrace))
                                t->parent = t->real_parent;
-                       }
                        if (t->pdeath_signal)
                                group_send_sig_info(t->pdeath_signal,
                                                    SEND_SIG_NOINFO, t);
-               } while_each_thread(p, t);
-               reparent_leader(father, p, &dead_children);
+               }
+               /*
+                * If this is a threaded reparent there is no need to
+                * notify anyone anything has happened.
+                */
+               if (!same_thread_group(reaper, father))
+                       reparent_leader(father, p, &dead_children);
        }
+       list_splice_tail_init(&father->children, &reaper->children);
        write_unlock_irq(&tasklist_lock);
 
-       BUG_ON(!list_empty(&father->children));
-
-       list_for_each_entry_safe(p, n, &dead_children, sibling) {
-               list_del_init(&p->sibling);
+       list_for_each_entry_safe(p, n, &dead_children, ptrace_entry) {
+               list_del_init(&p->ptrace_entry);
                release_task(p);
        }
 }
@@ -982,8 +973,7 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
  */
 static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 {
-       unsigned long state;
-       int retval, status, traced;
+       int state, retval, status;
        pid_t pid = task_pid_vnr(p);
        uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
        struct siginfo __user *infop;
@@ -997,6 +987,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 
                get_task_struct(p);
                read_unlock(&tasklist_lock);
+               sched_annotate_sleep();
+
                if ((exit_code & 0x7f) == 0) {
                        why = CLD_EXITED;
                        status = exit_code >> 8;
@@ -1006,21 +998,25 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
                }
                return wait_noreap_copyout(wo, p, pid, uid, why, status);
        }
-
-       traced = ptrace_reparented(p);
        /*
         * Move the task's state to DEAD/TRACE, only one thread can do this.
         */
-       state = traced && thread_group_leader(p) ? EXIT_TRACE : EXIT_DEAD;
+       state = (ptrace_reparented(p) && thread_group_leader(p)) ?
+               EXIT_TRACE : EXIT_DEAD;
        if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE)
                return 0;
        /*
-        * It can be ptraced but not reparented, check
-        * thread_group_leader() to filter out sub-threads.
+        * We own this thread, nobody else can reap it.
         */
-       if (likely(!traced) && thread_group_leader(p)) {
-               struct signal_struct *psig;
-               struct signal_struct *sig;
+       read_unlock(&tasklist_lock);
+       sched_annotate_sleep();
+
+       /*
+        * Check thread_group_leader() to exclude the traced sub-threads.
+        */
+       if (state == EXIT_DEAD && thread_group_leader(p)) {
+               struct signal_struct *sig = p->signal;
+               struct signal_struct *psig = current->signal;
                unsigned long maxrss;
                cputime_t tgutime, tgstime;
 
@@ -1032,21 +1028,20 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
                 * accumulate in the parent's signal_struct c* fields.
                 *
                 * We don't bother to take a lock here to protect these
-                * p->signal fields, because they are only touched by
-                * __exit_signal, which runs with tasklist_lock
-                * write-locked anyway, and so is excluded here.  We do
-                * need to protect the access to parent->signal fields,
-                * as other threads in the parent group can be right
-                * here reaping other children at the same time.
+                * p->signal fields because the whole thread group is dead
+                * and nobody can change them.
+                *
+                * psig->stats_lock also protects us from our sub-threads
+                * which can reap other children at the same time. Until
+                * we change k_getrusage()-like users to rely on this lock
+                * we have to take ->siglock as well.
                 *
                 * We use thread_group_cputime_adjusted() to get times for
                 * the thread group, which consolidates times for all threads
                 * in the group including the group leader.
                 */
                thread_group_cputime_adjusted(p, &tgutime, &tgstime);
-               spin_lock_irq(&p->real_parent->sighand->siglock);
-               psig = p->real_parent->signal;
-               sig = p->signal;
+               spin_lock_irq(&current->sighand->siglock);
                write_seqlock(&psig->stats_lock);
                psig->cutime += tgutime + sig->cutime;
                psig->cstime += tgstime + sig->cstime;
@@ -1071,15 +1066,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
                task_io_accounting_add(&psig->ioac, &p->ioac);
                task_io_accounting_add(&psig->ioac, &sig->ioac);
                write_sequnlock(&psig->stats_lock);
-               spin_unlock_irq(&p->real_parent->sighand->siglock);
+               spin_unlock_irq(&current->sighand->siglock);
        }
 
-       /*
-        * Now we are sure this task is interesting, and no other
-        * thread can reap it because we its state == DEAD/TRACE.
-        */
-       read_unlock(&tasklist_lock);
-
        retval = wo->wo_rusage
                ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
        status = (p->signal->flags & SIGNAL_GROUP_EXIT)
@@ -1210,6 +1199,7 @@ unlock_sig:
        pid = task_pid_vnr(p);
        why = ptrace ? CLD_TRAPPED : CLD_STOPPED;
        read_unlock(&tasklist_lock);
+       sched_annotate_sleep();
 
        if (unlikely(wo->wo_flags & WNOWAIT))
                return wait_noreap_copyout(wo, p, pid, uid, why, exit_code);
@@ -1272,6 +1262,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
        pid = task_pid_vnr(p);
        get_task_struct(p);
        read_unlock(&tasklist_lock);
+       sched_annotate_sleep();
 
        if (!wo->wo_info) {
                retval = wo->wo_rusage