exec: RT sub-thread can livelock and monopolize CPU on exec
author Oleg Nesterov <oleg@tv-sign.ru>
Wed, 17 Oct 2007 06:27:23 +0000 (23:27 -0700)
committer Linus Torvalds <torvalds@woody.linux-foundation.org>
Wed, 17 Oct 2007 15:42:54 +0000 (08:42 -0700)
de_thread() calls yield() in a loop while waiting for ->group_leader to become
a zombie. This livelocks if an rt-prio execer shares the same CPU with
->group_leader: the execer keeps spinning and monopolizes the CPU. Change the
code to sleep and be woken via the ->group_exit_task/notify_count mechanics.

This patch certainly uglifies the code; perhaps someone can suggest something
better.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/exec.c
kernel/exit.c

index ab5a4a3..aa470a9 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -801,16 +801,15 @@ static int de_thread(struct task_struct *tsk)
                        hrtimer_restart(&sig->real_timer);
                spin_lock_irq(lock);
        }
+
+       sig->notify_count = count;
+       sig->group_exit_task = tsk;
        while (atomic_read(&sig->count) > count) {
-               sig->group_exit_task = tsk;
-               sig->notify_count = count;
                __set_current_state(TASK_UNINTERRUPTIBLE);
                spin_unlock_irq(lock);
                schedule();
                spin_lock_irq(lock);
        }
-       sig->group_exit_task = NULL;
-       sig->notify_count = 0;
        spin_unlock_irq(lock);
 
        /*
@@ -819,14 +818,17 @@ static int de_thread(struct task_struct *tsk)
         * and to assume its PID:
         */
        if (!thread_group_leader(tsk)) {
-               /*
-                * Wait for the thread group leader to be a zombie.
-                * It should already be zombie at this point, most
-                * of the time.
-                */
                leader = tsk->group_leader;
-               while (leader->exit_state != EXIT_ZOMBIE)
-                       yield();
+
+               sig->notify_count = -1;
+               for (;;) {
+                       write_lock_irq(&tasklist_lock);
+                       if (likely(leader->exit_state))
+                               break;
+                       __set_current_state(TASK_UNINTERRUPTIBLE);
+                       write_unlock_irq(&tasklist_lock);
+                       schedule();
+               }
 
                /*
                 * The only record we have of the real-time age of a
@@ -840,8 +842,6 @@ static int de_thread(struct task_struct *tsk)
                 */
                tsk->start_time = leader->start_time;
 
-               write_lock_irq(&tasklist_lock);
-
                BUG_ON(leader->tgid != tsk->tgid);
                BUG_ON(tsk->pid == tsk->tgid);
                /*
@@ -874,6 +874,8 @@ static int de_thread(struct task_struct *tsk)
                write_unlock_irq(&tasklist_lock);
         }
 
+       sig->group_exit_task = NULL;
+       sig->notify_count = 0;
        /*
         * There may be one thread left which is just exiting,
         * but it's safe to stop telling the group to kill themselves.
index 25f6805..4c108df 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -92,10 +92,9 @@ static void __exit_signal(struct task_struct *tsk)
                 * If there is any task waiting for the group exit
                 * then notify it:
                 */
-               if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) {
+               if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count)
                        wake_up_process(sig->group_exit_task);
-                       sig->group_exit_task = NULL;
-               }
+
                if (tsk == sig->curr_target)
                        sig->curr_target = next_thread(tsk);
                /*
@@ -827,6 +826,11 @@ static void exit_notify(struct task_struct *tsk)
                state = EXIT_DEAD;
        tsk->exit_state = state;
 
+       if (thread_group_leader(tsk) &&
+           tsk->signal->notify_count < 0 &&
+           tsk->signal->group_exit_task)
+               wake_up_process(tsk->signal->group_exit_task);
+
        write_unlock_irq(&tasklist_lock);
 
        list_for_each_safe(_p, _n, &ptrace_dead) {