sched/api: Introduce task_rcu_dereference() and try_get_task_struct()

author Oleg Nesterov <oleg@redhat.com>

Wed, 18 May 2016 17:02:18 +0000 (19:02 +0200)

committer Ingo Molnar <mingo@kernel.org>

Fri, 3 Jun 2016 07:18:57 +0000 (09:18 +0200)
author Oleg Nesterov <oleg@redhat.com>
Wed, 18 May 2016 17:02:18 +0000 (19:02 +0200)
committer Ingo Molnar <mingo@kernel.org>
Fri, 3 Jun 2016 07:18:57 +0000 (09:18 +0200)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 6e42ada..dee41bf 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2139,6 +2139,9 @@ static inline void put_task_struct(struct task_struct *t)
                 __put_task_struct(t);
  }
  
+struct task_struct *task_rcu_dereference(struct task_struct **ptask);
+struct task_struct *try_get_task_struct(struct task_struct **ptask);
+
  #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
  extern void task_cputime(struct task_struct *t,
                          cputime_t *utime, cputime_t *stime);
diff --git a/kernel/exit.c b/kernel/exit.c

index 9e6e135..2fb4d44 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -210,6 +210,82 @@ repeat:
                 goto repeat;
  }
  
+/*
+ * Load *ptask under RCU; NULL if it is NULL or task->sighand reads NULL.
+ * Note that if this function returns a valid task_struct pointer (!NULL)
+ * task->usage must remain >0 for the duration of the RCU critical section.
+ */
+struct task_struct *task_rcu_dereference(struct task_struct **ptask)
+{
+       struct sighand_struct *sighand;
+       struct task_struct *task;
+
+       /*
+        * We need to verify that release_task() was not called and thus
+        * delayed_put_task_struct() can't run and drop the last reference
+        * before rcu_read_unlock(). We check task->sighand != NULL, but we
+        * may be reading already freed and reused memory (hence the probe).
+        */
+retry:
+       task = rcu_dereference(*ptask);
+       if (!task)
+               return NULL;
+
+       probe_kernel_address(&task->sighand, sighand);
+
+       /*
+        * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
+        * was already freed we can not miss the preceding update of this
+        * pointer.
+        */
+       smp_rmb();
+       if (unlikely(task != READ_ONCE(*ptask)))
+               goto retry;
+
+       /*
+        * We've re-checked that "task == *ptask", now we have two different
+        * cases:
+        *
+        * 1. This is actually the same task/task_struct. In this case
+        *    sighand != NULL tells us it is still alive.
+        *
+        * 2. This is another task which got the same memory for task_struct.
+        *    We can't tell, of course, so we can not trust sighand != NULL.
+        *
+        *    In this case we actually return a random value, but this is
+        *    correct.
+        *
+        *    If we return NULL - we can pretend that we actually noticed that
+        *    *ptask was updated when the previous task has exited. Or pretend
+        *    that probe_kernel_address(&task->sighand) reads NULL.
+        *
+        *    If we return the new task (because sighand is not NULL for any
+        *    reason) - this is fine too. This (new) task can't go away before
+        *    another gp pass.
+        *
+        *    And note: We could even eliminate the false positive if we
+        *    re-read task->sighand once again to avoid the falsely NULL. But
+        *    this case is very unlikely so we don't care.
+        */
+       if (!sighand)
+               return NULL;
+
+       return task;
+}
+
+/* Look up *ptask under rcu_read_lock(); pin it via get_task_struct() if found. */
+struct task_struct *try_get_task_struct(struct task_struct **ptask)
+{
+       struct task_struct *t;
+
+       rcu_read_lock();
+       t = task_rcu_dereference(ptask);
+       if (t != NULL)
+               get_task_struct(t);
+       rcu_read_unlock();
+       return t;
+}
+
  /*
   * Determine if a process group is "orphaned", according to the POSIX
   * definition in 2.2.2.52.  Orphaned process groups are not to be affected
author	Oleg Nesterov <oleg@redhat.com>
	Wed, 18 May 2016 17:02:18 +0000 (19:02 +0200)
committer	Ingo Molnar <mingo@kernel.org>
	Fri, 3 Jun 2016 07:18:57 +0000 (09:18 +0200)
include/linux/sched.h		patch \| blob \| history
kernel/exit.c		patch \| blob \| history