Merge tag 'usb-4.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
diff --git a/kernel/fork.c b/kernel/fork.c
index 9b85f6b..c060c7e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -159,15 +159,41 @@ void __weak arch_release_thread_stack(unsigned long *stack)
  * kmemcache based allocator.
  */
 # if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
+
+#ifdef CONFIG_VMAP_STACK
+/*
+ * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
+ * flush.  Try to minimize the number of calls by caching stacks.
+ */
+#define NR_CACHED_STACKS 2
+static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
+#endif
+
 static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
 #ifdef CONFIG_VMAP_STACK
-       void *stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
-                                          VMALLOC_START, VMALLOC_END,
-                                          THREADINFO_GFP | __GFP_HIGHMEM,
-                                          PAGE_KERNEL,
-                                          0, node,
-                                          __builtin_return_address(0));
+       void *stack;
+       int i;
+
+       local_irq_disable();
+       for (i = 0; i < NR_CACHED_STACKS; i++) {
+               struct vm_struct *s = this_cpu_read(cached_stacks[i]);
+
+               if (!s)
+                       continue;
+               this_cpu_write(cached_stacks[i], NULL);
+
+               tsk->stack_vm_area = s;
+               local_irq_enable();
+               return s->addr;
+       }
+       local_irq_enable();
+
+       stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
+                                    VMALLOC_START, VMALLOC_END,
+                                    THREADINFO_GFP | __GFP_HIGHMEM,
+                                    PAGE_KERNEL,
+                                    0, node, __builtin_return_address(0));
 
        /*
         * We can't call find_vm_area() in interrupt context, and
@@ -187,10 +213,28 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 
 static inline void free_thread_stack(struct task_struct *tsk)
 {
-       if (task_stack_vm_area(tsk))
+#ifdef CONFIG_VMAP_STACK
+       if (task_stack_vm_area(tsk)) {
+               unsigned long flags;
+               int i;
+
+               local_irq_save(flags);
+               for (i = 0; i < NR_CACHED_STACKS; i++) {
+                       if (this_cpu_read(cached_stacks[i]))
+                               continue;
+
+                       this_cpu_write(cached_stacks[i], tsk->stack_vm_area);
+                       local_irq_restore(flags);
+                       return;
+               }
+               local_irq_restore(flags);
+
                vfree(tsk->stack);
-       else
-               __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
+               return;
+       }
+#endif
+
+       __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
 }
 # else
 static struct kmem_cache *thread_stack_cache;
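The two hunks above put a small per-CPU free list in front of the vmalloc-based stack allocator: free_thread_stack() parks up to NR_CACHED_STACKS stacks in cached_stacks[] instead of calling vfree(), and alloc_thread_stack_node() reuses a parked stack before falling into the slow __vmalloc_node_range() path. The interrupt disabling brackets the read-then-write of the per-CPU slots so two users on the same CPU cannot race. A rough user-space analogue of the pattern, as a sketch only (buf_alloc(), buf_free(), and BUF_SIZE are invented names, and _Thread_local stands in for per-CPU data):

    #include <stdlib.h>

    #define NR_CACHED_BUFS 2
    #define BUF_SIZE       (16 * 1024)

    /* Per-thread cache of recently freed buffers; all slots start NULL. */
    static _Thread_local void *cached_bufs[NR_CACHED_BUFS];

    static void *buf_alloc(void)
    {
            for (int i = 0; i < NR_CACHED_BUFS; i++) {
                    if (cached_bufs[i]) {
                            void *b = cached_bufs[i];

                            cached_bufs[i] = NULL;  /* take it out of the cache */
                            return b;
                    }
            }
            return malloc(BUF_SIZE);        /* slow path: really allocate */
    }

    static void buf_free(void *b)
    {
            for (int i = 0; i < NR_CACHED_BUFS; i++) {
                    if (!cached_bufs[i]) {
                            cached_bufs[i] = b;     /* park it for reuse */
                            return;
                    }
            }
            free(b);        /* cache already full: really free */
    }
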
@@ -269,11 +313,40 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
        }
 }
 
-void free_task(struct task_struct *tsk)
+static void release_task_stack(struct task_struct *tsk)
 {
        account_kernel_stack(tsk, -1);
        arch_release_thread_stack(tsk->stack);
        free_thread_stack(tsk);
+       tsk->stack = NULL;
+#ifdef CONFIG_VMAP_STACK
+       tsk->stack_vm_area = NULL;
+#endif
+}
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+void put_task_stack(struct task_struct *tsk)
+{
+       if (atomic_dec_and_test(&tsk->stack_refcount))
+               release_task_stack(tsk);
+}
+#endif
+
+void free_task(struct task_struct *tsk)
+{
+#ifndef CONFIG_THREAD_INFO_IN_TASK
+       /*
+        * The task is finally done with both the stack and thread_info,
+        * so free both.
+        */
+       release_task_stack(tsk);
+#else
+       /*
+        * If the task had a separate stack allocation, it should be gone
+        * by now.
+        */
+       WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
+#endif
        rt_mutex_debug_task_free(tsk);
        ftrace_graph_exit_task(tsk);
        put_seccomp_filter(tsk);
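With CONFIG_THREAD_INFO_IN_TASK, the stack's lifetime becomes reference counted: dup_task_struct() starts stack_refcount at 1 (see the dup_task_struct hunk below), put_task_stack() drops references, and the last drop calls release_task_stack(). That is also why free_task() only WARNs in that configuration: the final put is expected to have freed the stack already. Stripped of the kernel specifics, the idiom is the usual last-reference-frees pattern, sketched here with C11 atomics (struct resource and the res_*() names are illustrative only):

    #include <stdatomic.h>
    #include <stdlib.h>

    struct resource {
            atomic_int refcount;
            void *payload;
    };

    /* Creator holds the first reference (mirrors atomic_set(..., 1) at fork). */
    static struct resource *res_create(size_t size)
    {
            struct resource *r = malloc(sizeof(*r));  /* error handling elided */

            atomic_init(&r->refcount, 1);
            r->payload = malloc(size);
            return r;
    }

    static void res_get(struct resource *r)
    {
            atomic_fetch_add(&r->refcount, 1);
    }

    static void res_put(struct resource *r)
    {
            /* fetch_sub returns the old value: 1 means we were the last holder */
            if (atomic_fetch_sub(&r->refcount, 1) == 1) {
                    free(r->payload);
                    free(r);
            }
    }
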
@@ -411,6 +484,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #ifdef CONFIG_VMAP_STACK
        tsk->stack_vm_area = stack_vm_area;
 #endif
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+       atomic_set(&tsk->stack_refcount, 1);
+#endif
 
        if (err)
                goto free_stack;
@@ -854,6 +930,29 @@ struct file *get_mm_exe_file(struct mm_struct *mm)
 }
 EXPORT_SYMBOL(get_mm_exe_file);
 
+/**
+ * get_task_exe_file - acquire a reference to the task's executable file
+ *
+ * Returns %NULL if the task's mm (if any) has no associated executable file,
+ * or if this is a kernel thread with a borrowed mm (see the comment above
+ * get_task_mm()).  The caller must release the file via fput().
+ */
+struct file *get_task_exe_file(struct task_struct *task)
+{
+       struct file *exe_file = NULL;
+       struct mm_struct *mm;
+
+       task_lock(task);
+       mm = task->mm;
+       if (mm) {
+               if (!(task->flags & PF_KTHREAD))
+                       exe_file = get_mm_exe_file(mm);
+       }
+       task_unlock(task);
+       return exe_file;
+}
+EXPORT_SYMBOL(get_task_exe_file);
+
 /**
  * get_task_mm - acquire a reference to the task's mm
  *
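get_task_exe_file() follows the borrow-under-task_lock() pattern: task->mm is only dereferenced while the lock is held, and what escapes the critical section is a file carrying its own reference. A hypothetical caller (inspect_task_exe() below is invented for illustration) pairs it with fput():

    static void inspect_task_exe(struct task_struct *task)
    {
            struct file *exe_file = get_task_exe_file(task);

            if (!exe_file)
                    return;         /* kernel thread, or an mm with no exe file */

            /* exe_file is now safe to use, e.g. via exe_file->f_path ... */

            fput(exe_file);         /* drop the reference taken above */
    }
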
@@ -969,14 +1068,12 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
        deactivate_mm(tsk, mm);
 
        /*
-        * If we're exiting normally, clear a user-space tid field if
-        * requested.  We leave this alone when dying by signal, to leave
-        * the value intact in a core dump, and to save the unnecessary
-        * trouble, say, a killed vfork parent shouldn't touch this mm.
-        * Userland only wants this done for a sys_exit.
+        * Signal userspace if we're not exiting with a core dump;
+        * when we are, the value is left intact for debugging
+        * purposes.
         */
        if (tsk->clear_child_tid) {
-               if (!(tsk->flags & PF_SIGNALED) &&
+               if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
                    atomic_read(&mm->mm_users) > 1) {
                        /*
                         * We don't check the error code - if userspace has
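The condition change above narrows the case in which the child-tid word is left uncleared: only a core-dumping exit (SIGNAL_GROUP_COREDUMP) preserves it, rather than any signal-induced exit (PF_SIGNALED), so threads killed by an ordinary signal still wake pthread_join()-style waiters. For context, this clear_child_tid machinery is what CLONE_CHILD_CLEARTID arms; a minimal user-space demonstration of the clear-and-wake it provides (a sketch, not part of this patch, with error handling elided):

    #define _GNU_SOURCE
    #include <sched.h>
    #include <signal.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/futex.h>

    #define STACK_SIZE (64 * 1024)

    static volatile pid_t ctid = 1;         /* stays nonzero until child exit */

    static int child_fn(void *arg)
    {
            return 0;   /* on exit, mm_release() zeroes ctid and futex-wakes it */
    }

    int main(void)
    {
            char *stack = malloc(STACK_SIZE);       /* stack grows down */

            clone(child_fn, stack + STACK_SIZE,
                  CLONE_VM | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | SIGCHLD,
                  NULL, NULL, NULL, (pid_t *)&ctid);

            /* Wait until the kernel clears ctid on the child's behalf. */
            for (;;) {
                    pid_t t = ctid;

                    if (t == 0)
                            break;
                    syscall(SYS_futex, &ctid, FUTEX_WAIT, t, NULL, NULL, 0);
            }
            return 0;
    }
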
@@ -1460,7 +1557,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        p->real_start_time = ktime_get_boot_ns();
        p->io_context = NULL;
        p->audit_context = NULL;
-       threadgroup_change_begin(current);
        cgroup_fork(p);
 #ifdef CONFIG_NUMA
        p->mempolicy = mpol_dup(p->mempolicy);
@@ -1612,6 +1708,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        INIT_LIST_HEAD(&p->thread_group);
        p->task_works = NULL;
 
+       threadgroup_change_begin(current);
        /*
         * Ensure that the cgroup subsystem policies allow the new process to be
         * forked. It should be noted that the new process's css_set can be changed
@@ -1712,6 +1809,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 bad_fork_cancel_cgroup:
        cgroup_cancel_fork(p);
 bad_fork_free_pid:
+       threadgroup_change_end(current);
        if (pid != &init_struct_pid)
                free_pid(pid);
 bad_fork_cleanup_thread:
@@ -1744,12 +1842,12 @@ bad_fork_cleanup_policy:
        mpol_put(p->mempolicy);
 bad_fork_cleanup_threadgroup_lock:
 #endif
-       threadgroup_change_end(current);
        delayacct_tsk_free(p);
 bad_fork_cleanup_count:
        atomic_dec(&p->cred->user->processes);
        exit_creds(p);
 bad_fork_free:
+       put_task_stack(p);
        free_task(p);
 fork_out:
        return ERR_PTR(retval);
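
The last four hunks shrink the threadgroup_change_begin()/threadgroup_change_end() critical section in copy_process(): the begin moves from the top of the function down to just before cgroup_can_fork(), so the matching end in the error path moves from the bad_fork_cleanup_threadgroup_lock unwinding up to bad_fork_free_pid. The invariant preserved is the usual goto-unwind rule: each error label undoes exactly what was set up before the jump to it. A condensed user-space sketch of that pairing (setup_a(), setup_b(), and teardown_a() are stand-ins, not fork internals):

    #include <pthread.h>

    static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;

    static int setup_a(void) { return 0; }  /* stand-ins for real init steps */
    static int setup_b(void) { return 0; }
    static void teardown_a(void) { }

    static int do_setup(void)
    {
            int err;

            err = setup_a();
            if (err)
                    return err;

            pthread_mutex_lock(&g_lock);    /* taken as late as possible ... */
            err = setup_b();
            if (err)
                    goto unlock;            /* ... so only later failures unlock */

            pthread_mutex_unlock(&g_lock);
            return 0;

    unlock:
            pthread_mutex_unlock(&g_lock);
            teardown_a();
            return err;
    }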