Merge tag 'trace-v4.8-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt...

author Linus Torvalds <torvalds@linux-foundation.org>

Mon, 26 Sep 2016 01:40:13 +0000 (18:40 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 26 Sep 2016 01:40:13 +0000 (18:40 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 26 Sep 2016 01:40:13 +0000 (18:40 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 26 Sep 2016 01:40:13 +0000 (18:40 -0700)
diff --combined kernel/trace/trace.c

index dade4c9,77eeab2..7bc5676
--- 1/kernel/trace/trace.c
--- 2/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@@ -25,7 -25,7 +25,7 @@@
   #include <linux/hardirq.h>
   #include <linux/linkage.h>
   #include <linux/uaccess.h>
- -#include <linux/kprobes.h>
+ +#include <linux/vmalloc.h>
   #include <linux/ftrace.h>
   #include <linux/module.h>
   #include <linux/percpu.h>
@@@ -319,258 -319,6 +319,258 @@@ int call_filter_check_discard(struct tr
         return 0;
   }
   
+ +void trace_free_pid_list(struct trace_pid_list *pid_list)
+ +{
+ +      vfree(pid_list->pids);
+ +      kfree(pid_list);
+ +}
+ +
+ +/**
+ + * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
+ + * @filtered_pids: The list of pids to check
+ + * @search_pid: The PID to find in @filtered_pids
+ + *
+ + * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
+ + */
+ +bool
+ +trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
+ +{
+ +      /*
+ +       * If pid_max changed after filtered_pids was created, we
+ +       * by default ignore all pids greater than the previous pid_max.
+ +       */
+ +      if (search_pid >= filtered_pids->pid_max)
+ +              return false;
+ +
+ +      return test_bit(search_pid, filtered_pids->pids);
+ +}
+ +
+ +/**
+ + * trace_ignore_this_task - should a task be ignored for tracing
+ + * @filtered_pids: The list of pids to check
+ + * @task: The task that should be ignored if not filtered
+ + *
+ + * Checks if @task should be traced or not from @filtered_pids.
+ + * Returns true if @task should *NOT* be traced.
+ + * Returns false if @task should be traced.
+ + */
+ +bool
+ +trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
+ +{
+ +      /*
+ +       * Return false, because if filtered_pids does not exist,
+ +       * all pids are good to trace.
+ +       */
+ +      if (!filtered_pids)
+ +              return false;
+ +
+ +      return !trace_find_filtered_pid(filtered_pids, task->pid);
+ +}
+ +
+ +/**
+ + * trace_filter_add_remove_task - Add or remove a task from a pid_list
+ + * @pid_list: The list to modify
+ + * @self: The current task for fork or NULL for exit
+ + * @task: The task to add or remove
+ + *
+ + * If adding a task, if @self is defined, the task is only added if @self
+ + * is also included in @pid_list. This happens on fork and tasks should
+ + * only be added when the parent is listed. If @self is NULL, then the
+ + * @task pid will be removed from the list, which would happen on exit
+ + * of a task.
+ + */
+ +void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
+ +                                struct task_struct *self,
+ +                                struct task_struct *task)
+ +{
+ +      if (!pid_list)
+ +              return;
+ +
+ +      /* For forks, we only add if the forking task is listed */
+ +      if (self) {
+ +              if (!trace_find_filtered_pid(pid_list, self->pid))
+ +                      return;
+ +      }
+ +
+ +      /* Sorry, but we don't support pid_max changing after setting */
+ +      if (task->pid >= pid_list->pid_max)
+ +              return;
+ +
+ +      /* "self" is set for forks, and NULL for exits */
+ +      if (self)
+ +              set_bit(task->pid, pid_list->pids);
+ +      else
+ +              clear_bit(task->pid, pid_list->pids);
+ +}
+ +
+ +/**
+ + * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
+ + * @pid_list: The pid list to show
+ + * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
+ + * @pos: The position of the file
+ + *
+ + * This is used by the seq_file "next" operation to iterate the pids
+ + * listed in a trace_pid_list structure.
+ + *
+ + * Returns the pid+1 as we want to display pid of zero, but NULL would
+ + * stop the iteration.
+ + */
+ +void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
+ +{
+ +      unsigned long pid = (unsigned long)v;
+ +
+ +      (*pos)++;
+ +
+ +      /* pid already is +1 of the actual previous bit */
+ +      pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
+ +
+ +      /* Return pid + 1 to allow zero to be represented */
+ +      if (pid < pid_list->pid_max)
+ +              return (void *)(pid + 1);
+ +
+ +      return NULL;
+ +}
+ +
+ +/**
+ + * trace_pid_start - Used for seq_file to start reading pid lists
+ + * @pid_list: The pid list to show
+ + * @pos: The position of the file
+ + *
+ + * This is used by seq_file "start" operation to start the iteration
+ + * of listing pids.
+ + *
+ + * Returns the pid+1 as we want to display pid of zero, but NULL would
+ + * stop the iteration.
+ + */
+ +void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
+ +{
+ +      unsigned long pid;
+ +      loff_t l = 0;
+ +
+ +      pid = find_first_bit(pid_list->pids, pid_list->pid_max);
+ +      if (pid >= pid_list->pid_max)
+ +              return NULL;
+ +
+ +      /* Return pid + 1 so that zero can be the exit value */
+ +      for (pid++; pid && l < *pos;
+ +           pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
+ +              ;
+ +      return (void *)pid;
+ +}
+ +
+ +/**
+ + * trace_pid_show - show the current pid in seq_file processing
+ + * @m: The seq_file structure to write into
+ + * @v: A void pointer of the pid (+1) value to display
+ + *
+ + * Can be directly used by seq_file operations to display the current
+ + * pid value.
+ + */
+ +int trace_pid_show(struct seq_file *m, void *v)
+ +{
+ +      unsigned long pid = (unsigned long)v - 1;
+ +
+ +      seq_printf(m, "%lu\n", pid);
+ +      return 0;
+ +}
+ +
+ +/* 128 should be much more than enough */
+ +#define PID_BUF_SIZE          127
+ +
+ +int trace_pid_write(struct trace_pid_list *filtered_pids,
+ +                  struct trace_pid_list **new_pid_list,
+ +                  const char __user *ubuf, size_t cnt)
+ +{
+ +      struct trace_pid_list *pid_list;
+ +      struct trace_parser parser;
+ +      unsigned long val;
+ +      int nr_pids = 0;
+ +      ssize_t read = 0;
+ +      ssize_t ret = 0;
+ +      loff_t pos;
+ +      pid_t pid;
+ +
+ +      if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
+ +              return -ENOMEM;
+ +
+ +      /*
+ +       * Always recreate a new array. The write is an all or nothing
+ +       * operation. Always create a new array when adding new pids by
+ +       * the user. If the operation fails, then the current list is
+ +       * not modified.
+ +       */
+ +      pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
+ +      if (!pid_list)
+ +              return -ENOMEM;
+ +
+ +      pid_list->pid_max = READ_ONCE(pid_max);
+ +
+ +      /* Only truncating will shrink pid_max */
+ +      if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
+ +              pid_list->pid_max = filtered_pids->pid_max;
+ +
+ +      pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
+ +      if (!pid_list->pids) {
+ +              kfree(pid_list);
+ +              return -ENOMEM;
+ +      }
+ +
+ +      if (filtered_pids) {
+ +              /* copy the current bits to the new max */
+ +              for_each_set_bit(pid, filtered_pids->pids,
+ +                               filtered_pids->pid_max) {
+ +                      set_bit(pid, pid_list->pids);
+ +                      nr_pids++;
+ +              }
+ +      }
+ +
+ +      while (cnt > 0) {
+ +
+ +              pos = 0;
+ +
+ +              ret = trace_get_user(&parser, ubuf, cnt, &pos);
+ +              if (ret < 0 || !trace_parser_loaded(&parser))
+ +                      break;
+ +
+ +              read += ret;
+ +              ubuf += ret;
+ +              cnt -= ret;
+ +
+ +              parser.buffer[parser.idx] = 0;
+ +
+ +              ret = -EINVAL;
+ +              if (kstrtoul(parser.buffer, 0, &val))
+ +                      break;
+ +              if (val >= pid_list->pid_max)
+ +                      break;
+ +
+ +              pid = (pid_t)val;
+ +
+ +              set_bit(pid, pid_list->pids);
+ +              nr_pids++;
+ +
+ +              trace_parser_clear(&parser);
+ +              ret = 0;
+ +      }
+ +      trace_parser_put(&parser);
+ +
+ +      if (ret < 0) {
+ +              trace_free_pid_list(pid_list);
+ +              return ret;
+ +      }
+ +
+ +      if (!nr_pids) {
+ +              /* Cleared the list of pids */
+ +              trace_free_pid_list(pid_list);
+ +              read = ret;
+ +              pid_list = NULL;
+ +      }
+ +
+ +      *new_pid_list = pid_list;
+ +
+ +      return read;
+ +}
+ +
   static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
   {
         u64 ts;
@@@ -2114,17 -1862,7 +2114,17 @@@ void trace_buffer_unlock_commit_regs(st
   {
         __buffer_unlock_commit(buffer, event);
   
- -      ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
+ +      /*
+ +       * If regs is not set, then skip the following callers:
+ +       *   trace_buffer_unlock_commit_regs
+ +       *   event_trigger_unlock_commit
+ +       *   trace_event_buffer_commit
+ +       *   trace_event_raw_event_sched_switch
+ +       * Note, we can still get here via blktrace, wakeup tracer
+ +       * and mmiotrace, but that's ok if they lose a function or
+ +       * two. They are not that meaningful.
+ +       */
+ +      ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
         ftrace_trace_userstack(buffer, flags, pc);
   }
   
@@@ -2174,13 -1912,6 +2174,13 @@@ static void __ftrace_trace_stack(struc
         trace.nr_entries        = 0;
         trace.skip              = skip;
   
+ +      /*
+ +       * Add two, for this function and the call to save_stack_trace()
+ +       * If regs is set, then these functions will not be in the way.
+ +       */
+ +      if (!regs)
+ +              trace.skip += 2;
+ +
         /*
          * Since events can happen in NMIs there's no safe way to
          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
@@@ -2352,41 -2083,83 +2352,41 @@@ static void __trace_userstack(struct tr
   
   /* created for use with alloc_percpu */
   struct trace_buffer_struct {
- -      char buffer[TRACE_BUF_SIZE];
+ +      int nesting;
+ +      char buffer[4][TRACE_BUF_SIZE];
   };
   
   static struct trace_buffer_struct *trace_percpu_buffer;
- -static struct trace_buffer_struct *trace_percpu_sirq_buffer;
- -static struct trace_buffer_struct *trace_percpu_irq_buffer;
- -static struct trace_buffer_struct *trace_percpu_nmi_buffer;
   
   /*
- - * The buffer used is dependent on the context. There is a per cpu
- - * buffer for normal context, softirq contex, hard irq context and
- - * for NMI context. Thise allows for lockless recording.
- - *
- - * Note, if the buffers failed to be allocated, then this returns NULL
+ + * This allows for lockless recording.  If we're nested too deeply, then
+ + * this returns NULL.
    */
   static char *get_trace_buf(void)
   {
- -      struct trace_buffer_struct *percpu_buffer;
- -
- -      /*
- -       * If we have allocated per cpu buffers, then we do not
- -       * need to do any locking.
- -       */
- -      if (in_nmi())
- -              percpu_buffer = trace_percpu_nmi_buffer;
- -      else if (in_irq())
- -              percpu_buffer = trace_percpu_irq_buffer;
- -      else if (in_softirq())
- -              percpu_buffer = trace_percpu_sirq_buffer;
- -      else
- -              percpu_buffer = trace_percpu_buffer;
+ +      struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
   
- -      if (!percpu_buffer)
+ +      if (!buffer || buffer->nesting >= 4)
                 return NULL;
   
- -      return this_cpu_ptr(&percpu_buffer->buffer[0]);
+ +      return &buffer->buffer[buffer->nesting++][0];
+ +}
+ +
+ +static void put_trace_buf(void)
+ +{
+ +      this_cpu_dec(trace_percpu_buffer->nesting);
   }
   
   static int alloc_percpu_trace_buffer(void)
   {
         struct trace_buffer_struct *buffers;
- -      struct trace_buffer_struct *sirq_buffers;
- -      struct trace_buffer_struct *irq_buffers;
- -      struct trace_buffer_struct *nmi_buffers;
   
         buffers = alloc_percpu(struct trace_buffer_struct);
- -      if (!buffers)
- -              goto err_warn;
- -
- -      sirq_buffers = alloc_percpu(struct trace_buffer_struct);
- -      if (!sirq_buffers)
- -              goto err_sirq;
- -
- -      irq_buffers = alloc_percpu(struct trace_buffer_struct);
- -      if (!irq_buffers)
- -              goto err_irq;
- -
- -      nmi_buffers = alloc_percpu(struct trace_buffer_struct);
- -      if (!nmi_buffers)
- -              goto err_nmi;
+ +      if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
+ +              return -ENOMEM;
   
         trace_percpu_buffer = buffers;
- -      trace_percpu_sirq_buffer = sirq_buffers;
- -      trace_percpu_irq_buffer = irq_buffers;
- -      trace_percpu_nmi_buffer = nmi_buffers;
- -
         return 0;
- -
- - err_nmi:
- -      free_percpu(irq_buffers);
- - err_irq:
- -      free_percpu(sirq_buffers);
- - err_sirq:
- -      free_percpu(buffers);
- - err_warn:
- -      WARN(1, "Could not allocate percpu trace_printk buffer");
- -      return -ENOMEM;
   }
   
   static int buffers_allocated;
@@@ -2477,7 -2250,7 +2477,7 @@@ int trace_vbprintk(unsigned long ip, co
         tbuffer = get_trace_buf();
         if (!tbuffer) {
                 len = 0;
- -              goto out;
+ +              goto out_nobuffer;
         }
   
         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
@@@ -2503,9 -2276,6 +2503,9 @@@
         }
   
   out:
+ +      put_trace_buf();
+ +
+ +out_nobuffer:
         preempt_enable_notrace();
         unpause_graph_tracing();
   
@@@ -2537,7 -2307,7 +2537,7 @@@ __trace_array_vprintk(struct ring_buffe
         tbuffer = get_trace_buf();
         if (!tbuffer) {
                 len = 0;
- -              goto out;
+ +              goto out_nobuffer;
         }
   
         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
@@@ -2556,11 -2326,7 +2556,11 @@@
                 __buffer_unlock_commit(buffer, event);
                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
         }
- - out:
+ +
+ +out:
+ +      put_trace_buf();
+ +
+ +out_nobuffer:
         preempt_enable_notrace();
         unpause_graph_tracing();
   
@@@ -5124,19 -4890,20 +5124,20 @@@ tracing_read_pipe(struct file *filp, ch
         struct trace_iterator *iter = filp->private_data;
         ssize_t sret;
   
-       /* return any leftover data */
-       sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
-       if (sret != -EBUSY)
-               return sret;
- 
-       trace_seq_init(&iter->seq);
- 
         /*
          * Avoid more than one consumer on a single file descriptor
          * This is just a matter of traces coherency, the ring buffer itself
          * is protected.
          */
         mutex_lock(&iter->mutex);
+ 
+       /* return any leftover data */
+       sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
+       if (sret != -EBUSY)
+               goto out;
+ 
+       trace_seq_init(&iter->seq);
+ 
         if (iter->trace->read) {
                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
                 if (sret)
@@@ -6163,9 -5930,6 +6164,6 @@@ tracing_buffers_splice_read(struct fil
                 return -EBUSY;
   #endif
   
-       if (splice_grow_spd(pipe, &spd))
-               return -ENOMEM;
- 
         if (*ppos & (PAGE_SIZE - 1))
                 return -EINVAL;
   
@@@ -6175,6 -5939,9 +6173,9 @@@
                 len &= PAGE_MASK;
         }
   
+       if (splice_grow_spd(pipe, &spd))
+               return -ENOMEM;
+ 
    again:
         trace_access_lock(iter->cpu_file);
         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
@@@ -6232,19 -5999,21 +6233,21 @@@
         /* did we read anything? */
         if (!spd.nr_pages) {
                 if (ret)
-                       return ret;
+                       goto out;
   
+               ret = -EAGAIN;
                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
-                       return -EAGAIN;
+                       goto out;
   
                 ret = wait_on_pipe(iter, true);
                 if (ret)
-                       return ret;
+                       goto out;
   
                 goto again;
         }
   
         ret = splice_to_pipe(pipe, &spd);
+ out:
         splice_shrink_spd(&spd);
   
         return ret;
@@@ -7211,7 -6980,6 +7214,7 @@@ init_tracer_tracefs(struct trace_array 
         for_each_tracing_cpu(cpu)
                 tracing_init_tracefs_percpu(tr, cpu);
   
+ +      ftrace_init_tracefs(tr, d_tracer);
   }
   
   static struct vfsmount *trace_automount(void *ingore)
@@@ -7365,7 -7133,6 +7368,7 @@@ static __init int tracer_init_tracefs(v
                 return 0;
   
         init_tracer_tracefs(&global_trace, d_tracer);
+ +      ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
   
         trace_create_file("tracing_thresh", 0644, d_tracer,
                         &global_trace, &tracing_thresh_fops);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 26 Sep 2016 01:40:13 +0000 (18:40 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 26 Sep 2016 01:40:13 +0000 (18:40 -0700)