Merge tag 'trace-v4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux...
authorLinus Torvalds <torvalds@linux-foundation.org>
Tue, 8 Sep 2015 21:04:14 +0000 (14:04 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 8 Sep 2015 21:04:14 +0000 (14:04 -0700)
Pull tracing update from Steven Rostedt:
 "Mostly this is just clean ups and micro optimizations.

  The changes with more meat are:

   - Allowing the trace event filters to filter on CPU number and
     process ids

   - Two new markers for trace output latency were added (10 and 100
     msec latencies)

   - Have tracing_thresh filter function profiling time

  I also worked on modifying the ring buffer code for some future work,
  and moved the adding of the timestamp around.  One of my changes
  caused a regression, and since other changes were built on top of it
  and already tested, I had to operate a revert of that change.  Instead
  of rebasing, this change set has the code that caused a regression as
  well as the code to revert that change without touching the other
  changes that were made on top of it"

* tag 'trace-v4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace:
  ring-buffer: Revert "ring-buffer: Get timestamp after event is allocated"
  tracing: Don't make assumptions about length of string on task rename
  tracing: Allow triggers to filter for CPU ids and process names
  ftrace: Format MCOUNT_ADDR address as type unsigned long
  tracing: Introduce two additional marks for delay
  ftrace: Fix function_graph duration spacing with 7-digits
  ftrace: add tracing_thresh to function profile
  tracing: Clean up stack tracing and fix fentry updates
  ring-buffer: Reorganize function locations
  ring-buffer: Make sure event has enough room for extend and padding
  ring-buffer: Get timestamp after event is allocated
  ring-buffer: Move the adding of the extended timestamp out of line
  ring-buffer: Add event descriptor to simplify passing data
  ftrace: correct the counter increment for trace_buffer data
  tracing: Fix for non-continuous cpu ids
  tracing: Prefer kcalloc over kzalloc with multiply

16 files changed:
Documentation/trace/ftrace.txt
arch/metag/include/asm/ftrace.h
arch/microblaze/include/asm/ftrace.h
arch/powerpc/include/asm/ftrace.h
arch/sh/include/asm/ftrace.h
arch/sparc/include/asm/ftrace.h
arch/x86/include/asm/ftrace.h
include/trace/events/task.h
kernel/trace/ftrace.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace_events.c
kernel/trace/trace_events_filter.c
kernel/trace/trace_functions_graph.c
kernel/trace/trace_output.c
kernel/trace/trace_stack.c

index 87bb4aa..ef621d3 100644 (file)
@@ -691,6 +691,8 @@ The above is mostly meaningful for kernel developers.
         The marks are determined by the difference between this
         current trace and the next trace.
          '$' - greater than 1 second
+         '@' - greater than 100 milisecond
+         '*' - greater than 10 milisecond
          '#' - greater than 1000 microsecond
          '!' - greater than 100 microsecond
          '+' - greater than 10 microsecond
@@ -1944,26 +1946,49 @@ want, depending on your needs.
 
   ie:
 
-  0)               |    up_write() {
-  0)   0.646 us    |      _spin_lock_irqsave();
-  0)   0.684 us    |      _spin_unlock_irqrestore();
-  0)   3.123 us    |    }
-  0)   0.548 us    |    fput();
-  0) + 58.628 us   |  }
+  3) # 1837.709 us |          } /* __switch_to */
+  3)               |          finish_task_switch() {
+  3)   0.313 us    |            _raw_spin_unlock_irq();
+  3)   3.177 us    |          }
+  3) # 1889.063 us |        } /* __schedule */
+  3) ! 140.417 us  |      } /* __schedule */
+  3) # 2034.948 us |    } /* schedule */
+  3) * 33998.59 us |  } /* schedule_preempt_disabled */
 
   [...]
 
-  0)               |      putname() {
-  0)               |        kmem_cache_free() {
-  0)   0.518 us    |          __phys_addr();
-  0)   1.757 us    |        }
-  0)   2.861 us    |      }
-  0) ! 115.305 us  |    }
-  0) ! 116.402 us  |  }
+  1)   0.260 us    |              msecs_to_jiffies();
+  1)   0.313 us    |              __rcu_read_unlock();
+  1) + 61.770 us   |            }
+  1) + 64.479 us   |          }
+  1)   0.313 us    |          rcu_bh_qs();
+  1)   0.313 us    |          __local_bh_enable();
+  1) ! 217.240 us  |        }
+  1)   0.365 us    |        idle_cpu();
+  1)               |        rcu_irq_exit() {
+  1)   0.417 us    |          rcu_eqs_enter_common.isra.47();
+  1)   3.125 us    |        }
+  1) ! 227.812 us  |      }
+  1) ! 457.395 us  |    }
+  1) @ 119760.2 us |  }
+
+  [...]
+
+  2)               |    handle_IPI() {
+  1)   6.979 us    |                  }
+  2)   0.417 us    |      scheduler_ipi();
+  1)   9.791 us    |                }
+  1) + 12.917 us   |              }
+  2)   3.490 us    |    }
+  1) + 15.729 us   |            }
+  1) + 18.542 us   |          }
+  2) $ 3594274 us  |  }
 
   + means that the function exceeded 10 usecs.
   ! means that the function exceeded 100 usecs.
   # means that the function exceeded 1000 usecs.
+  * means that the function exceeded 10 msecs.
+  @ means that the function exceeded 100 msecs.
   $ means that the function exceeded 1 sec.
 
 
index 2901f0f..a2269d6 100644 (file)
@@ -6,7 +6,7 @@
 
 #ifndef __ASSEMBLY__
 extern void mcount_wrapper(void);
-#define MCOUNT_ADDR            ((long)(mcount_wrapper))
+#define MCOUNT_ADDR            ((unsigned long)(mcount_wrapper))
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
index fd2fa2e..da0144f 100644 (file)
@@ -3,7 +3,7 @@
 
 #ifdef CONFIG_FUNCTION_TRACER
 
-#define MCOUNT_ADDR            ((long)(_mcount))
+#define MCOUNT_ADDR            ((unsigned long)(_mcount))
 #define MCOUNT_INSN_SIZE       8 /* sizeof mcount call */
 
 #ifndef __ASSEMBLY__
index e366187..ef89b14 100644 (file)
@@ -2,7 +2,7 @@
 #define _ASM_POWERPC_FTRACE
 
 #ifdef CONFIG_FUNCTION_TRACER
-#define MCOUNT_ADDR            ((long)(_mcount))
+#define MCOUNT_ADDR            ((unsigned long)(_mcount))
 #define MCOUNT_INSN_SIZE       4 /* sizeof mcount call */
 
 #ifdef __ASSEMBLY__
index e79fb6e..1f157b8 100644 (file)
@@ -9,7 +9,7 @@
 #ifndef __ASSEMBLY__
 extern void mcount(void);
 
-#define MCOUNT_ADDR            ((long)(mcount))
+#define MCOUNT_ADDR            ((unsigned long)(mcount))
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 #define CALL_ADDR              ((long)(ftrace_call))
index 9ec94ad..3192a8e 100644 (file)
@@ -2,7 +2,7 @@
 #define _ASM_SPARC64_FTRACE
 
 #ifdef CONFIG_MCOUNT
-#define MCOUNT_ADDR            ((long)(_mcount))
+#define MCOUNT_ADDR            ((unsigned long)(_mcount))
 #define MCOUNT_INSN_SIZE       4 /* sizeof mcount call */
 
 #ifndef __ASSEMBLY__
index f45acad..2493885 100644 (file)
@@ -3,9 +3,9 @@
 
 #ifdef CONFIG_FUNCTION_TRACER
 #ifdef CC_USING_FENTRY
-# define MCOUNT_ADDR           ((long)(__fentry__))
+# define MCOUNT_ADDR           ((unsigned long)(__fentry__))
 #else
-# define MCOUNT_ADDR           ((long)(mcount))
+# define MCOUNT_ADDR           ((unsigned long)(mcount))
 #endif
 #define MCOUNT_INSN_SIZE       5 /* sizeof mcount call */
 
index dee3bb1..2cca6cd 100644 (file)
@@ -46,7 +46,7 @@ TRACE_EVENT(task_rename,
        TP_fast_assign(
                __entry->pid = task->pid;
                memcpy(entry->oldcomm, task->comm, TASK_COMM_LEN);
-               memcpy(entry->newcomm, comm, TASK_COMM_LEN);
+               strlcpy(entry->newcomm, comm, TASK_COMM_LEN);
                __entry->oom_score_adj = task->signal->oom_score_adj;
        ),
 
index eb11011..b0623ac 100644 (file)
@@ -630,13 +630,18 @@ static int function_stat_show(struct seq_file *m, void *v)
                goto out;
        }
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+       avg = rec->time;
+       do_div(avg, rec->counter);
+       if (tracing_thresh && (avg < tracing_thresh))
+               goto out;
+#endif
+
        kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
        seq_printf(m, "  %-30.30s  %10lu", str, rec->counter);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        seq_puts(m, "    ");
-       avg = rec->time;
-       do_div(avg, rec->counter);
 
        /* Sample standard deviation (s^2) */
        if (rec->counter <= 1)
index 6260717..fc347f8 100644 (file)
@@ -399,6 +399,17 @@ struct rb_irq_work {
        bool                            wakeup_full;
 };
 
+/*
+ * Structure to hold event state and handle nested events.
+ */
+struct rb_event_info {
+       u64                     ts;
+       u64                     delta;
+       unsigned long           length;
+       struct buffer_page      *tail_page;
+       int                     add_timestamp;
+};
+
 /*
  * Used for which event context the event is in.
  *  NMI     = 0
@@ -1876,73 +1887,6 @@ rb_event_index(struct ring_buffer_event *event)
        return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
 }
 
-static inline int
-rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
-                  struct ring_buffer_event *event)
-{
-       unsigned long addr = (unsigned long)event;
-       unsigned long index;
-
-       index = rb_event_index(event);
-       addr &= PAGE_MASK;
-
-       return cpu_buffer->commit_page->page == (void *)addr &&
-               rb_commit_index(cpu_buffer) == index;
-}
-
-static void
-rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
-{
-       unsigned long max_count;
-
-       /*
-        * We only race with interrupts and NMIs on this CPU.
-        * If we own the commit event, then we can commit
-        * all others that interrupted us, since the interruptions
-        * are in stack format (they finish before they come
-        * back to us). This allows us to do a simple loop to
-        * assign the commit to the tail.
-        */
- again:
-       max_count = cpu_buffer->nr_pages * 100;
-
-       while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
-               if (RB_WARN_ON(cpu_buffer, !(--max_count)))
-                       return;
-               if (RB_WARN_ON(cpu_buffer,
-                              rb_is_reader_page(cpu_buffer->tail_page)))
-                       return;
-               local_set(&cpu_buffer->commit_page->page->commit,
-                         rb_page_write(cpu_buffer->commit_page));
-               rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
-               cpu_buffer->write_stamp =
-                       cpu_buffer->commit_page->page->time_stamp;
-               /* add barrier to keep gcc from optimizing too much */
-               barrier();
-       }
-       while (rb_commit_index(cpu_buffer) !=
-              rb_page_write(cpu_buffer->commit_page)) {
-
-               local_set(&cpu_buffer->commit_page->page->commit,
-                         rb_page_write(cpu_buffer->commit_page));
-               RB_WARN_ON(cpu_buffer,
-                          local_read(&cpu_buffer->commit_page->page->commit) &
-                          ~RB_WRITE_MASK);
-               barrier();
-       }
-
-       /* again, keep gcc from optimizing */
-       barrier();
-
-       /*
-        * If an interrupt came in just after the first while loop
-        * and pushed the tail page forward, we will be left with
-        * a dangling commit that will never go forward.
-        */
-       if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
-               goto again;
-}
-
 static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
        cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
@@ -1968,64 +1912,6 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
        iter->head = 0;
 }
 
-/* Slow path, do not inline */
-static noinline struct ring_buffer_event *
-rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
-{
-       event->type_len = RINGBUF_TYPE_TIME_EXTEND;
-
-       /* Not the first event on the page? */
-       if (rb_event_index(event)) {
-               event->time_delta = delta & TS_MASK;
-               event->array[0] = delta >> TS_SHIFT;
-       } else {
-               /* nope, just zero it */
-               event->time_delta = 0;
-               event->array[0] = 0;
-       }
-
-       return skip_time_extend(event);
-}
-
-/**
- * rb_update_event - update event type and data
- * @event: the event to update
- * @type: the type of event
- * @length: the size of the event field in the ring buffer
- *
- * Update the type and data fields of the event. The length
- * is the actual size that is written to the ring buffer,
- * and with this, we can determine what to place into the
- * data field.
- */
-static void
-rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
-               struct ring_buffer_event *event, unsigned length,
-               int add_timestamp, u64 delta)
-{
-       /* Only a commit updates the timestamp */
-       if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
-               delta = 0;
-
-       /*
-        * If we need to add a timestamp, then we
-        * add it to the start of the resevered space.
-        */
-       if (unlikely(add_timestamp)) {
-               event = rb_add_time_stamp(event, delta);
-               length -= RB_LEN_TIME_EXTEND;
-               delta = 0;
-       }
-
-       event->time_delta = delta;
-       length -= RB_EVNT_HDR_SIZE;
-       if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
-               event->type_len = 0;
-               event->array[0] = length;
-       } else
-               event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
-}
-
 /*
  * rb_handle_head_page - writer hit the head page
  *
@@ -2184,29 +2070,13 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
        return 0;
 }
 
-static unsigned rb_calculate_event_length(unsigned length)
-{
-       struct ring_buffer_event event; /* Used only for sizeof array */
-
-       /* zero length can cause confusions */
-       if (!length)
-               length++;
-
-       if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
-               length += sizeof(event.array[0]);
-
-       length += RB_EVNT_HDR_SIZE;
-       length = ALIGN(length, RB_ARCH_ALIGNMENT);
-
-       return length;
-}
-
 static inline void
 rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
-             struct buffer_page *tail_page,
-             unsigned long tail, unsigned long length)
+             unsigned long tail, struct rb_event_info *info)
 {
+       struct buffer_page *tail_page = info->tail_page;
        struct ring_buffer_event *event;
+       unsigned long length = info->length;
 
        /*
         * Only the event that crossed the page boundary
@@ -2276,13 +2146,14 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
  */
 static noinline struct ring_buffer_event *
 rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
-            unsigned long length, unsigned long tail,
-            struct buffer_page *tail_page, u64 ts)
+            unsigned long tail, struct rb_event_info *info)
 {
+       struct buffer_page *tail_page = info->tail_page;
        struct buffer_page *commit_page = cpu_buffer->commit_page;
        struct ring_buffer *buffer = cpu_buffer->buffer;
        struct buffer_page *next_page;
        int ret;
+       u64 ts;
 
        next_page = tail_page;
 
@@ -2368,74 +2239,120 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 
  out_again:
 
-       rb_reset_tail(cpu_buffer, tail_page, tail, length);
+       rb_reset_tail(cpu_buffer, tail, info);
 
        /* fail and let the caller try again */
        return ERR_PTR(-EAGAIN);
 
  out_reset:
        /* reset write */
-       rb_reset_tail(cpu_buffer, tail_page, tail, length);
+       rb_reset_tail(cpu_buffer, tail, info);
 
        return NULL;
 }
 
-static struct ring_buffer_event *
-__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
-                 unsigned long length, u64 ts,
-                 u64 delta, int add_timestamp)
+/* Slow path, do not inline */
+static noinline struct ring_buffer_event *
+rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
 {
-       struct buffer_page *tail_page;
-       struct ring_buffer_event *event;
-       unsigned long tail, write;
+       event->type_len = RINGBUF_TYPE_TIME_EXTEND;
 
-       /*
-        * If the time delta since the last event is too big to
-        * hold in the time field of the event, then we append a
-        * TIME EXTEND event ahead of the data event.
-        */
-       if (unlikely(add_timestamp))
-               length += RB_LEN_TIME_EXTEND;
+       /* Not the first event on the page? */
+       if (rb_event_index(event)) {
+               event->time_delta = delta & TS_MASK;
+               event->array[0] = delta >> TS_SHIFT;
+       } else {
+               /* nope, just zero it */
+               event->time_delta = 0;
+               event->array[0] = 0;
+       }
+
+       return skip_time_extend(event);
+}
 
-       tail_page = cpu_buffer->tail_page;
-       write = local_add_return(length, &tail_page->write);
+static inline int rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+                                    struct ring_buffer_event *event);
 
-       /* set write to only the index of the write */
-       write &= RB_WRITE_MASK;
-       tail = write - length;
+/**
+ * rb_update_event - update event type and data
+ * @event: the event to update
+ * @type: the type of event
+ * @length: the size of the event field in the ring buffer
+ *
+ * Update the type and data fields of the event. The length
+ * is the actual size that is written to the ring buffer,
+ * and with this, we can determine what to place into the
+ * data field.
+ */
+static void
+rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
+               struct ring_buffer_event *event,
+               struct rb_event_info *info)
+{
+       unsigned length = info->length;
+       u64 delta = info->delta;
+
+       /* Only a commit updates the timestamp */
+       if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
+               delta = 0;
 
        /*
-        * If this is the first commit on the page, then it has the same
-        * timestamp as the page itself.
+        * If we need to add a timestamp, then we
+        * add it to the start of the resevered space.
         */
-       if (!tail)
+       if (unlikely(info->add_timestamp)) {
+               event = rb_add_time_stamp(event, delta);
+               length -= RB_LEN_TIME_EXTEND;
                delta = 0;
+       }
 
-       /* See if we shot pass the end of this buffer page */
-       if (unlikely(write > BUF_PAGE_SIZE))
-               return rb_move_tail(cpu_buffer, length, tail,
-                                   tail_page, ts);
+       event->time_delta = delta;
+       length -= RB_EVNT_HDR_SIZE;
+       if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+               event->type_len = 0;
+               event->array[0] = length;
+       } else
+               event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
+}
 
-       /* We reserved something on the buffer */
+static unsigned rb_calculate_event_length(unsigned length)
+{
+       struct ring_buffer_event event; /* Used only for sizeof array */
 
-       event = __rb_page_index(tail_page, tail);
-       kmemcheck_annotate_bitfield(event, bitfield);
-       rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
+       /* zero length can cause confusions */
+       if (!length)
+               length++;
 
-       local_inc(&tail_page->entries);
+       if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
+               length += sizeof(event.array[0]);
+
+       length += RB_EVNT_HDR_SIZE;
+       length = ALIGN(length, RB_ARCH_ALIGNMENT);
 
        /*
-        * If this is the first commit on the page, then update
-        * its timestamp.
+        * In case the time delta is larger than the 27 bits for it
+        * in the header, we need to add a timestamp. If another
+        * event comes in when trying to discard this one to increase
+        * the length, then the timestamp will be added in the allocated
+        * space of this event. If length is bigger than the size needed
+        * for the TIME_EXTEND, then padding has to be used. The events
+        * length must be either RB_LEN_TIME_EXTEND, or greater than or equal
+        * to RB_LEN_TIME_EXTEND + 8, as 8 is the minimum size for padding.
+        * As length is a multiple of 4, we only need to worry if it
+        * is 12 (RB_LEN_TIME_EXTEND + 4).
         */
-       if (!tail)
-               tail_page->page->time_stamp = ts;
+       if (length == RB_LEN_TIME_EXTEND + RB_ALIGNMENT)
+               length += RB_ALIGNMENT;
 
-       /* account for these added bytes */
-       local_add(length, &cpu_buffer->entries_bytes);
+       return length;
+}
 
-       return event;
+#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+static inline bool sched_clock_stable(void)
+{
+       return true;
 }
+#endif
 
 static inline int
 rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
@@ -2483,22 +2400,75 @@ static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
        local_inc(&cpu_buffer->commits);
 }
 
-static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+static void
+rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
 {
-       unsigned long commits;
-
-       if (RB_WARN_ON(cpu_buffer,
-                      !local_read(&cpu_buffer->committing)))
-               return;
+       unsigned long max_count;
 
+       /*
+        * We only race with interrupts and NMIs on this CPU.
+        * If we own the commit event, then we can commit
+        * all others that interrupted us, since the interruptions
+        * are in stack format (they finish before they come
+        * back to us). This allows us to do a simple loop to
+        * assign the commit to the tail.
+        */
  again:
-       commits = local_read(&cpu_buffer->commits);
-       /* synchronize with interrupts */
-       barrier();
-       if (local_read(&cpu_buffer->committing) == 1)
-               rb_set_commit_to_write(cpu_buffer);
-
-       local_dec(&cpu_buffer->committing);
+       max_count = cpu_buffer->nr_pages * 100;
+
+       while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
+               if (RB_WARN_ON(cpu_buffer, !(--max_count)))
+                       return;
+               if (RB_WARN_ON(cpu_buffer,
+                              rb_is_reader_page(cpu_buffer->tail_page)))
+                       return;
+               local_set(&cpu_buffer->commit_page->page->commit,
+                         rb_page_write(cpu_buffer->commit_page));
+               rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
+               cpu_buffer->write_stamp =
+                       cpu_buffer->commit_page->page->time_stamp;
+               /* add barrier to keep gcc from optimizing too much */
+               barrier();
+       }
+       while (rb_commit_index(cpu_buffer) !=
+              rb_page_write(cpu_buffer->commit_page)) {
+
+               local_set(&cpu_buffer->commit_page->page->commit,
+                         rb_page_write(cpu_buffer->commit_page));
+               RB_WARN_ON(cpu_buffer,
+                          local_read(&cpu_buffer->commit_page->page->commit) &
+                          ~RB_WRITE_MASK);
+               barrier();
+       }
+
+       /* again, keep gcc from optimizing */
+       barrier();
+
+       /*
+        * If an interrupt came in just after the first while loop
+        * and pushed the tail page forward, we will be left with
+        * a dangling commit that will never go forward.
+        */
+       if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
+               goto again;
+}
+
+static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
+{
+       unsigned long commits;
+
+       if (RB_WARN_ON(cpu_buffer,
+                      !local_read(&cpu_buffer->committing)))
+               return;
+
+ again:
+       commits = local_read(&cpu_buffer->commits);
+       /* synchronize with interrupts */
+       barrier();
+       if (local_read(&cpu_buffer->committing) == 1)
+               rb_set_commit_to_write(cpu_buffer);
+
+       local_dec(&cpu_buffer->committing);
 
        /* synchronize with interrupts */
        barrier();
@@ -2515,91 +2485,94 @@ static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
        }
 }
 
-static struct ring_buffer_event *
-rb_reserve_next_event(struct ring_buffer *buffer,
-                     struct ring_buffer_per_cpu *cpu_buffer,
-                     unsigned long length)
+static inline void rb_event_discard(struct ring_buffer_event *event)
 {
-       struct ring_buffer_event *event;
-       u64 ts, delta;
-       int nr_loops = 0;
-       int add_timestamp;
-       u64 diff;
+       if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+               event = skip_time_extend(event);
 
-       rb_start_commit(cpu_buffer);
+       /* array[0] holds the actual length for the discarded event */
+       event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
+       event->type_len = RINGBUF_TYPE_PADDING;
+       /* time delta must be non zero */
+       if (!event->time_delta)
+               event->time_delta = 1;
+}
 
-#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
-       /*
-        * Due to the ability to swap a cpu buffer from a buffer
-        * it is possible it was swapped before we committed.
-        * (committing stops a swap). We check for it here and
-        * if it happened, we have to fail the write.
-        */
-       barrier();
-       if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
-               local_dec(&cpu_buffer->committing);
-               local_dec(&cpu_buffer->commits);
-               return NULL;
-       }
-#endif
+static inline int
+rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
+                  struct ring_buffer_event *event)
+{
+       unsigned long addr = (unsigned long)event;
+       unsigned long index;
 
-       length = rb_calculate_event_length(length);
- again:
-       add_timestamp = 0;
-       delta = 0;
+       index = rb_event_index(event);
+       addr &= PAGE_MASK;
+
+       return cpu_buffer->commit_page->page == (void *)addr &&
+               rb_commit_index(cpu_buffer) == index;
+}
+
+static void
+rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
+                     struct ring_buffer_event *event)
+{
+       u64 delta;
 
        /*
-        * We allow for interrupts to reenter here and do a trace.
-        * If one does, it will cause this original code to loop
-        * back here. Even with heavy interrupts happening, this
-        * should only happen a few times in a row. If this happens
-        * 1000 times in a row, there must be either an interrupt
-        * storm or we have something buggy.
-        * Bail!
+        * The event first in the commit queue updates the
+        * time stamp.
         */
-       if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
-               goto out_fail;
+       if (rb_event_is_commit(cpu_buffer, event)) {
+               /*
+                * A commit event that is first on a page
+                * updates the write timestamp with the page stamp
+                */
+               if (!rb_event_index(event))
+                       cpu_buffer->write_stamp =
+                               cpu_buffer->commit_page->page->time_stamp;
+               else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+                       delta = event->array[0];
+                       delta <<= TS_SHIFT;
+                       delta += event->time_delta;
+                       cpu_buffer->write_stamp += delta;
+               } else
+                       cpu_buffer->write_stamp += event->time_delta;
+       }
+}
 
-       ts = rb_time_stamp(cpu_buffer->buffer);
-       diff = ts - cpu_buffer->write_stamp;
+static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
+                     struct ring_buffer_event *event)
+{
+       local_inc(&cpu_buffer->entries);
+       rb_update_write_stamp(cpu_buffer, event);
+       rb_end_commit(cpu_buffer);
+}
 
-       /* make sure this diff is calculated here */
-       barrier();
+static __always_inline void
+rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
+{
+       bool pagebusy;
 
-       /* Did the write stamp get updated already? */
-       if (likely(ts >= cpu_buffer->write_stamp)) {
-               delta = diff;
-               if (unlikely(test_time_stamp(delta))) {
-                       int local_clock_stable = 1;
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-                       local_clock_stable = sched_clock_stable();
-#endif
-                       WARN_ONCE(delta > (1ULL << 59),
-                                 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
-                                 (unsigned long long)delta,
-                                 (unsigned long long)ts,
-                                 (unsigned long long)cpu_buffer->write_stamp,
-                                 local_clock_stable ? "" :
-                                 "If you just came from a suspend/resume,\n"
-                                 "please switch to the trace global clock:\n"
-                                 "  echo global > /sys/kernel/debug/tracing/trace_clock\n");
-                       add_timestamp = 1;
-               }
+       if (buffer->irq_work.waiters_pending) {
+               buffer->irq_work.waiters_pending = false;
+               /* irq_work_queue() supplies it's own memory barriers */
+               irq_work_queue(&buffer->irq_work.work);
        }
 
-       event = __rb_reserve_next(cpu_buffer, length, ts,
-                                 delta, add_timestamp);
-       if (unlikely(PTR_ERR(event) == -EAGAIN))
-               goto again;
-
-       if (!event)
-               goto out_fail;
+       if (cpu_buffer->irq_work.waiters_pending) {
+               cpu_buffer->irq_work.waiters_pending = false;
+               /* irq_work_queue() supplies it's own memory barriers */
+               irq_work_queue(&cpu_buffer->irq_work.work);
+       }
 
-       return event;
+       pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
 
- out_fail:
-       rb_end_commit(cpu_buffer);
-       return NULL;
+       if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
+               cpu_buffer->irq_work.wakeup_full = true;
+               cpu_buffer->irq_work.full_waiters_pending = false;
+               /* irq_work_queue() supplies it's own memory barriers */
+               irq_work_queue(&cpu_buffer->irq_work.work);
+       }
 }
 
 /*
@@ -2671,6 +2644,178 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
        cpu_buffer->current_context &= cpu_buffer->current_context - 1;
 }
 
+/**
+ * ring_buffer_unlock_commit - commit a reserved
+ * @buffer: The buffer to commit to
+ * @event: The event pointer to commit.
+ *
+ * This commits the data to the ring buffer, and releases any locks held.
+ *
+ * Must be paired with ring_buffer_lock_reserve.
+ */
+int ring_buffer_unlock_commit(struct ring_buffer *buffer,
+                             struct ring_buffer_event *event)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+       int cpu = raw_smp_processor_id();
+
+       cpu_buffer = buffer->buffers[cpu];
+
+       rb_commit(cpu_buffer, event);
+
+       rb_wakeups(buffer, cpu_buffer);
+
+       trace_recursive_unlock(cpu_buffer);
+
+       preempt_enable_notrace();
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
+
+static noinline void
+rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
+                   struct rb_event_info *info)
+{
+       WARN_ONCE(info->delta > (1ULL << 59),
+                 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
+                 (unsigned long long)info->delta,
+                 (unsigned long long)info->ts,
+                 (unsigned long long)cpu_buffer->write_stamp,
+                 sched_clock_stable() ? "" :
+                 "If you just came from a suspend/resume,\n"
+                 "please switch to the trace global clock:\n"
+                 "  echo global > /sys/kernel/debug/tracing/trace_clock\n");
+       info->add_timestamp = 1;
+}
+
+static struct ring_buffer_event *
+__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
+                 struct rb_event_info *info)
+{
+       struct ring_buffer_event *event;
+       struct buffer_page *tail_page;
+       unsigned long tail, write;
+
+       /*
+        * If the time delta since the last event is too big to
+        * hold in the time field of the event, then we append a
+        * TIME EXTEND event ahead of the data event.
+        */
+       if (unlikely(info->add_timestamp))
+               info->length += RB_LEN_TIME_EXTEND;
+
+       tail_page = info->tail_page = cpu_buffer->tail_page;
+       write = local_add_return(info->length, &tail_page->write);
+
+       /* set write to only the index of the write */
+       write &= RB_WRITE_MASK;
+       tail = write - info->length;
+
+       /*
+        * If this is the first commit on the page, then it has the same
+        * timestamp as the page itself.
+        */
+       if (!tail)
+               info->delta = 0;
+
+       /* See if we shot pass the end of this buffer page */
+       if (unlikely(write > BUF_PAGE_SIZE))
+               return rb_move_tail(cpu_buffer, tail, info);
+
+       /* We reserved something on the buffer */
+
+       event = __rb_page_index(tail_page, tail);
+       kmemcheck_annotate_bitfield(event, bitfield);
+       rb_update_event(cpu_buffer, event, info);
+
+       local_inc(&tail_page->entries);
+
+       /*
+        * If this is the first commit on the page, then update
+        * its timestamp.
+        */
+       if (!tail)
+               tail_page->page->time_stamp = info->ts;
+
+       /* account for these added bytes */
+       local_add(info->length, &cpu_buffer->entries_bytes);
+
+       return event;
+}
+
+static struct ring_buffer_event *
+rb_reserve_next_event(struct ring_buffer *buffer,
+                     struct ring_buffer_per_cpu *cpu_buffer,
+                     unsigned long length)
+{
+       struct ring_buffer_event *event;
+       struct rb_event_info info;
+       int nr_loops = 0;
+       u64 diff;
+
+       rb_start_commit(cpu_buffer);
+
+#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
+       /*
+        * Due to the ability to swap a cpu buffer from a buffer
+        * it is possible it was swapped before we committed.
+        * (committing stops a swap). We check for it here and
+        * if it happened, we have to fail the write.
+        */
+       barrier();
+       if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
+               local_dec(&cpu_buffer->committing);
+               local_dec(&cpu_buffer->commits);
+               return NULL;
+       }
+#endif
+
+       info.length = rb_calculate_event_length(length);
+ again:
+       info.add_timestamp = 0;
+       info.delta = 0;
+
+       /*
+        * We allow for interrupts to reenter here and do a trace.
+        * If one does, it will cause this original code to loop
+        * back here. Even with heavy interrupts happening, this
+        * should only happen a few times in a row. If this happens
+        * 1000 times in a row, there must be either an interrupt
+        * storm or we have something buggy.
+        * Bail!
+        */
+       if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
+               goto out_fail;
+
+       info.ts = rb_time_stamp(cpu_buffer->buffer);
+       diff = info.ts - cpu_buffer->write_stamp;
+
+       /* make sure this diff is calculated here */
+       barrier();
+
+       /* Did the write stamp get updated already? */
+       if (likely(info.ts >= cpu_buffer->write_stamp)) {
+               info.delta = diff;
+               if (unlikely(test_time_stamp(info.delta)))
+                       rb_handle_timestamp(cpu_buffer, &info);
+       }
+
+       event = __rb_reserve_next(cpu_buffer, &info);
+
+       if (unlikely(PTR_ERR(event) == -EAGAIN))
+               goto again;
+
+       if (!event)
+               goto out_fail;
+
+       return event;
+
+ out_fail:
+       rb_end_commit(cpu_buffer);
+       return NULL;
+}
+
 /**
  * ring_buffer_lock_reserve - reserve a part of the buffer
  * @buffer: the ring buffer to reserve from
@@ -2729,111 +2874,6 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
 
-static void
-rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
-                     struct ring_buffer_event *event)
-{
-       u64 delta;
-
-       /*
-        * The event first in the commit queue updates the
-        * time stamp.
-        */
-       if (rb_event_is_commit(cpu_buffer, event)) {
-               /*
-                * A commit event that is first on a page
-                * updates the write timestamp with the page stamp
-                */
-               if (!rb_event_index(event))
-                       cpu_buffer->write_stamp =
-                               cpu_buffer->commit_page->page->time_stamp;
-               else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
-                       delta = event->array[0];
-                       delta <<= TS_SHIFT;
-                       delta += event->time_delta;
-                       cpu_buffer->write_stamp += delta;
-               } else
-                       cpu_buffer->write_stamp += event->time_delta;
-       }
-}
-
-static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
-                     struct ring_buffer_event *event)
-{
-       local_inc(&cpu_buffer->entries);
-       rb_update_write_stamp(cpu_buffer, event);
-       rb_end_commit(cpu_buffer);
-}
-
-static __always_inline void
-rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
-{
-       bool pagebusy;
-
-       if (buffer->irq_work.waiters_pending) {
-               buffer->irq_work.waiters_pending = false;
-               /* irq_work_queue() supplies it's own memory barriers */
-               irq_work_queue(&buffer->irq_work.work);
-       }
-
-       if (cpu_buffer->irq_work.waiters_pending) {
-               cpu_buffer->irq_work.waiters_pending = false;
-               /* irq_work_queue() supplies it's own memory barriers */
-               irq_work_queue(&cpu_buffer->irq_work.work);
-       }
-
-       pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
-
-       if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
-               cpu_buffer->irq_work.wakeup_full = true;
-               cpu_buffer->irq_work.full_waiters_pending = false;
-               /* irq_work_queue() supplies it's own memory barriers */
-               irq_work_queue(&cpu_buffer->irq_work.work);
-       }
-}
-
-/**
- * ring_buffer_unlock_commit - commit a reserved
- * @buffer: The buffer to commit to
- * @event: The event pointer to commit.
- *
- * This commits the data to the ring buffer, and releases any locks held.
- *
- * Must be paired with ring_buffer_lock_reserve.
- */
-int ring_buffer_unlock_commit(struct ring_buffer *buffer,
-                             struct ring_buffer_event *event)
-{
-       struct ring_buffer_per_cpu *cpu_buffer;
-       int cpu = raw_smp_processor_id();
-
-       cpu_buffer = buffer->buffers[cpu];
-
-       rb_commit(cpu_buffer, event);
-
-       rb_wakeups(buffer, cpu_buffer);
-
-       trace_recursive_unlock(cpu_buffer);
-
-       preempt_enable_notrace();
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
-
-static inline void rb_event_discard(struct ring_buffer_event *event)
-{
-       if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
-               event = skip_time_extend(event);
-
-       /* array[0] holds the actual length for the discarded event */
-       event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
-       event->type_len = RINGBUF_TYPE_PADDING;
-       /* time delta must be non zero */
-       if (!event->time_delta)
-               event->time_delta = 1;
-}
-
 /*
  * Decrement the entries to the page that an event is on.
  * The event does not even need to exist, only the pointer
index abcbf7f..6e79408 100644 (file)
@@ -3035,7 +3035,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
        if (!iter)
                return ERR_PTR(-ENOMEM);
 
-       iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
+       iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
                                    GFP_KERNEL);
        if (!iter->buffer_iter)
                goto release;
@@ -6990,7 +6990,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
        trace_init_global_iter(&iter);
 
        for_each_tracing_cpu(cpu) {
-               atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
+               atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
        }
 
        old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
index 404a372..7ca09cd 100644 (file)
@@ -30,6 +30,7 @@
 DEFINE_MUTEX(event_mutex);
 
 LIST_HEAD(ftrace_events);
+static LIST_HEAD(ftrace_generic_fields);
 static LIST_HEAD(ftrace_common_fields);
 
 #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
@@ -94,6 +95,10 @@ trace_find_event_field(struct trace_event_call *call, char *name)
        struct ftrace_event_field *field;
        struct list_head *head;
 
+       field = __find_event_field(&ftrace_generic_fields, name);
+       if (field)
+               return field;
+
        field = __find_event_field(&ftrace_common_fields, name);
        if (field)
                return field;
@@ -144,6 +149,13 @@ int trace_define_field(struct trace_event_call *call, const char *type,
 }
 EXPORT_SYMBOL_GPL(trace_define_field);
 
+#define __generic_field(type, item, filter_type)                       \
+       ret = __trace_define_field(&ftrace_generic_fields, #type,       \
+                                  #item, 0, 0, is_signed_type(type),   \
+                                  filter_type);                        \
+       if (ret)                                                        \
+               return ret;
+
 #define __common_field(type, item)                                     \
        ret = __trace_define_field(&ftrace_common_fields, #type,        \
                                   "common_" #item,                     \
@@ -153,6 +165,16 @@ EXPORT_SYMBOL_GPL(trace_define_field);
        if (ret)                                                        \
                return ret;
 
+static int trace_define_generic_fields(void)
+{
+       int ret;
+
+       __generic_field(int, cpu, FILTER_OTHER);
+       __generic_field(char *, comm, FILTER_PTR_STRING);
+
+       return ret;
+}
+
 static int trace_define_common_fields(void)
 {
        int ret;
@@ -2671,6 +2693,9 @@ static __init int event_trace_init(void)
        if (!entry)
                pr_warn("Could not create tracefs 'available_events' entry\n");
 
+       if (trace_define_generic_fields())
+               pr_warn("tracing: Failed to allocated generic fields");
+
        if (trace_define_common_fields())
                pr_warn("tracing: Failed to allocate common fields");
 
index d81d6f3..bd1bf18 100644 (file)
@@ -252,6 +252,50 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event)
        return match;
 }
 
+/* Filter predicate for CPUs. */
+static int filter_pred_cpu(struct filter_pred *pred, void *event)
+{
+       int cpu, cmp;
+       int match = 0;
+
+       cpu = raw_smp_processor_id();
+       cmp = pred->val;
+
+       switch (pred->op) {
+       case OP_EQ:
+               match = cpu == cmp;
+               break;
+       case OP_LT:
+               match = cpu < cmp;
+               break;
+       case OP_LE:
+               match = cpu <= cmp;
+               break;
+       case OP_GT:
+               match = cpu > cmp;
+               break;
+       case OP_GE:
+               match = cpu >= cmp;
+               break;
+       default:
+               break;
+       }
+
+       return !!match == !pred->not;
+}
+
+/* Filter predicate for COMM. */
+static int filter_pred_comm(struct filter_pred *pred, void *event)
+{
+       int cmp, match;
+
+       cmp = pred->regex.match(current->comm, &pred->regex,
+                               pred->regex.field_len);
+       match = cmp ^ pred->not;
+
+       return match;
+}
+
 static int filter_pred_none(struct filter_pred *pred, void *event)
 {
        return 0;
@@ -1002,7 +1046,10 @@ static int init_pred(struct filter_parse_state *ps,
        if (is_string_field(field)) {
                filter_build_regex(pred);
 
-               if (field->filter_type == FILTER_STATIC_STRING) {
+               if (!strcmp(field->name, "comm")) {
+                       fn = filter_pred_comm;
+                       pred->regex.field_len = TASK_COMM_LEN;
+               } else if (field->filter_type == FILTER_STATIC_STRING) {
                        fn = filter_pred_string;
                        pred->regex.field_len = field->size;
                } else if (field->filter_type == FILTER_DYN_STRING)
@@ -1025,7 +1072,10 @@ static int init_pred(struct filter_parse_state *ps,
                }
                pred->val = val;
 
-               fn = select_comparison_fn(pred->op, field->size,
+               if (!strcmp(field->name, "cpu"))
+                       fn = filter_pred_cpu;
+               else
+                       fn = select_comparison_fn(pred->op, field->size,
                                          field->is_signed);
                if (!fn) {
                        parse_error(ps, FILT_ERR_INVALID_OP, 0);
index 8968bf7..ca98445 100644 (file)
@@ -715,13 +715,13 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
 
                snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
                trace_seq_printf(s, ".%s", nsecs_str);
-               len += strlen(nsecs_str);
+               len += strlen(nsecs_str) + 1;
        }
 
        trace_seq_puts(s, " us ");
 
        /* Print remaining spaces to fit the row's width */
-       for (i = len; i < 7; i++)
+       for (i = len; i < 8; i++)
                trace_seq_putc(s, ' ');
 }
 
index dfab253..8e481a8 100644 (file)
@@ -496,6 +496,8 @@ static const struct trace_mark {
        char                    sym;
 } mark[] = {
        MARK(1000000000ULL      , '$'), /* 1 sec */
+       MARK(100000000ULL       , '@'), /* 100 msec */
+       MARK(10000000ULL        , '*'), /* 10 msec */
        MARK(1000000ULL         , '#'), /* 1000 usecs */
        MARK(100000ULL          , '!'), /* 100 usecs */
        MARK(10000ULL           , '+'), /* 10 usecs */
@@ -508,7 +510,7 @@ char trace_find_mark(unsigned long long d)
        int size = ARRAY_SIZE(mark);
 
        for (i = 0; i < size; i++) {
-               if (d >= mark[i].val)
+               if (d > mark[i].val)
                        break;
        }
 
index 3f34496..b746399 100644 (file)
 
 #define STACK_TRACE_ENTRIES 500
 
-#ifdef CC_USING_FENTRY
-# define fentry                1
-#else
-# define fentry                0
-#endif
-
 static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
         { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
 static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
@@ -35,7 +29,7 @@ static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
  */
 static struct stack_trace max_stack_trace = {
        .max_entries            = STACK_TRACE_ENTRIES - 1,
-       .entries                = &stack_dump_trace[1],
+       .entries                = &stack_dump_trace[0],
 };
 
 static unsigned long max_stack_size;
@@ -55,7 +49,7 @@ static inline void print_max_stack(void)
 
        pr_emerg("        Depth    Size   Location    (%d entries)\n"
                           "        -----    ----   --------\n",
-                          max_stack_trace.nr_entries - 1);
+                          max_stack_trace.nr_entries);
 
        for (i = 0; i < max_stack_trace.nr_entries; i++) {
                if (stack_dump_trace[i] == ULONG_MAX)
@@ -77,7 +71,7 @@ check_stack(unsigned long ip, unsigned long *stack)
        unsigned long this_size, flags; unsigned long *p, *top, *start;
        static int tracer_frame;
        int frame_size = ACCESS_ONCE(tracer_frame);
-       int i;
+       int i, x;
 
        this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
        this_size = THREAD_SIZE - this_size;
@@ -105,26 +99,20 @@ check_stack(unsigned long ip, unsigned long *stack)
        max_stack_size = this_size;
 
        max_stack_trace.nr_entries = 0;
-
-       if (using_ftrace_ops_list_func())
-               max_stack_trace.skip = 4;
-       else
-               max_stack_trace.skip = 3;
+       max_stack_trace.skip = 3;
 
        save_stack_trace(&max_stack_trace);
 
-       /*
-        * Add the passed in ip from the function tracer.
-        * Searching for this on the stack will skip over
-        * most of the overhead from the stack tracer itself.
-        */
-       stack_dump_trace[0] = ip;
-       max_stack_trace.nr_entries++;
+       /* Skip over the overhead of the stack tracer itself */
+       for (i = 0; i < max_stack_trace.nr_entries; i++) {
+               if (stack_dump_trace[i] == ip)
+                       break;
+       }
 
        /*
         * Now find where in the stack these are.
         */
-       i = 0;
+       x = 0;
        start = stack;
        top = (unsigned long *)
                (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
@@ -139,12 +127,15 @@ check_stack(unsigned long ip, unsigned long *stack)
        while (i < max_stack_trace.nr_entries) {
                int found = 0;
 
-               stack_dump_index[i] = this_size;
+               stack_dump_index[x] = this_size;
                p = start;
 
                for (; p < top && i < max_stack_trace.nr_entries; p++) {
+                       if (stack_dump_trace[i] == ULONG_MAX)
+                               break;
                        if (*p == stack_dump_trace[i]) {
-                               this_size = stack_dump_index[i++] =
+                               stack_dump_trace[x] = stack_dump_trace[i++];
+                               this_size = stack_dump_index[x++] =
                                        (top - p) * sizeof(unsigned long);
                                found = 1;
                                /* Start the search from here */
@@ -156,7 +147,7 @@ check_stack(unsigned long ip, unsigned long *stack)
                                 * out what that is, then figure it out
                                 * now.
                                 */
-                               if (unlikely(!tracer_frame) && i == 1) {
+                               if (unlikely(!tracer_frame)) {
                                        tracer_frame = (p - stack) *
                                                sizeof(unsigned long);
                                        max_stack_size -= tracer_frame;
@@ -168,6 +159,10 @@ check_stack(unsigned long ip, unsigned long *stack)
                        i++;
        }
 
+       max_stack_trace.nr_entries = x;
+       for (; x < i; x++)
+               stack_dump_trace[x] = ULONG_MAX;
+
        if (task_stack_end_corrupted(current)) {
                print_max_stack();
                BUG();
@@ -192,24 +187,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
        if (per_cpu(trace_active, cpu)++ != 0)
                goto out;
 
-       /*
-        * When fentry is used, the traced function does not get
-        * its stack frame set up, and we lose the parent.
-        * The ip is pretty useless because the function tracer
-        * was called before that function set up its stack frame.
-        * In this case, we use the parent ip.
-        *
-        * By adding the return address of either the parent ip
-        * or the current ip we can disregard most of the stack usage
-        * caused by the stack tracer itself.
-        *
-        * The function tracer always reports the address of where the
-        * mcount call was, but the stack will hold the return address.
-        */
-       if (fentry)
-               ip = parent_ip;
-       else
-               ip += MCOUNT_INSN_SIZE;
+       ip += MCOUNT_INSN_SIZE;
 
        check_stack(ip, &stack);
 
@@ -284,7 +262,7 @@ __next(struct seq_file *m, loff_t *pos)
 {
        long n = *pos - 1;
 
-       if (n >= max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
+       if (n > max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX)
                return NULL;
 
        m->private = (void *)n;
@@ -354,7 +332,7 @@ static int t_show(struct seq_file *m, void *v)
                seq_printf(m, "        Depth    Size   Location"
                           "    (%d entries)\n"
                           "        -----    ----   --------\n",
-                          max_stack_trace.nr_entries - 1);
+                          max_stack_trace.nr_entries);
 
                if (!stack_tracer_enabled && !max_stack_size)
                        print_disabled(m);