kernel/trace/trace.c (cascardo/linux.git, blob aa240551fc5d4b184adf0058d9d76ff726fb731c)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will peek into the ring-buffer to count the
57  * entries inserted during the selftest, although concurrent
58  * insertions into the ring-buffer, such as trace_printk, could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80         return 0;
81 }
82
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurs.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 and is cleared to zero when the initialization
93  * of the tracer is successful. That is the only place that sets
94  * it back to zero.
95  */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124         struct module                   *mod;
125         unsigned long                   length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131         /*
132          * "end" is first and points to NULL, as it must be different
133          * from "mod" or "enum_string"
134          */
135         union trace_enum_map_item       *next;
136         const char                      *end;   /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149         struct trace_enum_map           map;
150         struct trace_enum_map_head      head;
151         struct trace_enum_map_tail      tail;
152 };
153
154 static union trace_enum_map_item *trace_enum_maps;
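
/*
 * For example, one can get from the head of a saved array to its tail
 * like this (an illustrative sketch of the layout described above, not a
 * helper used elsewhere):
 *
 *	union trace_enum_map_item *ptr = trace_enum_maps;
 *
 *	// ptr[0] is the head, ptr[1 .. head.length] are the saved maps,
 *	// and ptr[head.length + 1] is the tail of this array.
 *	union trace_enum_map_item *tail = ptr + ptr->head.length + 1;
 *
 *	// tail->tail.next points to the head of the next saved array,
 *	// or is NULL if this was the last one.
 */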
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE         100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168         default_bootup_tracer = bootup_tracer_buf;
169         /* We are using ftrace early, expand it */
170         ring_buffer_expanded = true;
171         return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177         if (*str++ != '=' || !*str) {
178                 ftrace_dump_on_oops = DUMP_ALL;
179                 return 1;
180         }
181
182         if (!strcmp("orig_cpu", str)) {
183                 ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194                 __disable_trace_on_warning = 1;
195         return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201         allocate_snapshot = true;
202         /* We also need the main ring buffer expanded */
203         ring_buffer_expanded = true;
204         return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214         return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224         trace_boot_clock = trace_boot_clock_buf;
225         return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232                 tracepoint_printk = 1;
233         return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239         nsec += 500;
240         do_div(nsec, 1000);
241         return nsec;
242 }
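
/*
 * For example, with the rounding above:
 *
 *	ns2usecs(1499) == 1;	// 1499 + 500 = 1999; 1999 / 1000 = 1
 *	ns2usecs(1500) == 2;	// 1500 + 500 = 2000; 2000 / 1000 = 2
 */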
243
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS                                             \
246         (FUNCTION_DEFAULT_FLAGS |                                       \
247          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
248          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
249          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
250          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
254                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258         TRACE_ITER_EVENT_FORK
259
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a linked list of pages that will store trace entries. The
264  * page descriptor of the pages in memory is used to hold
265  * the linked list by linking the lru item in the page descriptor
266  * to each of the pages in the buffer per CPU.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273         .trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275
276 LIST_HEAD(ftrace_trace_arrays);
277
278 int trace_array_get(struct trace_array *this_tr)
279 {
280         struct trace_array *tr;
281         int ret = -ENODEV;
282
283         mutex_lock(&trace_types_lock);
284         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285                 if (tr == this_tr) {
286                         tr->ref++;
287                         ret = 0;
288                         break;
289                 }
290         }
291         mutex_unlock(&trace_types_lock);
292
293         return ret;
294 }
295
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298         WARN_ON(!this_tr->ref);
299         this_tr->ref--;
300 }
301
302 void trace_array_put(struct trace_array *this_tr)
303 {
304         mutex_lock(&trace_types_lock);
305         __trace_array_put(this_tr);
306         mutex_unlock(&trace_types_lock);
307 }
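
/*
 * A typical caller pairs the two around its use of the trace array, for
 * example in an open/release path (illustrative sketch):
 *
 *	if (trace_array_get(tr) < 0)
 *		return -ENODEV;
 *	// ... use tr ...
 *	trace_array_put(tr);
 */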
308
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310                               struct ring_buffer *buffer,
311                               struct ring_buffer_event *event)
312 {
313         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314             !filter_match_preds(call->filter, rec)) {
315                 __trace_event_discard_commit(buffer, event);
316                 return 1;
317         }
318
319         return 0;
320 }
321
322 void trace_free_pid_list(struct trace_pid_list *pid_list)
323 {
324         vfree(pid_list->pids);
325         kfree(pid_list);
326 }
327
328 /**
329  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
330  * @filtered_pids: The list of pids to check
331  * @search_pid: The PID to find in @filtered_pids
332  *
333  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
334  */
335 bool
336 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
337 {
338         /*
339          * If pid_max changed after filtered_pids was created, we
340          * by default ignore all pids greater than the previous pid_max.
341          */
342         if (search_pid >= filtered_pids->pid_max)
343                 return false;
344
345         return test_bit(search_pid, filtered_pids->pids);
346 }
347
348 /**
349  * trace_ignore_this_task - should a task be ignored for tracing
350  * @filtered_pids: The list of pids to check
351  * @task: The task that should be ignored if not filtered
352  *
353  * Checks if @task should be traced or not from @filtered_pids.
354  * Returns true if @task should *NOT* be traced.
355  * Returns false if @task should be traced.
356  */
357 bool
358 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
359 {
360         /*
361          * Return false, because if filtered_pids does not exist,
362          * all pids are good to trace.
363          */
364         if (!filtered_pids)
365                 return false;
366
367         return !trace_find_filtered_pid(filtered_pids, task->pid);
368 }
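
/*
 * Illustrative sketch of a caller (the location of the pid list is the
 * caller's own; "tr->filtered_pids" here is just an example): an event
 * hook checks the filter before recording anything for the current task:
 *
 *	pid_list = rcu_dereference_sched(tr->filtered_pids);
 *	if (trace_ignore_this_task(pid_list, current))
 *		return;
 *	// ... record the event ...
 */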
369
370 /**
371  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
372  * @pid_list: The list to modify
373  * @self: The current task for fork or NULL for exit
374  * @task: The task to add or remove
375  *
376  * When adding a task, if @self is defined, the task is only added if @self
377  * is also included in @pid_list. This happens on fork, and tasks should
378  * only be added when the parent is listed. If @self is NULL, then the
379  * @task pid will be removed from the list, which would happen on exit
380  * of a task.
381  */
382 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
383                                   struct task_struct *self,
384                                   struct task_struct *task)
385 {
386         if (!pid_list)
387                 return;
388
389         /* For forks, we only add if the forking task is listed */
390         if (self) {
391                 if (!trace_find_filtered_pid(pid_list, self->pid))
392                         return;
393         }
394
395         /* Sorry, but we don't support pid_max changing after setting */
396         if (task->pid >= pid_list->pid_max)
397                 return;
398
399         /* "self" is set for forks, and NULL for exits */
400         if (self)
401                 set_bit(task->pid, pid_list->pids);
402         else
403                 clear_bit(task->pid, pid_list->pids);
404 }
405
406 /**
407  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
408  * @pid_list: The pid list to show
409  * @v: The last pid that was shown (the actual pid plus 1, so that zero can be displayed)
410  * @pos: The position of the file
411  *
412  * This is used by the seq_file "next" operation to iterate the pids
413  * listed in a trace_pid_list structure.
414  *
415  * Returns the pid + 1, as we want to be able to display a pid of zero,
416  * but returning NULL would stop the iteration.
417  */
418 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
419 {
420         unsigned long pid = (unsigned long)v;
421
422         (*pos)++;
423
424         /* pid is already +1 of the actual previous bit */
425         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
426
427         /* Return pid + 1 to allow zero to be represented */
428         if (pid < pid_list->pid_max)
429                 return (void *)(pid + 1);
430
431         return NULL;
432 }
433
434 /**
435  * trace_pid_start - Used for seq_file to start reading pid lists
436  * @pid_list: The pid list to show
437  * @pos: The position of the file
438  *
439  * This is used by seq_file "start" operation to start the iteration
440  * of listing pids.
441  *
442  * Returns the pid + 1, as we want to be able to display a pid of zero,
443  * but returning NULL would stop the iteration.
444  */
445 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
446 {
447         unsigned long pid;
448         loff_t l = 0;
449
450         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
451         if (pid >= pid_list->pid_max)
452                 return NULL;
453
454         /* Return pid + 1 so that zero can be the exit value */
455         for (pid++; pid && l < *pos;
456              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
457                 ;
458         return (void *)pid;
459 }
460
461 /**
462  * trace_pid_show - show the current pid in seq_file processing
463  * @m: The seq_file structure to write into
464  * @v: A void pointer of the pid (+1) value to display
465  *
466  * Can be directly used by seq_file operations to display the current
467  * pid value.
468  */
469 int trace_pid_show(struct seq_file *m, void *v)
470 {
471         unsigned long pid = (unsigned long)v - 1;
472
473         seq_printf(m, "%lu\n", pid);
474         return 0;
475 }
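
/*
 * trace_pid_start(), trace_pid_next() and trace_pid_show() are meant to
 * back a seq_file iterator. An illustrative sketch (p_start, p_next,
 * p_stop and the pid_list lookup are the caller's own, hypothetical here):
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations show_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */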
476
477 /* 128 bytes (PID_BUF_SIZE + 1) should be much more than enough */
478 #define PID_BUF_SIZE            127
479
480 int trace_pid_write(struct trace_pid_list *filtered_pids,
481                     struct trace_pid_list **new_pid_list,
482                     const char __user *ubuf, size_t cnt)
483 {
484         struct trace_pid_list *pid_list;
485         struct trace_parser parser;
486         unsigned long val;
487         int nr_pids = 0;
488         ssize_t read = 0;
489         ssize_t ret = 0;
490         loff_t pos;
491         pid_t pid;
492
493         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
494                 return -ENOMEM;
495
496         /*
497          * Always create a new array. The write is an all-or-nothing
498          * operation: a new array is built whenever the user adds new
499          * pids, so that if the operation fails, the current list is
500          * not modified.
501          */
502         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
503         if (!pid_list)
504                 return -ENOMEM;
505
506         pid_list->pid_max = READ_ONCE(pid_max);
507
508         /* Only truncating will shrink pid_max */
509         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
510                 pid_list->pid_max = filtered_pids->pid_max;
511
512         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
513         if (!pid_list->pids) {
514                 kfree(pid_list);
515                 return -ENOMEM;
516         }
517
518         if (filtered_pids) {
519                 /* copy the current bits to the new max */
520                 pid = find_first_bit(filtered_pids->pids,
521                                      filtered_pids->pid_max);
522                 while (pid < filtered_pids->pid_max) {
523                         set_bit(pid, pid_list->pids);
524                         pid = find_next_bit(filtered_pids->pids,
525                                             filtered_pids->pid_max,
526                                             pid + 1);
527                         nr_pids++;
528                 }
529         }
530
531         while (cnt > 0) {
532
533                 pos = 0;
534
535                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
536                 if (ret < 0 || !trace_parser_loaded(&parser))
537                         break;
538
539                 read += ret;
540                 ubuf += ret;
541                 cnt -= ret;
542
543                 parser.buffer[parser.idx] = 0;
544
545                 ret = -EINVAL;
546                 if (kstrtoul(parser.buffer, 0, &val))
547                         break;
548                 if (val >= pid_list->pid_max)
549                         break;
550
551                 pid = (pid_t)val;
552
553                 set_bit(pid, pid_list->pids);
554                 nr_pids++;
555
556                 trace_parser_clear(&parser);
557                 ret = 0;
558         }
559         trace_parser_put(&parser);
560
561         if (ret < 0) {
562                 trace_free_pid_list(pid_list);
563                 return ret;
564         }
565
566         if (!nr_pids) {
567                 /* Cleared the list of pids */
568                 trace_free_pid_list(pid_list);
569                 read = ret;
570                 pid_list = NULL;
571         }
572
573         *new_pid_list = pid_list;
574
575         return read;
576 }
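
/*
 * Illustrative sketch of a write handler built on top of trace_pid_write()
 * (where the old list lives, e.g. "tr->filtered_pids", and its locking are
 * up to the caller):
 *
 *	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
 *	if (ret < 0)
 *		return ret;
 *
 *	rcu_assign_pointer(tr->filtered_pids, pid_list);
 *	synchronize_sched();
 *	if (filtered_pids)
 *		trace_free_pid_list(filtered_pids);
 */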
577
578 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
579 {
580         u64 ts;
581
582         /* Early boot up does not have a buffer yet */
583         if (!buf->buffer)
584                 return trace_clock_local();
585
586         ts = ring_buffer_time_stamp(buf->buffer, cpu);
587         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
588
589         return ts;
590 }
591
592 cycle_t ftrace_now(int cpu)
593 {
594         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
595 }
596
597 /**
598  * tracing_is_enabled - Show if global_trace has been disabled
599  *
600  * Shows if the global trace has been enabled or not. It uses the
601  * mirror flag "buffer_disabled" so it can be used in fast paths, such
602  * as by the irqsoff tracer. But it may be inaccurate due to races. If you
603  * need to know the accurate state, use tracing_is_on() which is a little
604  * slower, but accurate.
605  */
606 int tracing_is_enabled(void)
607 {
608         /*
609          * For quick access (irqsoff uses this in fast path), just
610          * return the mirror variable of the state of the ring buffer.
611          * It's a little racy, but we don't really care.
612          */
613         smp_rmb();
614         return !global_trace.buffer_disabled;
615 }
616
617 /*
618  * trace_buf_size is the size in bytes that is allocated
619  * for a buffer. Note, the number of bytes is always rounded
620  * to page size.
621  *
622  * This number is purposely set to a low number of 16384.
623  * If a dump on oops happens, it is much appreciated not
624  * to have to wait for all that output. In any case, this is
625  * configurable at both boot time and run time.
626  */
627 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
628
629 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
630
631 /* trace_types holds a linked list of available tracers. */
632 static struct tracer            *trace_types __read_mostly;
633
634 /*
635  * trace_types_lock is used to protect the trace_types list.
636  */
637 DEFINE_MUTEX(trace_types_lock);
638
639 /*
640  * serialize the access of the ring buffer
641  *
642  * The ring buffer serializes readers, but that is only low-level protection.
643  * The validity of the events (which are returned by ring_buffer_peek() etc.)
644  * is not protected by the ring buffer.
645  *
646  * The content of events may become garbage if we allow another process to
647  * consume these events concurrently:
648  *   A) the page of the consumed events may become a normal page
649  *      (not a reader page) in the ring buffer, and this page will be rewritten
650  *      by the event producer.
651  *   B) the page of the consumed events may become a page for splice_read,
652  *      and this page will be returned to the system.
653  *
654  * These primitives allow multiple processes to access different cpu ring
655  * buffers concurrently.
656  *
657  * These primitives don't distinguish between read-only and read-consume
658  * access. Multiple read-only accesses are also serialized.
659  */
660
661 #ifdef CONFIG_SMP
662 static DECLARE_RWSEM(all_cpu_access_lock);
663 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
664
665 static inline void trace_access_lock(int cpu)
666 {
667         if (cpu == RING_BUFFER_ALL_CPUS) {
668                 /* gain it for accessing the whole ring buffer. */
669                 down_write(&all_cpu_access_lock);
670         } else {
671                 /* gain it for accessing a cpu ring buffer. */
672
673                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
674                 down_read(&all_cpu_access_lock);
675
676                 /* Secondly block other access to this @cpu ring buffer. */
677                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
678         }
679 }
680
681 static inline void trace_access_unlock(int cpu)
682 {
683         if (cpu == RING_BUFFER_ALL_CPUS) {
684                 up_write(&all_cpu_access_lock);
685         } else {
686                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
687                 up_read(&all_cpu_access_lock);
688         }
689 }
690
691 static inline void trace_access_lock_init(void)
692 {
693         int cpu;
694
695         for_each_possible_cpu(cpu)
696                 mutex_init(&per_cpu(cpu_access_lock, cpu));
697 }
698
699 #else
700
701 static DEFINE_MUTEX(access_lock);
702
703 static inline void trace_access_lock(int cpu)
704 {
705         (void)cpu;
706         mutex_lock(&access_lock);
707 }
708
709 static inline void trace_access_unlock(int cpu)
710 {
711         (void)cpu;
712         mutex_unlock(&access_lock);
713 }
714
715 static inline void trace_access_lock_init(void)
716 {
717 }
718
719 #endif
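
/*
 * A reader of a single cpu buffer (or of all buffers, with
 * RING_BUFFER_ALL_CPUS) brackets its accesses with these primitives,
 * for example:
 *
 *	trace_access_lock(iter->cpu_file);
 *	// ... peek at or consume events of that cpu ...
 *	trace_access_unlock(iter->cpu_file);
 */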
720
721 #ifdef CONFIG_STACKTRACE
722 static void __ftrace_trace_stack(struct ring_buffer *buffer,
723                                  unsigned long flags,
724                                  int skip, int pc, struct pt_regs *regs);
725 static inline void ftrace_trace_stack(struct trace_array *tr,
726                                       struct ring_buffer *buffer,
727                                       unsigned long flags,
728                                       int skip, int pc, struct pt_regs *regs);
729
730 #else
731 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
732                                         unsigned long flags,
733                                         int skip, int pc, struct pt_regs *regs)
734 {
735 }
736 static inline void ftrace_trace_stack(struct trace_array *tr,
737                                       struct ring_buffer *buffer,
738                                       unsigned long flags,
739                                       int skip, int pc, struct pt_regs *regs)
740 {
741 }
742
743 #endif
744
745 static void tracer_tracing_on(struct trace_array *tr)
746 {
747         if (tr->trace_buffer.buffer)
748                 ring_buffer_record_on(tr->trace_buffer.buffer);
749         /*
750          * This flag is looked at when buffers haven't been allocated
751          * yet, or by some tracers (like irqsoff), that just want to
752          * know if the ring buffer has been disabled, but it can handle
753          * races where it gets disabled while we still do a record.
754          * As the check is in the fast path of the tracers, it is more
755          * important to be fast than accurate.
756          */
757         tr->buffer_disabled = 0;
758         /* Make the flag seen by readers */
759         smp_wmb();
760 }
761
762 /**
763  * tracing_on - enable tracing buffers
764  *
765  * This function enables tracing buffers that may have been
766  * disabled with tracing_off.
767  */
768 void tracing_on(void)
769 {
770         tracer_tracing_on(&global_trace);
771 }
772 EXPORT_SYMBOL_GPL(tracing_on);
773
774 /**
775  * __trace_puts - write a constant string into the trace buffer.
776  * @ip:    The address of the caller
777  * @str:   The constant string to write
778  * @size:  The size of the string.
779  */
780 int __trace_puts(unsigned long ip, const char *str, int size)
781 {
782         struct ring_buffer_event *event;
783         struct ring_buffer *buffer;
784         struct print_entry *entry;
785         unsigned long irq_flags;
786         int alloc;
787         int pc;
788
789         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
790                 return 0;
791
792         pc = preempt_count();
793
794         if (unlikely(tracing_selftest_running || tracing_disabled))
795                 return 0;
796
797         alloc = sizeof(*entry) + size + 2; /* possible \n added */
798
799         local_save_flags(irq_flags);
800         buffer = global_trace.trace_buffer.buffer;
801         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
802                                           irq_flags, pc);
803         if (!event)
804                 return 0;
805
806         entry = ring_buffer_event_data(event);
807         entry->ip = ip;
808
809         memcpy(&entry->buf, str, size);
810
811         /* Add a newline if necessary */
812         if (entry->buf[size - 1] != '\n') {
813                 entry->buf[size] = '\n';
814                 entry->buf[size + 1] = '\0';
815         } else
816                 entry->buf[size] = '\0';
817
818         __buffer_unlock_commit(buffer, event);
819         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
820
821         return size;
822 }
823 EXPORT_SYMBOL_GPL(__trace_puts);
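
/*
 * __trace_puts() is normally reached through the trace_puts() macro, which
 * supplies the caller's ip and the string length (and may pick
 * __trace_bputs() below for compile-time constant strings), e.g.:
 *
 *	trace_puts("reached the point of no return\n");
 */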
824
825 /**
826  * __trace_bputs - write the pointer to a constant string into trace buffer
827  * @ip:    The address of the caller
828  * @str:   The constant string to write to the buffer
829  */
830 int __trace_bputs(unsigned long ip, const char *str)
831 {
832         struct ring_buffer_event *event;
833         struct ring_buffer *buffer;
834         struct bputs_entry *entry;
835         unsigned long irq_flags;
836         int size = sizeof(struct bputs_entry);
837         int pc;
838
839         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
840                 return 0;
841
842         pc = preempt_count();
843
844         if (unlikely(tracing_selftest_running || tracing_disabled))
845                 return 0;
846
847         local_save_flags(irq_flags);
848         buffer = global_trace.trace_buffer.buffer;
849         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
850                                           irq_flags, pc);
851         if (!event)
852                 return 0;
853
854         entry = ring_buffer_event_data(event);
855         entry->ip                       = ip;
856         entry->str                      = str;
857
858         __buffer_unlock_commit(buffer, event);
859         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
860
861         return 1;
862 }
863 EXPORT_SYMBOL_GPL(__trace_bputs);
864
865 #ifdef CONFIG_TRACER_SNAPSHOT
866 /**
867  * tracing_snapshot - take a snapshot of the current buffer.
868  *
869  * This causes a swap between the snapshot buffer and the current live
870  * tracing buffer. You can use this to take snapshots of the live
871  * trace when some condition is triggered, but continue to trace.
872  *
873  * Note, make sure to allocate the snapshot either with
874  * tracing_snapshot_alloc(), or by doing it manually
875  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
876  *
877  * If the snapshot buffer is not allocated, this will stop tracing,
878  * basically making a permanent snapshot.
879  */
880 void tracing_snapshot(void)
881 {
882         struct trace_array *tr = &global_trace;
883         struct tracer *tracer = tr->current_trace;
884         unsigned long flags;
885
886         if (in_nmi()) {
887                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
888                 internal_trace_puts("*** snapshot is being ignored        ***\n");
889                 return;
890         }
891
892         if (!tr->allocated_snapshot) {
893                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
894                 internal_trace_puts("*** stopping trace here!   ***\n");
895                 tracing_off();
896                 return;
897         }
898
899         /* Note, snapshot can not be used when the tracer uses it */
900         if (tracer->use_max_tr) {
901                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
902                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
903                 return;
904         }
905
906         local_irq_save(flags);
907         update_max_tr(tr, current, smp_processor_id());
908         local_irq_restore(flags);
909 }
910 EXPORT_SYMBOL_GPL(tracing_snapshot);
911
912 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
913                                         struct trace_buffer *size_buf, int cpu_id);
914 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
915
916 static int alloc_snapshot(struct trace_array *tr)
917 {
918         int ret;
919
920         if (!tr->allocated_snapshot) {
921
922                 /* allocate spare buffer */
923                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
924                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
925                 if (ret < 0)
926                         return ret;
927
928                 tr->allocated_snapshot = true;
929         }
930
931         return 0;
932 }
933
934 static void free_snapshot(struct trace_array *tr)
935 {
936         /*
937          * We don't free the ring buffer. Instead, we resize it because
938          * the max_tr ring buffer has some state (e.g. ring->clock) and
939          * we want to preserve it.
940          */
941         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
942         set_buffer_entries(&tr->max_buffer, 1);
943         tracing_reset_online_cpus(&tr->max_buffer);
944         tr->allocated_snapshot = false;
945 }
946
947 /**
948  * tracing_alloc_snapshot - allocate snapshot buffer.
949  *
950  * This only allocates the snapshot buffer if it isn't already
951  * allocated - it doesn't also take a snapshot.
952  *
953  * This is meant to be used in cases where the snapshot buffer needs
954  * to be set up for events that can't sleep but need to be able to
955  * trigger a snapshot.
956  */
957 int tracing_alloc_snapshot(void)
958 {
959         struct trace_array *tr = &global_trace;
960         int ret;
961
962         ret = alloc_snapshot(tr);
963         WARN_ON(ret < 0);
964
965         return ret;
966 }
967 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
968
969 /**
970  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
971  *
972  * This is similar to tracing_snapshot(), but it will allocate the
973  * snapshot buffer if it isn't already allocated. Use this only
974  * where it is safe to sleep, as the allocation may sleep.
975  *
976  * This causes a swap between the snapshot buffer and the current live
977  * tracing buffer. You can use this to take snapshots of the live
978  * trace when some condition is triggered, but continue to trace.
979  */
980 void tracing_snapshot_alloc(void)
981 {
982         int ret;
983
984         ret = tracing_alloc_snapshot();
985         if (ret < 0)
986                 return;
987
988         tracing_snapshot();
989 }
990 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
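
/*
 * A typical user allocates the snapshot buffer early, from a context that
 * may sleep, and later triggers snapshots from atomic context, for example
 * (illustrative sketch):
 *
 *	// at setup time, may sleep:
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *
 *	// later, e.g. from an interrupt handler or a tracepoint:
 *	tracing_snapshot();
 */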
991 #else
992 void tracing_snapshot(void)
993 {
994         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
995 }
996 EXPORT_SYMBOL_GPL(tracing_snapshot);
997 int tracing_alloc_snapshot(void)
998 {
999         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1000         return -ENODEV;
1001 }
1002 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1003 void tracing_snapshot_alloc(void)
1004 {
1005         /* Give warning */
1006         tracing_snapshot();
1007 }
1008 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1009 #endif /* CONFIG_TRACER_SNAPSHOT */
1010
1011 static void tracer_tracing_off(struct trace_array *tr)
1012 {
1013         if (tr->trace_buffer.buffer)
1014                 ring_buffer_record_off(tr->trace_buffer.buffer);
1015         /*
1016          * This flag is looked at when buffers haven't been allocated
1017          * yet, or by some tracers (like irqsoff), that just want to
1018          * know if the ring buffer has been disabled, but it can handle
1019          * races where it gets disabled while we still do a record.
1020          * As the check is in the fast path of the tracers, it is more
1021          * important to be fast than accurate.
1022          */
1023         tr->buffer_disabled = 1;
1024         /* Make the flag seen by readers */
1025         smp_wmb();
1026 }
1027
1028 /**
1029  * tracing_off - turn off tracing buffers
1030  *
1031  * This function stops the tracing buffers from recording data.
1032  * It does not disable any overhead the tracers themselves may
1033  * be causing. This function simply causes all recording to
1034  * the ring buffers to fail.
1035  */
1036 void tracing_off(void)
1037 {
1038         tracer_tracing_off(&global_trace);
1039 }
1040 EXPORT_SYMBOL_GPL(tracing_off);
1041
1042 void disable_trace_on_warning(void)
1043 {
1044         if (__disable_trace_on_warning)
1045                 tracing_off();
1046 }
1047
1048 /**
1049  * tracer_tracing_is_on - show real state of ring buffer enabled
1050  * @tr : the trace array to know if ring buffer is enabled
1051  *
1052  * Shows real state of the ring buffer if it is enabled or not.
1053  */
1054 static int tracer_tracing_is_on(struct trace_array *tr)
1055 {
1056         if (tr->trace_buffer.buffer)
1057                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1058         return !tr->buffer_disabled;
1059 }
1060
1061 /**
1062  * tracing_is_on - show state of ring buffers enabled
1063  */
1064 int tracing_is_on(void)
1065 {
1066         return tracer_tracing_is_on(&global_trace);
1067 }
1068 EXPORT_SYMBOL_GPL(tracing_is_on);
1069
1070 static int __init set_buf_size(char *str)
1071 {
1072         unsigned long buf_size;
1073
1074         if (!str)
1075                 return 0;
1076         buf_size = memparse(str, &str);
1077         /* nr_entries can not be zero */
1078         if (buf_size == 0)
1079                 return 0;
1080         trace_buf_size = buf_size;
1081         return 1;
1082 }
1083 __setup("trace_buf_size=", set_buf_size);
1084
1085 static int __init set_tracing_thresh(char *str)
1086 {
1087         unsigned long threshold;
1088         int ret;
1089
1090         if (!str)
1091                 return 0;
1092         ret = kstrtoul(str, 0, &threshold);
1093         if (ret < 0)
1094                 return 0;
1095         tracing_thresh = threshold * 1000;
1096         return 1;
1097 }
1098 __setup("tracing_thresh=", set_tracing_thresh);
1099
1100 unsigned long nsecs_to_usecs(unsigned long nsecs)
1101 {
1102         return nsecs / 1000;
1103 }
1104
1105 /*
1106  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1107  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1108  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1109  * of strings in the order that the enums were defined.
1110  */
1111 #undef C
1112 #define C(a, b) b
1113
1114 /* These must match the bit positions in trace_iterator_flags */
1115 static const char *trace_options[] = {
1116         TRACE_FLAGS
1117         NULL
1118 };
1119
1120 static struct {
1121         u64 (*func)(void);
1122         const char *name;
1123         int in_ns;              /* is this clock in nanoseconds? */
1124 } trace_clocks[] = {
1125         { trace_clock_local,            "local",        1 },
1126         { trace_clock_global,           "global",       1 },
1127         { trace_clock_counter,          "counter",      0 },
1128         { trace_clock_jiffies,          "uptime",       0 },
1129         { trace_clock,                  "perf",         1 },
1130         { ktime_get_mono_fast_ns,       "mono",         1 },
1131         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1132         ARCH_TRACE_CLOCKS
1133 };
1134
1135 /*
1136  * trace_parser_get_init - gets the buffer for trace parser
1137  */
1138 int trace_parser_get_init(struct trace_parser *parser, int size)
1139 {
1140         memset(parser, 0, sizeof(*parser));
1141
1142         parser->buffer = kmalloc(size, GFP_KERNEL);
1143         if (!parser->buffer)
1144                 return 1;
1145
1146         parser->size = size;
1147         return 0;
1148 }
1149
1150 /*
1151  * trace_parser_put - frees the buffer for trace parser
1152  */
1153 void trace_parser_put(struct trace_parser *parser)
1154 {
1155         kfree(parser->buffer);
1156 }
1157
1158 /*
1159  * trace_get_user - reads the user input string separated by space
1160  * (matched by isspace(ch))
1161  *
1162  * For each string found the 'struct trace_parser' is updated,
1163  * and the function returns.
1164  *
1165  * Returns number of bytes read.
1166  *
1167  * See kernel/trace/trace.h for 'struct trace_parser' details.
1168  */
1169 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1170         size_t cnt, loff_t *ppos)
1171 {
1172         char ch;
1173         size_t read = 0;
1174         ssize_t ret;
1175
1176         if (!*ppos)
1177                 trace_parser_clear(parser);
1178
1179         ret = get_user(ch, ubuf++);
1180         if (ret)
1181                 goto out;
1182
1183         read++;
1184         cnt--;
1185
1186         /*
1187          * The parser is not finished with the last write,
1188          * continue reading the user input without skipping spaces.
1189          */
1190         if (!parser->cont) {
1191                 /* skip white space */
1192                 while (cnt && isspace(ch)) {
1193                         ret = get_user(ch, ubuf++);
1194                         if (ret)
1195                                 goto out;
1196                         read++;
1197                         cnt--;
1198                 }
1199
1200                 /* only spaces were written */
1201                 if (isspace(ch)) {
1202                         *ppos += read;
1203                         ret = read;
1204                         goto out;
1205                 }
1206
1207                 parser->idx = 0;
1208         }
1209
1210         /* read the non-space input */
1211         while (cnt && !isspace(ch)) {
1212                 if (parser->idx < parser->size - 1)
1213                         parser->buffer[parser->idx++] = ch;
1214                 else {
1215                         ret = -EINVAL;
1216                         goto out;
1217                 }
1218                 ret = get_user(ch, ubuf++);
1219                 if (ret)
1220                         goto out;
1221                 read++;
1222                 cnt--;
1223         }
1224
1225         /* We either got finished input or we have to wait for another call. */
1226         if (isspace(ch)) {
1227                 parser->buffer[parser->idx] = 0;
1228                 parser->cont = false;
1229         } else if (parser->idx < parser->size - 1) {
1230                 parser->cont = true;
1231                 parser->buffer[parser->idx++] = ch;
1232         } else {
1233                 ret = -EINVAL;
1234                 goto out;
1235         }
1236
1237         *ppos += read;
1238         ret = read;
1239
1240 out:
1241         return ret;
1242 }
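
/*
 * A typical parse loop over a user buffer combines the parser helpers
 * above as follows (illustrative sketch; MAX_LEN is a placeholder, and
 * compare trace_pid_write() earlier in this file for a real user):
 *
 *	if (trace_parser_get_init(&parser, MAX_LEN))
 *		return -ENOMEM;
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		read += ret;
 *		ubuf += ret;
 *		cnt -= ret;
 *		// ... consume parser.buffer ...
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */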
1243
1244 /* TODO add a seq_buf_to_buffer() */
1245 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1246 {
1247         int len;
1248
1249         if (trace_seq_used(s) <= s->seq.readpos)
1250                 return -EBUSY;
1251
1252         len = trace_seq_used(s) - s->seq.readpos;
1253         if (cnt > len)
1254                 cnt = len;
1255         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1256
1257         s->seq.readpos += cnt;
1258         return cnt;
1259 }
1260
1261 unsigned long __read_mostly     tracing_thresh;
1262
1263 #ifdef CONFIG_TRACER_MAX_TRACE
1264 /*
1265  * Copy the new maximum trace into the separate maximum-trace
1266  * structure. (this way the maximum trace is permanently saved,
1267  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1268  */
1269 static void
1270 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1271 {
1272         struct trace_buffer *trace_buf = &tr->trace_buffer;
1273         struct trace_buffer *max_buf = &tr->max_buffer;
1274         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1275         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1276
1277         max_buf->cpu = cpu;
1278         max_buf->time_start = data->preempt_timestamp;
1279
1280         max_data->saved_latency = tr->max_latency;
1281         max_data->critical_start = data->critical_start;
1282         max_data->critical_end = data->critical_end;
1283
1284         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1285         max_data->pid = tsk->pid;
1286         /*
1287          * If tsk == current, then use current_uid(), as that does not use
1288          * RCU. The irq tracer can be called out of RCU scope.
1289          */
1290         if (tsk == current)
1291                 max_data->uid = current_uid();
1292         else
1293                 max_data->uid = task_uid(tsk);
1294
1295         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1296         max_data->policy = tsk->policy;
1297         max_data->rt_priority = tsk->rt_priority;
1298
1299         /* record this task's comm */
1300         tracing_record_cmdline(tsk);
1301 }
1302
1303 /**
1304  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1305  * @tr: tracer
1306  * @tsk: the task with the latency
1307  * @cpu: The cpu that initiated the trace.
1308  *
1309  * Flip the buffers between the @tr and the max_tr and record information
1310  * about which task was the cause of this latency.
1311  */
1312 void
1313 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1314 {
1315         struct ring_buffer *buf;
1316
1317         if (tr->stop_count)
1318                 return;
1319
1320         WARN_ON_ONCE(!irqs_disabled());
1321
1322         if (!tr->allocated_snapshot) {
1323                 /* Only the nop tracer should hit this when disabling */
1324                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1325                 return;
1326         }
1327
1328         arch_spin_lock(&tr->max_lock);
1329
1330         buf = tr->trace_buffer.buffer;
1331         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1332         tr->max_buffer.buffer = buf;
1333
1334         __update_max_tr(tr, tsk, cpu);
1335         arch_spin_unlock(&tr->max_lock);
1336 }
1337
1338 /**
1339  * update_max_tr_single - only copy one trace over, and reset the rest
1340  * @tr: tracer
1341  * @tsk: task with the latency
1342  * @cpu: the cpu of the buffer to copy.
1343  *
1344  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1345  */
1346 void
1347 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1348 {
1349         int ret;
1350
1351         if (tr->stop_count)
1352                 return;
1353
1354         WARN_ON_ONCE(!irqs_disabled());
1355         if (!tr->allocated_snapshot) {
1356                 /* Only the nop tracer should hit this when disabling */
1357                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1358                 return;
1359         }
1360
1361         arch_spin_lock(&tr->max_lock);
1362
1363         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1364
1365         if (ret == -EBUSY) {
1366                 /*
1367                  * We failed to swap the buffer due to a commit taking
1368                  * place on this CPU. We fail to record, but we reset
1369                  * the max trace buffer (no one writes directly to it)
1370                  * and flag that it failed.
1371                  */
1372                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1373                         "Failed to swap buffers due to commit in progress\n");
1374         }
1375
1376         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1377
1378         __update_max_tr(tr, tsk, cpu);
1379         arch_spin_unlock(&tr->max_lock);
1380 }
1381 #endif /* CONFIG_TRACER_MAX_TRACE */
1382
1383 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1384 {
1385         /* Iterators are static, they should be filled or empty */
1386         if (trace_buffer_iter(iter, iter->cpu_file))
1387                 return 0;
1388
1389         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1390                                 full);
1391 }
1392
1393 #ifdef CONFIG_FTRACE_STARTUP_TEST
1394 static int run_tracer_selftest(struct tracer *type)
1395 {
1396         struct trace_array *tr = &global_trace;
1397         struct tracer *saved_tracer = tr->current_trace;
1398         int ret;
1399
1400         if (!type->selftest || tracing_selftest_disabled)
1401                 return 0;
1402
1403         /*
1404          * Run a selftest on this tracer.
1405          * Here we reset the trace buffer, and set the current
1406          * tracer to be this tracer. The tracer can then run some
1407          * internal tracing to verify that everything is in order.
1408          * If we fail, we do not register this tracer.
1409          */
1410         tracing_reset_online_cpus(&tr->trace_buffer);
1411
1412         tr->current_trace = type;
1413
1414 #ifdef CONFIG_TRACER_MAX_TRACE
1415         if (type->use_max_tr) {
1416                 /* If we expanded the buffers, make sure the max is expanded too */
1417                 if (ring_buffer_expanded)
1418                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1419                                            RING_BUFFER_ALL_CPUS);
1420                 tr->allocated_snapshot = true;
1421         }
1422 #endif
1423
1424         /* the test is responsible for initializing and enabling */
1425         pr_info("Testing tracer %s: ", type->name);
1426         ret = type->selftest(type, tr);
1427         /* the test is responsible for resetting too */
1428         tr->current_trace = saved_tracer;
1429         if (ret) {
1430                 printk(KERN_CONT "FAILED!\n");
1431                 /* Add the warning after printing 'FAILED' */
1432                 WARN_ON(1);
1433                 return -1;
1434         }
1435         /* Only reset on passing, to avoid touching corrupted buffers */
1436         tracing_reset_online_cpus(&tr->trace_buffer);
1437
1438 #ifdef CONFIG_TRACER_MAX_TRACE
1439         if (type->use_max_tr) {
1440                 tr->allocated_snapshot = false;
1441
1442                 /* Shrink the max buffer again */
1443                 if (ring_buffer_expanded)
1444                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1445                                            RING_BUFFER_ALL_CPUS);
1446         }
1447 #endif
1448
1449         printk(KERN_CONT "PASSED\n");
1450         return 0;
1451 }
1452 #else
1453 static inline int run_tracer_selftest(struct tracer *type)
1454 {
1455         return 0;
1456 }
1457 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1458
1459 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1460
1461 static void __init apply_trace_boot_options(void);
1462
1463 /**
1464  * register_tracer - register a tracer with the ftrace system.
1465  * @type: the plugin for the tracer
1466  *
1467  * Register a new plugin tracer.
1468  */
1469 int __init register_tracer(struct tracer *type)
1470 {
1471         struct tracer *t;
1472         int ret = 0;
1473
1474         if (!type->name) {
1475                 pr_info("Tracer must have a name\n");
1476                 return -1;
1477         }
1478
1479         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1480                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1481                 return -1;
1482         }
1483
1484         mutex_lock(&trace_types_lock);
1485
1486         tracing_selftest_running = true;
1487
1488         for (t = trace_types; t; t = t->next) {
1489                 if (strcmp(type->name, t->name) == 0) {
1490                         /* already found */
1491                         pr_info("Tracer %s already registered\n",
1492                                 type->name);
1493                         ret = -1;
1494                         goto out;
1495                 }
1496         }
1497
1498         if (!type->set_flag)
1499                 type->set_flag = &dummy_set_flag;
1500         if (!type->flags) {
1501                 /* allocate a dummy tracer_flags */
1502                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1503                 if (!type->flags) {
1504                         ret = -ENOMEM;
1505                         goto out;
1506                 }
1507                 type->flags->val = 0;
1508                 type->flags->opts = dummy_tracer_opt;
1509         } else
1510                 if (!type->flags->opts)
1511                         type->flags->opts = dummy_tracer_opt;
1512
1513         /* store the tracer for __set_tracer_option */
1514         type->flags->trace = type;
1515
1516         ret = run_tracer_selftest(type);
1517         if (ret < 0)
1518                 goto out;
1519
1520         type->next = trace_types;
1521         trace_types = type;
1522         add_tracer_options(&global_trace, type);
1523
1524  out:
1525         tracing_selftest_running = false;
1526         mutex_unlock(&trace_types_lock);
1527
1528         if (ret || !default_bootup_tracer)
1529                 goto out_unlock;
1530
1531         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1532                 goto out_unlock;
1533
1534         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1535         /* Do we want this tracer to start on bootup? */
1536         tracing_set_tracer(&global_trace, type->name);
1537         default_bootup_tracer = NULL;
1538
1539         apply_trace_boot_options();
1540
1541         /* disable other selftests, since this will break them. */
1542         tracing_selftest_disabled = true;
1543 #ifdef CONFIG_FTRACE_STARTUP_TEST
1544         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1545                type->name);
1546 #endif
1547
1548  out_unlock:
1549         return ret;
1550 }
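
/*
 * Illustrative sketch of a registration (the "example" tracer and its init
 * callback are hypothetical): a tracer supplies at least a name and an init
 * function, and registers itself from an __init path:
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *	};
 *
 *	static int __init init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */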
1551
1552 void tracing_reset(struct trace_buffer *buf, int cpu)
1553 {
1554         struct ring_buffer *buffer = buf->buffer;
1555
1556         if (!buffer)
1557                 return;
1558
1559         ring_buffer_record_disable(buffer);
1560
1561         /* Make sure all commits have finished */
1562         synchronize_sched();
1563         ring_buffer_reset_cpu(buffer, cpu);
1564
1565         ring_buffer_record_enable(buffer);
1566 }
1567
1568 void tracing_reset_online_cpus(struct trace_buffer *buf)
1569 {
1570         struct ring_buffer *buffer = buf->buffer;
1571         int cpu;
1572
1573         if (!buffer)
1574                 return;
1575
1576         ring_buffer_record_disable(buffer);
1577
1578         /* Make sure all commits have finished */
1579         synchronize_sched();
1580
1581         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1582
1583         for_each_online_cpu(cpu)
1584                 ring_buffer_reset_cpu(buffer, cpu);
1585
1586         ring_buffer_record_enable(buffer);
1587 }
1588
1589 /* Must have trace_types_lock held */
1590 void tracing_reset_all_online_cpus(void)
1591 {
1592         struct trace_array *tr;
1593
1594         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1595                 tracing_reset_online_cpus(&tr->trace_buffer);
1596 #ifdef CONFIG_TRACER_MAX_TRACE
1597                 tracing_reset_online_cpus(&tr->max_buffer);
1598 #endif
1599         }
1600 }
1601
1602 #define SAVED_CMDLINES_DEFAULT 128
1603 #define NO_CMDLINE_MAP UINT_MAX
1604 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1605 struct saved_cmdlines_buffer {
1606         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1607         unsigned *map_cmdline_to_pid;
1608         unsigned cmdline_num;
1609         int cmdline_idx;
1610         char *saved_cmdlines;
1611 };
1612 static struct saved_cmdlines_buffer *savedcmd;
1613
1614 /* temporarily disable recording */
1615 static atomic_t trace_record_cmdline_disabled __read_mostly;
1616
1617 static inline char *get_saved_cmdlines(int idx)
1618 {
1619         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1620 }
1621
1622 static inline void set_cmdline(int idx, const char *cmdline)
1623 {
1624         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1625 }
1626
1627 static int allocate_cmdlines_buffer(unsigned int val,
1628                                     struct saved_cmdlines_buffer *s)
1629 {
1630         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1631                                         GFP_KERNEL);
1632         if (!s->map_cmdline_to_pid)
1633                 return -ENOMEM;
1634
1635         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1636         if (!s->saved_cmdlines) {
1637                 kfree(s->map_cmdline_to_pid);
1638                 return -ENOMEM;
1639         }
1640
1641         s->cmdline_idx = 0;
1642         s->cmdline_num = val;
1643         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1644                sizeof(s->map_pid_to_cmdline));
1645         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1646                val * sizeof(*s->map_cmdline_to_pid));
1647
1648         return 0;
1649 }
1650
1651 static int trace_create_savedcmd(void)
1652 {
1653         int ret;
1654
1655         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1656         if (!savedcmd)
1657                 return -ENOMEM;
1658
1659         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1660         if (ret < 0) {
1661                 kfree(savedcmd);
1662                 savedcmd = NULL;
1663                 return -ENOMEM;
1664         }
1665
1666         return 0;
1667 }
1668
1669 int is_tracing_stopped(void)
1670 {
1671         return global_trace.stop_count;
1672 }
1673
1674 /**
1675  * tracing_start - quick start of the tracer
1676  *
1677  * If tracing is enabled but was stopped by tracing_stop,
1678  * this will start the tracer back up.
1679  */
1680 void tracing_start(void)
1681 {
1682         struct ring_buffer *buffer;
1683         unsigned long flags;
1684
1685         if (tracing_disabled)
1686                 return;
1687
1688         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1689         if (--global_trace.stop_count) {
1690                 if (global_trace.stop_count < 0) {
1691                         /* Someone screwed up their debugging */
1692                         WARN_ON_ONCE(1);
1693                         global_trace.stop_count = 0;
1694                 }
1695                 goto out;
1696         }
1697
1698         /* Prevent the buffers from switching */
1699         arch_spin_lock(&global_trace.max_lock);
1700
1701         buffer = global_trace.trace_buffer.buffer;
1702         if (buffer)
1703                 ring_buffer_record_enable(buffer);
1704
1705 #ifdef CONFIG_TRACER_MAX_TRACE
1706         buffer = global_trace.max_buffer.buffer;
1707         if (buffer)
1708                 ring_buffer_record_enable(buffer);
1709 #endif
1710
1711         arch_spin_unlock(&global_trace.max_lock);
1712
1713  out:
1714         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1715 }
1716
1717 static void tracing_start_tr(struct trace_array *tr)
1718 {
1719         struct ring_buffer *buffer;
1720         unsigned long flags;
1721
1722         if (tracing_disabled)
1723                 return;
1724
1725         /* If global, we need to also start the max tracer */
1726         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1727                 return tracing_start();
1728
1729         raw_spin_lock_irqsave(&tr->start_lock, flags);
1730
1731         if (--tr->stop_count) {
1732                 if (tr->stop_count < 0) {
1733                         /* Someone screwed up their debugging */
1734                         WARN_ON_ONCE(1);
1735                         tr->stop_count = 0;
1736                 }
1737                 goto out;
1738         }
1739
1740         buffer = tr->trace_buffer.buffer;
1741         if (buffer)
1742                 ring_buffer_record_enable(buffer);
1743
1744  out:
1745         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1746 }
1747
1748 /**
1749  * tracing_stop - quick stop of the tracer
1750  *
1751  * Light weight way to stop tracing. Use in conjunction with
1752  * tracing_start.
1753  */
1754 void tracing_stop(void)
1755 {
1756         struct ring_buffer *buffer;
1757         unsigned long flags;
1758
1759         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1760         if (global_trace.stop_count++)
1761                 goto out;
1762
1763         /* Prevent the buffers from switching */
1764         arch_spin_lock(&global_trace.max_lock);
1765
1766         buffer = global_trace.trace_buffer.buffer;
1767         if (buffer)
1768                 ring_buffer_record_disable(buffer);
1769
1770 #ifdef CONFIG_TRACER_MAX_TRACE
1771         buffer = global_trace.max_buffer.buffer;
1772         if (buffer)
1773                 ring_buffer_record_disable(buffer);
1774 #endif
1775
1776         arch_spin_unlock(&global_trace.max_lock);
1777
1778  out:
1779         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1780 }
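
/*
 * Example (illustrative sketch; hypothetical helper): tracing_stop() and
 * tracing_start() nest via stop_count, so a region of code can be kept out
 * of the global trace buffer by pairing them like this.
 */
static __maybe_unused void example_untraced_section(void)
{
	tracing_stop();
	/* ... work that should not show up in the trace ... */
	tracing_start();
}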
1781
1782 static void tracing_stop_tr(struct trace_array *tr)
1783 {
1784         struct ring_buffer *buffer;
1785         unsigned long flags;
1786
1787         /* If global, we need to also stop the max tracer */
1788         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1789                 return tracing_stop();
1790
1791         raw_spin_lock_irqsave(&tr->start_lock, flags);
1792         if (tr->stop_count++)
1793                 goto out;
1794
1795         buffer = tr->trace_buffer.buffer;
1796         if (buffer)
1797                 ring_buffer_record_disable(buffer);
1798
1799  out:
1800         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1801 }
1802
1803 void trace_stop_cmdline_recording(void);
1804
1805 static int trace_save_cmdline(struct task_struct *tsk)
1806 {
1807         unsigned pid, idx;
1808
1809         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1810                 return 0;
1811
1812         /*
1813          * It's not the end of the world if we don't get
1814          * the lock, but we also don't want to spin
1815          * nor do we want to disable interrupts,
1816          * so if we miss here, then better luck next time.
1817          */
1818         if (!arch_spin_trylock(&trace_cmdline_lock))
1819                 return 0;
1820
1821         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1822         if (idx == NO_CMDLINE_MAP) {
1823                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1824
1825                 /*
1826                  * Check whether the cmdline buffer at idx has a pid
1827                  * mapped. We are going to overwrite that entry so we
1828                  * need to clear the map_pid_to_cmdline. Otherwise we
1829                  * would read the new comm for the old pid.
1830                  */
1831                 pid = savedcmd->map_cmdline_to_pid[idx];
1832                 if (pid != NO_CMDLINE_MAP)
1833                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1834
1835                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1836                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1837
1838                 savedcmd->cmdline_idx = idx;
1839         }
1840
1841         set_cmdline(idx, tsk->comm);
1842
1843         arch_spin_unlock(&trace_cmdline_lock);
1844
1845         return 1;
1846 }
1847
1848 static void __trace_find_cmdline(int pid, char comm[])
1849 {
1850         unsigned map;
1851
1852         if (!pid) {
1853                 strcpy(comm, "<idle>");
1854                 return;
1855         }
1856
1857         if (WARN_ON_ONCE(pid < 0)) {
1858                 strcpy(comm, "<XXX>");
1859                 return;
1860         }
1861
1862         if (pid > PID_MAX_DEFAULT) {
1863                 strcpy(comm, "<...>");
1864                 return;
1865         }
1866
1867         map = savedcmd->map_pid_to_cmdline[pid];
1868         if (map != NO_CMDLINE_MAP)
1869                 strcpy(comm, get_saved_cmdlines(map));
1870         else
1871                 strcpy(comm, "<...>");
1872 }
1873
1874 void trace_find_cmdline(int pid, char comm[])
1875 {
1876         preempt_disable();
1877         arch_spin_lock(&trace_cmdline_lock);
1878
1879         __trace_find_cmdline(pid, comm);
1880
1881         arch_spin_unlock(&trace_cmdline_lock);
1882         preempt_enable();
1883 }
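
/*
 * Example (illustrative sketch; hypothetical helper): looking up the last
 * recorded comm for a pid. The buffer must hold at least TASK_COMM_LEN
 * bytes; pids that were never recorded come back as "<...>".
 */
static __maybe_unused void example_show_comm(int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);
	pr_debug("pid %d last ran as %s\n", pid, comm);
}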
1884
1885 void tracing_record_cmdline(struct task_struct *tsk)
1886 {
1887         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1888                 return;
1889
1890         if (!__this_cpu_read(trace_cmdline_save))
1891                 return;
1892
1893         if (trace_save_cmdline(tsk))
1894                 __this_cpu_write(trace_cmdline_save, false);
1895 }
1896
1897 void
1898 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1899                              int pc)
1900 {
1901         struct task_struct *tsk = current;
1902
1903         entry->preempt_count            = pc & 0xff;
1904         entry->pid                      = (tsk) ? tsk->pid : 0;
1905         entry->flags =
1906 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1907                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1908 #else
1909                 TRACE_FLAG_IRQS_NOSUPPORT |
1910 #endif
1911                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1912                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1913                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1914                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1915                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1916 }
1917 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1918
1919 static __always_inline void
1920 trace_event_setup(struct ring_buffer_event *event,
1921                   int type, unsigned long flags, int pc)
1922 {
1923         struct trace_entry *ent = ring_buffer_event_data(event);
1924
1925         tracing_generic_entry_update(ent, flags, pc);
1926         ent->type = type;
1927 }
1928
1929 struct ring_buffer_event *
1930 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1931                           int type,
1932                           unsigned long len,
1933                           unsigned long flags, int pc)
1934 {
1935         struct ring_buffer_event *event;
1936
1937         event = ring_buffer_lock_reserve(buffer, len);
1938         if (event != NULL)
1939                 trace_event_setup(event, type, flags, pc);
1940
1941         return event;
1942 }
1943
1944 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1945 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1946 static int trace_buffered_event_ref;
1947
1948 /**
1949  * trace_buffered_event_enable - enable buffering events
1950  *
1951  * When events are being filtered, it is quicker to use a temporary
1952  * buffer to write the event data into if there's a likely chance
1953  * that it will not be committed. Discarding an event from the ring
1954  * buffer is not as fast as committing, and is much slower than
1955  * copying the data and then committing it.
1956  *
1957  * When an event is to be filtered, per-cpu buffers are allocated to
1958  * write the event data into; if the event is filtered and discarded,
1959  * it is simply dropped, otherwise the entire data is committed
1960  * in one shot.
1961  */
1962 void trace_buffered_event_enable(void)
1963 {
1964         struct ring_buffer_event *event;
1965         struct page *page;
1966         int cpu;
1967
1968         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1969
1970         if (trace_buffered_event_ref++)
1971                 return;
1972
1973         for_each_tracing_cpu(cpu) {
1974                 page = alloc_pages_node(cpu_to_node(cpu),
1975                                         GFP_KERNEL | __GFP_NORETRY, 0);
1976                 if (!page)
1977                         goto failed;
1978
1979                 event = page_address(page);
1980                 memset(event, 0, sizeof(*event));
1981
1982                 per_cpu(trace_buffered_event, cpu) = event;
1983
1984                 preempt_disable();
1985                 if (cpu == smp_processor_id() &&
1986                     this_cpu_read(trace_buffered_event) !=
1987                     per_cpu(trace_buffered_event, cpu))
1988                         WARN_ON_ONCE(1);
1989                 preempt_enable();
1990         }
1991
1992         return;
1993  failed:
1994         trace_buffered_event_disable();
1995 }
1996
1997 static void enable_trace_buffered_event(void *data)
1998 {
1999         /* Probably not needed, but do it anyway */
2000         smp_rmb();
2001         this_cpu_dec(trace_buffered_event_cnt);
2002 }
2003
2004 static void disable_trace_buffered_event(void *data)
2005 {
2006         this_cpu_inc(trace_buffered_event_cnt);
2007 }
2008
2009 /**
2010  * trace_buffered_event_disable - disable buffering events
2011  *
2012  * When a filter is removed, it is faster to not use the buffered
2013  * events, and to commit directly into the ring buffer. Free up
2014  * the temp buffers when there are no more users. This requires
2015  * special synchronization with current events.
2016  */
2017 void trace_buffered_event_disable(void)
2018 {
2019         int cpu;
2020
2021         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2022
2023         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2024                 return;
2025
2026         if (--trace_buffered_event_ref)
2027                 return;
2028
2029         preempt_disable();
2030         /* For each CPU, set the buffer as used. */
2031         smp_call_function_many(tracing_buffer_mask,
2032                                disable_trace_buffered_event, NULL, 1);
2033         preempt_enable();
2034
2035         /* Wait for all current users to finish */
2036         synchronize_sched();
2037
2038         for_each_tracing_cpu(cpu) {
2039                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2040                 per_cpu(trace_buffered_event, cpu) = NULL;
2041         }
2042         /*
2043          * Make sure trace_buffered_event is NULL before clearing
2044          * trace_buffered_event_cnt.
2045          */
2046         smp_wmb();
2047
2048         preempt_disable();
2049         /* Do the work on each cpu */
2050         smp_call_function_many(tracing_buffer_mask,
2051                                enable_trace_buffered_event, NULL, 1);
2052         preempt_enable();
2053 }
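
/*
 * Example (illustrative sketch; hypothetical helpers): callers enable the
 * per-cpu buffers when a filter is attached and disable them when it is
 * removed, holding event_mutex in both paths as the WARN_ON_ONCE() checks
 * above expect.
 */
static __maybe_unused void example_filter_attached(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();
	mutex_unlock(&event_mutex);
}

static __maybe_unused void example_filter_removed(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_disable();
	mutex_unlock(&event_mutex);
}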
2054
2055 void
2056 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
2057 {
2058         __this_cpu_write(trace_cmdline_save, true);
2059
2060         /* If this is the temp buffer, we need to commit fully */
2061         if (this_cpu_read(trace_buffered_event) == event) {
2062                 /* Length is in event->array[0] */
2063                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
2064                 /* Release the temp buffer */
2065                 this_cpu_dec(trace_buffered_event_cnt);
2066         } else
2067                 ring_buffer_unlock_commit(buffer, event);
2068 }
2069
2070 static struct ring_buffer *temp_buffer;
2071
2072 struct ring_buffer_event *
2073 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2074                           struct trace_event_file *trace_file,
2075                           int type, unsigned long len,
2076                           unsigned long flags, int pc)
2077 {
2078         struct ring_buffer_event *entry;
2079         int val;
2080
2081         *current_rb = trace_file->tr->trace_buffer.buffer;
2082
2083         if ((trace_file->flags &
2084              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2085             (entry = this_cpu_read(trace_buffered_event))) {
2086                 /* Try to use the per cpu buffer first */
2087                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2088                 if (val == 1) {
2089                         trace_event_setup(entry, type, flags, pc);
2090                         entry->array[0] = len;
2091                         return entry;
2092                 }
2093                 this_cpu_dec(trace_buffered_event_cnt);
2094         }
2095
2096         entry = trace_buffer_lock_reserve(*current_rb,
2097                                          type, len, flags, pc);
2098         /*
2099          * If tracing is off, but we have triggers enabled
2100          * we still need to look at the event data. Use the temp_buffer
2101          * to store the trace event for the trigger to use. It's recursion
2102          * safe and will not be recorded anywhere.
2103          */
2104         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2105                 *current_rb = temp_buffer;
2106                 entry = trace_buffer_lock_reserve(*current_rb,
2107                                                   type, len, flags, pc);
2108         }
2109         return entry;
2110 }
2111 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2112
2113 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2114                                      struct ring_buffer *buffer,
2115                                      struct ring_buffer_event *event,
2116                                      unsigned long flags, int pc,
2117                                      struct pt_regs *regs)
2118 {
2119         __buffer_unlock_commit(buffer, event);
2120
2121         ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
2122         ftrace_trace_userstack(buffer, flags, pc);
2123 }
2124
2125 void
2126 trace_function(struct trace_array *tr,
2127                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2128                int pc)
2129 {
2130         struct trace_event_call *call = &event_function;
2131         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2132         struct ring_buffer_event *event;
2133         struct ftrace_entry *entry;
2134
2135         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2136                                           flags, pc);
2137         if (!event)
2138                 return;
2139         entry   = ring_buffer_event_data(event);
2140         entry->ip                       = ip;
2141         entry->parent_ip                = parent_ip;
2142
2143         if (!call_filter_check_discard(call, entry, buffer, event))
2144                 __buffer_unlock_commit(buffer, event);
2145 }
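
/*
 * Example (illustrative sketch; hypothetical helper): recording one function
 * entry by hand. Real callers are the function tracer callbacks; this only
 * shows the argument pattern (flags and the preempt count describe the
 * current context at the time of the call).
 */
static __maybe_unused void example_record_function(struct trace_array *tr,
						   unsigned long ip,
						   unsigned long parent_ip)
{
	unsigned long flags;

	local_save_flags(flags);
	trace_function(tr, ip, parent_ip, flags, preempt_count());
}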
2146
2147 #ifdef CONFIG_STACKTRACE
2148
2149 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2150 struct ftrace_stack {
2151         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2152 };
2153
2154 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2155 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2156
2157 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2158                                  unsigned long flags,
2159                                  int skip, int pc, struct pt_regs *regs)
2160 {
2161         struct trace_event_call *call = &event_kernel_stack;
2162         struct ring_buffer_event *event;
2163         struct stack_entry *entry;
2164         struct stack_trace trace;
2165         int use_stack;
2166         int size = FTRACE_STACK_ENTRIES;
2167
2168         trace.nr_entries        = 0;
2169         trace.skip              = skip;
2170
2171         /*
2172          * Since events can happen in NMIs there's no safe way to
2173          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2174          * or NMI comes in, it will just have to use the default
2175          * FTRACE_STACK_ENTRIES.
2176          */
2177         preempt_disable_notrace();
2178
2179         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2180         /*
2181          * We don't need any atomic variables, just a barrier.
2182          * If an interrupt comes in, we don't care, because it would
2183          * have exited and put the counter back to what we want.
2184          * We just need a barrier to keep gcc from moving things
2185          * around.
2186          */
2187         barrier();
2188         if (use_stack == 1) {
2189                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2190                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2191
2192                 if (regs)
2193                         save_stack_trace_regs(regs, &trace);
2194                 else
2195                         save_stack_trace(&trace);
2196
2197                 if (trace.nr_entries > size)
2198                         size = trace.nr_entries;
2199         } else
2200                 /* From now on, use_stack is a boolean */
2201                 use_stack = 0;
2202
2203         size *= sizeof(unsigned long);
2204
2205         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
2206                                           sizeof(*entry) + size, flags, pc);
2207         if (!event)
2208                 goto out;
2209         entry = ring_buffer_event_data(event);
2210
2211         memset(&entry->caller, 0, size);
2212
2213         if (use_stack)
2214                 memcpy(&entry->caller, trace.entries,
2215                        trace.nr_entries * sizeof(unsigned long));
2216         else {
2217                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2218                 trace.entries           = entry->caller;
2219                 if (regs)
2220                         save_stack_trace_regs(regs, &trace);
2221                 else
2222                         save_stack_trace(&trace);
2223         }
2224
2225         entry->size = trace.nr_entries;
2226
2227         if (!call_filter_check_discard(call, entry, buffer, event))
2228                 __buffer_unlock_commit(buffer, event);
2229
2230  out:
2231         /* Again, don't let gcc optimize things here */
2232         barrier();
2233         __this_cpu_dec(ftrace_stack_reserve);
2234         preempt_enable_notrace();
2235
2236 }
2237
2238 static inline void ftrace_trace_stack(struct trace_array *tr,
2239                                       struct ring_buffer *buffer,
2240                                       unsigned long flags,
2241                                       int skip, int pc, struct pt_regs *regs)
2242 {
2243         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2244                 return;
2245
2246         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2247 }
2248
2249 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2250                    int pc)
2251 {
2252         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2253 }
2254
2255 /**
2256  * trace_dump_stack - record a stack back trace in the trace buffer
2257  * @skip: Number of functions to skip (helper handlers)
2258  */
2259 void trace_dump_stack(int skip)
2260 {
2261         unsigned long flags;
2262
2263         if (tracing_disabled || tracing_selftest_running)
2264                 return;
2265
2266         local_save_flags(flags);
2267
2268         /*
2269          * Skip 3 more frames, which seems to get us to the caller
2270          * of this function.
2271          */
2272         skip += 3;
2273         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2274                              flags, skip, preempt_count(), NULL);
2275 }
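
/*
 * Example (illustrative sketch; hypothetical helper): recording the current
 * kernel stack from a suspicious code path. A skip of 0 starts the trace at
 * the caller of trace_dump_stack().
 */
static __maybe_unused void example_dump_current_stack(void)
{
	trace_dump_stack(0);
}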
2276
2277 static DEFINE_PER_CPU(int, user_stack_count);
2278
2279 void
2280 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2281 {
2282         struct trace_event_call *call = &event_user_stack;
2283         struct ring_buffer_event *event;
2284         struct userstack_entry *entry;
2285         struct stack_trace trace;
2286
2287         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2288                 return;
2289
2290         /*
2291          * NMIs cannot handle page faults, even with fixups.
2292          * Saving the user stack can (and often does) fault.
2293          */
2294         if (unlikely(in_nmi()))
2295                 return;
2296
2297         /*
2298          * prevent recursion, since the user stack tracing may
2299          * trigger other kernel events.
2300          */
2301         preempt_disable();
2302         if (__this_cpu_read(user_stack_count))
2303                 goto out;
2304
2305         __this_cpu_inc(user_stack_count);
2306
2307         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2308                                           sizeof(*entry), flags, pc);
2309         if (!event)
2310                 goto out_drop_count;
2311         entry   = ring_buffer_event_data(event);
2312
2313         entry->tgid             = current->tgid;
2314         memset(&entry->caller, 0, sizeof(entry->caller));
2315
2316         trace.nr_entries        = 0;
2317         trace.max_entries       = FTRACE_STACK_ENTRIES;
2318         trace.skip              = 0;
2319         trace.entries           = entry->caller;
2320
2321         save_stack_trace_user(&trace);
2322         if (!call_filter_check_discard(call, entry, buffer, event))
2323                 __buffer_unlock_commit(buffer, event);
2324
2325  out_drop_count:
2326         __this_cpu_dec(user_stack_count);
2327  out:
2328         preempt_enable();
2329 }
2330
2331 #ifdef UNUSED
2332 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2333 {
2334         ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
2335 }
2336 #endif /* UNUSED */
2337
2338 #endif /* CONFIG_STACKTRACE */
2339
2340 /* created for use with alloc_percpu */
2341 struct trace_buffer_struct {
2342         char buffer[TRACE_BUF_SIZE];
2343 };
2344
2345 static struct trace_buffer_struct *trace_percpu_buffer;
2346 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
2347 static struct trace_buffer_struct *trace_percpu_irq_buffer;
2348 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
2349
2350 /*
2351  * The buffer used is dependent on the context. There is a per cpu
2352  * buffer for normal context, softirq context, hard irq context and
2353  * for NMI context. This allows for lockless recording.
2354  *
2355  * Note, if the buffers failed to be allocated, then this returns NULL
2356  */
2357 static char *get_trace_buf(void)
2358 {
2359         struct trace_buffer_struct *percpu_buffer;
2360
2361         /*
2362          * If we have allocated per cpu buffers, then we do not
2363          * need to do any locking.
2364          */
2365         if (in_nmi())
2366                 percpu_buffer = trace_percpu_nmi_buffer;
2367         else if (in_irq())
2368                 percpu_buffer = trace_percpu_irq_buffer;
2369         else if (in_softirq())
2370                 percpu_buffer = trace_percpu_sirq_buffer;
2371         else
2372                 percpu_buffer = trace_percpu_buffer;
2373
2374         if (!percpu_buffer)
2375                 return NULL;
2376
2377         return this_cpu_ptr(&percpu_buffer->buffer[0]);
2378 }
2379
2380 static int alloc_percpu_trace_buffer(void)
2381 {
2382         struct trace_buffer_struct *buffers;
2383         struct trace_buffer_struct *sirq_buffers;
2384         struct trace_buffer_struct *irq_buffers;
2385         struct trace_buffer_struct *nmi_buffers;
2386
2387         buffers = alloc_percpu(struct trace_buffer_struct);
2388         if (!buffers)
2389                 goto err_warn;
2390
2391         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2392         if (!sirq_buffers)
2393                 goto err_sirq;
2394
2395         irq_buffers = alloc_percpu(struct trace_buffer_struct);
2396         if (!irq_buffers)
2397                 goto err_irq;
2398
2399         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2400         if (!nmi_buffers)
2401                 goto err_nmi;
2402
2403         trace_percpu_buffer = buffers;
2404         trace_percpu_sirq_buffer = sirq_buffers;
2405         trace_percpu_irq_buffer = irq_buffers;
2406         trace_percpu_nmi_buffer = nmi_buffers;
2407
2408         return 0;
2409
2410  err_nmi:
2411         free_percpu(irq_buffers);
2412  err_irq:
2413         free_percpu(sirq_buffers);
2414  err_sirq:
2415         free_percpu(buffers);
2416  err_warn:
2417         WARN(1, "Could not allocate percpu trace_printk buffer");
2418         return -ENOMEM;
2419 }
2420
2421 static int buffers_allocated;
2422
2423 void trace_printk_init_buffers(void)
2424 {
2425         if (buffers_allocated)
2426                 return;
2427
2428         if (alloc_percpu_trace_buffer())
2429                 return;
2430
2431         /* trace_printk() is for debug use only. Don't use it in production. */
2432
2433         pr_warn("\n");
2434         pr_warn("**********************************************************\n");
2435         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2436         pr_warn("**                                                      **\n");
2437         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2438         pr_warn("**                                                      **\n");
2439         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2440         pr_warn("** unsafe for production use.                           **\n");
2441         pr_warn("**                                                      **\n");
2442         pr_warn("** If you see this message and you are not debugging    **\n");
2443         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2444         pr_warn("**                                                      **\n");
2445         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2446         pr_warn("**********************************************************\n");
2447
2448         /* Expand the buffers to the configured size */
2449         tracing_update_buffers();
2450
2451         buffers_allocated = 1;
2452
2453         /*
2454          * trace_printk_init_buffers() can be called by modules.
2455          * If that happens, then we need to start cmdline recording
2456          * directly here. If global_trace.trace_buffer.buffer is already
2457          * allocated here, then this was called by module code.
2458          */
2459         if (global_trace.trace_buffer.buffer)
2460                 tracing_start_cmdline_record();
2461 }
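
/*
 * Example (illustrative sketch; hypothetical helper): trace_printk() is the
 * debug-only interface whose first use triggers the banner above. Its format
 * handling is done by trace_vbprintk()/trace_vprintk() further below.
 */
static __maybe_unused void example_debug_marker(int cpu, u64 ts)
{
	trace_printk("slow path hit on cpu %d at %llu\n",
		     cpu, (unsigned long long)ts);
}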
2462
2463 void trace_printk_start_comm(void)
2464 {
2465         /* Start tracing comms if trace printk is set */
2466         if (!buffers_allocated)
2467                 return;
2468         tracing_start_cmdline_record();
2469 }
2470
2471 static void trace_printk_start_stop_comm(int enabled)
2472 {
2473         if (!buffers_allocated)
2474                 return;
2475
2476         if (enabled)
2477                 tracing_start_cmdline_record();
2478         else
2479                 tracing_stop_cmdline_record();
2480 }
2481
2482 /**
2483  * trace_vbprintk - write binary msg to tracing buffer
2484  * @ip:    The address of the caller
 * @fmt:   The format string to write to the buffer
 * @args:  Arguments for @fmt
2485  */
2486 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2487 {
2488         struct trace_event_call *call = &event_bprint;
2489         struct ring_buffer_event *event;
2490         struct ring_buffer *buffer;
2491         struct trace_array *tr = &global_trace;
2492         struct bprint_entry *entry;
2493         unsigned long flags;
2494         char *tbuffer;
2495         int len = 0, size, pc;
2496
2497         if (unlikely(tracing_selftest_running || tracing_disabled))
2498                 return 0;
2499
2500         /* Don't pollute graph traces with trace_vprintk internals */
2501         pause_graph_tracing();
2502
2503         pc = preempt_count();
2504         preempt_disable_notrace();
2505
2506         tbuffer = get_trace_buf();
2507         if (!tbuffer) {
2508                 len = 0;
2509                 goto out;
2510         }
2511
2512         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2513
2514         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2515                 goto out;
2516
2517         local_save_flags(flags);
2518         size = sizeof(*entry) + sizeof(u32) * len;
2519         buffer = tr->trace_buffer.buffer;
2520         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2521                                           flags, pc);
2522         if (!event)
2523                 goto out;
2524         entry = ring_buffer_event_data(event);
2525         entry->ip                       = ip;
2526         entry->fmt                      = fmt;
2527
2528         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2529         if (!call_filter_check_discard(call, entry, buffer, event)) {
2530                 __buffer_unlock_commit(buffer, event);
2531                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2532         }
2533
2534 out:
2535         preempt_enable_notrace();
2536         unpause_graph_tracing();
2537
2538         return len;
2539 }
2540 EXPORT_SYMBOL_GPL(trace_vbprintk);
2541
2542 static int
2543 __trace_array_vprintk(struct ring_buffer *buffer,
2544                       unsigned long ip, const char *fmt, va_list args)
2545 {
2546         struct trace_event_call *call = &event_print;
2547         struct ring_buffer_event *event;
2548         int len = 0, size, pc;
2549         struct print_entry *entry;
2550         unsigned long flags;
2551         char *tbuffer;
2552
2553         if (tracing_disabled || tracing_selftest_running)
2554                 return 0;
2555
2556         /* Don't pollute graph traces with trace_vprintk internals */
2557         pause_graph_tracing();
2558
2559         pc = preempt_count();
2560         preempt_disable_notrace();
2561
2563         tbuffer = get_trace_buf();
2564         if (!tbuffer) {
2565                 len = 0;
2566                 goto out;
2567         }
2568
2569         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2570
2571         local_save_flags(flags);
2572         size = sizeof(*entry) + len + 1;
2573         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2574                                           flags, pc);
2575         if (!event)
2576                 goto out;
2577         entry = ring_buffer_event_data(event);
2578         entry->ip = ip;
2579
2580         memcpy(&entry->buf, tbuffer, len + 1);
2581         if (!call_filter_check_discard(call, entry, buffer, event)) {
2582                 __buffer_unlock_commit(buffer, event);
2583                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2584         }
2585  out:
2586         preempt_enable_notrace();
2587         unpause_graph_tracing();
2588
2589         return len;
2590 }
2591
2592 int trace_array_vprintk(struct trace_array *tr,
2593                         unsigned long ip, const char *fmt, va_list args)
2594 {
2595         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2596 }
2597
2598 int trace_array_printk(struct trace_array *tr,
2599                        unsigned long ip, const char *fmt, ...)
2600 {
2601         int ret;
2602         va_list ap;
2603
2604         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2605                 return 0;
2606
2607         va_start(ap, fmt);
2608         ret = trace_array_vprintk(tr, ip, fmt, ap);
2609         va_end(ap);
2610         return ret;
2611 }
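
/*
 * Example (illustrative sketch; hypothetical helper): writing into a specific
 * trace instance rather than the global buffer. @tr would come from whoever
 * created or looked up the instance; _THIS_IP_ records the call site.
 */
static __maybe_unused void example_instance_message(struct trace_array *tr,
						    int err)
{
	trace_array_printk(tr, _THIS_IP_, "device reset failed: %d\n", err);
}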
2612
2613 int trace_array_printk_buf(struct ring_buffer *buffer,
2614                            unsigned long ip, const char *fmt, ...)
2615 {
2616         int ret;
2617         va_list ap;
2618
2619         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2620                 return 0;
2621
2622         va_start(ap, fmt);
2623         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2624         va_end(ap);
2625         return ret;
2626 }
2627
2628 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2629 {
2630         return trace_array_vprintk(&global_trace, ip, fmt, args);
2631 }
2632 EXPORT_SYMBOL_GPL(trace_vprintk);
2633
2634 static void trace_iterator_increment(struct trace_iterator *iter)
2635 {
2636         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2637
2638         iter->idx++;
2639         if (buf_iter)
2640                 ring_buffer_read(buf_iter, NULL);
2641 }
2642
2643 static struct trace_entry *
2644 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2645                 unsigned long *lost_events)
2646 {
2647         struct ring_buffer_event *event;
2648         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2649
2650         if (buf_iter)
2651                 event = ring_buffer_iter_peek(buf_iter, ts);
2652         else
2653                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2654                                          lost_events);
2655
2656         if (event) {
2657                 iter->ent_size = ring_buffer_event_length(event);
2658                 return ring_buffer_event_data(event);
2659         }
2660         iter->ent_size = 0;
2661         return NULL;
2662 }
2663
2664 static struct trace_entry *
2665 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2666                   unsigned long *missing_events, u64 *ent_ts)
2667 {
2668         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2669         struct trace_entry *ent, *next = NULL;
2670         unsigned long lost_events = 0, next_lost = 0;
2671         int cpu_file = iter->cpu_file;
2672         u64 next_ts = 0, ts;
2673         int next_cpu = -1;
2674         int next_size = 0;
2675         int cpu;
2676
2677         /*
2678          * If we are in a per_cpu trace file, don't bother iterating over
2679          * all CPUs; peek at that one CPU directly.
2680          */
2681         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2682                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2683                         return NULL;
2684                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2685                 if (ent_cpu)
2686                         *ent_cpu = cpu_file;
2687
2688                 return ent;
2689         }
2690
2691         for_each_tracing_cpu(cpu) {
2692
2693                 if (ring_buffer_empty_cpu(buffer, cpu))
2694                         continue;
2695
2696                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2697
2698                 /*
2699                  * Pick the entry with the smallest timestamp:
2700                  */
2701                 if (ent && (!next || ts < next_ts)) {
2702                         next = ent;
2703                         next_cpu = cpu;
2704                         next_ts = ts;
2705                         next_lost = lost_events;
2706                         next_size = iter->ent_size;
2707                 }
2708         }
2709
2710         iter->ent_size = next_size;
2711
2712         if (ent_cpu)
2713                 *ent_cpu = next_cpu;
2714
2715         if (ent_ts)
2716                 *ent_ts = next_ts;
2717
2718         if (missing_events)
2719                 *missing_events = next_lost;
2720
2721         return next;
2722 }
2723
2724 /* Find the next real entry, without updating the iterator itself */
2725 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2726                                           int *ent_cpu, u64 *ent_ts)
2727 {
2728         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2729 }
2730
2731 /* Find the next real entry, and increment the iterator to the next entry */
2732 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2733 {
2734         iter->ent = __find_next_entry(iter, &iter->cpu,
2735                                       &iter->lost_events, &iter->ts);
2736
2737         if (iter->ent)
2738                 trace_iterator_increment(iter);
2739
2740         return iter->ent ? iter : NULL;
2741 }
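
/*
 * Example (illustrative sketch; hypothetical helper): walking a trace with an
 * already set up iterator. Each call to trace_find_next_entry_inc() advances
 * to the next-oldest entry across all CPUs and returns NULL once every
 * per-cpu buffer is exhausted.
 */
static __maybe_unused unsigned long example_count_entries(struct trace_iterator *iter)
{
	unsigned long count = 0;

	while (trace_find_next_entry_inc(iter))
		count++;

	return count;
}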
2742
2743 static void trace_consume(struct trace_iterator *iter)
2744 {
2745         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2746                             &iter->lost_events);
2747 }
2748
2749 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2750 {
2751         struct trace_iterator *iter = m->private;
2752         int i = (int)*pos;
2753         void *ent;
2754
2755         WARN_ON_ONCE(iter->leftover);
2756
2757         (*pos)++;
2758
2759         /* can't go backwards */
2760         if (iter->idx > i)
2761                 return NULL;
2762
2763         if (iter->idx < 0)
2764                 ent = trace_find_next_entry_inc(iter);
2765         else
2766                 ent = iter;
2767
2768         while (ent && iter->idx < i)
2769                 ent = trace_find_next_entry_inc(iter);
2770
2771         iter->pos = *pos;
2772
2773         return ent;
2774 }
2775
2776 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2777 {
2778         struct ring_buffer_event *event;
2779         struct ring_buffer_iter *buf_iter;
2780         unsigned long entries = 0;
2781         u64 ts;
2782
2783         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2784
2785         buf_iter = trace_buffer_iter(iter, cpu);
2786         if (!buf_iter)
2787                 return;
2788
2789         ring_buffer_iter_reset(buf_iter);
2790
2791         /*
2792          * With the max latency tracers, it is possible that a reset
2793          * never took place on a cpu. This is evident
2794          * by the timestamp being before the start of the buffer.
2795          */
2796         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2797                 if (ts >= iter->trace_buffer->time_start)
2798                         break;
2799                 entries++;
2800                 ring_buffer_read(buf_iter, NULL);
2801         }
2802
2803         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2804 }
2805
2806 /*
2807  * The current tracer is copied to avoid taking a global lock
2808  * all around.
2809  */
2810 static void *s_start(struct seq_file *m, loff_t *pos)
2811 {
2812         struct trace_iterator *iter = m->private;
2813         struct trace_array *tr = iter->tr;
2814         int cpu_file = iter->cpu_file;
2815         void *p = NULL;
2816         loff_t l = 0;
2817         int cpu;
2818
2819         /*
2820          * copy the tracer to avoid using a global lock all around.
2821          * iter->trace is a copy of current_trace, the pointer to the
2822          * name may be used instead of a strcmp(), as iter->trace->name
2823          * will point to the same string as current_trace->name.
2824          */
2825         mutex_lock(&trace_types_lock);
2826         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2827                 *iter->trace = *tr->current_trace;
2828         mutex_unlock(&trace_types_lock);
2829
2830 #ifdef CONFIG_TRACER_MAX_TRACE
2831         if (iter->snapshot && iter->trace->use_max_tr)
2832                 return ERR_PTR(-EBUSY);
2833 #endif
2834
2835         if (!iter->snapshot)
2836                 atomic_inc(&trace_record_cmdline_disabled);
2837
2838         if (*pos != iter->pos) {
2839                 iter->ent = NULL;
2840                 iter->cpu = 0;
2841                 iter->idx = -1;
2842
2843                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2844                         for_each_tracing_cpu(cpu)
2845                                 tracing_iter_reset(iter, cpu);
2846                 } else
2847                         tracing_iter_reset(iter, cpu_file);
2848
2849                 iter->leftover = 0;
2850                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2851                         ;
2852
2853         } else {
2854                 /*
2855                  * If we overflowed the seq_file before, then we want
2856                  * to just reuse the trace_seq buffer again.
2857                  */
2858                 if (iter->leftover)
2859                         p = iter;
2860                 else {
2861                         l = *pos - 1;
2862                         p = s_next(m, p, &l);
2863                 }
2864         }
2865
2866         trace_event_read_lock();
2867         trace_access_lock(cpu_file);
2868         return p;
2869 }
2870
2871 static void s_stop(struct seq_file *m, void *p)
2872 {
2873         struct trace_iterator *iter = m->private;
2874
2875 #ifdef CONFIG_TRACER_MAX_TRACE
2876         if (iter->snapshot && iter->trace->use_max_tr)
2877                 return;
2878 #endif
2879
2880         if (!iter->snapshot)
2881                 atomic_dec(&trace_record_cmdline_disabled);
2882
2883         trace_access_unlock(iter->cpu_file);
2884         trace_event_read_unlock();
2885 }
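
/*
 * Example (illustrative sketch): s_start(), s_next() and s_stop() are
 * seq_file callbacks. Wiring them up follows the usual seq_file pattern;
 * the show callback below is hypothetical and only stands in for the real
 * per-entry formatter.
 */
static int example_show(struct seq_file *m, void *v)
{
	/* format one trace entry into @m here */
	return 0;
}

static __maybe_unused const struct seq_operations example_trace_seq_ops = {
	.start	= s_start,
	.next	= s_next,
	.stop	= s_stop,
	.show	= example_show,
};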
2886
2887 static void
2888 get_total_entries(struct trace_buffer *buf,
2889                   unsigned long *total, unsigned long *entries)
2890 {
2891         unsigned long count;
2892         int cpu;
2893
2894         *total = 0;
2895         *entries = 0;
2896
2897         for_each_tracing_cpu(cpu) {
2898                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2899                 /*
2900                  * If this buffer has skipped entries, then we hold all
2901                  * entries for the trace and we need to ignore the
2902                  * ones before the time stamp.
2903                  */
2904                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2905                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2906                         /* total is the same as the entries */
2907                         *total += count;
2908                 } else
2909                         *total += count +
2910                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2911                 *entries += count;
2912         }
2913 }
2914
2915 static void print_lat_help_header(struct seq_file *m)
2916 {
2917         seq_puts(m, "#                  _------=> CPU#            \n"
2918                     "#                 / _-----=> irqs-off        \n"
2919                     "#                | / _----=> need-resched    \n"
2920                     "#                || / _---=> hardirq/softirq \n"
2921                     "#                ||| / _--=> preempt-depth   \n"
2922                     "#                |||| /     delay            \n"
2923                     "#  cmd     pid   ||||| time  |   caller      \n"
2924                     "#     \\   /      |||||  \\    |   /         \n");
2925 }
2926
2927 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2928 {
2929         unsigned long total;
2930         unsigned long entries;
2931
2932         get_total_entries(buf, &total, &entries);
2933         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2934                    entries, total, num_online_cpus());
2935         seq_puts(m, "#\n");
2936 }
2937
2938 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2939 {
2940         print_event_info(buf, m);
2941         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2942                     "#              | |       |          |         |\n");
2943 }
2944
2945 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2946 {
2947         print_event_info(buf, m);
2948         seq_puts(m, "#                              _-----=> irqs-off\n"
2949                     "#                             / _----=> need-resched\n"
2950                     "#                            | / _---=> hardirq/softirq\n"
2951                     "#                            || / _--=> preempt-depth\n"
2952                     "#                            ||| /     delay\n"
2953                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2954                     "#              | |       |   ||||       |         |\n");
2955 }
2956
2957 void
2958 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2959 {
2960         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2961         struct trace_buffer *buf = iter->trace_buffer;
2962         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2963         struct tracer *type = iter->trace;
2964         unsigned long entries;
2965         unsigned long total;
2966         const char *name = "preemption";
2967
2968         name = type->name;
2969
2970         get_total_entries(buf, &total, &entries);
2971
2972         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2973                    name, UTS_RELEASE);
2974         seq_puts(m, "# -----------------------------------"
2975                  "---------------------------------\n");
2976         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2977                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2978                    nsecs_to_usecs(data->saved_latency),
2979                    entries,
2980                    total,
2981                    buf->cpu,
2982 #if defined(CONFIG_PREEMPT_NONE)
2983                    "server",
2984 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2985                    "desktop",
2986 #elif defined(CONFIG_PREEMPT)
2987                    "preempt",
2988 #else
2989                    "unknown",
2990 #endif
2991                    /* These are reserved for later use */
2992                    0, 0, 0, 0);
2993 #ifdef CONFIG_SMP
2994         seq_printf(m, " #P:%d)\n", num_online_cpus());
2995 #else
2996         seq_puts(m, ")\n");
2997 #endif
2998         seq_puts(m, "#    -----------------\n");
2999         seq_printf(m, "#    | task: %.16s-%d "
3000                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3001                    data->comm, data->pid,
3002                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3003                    data->policy, data->rt_priority);
3004         seq_puts(m, "#    -----------------\n");
3005
3006         if (data->critical_start) {
3007                 seq_puts(m, "#  => started at: ");
3008                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3009                 trace_print_seq(m, &iter->seq);
3010                 seq_puts(m, "\n#  => ended at:   ");
3011                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3012                 trace_print_seq(m, &iter->seq);
3013                 seq_puts(m, "\n#\n");
3014         }
3015
3016         seq_puts(m, "#\n");
3017 }
3018
3019 static void test_cpu_buff_start(struct trace_iterator *iter)
3020 {
3021         struct trace_seq *s = &iter->seq;
3022         struct trace_array *tr = iter->tr;
3023
3024         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3025                 return;
3026
3027         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3028                 return;
3029
3030         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3031                 return;
3032
3033         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3034                 return;
3035
3036         if (iter->started)
3037                 cpumask_set_cpu(iter->cpu, iter->started);
3038
3039         /* Don't print started cpu buffer for the first entry of the trace */
3040         if (iter->idx > 1)
3041                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3042                                 iter->cpu);
3043 }
3044
3045 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3046 {
3047         struct trace_array *tr = iter->tr;
3048         struct trace_seq *s = &iter->seq;
3049         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3050         struct trace_entry *entry;
3051         struct trace_event *event;
3052
3053         entry = iter->ent;
3054
3055         test_cpu_buff_start(iter);
3056
3057         event = ftrace_find_event(entry->type);
3058
3059         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3060                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3061                         trace_print_lat_context(iter);
3062                 else
3063                         trace_print_context(iter);
3064         }
3065
3066         if (trace_seq_has_overflowed(s))
3067                 return TRACE_TYPE_PARTIAL_LINE;
3068
3069         if (event)
3070                 return event->funcs->trace(iter, sym_flags, event);
3071
3072         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3073
3074         return trace_handle_return(s);
3075 }
3076
3077 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3078 {
3079         struct trace_array *tr = iter->tr;
3080         struct trace_seq *s = &iter->seq;
3081         struct trace_entry *entry;
3082         struct trace_event *event;
3083
3084         entry = iter->ent;
3085
3086         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3087                 trace_seq_printf(s, "%d %d %llu ",
3088                                  entry->pid, iter->cpu, iter->ts);
3089
3090         if (trace_seq_has_overflowed(s))
3091                 return TRACE_TYPE_PARTIAL_LINE;
3092
3093         event = ftrace_find_event(entry->type);
3094         if (event)
3095                 return event->funcs->raw(iter, 0, event);
3096
3097         trace_seq_printf(s, "%d ?\n", entry->type);
3098
3099         return trace_handle_return(s);
3100 }
3101
3102 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3103 {
3104         struct trace_array *tr = iter->tr;
3105         struct trace_seq *s = &iter->seq;
3106         unsigned char newline = '\n';
3107         struct trace_entry *entry;
3108         struct trace_event *event;
3109
3110         entry = iter->ent;
3111
3112         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3113                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3114                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3115                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3116                 if (trace_seq_has_overflowed(s))
3117                         return TRACE_TYPE_PARTIAL_LINE;
3118         }
3119
3120         event = ftrace_find_event(entry->type);
3121         if (event) {
3122                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3123                 if (ret != TRACE_TYPE_HANDLED)
3124                         return ret;
3125         }
3126
3127         SEQ_PUT_FIELD(s, newline);
3128
3129         return trace_handle_return(s);
3130 }
3131
3132 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3133 {
3134         struct trace_array *tr = iter->tr;
3135         struct trace_seq *s = &iter->seq;
3136         struct trace_entry *entry;
3137         struct trace_event *event;
3138
3139         entry = iter->ent;
3140
3141         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3142                 SEQ_PUT_FIELD(s, entry->pid);
3143                 SEQ_PUT_FIELD(s, iter->cpu);
3144                 SEQ_PUT_FIELD(s, iter->ts);
3145                 if (trace_seq_has_overflowed(s))
3146                         return TRACE_TYPE_PARTIAL_LINE;
3147         }
3148
3149         event = ftrace_find_event(entry->type);
3150         return event ? event->funcs->binary(iter, 0, event) :
3151                 TRACE_TYPE_HANDLED;
3152 }
3153
3154 int trace_empty(struct trace_iterator *iter)
3155 {
3156         struct ring_buffer_iter *buf_iter;
3157         int cpu;
3158
3159         /* If we are looking at one CPU buffer, only check that one */
3160         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3161                 cpu = iter->cpu_file;
3162                 buf_iter = trace_buffer_iter(iter, cpu);
3163                 if (buf_iter) {
3164                         if (!ring_buffer_iter_empty(buf_iter))
3165                                 return 0;
3166                 } else {
3167                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3168                                 return 0;
3169                 }
3170                 return 1;
3171         }
3172
3173         for_each_tracing_cpu(cpu) {
3174                 buf_iter = trace_buffer_iter(iter, cpu);
3175                 if (buf_iter) {
3176                         if (!ring_buffer_iter_empty(buf_iter))
3177                                 return 0;
3178                 } else {
3179                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3180                                 return 0;
3181                 }
3182         }
3183
3184         return 1;
3185 }
3186
3187 /* Called with trace_event_read_lock() held. */
3188 enum print_line_t print_trace_line(struct trace_iterator *iter)
3189 {
3190         struct trace_array *tr = iter->tr;
3191         unsigned long trace_flags = tr->trace_flags;
3192         enum print_line_t ret;
3193
3194         if (iter->lost_events) {
3195                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3196                                  iter->cpu, iter->lost_events);
3197                 if (trace_seq_has_overflowed(&iter->seq))
3198                         return TRACE_TYPE_PARTIAL_LINE;
3199         }
3200
3201         if (iter->trace && iter->trace->print_line) {
3202                 ret = iter->trace->print_line(iter);
3203                 if (ret != TRACE_TYPE_UNHANDLED)
3204                         return ret;
3205         }
3206
3207         if (iter->ent->type == TRACE_BPUTS &&
3208                         trace_flags & TRACE_ITER_PRINTK &&
3209                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3210                 return trace_print_bputs_msg_only(iter);
3211
3212         if (iter->ent->type == TRACE_BPRINT &&
3213                         trace_flags & TRACE_ITER_PRINTK &&
3214                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3215                 return trace_print_bprintk_msg_only(iter);
3216
3217         if (iter->ent->type == TRACE_PRINT &&
3218                         trace_flags & TRACE_ITER_PRINTK &&
3219                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3220                 return trace_print_printk_msg_only(iter);
3221
3222         if (trace_flags & TRACE_ITER_BIN)
3223                 return print_bin_fmt(iter);
3224
3225         if (trace_flags & TRACE_ITER_HEX)
3226                 return print_hex_fmt(iter);
3227
3228         if (trace_flags & TRACE_ITER_RAW)
3229                 return print_raw_fmt(iter);
3230
3231         return print_trace_fmt(iter);
3232 }
3233
3234 void trace_latency_header(struct seq_file *m)
3235 {
3236         struct trace_iterator *iter = m->private;
3237         struct trace_array *tr = iter->tr;
3238
3239         /* print nothing if the buffers are empty */
3240         if (trace_empty(iter))
3241                 return;
3242
3243         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3244                 print_trace_header(m, iter);
3245
3246         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3247                 print_lat_help_header(m);
3248 }
3249
3250 void trace_default_header(struct seq_file *m)
3251 {
3252         struct trace_iterator *iter = m->private;
3253         struct trace_array *tr = iter->tr;
3254         unsigned long trace_flags = tr->trace_flags;
3255
3256         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3257                 return;
3258
3259         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3260                 /* print nothing if the buffers are empty */
3261                 if (trace_empty(iter))
3262                         return;
3263                 print_trace_header(m, iter);
3264                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3265                         print_lat_help_header(m);
3266         } else {
3267                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3268                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3269                                 print_func_help_header_irq(iter->trace_buffer, m);
3270                         else
3271                                 print_func_help_header(iter->trace_buffer, m);
3272                 }
3273         }
3274 }
3275
3276 static void test_ftrace_alive(struct seq_file *m)
3277 {
3278         if (!ftrace_is_dead())
3279                 return;
3280         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3281                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3282 }
3283
3284 #ifdef CONFIG_TRACER_MAX_TRACE
3285 static void show_snapshot_main_help(struct seq_file *m)
3286 {
3287         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3288                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3289                     "#                      Takes a snapshot of the main buffer.\n"
3290                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3291                     "#                      (Doesn't have to be '2'; works with any number\n"
3292                     "#                       that is not a '0' or '1')\n");
3293 }
3294
3295 static void show_snapshot_percpu_help(struct seq_file *m)
3296 {
3297         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3298 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3299         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3300                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3301 #else
3302         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3303                     "#                     Must use main snapshot file to allocate.\n");
3304 #endif
3305         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3306                     "#                      (Doesn't have to be '2'; works with any number\n"
3307                     "#                       that is not a '0' or '1')\n");
3308 }
3309
3310 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3311 {
3312         if (iter->tr->allocated_snapshot)
3313                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3314         else
3315                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3316
3317         seq_puts(m, "# Snapshot commands:\n");
3318         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3319                 show_snapshot_main_help(m);
3320         else
3321                 show_snapshot_percpu_help(m);
3322 }
3323 #else
3324 /* Should never be called */
3325 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3326 #endif
3327
3328 static int s_show(struct seq_file *m, void *v)
3329 {
3330         struct trace_iterator *iter = v;
3331         int ret;
3332
3333         if (iter->ent == NULL) {
3334                 if (iter->tr) {
3335                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3336                         seq_puts(m, "#\n");
3337                         test_ftrace_alive(m);
3338                 }
3339                 if (iter->snapshot && trace_empty(iter))
3340                         print_snapshot_help(m, iter);
3341                 else if (iter->trace && iter->trace->print_header)
3342                         iter->trace->print_header(m);
3343                 else
3344                         trace_default_header(m);
3345
3346         } else if (iter->leftover) {
3347                 /*
3348                  * If we filled the seq_file buffer earlier, we
3349                  * want to just show it now.
3350                  */
3351                 ret = trace_print_seq(m, &iter->seq);
3352
3353                 /* ret should be zero this time, but you never know */
3354                 iter->leftover = ret;
3355
3356         } else {
3357                 print_trace_line(iter);
3358                 ret = trace_print_seq(m, &iter->seq);
3359                 /*
3360          * If we overflow the seq_file buffer, then it will
3361          * ask us for this data again the next time s_show()
3362          * is called. Use that instead of printing the line again.
3363          *  ret is 0 if the seq_file write succeeded,
3364          *        -1 otherwise.
3365                  */
3366                 iter->leftover = ret;
3367         }
3368
3369         return 0;
3370 }
3371
3372 /*
3373  * Should be used after trace_array_get(), trace_types_lock
3374  * ensures that i_cdev was already initialized.
3375  */
3376 static inline int tracing_get_cpu(struct inode *inode)
3377 {
3378         if (inode->i_cdev) /* See trace_create_cpu_file() */
3379                 return (long)inode->i_cdev - 1;
3380         return RING_BUFFER_ALL_CPUS;
3381 }
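
/*
 * Example of the encoding assumed above (illustrative only): the per-cpu
 * trace files are expected to store "cpu + 1" in i_cdev when they are
 * created, so that a NULL i_cdev still means "all CPUs":
 *
 *     i_cdev == NULL       -> RING_BUFFER_ALL_CPUS
 *     i_cdev == (void *)1  -> cpu 0
 *     i_cdev == (void *)4  -> cpu 3
 */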
3382
3383 static const struct seq_operations tracer_seq_ops = {
3384         .start          = s_start,
3385         .next           = s_next,
3386         .stop           = s_stop,
3387         .show           = s_show,
3388 };
3389
3390 static struct trace_iterator *
3391 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3392 {
3393         struct trace_array *tr = inode->i_private;
3394         struct trace_iterator *iter;
3395         int cpu;
3396
3397         if (tracing_disabled)
3398                 return ERR_PTR(-ENODEV);
3399
3400         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3401         if (!iter)
3402                 return ERR_PTR(-ENOMEM);
3403
3404         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3405                                     GFP_KERNEL);
3406         if (!iter->buffer_iter)
3407                 goto release;
3408
3409         /*
3410          * We make a copy of the current tracer to avoid concurrent
3411          * changes to it while we are reading.
3412          */
3413         mutex_lock(&trace_types_lock);
3414         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3415         if (!iter->trace)
3416                 goto fail;
3417
3418         *iter->trace = *tr->current_trace;
3419
3420         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3421                 goto fail;
3422
3423         iter->tr = tr;
3424
3425 #ifdef CONFIG_TRACER_MAX_TRACE
3426         /* Currently only the top directory has a snapshot */
3427         if (tr->current_trace->print_max || snapshot)
3428                 iter->trace_buffer = &tr->max_buffer;
3429         else
3430 #endif
3431                 iter->trace_buffer = &tr->trace_buffer;
3432         iter->snapshot = snapshot;
3433         iter->pos = -1;
3434         iter->cpu_file = tracing_get_cpu(inode);
3435         mutex_init(&iter->mutex);
3436
3437         /* Notify the tracer early, before we stop tracing. */
3438         if (iter->trace && iter->trace->open)
3439                 iter->trace->open(iter);
3440
3441         /* Annotate start of buffers if we had overruns */
3442         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3443                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3444
3445         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3446         if (trace_clocks[tr->clock_id].in_ns)
3447                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3448
3449         /* stop the trace while dumping if we are not opening "snapshot" */
3450         if (!iter->snapshot)
3451                 tracing_stop_tr(tr);
3452
3453         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3454                 for_each_tracing_cpu(cpu) {
3455                         iter->buffer_iter[cpu] =
3456                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3457                 }
3458                 ring_buffer_read_prepare_sync();
3459                 for_each_tracing_cpu(cpu) {
3460                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3461                         tracing_iter_reset(iter, cpu);
3462                 }
3463         } else {
3464                 cpu = iter->cpu_file;
3465                 iter->buffer_iter[cpu] =
3466                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3467                 ring_buffer_read_prepare_sync();
3468                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3469                 tracing_iter_reset(iter, cpu);
3470         }
3471
3472         mutex_unlock(&trace_types_lock);
3473
3474         return iter;
3475
3476  fail:
3477         mutex_unlock(&trace_types_lock);
3478         kfree(iter->trace);
3479         kfree(iter->buffer_iter);
3480 release:
3481         seq_release_private(inode, file);
3482         return ERR_PTR(-ENOMEM);
3483 }
3484
3485 int tracing_open_generic(struct inode *inode, struct file *filp)
3486 {
3487         if (tracing_disabled)
3488                 return -ENODEV;
3489
3490         filp->private_data = inode->i_private;
3491         return 0;
3492 }
3493
3494 bool tracing_is_disabled(void)
3495 {
3496         return tracing_disabled;
3497 }
3498
3499 /*
3500  * Open and update trace_array ref count.
3501  * Must have the current trace_array passed to it.
3502  */
3503 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3504 {
3505         struct trace_array *tr = inode->i_private;
3506
3507         if (tracing_disabled)
3508                 return -ENODEV;
3509
3510         if (trace_array_get(tr) < 0)
3511                 return -ENODEV;
3512
3513         filp->private_data = inode->i_private;
3514
3515         return 0;
3516 }
3517
3518 static int tracing_release(struct inode *inode, struct file *file)
3519 {
3520         struct trace_array *tr = inode->i_private;
3521         struct seq_file *m = file->private_data;
3522         struct trace_iterator *iter;
3523         int cpu;
3524
3525         if (!(file->f_mode & FMODE_READ)) {
3526                 trace_array_put(tr);
3527                 return 0;
3528         }
3529
3530         /* Writes do not use seq_file */
3531         iter = m->private;
3532         mutex_lock(&trace_types_lock);
3533
3534         for_each_tracing_cpu(cpu) {
3535                 if (iter->buffer_iter[cpu])
3536                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3537         }
3538
3539         if (iter->trace && iter->trace->close)
3540                 iter->trace->close(iter);
3541
3542         if (!iter->snapshot)
3543                 /* reenable tracing if it was previously enabled */
3544                 tracing_start_tr(tr);
3545
3546         __trace_array_put(tr);
3547
3548         mutex_unlock(&trace_types_lock);
3549
3550         mutex_destroy(&iter->mutex);
3551         free_cpumask_var(iter->started);
3552         kfree(iter->trace);
3553         kfree(iter->buffer_iter);
3554         seq_release_private(inode, file);
3555
3556         return 0;
3557 }
3558
3559 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3560 {
3561         struct trace_array *tr = inode->i_private;
3562
3563         trace_array_put(tr);
3564         return 0;
3565 }
3566
3567 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3568 {
3569         struct trace_array *tr = inode->i_private;
3570
3571         trace_array_put(tr);
3572
3573         return single_release(inode, file);
3574 }
3575
3576 static int tracing_open(struct inode *inode, struct file *file)
3577 {
3578         struct trace_array *tr = inode->i_private;
3579         struct trace_iterator *iter;
3580         int ret = 0;
3581
3582         if (trace_array_get(tr) < 0)
3583                 return -ENODEV;
3584
3585         /* If this file was open for write, then erase contents */
3586         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3587                 int cpu = tracing_get_cpu(inode);
3588
3589                 if (cpu == RING_BUFFER_ALL_CPUS)
3590                         tracing_reset_online_cpus(&tr->trace_buffer);
3591                 else
3592                         tracing_reset(&tr->trace_buffer, cpu);
3593         }
3594
3595         if (file->f_mode & FMODE_READ) {
3596                 iter = __tracing_open(inode, file, false);
3597                 if (IS_ERR(iter))
3598                         ret = PTR_ERR(iter);
3599                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3600                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3601         }
3602
3603         if (ret < 0)
3604                 trace_array_put(tr);
3605
3606         return ret;
3607 }
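
/*
 * Usage sketch (illustrative): because opening "trace" for write with
 * O_TRUNC resets the buffer, the usual way to clear it from the shell
 * (relative to the tracing directory) is
 *
 *     # echo > trace                     (clear all CPUs)
 *     # echo > per_cpu/cpu0/trace        (clear only cpu0)
 *
 * while a plain read ("cat trace") leaves the contents in place.
 */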
3608
3609 /*
3610  * Some tracers are not suitable for instance buffers.
3611  * A tracer is always available for the global array (toplevel)
3612  * or if it explicitly states that it is.
3613  */
3614 static bool
3615 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3616 {
3617         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3618 }
3619
3620 /* Find the next tracer that this trace array may use */
3621 static struct tracer *
3622 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3623 {
3624         while (t && !trace_ok_for_array(t, tr))
3625                 t = t->next;
3626
3627         return t;
3628 }
3629
3630 static void *
3631 t_next(struct seq_file *m, void *v, loff_t *pos)
3632 {
3633         struct trace_array *tr = m->private;
3634         struct tracer *t = v;
3635
3636         (*pos)++;
3637
3638         if (t)
3639                 t = get_tracer_for_array(tr, t->next);
3640
3641         return t;
3642 }
3643
3644 static void *t_start(struct seq_file *m, loff_t *pos)
3645 {
3646         struct trace_array *tr = m->private;
3647         struct tracer *t;
3648         loff_t l = 0;
3649
3650         mutex_lock(&trace_types_lock);
3651
3652         t = get_tracer_for_array(tr, trace_types);
3653         for (; t && l < *pos; t = t_next(m, t, &l))
3654                 ;
3655
3656         return t;
3657 }
3658
3659 static void t_stop(struct seq_file *m, void *p)
3660 {
3661         mutex_unlock(&trace_types_lock);
3662 }
3663
3664 static int t_show(struct seq_file *m, void *v)
3665 {
3666         struct tracer *t = v;
3667
3668         if (!t)
3669                 return 0;
3670
3671         seq_puts(m, t->name);
3672         if (t->next)
3673                 seq_putc(m, ' ');
3674         else
3675                 seq_putc(m, '\n');
3676
3677         return 0;
3678 }
3679
3680 static const struct seq_operations show_traces_seq_ops = {
3681         .start          = t_start,
3682         .next           = t_next,
3683         .stop           = t_stop,
3684         .show           = t_show,
3685 };
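
/*
 * These seq operations are assumed to back the "available_tracers" file.
 * Reading it prints the registered tracers separated by spaces, e.g.
 * (illustrative output only; the list depends on the configuration):
 *
 *     # cat available_tracers
 *     blk function_graph function nop
 */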
3686
3687 static int show_traces_open(struct inode *inode, struct file *file)
3688 {
3689         struct trace_array *tr = inode->i_private;
3690         struct seq_file *m;
3691         int ret;
3692
3693         if (tracing_disabled)
3694                 return -ENODEV;
3695
3696         ret = seq_open(file, &show_traces_seq_ops);
3697         if (ret)
3698                 return ret;
3699
3700         m = file->private_data;
3701         m->private = tr;
3702
3703         return 0;
3704 }
3705
3706 static ssize_t
3707 tracing_write_stub(struct file *filp, const char __user *ubuf,
3708                    size_t count, loff_t *ppos)
3709 {
3710         return count;
3711 }
3712
3713 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3714 {
3715         int ret;
3716
3717         if (file->f_mode & FMODE_READ)
3718                 ret = seq_lseek(file, offset, whence);
3719         else
3720                 file->f_pos = ret = 0;
3721
3722         return ret;
3723 }
3724
3725 static const struct file_operations tracing_fops = {
3726         .open           = tracing_open,
3727         .read           = seq_read,
3728         .write          = tracing_write_stub,
3729         .llseek         = tracing_lseek,
3730         .release        = tracing_release,
3731 };
3732
3733 static const struct file_operations show_traces_fops = {
3734         .open           = show_traces_open,
3735         .read           = seq_read,
3736         .release        = seq_release,
3737         .llseek         = seq_lseek,
3738 };
3739
3740 /*
3741  * The tracer itself will not take this lock, but we still want
3742  * to provide a consistent cpumask to user-space:
3743  */
3744 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3745
3746 /*
3747  * Temporary storage for the character representation of the
3748  * CPU bitmask (and one more byte for the newline):
3749  */
3750 static char mask_str[NR_CPUS + 1];
3751
3752 static ssize_t
3753 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3754                      size_t count, loff_t *ppos)
3755 {
3756         struct trace_array *tr = file_inode(filp)->i_private;
3757         int len;
3758
3759         mutex_lock(&tracing_cpumask_update_lock);
3760
3761         len = snprintf(mask_str, count, "%*pb\n",
3762                        cpumask_pr_args(tr->tracing_cpumask));
3763         if (len >= count) {
3764                 count = -EINVAL;
3765                 goto out_err;
3766         }
3767         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3768
3769 out_err:
3770         mutex_unlock(&tracing_cpumask_update_lock);
3771
3772         return count;
3773 }
3774
3775 static ssize_t
3776 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3777                       size_t count, loff_t *ppos)
3778 {
3779         struct trace_array *tr = file_inode(filp)->i_private;
3780         cpumask_var_t tracing_cpumask_new;
3781         int err, cpu;
3782
3783         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3784                 return -ENOMEM;
3785
3786         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3787         if (err)
3788                 goto err_unlock;
3789
3790         mutex_lock(&tracing_cpumask_update_lock);
3791
3792         local_irq_disable();
3793         arch_spin_lock(&tr->max_lock);
3794         for_each_tracing_cpu(cpu) {
3795                 /*
3796                  * Increase/decrease the disabled counter if we are
3797                  * about to flip a bit in the cpumask:
3798                  */
3799                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3800                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3801                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3802                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3803                 }
3804                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3805                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3806                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3807                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3808                 }
3809         }
3810         arch_spin_unlock(&tr->max_lock);
3811         local_irq_enable();
3812
3813         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3814
3815         mutex_unlock(&tracing_cpumask_update_lock);
3816         free_cpumask_var(tracing_cpumask_new);
3817
3818         return count;
3819
3820 err_unlock:
3821         free_cpumask_var(tracing_cpumask_new);
3822
3823         return err;
3824 }
3825
3826 static const struct file_operations tracing_cpumask_fops = {
3827         .open           = tracing_open_generic_tr,
3828         .read           = tracing_cpumask_read,
3829         .write          = tracing_cpumask_write,
3830         .release        = tracing_release_generic_tr,
3831         .llseek         = generic_file_llseek,
3832 };
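
/*
 * Usage sketch (illustrative): these fops are assumed to back the
 * "tracing_cpumask" file. The mask is read and written as a hex cpumask
 * string, e.g.
 *
 *     # cat tracing_cpumask
 *     f
 *     # echo 3 > tracing_cpumask        (trace only CPUs 0 and 1)
 *
 * Writing a new mask disables recording on CPUs that were cleared and
 * re-enables it on CPUs that were set, as done in tracing_cpumask_write().
 */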
3833
3834 static int tracing_trace_options_show(struct seq_file *m, void *v)
3835 {
3836         struct tracer_opt *trace_opts;
3837         struct trace_array *tr = m->private;
3838         u32 tracer_flags;
3839         int i;
3840
3841         mutex_lock(&trace_types_lock);
3842         tracer_flags = tr->current_trace->flags->val;
3843         trace_opts = tr->current_trace->flags->opts;
3844
3845         for (i = 0; trace_options[i]; i++) {
3846                 if (tr->trace_flags & (1 << i))
3847                         seq_printf(m, "%s\n", trace_options[i]);
3848                 else
3849                         seq_printf(m, "no%s\n", trace_options[i]);
3850         }
3851
3852         for (i = 0; trace_opts[i].name; i++) {
3853                 if (tracer_flags & trace_opts[i].bit)
3854                         seq_printf(m, "%s\n", trace_opts[i].name);
3855                 else
3856                         seq_printf(m, "no%s\n", trace_opts[i].name);
3857         }
3858         mutex_unlock(&trace_types_lock);
3859
3860         return 0;
3861 }
3862
3863 static int __set_tracer_option(struct trace_array *tr,
3864                                struct tracer_flags *tracer_flags,
3865                                struct tracer_opt *opts, int neg)
3866 {
3867         struct tracer *trace = tracer_flags->trace;
3868         int ret;
3869
3870         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3871         if (ret)
3872                 return ret;
3873
3874         if (neg)
3875                 tracer_flags->val &= ~opts->bit;
3876         else
3877                 tracer_flags->val |= opts->bit;
3878         return 0;
3879 }
3880
3881 /* Try to assign a tracer-specific option */
3882 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3883 {
3884         struct tracer *trace = tr->current_trace;
3885         struct tracer_flags *tracer_flags = trace->flags;
3886         struct tracer_opt *opts = NULL;
3887         int i;
3888
3889         for (i = 0; tracer_flags->opts[i].name; i++) {
3890                 opts = &tracer_flags->opts[i];
3891
3892                 if (strcmp(cmp, opts->name) == 0)
3893                         return __set_tracer_option(tr, trace->flags, opts, neg);
3894         }
3895
3896         return -EINVAL;
3897 }
3898
3899 /* Some tracers require overwrite to stay enabled */
3900 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3901 {
3902         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3903                 return -1;
3904
3905         return 0;
3906 }
3907
3908 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3909 {
3910         /* do nothing if the flag is already in the requested state */
3911         if (!!(tr->trace_flags & mask) == !!enabled)
3912                 return 0;
3913
3914         /* Give the tracer a chance to approve the change */
3915         if (tr->current_trace->flag_changed)
3916                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3917                         return -EINVAL;
3918
3919         if (enabled)
3920                 tr->trace_flags |= mask;
3921         else
3922                 tr->trace_flags &= ~mask;
3923
3924         if (mask == TRACE_ITER_RECORD_CMD)
3925                 trace_event_enable_cmd_record(enabled);
3926
3927         if (mask == TRACE_ITER_EVENT_FORK)
3928                 trace_event_follow_fork(tr, enabled);
3929
3930         if (mask == TRACE_ITER_OVERWRITE) {
3931                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3932 #ifdef CONFIG_TRACER_MAX_TRACE
3933                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3934 #endif
3935         }
3936
3937         if (mask == TRACE_ITER_PRINTK) {
3938                 trace_printk_start_stop_comm(enabled);
3939                 trace_printk_control(enabled);
3940         }
3941
3942         return 0;
3943 }
3944
3945 static int trace_set_options(struct trace_array *tr, char *option)
3946 {
3947         char *cmp;
3948         int neg = 0;
3949         int ret = -ENODEV;
3950         int i;
3951         size_t orig_len = strlen(option);
3952
3953         cmp = strstrip(option);
3954
3955         if (strncmp(cmp, "no", 2) == 0) {
3956                 neg = 1;
3957                 cmp += 2;
3958         }
3959
3960         mutex_lock(&trace_types_lock);
3961
3962         for (i = 0; trace_options[i]; i++) {
3963                 if (strcmp(cmp, trace_options[i]) == 0) {
3964                         ret = set_tracer_flag(tr, 1 << i, !neg);
3965                         break;
3966                 }
3967         }
3968
3969         /* If no generic option matched, try the tracer-specific options */
3970         if (!trace_options[i])
3971                 ret = set_tracer_option(tr, cmp, neg);
3972
3973         mutex_unlock(&trace_types_lock);
3974
3975         /*
3976          * If the first trailing whitespace is replaced with '\0' by strstrip,
3977          * turn it back into a space.
3978          */
3979         if (orig_len > strlen(option))
3980                 option[strlen(option)] = ' ';
3981
3982         return ret;
3983 }
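
/*
 * Usage sketch (illustrative): the same parser handles both the generic
 * trace flags and the tracer-specific options, and a "no" prefix clears
 * an option, e.g.
 *
 *     # echo noprint-parent > trace_options
 *     # echo sym-offset > trace_options
 *
 * The option names shown are examples of generic flags; the set actually
 * available depends on the kernel configuration and the current tracer.
 */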
3984
3985 static void __init apply_trace_boot_options(void)
3986 {
3987         char *buf = trace_boot_options_buf;
3988         char *option;
3989
3990         while (true) {
3991                 option = strsep(&buf, ",");
3992
3993                 if (!option)
3994                         break;
3995
3996                 if (*option)
3997                         trace_set_options(&global_trace, option);
3998
3999                 /* Put back the comma to allow this to be called again */
4000                 if (buf)
4001                         *(buf - 1) = ',';
4002         }
4003 }
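
/*
 * Boot-time sketch (illustrative): trace_boot_options_buf is filled from
 * the "trace_options=" kernel command line parameter, so something like
 *
 *     trace_options=sym-offset,noirq-info
 *
 * is split on ',' here and each piece is fed to trace_set_options() for
 * the global trace array.
 */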
4004
4005 static ssize_t
4006 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4007                         size_t cnt, loff_t *ppos)
4008 {
4009         struct seq_file *m = filp->private_data;
4010         struct trace_array *tr = m->private;
4011         char buf[64];
4012         int ret;
4013
4014         if (cnt >= sizeof(buf))
4015                 return -EINVAL;
4016
4017         if (copy_from_user(buf, ubuf, cnt))
4018                 return -EFAULT;
4019
4020         buf[cnt] = 0;
4021
4022         ret = trace_set_options(tr, buf);
4023         if (ret < 0)
4024                 return ret;
4025
4026         *ppos += cnt;
4027
4028         return cnt;
4029 }
4030
4031 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4032 {
4033         struct trace_array *tr = inode->i_private;
4034         int ret;
4035
4036         if (tracing_disabled)
4037                 return -ENODEV;
4038
4039         if (trace_array_get(tr) < 0)
4040                 return -ENODEV;
4041
4042         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4043         if (ret < 0)
4044                 trace_array_put(tr);
4045
4046         return ret;
4047 }
4048
4049 static const struct file_operations tracing_iter_fops = {
4050         .open           = tracing_trace_options_open,
4051         .read           = seq_read,
4052         .llseek         = seq_lseek,
4053         .release        = tracing_single_release_tr,
4054         .write          = tracing_trace_options_write,
4055 };
4056
4057 static const char readme_msg[] =
4058         "tracing mini-HOWTO:\n\n"
4059         "# echo 0 > tracing_on : quick way to disable tracing\n"
4060         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4061         " Important files:\n"
4062         "  trace\t\t\t- The static contents of the buffer\n"
4063         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4064         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4065         "  current_tracer\t- function and latency tracers\n"
4066         "  available_tracers\t- list of configured tracers for current_tracer\n"
4067         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4068         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4069         "  trace_clock\t\t- change the clock used to order events\n"
4070         "       local:   Per cpu clock but may not be synced across CPUs\n"
4071         "      global:   Synced across CPUs but slows tracing down.\n"
4072         "     counter:   Not a clock, but just an increment\n"
4073         "      uptime:   Jiffy counter from time of boot\n"
4074         "        perf:   Same clock that perf events use\n"
4075 #ifdef CONFIG_X86_64
4076         "     x86-tsc:   TSC cycle counter\n"
4077 #endif
4078         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
4079         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4080         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4081         "\t\t\t  Remove sub-buffer with rmdir\n"
4082         "  trace_options\t\t- Set format or modify how tracing happens\n"
4083         "\t\t\t  Disable an option by prefixing the option name\n"
4084         "\t\t\t  with 'no'\n"
4085         "  saved_cmdlines_size\t- echo the number of comm-pid entries to store in here\n"
4086 #ifdef CONFIG_DYNAMIC_FTRACE
4087         "\n  available_filter_functions - list of functions that can be filtered on\n"
4088         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4089         "\t\t\t  functions\n"
4090         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4091         "\t     modules: Can select a group via module\n"
4092         "\t      Format: :mod:<module-name>\n"
4093         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4094         "\t    triggers: a command to perform when function is hit\n"
4095         "\t      Format: <function>:<trigger>[:count]\n"
4096         "\t     trigger: traceon, traceoff\n"
4097         "\t\t      enable_event:<system>:<event>\n"
4098         "\t\t      disable_event:<system>:<event>\n"
4099 #ifdef CONFIG_STACKTRACE
4100         "\t\t      stacktrace\n"
4101 #endif
4102 #ifdef CONFIG_TRACER_SNAPSHOT
4103         "\t\t      snapshot\n"
4104 #endif
4105         "\t\t      dump\n"
4106         "\t\t      cpudump\n"
4107         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4108         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4109         "\t     The first one will disable tracing every time do_fault is hit\n"
4110         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4111         "\t       The first time do_trap is hit and it disables tracing, the\n"
4112         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4113         "\t       the counter will not decrement. It only decrements when the\n"
4114         "\t       trigger did work\n"
4115         "\t     To remove trigger without count:\n"
4116         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4117         "\t     To remove trigger with a count:\n"
4118         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4119         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4120         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4121         "\t    modules: Can select a group via module command :mod:\n"
4122         "\t    Does not accept triggers\n"
4123 #endif /* CONFIG_DYNAMIC_FTRACE */
4124 #ifdef CONFIG_FUNCTION_TRACER
4125         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4126         "\t\t    (function)\n"
4127 #endif
4128 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4129         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4130         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4131         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4132 #endif
4133 #ifdef CONFIG_TRACER_SNAPSHOT
4134         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4135         "\t\t\t  snapshot buffer. Read the contents for more\n"
4136         "\t\t\t  information\n"
4137 #endif
4138 #ifdef CONFIG_STACK_TRACER
4139         "  stack_trace\t\t- Shows the max stack trace when active\n"
4140         "  stack_max_size\t- Shows current max stack size that was traced\n"
4141         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4142         "\t\t\t  new trace)\n"
4143 #ifdef CONFIG_DYNAMIC_FTRACE
4144         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4145         "\t\t\t  traces\n"
4146 #endif
4147 #endif /* CONFIG_STACK_TRACER */
4148         "  events/\t\t- Directory containing all trace event subsystems:\n"
4149         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4150         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4151         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4152         "\t\t\t  events\n"
4153         "      filter\t\t- If set, only events passing filter are traced\n"
4154         "  events/<system>/<event>/\t- Directory containing control files for\n"
4155         "\t\t\t  <event>:\n"
4156         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4157         "      filter\t\t- If set, only events passing filter are traced\n"
4158         "      trigger\t\t- If set, a command to perform when event is hit\n"
4159         "\t    Format: <trigger>[:count][if <filter>]\n"
4160         "\t   trigger: traceon, traceoff\n"
4161         "\t            enable_event:<system>:<event>\n"
4162         "\t            disable_event:<system>:<event>\n"
4163 #ifdef CONFIG_HIST_TRIGGERS
4164         "\t            enable_hist:<system>:<event>\n"
4165         "\t            disable_hist:<system>:<event>\n"
4166 #endif
4167 #ifdef CONFIG_STACKTRACE
4168         "\t\t    stacktrace\n"
4169 #endif
4170 #ifdef CONFIG_TRACER_SNAPSHOT
4171         "\t\t    snapshot\n"
4172 #endif
4173 #ifdef CONFIG_HIST_TRIGGERS
4174         "\t\t    hist (see below)\n"
4175 #endif
4176         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4177         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4178         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4179         "\t                  events/block/block_unplug/trigger\n"
4180         "\t   The first disables tracing every time block_unplug is hit.\n"
4181         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4182         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4183         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4184         "\t   Like function triggers, the counter is only decremented if it\n"
4185         "\t    enabled or disabled tracing.\n"
4186         "\t   To remove a trigger without a count:\n"
4187         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4188         "\t   To remove a trigger with a count:\n"
4189         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4190         "\t   Filters can be ignored when removing a trigger.\n"
4191 #ifdef CONFIG_HIST_TRIGGERS
4192         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4193         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4194         "\t            [:values=<field1[,field2,...]>]\n"
4195         "\t            [:sort=<field1[,field2,...]>]\n"
4196         "\t            [:size=#entries]\n"
4197         "\t            [:pause][:continue][:clear]\n"
4198         "\t            [:name=histname1]\n"
4199         "\t            [if <filter>]\n\n"
4200         "\t    When a matching event is hit, an entry is added to a hash\n"
4201         "\t    table using the key(s) and value(s) named, and the value of a\n"
4202         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4203         "\t    correspond to fields in the event's format description.  Keys\n"
4204         "\t    can be any field, or the special string 'stacktrace'.\n"
4205         "\t    Compound keys consisting of up to two fields can be specified\n"
4206         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4207         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4208         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4209         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4210         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4211         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4212         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4213         "\t    its histogram data will be shared with other triggers of the\n"
4214         "\t    same name, and trigger hits will update this common data.\n\n"
4215         "\t    Reading the 'hist' file for the event will dump the hash\n"
4216         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4217         "\t    triggers attached to an event, there will be a table for each\n"
4218         "\t    trigger in the output.  The table displayed for a named\n"
4219         "\t    trigger will be the same as any other instance having the\n"
4220         "\t    same name.  The default format used to display a given field\n"
4221         "\t    can be modified by appending any of the following modifiers\n"
4222         "\t    to the field name, as applicable:\n\n"
4223         "\t            .hex        display a number as a hex value\n"
4224         "\t            .sym        display an address as a symbol\n"
4225         "\t            .sym-offset display an address as a symbol and offset\n"
4226         "\t            .execname   display a common_pid as a program name\n"
4227         "\t            .syscall    display a syscall id as a syscall name\n"
4228         "\t            .log2       display log2 value rather than raw number\n\n"
4229         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4230         "\t    trigger or to start a hist trigger but not log any events\n"
4231         "\t    until told to do so.  'continue' can be used to start or\n"
4232         "\t    restart a paused hist trigger.\n\n"
4233         "\t    The 'clear' parameter will clear the contents of a running\n"
4234         "\t    hist trigger and leave its current paused/active state\n"
4235         "\t    unchanged.\n\n"
4236         "\t    The enable_hist and disable_hist triggers can be used to\n"
4237         "\t    have one event conditionally start and stop another event's\n"
4238         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4239         "\t    the enable_event and disable_event triggers.\n"
4240 #endif
4241 ;
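
/*
 * Illustrative example (an assumption, not part of the README text above):
 * a hist trigger following the HIST_TRIGGERS description could look like
 *
 *     # echo 'hist:keys=common_pid.execname:values=bytes_req:sort=hitcount' \
 *            > events/kmem/kmalloc/trigger
 *     # cat events/kmem/kmalloc/hist
 *
 * where bytes_req is a field of the kmem:kmalloc event and .execname is
 * one of the field modifiers listed above.
 */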
4242
4243 static ssize_t
4244 tracing_readme_read(struct file *filp, char __user *ubuf,
4245                        size_t cnt, loff_t *ppos)
4246 {
4247         return simple_read_from_buffer(ubuf, cnt, ppos,
4248                                         readme_msg, strlen(readme_msg));
4249 }
4250
4251 static const struct file_operations tracing_readme_fops = {
4252         .open           = tracing_open_generic,
4253         .read           = tracing_readme_read,
4254         .llseek         = generic_file_llseek,
4255 };
4256
4257 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4258 {
4259         unsigned int *ptr = v;
4260
4261         if (*pos || m->count)
4262                 ptr++;
4263
4264         (*pos)++;
4265
4266         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4267              ptr++) {
4268                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4269                         continue;
4270
4271                 return ptr;
4272         }
4273
4274         return NULL;
4275 }
4276
4277 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4278 {
4279         void *v;
4280         loff_t l = 0;
4281
4282         preempt_disable();
4283         arch_spin_lock(&trace_cmdline_lock);
4284
4285         v = &savedcmd->map_cmdline_to_pid[0];
4286         while (l <= *pos) {
4287                 v = saved_cmdlines_next(m, v, &l);
4288                 if (!v)
4289                         return NULL;
4290         }
4291
4292         return v;
4293 }
4294
4295 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4296 {
4297         arch_spin_unlock(&trace_cmdline_lock);
4298         preempt_enable();
4299 }
4300
4301 static int saved_cmdlines_show(struct seq_file *m, void *v)
4302 {
4303         char buf[TASK_COMM_LEN];
4304         unsigned int *pid = v;
4305
4306         __trace_find_cmdline(*pid, buf);
4307         seq_printf(m, "%d %s\n", *pid, buf);
4308         return 0;
4309 }
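
/*
 * Illustrative output of the "saved_cmdlines" file as formatted by
 * saved_cmdlines_show() above (PIDs and comms are only examples):
 *
 *     # cat saved_cmdlines
 *     17 ksoftirqd/1
 *     1243 bash
 *     1250 sshd
 */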
4310
4311 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4312         .start          = saved_cmdlines_start,
4313         .next           = saved_cmdlines_next,
4314         .stop           = saved_cmdlines_stop,
4315         .show           = saved_cmdlines_show,
4316 };
4317
4318 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4319 {
4320         if (tracing_disabled)
4321                 return -ENODEV;
4322
4323         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4324 }
4325
4326 static const struct file_operations tracing_saved_cmdlines_fops = {
4327         .open           = tracing_saved_cmdlines_open,
4328         .read           = seq_read,
4329         .llseek         = seq_lseek,
4330         .release        = seq_release,
4331 };
4332
4333 static ssize_t
4334 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4335                                  size_t cnt, loff_t *ppos)
4336 {
4337         char buf[64];
4338         int r;
4339
4340         arch_spin_lock(&trace_cmdline_lock);
4341         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4342         arch_spin_unlock(&trace_cmdline_lock);
4343
4344         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4345 }
4346
4347 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4348 {
4349         kfree(s->saved_cmdlines);
4350         kfree(s->map_cmdline_to_pid);
4351         kfree(s);
4352 }
4353
4354 static int tracing_resize_saved_cmdlines(unsigned int val)
4355 {
4356         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4357
4358         s = kmalloc(sizeof(*s), GFP_KERNEL);
4359         if (!s)
4360                 return -ENOMEM;
4361
4362         if (allocate_cmdlines_buffer(val, s) < 0) {
4363                 kfree(s);
4364                 return -ENOMEM;
4365         }
4366
4367         arch_spin_lock(&trace_cmdline_lock);
4368         savedcmd_temp = savedcmd;
4369         savedcmd = s;
4370         arch_spin_unlock(&trace_cmdline_lock);
4371         free_saved_cmdlines_buffer(savedcmd_temp);
4372
4373         return 0;
4374 }
4375
4376 static ssize_t
4377 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4378                                   size_t cnt, loff_t *ppos)
4379 {
4380         unsigned long val;
4381         int ret;
4382
4383         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4384         if (ret)
4385                 return ret;
4386
4387         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4388         if (!val || val > PID_MAX_DEFAULT)
4389                 return -EINVAL;
4390
4391         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4392         if (ret < 0)
4393                 return ret;
4394
4395         *ppos += cnt;
4396
4397         return cnt;
4398 }
4399
4400 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4401         .open           = tracing_open_generic,
4402         .read           = tracing_saved_cmdlines_size_read,
4403         .write          = tracing_saved_cmdlines_size_write,
4404 };
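
/*
 * Usage sketch (illustrative): these fops are assumed to back the
 * "saved_cmdlines_size" file mentioned in the README above, so resizing
 * the comm-pid cache is simply
 *
 *     # echo 1024 > saved_cmdlines_size
 *     # cat saved_cmdlines_size
 *     1024
 *
 * with the new value bounded to 1..PID_MAX_DEFAULT by the write handler.
 */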
4405
4406 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4407 static union trace_enum_map_item *
4408 update_enum_map(union trace_enum_map_item *ptr)
4409 {
4410         if (!ptr->map.enum_string) {
4411                 if (ptr->tail.next) {
4412                         ptr = ptr->tail.next;
4413                         /* Set ptr to the next real item (skip head) */
4414                         ptr++;
4415                 } else
4416                         return NULL;
4417         }
4418         return ptr;
4419 }
4420
4421 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4422 {
4423         union trace_enum_map_item *ptr = v;
4424
4425         /*
4426          * Paranoid! If ptr points to end, we don't want to increment past it.
4427          * This really should never happen.
4428          */
4429         ptr = update_enum_map(ptr);
4430         if (WARN_ON_ONCE(!ptr))
4431                 return NULL;
4432
4433         ptr++;
4434
4435         (*pos)++;
4436
4437         ptr = update_enum_map(ptr);
4438
4439         return ptr;
4440 }
4441
4442 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4443 {
4444         union trace_enum_map_item *v;
4445         loff_t l = 0;
4446
4447         mutex_lock(&trace_enum_mutex);
4448
4449         v = trace_enum_maps;
4450         if (v)
4451                 v++; /* skip the head item of the first map array */
4452
4453         while (v && l < *pos) {
4454                 v = enum_map_next(m, v, &l);
4455         }
4456
4457         return v;
4458 }
4459
4460 static void enum_map_stop(struct seq_file *m, void *v)
4461 {
4462         mutex_unlock(&trace_enum_mutex);
4463 }
4464
4465 static int enum_map_show(struct seq_file *m, void *v)
4466 {
4467         union trace_enum_map_item *ptr = v;
4468
4469         seq_printf(m, "%s %ld (%s)\n",
4470                    ptr->map.enum_string, ptr->map.enum_value,
4471                    ptr->map.system);
4472
4473         return 0;
4474 }
4475
4476 static const struct seq_operations tracing_enum_map_seq_ops = {
4477         .start          = enum_map_start,
4478         .next           = enum_map_next,
4479         .stop           = enum_map_stop,
4480         .show           = enum_map_show,
4481 };
4482
4483 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4484 {
4485         if (tracing_disabled)
4486                 return -ENODEV;
4487
4488         return seq_open(filp, &tracing_enum_map_seq_ops);
4489 }
4490
4491 static const struct file_operations tracing_enum_map_fops = {
4492         .open           = tracing_enum_map_open,
4493         .read           = seq_read,
4494         .llseek         = seq_lseek,
4495         .release        = seq_release,
4496 };
4497
4498 static inline union trace_enum_map_item *
4499 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4500 {
4501         /* Return tail of array given the head */
4502         return ptr + ptr->head.length + 1;
4503 }
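
/*
 * Illustrative layout of one chunk of trace_enum_maps as built by
 * trace_insert_enum_map_file() below (len == 3 shown as an example):
 *
 *     index 0: head  (mod, length = 3)
 *     index 1: map 0
 *     index 2: map 1
 *     index 3: map 2
 *     index 4: tail  (next -> next chunk, or NULL)
 *
 * which is why the tail of a chunk is found at ptr + length + 1.
 */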
4504
4505 static void
4506 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4507                            int len)
4508 {
4509         struct trace_enum_map **stop;
4510         struct trace_enum_map **map;
4511         union trace_enum_map_item *map_array;
4512         union trace_enum_map_item *ptr;
4513
4514         stop = start + len;
4515
4516         /*
4517          * The trace_enum_maps contains the maps plus a head and tail item,
4518          * where the head holds the module and the length of the array, and
4519          * the tail holds a pointer to the next array in the list.
4520          */
4521         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4522         if (!map_array) {
4523                 pr_warn("Unable to allocate trace enum mapping\n");
4524                 return;
4525         }
4526
4527         mutex_lock(&trace_enum_mutex);
4528
4529         if (!trace_enum_maps)
4530                 trace_enum_maps = map_array;
4531         else {
4532                 ptr = trace_enum_maps;
4533                 for (;;) {
4534                         ptr = trace_enum_jmp_to_tail(ptr);
4535                         if (!ptr->tail.next)
4536                                 break;
4537                         ptr = ptr->tail.next;
4538
4539                 }
4540                 ptr->tail.next = map_array;
4541         }
4542         map_array->head.mod = mod;
4543         map_array->head.length = len;
4544         map_array++;
4545
4546         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4547                 map_array->map = **map;
4548                 map_array++;
4549         }
4550         memset(map_array, 0, sizeof(*map_array));
4551
4552         mutex_unlock(&trace_enum_mutex);
4553 }
4554
4555 static void trace_create_enum_file(struct dentry *d_tracer)
4556 {
4557         trace_create_file("enum_map", 0444, d_tracer,
4558                           NULL, &tracing_enum_map_fops);
4559 }
4560
4561 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4562 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4563 static inline void trace_insert_enum_map_file(struct module *mod,
4564                               struct trace_enum_map **start, int len) { }
4565 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4566
4567 static void trace_insert_enum_map(struct module *mod,
4568                                   struct trace_enum_map **start, int len)
4569 {
4570         struct trace_enum_map **map;
4571
4572         if (len <= 0)
4573                 return;
4574
4575         map = start;
4576
4577         trace_event_enum_update(map, len);
4578
4579         trace_insert_enum_map_file(mod, start, len);
4580 }
4581
4582 static ssize_t
4583 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4584                        size_t cnt, loff_t *ppos)
4585 {
4586         struct trace_array *tr = filp->private_data;
4587         char buf[MAX_TRACER_SIZE+2];
4588         int r;
4589
4590         mutex_lock(&trace_types_lock);
4591         r = sprintf(buf, "%s\n", tr->current_trace->name);
4592         mutex_unlock(&trace_types_lock);
4593
4594         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4595 }
4596
4597 int tracer_init(struct tracer *t, struct trace_array *tr)
4598 {
4599         tracing_reset_online_cpus(&tr->trace_buffer);
4600         return t->init(tr);
4601 }
4602
4603 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4604 {
4605         int cpu;
4606
4607         for_each_tracing_cpu(cpu)
4608                 per_cpu_ptr(buf->data, cpu)->entries = val;
4609 }
4610
4611 #ifdef CONFIG_TRACER_MAX_TRACE
4612 /* resize @trace_buf's per-cpu entries to match @size_buf's entries */
4613 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4614                                         struct trace_buffer *size_buf, int cpu_id)
4615 {
4616         int cpu, ret = 0;
4617
4618         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4619                 for_each_tracing_cpu(cpu) {
4620                         ret = ring_buffer_resize(trace_buf->buffer,
4621                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4622                         if (ret < 0)
4623                                 break;
4624                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4625                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4626                 }
4627         } else {
4628                 ret = ring_buffer_resize(trace_buf->buffer,
4629                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4630                 if (ret == 0)
4631                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4632                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4633         }
4634
4635         return ret;
4636 }
4637 #endif /* CONFIG_TRACER_MAX_TRACE */
4638
4639 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4640                                         unsigned long size, int cpu)
4641 {
4642         int ret;
4643
4644         /*
4645          * If kernel or user changes the size of the ring buffer
4646          * we use the size that was given, and we can forget about
4647          * expanding it later.
4648          */
4649         ring_buffer_expanded = true;
4650
4651         /* May be called before buffers are initialized */
4652         if (!tr->trace_buffer.buffer)
4653                 return 0;
4654
4655         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4656         if (ret < 0)
4657                 return ret;
4658
4659 #ifdef CONFIG_TRACER_MAX_TRACE
4660         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4661             !tr->current_trace->use_max_tr)
4662                 goto out;
4663
4664         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4665         if (ret < 0) {
4666                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4667                                                      &tr->trace_buffer, cpu);
4668                 if (r < 0) {
4669                         /*
4670                          * AARGH! We are left with a max buffer of a
4671                          * different size!
4672                          * The max buffer is our "snapshot" buffer.
4673                          * When a tracer needs a snapshot (one of the
4674                          * latency tracers), it swaps the max buffer
4675                          * with the saved snapshot. We succeeded in
4676                          * updating the size of the main buffer, but
4677                          * failed to update the size of the max buffer.
4678                          * When we then tried to reset the main buffer to
4679                          * its original size, we failed there too. This is
4680                          * very unlikely to happen, but if it does, warn
4681                          * and kill all tracing.
4682                          */
4683                         WARN_ON(1);
4684                         tracing_disabled = 1;
4685                 }
4686                 return ret;
4687         }
4688
4689         if (cpu == RING_BUFFER_ALL_CPUS)
4690                 set_buffer_entries(&tr->max_buffer, size);
4691         else
4692                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4693
4694  out:
4695 #endif /* CONFIG_TRACER_MAX_TRACE */
4696
4697         if (cpu == RING_BUFFER_ALL_CPUS)
4698                 set_buffer_entries(&tr->trace_buffer, size);
4699         else
4700                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4701
4702         return ret;
4703 }
4704
4705 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4706                                           unsigned long size, int cpu_id)
4707 {
4708         int ret = size;
4709
4710         mutex_lock(&trace_types_lock);
4711
4712         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4713                 /* make sure this cpu is enabled in the mask */
4714                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4715                         ret = -EINVAL;
4716                         goto out;
4717                 }
4718         }
4719
4720         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4721         if (ret < 0)
4722                 ret = -ENOMEM;
4723
4724 out:
4725         mutex_unlock(&trace_types_lock);
4726
4727         return ret;
4728 }
4729
4730
4731 /**
4732  * tracing_update_buffers - used by tracing facility to expand ring buffers
4733  *
4734  * To save memory on systems where tracing is configured in but never
4735  * used, the ring buffers are set to a minimum size. Once a user
4736  * starts to use the tracing facility, they need to grow to their
4737  * default size.
4738  *
4739  * This function is to be called when a tracer is about to be used.
4740  */
4741 int tracing_update_buffers(void)
4742 {
4743         int ret = 0;
4744
4745         mutex_lock(&trace_types_lock);
4746         if (!ring_buffer_expanded)
4747                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4748                                                 RING_BUFFER_ALL_CPUS);
4749         mutex_unlock(&trace_types_lock);
4750
4751         return ret;
4752 }
4753
4754 struct trace_option_dentry;
4755
4756 static void
4757 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4758
4759 /*
4760  * Used to clear out the tracer before deletion of an instance.
4761  * Must have trace_types_lock held.
4762  */
4763 static void tracing_set_nop(struct trace_array *tr)
4764 {
4765         if (tr->current_trace == &nop_trace)
4766                 return;
4767
4768         tr->current_trace->enabled--;
4769
4770         if (tr->current_trace->reset)
4771                 tr->current_trace->reset(tr);
4772
4773         tr->current_trace = &nop_trace;
4774 }
4775
4776 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4777 {
4778         /* Only enable if the directory has been created already. */
4779         if (!tr->dir)
4780                 return;
4781
4782         create_trace_option_files(tr, t);
4783 }
4784
4785 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4786 {
4787         struct tracer *t;
4788 #ifdef CONFIG_TRACER_MAX_TRACE
4789         bool had_max_tr;
4790 #endif
4791         int ret = 0;
4792
4793         mutex_lock(&trace_types_lock);
4794
4795         if (!ring_buffer_expanded) {
4796                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4797                                                 RING_BUFFER_ALL_CPUS);
4798                 if (ret < 0)
4799                         goto out;
4800                 ret = 0;
4801         }
4802
4803         for (t = trace_types; t; t = t->next) {
4804                 if (strcmp(t->name, buf) == 0)
4805                         break;
4806         }
4807         if (!t) {
4808                 ret = -EINVAL;
4809                 goto out;
4810         }
4811         if (t == tr->current_trace)
4812                 goto out;
4813
4814         /* Some tracers are only allowed for the top level buffer */
4815         if (!trace_ok_for_array(t, tr)) {
4816                 ret = -EINVAL;
4817                 goto out;
4818         }
4819
4820         /* If trace pipe files are being read, we can't change the tracer */
4821         if (tr->current_trace->ref) {
4822                 ret = -EBUSY;
4823                 goto out;
4824         }
4825
4826         trace_branch_disable();
4827
4828         tr->current_trace->enabled--;
4829
4830         if (tr->current_trace->reset)
4831                 tr->current_trace->reset(tr);
4832
4833         /* Current trace needs to be nop_trace before synchronize_sched */
4834         tr->current_trace = &nop_trace;
4835
4836 #ifdef CONFIG_TRACER_MAX_TRACE
4837         had_max_tr = tr->allocated_snapshot;
4838
4839         if (had_max_tr && !t->use_max_tr) {
4840                 /*
4841                  * We need to make sure that the update_max_tr sees that
4842                  * current_trace changed to nop_trace to keep it from
4843                  * swapping the buffers after we resize it.
4844                  * The update_max_tr is called with interrupts disabled,
4845                  * so a synchronize_sched() is sufficient.
4846                  */
4847                 synchronize_sched();
4848                 free_snapshot(tr);
4849         }
4850 #endif
4851
4852 #ifdef CONFIG_TRACER_MAX_TRACE
4853         if (t->use_max_tr && !had_max_tr) {
4854                 ret = alloc_snapshot(tr);
4855                 if (ret < 0)
4856                         goto out;
4857         }
4858 #endif
4859
4860         if (t->init) {
4861                 ret = tracer_init(t, tr);
4862                 if (ret)
4863                         goto out;
4864         }
4865
4866         tr->current_trace = t;
4867         tr->current_trace->enabled++;
4868         trace_branch_enable(tr);
4869  out:
4870         mutex_unlock(&trace_types_lock);
4871
4872         return ret;
4873 }
4874
4875 static ssize_t
4876 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4877                         size_t cnt, loff_t *ppos)
4878 {
4879         struct trace_array *tr = filp->private_data;
4880         char buf[MAX_TRACER_SIZE+1];
4881         int i;
4882         size_t ret;
4883         int err;
4884
4885         ret = cnt;
4886
4887         if (cnt > MAX_TRACER_SIZE)
4888                 cnt = MAX_TRACER_SIZE;
4889
4890         if (copy_from_user(buf, ubuf, cnt))
4891                 return -EFAULT;
4892
4893         buf[cnt] = 0;
4894
4895         /* strip trailing whitespace */
4896         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4897                 buf[i] = 0;
4898
4899         err = tracing_set_tracer(tr, buf);
4900         if (err)
4901                 return err;
4902
4903         *ppos += ret;
4904
4905         return ret;
4906 }
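
/*
 * A minimal user-space sketch of how the interface served by
 * tracing_set_trace_write() is typically driven. The tracefs mount point
 * and the "function" tracer name are assumptions for illustration; any
 * name listed in available_tracers would do, and a trailing newline is
 * stripped by the write handler above.
 */
#if 0	/* illustrative user-space example, not part of the kernel build */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *name = "function\n";
	int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);

	if (fd < 0) {
		perror("open current_tracer");
		return 1;
	}
	if (write(fd, name, strlen(name)) < 0)
		perror("write current_tracer");
	close(fd);
	return 0;
}
#endif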
4907
4908 static ssize_t
4909 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4910                    size_t cnt, loff_t *ppos)
4911 {
4912         char buf[64];
4913         int r;
4914
4915         r = snprintf(buf, sizeof(buf), "%ld\n",
4916                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4917         if (r > sizeof(buf))
4918                 r = sizeof(buf);
4919         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4920 }
4921
4922 static ssize_t
4923 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4924                     size_t cnt, loff_t *ppos)
4925 {
4926         unsigned long val;
4927         int ret;
4928
4929         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4930         if (ret)
4931                 return ret;
4932
4933         *ptr = val * 1000;
4934
4935         return cnt;
4936 }
4937
4938 static ssize_t
4939 tracing_thresh_read(struct file *filp, char __user *ubuf,
4940                     size_t cnt, loff_t *ppos)
4941 {
4942         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4943 }
4944
4945 static ssize_t
4946 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4947                      size_t cnt, loff_t *ppos)
4948 {
4949         struct trace_array *tr = filp->private_data;
4950         int ret;
4951
4952         mutex_lock(&trace_types_lock);
4953         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4954         if (ret < 0)
4955                 goto out;
4956
4957         if (tr->current_trace->update_thresh) {
4958                 ret = tr->current_trace->update_thresh(tr);
4959                 if (ret < 0)
4960                         goto out;
4961         }
4962
4963         ret = cnt;
4964 out:
4965         mutex_unlock(&trace_types_lock);
4966
4967         return ret;
4968 }
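
/*
 * A small user-space sketch for the file backed by tracing_thresh_read()
 * and tracing_thresh_write(). Values are written and read in
 * microseconds; tracing_nsecs_write() stores them internally in
 * nanoseconds. The tracefs path is an assumption for illustration.
 */
#if 0	/* illustrative user-space example, not part of the kernel build */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[64];
	ssize_t n;
	int fd = open("/sys/kernel/tracing/tracing_thresh", O_RDWR);

	if (fd < 0) {
		perror("open tracing_thresh");
		return 1;
	}
	write(fd, "100\n", 4);			/* threshold of 100 usecs */
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("tracing_thresh: %s", buf);	/* prints "100" */
	}
	close(fd);
	return 0;
}
#endif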
4969
4970 #ifdef CONFIG_TRACER_MAX_TRACE
4971
4972 static ssize_t
4973 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4974                      size_t cnt, loff_t *ppos)
4975 {
4976         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4977 }
4978
4979 static ssize_t
4980 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4981                       size_t cnt, loff_t *ppos)
4982 {
4983         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4984 }
4985
4986 #endif
4987
4988 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4989 {
4990         struct trace_array *tr = inode->i_private;
4991         struct trace_iterator *iter;
4992         int ret = 0;
4993
4994         if (tracing_disabled)
4995                 return -ENODEV;
4996
4997         if (trace_array_get(tr) < 0)
4998                 return -ENODEV;
4999
5000         mutex_lock(&trace_types_lock);
5001
5002         /* create a buffer to store the information to pass to userspace */
5003         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5004         if (!iter) {
5005                 ret = -ENOMEM;
5006                 __trace_array_put(tr);
5007                 goto out;
5008         }
5009
5010         trace_seq_init(&iter->seq);
5011         iter->trace = tr->current_trace;
5012
5013         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5014                 ret = -ENOMEM;
5015                 goto fail;
5016         }
5017
5018         /* trace pipe does not show start of buffer */
5019         cpumask_setall(iter->started);
5020
5021         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5022                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5023
5024         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5025         if (trace_clocks[tr->clock_id].in_ns)
5026                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5027
5028         iter->tr = tr;
5029         iter->trace_buffer = &tr->trace_buffer;
5030         iter->cpu_file = tracing_get_cpu(inode);
5031         mutex_init(&iter->mutex);
5032         filp->private_data = iter;
5033
5034         if (iter->trace->pipe_open)
5035                 iter->trace->pipe_open(iter);
5036
5037         nonseekable_open(inode, filp);
5038
5039         tr->current_trace->ref++;
5040 out:
5041         mutex_unlock(&trace_types_lock);
5042         return ret;
5043
5044 fail:
5046         kfree(iter);
5047         __trace_array_put(tr);
5048         mutex_unlock(&trace_types_lock);
5049         return ret;
5050 }
5051
5052 static int tracing_release_pipe(struct inode *inode, struct file *file)
5053 {
5054         struct trace_iterator *iter = file->private_data;
5055         struct trace_array *tr = inode->i_private;
5056
5057         mutex_lock(&trace_types_lock);
5058
5059         tr->current_trace->ref--;
5060
5061         if (iter->trace->pipe_close)
5062                 iter->trace->pipe_close(iter);
5063
5064         mutex_unlock(&trace_types_lock);
5065
5066         free_cpumask_var(iter->started);
5067         mutex_destroy(&iter->mutex);
5068         kfree(iter);
5069
5070         trace_array_put(tr);
5071
5072         return 0;
5073 }
5074
5075 static unsigned int
5076 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5077 {
5078         struct trace_array *tr = iter->tr;
5079
5080         /* Iterators are static, they should be filled or empty */
5081         if (trace_buffer_iter(iter, iter->cpu_file))
5082                 return POLLIN | POLLRDNORM;
5083
5084         if (tr->trace_flags & TRACE_ITER_BLOCK)
5085                 /*
5086                  * Always select as readable when in blocking mode
5087                  */
5088                 return POLLIN | POLLRDNORM;
5089         else
5090                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5091                                              filp, poll_table);
5092 }
5093
5094 static unsigned int
5095 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5096 {
5097         struct trace_iterator *iter = filp->private_data;
5098
5099         return trace_poll(iter, filp, poll_table);
5100 }
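
/*
 * A user-space sketch of sleeping on trace_pipe with poll(), which lands
 * in trace_poll() above; note that with the "block" trace option set the
 * file always reports itself readable. The tracefs path is an assumption
 * for illustration.
 */
#if 0	/* illustrative user-space example, not part of the kernel build */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct pollfd pfd;
	char buf[4096];
	ssize_t n;

	pfd.fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY | O_NONBLOCK);
	pfd.events = POLLIN;
	if (pfd.fd < 0) {
		perror("open trace_pipe");
		return 1;
	}
	/* Sleep until the ring buffer has something for us. */
	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
		n = read(pfd.fd, buf, sizeof(buf));
		if (n > 0)
			fwrite(buf, 1, n, stdout);
	}
	close(pfd.fd);
	return 0;
}
#endif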
5101
5102 /* Must be called with iter->mutex held. */
5103 static int tracing_wait_pipe(struct file *filp)
5104 {
5105         struct trace_iterator *iter = filp->private_data;
5106         int ret;
5107
5108         while (trace_empty(iter)) {
5109
5110                 if ((filp->f_flags & O_NONBLOCK)) {
5111                         return -EAGAIN;
5112                 }
5113
5114                 /*
5115                  * We return only after we have read something and tracing
5116                  * has been disabled. If tracing is disabled but nothing has
5117                  * been read yet, we keep blocking; this allows a user to
5118                  * cat this file and then enable tracing. Once something has
5119                  * been read, we give an EOF when tracing is disabled again.
5120                  *
5121                  * iter->pos will be 0 if we haven't read anything.
5122                  */
5123                 if (!tracing_is_on() && iter->pos)
5124                         break;
5125
5126                 mutex_unlock(&iter->mutex);
5127
5128                 ret = wait_on_pipe(iter, false);
5129
5130                 mutex_lock(&iter->mutex);
5131
5132                 if (ret)
5133                         return ret;
5134         }
5135
5136         return 1;
5137 }
5138
5139 /*
5140  * Consumer reader.
5141  */
5142 static ssize_t
5143 tracing_read_pipe(struct file *filp, char __user *ubuf,
5144                   size_t cnt, loff_t *ppos)
5145 {
5146         struct trace_iterator *iter = filp->private_data;
5147         ssize_t sret;
5148
5149         /* return any leftover data */
5150         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5151         if (sret != -EBUSY)
5152                 return sret;
5153
5154         trace_seq_init(&iter->seq);
5155
5156         /*
5157          * Avoid more than one consumer on a single file descriptor.
5158          * This is just a matter of trace coherency; the ring buffer itself
5159          * is protected.
5160          */
5161         mutex_lock(&iter->mutex);
5162         if (iter->trace->read) {
5163                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5164                 if (sret)
5165                         goto out;
5166         }
5167
5168 waitagain:
5169         sret = tracing_wait_pipe(filp);
5170         if (sret <= 0)
5171                 goto out;
5172
5173         /* stop when tracing is finished */
5174         if (trace_empty(iter)) {
5175                 sret = 0;
5176                 goto out;
5177         }
5178
5179         if (cnt >= PAGE_SIZE)
5180                 cnt = PAGE_SIZE - 1;
5181
5182         /* reset all but tr, trace, and overruns */
5183         memset(&iter->seq, 0,
5184                sizeof(struct trace_iterator) -
5185                offsetof(struct trace_iterator, seq));
5186         cpumask_clear(iter->started);
5187         iter->pos = -1;
5188
5189         trace_event_read_lock();
5190         trace_access_lock(iter->cpu_file);
5191         while (trace_find_next_entry_inc(iter) != NULL) {
5192                 enum print_line_t ret;
5193                 int save_len = iter->seq.seq.len;
5194
5195                 ret = print_trace_line(iter);
5196                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5197                         /* don't print partial lines */
5198                         iter->seq.seq.len = save_len;
5199                         break;
5200                 }
5201                 if (ret != TRACE_TYPE_NO_CONSUME)
5202                         trace_consume(iter);
5203
5204                 if (trace_seq_used(&iter->seq) >= cnt)
5205                         break;
5206
5207                 /*
5208                  * Setting the full flag means we reached the trace_seq buffer
5209                  * size and we should have left via the partial-output condition
5210                  * above. One of the trace_seq_* functions is not being used properly.
5211                  */
5212                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5213                           iter->ent->type);
5214         }
5215         trace_access_unlock(iter->cpu_file);
5216         trace_event_read_unlock();
5217
5218         /* Now copy what we have to the user */
5219         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5220         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5221                 trace_seq_init(&iter->seq);
5222
5223         /*
5224          * If there was nothing to send to user, in spite of consuming trace
5225          * entries, go back to wait for more entries.
5226          */
5227         if (sret == -EBUSY)
5228                 goto waitagain;
5229
5230 out:
5231         mutex_unlock(&iter->mutex);
5232
5233         return sret;
5234 }
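
/*
 * A user-space sketch of a trace_pipe consumer matching the behaviour of
 * tracing_read_pipe() above: each read consumes events, blocks in
 * tracing_wait_pipe() while the buffer is empty, and hits EOF only after
 * something has been read and tracing has been turned off. The tracefs
 * path is an assumption for illustration.
 */
#if 0	/* illustrative user-space example, not part of the kernel build */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);

	if (fd < 0) {
		perror("open trace_pipe");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}
#endif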
5235
5236 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5237                                      unsigned int idx)
5238 {
5239         __free_page(spd->pages[idx]);
5240 }
5241
5242 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5243         .can_merge              = 0,
5244         .confirm                = generic_pipe_buf_confirm,
5245         .release                = generic_pipe_buf_release,
5246         .steal                  = generic_pipe_buf_steal,
5247         .get                    = generic_pipe_buf_get,
5248 };
5249
5250 static size_t
5251 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5252 {
5253         size_t count;
5254         int save_len;
5255         int ret;
5256
5257         /* Seq buffer is page-sized, exactly what we need. */
5258         for (;;) {
5259                 save_len = iter->seq.seq.len;
5260                 ret = print_trace_line(iter);
5261
5262                 if (trace_seq_has_overflowed(&iter->seq)) {
5263                         iter->seq.seq.len = save_len;
5264                         break;
5265                 }
5266
5267                 /*
5268                  * This should not be hit, because it should only
5269                  * be set if the iter->seq overflowed. But check it
5270                  * anyway to be safe.
5271                  */
5272                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5273                         iter->seq.seq.len = save_len;
5274                         break;
5275                 }
5276
5277                 count = trace_seq_used(&iter->seq) - save_len;
5278                 if (rem < count) {
5279                         rem = 0;
5280                         iter->seq.seq.len = save_len;
5281                         break;
5282                 }
5283
5284                 if (ret != TRACE_TYPE_NO_CONSUME)
5285                         trace_consume(iter);
5286                 rem -= count;
5287                 if (!trace_find_next_entry_inc(iter))   {
5288                         rem = 0;
5289                         iter->ent = NULL;
5290                         break;
5291                 }
5292         }
5293
5294         return rem;
5295 }
5296
5297 static ssize_t tracing_splice_read_pipe(struct file *filp,
5298                                         loff_t *ppos,
5299                                         struct pipe_inode_info *pipe,
5300                                         size_t len,
5301                                         unsigned int flags)
5302 {
5303         struct page *pages_def[PIPE_DEF_BUFFERS];
5304         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5305         struct trace_iterator *iter = filp->private_data;
5306         struct splice_pipe_desc spd = {
5307                 .pages          = pages_def,
5308                 .partial        = partial_def,
5309                 .nr_pages       = 0, /* This gets updated below. */
5310                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5311                 .flags          = flags,
5312                 .ops            = &tracing_pipe_buf_ops,
5313                 .spd_release    = tracing_spd_release_pipe,
5314         };
5315         ssize_t ret;
5316         size_t rem;
5317         unsigned int i;
5318
5319         if (splice_grow_spd(pipe, &spd))
5320                 return -ENOMEM;
5321
5322         mutex_lock(&iter->mutex);
5323
5324         if (iter->trace->splice_read) {
5325                 ret = iter->trace->splice_read(iter, filp,
5326                                                ppos, pipe, len, flags);
5327                 if (ret)
5328                         goto out_err;
5329         }
5330
5331         ret = tracing_wait_pipe(filp);
5332         if (ret <= 0)
5333                 goto out_err;
5334
5335         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5336                 ret = -EFAULT;
5337                 goto out_err;
5338         }
5339
5340         trace_event_read_lock();
5341         trace_access_lock(iter->cpu_file);
5342
5343         /* Fill as many pages as possible. */
5344         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5345                 spd.pages[i] = alloc_page(GFP_KERNEL);
5346                 if (!spd.pages[i])
5347                         break;
5348
5349                 rem = tracing_fill_pipe_page(rem, iter);
5350
5351                 /* Copy the data into the page, so we can start over. */
5352                 ret = trace_seq_to_buffer(&iter->seq,
5353                                           page_address(spd.pages[i]),
5354                                           trace_seq_used(&iter->seq));
5355                 if (ret < 0) {
5356                         __free_page(spd.pages[i]);
5357                         break;
5358                 }
5359                 spd.partial[i].offset = 0;
5360                 spd.partial[i].len = trace_seq_used(&iter->seq);
5361
5362                 trace_seq_init(&iter->seq);
5363         }
5364
5365         trace_access_unlock(iter->cpu_file);
5366         trace_event_read_unlock();
5367         mutex_unlock(&iter->mutex);
5368
5369         spd.nr_pages = i;
5370
5371         if (i)
5372                 ret = splice_to_pipe(pipe, &spd);
5373         else
5374                 ret = 0;
5375 out:
5376         splice_shrink_spd(&spd);
5377         return ret;
5378
5379 out_err:
5380         mutex_unlock(&iter->mutex);
5381         goto out;
5382 }
5383
5384 static ssize_t
5385 tracing_entries_read(struct file *filp, char __user *ubuf,
5386                      size_t cnt, loff_t *ppos)
5387 {
5388         struct inode *inode = file_inode(filp);
5389         struct trace_array *tr = inode->i_private;
5390         int cpu = tracing_get_cpu(inode);
5391         char buf[64];
5392         int r = 0;
5393         ssize_t ret;
5394
5395         mutex_lock(&trace_types_lock);
5396
5397         if (cpu == RING_BUFFER_ALL_CPUS) {
5398                 int cpu, buf_size_same;
5399                 unsigned long size;
5400
5401                 size = 0;
5402                 buf_size_same = 1;
5403                 /* check if all cpu sizes are same */
5404                 for_each_tracing_cpu(cpu) {
5405                         /* fill in the size from first enabled cpu */
5406                         if (size == 0)
5407                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5408                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5409                                 buf_size_same = 0;
5410                                 break;
5411                         }
5412                 }
5413
5414                 if (buf_size_same) {
5415                         if (!ring_buffer_expanded)
5416                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5417                                             size >> 10,
5418                                             trace_buf_size >> 10);
5419                         else
5420                                 r = sprintf(buf, "%lu\n", size >> 10);
5421                 } else
5422                         r = sprintf(buf, "X\n");
5423         } else
5424                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5425
5426         mutex_unlock(&trace_types_lock);
5427
5428         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5429         return ret;
5430 }
5431
5432 static ssize_t
5433 tracing_entries_write(struct file *filp, const char __user *ubuf,
5434                       size_t cnt, loff_t *ppos)
5435 {
5436         struct inode *inode = file_inode(filp);
5437         struct trace_array *tr = inode->i_private;
5438         unsigned long val;
5439         int ret;
5440
5441         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5442         if (ret)
5443                 return ret;
5444
5445         /* must have at least 1 entry */
5446         if (!val)
5447                 return -EINVAL;
5448
5449         /* value is in KB */
5450         val <<= 10;
5451         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5452         if (ret < 0)
5453                 return ret;
5454
5455         *ppos += cnt;
5456
5457         return cnt;
5458 }
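
/*
 * Sketch of resizing the ring buffer through the file serviced by
 * tracing_entries_write(). The value is in KiB (the handler shifts it
 * left by 10); the top-level file resizes every CPU while the per-CPU
 * file resizes just one. The tracefs paths are assumptions for
 * illustration.
 */
#if 0	/* illustrative user-space example, not part of the kernel build */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* 4096 KiB per CPU for all CPUs ... */
	if (write_str("/sys/kernel/tracing/buffer_size_kb", "4096\n"))
		perror("resize all cpus");
	/* ... and 64 KiB for CPU 0 only. */
	if (write_str("/sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb", "64\n"))
		perror("resize cpu0");
	return 0;
}
#endif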
5459
5460 static ssize_t
5461 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5462                                 size_t cnt, loff_t *ppos)
5463 {
5464         struct trace_array *tr = filp->private_data;
5465         char buf[64];
5466         int r, cpu;
5467         unsigned long size = 0, expanded_size = 0;
5468
5469         mutex_lock(&trace_types_lock);
5470         for_each_tracing_cpu(cpu) {
5471                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5472                 if (!ring_buffer_expanded)
5473                         expanded_size += trace_buf_size >> 10;
5474         }
5475         if (ring_buffer_expanded)
5476                 r = sprintf(buf, "%lu\n", size);
5477         else
5478                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5479         mutex_unlock(&trace_types_lock);
5480
5481         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5482 }
5483
5484 static ssize_t
5485 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5486                           size_t cnt, loff_t *ppos)
5487 {
5488         /*
5489          * There is no need to read what the user has written, this function
5490          * is just to make sure that there is no error when "echo" is used
5491          */
5492
5493         *ppos += cnt;
5494
5495         return cnt;
5496 }
5497
5498 static int
5499 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5500 {
5501         struct trace_array *tr = inode->i_private;
5502
5503         /* disable tracing ? */
5504         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5505                 tracer_tracing_off(tr);
5506         /* resize the ring buffer to 0 */
5507         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5508
5509         trace_array_put(tr);
5510
5511         return 0;
5512 }
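
/*
 * Sketch of the free_buffer semantics implemented above: the write itself
 * is a no-op so that "echo > free_buffer" succeeds, and the real work
 * happens on release, which may stop tracing and then shrinks the ring
 * buffer to zero. The tracefs path is an assumption for illustration.
 */
#if 0	/* illustrative user-space example, not part of the kernel build */
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/tracing/free_buffer", O_WRONLY);

	if (fd < 0)
		return 1;
	write(fd, "1\n", 2);	/* the contents are ignored */
	close(fd);		/* closing is what frees the buffer */
	return 0;
}
#endif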
5513
5514 static ssize_t
5515 tracing_mark_write(struct file *filp, const char __user *ubuf,
5516                                         size_t cnt, loff_t *fpos)
5517 {
5518         unsigned long addr = (unsigned long)ubuf;
5519         struct trace_array *tr = filp->private_data;
5520         struct ring_buffer_event *event;
5521         struct ring_buffer *buffer;
5522         struct print_entry *entry;
5523         unsigned long irq_flags;
5524         struct page *pages[2];
5525         void *map_page[2];
5526         int nr_pages = 1;
5527         ssize_t written;
5528         int offset;
5529         int size;
5530         int len;
5531         int ret;
5532         int i;
5533
5534         if (tracing_disabled)
5535                 return -EINVAL;
5536
5537         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5538                 return -EINVAL;
5539
5540         if (cnt > TRACE_BUF_SIZE)
5541                 cnt = TRACE_BUF_SIZE;
5542
5543         /*
5544          * Userspace is injecting traces into the kernel trace buffer.
5545          * We want to be as non-intrusive as possible.
5546          * To do so, we do not want to allocate any special buffers
5547          * or take any locks, but instead write the userspace data
5548          * straight into the ring buffer.
5549          *
5550          * First we need to pin the userspace buffer into memory.
5551          * Most likely it already is, because userspace just referenced it,
5552          * but there's no guarantee that it is. By using get_user_pages_fast()
5553          * and kmap_atomic/kunmap_atomic() we can get access to the
5554          * pages directly. We then write the data directly into the
5555          * ring buffer.
5556          */
5557         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5558
5559         /* check if we cross pages */
5560         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5561                 nr_pages = 2;
5562
5563         offset = addr & (PAGE_SIZE - 1);
5564         addr &= PAGE_MASK;
5565
5566         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5567         if (ret < nr_pages) {
5568                 while (--ret >= 0)
5569                         put_page(pages[ret]);
5570                 written = -EFAULT;
5571                 goto out;
5572         }
5573
5574         for (i = 0; i < nr_pages; i++)
5575                 map_page[i] = kmap_atomic(pages[i]);
5576
5577         local_save_flags(irq_flags);
5578         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5579         buffer = tr->trace_buffer.buffer;
5580         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5581                                           irq_flags, preempt_count());
5582         if (!event) {
5583                 /* Ring buffer disabled, return as if not open for write */
5584                 written = -EBADF;
5585                 goto out_unlock;
5586         }
5587
5588         entry = ring_buffer_event_data(event);
5589         entry->ip = _THIS_IP_;
5590
5591         if (nr_pages == 2) {
5592                 len = PAGE_SIZE - offset;
5593                 memcpy(&entry->buf, map_page[0] + offset, len);
5594                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5595         } else
5596                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5597
5598         if (entry->buf[cnt - 1] != '\n') {
5599                 entry->buf[cnt] = '\n';
5600                 entry->buf[cnt + 1] = '\0';
5601         } else
5602                 entry->buf[cnt] = '\0';
5603
5604         __buffer_unlock_commit(buffer, event);
5605
5606         written = cnt;
5607
5608         *fpos += written;
5609
5610  out_unlock:
5611         for (i = nr_pages - 1; i >= 0; i--) {
5612                 kunmap_atomic(map_page[i]);
5613                 put_page(pages[i]);
5614         }
5615  out:
5616         return written;
5617 }
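
/*
 * Sketch of injecting a message from user space through the file backed
 * by tracing_mark_write() above. The handler pins the user pages and
 * copies the text straight into the ring buffer, appending a newline if
 * one is missing. The tracefs path and the message are assumptions for
 * illustration.
 */
#if 0	/* illustrative user-space example, not part of the kernel build */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *msg = "hello from user space";
	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);

	if (fd < 0) {
		perror("open trace_marker");
		return 1;
	}
	if (write(fd, msg, strlen(msg)) < 0)
		perror("write trace_marker");
	close(fd);
	return 0;
}
#endif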
5618
5619 static int tracing_clock_show(struct seq_file *m, void *v)
5620 {
5621         struct trace_array *tr = m->private;
5622         int i;
5623
5624         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5625                 seq_printf(m,
5626                         "%s%s%s%s", i ? " " : "",
5627                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5628                         i == tr->clock_id ? "]" : "");
5629         seq_putc(m, '\n');
5630
5631         return 0;
5632 }
5633
5634 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5635 {
5636         int i;
5637
5638         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5639                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5640                         break;
5641         }
5642         if (i == ARRAY_SIZE(trace_clocks))
5643                 return -EINVAL;
5644
5645         mutex_lock(&trace_types_lock);
5646
5647         tr->clock_id = i;
5648
5649         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5650
5651         /*
5652          * New clock may not be consistent with the previous clock.
5653          * Reset the buffer so that it doesn't have incomparable timestamps.
5654          */
5655         tracing_reset_online_cpus(&tr->trace_buffer);
5656
5657 #ifdef CONFIG_TRACER_MAX_TRACE
5658         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5659                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5660         tracing_reset_online_cpus(&tr->max_buffer);
5661 #endif
5662
5663         mutex_unlock(&trace_types_lock);
5664
5665         return 0;
5666 }
5667
5668 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5669                                    size_t cnt, loff_t *fpos)
5670 {
5671         struct seq_file *m = filp->private_data;
5672         struct trace_array *tr = m->private;
5673         char buf[64];
5674         const char *clockstr;
5675         int ret;
5676
5677         if (cnt >= sizeof(buf))
5678                 return -EINVAL;
5679
5680         if (copy_from_user(buf, ubuf, cnt))
5681                 return -EFAULT;
5682
5683         buf[cnt] = 0;
5684
5685         clockstr = strstrip(buf);
5686
5687         ret = tracing_set_clock(tr, clockstr);
5688         if (ret)
5689                 return ret;
5690
5691         *fpos += cnt;
5692
5693         return cnt;
5694 }
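
/*
 * Sketch of switching trace clocks through the file handled above.
 * Reading lists the available clocks with the current one in brackets;
 * writing a name goes through tracing_set_clock(), which also resets the
 * buffers so that old and new timestamps are never mixed. The "mono"
 * clock name and the tracefs path are assumptions for illustration.
 */
#if 0	/* illustrative user-space example, not part of the kernel build */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[256];
	ssize_t n;
	int fd = open("/sys/kernel/tracing/trace_clock", O_RDWR);

	if (fd < 0) {
		perror("open trace_clock");
		return 1;
	}
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("available clocks: %s", buf);
	}
	if (write(fd, "mono\n", 5) < 0)
		perror("write trace_clock");
	close(fd);
	return 0;
}
#endif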
5695
5696 static int tracing_clock_open(struct inode *inode, struct file *file)
5697 {
5698         struct trace_array *tr = inode->i_private;
5699         int ret;
5700
5701         if (tracing_disabled)
5702                 return -ENODEV;
5703
5704         if (trace_array_get(tr))
5705                 return -ENODEV;
5706
5707         ret = single_open(file, tracing_clock_show, inode->i_private);
5708         if (ret < 0)
5709                 trace_array_put(tr);
5710
5711         return ret;
5712 }
5713
5714 struct ftrace_buffer_info {
5715         struct trace_iterator   iter;
5716         void                    *spare;
5717         unsigned int            read;
5718 };
5719
5720 #ifdef CONFIG_TRACER_SNAPSHOT
5721 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5722 {
5723         struct trace_array *tr = inode->i_private;
5724         struct trace_iterator *iter;
5725         struct seq_file *m;
5726         int ret = 0;
5727
5728         if (trace_array_get(tr) < 0)
5729                 return -ENODEV;
5730
5731         if (file->f_mode & FMODE_READ) {
5732                 iter = __tracing_open(inode, file, true);
5733                 if (IS_ERR(iter))
5734                         ret = PTR_ERR(iter);
5735         } else {
5736                 /* Writes still need the seq_file to hold the private data */
5737                 ret = -ENOMEM;
5738                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5739                 if (!m)
5740                         goto out;
5741                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5742                 if (!iter) {
5743                         kfree(m);
5744                         goto out;
5745                 }
5746                 ret = 0;
5747
5748                 iter->tr = tr;
5749                 iter->trace_buffer = &tr->max_buffer;
5750                 iter->cpu_file = tracing_get_cpu(inode);
5751                 m->private = iter;
5752                 file->private_data = m;
5753         }
5754 out:
5755         if (ret < 0)
5756                 trace_array_put(tr);
5757
5758         return ret;
5759 }
5760
5761 static ssize_t
5762 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5763                        loff_t *ppos)
5764 {
5765         struct seq_file *m = filp->private_data;
5766         struct trace_iterator *iter = m->private;
5767         struct trace_array *tr = iter->tr;
5768         unsigned long val;
5769         int ret;
5770
5771         ret = tracing_update_buffers();
5772         if (ret < 0)
5773                 return ret;
5774
5775         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5776         if (ret)
5777                 return ret;
5778
5779         mutex_lock(&trace_types_lock);
5780
5781         if (tr->current_trace->use_max_tr) {
5782                 ret = -EBUSY;
5783                 goto out;
5784         }
5785
5786         switch (val) {
5787         case 0:
5788                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5789                         ret = -EINVAL;
5790                         break;
5791                 }
5792                 if (tr->allocated_snapshot)
5793                         free_snapshot(tr);
5794                 break;
5795         case 1:
5796 /* Only allow per-cpu swap if the ring buffer supports it */
5797 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5798                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5799                         ret = -EINVAL;
5800                         break;
5801                 }
5802 #endif
5803                 if (!tr->allocated_snapshot) {
5804                         ret = alloc_snapshot(tr);
5805                         if (ret < 0)
5806                                 break;
5807                 }
5808                 local_irq_disable();
5809                 /* Now, we're going to swap */
5810                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5811                         update_max_tr(tr, current, smp_processor_id());
5812                 else
5813                         update_max_tr_single(tr, current, iter->cpu_file);
5814                 local_irq_enable();
5815                 break;
5816         default:
5817                 if (tr->allocated_snapshot) {
5818                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5819                                 tracing_reset_online_cpus(&tr->max_buffer);
5820                         else
5821                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5822                 }
5823                 break;
5824         }
5825
5826         if (ret >= 0) {
5827                 *ppos += cnt;
5828                 ret = cnt;
5829         }
5830 out:
5831         mutex_unlock(&trace_types_lock);
5832         return ret;
5833 }
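
/*
 * Sketch of driving the snapshot file from user space (CONFIG_TRACER_SNAPSHOT
 * required): as tracing_snapshot_write() above implements, writing "1"
 * allocates the max buffer if needed and swaps it with the live buffer,
 * "0" frees it, and larger values just clear the snapshot. Reading the
 * same file shows the frozen trace. The tracefs path is an assumption
 * for illustration.
 */
#if 0	/* illustrative user-space example, not part of the kernel build */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/sys/kernel/tracing/snapshot", O_RDWR);

	if (fd < 0) {
		perror("open snapshot");
		return 1;
	}
	if (write(fd, "1\n", 2) < 0)		/* take a snapshot */
		perror("take snapshot");
	lseek(fd, 0, SEEK_SET);
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);	/* dump the frozen trace */
	close(fd);
	return 0;
}
#endif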
5834
5835 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5836 {
5837         struct seq_file *m = file->private_data;
5838         int ret;
5839
5840         ret = tracing_release(inode, file);
5841
5842         if (file->f_mode & FMODE_READ)
5843                 return ret;
5844
5845         /* If write only, the seq_file is just a stub */
5846         if (m)
5847                 kfree(m->private);
5848         kfree(m);
5849
5850         return 0;
5851 }
5852
5853 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5854 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5855                                     size_t count, loff_t *ppos);
5856 static int tracing_buffers_release(struct inode *inode, struct file *file);
5857 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5858                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5859
5860 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5861 {
5862         struct ftrace_buffer_info *info;
5863         int ret;
5864
5865         ret = tracing_buffers_open(inode, filp);
5866         if (ret < 0)
5867                 return ret;
5868
5869         info = filp->private_data;
5870
5871         if (info->iter.trace->use_max_tr) {
5872                 tracing_buffers_release(inode, filp);
5873                 return -EBUSY;
5874         }
5875
5876         info->iter.snapshot = true;
5877         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5878
5879         return ret;
5880 }
5881
5882 #endif /* CONFIG_TRACER_SNAPSHOT */
5883
5884
5885 static const struct file_operations tracing_thresh_fops = {
5886         .open           = tracing_open_generic,
5887         .read           = tracing_thresh_read,
5888         .write          = tracing_thresh_write,
5889         .llseek         = generic_file_llseek,
5890 };
5891
5892 #ifdef CONFIG_TRACER_MAX_TRACE
5893 static const struct file_operations tracing_max_lat_fops = {
5894         .open           = tracing_open_generic,
5895         .read           = tracing_max_lat_read,
5896         .write          = tracing_max_lat_write,
5897         .llseek         = generic_file_llseek,
5898 };
5899 #endif
5900
5901 static const struct file_operations set_tracer_fops = {
5902         .open           = tracing_open_generic,
5903         .read           = tracing_set_trace_read,
5904         .write          = tracing_set_trace_write,
5905         .llseek         = generic_file_llseek,
5906 };
5907
5908 static const struct file_operations tracing_pipe_fops = {
5909         .open           = tracing_open_pipe,
5910         .poll           = tracing_poll_pipe,
5911         .read           = tracing_read_pipe,
5912         .splice_read    = tracing_splice_read_pipe,
5913         .release        = tracing_release_pipe,
5914         .llseek         = no_llseek,
5915 };
5916
5917 static const struct file_operations tracing_entries_fops = {
5918         .open           = tracing_open_generic_tr,
5919         .read           = tracing_entries_read,
5920         .write          = tracing_entries_write,
5921         .llseek         = generic_file_llseek,
5922         .release        = tracing_release_generic_tr,
5923 };
5924
5925 static const struct file_operations tracing_total_entries_fops = {
5926         .open           = tracing_open_generic_tr,
5927         .read           = tracing_total_entries_read,
5928         .llseek         = generic_file_llseek,
5929         .release        = tracing_release_generic_tr,
5930 };
5931
5932 static const struct file_operations tracing_free_buffer_fops = {
5933         .open           = tracing_open_generic_tr,
5934         .write          = tracing_free_buffer_write,
5935         .release        = tracing_free_buffer_release,
5936 };
5937
5938 static const struct file_operations tracing_mark_fops = {
5939         .open           = tracing_open_generic_tr,
5940         .write          = tracing_mark_write,
5941         .llseek         = generic_file_llseek,
5942         .release        = tracing_release_generic_tr,
5943 };
5944
5945 static const struct file_operations trace_clock_fops = {
5946         .open           = tracing_clock_open,
5947         .read           = seq_read,
5948         .llseek         = seq_lseek,
5949         .release        = tracing_single_release_tr,
5950         .write          = tracing_clock_write,
5951 };
5952
5953 #ifdef CONFIG_TRACER_SNAPSHOT
5954 static const struct file_operations snapshot_fops = {
5955         .open           = tracing_snapshot_open,
5956         .read           = seq_read,
5957         .write          = tracing_snapshot_write,
5958         .llseek         = tracing_lseek,
5959         .release        = tracing_snapshot_release,
5960 };
5961
5962 static const struct file_operations snapshot_raw_fops = {
5963         .open           = snapshot_raw_open,
5964         .read           = tracing_buffers_read,
5965         .release        = tracing_buffers_release,
5966         .splice_read    = tracing_buffers_splice_read,
5967         .llseek         = no_llseek,
5968 };
5969
5970 #endif /* CONFIG_TRACER_SNAPSHOT */
5971
5972 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5973 {
5974         struct trace_array *tr = inode->i_private;
5975         struct ftrace_buffer_info *info;
5976         int ret;
5977
5978         if (tracing_disabled)
5979                 return -ENODEV;
5980
5981         if (trace_array_get(tr) < 0)
5982                 return -ENODEV;
5983
5984         info = kzalloc(sizeof(*info), GFP_KERNEL);
5985         if (!info) {
5986                 trace_array_put(tr);
5987                 return -ENOMEM;
5988         }
5989
5990         mutex_lock(&trace_types_lock);
5991
5992         info->iter.tr           = tr;
5993         info->iter.cpu_file     = tracing_get_cpu(inode);
5994         info->iter.trace        = tr->current_trace;
5995         info->iter.trace_buffer = &tr->trace_buffer;
5996         info->spare             = NULL;
5997         /* Force reading ring buffer for first read */
5998         info->read              = (unsigned int)-1;
5999
6000         filp->private_data = info;
6001
6002         tr->current_trace->ref++;
6003
6004         mutex_unlock(&trace_types_lock);
6005
6006         ret = nonseekable_open(inode, filp);
6007         if (ret < 0)
6008                 trace_array_put(tr);
6009
6010         return ret;
6011 }
6012
6013 static unsigned int
6014 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6015 {
6016         struct ftrace_buffer_info *info = filp->private_data;
6017         struct trace_iterator *iter = &info->iter;
6018
6019         return trace_poll(iter, filp, poll_table);
6020 }
6021
6022 static ssize_t
6023 tracing_buffers_read(struct file *filp, char __user *ubuf,
6024                      size_t count, loff_t *ppos)
6025 {
6026         struct ftrace_buffer_info *info = filp->private_data;
6027         struct trace_iterator *iter = &info->iter;
6028         ssize_t ret;
6029         ssize_t size;
6030
6031         if (!count)
6032                 return 0;
6033
6034 #ifdef CONFIG_TRACER_MAX_TRACE
6035         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6036                 return -EBUSY;
6037 #endif
6038
6039         if (!info->spare)
6040                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6041                                                           iter->cpu_file);
6042         if (!info->spare)
6043                 return -ENOMEM;
6044
6045         /* Do we have previous read data to read? */
6046         if (info->read < PAGE_SIZE)
6047                 goto read;
6048
6049  again:
6050         trace_access_lock(iter->cpu_file);
6051         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6052                                     &info->spare,
6053                                     count,
6054                                     iter->cpu_file, 0);
6055         trace_access_unlock(iter->cpu_file);
6056
6057         if (ret < 0) {
6058                 if (trace_empty(iter)) {
6059                         if ((filp->f_flags & O_NONBLOCK))
6060                                 return -EAGAIN;
6061
6062                         ret = wait_on_pipe(iter, false);
6063                         if (ret)
6064                                 return ret;
6065
6066                         goto again;
6067                 }
6068                 return 0;
6069         }
6070
6071         info->read = 0;
6072  read:
6073         size = PAGE_SIZE - info->read;
6074         if (size > count)
6075                 size = count;
6076
6077         ret = copy_to_user(ubuf, info->spare + info->read, size);
6078         if (ret == size)
6079                 return -EFAULT;
6080
6081         size -= ret;
6082
6083         *ppos += size;
6084         info->read += size;
6085
6086         return size;
6087 }
6088
6089 static int tracing_buffers_release(struct inode *inode, struct file *file)
6090 {
6091         struct ftrace_buffer_info *info = file->private_data;
6092         struct trace_iterator *iter = &info->iter;
6093
6094         mutex_lock(&trace_types_lock);
6095
6096         iter->tr->current_trace->ref--;
6097
6098         __trace_array_put(iter->tr);
6099
6100         if (info->spare)
6101                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6102         kfree(info);
6103
6104         mutex_unlock(&trace_types_lock);
6105
6106         return 0;
6107 }
6108
6109 struct buffer_ref {
6110         struct ring_buffer      *buffer;
6111         void                    *page;
6112         int                     ref;
6113 };
6114
6115 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6116                                     struct pipe_buffer *buf)
6117 {
6118         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6119
6120         if (--ref->ref)
6121                 return;
6122
6123         ring_buffer_free_read_page(ref->buffer, ref->page);
6124         kfree(ref);
6125         buf->private = 0;
6126 }
6127
6128 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6129                                 struct pipe_buffer *buf)
6130 {
6131         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6132
6133         ref->ref++;
6134 }
6135
6136 /* Pipe buffer operations for a buffer. */
6137 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6138         .can_merge              = 0,
6139         .confirm                = generic_pipe_buf_confirm,
6140         .release                = buffer_pipe_buf_release,
6141         .steal                  = generic_pipe_buf_steal,
6142         .get                    = buffer_pipe_buf_get,
6143 };
6144
6145 /*
6146  * Callback from splice_to_pipe(); used to release some pages at the
6147  * end of the spd in case we errored out while filling the pipe.
6148  */
6149 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6150 {
6151         struct buffer_ref *ref =
6152                 (struct buffer_ref *)spd->partial[i].private;
6153
6154         if (--ref->ref)
6155                 return;
6156
6157         ring_buffer_free_read_page(ref->buffer, ref->page);
6158         kfree(ref);
6159         spd->partial[i].private = 0;
6160 }
6161
6162 static ssize_t
6163 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6164                             struct pipe_inode_info *pipe, size_t len,
6165                             unsigned int flags)
6166 {
6167         struct ftrace_buffer_info *info = file->private_data;
6168         struct trace_iterator *iter = &info->iter;
6169         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6170         struct page *pages_def[PIPE_DEF_BUFFERS];
6171         struct splice_pipe_desc spd = {
6172                 .pages          = pages_def,
6173                 .partial        = partial_def,
6174                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6175                 .flags          = flags,
6176                 .ops            = &buffer_pipe_buf_ops,
6177                 .spd_release    = buffer_spd_release,
6178         };
6179         struct buffer_ref *ref;
6180         int entries, size, i;
6181         ssize_t ret = 0;
6182
6183 #ifdef CONFIG_TRACER_MAX_TRACE
6184         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6185                 return -EBUSY;
6186 #endif
6187
6188         if (*ppos & (PAGE_SIZE - 1))
6189                 return -EINVAL;
6190
6191         if (len & (PAGE_SIZE - 1)) {
6192                 if (len < PAGE_SIZE)
6193                         return -EINVAL;
6194                 len &= PAGE_MASK;
6195         }
6196
6197         if (splice_grow_spd(pipe, &spd))
6198                 return -ENOMEM;
6199
6200  again:
6201         trace_access_lock(iter->cpu_file);
6202         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6203
6204         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6205                 struct page *page;
6206                 int r;
6207
6208                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6209                 if (!ref) {
6210                         ret = -ENOMEM;
6211                         break;
6212                 }
6213
6214                 ref->ref = 1;
6215                 ref->buffer = iter->trace_buffer->buffer;
6216                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6217                 if (!ref->page) {
6218                         ret = -ENOMEM;
6219                         kfree(ref);
6220                         break;
6221                 }
6222
6223                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6224                                           len, iter->cpu_file, 1);
6225                 if (r < 0) {
6226                         ring_buffer_free_read_page(ref->buffer, ref->page);
6227                         kfree(ref);
6228                         break;
6229                 }
6230
6231                 /*
6232                  * Zero out any leftover data; this page is going
6233                  * out to user land.
6234                  */
6235                 size = ring_buffer_page_len(ref->page);
6236                 if (size < PAGE_SIZE)
6237                         memset(ref->page + size, 0, PAGE_SIZE - size);
6238
6239                 page = virt_to_page(ref->page);
6240
6241                 spd.pages[i] = page;
6242                 spd.partial[i].len = PAGE_SIZE;
6243                 spd.partial[i].offset = 0;
6244                 spd.partial[i].private = (unsigned long)ref;
6245                 spd.nr_pages++;
6246                 *ppos += PAGE_SIZE;
6247
6248                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6249         }
6250
6251         trace_access_unlock(iter->cpu_file);
6252         spd.nr_pages = i;
6253
6254         /* did we read anything? */
6255         if (!spd.nr_pages) {
6256                 if (ret)
6257                         goto out;
6258                 ret = -EAGAIN;
6259                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6260                         goto out;
6261
6262                 ret = wait_on_pipe(iter, true);
6263                 if (ret)
6264                         goto out;
6265
6266                 goto again;
6267         }
6268
6269         ret = splice_to_pipe(pipe, &spd);
6270 out:
6271         splice_shrink_spd(&spd);
6272         return ret;
6273 }
6274
6275 static const struct file_operations tracing_buffers_fops = {
6276         .open           = tracing_buffers_open,
6277         .read           = tracing_buffers_read,
6278         .poll           = tracing_buffers_poll,
6279         .release        = tracing_buffers_release,
6280         .splice_read    = tracing_buffers_splice_read,
6281         .llseek         = no_llseek,
6282 };
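
/*
 * Sketch of pulling raw, page-sized ring-buffer data out of the per-CPU
 * trace_pipe_raw file with splice(), which is what
 * tracing_buffers_splice_read() above services. It assumes 4 KiB pages,
 * a tracefs mount at /sys/kernel/tracing and an output file name chosen
 * purely for illustration.
 */
#if 0	/* illustrative user-space example, not part of the kernel build */
#define _GNU_SOURCE		/* for splice() */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int in = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
		      O_RDONLY);
	int out = open("cpu0.raw", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	int pfd[2];
	ssize_t n;

	if (in < 0 || out < 0 || pipe(pfd) < 0) {
		perror("setup");
		return 1;
	}
	/* Move whole pages from the ring buffer into the pipe, then to disk. */
	while ((n = splice(in, NULL, pfd[1], NULL, 4096, SPLICE_F_NONBLOCK)) > 0)
		splice(pfd[0], NULL, out, NULL, n, 0);
	return 0;
}
#endif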
6283
6284 static ssize_t
6285 tracing_stats_read(struct file *filp, char __user *ubuf,
6286                    size_t count, loff_t *ppos)
6287 {
6288         struct inode *inode = file_inode(filp);
6289         struct trace_array *tr = inode->i_private;
6290         struct trace_buffer *trace_buf = &tr->trace_buffer;
6291         int cpu = tracing_get_cpu(inode);
6292         struct trace_seq *s;
6293         unsigned long cnt;
6294         unsigned long long t;
6295         unsigned long usec_rem;
6296
6297         s = kmalloc(sizeof(*s), GFP_KERNEL);
6298         if (!s)
6299                 return -ENOMEM;
6300
6301         trace_seq_init(s);
6302
6303         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6304         trace_seq_printf(s, "entries: %ld\n", cnt);
6305
6306         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6307         trace_seq_printf(s, "overrun: %ld\n", cnt);
6308
6309         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6310         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6311
6312         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6313         trace_seq_printf(s, "bytes: %ld\n", cnt);
6314
6315         if (trace_clocks[tr->clock_id].in_ns) {
6316                 /* local or global for trace_clock */
6317                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6318                 usec_rem = do_div(t, USEC_PER_SEC);
6319                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6320                                                                 t, usec_rem);
6321
6322                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6323                 usec_rem = do_div(t, USEC_PER_SEC);
6324                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6325         } else {
6326                 /* counter or tsc mode for trace_clock */
6327                 trace_seq_printf(s, "oldest event ts: %llu\n",
6328                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6329
6330                 trace_seq_printf(s, "now ts: %llu\n",
6331                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6332         }
6333
6334         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6335         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6336
6337         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6338         trace_seq_printf(s, "read events: %ld\n", cnt);
6339
6340         count = simple_read_from_buffer(ubuf, count, ppos,
6341                                         s->buffer, trace_seq_used(s));
6342
6343         kfree(s);
6344
6345         return count;
6346 }
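/*
 * Worked example for the timestamp formatting above (the numbers are made
 * up for illustration): ns2usecs() converts the ring buffer's nanosecond
 * timestamp to microseconds, and do_div() then divides in place and hands
 * back the remainder:
 *
 *	t = ns2usecs(1234567890123ULL);		t == 1234567890 (usecs)
 *	usec_rem = do_div(t, USEC_PER_SEC);	t == 1234, usec_rem == 567890
 *
 * which the "%5llu.%06lu" format prints as " 1234.567890".
 */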
6347
6348 static const struct file_operations tracing_stats_fops = {
6349         .open           = tracing_open_generic_tr,
6350         .read           = tracing_stats_read,
6351         .llseek         = generic_file_llseek,
6352         .release        = tracing_release_generic_tr,
6353 };
6354
6355 #ifdef CONFIG_DYNAMIC_FTRACE
6356
6357 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6358 {
6359         return 0;
6360 }
6361
6362 static ssize_t
6363 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6364                   size_t cnt, loff_t *ppos)
6365 {
6366         static char ftrace_dyn_info_buffer[1024];
6367         static DEFINE_MUTEX(dyn_info_mutex);
6368         unsigned long *p = filp->private_data;
6369         char *buf = ftrace_dyn_info_buffer;
6370         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6371         int r;
6372
6373         mutex_lock(&dyn_info_mutex);
6374         r = sprintf(buf, "%ld ", *p);
6375
6376         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6377         buf[r++] = '\n';
6378
6379         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6380
6381         mutex_unlock(&dyn_info_mutex);
6382
6383         return r;
6384 }
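/*
 * Output sketch: for the "dyn_ftrace_total_info" file created in
 * tracer_init_tracefs(), p points at ftrace_update_tot_cnt, so a read
 * returns that counter (roughly, how many call sites dynamic ftrace has
 * converted), e.g. "35041 " plus a newline (value made up for
 * illustration), followed by whatever arch-specific text
 * ftrace_arch_read_dyn_info() appends (the weak default above adds none).
 */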
6385
6386 static const struct file_operations tracing_dyn_info_fops = {
6387         .open           = tracing_open_generic,
6388         .read           = tracing_read_dyn_info,
6389         .llseek         = generic_file_llseek,
6390 };
6391 #endif /* CONFIG_DYNAMIC_FTRACE */
6392
6393 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6394 static void
6395 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6396 {
6397         tracing_snapshot();
6398 }
6399
6400 static void
6401 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6402 {
6403         unsigned long *count = (unsigned long *)data;
6404
6405         if (!*count)
6406                 return;
6407
6408         if (*count != -1)
6409                 (*count)--;
6410
6411         tracing_snapshot();
6412 }
6413
6414 static int
6415 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6416                       struct ftrace_probe_ops *ops, void *data)
6417 {
6418         long count = (long)data;
6419
6420         seq_printf(m, "%ps:", (void *)ip);
6421
6422         seq_puts(m, "snapshot");
6423
6424         if (count == -1)
6425                 seq_puts(m, ":unlimited\n");
6426         else
6427                 seq_printf(m, ":count=%ld\n", count);
6428
6429         return 0;
6430 }
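/*
 * Output sketch: when the registered probes are listed (e.g. by reading
 * set_ftrace_filter), the callback above emits one line per probe, such
 * as (function name chosen only for illustration):
 *
 *	schedule:snapshot:unlimited
 *	schedule:snapshot:count=5
 */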
6431
6432 static struct ftrace_probe_ops snapshot_probe_ops = {
6433         .func                   = ftrace_snapshot,
6434         .print                  = ftrace_snapshot_print,
6435 };
6436
6437 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6438         .func                   = ftrace_count_snapshot,
6439         .print                  = ftrace_snapshot_print,
6440 };
6441
6442 static int
6443 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6444                                char *glob, char *cmd, char *param, int enable)
6445 {
6446         struct ftrace_probe_ops *ops;
6447         void *count = (void *)-1;
6448         char *number;
6449         int ret;
6450
6451         /* hash funcs only work with set_ftrace_filter */
6452         if (!enable)
6453                 return -EINVAL;
6454
6455         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6456
6457         if (glob[0] == '!') {
6458                 unregister_ftrace_function_probe_func(glob+1, ops);
6459                 return 0;
6460         }
6461
6462         if (!param)
6463                 goto out_reg;
6464
6465         number = strsep(&param, ":");
6466
6467         if (!strlen(number))
6468                 goto out_reg;
6469
6470         /*
6471          * We use the callback data field (which is a pointer)
6472          * as our counter.
6473          */
6474         ret = kstrtoul(number, 0, (unsigned long *)&count);
6475         if (ret)
6476                 return ret;
6477
6478  out_reg:
6479         ret = register_ftrace_function_probe(glob, ops, count);
6480
6481         if (ret >= 0)
6482                 alloc_snapshot(&global_trace);
6483
6484         return ret < 0 ? ret : 0;
6485 }
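/*
 * Usage sketch, following the parsing above (function name chosen only
 * for illustration): the command is written to set_ftrace_filter with an
 * optional trigger count after a second ':':
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter	  snapshot on every hit
 *	echo 'schedule:snapshot:5' > set_ftrace_filter	  only the first 5 hits
 *	echo '!schedule:snapshot' > set_ftrace_filter	  remove the probe
 *
 * A leading '!' takes the unregister path above, and the count is parsed
 * by kstrtoul() and stored in the probe's data pointer.
 */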
6486
6487 static struct ftrace_func_command ftrace_snapshot_cmd = {
6488         .name                   = "snapshot",
6489         .func                   = ftrace_trace_snapshot_callback,
6490 };
6491
6492 static __init int register_snapshot_cmd(void)
6493 {
6494         return register_ftrace_command(&ftrace_snapshot_cmd);
6495 }
6496 #else
6497 static inline __init int register_snapshot_cmd(void) { return 0; }
6498 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6499
6500 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6501 {
6502         if (WARN_ON(!tr->dir))
6503                 return ERR_PTR(-ENODEV);
6504
6505         /* Top directory uses NULL as the parent */
6506         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6507                 return NULL;
6508
6509         /* All sub buffers have a descriptor */
6510         return tr->dir;
6511 }
6512
6513 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6514 {
6515         struct dentry *d_tracer;
6516
6517         if (tr->percpu_dir)
6518                 return tr->percpu_dir;
6519
6520         d_tracer = tracing_get_dentry(tr);
6521         if (IS_ERR(d_tracer))
6522                 return NULL;
6523
6524         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6525
6526         WARN_ONCE(!tr->percpu_dir,
6527                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6528
6529         return tr->percpu_dir;
6530 }
6531
6532 static struct dentry *
6533 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6534                       void *data, long cpu, const struct file_operations *fops)
6535 {
6536         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6537
6538         if (ret) /* See tracing_get_cpu() */
6539                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6540         return ret;
6541 }
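/*
 * Encoding sketch: the cpu number is stored in i_cdev offset by one so
 * that a NULL i_cdev can keep meaning "no specific cpu".  The decode in
 * tracing_get_cpu() is roughly the mirror image:
 *
 *	encode:  d_inode(ret)->i_cdev = (void *)(cpu + 1);
 *	decode:  cpu = i_cdev ? (long)i_cdev - 1 : RING_BUFFER_ALL_CPUS;
 */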
6542
6543 static void
6544 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6545 {
6546         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6547         struct dentry *d_cpu;
6548         char cpu_dir[30]; /* 30 characters should be more than enough */
6549
6550         if (!d_percpu)
6551                 return;
6552
6553         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6554         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6555         if (!d_cpu) {
6556                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6557                 return;
6558         }
6559
6560         /* per cpu trace_pipe */
6561         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6562                                 tr, cpu, &tracing_pipe_fops);
6563
6564         /* per cpu trace */
6565         trace_create_cpu_file("trace", 0644, d_cpu,
6566                                 tr, cpu, &tracing_fops);
6567
6568         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6569                                 tr, cpu, &tracing_buffers_fops);
6570
6571         trace_create_cpu_file("stats", 0444, d_cpu,
6572                                 tr, cpu, &tracing_stats_fops);
6573
6574         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6575                                 tr, cpu, &tracing_entries_fops);
6576
6577 #ifdef CONFIG_TRACER_SNAPSHOT
6578         trace_create_cpu_file("snapshot", 0644, d_cpu,
6579                                 tr, cpu, &snapshot_fops);
6580
6581         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6582                                 tr, cpu, &snapshot_raw_fops);
6583 #endif
6584 }
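/*
 * Resulting layout sketch: for every tracing cpu N, the function above
 * populates the instance's tracing directory with:
 *
 *	per_cpu/cpuN/trace_pipe
 *	per_cpu/cpuN/trace
 *	per_cpu/cpuN/trace_pipe_raw
 *	per_cpu/cpuN/stats
 *	per_cpu/cpuN/buffer_size_kb
 *	per_cpu/cpuN/snapshot		(CONFIG_TRACER_SNAPSHOT only)
 *	per_cpu/cpuN/snapshot_raw	(CONFIG_TRACER_SNAPSHOT only)
 */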
6585
6586 #ifdef CONFIG_FTRACE_SELFTEST
6587 /* Let selftest have access to static functions in this file */
6588 #include "trace_selftest.c"
6589 #endif
6590
6591 static ssize_t
6592 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6593                         loff_t *ppos)
6594 {
6595         struct trace_option_dentry *topt = filp->private_data;
6596         char *buf;
6597
6598         if (topt->flags->val & topt->opt->bit)
6599                 buf = "1\n";
6600         else
6601                 buf = "0\n";
6602
6603         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6604 }
6605
6606 static ssize_t
6607 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6608                          loff_t *ppos)
6609 {
6610         struct trace_option_dentry *topt = filp->private_data;
6611         unsigned long val;
6612         int ret;
6613
6614         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6615         if (ret)
6616                 return ret;
6617
6618         if (val != 0 && val != 1)
6619                 return -EINVAL;
6620
6621         if (!!(topt->flags->val & topt->opt->bit) != val) {
6622                 mutex_lock(&trace_types_lock);
6623                 ret = __set_tracer_option(topt->tr, topt->flags,
6624                                           topt->opt, !val);
6625                 mutex_unlock(&trace_types_lock);
6626                 if (ret)
6627                         return ret;
6628         }
6629
6630         *ppos += cnt;
6631
6632         return cnt;
6633 }
6634
6635
6636 static const struct file_operations trace_options_fops = {
6637         .open = tracing_open_generic,
6638         .read = trace_options_read,
6639         .write = trace_options_write,
6640         .llseek = generic_file_llseek,
6641 };
6642
6643 /*
6644  * In order to pass in both the trace_array descriptor and the index
6645  * to the flag that the trace option file represents, the trace_array
6646  * has a character array of trace_flags_index[], which holds the index
6647  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6648  * The address of this character array is passed to the flag option file
6649  * read/write callbacks.
6650  *
6651  * In order to extract both the index and the trace_array descriptor,
6652  * get_tr_index() uses the following algorithm.
6653  *
6654  *   idx = *ptr;
6655  *
6656  * This works because the pointer passed in is the address of one of the
6657  * index bytes, and that byte's value is its own index (index[1] == 1).
6658  *
6659  * Then, to get the trace_array descriptor, we subtract that index from
6660  * the ptr, which takes us back to the start of the index array:
6661  *
6662  *   ptr - idx == &index[0]
6663  *
6664  * Then a simple container_of() from that pointer gets us to the
6665  * trace_array descriptor.
6666  */
6667 static void get_tr_index(void *data, struct trace_array **ptr,
6668                          unsigned int *pindex)
6669 {
6670         *pindex = *(unsigned char *)data;
6671
6672         *ptr = container_of(data - *pindex, struct trace_array,
6673                             trace_flags_index);
6674 }
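/*
 * Worked example (the index value is arbitrary): if a core option file
 * was created with data = &tr->trace_flags_index[3], then
 *
 *	*data == 3		(init_trace_flags_index() sets index[i] = i)
 *	data - 3 == &tr->trace_flags_index[0]
 *	container_of(data - 3, struct trace_array, trace_flags_index) == tr
 *
 * which is exactly the *pindex and *ptr that get_tr_index() hands back.
 */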
6675
6676 static ssize_t
6677 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6678                         loff_t *ppos)
6679 {
6680         void *tr_index = filp->private_data;
6681         struct trace_array *tr;
6682         unsigned int index;
6683         char *buf;
6684
6685         get_tr_index(tr_index, &tr, &index);
6686
6687         if (tr->trace_flags & (1 << index))
6688                 buf = "1\n";
6689         else
6690                 buf = "0\n";
6691
6692         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6693 }
6694
6695 static ssize_t
6696 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6697                          loff_t *ppos)
6698 {
6699         void *tr_index = filp->private_data;
6700         struct trace_array *tr;
6701         unsigned int index;
6702         unsigned long val;
6703         int ret;
6704
6705         get_tr_index(tr_index, &tr, &index);
6706
6707         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6708         if (ret)
6709                 return ret;
6710
6711         if (val != 0 && val != 1)
6712                 return -EINVAL;
6713
6714         mutex_lock(&trace_types_lock);
6715         ret = set_tracer_flag(tr, 1 << index, val);
6716         mutex_unlock(&trace_types_lock);
6717
6718         if (ret < 0)
6719                 return ret;
6720
6721         *ppos += cnt;
6722
6723         return cnt;
6724 }
6725
6726 static const struct file_operations trace_options_core_fops = {
6727         .open = tracing_open_generic,
6728         .read = trace_options_core_read,
6729         .write = trace_options_core_write,
6730         .llseek = generic_file_llseek,
6731 };
6732
6733 struct dentry *trace_create_file(const char *name,
6734                                  umode_t mode,
6735                                  struct dentry *parent,
6736                                  void *data,
6737                                  const struct file_operations *fops)
6738 {
6739         struct dentry *ret;
6740
6741         ret = tracefs_create_file(name, mode, parent, data, fops);
6742         if (!ret)
6743                 pr_warn("Could not create tracefs '%s' entry\n", name);
6744
6745         return ret;
6746 }
6747
6748
6749 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6750 {
6751         struct dentry *d_tracer;
6752
6753         if (tr->options)
6754                 return tr->options;
6755
6756         d_tracer = tracing_get_dentry(tr);
6757         if (IS_ERR(d_tracer))
6758                 return NULL;
6759
6760         tr->options = tracefs_create_dir("options", d_tracer);
6761         if (!tr->options) {
6762                 pr_warn("Could not create tracefs directory 'options'\n");
6763                 return NULL;
6764         }
6765
6766         return tr->options;
6767 }
6768
6769 static void
6770 create_trace_option_file(struct trace_array *tr,
6771                          struct trace_option_dentry *topt,
6772                          struct tracer_flags *flags,
6773                          struct tracer_opt *opt)
6774 {
6775         struct dentry *t_options;
6776
6777         t_options = trace_options_init_dentry(tr);
6778         if (!t_options)
6779                 return;
6780
6781         topt->flags = flags;
6782         topt->opt = opt;
6783         topt->tr = tr;
6784
6785         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6786                                     &trace_options_fops);
6787
6788 }
6789
6790 static void
6791 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6792 {
6793         struct trace_option_dentry *topts;
6794         struct trace_options *tr_topts;
6795         struct tracer_flags *flags;
6796         struct tracer_opt *opts;
6797         int cnt;
6798         int i;
6799
6800         if (!tracer)
6801                 return;
6802
6803         flags = tracer->flags;
6804
6805         if (!flags || !flags->opts)
6806                 return;
6807
6808         /*
6809          * If this is an instance, only create flags for tracers
6810          * the instance may have.
6811          */
6812         if (!trace_ok_for_array(tracer, tr))
6813                 return;
6814
6815         for (i = 0; i < tr->nr_topts; i++) {
6816                 /* Make sure there are no duplicate flags. */
6817                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6818                         return;
6819         }
6820
6821         opts = flags->opts;
6822
6823         for (cnt = 0; opts[cnt].name; cnt++)
6824                 ;
6825
6826         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6827         if (!topts)
6828                 return;
6829
6830         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6831                             GFP_KERNEL);
6832         if (!tr_topts) {
6833                 kfree(topts);
6834                 return;
6835         }
6836
6837         tr->topts = tr_topts;
6838         tr->topts[tr->nr_topts].tracer = tracer;
6839         tr->topts[tr->nr_topts].topts = topts;
6840         tr->nr_topts++;
6841
6842         for (cnt = 0; opts[cnt].name; cnt++) {
6843                 create_trace_option_file(tr, &topts[cnt], flags,
6844                                          &opts[cnt]);
6845                 WARN_ONCE(topts[cnt].entry == NULL,
6846                           "Failed to create trace option: %s",
6847                           opts[cnt].name);
6848         }
6849 }
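/*
 * Shape sketch of what the loops above expect (a made-up tracer, for
 * illustration only): a tracer's opts[] array is terminated by an entry
 * with a NULL name, and the flags wrapper points at it:
 *
 *	static struct tracer_opt my_opts[] = {
 *		{ TRACER_OPT(my-verbose, 0x1) },
 *		{ }			terminator: .name == NULL
 *	};
 *	static struct tracer_flags my_flags = {
 *		.val  = 0,
 *		.opts = my_opts,
 *	};
 */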
6850
6851 static struct dentry *
6852 create_trace_option_core_file(struct trace_array *tr,
6853                               const char *option, long index)
6854 {
6855         struct dentry *t_options;
6856
6857         t_options = trace_options_init_dentry(tr);
6858         if (!t_options)
6859                 return NULL;
6860
6861         return trace_create_file(option, 0644, t_options,
6862                                  (void *)&tr->trace_flags_index[index],
6863                                  &trace_options_core_fops);
6864 }
6865
6866 static void create_trace_options_dir(struct trace_array *tr)
6867 {
6868         struct dentry *t_options;
6869         bool top_level = tr == &global_trace;
6870         int i;
6871
6872         t_options = trace_options_init_dentry(tr);
6873         if (!t_options)
6874                 return;
6875
6876         for (i = 0; trace_options[i]; i++) {
6877                 if (top_level ||
6878                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6879                         create_trace_option_core_file(tr, trace_options[i], i);
6880         }
6881 }
6882
6883 static ssize_t
6884 rb_simple_read(struct file *filp, char __user *ubuf,
6885                size_t cnt, loff_t *ppos)
6886 {
6887         struct trace_array *tr = filp->private_data;
6888         char buf[64];
6889         int r;
6890
6891         r = tracer_tracing_is_on(tr);
6892         r = sprintf(buf, "%d\n", r);
6893
6894         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6895 }
6896
6897 static ssize_t
6898 rb_simple_write(struct file *filp, const char __user *ubuf,
6899                 size_t cnt, loff_t *ppos)
6900 {
6901         struct trace_array *tr = filp->private_data;
6902         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6903         unsigned long val;
6904         int ret;
6905
6906         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6907         if (ret)
6908                 return ret;
6909
6910         if (buffer) {
6911                 mutex_lock(&trace_types_lock);
6912                 if (val) {
6913                         tracer_tracing_on(tr);
6914                         if (tr->current_trace->start)
6915                                 tr->current_trace->start(tr);
6916                 } else {
6917                         tracer_tracing_off(tr);
6918                         if (tr->current_trace->stop)
6919                                 tr->current_trace->stop(tr);
6920                 }
6921                 mutex_unlock(&trace_types_lock);
6922         }
6923
6924         (*ppos)++;
6925
6926         return cnt;
6927 }
6928
6929 static const struct file_operations rb_simple_fops = {
6930         .open           = tracing_open_generic_tr,
6931         .read           = rb_simple_read,
6932         .write          = rb_simple_write,
6933         .release        = tracing_release_generic_tr,
6934         .llseek         = default_llseek,
6935 };
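/*
 * Usage sketch (mirrors the handlers above): these fops back the
 * "tracing_on" file created in init_tracer_tracefs(), so from the
 * tracing directory:
 *
 *	echo 0 > tracing_on	stop recording (calls the tracer's ->stop, if any)
 *	echo 1 > tracing_on	resume recording (calls the tracer's ->start, if any)
 *	cat tracing_on		read back 0 or 1
 */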
6936
6937 struct dentry *trace_instance_dir;
6938
6939 static void
6940 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6941
6942 static int
6943 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6944 {
6945         enum ring_buffer_flags rb_flags;
6946
6947         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6948
6949         buf->tr = tr;
6950
6951         buf->buffer = ring_buffer_alloc(size, rb_flags);
6952         if (!buf->buffer)
6953                 return -ENOMEM;
6954
6955         buf->data = alloc_percpu(struct trace_array_cpu);
6956         if (!buf->data) {
6957                 ring_buffer_free(buf->buffer);
6958                 return -ENOMEM;
6959         }
6960
6961         /* Allocate the first page for all buffers */
6962         set_buffer_entries(&tr->trace_buffer,
6963                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6964
6965         return 0;
6966 }
6967
6968 static int allocate_trace_buffers(struct trace_array *tr, int size)
6969 {
6970         int ret;
6971
6972         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6973         if (ret)
6974                 return ret;
6975
6976 #ifdef CONFIG_TRACER_MAX_TRACE
6977         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6978                                     allocate_snapshot ? size : 1);
6979         if (WARN_ON(ret)) {
6980                 ring_buffer_free(tr->trace_buffer.buffer);
6981                 free_percpu(tr->trace_buffer.data);
6982                 return -ENOMEM;
6983         }
6984         tr->allocated_snapshot = allocate_snapshot;
6985
6986         /*
6987          * Only the top level trace array gets its snapshot allocated
6988          * from the kernel command line.
6989          */
6990         allocate_snapshot = false;
6991 #endif
6992         return 0;
6993 }
6994
6995 static void free_trace_buffer(struct trace_buffer *buf)
6996 {
6997         if (buf->buffer) {
6998                 ring_buffer_free(buf->buffer);
6999                 buf->buffer = NULL;
7000                 free_percpu(buf->data);
7001                 buf->data = NULL;
7002         }
7003 }
7004
7005 static void free_trace_buffers(struct trace_array *tr)
7006 {
7007         if (!tr)
7008                 return;
7009
7010         free_trace_buffer(&tr->trace_buffer);
7011
7012 #ifdef CONFIG_TRACER_MAX_TRACE
7013         free_trace_buffer(&tr->max_buffer);
7014 #endif
7015 }
7016
7017 static void init_trace_flags_index(struct trace_array *tr)
7018 {
7019         int i;
7020
7021         /* Used by the trace options files */
7022         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7023                 tr->trace_flags_index[i] = i;
7024 }
7025
7026 static void __update_tracer_options(struct trace_array *tr)
7027 {
7028         struct tracer *t;
7029
7030         for (t = trace_types; t; t = t->next)
7031                 add_tracer_options(tr, t);
7032 }
7033
7034 static void update_tracer_options(struct trace_array *tr)
7035 {
7036         mutex_lock(&trace_types_lock);
7037         __update_tracer_options(tr);
7038         mutex_unlock(&trace_types_lock);
7039 }
7040
7041 static int instance_mkdir(const char *name)
7042 {
7043         struct trace_array *tr;
7044         int ret;
7045
7046         mutex_lock(&trace_types_lock);
7047
7048         ret = -EEXIST;
7049         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7050                 if (tr->name && strcmp(tr->name, name) == 0)
7051                         goto out_unlock;
7052         }
7053
7054         ret = -ENOMEM;
7055         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7056         if (!tr)
7057                 goto out_unlock;
7058
7059         tr->name = kstrdup(name, GFP_KERNEL);
7060         if (!tr->name)
7061                 goto out_free_tr;
7062
7063         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7064                 goto out_free_tr;
7065
7066         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7067
7068         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7069
7070         raw_spin_lock_init(&tr->start_lock);
7071
7072         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7073
7074         tr->current_trace = &nop_trace;
7075
7076         INIT_LIST_HEAD(&tr->systems);
7077         INIT_LIST_HEAD(&tr->events);
7078
7079         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7080                 goto out_free_tr;
7081
7082         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7083         if (!tr->dir)
7084                 goto out_free_tr;
7085
7086         ret = event_trace_add_tracer(tr->dir, tr);
7087         if (ret) {
7088                 tracefs_remove_recursive(tr->dir);
7089                 goto out_free_tr;
7090         }
7091
7092         init_tracer_tracefs(tr, tr->dir);
7093         init_trace_flags_index(tr);
7094         __update_tracer_options(tr);
7095
7096         list_add(&tr->list, &ftrace_trace_arrays);
7097
7098         mutex_unlock(&trace_types_lock);
7099
7100         return 0;
7101
7102  out_free_tr:
7103         free_trace_buffers(tr);
7104         free_cpumask_var(tr->tracing_cpumask);
7105         kfree(tr->name);
7106         kfree(tr);
7107
7108  out_unlock:
7109         mutex_unlock(&trace_types_lock);
7110
7111         return ret;
7112
7113 }
7114
7115 static int instance_rmdir(const char *name)
7116 {
7117         struct trace_array *tr;
7118         int found = 0;
7119         int ret;
7120         int i;
7121
7122         mutex_lock(&trace_types_lock);
7123
7124         ret = -ENODEV;
7125         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7126                 if (tr->name && strcmp(tr->name, name) == 0) {
7127                         found = 1;
7128                         break;
7129                 }
7130         }
7131         if (!found)
7132                 goto out_unlock;
7133
7134         ret = -EBUSY;
7135         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7136                 goto out_unlock;
7137
7138         list_del(&tr->list);
7139
7140         /* Disable all the flags that were enabled coming in */
7141         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7142                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7143                         set_tracer_flag(tr, 1 << i, 0);
7144         }
7145
7146         tracing_set_nop(tr);
7147         event_trace_del_tracer(tr);
7148         ftrace_destroy_function_files(tr);
7149         tracefs_remove_recursive(tr->dir);
7150         free_trace_buffers(tr);
7151
7152         for (i = 0; i < tr->nr_topts; i++) {
7153                 kfree(tr->topts[i].topts);
7154         }
7155         kfree(tr->topts);
7156
7157         kfree(tr->name);
7158         kfree(tr);
7159
7160         ret = 0;
7161
7162  out_unlock:
7163         mutex_unlock(&trace_types_lock);
7164
7165         return ret;
7166 }
7167
7168 static __init void create_trace_instances(struct dentry *d_tracer)
7169 {
7170         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7171                                                          instance_mkdir,
7172                                                          instance_rmdir);
7173         if (WARN_ON(!trace_instance_dir))
7174                 return;
7175 }
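/*
 * Usage sketch (driven by the mkdir/rmdir callbacks registered above):
 * every directory made under instances/ becomes its own trace_array with
 * its own buffers and files, e.g. from the tracing directory:
 *
 *	mkdir instances/foo		calls instance_mkdir("foo")
 *	echo 1 > instances/foo/tracing_on
 *	rmdir instances/foo		calls instance_rmdir("foo")
 *
 * The rmdir fails with -EBUSY while the instance is still referenced
 * (tr->ref or the current tracer's ref is non-zero).
 */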
7176
7177 static void
7178 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7179 {
7180         int cpu;
7181
7182         trace_create_file("available_tracers", 0444, d_tracer,
7183                         tr, &show_traces_fops);
7184
7185         trace_create_file("current_tracer", 0644, d_tracer,
7186                         tr, &set_tracer_fops);
7187
7188         trace_create_file("tracing_cpumask", 0644, d_tracer,
7189                           tr, &tracing_cpumask_fops);
7190
7191         trace_create_file("trace_options", 0644, d_tracer,
7192                           tr, &tracing_iter_fops);
7193
7194         trace_create_file("trace", 0644, d_tracer,
7195                           tr, &tracing_fops);
7196
7197         trace_create_file("trace_pipe", 0444, d_tracer,
7198                           tr, &tracing_pipe_fops);
7199
7200         trace_create_file("buffer_size_kb", 0644, d_tracer,
7201                           tr, &tracing_entries_fops);
7202
7203         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7204                           tr, &tracing_total_entries_fops);
7205
7206         trace_create_file("free_buffer", 0200, d_tracer,
7207                           tr, &tracing_free_buffer_fops);
7208
7209         trace_create_file("trace_marker", 0220, d_tracer,
7210                           tr, &tracing_mark_fops);
7211
7212         trace_create_file("trace_clock", 0644, d_tracer, tr,
7213                           &trace_clock_fops);
7214
7215         trace_create_file("tracing_on", 0644, d_tracer,
7216                           tr, &rb_simple_fops);
7217
7218         create_trace_options_dir(tr);
7219
7220 #ifdef CONFIG_TRACER_MAX_TRACE
7221         trace_create_file("tracing_max_latency", 0644, d_tracer,
7222                         &tr->max_latency, &tracing_max_lat_fops);
7223 #endif
7224
7225         if (ftrace_create_function_files(tr, d_tracer))
7226                 WARN(1, "Could not allocate function filter files");
7227
7228 #ifdef CONFIG_TRACER_SNAPSHOT
7229         trace_create_file("snapshot", 0644, d_tracer,
7230                           tr, &snapshot_fops);
7231 #endif
7232
7233         for_each_tracing_cpu(cpu)
7234                 tracing_init_tracefs_percpu(tr, cpu);
7235
7236         ftrace_init_tracefs(tr, d_tracer);
7237 }
7238
7239 static struct vfsmount *trace_automount(void *ignore)
7240 {
7241         struct vfsmount *mnt;
7242         struct file_system_type *type;
7243
7244         /*
7245          * To maintain backward compatibility for tools that mount
7246          * debugfs to get to the tracing facility, tracefs is automatically
7247          * mounted to the debugfs/tracing directory.
7248          */
7249         type = get_fs_type("tracefs");
7250         if (!type)
7251                 return NULL;
7252         mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
7253         put_filesystem(type);
7254         if (IS_ERR(mnt))
7255                 return NULL;
7256         mntget(mnt);
7257
7258         return mnt;
7259 }
7260
7261 /**
7262  * tracing_init_dentry - initialize top level trace array
7263  *
7264  * This is called when creating files or directories in the tracing
7265  * directory. It is called via fs_initcall() by any of the boot up code
7266  * and expects to return the dentry of the top level tracing directory.
7267  */
7268 struct dentry *tracing_init_dentry(void)
7269 {
7270         struct trace_array *tr = &global_trace;
7271
7272         /* The top level trace array uses NULL as parent */
7273         if (tr->dir)
7274                 return NULL;
7275
7276         if (WARN_ON(!tracefs_initialized()) ||
7277                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7278                  WARN_ON(!debugfs_initialized())))
7279                 return ERR_PTR(-ENODEV);
7280
7281         /*
7282          * As there may still be users that expect the tracing
7283          * files to exist in debugfs/tracing, we must automount
7284          * the tracefs file system there, so older tools still
7285          * work with the newer kerenl.
7286          * work with the newer kernel.
7287         tr->dir = debugfs_create_automount("tracing", NULL,
7288                                            trace_automount, NULL);
7289         if (!tr->dir) {
7290                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7291                 return ERR_PTR(-ENOMEM);
7292         }
7293
7294         return NULL;
7295 }
7296
7297 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7298 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7299
7300 static void __init trace_enum_init(void)
7301 {
7302         int len;
7303
7304         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7305         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7306 }
7307
7308 #ifdef CONFIG_MODULES
7309 static void trace_module_add_enums(struct module *mod)
7310 {
7311         if (!mod->num_trace_enums)
7312                 return;
7313
7314         /*
7315          * Modules with bad taint do not have events created, so do
7316          * not bother with enums either.
7317          */
7318         if (trace_module_has_bad_taint(mod))
7319                 return;
7320
7321         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7322 }
7323
7324 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7325 static void trace_module_remove_enums(struct module *mod)
7326 {
7327         union trace_enum_map_item *map;
7328         union trace_enum_map_item **last = &trace_enum_maps;
7329
7330         if (!mod->num_trace_enums)
7331                 return;
7332
7333         mutex_lock(&trace_enum_mutex);
7334
7335         map = trace_enum_maps;
7336
7337         while (map) {
7338                 if (map->head.mod == mod)
7339                         break;
7340                 map = trace_enum_jmp_to_tail(map);
7341                 last = &map->tail.next;
7342                 map = map->tail.next;
7343         }
7344         if (!map)
7345                 goto out;
7346
7347         *last = trace_enum_jmp_to_tail(map)->tail.next;
7348         kfree(map);
7349  out:
7350         mutex_unlock(&trace_enum_mutex);
7351 }
7352 #else
7353 static inline void trace_module_remove_enums(struct module *mod) { }
7354 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7355
7356 static int trace_module_notify(struct notifier_block *self,
7357                                unsigned long val, void *data)
7358 {
7359         struct module *mod = data;
7360
7361         switch (val) {
7362         case MODULE_STATE_COMING:
7363                 trace_module_add_enums(mod);
7364                 break;
7365         case MODULE_STATE_GOING:
7366                 trace_module_remove_enums(mod);
7367                 break;
7368         }
7369
7370         return 0;
7371 }
7372
7373 static struct notifier_block trace_module_nb = {
7374         .notifier_call = trace_module_notify,
7375         .priority = 0,
7376 };
7377 #endif /* CONFIG_MODULES */
7378
7379 static __init int tracer_init_tracefs(void)
7380 {
7381         struct dentry *d_tracer;
7382
7383         trace_access_lock_init();
7384
7385         d_tracer = tracing_init_dentry();
7386         if (IS_ERR(d_tracer))
7387                 return 0;
7388
7389         init_tracer_tracefs(&global_trace, d_tracer);
7390
7391         trace_create_file("tracing_thresh", 0644, d_tracer,
7392                         &global_trace, &tracing_thresh_fops);
7393
7394         trace_create_file("README", 0444, d_tracer,
7395                         NULL, &tracing_readme_fops);
7396
7397         trace_create_file("saved_cmdlines", 0444, d_tracer,
7398                         NULL, &tracing_saved_cmdlines_fops);
7399
7400         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7401                           NULL, &tracing_saved_cmdlines_size_fops);
7402
7403         trace_enum_init();
7404
7405         trace_create_enum_file(d_tracer);
7406
7407 #ifdef CONFIG_MODULES
7408         register_module_notifier(&trace_module_nb);
7409 #endif
7410
7411 #ifdef CONFIG_DYNAMIC_FTRACE
7412         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7413                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7414 #endif
7415
7416         create_trace_instances(d_tracer);
7417
7418         update_tracer_options(&global_trace);
7419
7420         return 0;
7421 }
7422
7423 static int trace_panic_handler(struct notifier_block *this,
7424                                unsigned long event, void *unused)
7425 {
7426         if (ftrace_dump_on_oops)
7427                 ftrace_dump(ftrace_dump_on_oops);
7428         return NOTIFY_OK;
7429 }
7430
7431 static struct notifier_block trace_panic_notifier = {
7432         .notifier_call  = trace_panic_handler,
7433         .next           = NULL,
7434         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7435 };
7436
7437 static int trace_die_handler(struct notifier_block *self,
7438                              unsigned long val,
7439                              void *data)
7440 {
7441         switch (val) {
7442         case DIE_OOPS:
7443                 if (ftrace_dump_on_oops)
7444                         ftrace_dump(ftrace_dump_on_oops);
7445                 break;
7446         default:
7447                 break;
7448         }
7449         return NOTIFY_OK;
7450 }
7451
7452 static struct notifier_block trace_die_notifier = {
7453         .notifier_call = trace_die_handler,
7454         .priority = 200
7455 };
7456
7457 /*
7458  * printk is set to a max of 1024; we really don't need it that big.
7459  * Nothing should be printing 1000 characters anyway.
7460  */
7461 #define TRACE_MAX_PRINT         1000
7462
7463 /*
7464  * Define here KERN_TRACE so that we have one place to modify
7465  * it if we decide to change what log level the ftrace dump
7466  * should be at.
7467  */
7468 #define KERN_TRACE              KERN_EMERG
7469
7470 void
7471 trace_printk_seq(struct trace_seq *s)
7472 {
7473         /* Probably should print a warning here. */
7474         if (s->seq.len >= TRACE_MAX_PRINT)
7475                 s->seq.len = TRACE_MAX_PRINT;
7476
7477         /*
7478          * More paranoid code. Although the buffer size is set to
7479          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7480          * an extra layer of protection.
7481          */
7482         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7483                 s->seq.len = s->seq.size - 1;
7484
7485         /* Should be zero terminated, but we are paranoid. */
7486         s->buffer[s->seq.len] = 0;
7487
7488         printk(KERN_TRACE "%s", s->buffer);
7489
7490         trace_seq_init(s);
7491 }
7492
7493 void trace_init_global_iter(struct trace_iterator *iter)
7494 {
7495         iter->tr = &global_trace;
7496         iter->trace = iter->tr->current_trace;
7497         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7498         iter->trace_buffer = &global_trace.trace_buffer;
7499
7500         if (iter->trace && iter->trace->open)
7501                 iter->trace->open(iter);
7502
7503         /* Annotate start of buffers if we had overruns */
7504         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7505                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7506
7507         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7508         if (trace_clocks[iter->tr->clock_id].in_ns)
7509                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7510 }
7511
7512 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7513 {
7514         /* use static because iter can be a bit big for the stack */
7515         static struct trace_iterator iter;
7516         static atomic_t dump_running;
7517         struct trace_array *tr = &global_trace;
7518         unsigned int old_userobj;
7519         unsigned long flags;
7520         int cnt = 0, cpu;
7521
7522         /* Only allow one dump user at a time. */
7523         if (atomic_inc_return(&dump_running) != 1) {
7524                 atomic_dec(&dump_running);
7525                 return;
7526         }
7527
7528         /*
7529          * Always turn off tracing when we dump.
7530          * We don't need to show trace output of what happens
7531          * between multiple crashes.
7532          *
7533          * If the user does a sysrq-z, then they can re-enable
7534          * tracing with echo 1 > tracing_on.
7535          */
7536         tracing_off();
7537
7538         local_irq_save(flags);
7539
7540         /* Simulate the iterator */
7541         trace_init_global_iter(&iter);
7542
7543         for_each_tracing_cpu(cpu) {
7544                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7545         }
7546
7547         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7548
7549         /* don't look at user memory in panic mode */
7550         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7551
7552         switch (oops_dump_mode) {
7553         case DUMP_ALL:
7554                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7555                 break;
7556         case DUMP_ORIG:
7557                 iter.cpu_file = raw_smp_processor_id();
7558                 break;
7559         case DUMP_NONE:
7560                 goto out_enable;
7561         default:
7562                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7563                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7564         }
7565
7566         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7567
7568         /* Did function tracer already get disabled? */
7569         if (ftrace_is_dead()) {
7570                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7571                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7572         }
7573
7574         /*
7575          * We need to stop all tracing on all CPUs to read
7576          * the next buffer. This is a bit expensive, but is
7577          * not done often. We read all that we can,
7578          * and then release the locks again.
7579          */
7580
7581         while (!trace_empty(&iter)) {
7582
7583                 if (!cnt)
7584                         printk(KERN_TRACE "---------------------------------\n");
7585
7586                 cnt++;
7587
7588                 /* reset all but tr, trace, and overruns */
7589                 memset(&iter.seq, 0,
7590                        sizeof(struct trace_iterator) -
7591                        offsetof(struct trace_iterator, seq));
7592                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7593                 iter.pos = -1;
7594
7595                 if (trace_find_next_entry_inc(&iter) != NULL) {
7596                         int ret;
7597
7598                         ret = print_trace_line(&iter);
7599                         if (ret != TRACE_TYPE_NO_CONSUME)
7600                                 trace_consume(&iter);
7601                 }
7602                 touch_nmi_watchdog();
7603
7604                 trace_printk_seq(&iter.seq);
7605         }
7606
7607         if (!cnt)
7608                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7609         else
7610                 printk(KERN_TRACE "---------------------------------\n");
7611
7612  out_enable:
7613         tr->trace_flags |= old_userobj;
7614
7615         for_each_tracing_cpu(cpu) {
7616                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7617         }
7618         atomic_dec(&dump_running);
7619         local_irq_restore(flags);
7620 }
7621 EXPORT_SYMBOL_GPL(ftrace_dump);
7622
7623 __init static int tracer_alloc_buffers(void)
7624 {
7625         int ring_buf_size;
7626         int ret = -ENOMEM;
7627
7628         /*
7629          * Make sure we don't accidently add more trace options
7630          * Make sure we don't accidentally add more trace options
7631          */
7632         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7633
7634         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7635                 goto out;
7636
7637         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7638                 goto out_free_buffer_mask;
7639
7640         /* Only allocate trace_printk buffers if a trace_printk exists */
7641         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7642                 /* Must be called before global_trace.buffer is allocated */
7643                 trace_printk_init_buffers();
7644
7645         /* To save memory, keep the ring buffer size to its minimum */
7646         if (ring_buffer_expanded)
7647                 ring_buf_size = trace_buf_size;
7648         else
7649                 ring_buf_size = 1;
7650
7651         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7652         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7653
7654         raw_spin_lock_init(&global_trace.start_lock);
7655
7656         /* Used for event triggers */
7657         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7658         if (!temp_buffer)
7659                 goto out_free_cpumask;
7660
7661         if (trace_create_savedcmd() < 0)
7662                 goto out_free_temp_buffer;
7663
7664         /* TODO: make the number of buffers hot pluggable with CPUs */
7665         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7666                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7667                 WARN_ON(1);
7668                 goto out_free_savedcmd;
7669         }
7670
7671         if (global_trace.buffer_disabled)
7672                 tracing_off();
7673
7674         if (trace_boot_clock) {
7675                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7676                 if (ret < 0)
7677                         pr_warn("Trace clock %s not defined, going back to default\n",
7678                                 trace_boot_clock);
7679         }
7680
7681         /*
7682          * register_tracer() might reference current_trace, so it
7683          * needs to be set before we register anything. This is
7684          * just a bootstrap of current_trace anyway.
7685          */
7686         global_trace.current_trace = &nop_trace;
7687
7688         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7689
7690         ftrace_init_global_array_ops(&global_trace);
7691
7692         init_trace_flags_index(&global_trace);
7693
7694         register_tracer(&nop_trace);
7695
7696         /* All seems OK, enable tracing */
7697         tracing_disabled = 0;
7698
7699         atomic_notifier_chain_register(&panic_notifier_list,
7700                                        &trace_panic_notifier);
7701
7702         register_die_notifier(&trace_die_notifier);
7703
7704         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7705
7706         INIT_LIST_HEAD(&global_trace.systems);
7707         INIT_LIST_HEAD(&global_trace.events);
7708         list_add(&global_trace.list, &ftrace_trace_arrays);
7709
7710         apply_trace_boot_options();
7711
7712         register_snapshot_cmd();
7713
7714         return 0;
7715
7716 out_free_savedcmd:
7717         free_saved_cmdlines_buffer(savedcmd);
7718 out_free_temp_buffer:
7719         ring_buffer_free(temp_buffer);
7720 out_free_cpumask:
7721         free_cpumask_var(global_trace.tracing_cpumask);
7722 out_free_buffer_mask:
7723         free_cpumask_var(tracing_buffer_mask);
7724 out:
7725         return ret;
7726 }
7727
7728 void __init trace_init(void)
7729 {
7730         if (tracepoint_printk) {
7731                 tracepoint_print_iter =
7732                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7733                 if (WARN_ON(!tracepoint_print_iter))
7734                         tracepoint_printk = 0;
7735         }
7736         tracer_alloc_buffers();
7737         trace_event_init();
7738 }
7739
7740 __init static int clear_boot_tracer(void)
7741 {
7742         /*
7743          * The default bootup tracer string lives in an init section.
7744          * This function is called at late_initcall time. If we did not
7745          * find the boot tracer, then clear it out, to prevent
7746          * later registration from accessing the buffer that is
7747          * about to be freed.
7748          */
7749         if (!default_bootup_tracer)
7750                 return 0;
7751
7752         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7753                default_bootup_tracer);
7754         default_bootup_tracer = NULL;
7755
7756         return 0;
7757 }
7758
7759 fs_initcall(tracer_init_tracefs);
7760 late_initcall(clear_boot_tracer);