kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring buffer, such as trace_printk(), could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
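/*
 * Stub ->set_flag() callback. register_tracer() installs this for tracers
 * that do not provide their own; it accepts any flag change and does nothing.
 */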
76 static int
77 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
78 {
79         return 0;
80 }
81
82 /*
83  * To prevent the comm cache from being overwritten when no
84  * tracing is active, only save the comm when a trace event
85  * occurred.
86  */
87 static DEFINE_PER_CPU(bool, trace_cmdline_save);
88
89 /*
90  * Kill all tracing for good (never come back).
91  * It is initialized to 1 but will be set back to zero if the initialization
92  * of the tracer is successful. That is the only place that ever sets
93  * it back to zero.
94  */
95 static int tracing_disabled = 1;
96
97 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
98
99 cpumask_var_t __read_mostly     tracing_buffer_mask;
100
101 /*
102  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
103  *
104  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
105  * is set, then ftrace_dump is called. This will output the contents
106  * of the ftrace buffers to the console.  This is very useful for
107  * capturing traces that lead to crashes and outputting them to a
108  * serial console.
109  *
110  * It is off by default, but you can enable it either by specifying
111  * "ftrace_dump_on_oops" on the kernel command line, or by setting
112  * /proc/sys/kernel/ftrace_dump_on_oops.
113  * Set it to 1 to dump the buffers of all CPUs.
114  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
115  */
116
117 enum ftrace_dump_mode ftrace_dump_on_oops;
118
119 /* When set, tracing will stop when a WARN*() is hit */
120 int __disable_trace_on_warning;
121
122 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
123
124 #define MAX_TRACER_SIZE         100
125 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
126 static char *default_bootup_tracer;
127
128 static bool allocate_snapshot;
129
130 static int __init set_cmdline_ftrace(char *str)
131 {
132         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
133         default_bootup_tracer = bootup_tracer_buf;
134         /* We are using ftrace early, expand it */
135         ring_buffer_expanded = true;
136         return 1;
137 }
138 __setup("ftrace=", set_cmdline_ftrace);
139
140 static int __init set_ftrace_dump_on_oops(char *str)
141 {
142         if (*str++ != '=' || !*str) {
143                 ftrace_dump_on_oops = DUMP_ALL;
144                 return 1;
145         }
146
147         if (!strcmp("orig_cpu", str)) {
148                 ftrace_dump_on_oops = DUMP_ORIG;
149                 return 1;
150         }
151
152         return 0;
153 }
154 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
155
156 static int __init stop_trace_on_warning(char *str)
157 {
158         __disable_trace_on_warning = 1;
159         return 1;
160 }
161 __setup("traceoff_on_warning", stop_trace_on_warning);
162
163 static int __init boot_alloc_snapshot(char *str)
164 {
165         allocate_snapshot = true;
166         /* We also need the main ring buffer expanded */
167         ring_buffer_expanded = true;
168         return 1;
169 }
170 __setup("alloc_snapshot", boot_alloc_snapshot);
171
172
173 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
174 static char *trace_boot_options __initdata;
175
176 static int __init set_trace_boot_options(char *str)
177 {
178         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
179         trace_boot_options = trace_boot_options_buf;
180         return 0;
181 }
182 __setup("trace_options=", set_trace_boot_options);
183
184 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
185 static char *trace_boot_clock __initdata;
186
187 static int __init set_trace_boot_clock(char *str)
188 {
189         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
190         trace_boot_clock = trace_boot_clock_buf;
191         return 0;
192 }
193 __setup("trace_clock=", set_trace_boot_clock);
194
195
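/* Convert nanoseconds to microseconds, rounding to the nearest microsecond. */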
196 unsigned long long ns2usecs(cycle_t nsec)
197 {
198         nsec += 500;
199         do_div(nsec, 1000);
200         return nsec;
201 }
202
203 /*
204  * The global_trace is the descriptor that holds the tracing
205  * buffers for the live tracing. For each CPU, it contains
206  * a linked list of pages that will store trace entries. The
207  * page descriptors of the pages in memory are used to hold
208  * the linked list, by linking the lru item in each page descriptor
209  * to the other pages of the per-CPU buffer.
210  *
211  * For each active CPU there is a data field that holds the
212  * pages for the buffer for that CPU. Each CPU has the same number
213  * of pages allocated for its buffer.
214  */
215 static struct trace_array       global_trace;
216
217 LIST_HEAD(ftrace_trace_arrays);
218
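/*
 * trace_array_get - take a reference on a trace array
 * @this_tr: the trace array to reference
 *
 * Returns 0 and bumps the reference count if @this_tr is on the list of
 * trace arrays, or -ENODEV if it is not.
 */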
219 int trace_array_get(struct trace_array *this_tr)
220 {
221         struct trace_array *tr;
222         int ret = -ENODEV;
223
224         mutex_lock(&trace_types_lock);
225         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
226                 if (tr == this_tr) {
227                         tr->ref++;
228                         ret = 0;
229                         break;
230                 }
231         }
232         mutex_unlock(&trace_types_lock);
233
234         return ret;
235 }
236
237 static void __trace_array_put(struct trace_array *this_tr)
238 {
239         WARN_ON(!this_tr->ref);
240         this_tr->ref--;
241 }
242
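/* Drop a reference previously taken with trace_array_get(). */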
243 void trace_array_put(struct trace_array *this_tr)
244 {
245         mutex_lock(&trace_types_lock);
246         __trace_array_put(this_tr);
247         mutex_unlock(&trace_types_lock);
248 }
249
250 int filter_check_discard(struct ftrace_event_file *file, void *rec,
251                          struct ring_buffer *buffer,
252                          struct ring_buffer_event *event)
253 {
254         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
255             !filter_match_preds(file->filter, rec)) {
256                 ring_buffer_discard_commit(buffer, event);
257                 return 1;
258         }
259
260         return 0;
261 }
262 EXPORT_SYMBOL_GPL(filter_check_discard);
263
264 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
265                               struct ring_buffer *buffer,
266                               struct ring_buffer_event *event)
267 {
268         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
269             !filter_match_preds(call->filter, rec)) {
270                 ring_buffer_discard_commit(buffer, event);
271                 return 1;
272         }
273
274         return 0;
275 }
276 EXPORT_SYMBOL_GPL(call_filter_check_discard);
277
278 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
279 {
280         u64 ts;
281
282         /* Early boot up does not have a buffer yet */
283         if (!buf->buffer)
284                 return trace_clock_local();
285
286         ts = ring_buffer_time_stamp(buf->buffer, cpu);
287         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
288
289         return ts;
290 }
291
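/*
 * ftrace_now - current trace clock timestamp for @cpu, taken from the
 * global trace buffer (or from trace_clock_local() before the buffer
 * has been allocated).
 */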
292 cycle_t ftrace_now(int cpu)
293 {
294         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
295 }
296
297 /**
298  * tracing_is_enabled - Show if global_trace has been disabled
299  *
300  * Shows if the global trace has been enabled or not. It uses the
301  * mirror flag "buffer_disabled", which is checked in fast paths such as by
302  * the irqsoff tracer. But it may be inaccurate due to races. If you
303  * need to know the accurate state, use tracing_is_on() which is a little
304  * slower, but accurate.
305  */
306 int tracing_is_enabled(void)
307 {
308         /*
309          * For quick access (irqsoff uses this in fast path), just
310          * return the mirror variable of the state of the ring buffer.
311          * It's a little racy, but we don't really care.
312          */
313         smp_rmb();
314         return !global_trace.buffer_disabled;
315 }
316
317 /*
318  * trace_buf_size is the size in bytes that is allocated
319  * for a buffer. Note, the number of bytes is always rounded
320  * to page size.
321  *
322  * This number is purposely set to a low number of 16384 entries.
323  * If a dump on oops happens, it is much appreciated not to have
324  * to wait for all that output. Anyway, this is configurable at both
325  * boot time and run time.
326  */
327 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
328
329 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
330
331 /* trace_types holds a link list of available tracers. */
332 static struct tracer            *trace_types __read_mostly;
333
334 /*
335  * trace_types_lock is used to protect the trace_types list.
336  */
337 DEFINE_MUTEX(trace_types_lock);
338
339 /*
340  * serialize the access of the ring buffer
341  *
342  * The ring buffer serializes readers, but that is only low level protection.
343  * The validity of the events (returned by ring_buffer_peek() etc.)
344  * is not protected by the ring buffer.
345  *
346  * The content of events may become garbage if we allow other processes to
347  * consume these events concurrently:
348  *   A) the page of the consumed events may become a normal page
349  *      (not a reader page) in the ring buffer, and this page will be rewritten
350  *      by the events producer.
351  *   B) the page of the consumed events may become a page for splice_read,
352  *      and this page will be returned to the system.
353  *
354  * These primitives allow multiple processes to access different per-cpu ring
355  * buffers concurrently.
356  *
357  * These primitives don't distinguish read-only and read-consume access.
358  * Multiple read-only accesses are also serialized.
359  */
360
361 #ifdef CONFIG_SMP
362 static DECLARE_RWSEM(all_cpu_access_lock);
363 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
364
365 static inline void trace_access_lock(int cpu)
366 {
367         if (cpu == RING_BUFFER_ALL_CPUS) {
368                 /* gain it for accessing the whole ring buffer. */
369                 down_write(&all_cpu_access_lock);
370         } else {
371                 /* gain it for accessing a cpu ring buffer. */
372
373                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
374                 down_read(&all_cpu_access_lock);
375
376                 /* Secondly block other access to this @cpu ring buffer. */
377                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
378         }
379 }
380
381 static inline void trace_access_unlock(int cpu)
382 {
383         if (cpu == RING_BUFFER_ALL_CPUS) {
384                 up_write(&all_cpu_access_lock);
385         } else {
386                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
387                 up_read(&all_cpu_access_lock);
388         }
389 }
390
391 static inline void trace_access_lock_init(void)
392 {
393         int cpu;
394
395         for_each_possible_cpu(cpu)
396                 mutex_init(&per_cpu(cpu_access_lock, cpu));
397 }
398
399 #else
400
401 static DEFINE_MUTEX(access_lock);
402
403 static inline void trace_access_lock(int cpu)
404 {
405         (void)cpu;
406         mutex_lock(&access_lock);
407 }
408
409 static inline void trace_access_unlock(int cpu)
410 {
411         (void)cpu;
412         mutex_unlock(&access_lock);
413 }
414
415 static inline void trace_access_lock_init(void)
416 {
417 }
418
419 #endif
420
421 /* trace_flags holds trace_options default values */
422 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
423         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
424         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
425         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
426
427 static void tracer_tracing_on(struct trace_array *tr)
428 {
429         if (tr->trace_buffer.buffer)
430                 ring_buffer_record_on(tr->trace_buffer.buffer);
431         /*
432          * This flag is looked at when buffers haven't been allocated
433          * yet, or by some tracers (like irqsoff) that just want to
434          * know if the ring buffer has been disabled, but can handle
435          * races where it gets disabled while we still do a record.
436          * As the check is in the fast path of the tracers, it is more
437          * important to be fast than accurate.
438          */
439         tr->buffer_disabled = 0;
440         /* Make the flag seen by readers */
441         smp_wmb();
442 }
443
444 /**
445  * tracing_on - enable tracing buffers
446  *
447  * This function enables tracing buffers that may have been
448  * disabled with tracing_off.
449  */
450 void tracing_on(void)
451 {
452         tracer_tracing_on(&global_trace);
453 }
454 EXPORT_SYMBOL_GPL(tracing_on);
455
456 /**
457  * __trace_puts - write a constant string into the trace buffer.
458  * @ip:    The address of the caller
459  * @str:   The constant string to write
460  * @size:  The size of the string.
461  */
462 int __trace_puts(unsigned long ip, const char *str, int size)
463 {
464         struct ring_buffer_event *event;
465         struct ring_buffer *buffer;
466         struct print_entry *entry;
467         unsigned long irq_flags;
468         int alloc;
469
470         if (unlikely(tracing_selftest_running || tracing_disabled))
471                 return 0;
472
473         alloc = sizeof(*entry) + size + 2; /* possible \n added */
474
475         local_save_flags(irq_flags);
476         buffer = global_trace.trace_buffer.buffer;
477         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
478                                           irq_flags, preempt_count());
479         if (!event)
480                 return 0;
481
482         entry = ring_buffer_event_data(event);
483         entry->ip = ip;
484
485         memcpy(&entry->buf, str, size);
486
487         /* Add a newline if necessary */
488         if (entry->buf[size - 1] != '\n') {
489                 entry->buf[size] = '\n';
490                 entry->buf[size + 1] = '\0';
491         } else
492                 entry->buf[size] = '\0';
493
494         __buffer_unlock_commit(buffer, event);
495
496         return size;
497 }
498 EXPORT_SYMBOL_GPL(__trace_puts);
499
500 /**
501  * __trace_bputs - write the pointer to a constant string into trace buffer
502  * @ip:    The address of the caller
503  * @str:   The constant string to write to the buffer
504  */
505 int __trace_bputs(unsigned long ip, const char *str)
506 {
507         struct ring_buffer_event *event;
508         struct ring_buffer *buffer;
509         struct bputs_entry *entry;
510         unsigned long irq_flags;
511         int size = sizeof(struct bputs_entry);
512
513         if (unlikely(tracing_selftest_running || tracing_disabled))
514                 return 0;
515
516         local_save_flags(irq_flags);
517         buffer = global_trace.trace_buffer.buffer;
518         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
519                                           irq_flags, preempt_count());
520         if (!event)
521                 return 0;
522
523         entry = ring_buffer_event_data(event);
524         entry->ip                       = ip;
525         entry->str                      = str;
526
527         __buffer_unlock_commit(buffer, event);
528
529         return 1;
530 }
531 EXPORT_SYMBOL_GPL(__trace_bputs);
532
533 #ifdef CONFIG_TRACER_SNAPSHOT
534 /**
535  * tracing_snapshot - take a snapshot of the current buffer.
536  *
537  * This causes a swap between the snapshot buffer and the current live
538  * tracing buffer. You can use this to take snapshots of the live
539  * trace when some condition is triggered, but continue to trace.
540  *
541  * Note, make sure to allocate the snapshot with either
542  * tracing_snapshot_alloc(), or by doing it manually
543  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
544  *
545  * If the snapshot buffer is not allocated, this call will stop tracing,
546  * basically making a permanent snapshot.
547  */
548 void tracing_snapshot(void)
549 {
550         struct trace_array *tr = &global_trace;
551         struct tracer *tracer = tr->current_trace;
552         unsigned long flags;
553
554         if (in_nmi()) {
555                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
556                 internal_trace_puts("*** snapshot is being ignored        ***\n");
557                 return;
558         }
559
560         if (!tr->allocated_snapshot) {
561                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
562                 internal_trace_puts("*** stopping trace here!   ***\n");
563                 tracing_off();
564                 return;
565         }
566
567         /* Note, snapshot can not be used when the tracer uses it */
568         if (tracer->use_max_tr) {
569                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
570                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
571                 return;
572         }
573
574         local_irq_save(flags);
575         update_max_tr(tr, current, smp_processor_id());
576         local_irq_restore(flags);
577 }
578 EXPORT_SYMBOL_GPL(tracing_snapshot);
579
580 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
581                                         struct trace_buffer *size_buf, int cpu_id);
582 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
583
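/*
 * Allocate the spare (snapshot) buffer by resizing tr->max_buffer to the
 * size of tr->trace_buffer. Does nothing if it is already allocated.
 */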
584 static int alloc_snapshot(struct trace_array *tr)
585 {
586         int ret;
587
588         if (!tr->allocated_snapshot) {
589
590                 /* allocate spare buffer */
591                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
592                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
593                 if (ret < 0)
594                         return ret;
595
596                 tr->allocated_snapshot = true;
597         }
598
599         return 0;
600 }
601
602 static void free_snapshot(struct trace_array *tr)
603 {
604          * We don't free the ring buffer; instead, we resize it because
605          * the max_tr ring buffer has some state (e.g. ring->clock) and
606          * we want to preserve it.
607          * we want preserve it.
608          */
609         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
610         set_buffer_entries(&tr->max_buffer, 1);
611         tracing_reset_online_cpus(&tr->max_buffer);
612         tr->allocated_snapshot = false;
613 }
614
615 /**
616  * tracing_alloc_snapshot - allocate snapshot buffer.
617  *
618  * This only allocates the snapshot buffer if it isn't already
619  * allocated - it doesn't also take a snapshot.
620  *
621  * This is meant to be used in cases where the snapshot buffer needs
622  * to be set up for events that can't sleep but need to be able to
623  * trigger a snapshot.
624  */
625 int tracing_alloc_snapshot(void)
626 {
627         struct trace_array *tr = &global_trace;
628         int ret;
629
630         ret = alloc_snapshot(tr);
631         WARN_ON(ret < 0);
632
633         return ret;
634 }
635 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
636
637 /**
638  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
639  *
640  * This is similar to tracing_snapshot(), but it will allocate the
641  * snapshot buffer if it isn't already allocated. Use this only
642  * where it is safe to sleep, as the allocation may sleep.
643  *
644  * This causes a swap between the snapshot buffer and the current live
645  * tracing buffer. You can use this to take snapshots of the live
646  * trace when some condition is triggered, but continue to trace.
647  */
648 void tracing_snapshot_alloc(void)
649 {
650         int ret;
651
652         ret = tracing_alloc_snapshot();
653         if (ret < 0)
654                 return;
655
656         tracing_snapshot();
657 }
658 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
659 #else
660 void tracing_snapshot(void)
661 {
662         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
663 }
664 EXPORT_SYMBOL_GPL(tracing_snapshot);
665 int tracing_alloc_snapshot(void)
666 {
667         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
668         return -ENODEV;
669 }
670 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
671 void tracing_snapshot_alloc(void)
672 {
673         /* Give warning */
674         tracing_snapshot();
675 }
676 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
677 #endif /* CONFIG_TRACER_SNAPSHOT */
678
679 static void tracer_tracing_off(struct trace_array *tr)
680 {
681         if (tr->trace_buffer.buffer)
682                 ring_buffer_record_off(tr->trace_buffer.buffer);
683         /*
684          * This flag is looked at when buffers haven't been allocated
685          * yet, or by some tracers (like irqsoff) that just want to
686          * know if the ring buffer has been disabled, but can handle
687          * races where it gets disabled while we still do a record.
688          * As the check is in the fast path of the tracers, it is more
689          * important to be fast than accurate.
690          */
691         tr->buffer_disabled = 1;
692         /* Make the flag seen by readers */
693         smp_wmb();
694 }
695
696 /**
697  * tracing_off - turn off tracing buffers
698  *
699  * This function stops the tracing buffers from recording data.
700  * It does not disable any overhead the tracers themselves may
701  * be causing. This function simply causes all recording to
702  * the ring buffers to fail.
703  */
704 void tracing_off(void)
705 {
706         tracer_tracing_off(&global_trace);
707 }
708 EXPORT_SYMBOL_GPL(tracing_off);
709
710 void disable_trace_on_warning(void)
711 {
712         if (__disable_trace_on_warning)
713                 tracing_off();
714 }
715
716 /**
717  * tracer_tracing_is_on - show real state of ring buffer enabled
718  * @tr : the trace array to know if ring buffer is enabled
719  *
720  * Shows real state of the ring buffer if it is enabled or not.
721  */
722 static int tracer_tracing_is_on(struct trace_array *tr)
723 {
724         if (tr->trace_buffer.buffer)
725                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
726         return !tr->buffer_disabled;
727 }
728
729 /**
730  * tracing_is_on - show state of ring buffers enabled
731  */
732 int tracing_is_on(void)
733 {
734         return tracer_tracing_is_on(&global_trace);
735 }
736 EXPORT_SYMBOL_GPL(tracing_is_on);
737
738 static int __init set_buf_size(char *str)
739 {
740         unsigned long buf_size;
741
742         if (!str)
743                 return 0;
744         buf_size = memparse(str, &str);
745         /* nr_entries can not be zero */
746         if (buf_size == 0)
747                 return 0;
748         trace_buf_size = buf_size;
749         return 1;
750 }
751 __setup("trace_buf_size=", set_buf_size);
752
753 static int __init set_tracing_thresh(char *str)
754 {
755         unsigned long threshold;
756         int ret;
757
758         if (!str)
759                 return 0;
760         ret = kstrtoul(str, 0, &threshold);
761         if (ret < 0)
762                 return 0;
763         tracing_thresh = threshold * 1000;
764         return 1;
765 }
766 __setup("tracing_thresh=", set_tracing_thresh);
767
768 unsigned long nsecs_to_usecs(unsigned long nsecs)
769 {
770         return nsecs / 1000;
771 }
772
773 /* These must match the bit positions in trace_iterator_flags */
774 static const char *trace_options[] = {
775         "print-parent",
776         "sym-offset",
777         "sym-addr",
778         "verbose",
779         "raw",
780         "hex",
781         "bin",
782         "block",
783         "stacktrace",
784         "trace_printk",
785         "ftrace_preempt",
786         "branch",
787         "annotate",
788         "userstacktrace",
789         "sym-userobj",
790         "printk-msg-only",
791         "context-info",
792         "latency-format",
793         "sleep-time",
794         "graph-time",
795         "record-cmd",
796         "overwrite",
797         "disable_on_free",
798         "irq-info",
799         "markers",
800         "function-trace",
801         NULL
802 };
803
804 static struct {
805         u64 (*func)(void);
806         const char *name;
807         int in_ns;              /* is this clock in nanoseconds? */
808 } trace_clocks[] = {
809         { trace_clock_local,    "local",        1 },
810         { trace_clock_global,   "global",       1 },
811         { trace_clock_counter,  "counter",      0 },
812         { trace_clock_jiffies,  "uptime",       1 },
813         { trace_clock,          "perf",         1 },
814         ARCH_TRACE_CLOCKS
815 };
816
817 /*
818  * trace_parser_get_init - gets the buffer for trace parser
819  */
820 int trace_parser_get_init(struct trace_parser *parser, int size)
821 {
822         memset(parser, 0, sizeof(*parser));
823
824         parser->buffer = kmalloc(size, GFP_KERNEL);
825         if (!parser->buffer)
826                 return 1;
827
828         parser->size = size;
829         return 0;
830 }
831
832 /*
833  * trace_parser_put - frees the buffer for trace parser
834  */
835 void trace_parser_put(struct trace_parser *parser)
836 {
837         kfree(parser->buffer);
838 }
839
840 /*
841  * trace_get_user - reads the user input string separated by space
842  * (matched by isspace(ch))
843  *
844  * For each string found the 'struct trace_parser' is updated,
845  * and the function returns.
846  *
847  * Returns number of bytes read.
848  *
849  * See kernel/trace/trace.h for 'struct trace_parser' details.
850  */
851 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
852         size_t cnt, loff_t *ppos)
853 {
854         char ch;
855         size_t read = 0;
856         ssize_t ret;
857
858         if (!*ppos)
859                 trace_parser_clear(parser);
860
861         ret = get_user(ch, ubuf++);
862         if (ret)
863                 goto out;
864
865         read++;
866         cnt--;
867
868         /*
869          * The parser is not finished with the last write,
870          * continue reading the user input without skipping spaces.
871          */
872         if (!parser->cont) {
873                 /* skip white space */
874                 while (cnt && isspace(ch)) {
875                         ret = get_user(ch, ubuf++);
876                         if (ret)
877                                 goto out;
878                         read++;
879                         cnt--;
880                 }
881
882                 /* only spaces were written */
883                 if (isspace(ch)) {
884                         *ppos += read;
885                         ret = read;
886                         goto out;
887                 }
888
889                 parser->idx = 0;
890         }
891
892         /* read the non-space input */
893         while (cnt && !isspace(ch)) {
894                 if (parser->idx < parser->size - 1)
895                         parser->buffer[parser->idx++] = ch;
896                 else {
897                         ret = -EINVAL;
898                         goto out;
899                 }
900                 ret = get_user(ch, ubuf++);
901                 if (ret)
902                         goto out;
903                 read++;
904                 cnt--;
905         }
906
907         /* We either got finished input or we have to wait for another call. */
908         if (isspace(ch)) {
909                 parser->buffer[parser->idx] = 0;
910                 parser->cont = false;
911         } else if (parser->idx < parser->size - 1) {
912                 parser->cont = true;
913                 parser->buffer[parser->idx++] = ch;
914         } else {
915                 ret = -EINVAL;
916                 goto out;
917         }
918
919         *ppos += read;
920         ret = read;
921
922 out:
923         return ret;
924 }
925
926 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
927 {
928         int len;
929         int ret;
930
931         if (!cnt)
932                 return 0;
933
934         if (s->len <= s->readpos)
935                 return -EBUSY;
936
937         len = s->len - s->readpos;
938         if (cnt > len)
939                 cnt = len;
940         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
941         if (ret == cnt)
942                 return -EFAULT;
943
944         cnt -= ret;
945
946         s->readpos += cnt;
947         return cnt;
948 }
949
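/* Copy up to @cnt bytes of not-yet-read data from @s into @buf. */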
950 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
951 {
952         int len;
953
954         if (s->len <= s->readpos)
955                 return -EBUSY;
956
957         len = s->len - s->readpos;
958         if (cnt > len)
959                 cnt = len;
960         memcpy(buf, s->buffer + s->readpos, cnt);
961
962         s->readpos += cnt;
963         return cnt;
964 }
965
966 unsigned long __read_mostly     tracing_thresh;
967
968 #ifdef CONFIG_TRACER_MAX_TRACE
969 /*
970  * Copy the new maximum trace into the separate maximum-trace
971  * structure. (this way the maximum trace is permanently saved,
972  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
973  */
974 static void
975 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
976 {
977         struct trace_buffer *trace_buf = &tr->trace_buffer;
978         struct trace_buffer *max_buf = &tr->max_buffer;
979         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
980         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
981
982         max_buf->cpu = cpu;
983         max_buf->time_start = data->preempt_timestamp;
984
985         max_data->saved_latency = tr->max_latency;
986         max_data->critical_start = data->critical_start;
987         max_data->critical_end = data->critical_end;
988
989         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
990         max_data->pid = tsk->pid;
991         /*
992          * If tsk == current, then use current_uid(), as that does not use
993          * RCU. The irq tracer can be called out of RCU scope.
994          */
995         if (tsk == current)
996                 max_data->uid = current_uid();
997         else
998                 max_data->uid = task_uid(tsk);
999
1000         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1001         max_data->policy = tsk->policy;
1002         max_data->rt_priority = tsk->rt_priority;
1003
1004         /* record this task's comm */
1005         tracing_record_cmdline(tsk);
1006 }
1007
1008 /**
1009  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1010  * @tr: tracer
1011  * @tsk: the task with the latency
1012  * @cpu: The cpu that initiated the trace.
1013  *
1014  * Flip the buffers between the @tr and the max_tr and record information
1015  * about which task was the cause of this latency.
1016  */
1017 void
1018 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1019 {
1020         struct ring_buffer *buf;
1021
1022         if (tr->stop_count)
1023                 return;
1024
1025         WARN_ON_ONCE(!irqs_disabled());
1026
1027         if (!tr->allocated_snapshot) {
1028                 /* Only the nop tracer should hit this when disabling */
1029                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1030                 return;
1031         }
1032
1033         arch_spin_lock(&tr->max_lock);
1034
1035         buf = tr->trace_buffer.buffer;
1036         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1037         tr->max_buffer.buffer = buf;
1038
1039         __update_max_tr(tr, tsk, cpu);
1040         arch_spin_unlock(&tr->max_lock);
1041 }
1042
1043 /**
1044  * update_max_tr_single - only copy one trace over, and reset the rest
1045  * @tr: tracer
1046  * @tsk: task with the latency
1047  * @cpu: the cpu of the buffer to copy.
1048  *
1049  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1050  */
1051 void
1052 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1053 {
1054         int ret;
1055
1056         if (tr->stop_count)
1057                 return;
1058
1059         WARN_ON_ONCE(!irqs_disabled());
1060         if (!tr->allocated_snapshot) {
1061                 /* Only the nop tracer should hit this when disabling */
1062                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1063                 return;
1064         }
1065
1066         arch_spin_lock(&tr->max_lock);
1067
1068         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1069
1070         if (ret == -EBUSY) {
1071                 /*
1072                  * We failed to swap the buffer due to a commit taking
1073                  * place on this CPU. We fail to record, but we reset
1074                  * the max trace buffer (no one writes directly to it)
1075                  * and flag that it failed.
1076                  */
1077                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1078                         "Failed to swap buffers due to commit in progress\n");
1079         }
1080
1081         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1082
1083         __update_max_tr(tr, tsk, cpu);
1084         arch_spin_unlock(&tr->max_lock);
1085 }
1086 #endif /* CONFIG_TRACER_MAX_TRACE */
1087
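/*
 * Block until there is data to read on the iterator's per-cpu buffer.
 * If a buffer iterator is in use, the buffer is static, so return
 * immediately.
 */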
1088 static void wait_on_pipe(struct trace_iterator *iter)
1089 {
1090         /* Iterators are static, they should be filled or empty */
1091         if (trace_buffer_iter(iter, iter->cpu_file))
1092                 return;
1093
1094         ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1095 }
1096
1097 #ifdef CONFIG_FTRACE_STARTUP_TEST
1098 static int run_tracer_selftest(struct tracer *type)
1099 {
1100         struct trace_array *tr = &global_trace;
1101         struct tracer *saved_tracer = tr->current_trace;
1102         int ret;
1103
1104         if (!type->selftest || tracing_selftest_disabled)
1105                 return 0;
1106
1107         /*
1108          * Run a selftest on this tracer.
1109          * Here we reset the trace buffer, and set the current
1110          * tracer to be this tracer. The tracer can then run some
1111          * internal tracing to verify that everything is in order.
1112          * If we fail, we do not register this tracer.
1113          */
1114         tracing_reset_online_cpus(&tr->trace_buffer);
1115
1116         tr->current_trace = type;
1117
1118 #ifdef CONFIG_TRACER_MAX_TRACE
1119         if (type->use_max_tr) {
1120                 /* If we expanded the buffers, make sure the max is expanded too */
1121                 if (ring_buffer_expanded)
1122                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1123                                            RING_BUFFER_ALL_CPUS);
1124                 tr->allocated_snapshot = true;
1125         }
1126 #endif
1127
1128         /* the test is responsible for initializing and enabling */
1129         pr_info("Testing tracer %s: ", type->name);
1130         ret = type->selftest(type, tr);
1131         /* the test is responsible for resetting too */
1132         tr->current_trace = saved_tracer;
1133         if (ret) {
1134                 printk(KERN_CONT "FAILED!\n");
1135                 /* Add the warning after printing 'FAILED' */
1136                 WARN_ON(1);
1137                 return -1;
1138         }
1139         /* Only reset on passing, to avoid touching corrupted buffers */
1140         tracing_reset_online_cpus(&tr->trace_buffer);
1141
1142 #ifdef CONFIG_TRACER_MAX_TRACE
1143         if (type->use_max_tr) {
1144                 tr->allocated_snapshot = false;
1145
1146                 /* Shrink the max buffer again */
1147                 if (ring_buffer_expanded)
1148                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1149                                            RING_BUFFER_ALL_CPUS);
1150         }
1151 #endif
1152
1153         printk(KERN_CONT "PASSED\n");
1154         return 0;
1155 }
1156 #else
1157 static inline int run_tracer_selftest(struct tracer *type)
1158 {
1159         return 0;
1160 }
1161 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1162
1163 /**
1164  * register_tracer - register a tracer with the ftrace system.
1165  * @type: the plugin for the tracer
1166  *
1167  * Register a new plugin tracer.
1168  */
1169 int register_tracer(struct tracer *type)
1170 {
1171         struct tracer *t;
1172         int ret = 0;
1173
1174         if (!type->name) {
1175                 pr_info("Tracer must have a name\n");
1176                 return -1;
1177         }
1178
1179         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1180                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1181                 return -1;
1182         }
1183
1184         mutex_lock(&trace_types_lock);
1185
1186         tracing_selftest_running = true;
1187
1188         for (t = trace_types; t; t = t->next) {
1189                 if (strcmp(type->name, t->name) == 0) {
1190                         /* already found */
1191                         pr_info("Tracer %s already registered\n",
1192                                 type->name);
1193                         ret = -1;
1194                         goto out;
1195                 }
1196         }
1197
1198         if (!type->set_flag)
1199                 type->set_flag = &dummy_set_flag;
1200         if (!type->flags)
1201                 type->flags = &dummy_tracer_flags;
1202         else
1203                 if (!type->flags->opts)
1204                         type->flags->opts = dummy_tracer_opt;
1205
1206         ret = run_tracer_selftest(type);
1207         if (ret < 0)
1208                 goto out;
1209
1210         type->next = trace_types;
1211         trace_types = type;
1212
1213  out:
1214         tracing_selftest_running = false;
1215         mutex_unlock(&trace_types_lock);
1216
1217         if (ret || !default_bootup_tracer)
1218                 goto out_unlock;
1219
1220         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1221                 goto out_unlock;
1222
1223         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1224         /* Do we want this tracer to start on bootup? */
1225         tracing_set_tracer(&global_trace, type->name);
1226         default_bootup_tracer = NULL;
1227         /* disable other selftests, since this will break them. */
1228         tracing_selftest_disabled = true;
1229 #ifdef CONFIG_FTRACE_STARTUP_TEST
1230         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1231                type->name);
1232 #endif
1233
1234  out_unlock:
1235         return ret;
1236 }
1237
1238 void tracing_reset(struct trace_buffer *buf, int cpu)
1239 {
1240         struct ring_buffer *buffer = buf->buffer;
1241
1242         if (!buffer)
1243                 return;
1244
1245         ring_buffer_record_disable(buffer);
1246
1247         /* Make sure all commits have finished */
1248         synchronize_sched();
1249         ring_buffer_reset_cpu(buffer, cpu);
1250
1251         ring_buffer_record_enable(buffer);
1252 }
1253
1254 void tracing_reset_online_cpus(struct trace_buffer *buf)
1255 {
1256         struct ring_buffer *buffer = buf->buffer;
1257         int cpu;
1258
1259         if (!buffer)
1260                 return;
1261
1262         ring_buffer_record_disable(buffer);
1263
1264         /* Make sure all commits have finished */
1265         synchronize_sched();
1266
1267         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1268
1269         for_each_online_cpu(cpu)
1270                 ring_buffer_reset_cpu(buffer, cpu);
1271
1272         ring_buffer_record_enable(buffer);
1273 }
1274
1275 /* Must have trace_types_lock held */
1276 void tracing_reset_all_online_cpus(void)
1277 {
1278         struct trace_array *tr;
1279
1280         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1281                 tracing_reset_online_cpus(&tr->trace_buffer);
1282 #ifdef CONFIG_TRACER_MAX_TRACE
1283                 tracing_reset_online_cpus(&tr->max_buffer);
1284 #endif
1285         }
1286 }
1287
1288 #define SAVED_CMDLINES_DEFAULT 128
1289 #define NO_CMDLINE_MAP UINT_MAX
1290 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1291 struct saved_cmdlines_buffer {
1292         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1293         unsigned *map_cmdline_to_pid;
1294         unsigned cmdline_num;
1295         int cmdline_idx;
1296         char *saved_cmdlines;
1297 };
1298 static struct saved_cmdlines_buffer *savedcmd;
1299
1300 /* temporarily disable recording */
1301 static atomic_t trace_record_cmdline_disabled __read_mostly;
1302
1303 static inline char *get_saved_cmdlines(int idx)
1304 {
1305         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1306 }
1307
1308 static inline void set_cmdline(int idx, const char *cmdline)
1309 {
1310         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1311 }
1312
1313 static int allocate_cmdlines_buffer(unsigned int val,
1314                                     struct saved_cmdlines_buffer *s)
1315 {
1316         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1317                                         GFP_KERNEL);
1318         if (!s->map_cmdline_to_pid)
1319                 return -ENOMEM;
1320
1321         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1322         if (!s->saved_cmdlines) {
1323                 kfree(s->map_cmdline_to_pid);
1324                 return -ENOMEM;
1325         }
1326
1327         s->cmdline_idx = 0;
1328         s->cmdline_num = val;
1329         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1330                sizeof(s->map_pid_to_cmdline));
1331         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1332                val * sizeof(*s->map_cmdline_to_pid));
1333
1334         return 0;
1335 }
1336
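/*
 * Allocate and initialize the savedcmd structure used to map pids to the
 * comms recorded by tracing_record_cmdline().
 */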
1337 static int trace_create_savedcmd(void)
1338 {
1339         int ret;
1340
1341         savedcmd = kmalloc(sizeof(struct saved_cmdlines_buffer), GFP_KERNEL);
1342         if (!savedcmd)
1343                 return -ENOMEM;
1344
1345         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1346         if (ret < 0) {
1347                 kfree(savedcmd);
1348                 savedcmd = NULL;
1349                 return -ENOMEM;
1350         }
1351
1352         return 0;
1353 }
1354
1355 int is_tracing_stopped(void)
1356 {
1357         return global_trace.stop_count;
1358 }
1359
1360 /**
1361  * tracing_start - quick start of the tracer
1362  *
1363  * If tracing is enabled but was stopped by tracing_stop,
1364  * this will start the tracer back up.
1365  */
1366 void tracing_start(void)
1367 {
1368         struct ring_buffer *buffer;
1369         unsigned long flags;
1370
1371         if (tracing_disabled)
1372                 return;
1373
1374         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1375         if (--global_trace.stop_count) {
1376                 if (global_trace.stop_count < 0) {
1377                         /* Someone screwed up their debugging */
1378                         WARN_ON_ONCE(1);
1379                         global_trace.stop_count = 0;
1380                 }
1381                 goto out;
1382         }
1383
1384         /* Prevent the buffers from switching */
1385         arch_spin_lock(&global_trace.max_lock);
1386
1387         buffer = global_trace.trace_buffer.buffer;
1388         if (buffer)
1389                 ring_buffer_record_enable(buffer);
1390
1391 #ifdef CONFIG_TRACER_MAX_TRACE
1392         buffer = global_trace.max_buffer.buffer;
1393         if (buffer)
1394                 ring_buffer_record_enable(buffer);
1395 #endif
1396
1397         arch_spin_unlock(&global_trace.max_lock);
1398
1399         ftrace_start();
1400  out:
1401         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1402 }
1403
1404 static void tracing_start_tr(struct trace_array *tr)
1405 {
1406         struct ring_buffer *buffer;
1407         unsigned long flags;
1408
1409         if (tracing_disabled)
1410                 return;
1411
1412         /* If global, we need to also start the max tracer */
1413         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1414                 return tracing_start();
1415
1416         raw_spin_lock_irqsave(&tr->start_lock, flags);
1417
1418         if (--tr->stop_count) {
1419                 if (tr->stop_count < 0) {
1420                         /* Someone screwed up their debugging */
1421                         WARN_ON_ONCE(1);
1422                         tr->stop_count = 0;
1423                 }
1424                 goto out;
1425         }
1426
1427         buffer = tr->trace_buffer.buffer;
1428         if (buffer)
1429                 ring_buffer_record_enable(buffer);
1430
1431  out:
1432         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1433 }
1434
1435 /**
1436  * tracing_stop - quick stop of the tracer
1437  *
1438  * Light weight way to stop tracing. Use in conjunction with
1439  * tracing_start.
1440  */
1441 void tracing_stop(void)
1442 {
1443         struct ring_buffer *buffer;
1444         unsigned long flags;
1445
1446         ftrace_stop();
1447         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1448         if (global_trace.stop_count++)
1449                 goto out;
1450
1451         /* Prevent the buffers from switching */
1452         arch_spin_lock(&global_trace.max_lock);
1453
1454         buffer = global_trace.trace_buffer.buffer;
1455         if (buffer)
1456                 ring_buffer_record_disable(buffer);
1457
1458 #ifdef CONFIG_TRACER_MAX_TRACE
1459         buffer = global_trace.max_buffer.buffer;
1460         if (buffer)
1461                 ring_buffer_record_disable(buffer);
1462 #endif
1463
1464         arch_spin_unlock(&global_trace.max_lock);
1465
1466  out:
1467         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1468 }
1469
1470 static void tracing_stop_tr(struct trace_array *tr)
1471 {
1472         struct ring_buffer *buffer;
1473         unsigned long flags;
1474
1475         /* If global, we need to also stop the max tracer */
1476         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1477                 return tracing_stop();
1478
1479         raw_spin_lock_irqsave(&tr->start_lock, flags);
1480         if (tr->stop_count++)
1481                 goto out;
1482
1483         buffer = tr->trace_buffer.buffer;
1484         if (buffer)
1485                 ring_buffer_record_disable(buffer);
1486
1487  out:
1488         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1489 }
1490
1491 void trace_stop_cmdline_recording(void);
1492
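/*
 * Save @tsk->comm in the saved_cmdlines buffer so that trace output can
 * later map its pid back to a comm. Returns 1 on success, or 0 if the
 * comm was not saved (pid of 0, pid above PID_MAX_DEFAULT, or the
 * cmdline lock being busy).
 */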
1493 static int trace_save_cmdline(struct task_struct *tsk)
1494 {
1495         unsigned pid, idx;
1496
1497         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1498                 return 0;
1499
1500         /*
1501          * It's not the end of the world if we don't get
1502          * the lock, but we also don't want to spin
1503          * nor do we want to disable interrupts,
1504          * so if we miss here, then better luck next time.
1505          */
1506         if (!arch_spin_trylock(&trace_cmdline_lock))
1507                 return 0;
1508
1509         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1510         if (idx == NO_CMDLINE_MAP) {
1511                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1512
1513                 /*
1514                  * Check whether the cmdline buffer at idx has a pid
1515                  * mapped. We are going to overwrite that entry so we
1516                  * need to clear the map_pid_to_cmdline. Otherwise we
1517                  * would read the new comm for the old pid.
1518                  */
1519                 pid = savedcmd->map_cmdline_to_pid[idx];
1520                 if (pid != NO_CMDLINE_MAP)
1521                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1522
1523                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1524                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1525
1526                 savedcmd->cmdline_idx = idx;
1527         }
1528
1529         set_cmdline(idx, tsk->comm);
1530
1531         arch_spin_unlock(&trace_cmdline_lock);
1532
1533         return 1;
1534 }
1535
1536 static void __trace_find_cmdline(int pid, char comm[])
1537 {
1538         unsigned map;
1539
1540         if (!pid) {
1541                 strcpy(comm, "<idle>");
1542                 return;
1543         }
1544
1545         if (WARN_ON_ONCE(pid < 0)) {
1546                 strcpy(comm, "<XXX>");
1547                 return;
1548         }
1549
1550         if (pid > PID_MAX_DEFAULT) {
1551                 strcpy(comm, "<...>");
1552                 return;
1553         }
1554
1555         map = savedcmd->map_pid_to_cmdline[pid];
1556         if (map != NO_CMDLINE_MAP)
1557                 strcpy(comm, get_saved_cmdlines(map));
1558         else
1559                 strcpy(comm, "<...>");
1560 }
1561
1562 void trace_find_cmdline(int pid, char comm[])
1563 {
1564         preempt_disable();
1565         arch_spin_lock(&trace_cmdline_lock);
1566
1567         __trace_find_cmdline(pid, comm);
1568
1569         arch_spin_unlock(&trace_cmdline_lock);
1570         preempt_enable();
1571 }
1572
1573 void tracing_record_cmdline(struct task_struct *tsk)
1574 {
1575         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1576                 return;
1577
1578         if (!__this_cpu_read(trace_cmdline_save))
1579                 return;
1580
1581         if (trace_save_cmdline(tsk))
1582                 __this_cpu_write(trace_cmdline_save, false);
1583 }
1584
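/*
 * Fill in the fields common to all trace entries (pid, preempt count and
 * the irq/softirq/need-resched flags) from the current context.
 */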
1585 void
1586 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1587                              int pc)
1588 {
1589         struct task_struct *tsk = current;
1590
1591         entry->preempt_count            = pc & 0xff;
1592         entry->pid                      = (tsk) ? tsk->pid : 0;
1593         entry->flags =
1594 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1595                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1596 #else
1597                 TRACE_FLAG_IRQS_NOSUPPORT |
1598 #endif
1599                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1600                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1601                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1602                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1603 }
1604 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1605
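/*
 * Reserve an event of @type and @len bytes on @buffer and initialize its
 * common entry fields. Returns NULL if the event could not be reserved.
 */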
1606 struct ring_buffer_event *
1607 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1608                           int type,
1609                           unsigned long len,
1610                           unsigned long flags, int pc)
1611 {
1612         struct ring_buffer_event *event;
1613
1614         event = ring_buffer_lock_reserve(buffer, len);
1615         if (event != NULL) {
1616                 struct trace_entry *ent = ring_buffer_event_data(event);
1617
1618                 tracing_generic_entry_update(ent, flags, pc);
1619                 ent->type = type;
1620         }
1621
1622         return event;
1623 }
1624
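/*
 * Commit a reserved event and note that a trace event occurred, so the
 * current task's comm will be saved by tracing_record_cmdline().
 */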
1625 void
1626 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1627 {
1628         __this_cpu_write(trace_cmdline_save, true);
1629         ring_buffer_unlock_commit(buffer, event);
1630 }
1631
1632 static inline void
1633 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1634                              struct ring_buffer_event *event,
1635                              unsigned long flags, int pc)
1636 {
1637         __buffer_unlock_commit(buffer, event);
1638
1639         ftrace_trace_stack(buffer, flags, 6, pc);
1640         ftrace_trace_userstack(buffer, flags, pc);
1641 }
1642
1643 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1644                                 struct ring_buffer_event *event,
1645                                 unsigned long flags, int pc)
1646 {
1647         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1648 }
1649 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1650
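/*
 * Scratch ring buffer used by trace_event_buffer_lock_reserve() when tracing
 * is off but event triggers still need to see the event data.
 */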
1651 static struct ring_buffer *temp_buffer;
1652
1653 struct ring_buffer_event *
1654 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1655                           struct ftrace_event_file *ftrace_file,
1656                           int type, unsigned long len,
1657                           unsigned long flags, int pc)
1658 {
1659         struct ring_buffer_event *entry;
1660
1661         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1662         entry = trace_buffer_lock_reserve(*current_rb,
1663                                          type, len, flags, pc);
1664         /*
1665          * If tracing is off, but we have triggers enabled
1666          * we still need to look at the event data. Use the temp_buffer
1667          * to store the trace event for the trigger to use. It's recursion
1668          * safe and will not be recorded anywhere.
1669          */
1670         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1671                 *current_rb = temp_buffer;
1672                 entry = trace_buffer_lock_reserve(*current_rb,
1673                                                   type, len, flags, pc);
1674         }
1675         return entry;
1676 }
1677 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1678
1679 struct ring_buffer_event *
1680 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1681                                   int type, unsigned long len,
1682                                   unsigned long flags, int pc)
1683 {
1684         *current_rb = global_trace.trace_buffer.buffer;
1685         return trace_buffer_lock_reserve(*current_rb,
1686                                          type, len, flags, pc);
1687 }
1688 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1689
1690 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1691                                         struct ring_buffer_event *event,
1692                                         unsigned long flags, int pc)
1693 {
1694         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1695 }
1696 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1697
1698 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1699                                      struct ring_buffer_event *event,
1700                                      unsigned long flags, int pc,
1701                                      struct pt_regs *regs)
1702 {
1703         __buffer_unlock_commit(buffer, event);
1704
1705         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1706         ftrace_trace_userstack(buffer, flags, pc);
1707 }
1708 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1709
1710 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1711                                          struct ring_buffer_event *event)
1712 {
1713         ring_buffer_discard_commit(buffer, event);
1714 }
1715 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1716
1717 void
1718 trace_function(struct trace_array *tr,
1719                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1720                int pc)
1721 {
1722         struct ftrace_event_call *call = &event_function;
1723         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1724         struct ring_buffer_event *event;
1725         struct ftrace_entry *entry;
1726
1727         /* If we are reading the ring buffer, don't trace */
1728         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1729                 return;
1730
1731         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1732                                           flags, pc);
1733         if (!event)
1734                 return;
1735         entry   = ring_buffer_event_data(event);
1736         entry->ip                       = ip;
1737         entry->parent_ip                = parent_ip;
1738
1739         if (!call_filter_check_discard(call, entry, buffer, event))
1740                 __buffer_unlock_commit(buffer, event);
1741 }
1742
1743 #ifdef CONFIG_STACKTRACE
1744
1745 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1746 struct ftrace_stack {
1747         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1748 };
1749
1750 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1751 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1752
1753 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1754                                  unsigned long flags,
1755                                  int skip, int pc, struct pt_regs *regs)
1756 {
1757         struct ftrace_event_call *call = &event_kernel_stack;
1758         struct ring_buffer_event *event;
1759         struct stack_entry *entry;
1760         struct stack_trace trace;
1761         int use_stack;
1762         int size = FTRACE_STACK_ENTRIES;
1763
1764         trace.nr_entries        = 0;
1765         trace.skip              = skip;
1766
1767         /*
1768          * Since events can happen in NMIs there's no safe way to
1769          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1770          * or NMI comes in, it will just have to use the default
1771          * FTRACE_STACK_SIZE.
1772          */
1773         preempt_disable_notrace();
1774
1775         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1776         /*
1777          * We don't need any atomic variables, just a barrier.
1778          * If an interrupt comes in, we don't care, because it would
1779          * have exited and put the counter back to what we want.
1780          * We just need a barrier to keep gcc from moving things
1781          * around.
1782          */
1783         barrier();
1784         if (use_stack == 1) {
1785                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1786                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1787
1788                 if (regs)
1789                         save_stack_trace_regs(regs, &trace);
1790                 else
1791                         save_stack_trace(&trace);
1792
1793                 if (trace.nr_entries > size)
1794                         size = trace.nr_entries;
1795         } else
1796                 /* From now on, use_stack is a boolean */
1797                 use_stack = 0;
1798
1799         size *= sizeof(unsigned long);
1800
1801         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1802                                           sizeof(*entry) + size, flags, pc);
1803         if (!event)
1804                 goto out;
1805         entry = ring_buffer_event_data(event);
1806
1807         memset(&entry->caller, 0, size);
1808
1809         if (use_stack)
1810                 memcpy(&entry->caller, trace.entries,
1811                        trace.nr_entries * sizeof(unsigned long));
1812         else {
1813                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1814                 trace.entries           = entry->caller;
1815                 if (regs)
1816                         save_stack_trace_regs(regs, &trace);
1817                 else
1818                         save_stack_trace(&trace);
1819         }
1820
1821         entry->size = trace.nr_entries;
1822
1823         if (!call_filter_check_discard(call, entry, buffer, event))
1824                 __buffer_unlock_commit(buffer, event);
1825
1826  out:
1827         /* Again, don't let gcc optimize things here */
1828         barrier();
1829         __this_cpu_dec(ftrace_stack_reserve);
1830         preempt_enable_notrace();
1831
1832 }
1833
1834 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1835                              int skip, int pc, struct pt_regs *regs)
1836 {
1837         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1838                 return;
1839
1840         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1841 }
1842
1843 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1844                         int skip, int pc)
1845 {
1846         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1847                 return;
1848
1849         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1850 }
1851
1852 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1853                    int pc)
1854 {
1855         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1856 }
1857
1858 /**
1859  * trace_dump_stack - record a stack back trace in the trace buffer
1860  * @skip: Number of functions to skip (helper handlers)
1861  */
1862 void trace_dump_stack(int skip)
1863 {
1864         unsigned long flags;
1865
1866         if (tracing_disabled || tracing_selftest_running)
1867                 return;
1868
1869         local_save_flags(flags);
1870
1871         /*
1872          * Skip 3 more; that seems to get us to the caller of
1873          * this function.
1874          */
1875         skip += 3;
1876         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1877                              flags, skip, preempt_count(), NULL);
1878 }
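
/*
 * Usage sketch: a call such as
 *
 *	trace_dump_stack(0);
 *
 * from kernel code records the caller's stack trace into the global trace
 * buffer, where it shows up in the trace output (typically
 * /sys/kernel/debug/tracing/trace).
 */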
1879
1880 static DEFINE_PER_CPU(int, user_stack_count);
1881
1882 void
1883 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1884 {
1885         struct ftrace_event_call *call = &event_user_stack;
1886         struct ring_buffer_event *event;
1887         struct userstack_entry *entry;
1888         struct stack_trace trace;
1889
1890         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1891                 return;
1892
1893         /*
1894          * NMIs cannot handle page faults, even with fixups.
1895          * Saving the user stack can (and often does) fault.
1896          */
1897         if (unlikely(in_nmi()))
1898                 return;
1899
1900         /*
1901          * Prevent recursion, since user stack tracing may
1902          * trigger other kernel events.
1903          */
1904         preempt_disable();
1905         if (__this_cpu_read(user_stack_count))
1906                 goto out;
1907
1908         __this_cpu_inc(user_stack_count);
1909
1910         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1911                                           sizeof(*entry), flags, pc);
1912         if (!event)
1913                 goto out_drop_count;
1914         entry   = ring_buffer_event_data(event);
1915
1916         entry->tgid             = current->tgid;
1917         memset(&entry->caller, 0, sizeof(entry->caller));
1918
1919         trace.nr_entries        = 0;
1920         trace.max_entries       = FTRACE_STACK_ENTRIES;
1921         trace.skip              = 0;
1922         trace.entries           = entry->caller;
1923
1924         save_stack_trace_user(&trace);
1925         if (!call_filter_check_discard(call, entry, buffer, event))
1926                 __buffer_unlock_commit(buffer, event);
1927
1928  out_drop_count:
1929         __this_cpu_dec(user_stack_count);
1930  out:
1931         preempt_enable();
1932 }
1933
1934 #ifdef UNUSED
1935 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1936 {
1937         ftrace_trace_userstack(tr, flags, preempt_count());
1938 }
1939 #endif /* UNUSED */
1940
1941 #endif /* CONFIG_STACKTRACE */
1942
1943 /* created for use with alloc_percpu */
1944 struct trace_buffer_struct {
1945         char buffer[TRACE_BUF_SIZE];
1946 };
1947
1948 static struct trace_buffer_struct *trace_percpu_buffer;
1949 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1950 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1951 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1952
1953 /*
1954  * The buffer used depends on the context. There is a per-cpu
1955  * buffer for normal context, softirq context, hardirq context and
1956  * NMI context. This allows for lockless recording.
1957  *
1958  * Note, if the buffers failed to be allocated, then this returns NULL.
1959  */
1960 static char *get_trace_buf(void)
1961 {
1962         struct trace_buffer_struct *percpu_buffer;
1963
1964         /*
1965          * If we have allocated per cpu buffers, then we do not
1966          * need to do any locking.
1967          */
1968         if (in_nmi())
1969                 percpu_buffer = trace_percpu_nmi_buffer;
1970         else if (in_irq())
1971                 percpu_buffer = trace_percpu_irq_buffer;
1972         else if (in_softirq())
1973                 percpu_buffer = trace_percpu_sirq_buffer;
1974         else
1975                 percpu_buffer = trace_percpu_buffer;
1976
1977         if (!percpu_buffer)
1978                 return NULL;
1979
1980         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1981 }
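
/*
 * Note: callers of get_trace_buf() are expected to have preemption disabled
 * (see trace_vbprintk() and __trace_array_vprintk() below), so the returned
 * per-cpu buffer cannot migrate to another CPU while it is being filled.
 */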
1982
1983 static int alloc_percpu_trace_buffer(void)
1984 {
1985         struct trace_buffer_struct *buffers;
1986         struct trace_buffer_struct *sirq_buffers;
1987         struct trace_buffer_struct *irq_buffers;
1988         struct trace_buffer_struct *nmi_buffers;
1989
1990         buffers = alloc_percpu(struct trace_buffer_struct);
1991         if (!buffers)
1992                 goto err_warn;
1993
1994         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1995         if (!sirq_buffers)
1996                 goto err_sirq;
1997
1998         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1999         if (!irq_buffers)
2000                 goto err_irq;
2001
2002         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2003         if (!nmi_buffers)
2004                 goto err_nmi;
2005
2006         trace_percpu_buffer = buffers;
2007         trace_percpu_sirq_buffer = sirq_buffers;
2008         trace_percpu_irq_buffer = irq_buffers;
2009         trace_percpu_nmi_buffer = nmi_buffers;
2010
2011         return 0;
2012
2013  err_nmi:
2014         free_percpu(irq_buffers);
2015  err_irq:
2016         free_percpu(sirq_buffers);
2017  err_sirq:
2018         free_percpu(buffers);
2019  err_warn:
2020         WARN(1, "Could not allocate percpu trace_printk buffer");
2021         return -ENOMEM;
2022 }
2023
2024 static int buffers_allocated;
2025
2026 void trace_printk_init_buffers(void)
2027 {
2028         if (buffers_allocated)
2029                 return;
2030
2031         if (alloc_percpu_trace_buffer())
2032                 return;
2033
2034         /* trace_printk() is for debug use only. Don't use it in production. */
2035
2036         pr_warning("\n**********************************************************\n");
2037         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2038         pr_warning("**                                                      **\n");
2039         pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2040         pr_warning("**                                                      **\n");
2041         pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2042         pr_warning("** unsafe for production use.                           **\n");
2043         pr_warning("**                                                      **\n");
2044         pr_warning("** If you see this message and you are not debugging    **\n");
2045         pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2046         pr_warning("**                                                      **\n");
2047         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2048         pr_warning("**********************************************************\n");
2049
2050         /* Expand the buffers to set size */
2051         tracing_update_buffers();
2052
2053         buffers_allocated = 1;
2054
2055         /*
2056          * trace_printk_init_buffers() can be called by modules.
2057          * If that happens, then we need to start cmdline recording
2058          * directly here. If the global_trace.buffer is already
2059          * allocated here, then this was called by module code.
2060          */
2061         if (global_trace.trace_buffer.buffer)
2062                 tracing_start_cmdline_record();
2063 }
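
/*
 * For reference, the debug-only API that ends up using these buffers is the
 * printf-like trace_printk(), e.g.:
 *
 *	trace_printk("processing id=%d\n", id);
 *
 * which is routed to trace_vbprintk() or trace_vprintk() below (roughly,
 * depending on the format string).
 */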
2064
2065 void trace_printk_start_comm(void)
2066 {
2067         /* Start tracing comms if trace printk is set */
2068         if (!buffers_allocated)
2069                 return;
2070         tracing_start_cmdline_record();
2071 }
2072
2073 static void trace_printk_start_stop_comm(int enabled)
2074 {
2075         if (!buffers_allocated)
2076                 return;
2077
2078         if (enabled)
2079                 tracing_start_cmdline_record();
2080         else
2081                 tracing_stop_cmdline_record();
2082 }
2083
2084 /**
2085  * trace_vbprintk - write binary msg to tracing buffer
2086  *
2087  */
2088 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2089 {
2090         struct ftrace_event_call *call = &event_bprint;
2091         struct ring_buffer_event *event;
2092         struct ring_buffer *buffer;
2093         struct trace_array *tr = &global_trace;
2094         struct bprint_entry *entry;
2095         unsigned long flags;
2096         char *tbuffer;
2097         int len = 0, size, pc;
2098
2099         if (unlikely(tracing_selftest_running || tracing_disabled))
2100                 return 0;
2101
2102         /* Don't pollute graph traces with trace_vprintk internals */
2103         pause_graph_tracing();
2104
2105         pc = preempt_count();
2106         preempt_disable_notrace();
2107
2108         tbuffer = get_trace_buf();
2109         if (!tbuffer) {
2110                 len = 0;
2111                 goto out;
2112         }
2113
2114         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2115
2116         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2117                 goto out;
2118
2119         local_save_flags(flags);
2120         size = sizeof(*entry) + sizeof(u32) * len;
2121         buffer = tr->trace_buffer.buffer;
2122         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2123                                           flags, pc);
2124         if (!event)
2125                 goto out;
2126         entry = ring_buffer_event_data(event);
2127         entry->ip                       = ip;
2128         entry->fmt                      = fmt;
2129
2130         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2131         if (!call_filter_check_discard(call, entry, buffer, event)) {
2132                 __buffer_unlock_commit(buffer, event);
2133                 ftrace_trace_stack(buffer, flags, 6, pc);
2134         }
2135
2136 out:
2137         preempt_enable_notrace();
2138         unpause_graph_tracing();
2139
2140         return len;
2141 }
2142 EXPORT_SYMBOL_GPL(trace_vbprintk);
2143
2144 static int
2145 __trace_array_vprintk(struct ring_buffer *buffer,
2146                       unsigned long ip, const char *fmt, va_list args)
2147 {
2148         struct ftrace_event_call *call = &event_print;
2149         struct ring_buffer_event *event;
2150         int len = 0, size, pc;
2151         struct print_entry *entry;
2152         unsigned long flags;
2153         char *tbuffer;
2154
2155         if (tracing_disabled || tracing_selftest_running)
2156                 return 0;
2157
2158         /* Don't pollute graph traces with trace_vprintk internals */
2159         pause_graph_tracing();
2160
2161         pc = preempt_count();
2162         preempt_disable_notrace();
2163
2164
2165         tbuffer = get_trace_buf();
2166         if (!tbuffer) {
2167                 len = 0;
2168                 goto out;
2169         }
2170
2171         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2172         if (len > TRACE_BUF_SIZE)
2173                 goto out;
2174
2175         local_save_flags(flags);
2176         size = sizeof(*entry) + len + 1;
2177         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2178                                           flags, pc);
2179         if (!event)
2180                 goto out;
2181         entry = ring_buffer_event_data(event);
2182         entry->ip = ip;
2183
2184         memcpy(&entry->buf, tbuffer, len);
2185         entry->buf[len] = '\0';
2186         if (!call_filter_check_discard(call, entry, buffer, event)) {
2187                 __buffer_unlock_commit(buffer, event);
2188                 ftrace_trace_stack(buffer, flags, 6, pc);
2189         }
2190  out:
2191         preempt_enable_notrace();
2192         unpause_graph_tracing();
2193
2194         return len;
2195 }
2196
2197 int trace_array_vprintk(struct trace_array *tr,
2198                         unsigned long ip, const char *fmt, va_list args)
2199 {
2200         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2201 }
2202
2203 int trace_array_printk(struct trace_array *tr,
2204                        unsigned long ip, const char *fmt, ...)
2205 {
2206         int ret;
2207         va_list ap;
2208
2209         if (!(trace_flags & TRACE_ITER_PRINTK))
2210                 return 0;
2211
2212         va_start(ap, fmt);
2213         ret = trace_array_vprintk(tr, ip, fmt, ap);
2214         va_end(ap);
2215         return ret;
2216 }
2217
2218 int trace_array_printk_buf(struct ring_buffer *buffer,
2219                            unsigned long ip, const char *fmt, ...)
2220 {
2221         int ret;
2222         va_list ap;
2223
2224         if (!(trace_flags & TRACE_ITER_PRINTK))
2225                 return 0;
2226
2227         va_start(ap, fmt);
2228         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2229         va_end(ap);
2230         return ret;
2231 }
2232
2233 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2234 {
2235         return trace_array_vprintk(&global_trace, ip, fmt, args);
2236 }
2237 EXPORT_SYMBOL_GPL(trace_vprintk);
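
/*
 * Summary of the two printk-style paths above: trace_vbprintk() stores the
 * format pointer plus the arguments in binary form (smaller and cheaper at
 * record time, decoded when the trace is read), while trace_vprintk()
 * formats the string up front and stores the resulting text in the entry.
 */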
2238
2239 static void trace_iterator_increment(struct trace_iterator *iter)
2240 {
2241         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2242
2243         iter->idx++;
2244         if (buf_iter)
2245                 ring_buffer_read(buf_iter, NULL);
2246 }
2247
2248 static struct trace_entry *
2249 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2250                 unsigned long *lost_events)
2251 {
2252         struct ring_buffer_event *event;
2253         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2254
2255         if (buf_iter)
2256                 event = ring_buffer_iter_peek(buf_iter, ts);
2257         else
2258                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2259                                          lost_events);
2260
2261         if (event) {
2262                 iter->ent_size = ring_buffer_event_length(event);
2263                 return ring_buffer_event_data(event);
2264         }
2265         iter->ent_size = 0;
2266         return NULL;
2267 }
2268
2269 static struct trace_entry *
2270 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2271                   unsigned long *missing_events, u64 *ent_ts)
2272 {
2273         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2274         struct trace_entry *ent, *next = NULL;
2275         unsigned long lost_events = 0, next_lost = 0;
2276         int cpu_file = iter->cpu_file;
2277         u64 next_ts = 0, ts;
2278         int next_cpu = -1;
2279         int next_size = 0;
2280         int cpu;
2281
2282         /*
2283          * If we are in a per_cpu trace file, don't bother iterating over
2284          * all cpus; just peek at that one directly.
2285          */
2286         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2287                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2288                         return NULL;
2289                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2290                 if (ent_cpu)
2291                         *ent_cpu = cpu_file;
2292
2293                 return ent;
2294         }
2295
2296         for_each_tracing_cpu(cpu) {
2297
2298                 if (ring_buffer_empty_cpu(buffer, cpu))
2299                         continue;
2300
2301                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2302
2303                 /*
2304                  * Pick the entry with the smallest timestamp:
2305                  */
2306                 if (ent && (!next || ts < next_ts)) {
2307                         next = ent;
2308                         next_cpu = cpu;
2309                         next_ts = ts;
2310                         next_lost = lost_events;
2311                         next_size = iter->ent_size;
2312                 }
2313         }
2314
2315         iter->ent_size = next_size;
2316
2317         if (ent_cpu)
2318                 *ent_cpu = next_cpu;
2319
2320         if (ent_ts)
2321                 *ent_ts = next_ts;
2322
2323         if (missing_events)
2324                 *missing_events = next_lost;
2325
2326         return next;
2327 }
2328
2329 /* Find the next real entry, without updating the iterator itself */
2330 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2331                                           int *ent_cpu, u64 *ent_ts)
2332 {
2333         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2334 }
2335
2336 /* Find the next real entry, and increment the iterator to the next entry */
2337 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2338 {
2339         iter->ent = __find_next_entry(iter, &iter->cpu,
2340                                       &iter->lost_events, &iter->ts);
2341
2342         if (iter->ent)
2343                 trace_iterator_increment(iter);
2344
2345         return iter->ent ? iter : NULL;
2346 }
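
/*
 * The helpers above make up the read-side iteration: peek_next_entry()
 * looks at a single cpu buffer, __find_next_entry() picks the oldest entry
 * (smallest timestamp) across cpus, and trace_find_next_entry_inc()
 * additionally advances the iterator. trace_consume() below is the
 * destructive variant used by consuming readers such as trace_pipe.
 */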
2347
2348 static void trace_consume(struct trace_iterator *iter)
2349 {
2350         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2351                             &iter->lost_events);
2352 }
2353
2354 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2355 {
2356         struct trace_iterator *iter = m->private;
2357         int i = (int)*pos;
2358         void *ent;
2359
2360         WARN_ON_ONCE(iter->leftover);
2361
2362         (*pos)++;
2363
2364         /* can't go backwards */
2365         if (iter->idx > i)
2366                 return NULL;
2367
2368         if (iter->idx < 0)
2369                 ent = trace_find_next_entry_inc(iter);
2370         else
2371                 ent = iter;
2372
2373         while (ent && iter->idx < i)
2374                 ent = trace_find_next_entry_inc(iter);
2375
2376         iter->pos = *pos;
2377
2378         return ent;
2379 }
2380
2381 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2382 {
2383         struct ring_buffer_event *event;
2384         struct ring_buffer_iter *buf_iter;
2385         unsigned long entries = 0;
2386         u64 ts;
2387
2388         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2389
2390         buf_iter = trace_buffer_iter(iter, cpu);
2391         if (!buf_iter)
2392                 return;
2393
2394         ring_buffer_iter_reset(buf_iter);
2395
2396         /*
2397          * With the max latency tracers, a reset may never have taken
2398          * place on a cpu. This is evident when the timestamp is before
2399          * the start of the buffer.
2400          */
2401         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2402                 if (ts >= iter->trace_buffer->time_start)
2403                         break;
2404                 entries++;
2405                 ring_buffer_read(buf_iter, NULL);
2406         }
2407
2408         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2409 }
2410
2411 /*
2412  * The current tracer is copied to avoid holding a global lock
2413  * all around.
2414  */
2415 static void *s_start(struct seq_file *m, loff_t *pos)
2416 {
2417         struct trace_iterator *iter = m->private;
2418         struct trace_array *tr = iter->tr;
2419         int cpu_file = iter->cpu_file;
2420         void *p = NULL;
2421         loff_t l = 0;
2422         int cpu;
2423
2424         /*
2425          * Copy the tracer to avoid using a global lock all around.
2426          * iter->trace is a copy of current_trace; the pointer to the
2427          * name may be used instead of a strcmp(), as iter->trace->name
2428          * will point to the same string as current_trace->name.
2429          */
2430         mutex_lock(&trace_types_lock);
2431         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2432                 *iter->trace = *tr->current_trace;
2433         mutex_unlock(&trace_types_lock);
2434
2435 #ifdef CONFIG_TRACER_MAX_TRACE
2436         if (iter->snapshot && iter->trace->use_max_tr)
2437                 return ERR_PTR(-EBUSY);
2438 #endif
2439
2440         if (!iter->snapshot)
2441                 atomic_inc(&trace_record_cmdline_disabled);
2442
2443         if (*pos != iter->pos) {
2444                 iter->ent = NULL;
2445                 iter->cpu = 0;
2446                 iter->idx = -1;
2447
2448                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2449                         for_each_tracing_cpu(cpu)
2450                                 tracing_iter_reset(iter, cpu);
2451                 } else
2452                         tracing_iter_reset(iter, cpu_file);
2453
2454                 iter->leftover = 0;
2455                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2456                         ;
2457
2458         } else {
2459                 /*
2460                  * If we overflowed the seq_file before, then we want
2461                  * to just reuse the trace_seq buffer again.
2462                  */
2463                 if (iter->leftover)
2464                         p = iter;
2465                 else {
2466                         l = *pos - 1;
2467                         p = s_next(m, p, &l);
2468                 }
2469         }
2470
2471         trace_event_read_lock();
2472         trace_access_lock(cpu_file);
2473         return p;
2474 }
2475
2476 static void s_stop(struct seq_file *m, void *p)
2477 {
2478         struct trace_iterator *iter = m->private;
2479
2480 #ifdef CONFIG_TRACER_MAX_TRACE
2481         if (iter->snapshot && iter->trace->use_max_tr)
2482                 return;
2483 #endif
2484
2485         if (!iter->snapshot)
2486                 atomic_dec(&trace_record_cmdline_disabled);
2487
2488         trace_access_unlock(iter->cpu_file);
2489         trace_event_read_unlock();
2490 }
2491
2492 static void
2493 get_total_entries(struct trace_buffer *buf,
2494                   unsigned long *total, unsigned long *entries)
2495 {
2496         unsigned long count;
2497         int cpu;
2498
2499         *total = 0;
2500         *entries = 0;
2501
2502         for_each_tracing_cpu(cpu) {
2503                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2504                 /*
2505                  * If this buffer has skipped entries, then we hold all
2506                  * entries for the trace and we need to ignore the
2507                  * ones before the time stamp.
2508                  */
2509                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2510                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2511                         /* total is the same as the entries */
2512                         *total += count;
2513                 } else
2514                         *total += count +
2515                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2516                 *entries += count;
2517         }
2518 }
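
/*
 * In get_total_entries() above, *entries counts what is still present in
 * the buffer, while *total also includes events that were overwritten (the
 * per-cpu overrun count). If a cpu had skipped entries, those are excluded
 * from both counts.
 */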
2519
2520 static void print_lat_help_header(struct seq_file *m)
2521 {
2522         seq_puts(m, "#                  _------=> CPU#            \n");
2523         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2524         seq_puts(m, "#                | / _----=> need-resched    \n");
2525         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2526         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2527         seq_puts(m, "#                |||| /     delay             \n");
2528         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2529         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2530 }
2531
2532 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2533 {
2534         unsigned long total;
2535         unsigned long entries;
2536
2537         get_total_entries(buf, &total, &entries);
2538         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2539                    entries, total, num_online_cpus());
2540         seq_puts(m, "#\n");
2541 }
2542
2543 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2544 {
2545         print_event_info(buf, m);
2546         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2547         seq_puts(m, "#              | |       |          |         |\n");
2548 }
2549
2550 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2551 {
2552         print_event_info(buf, m);
2553         seq_puts(m, "#                              _-----=> irqs-off\n");
2554         seq_puts(m, "#                             / _----=> need-resched\n");
2555         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2556         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2557         seq_puts(m, "#                            ||| /     delay\n");
2558         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2559         seq_puts(m, "#              | |       |   ||||       |         |\n");
2560 }
2561
2562 void
2563 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2564 {
2565         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2566         struct trace_buffer *buf = iter->trace_buffer;
2567         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2568         struct tracer *type = iter->trace;
2569         unsigned long entries;
2570         unsigned long total;
2571         const char *name = type->name;
2574
2575         get_total_entries(buf, &total, &entries);
2576
2577         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2578                    name, UTS_RELEASE);
2579         seq_puts(m, "# -----------------------------------"
2580                  "---------------------------------\n");
2581         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2582                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2583                    nsecs_to_usecs(data->saved_latency),
2584                    entries,
2585                    total,
2586                    buf->cpu,
2587 #if defined(CONFIG_PREEMPT_NONE)
2588                    "server",
2589 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2590                    "desktop",
2591 #elif defined(CONFIG_PREEMPT)
2592                    "preempt",
2593 #else
2594                    "unknown",
2595 #endif
2596                    /* These are reserved for later use */
2597                    0, 0, 0, 0);
2598 #ifdef CONFIG_SMP
2599         seq_printf(m, " #P:%d)\n", num_online_cpus());
2600 #else
2601         seq_puts(m, ")\n");
2602 #endif
2603         seq_puts(m, "#    -----------------\n");
2604         seq_printf(m, "#    | task: %.16s-%d "
2605                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2606                    data->comm, data->pid,
2607                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2608                    data->policy, data->rt_priority);
2609         seq_puts(m, "#    -----------------\n");
2610
2611         if (data->critical_start) {
2612                 seq_puts(m, "#  => started at: ");
2613                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2614                 trace_print_seq(m, &iter->seq);
2615                 seq_puts(m, "\n#  => ended at:   ");
2616                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2617                 trace_print_seq(m, &iter->seq);
2618                 seq_puts(m, "\n#\n");
2619         }
2620
2621         seq_puts(m, "#\n");
2622 }
2623
2624 static void test_cpu_buff_start(struct trace_iterator *iter)
2625 {
2626         struct trace_seq *s = &iter->seq;
2627
2628         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2629                 return;
2630
2631         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2632                 return;
2633
2634         if (cpumask_test_cpu(iter->cpu, iter->started))
2635                 return;
2636
2637         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2638                 return;
2639
2640         cpumask_set_cpu(iter->cpu, iter->started);
2641
2642         /* Don't print started cpu buffer for the first entry of the trace */
2643         if (iter->idx > 1)
2644                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2645                                 iter->cpu);
2646 }
2647
2648 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2649 {
2650         struct trace_seq *s = &iter->seq;
2651         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2652         struct trace_entry *entry;
2653         struct trace_event *event;
2654
2655         entry = iter->ent;
2656
2657         test_cpu_buff_start(iter);
2658
2659         event = ftrace_find_event(entry->type);
2660
2661         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2662                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2663                         if (!trace_print_lat_context(iter))
2664                                 goto partial;
2665                 } else {
2666                         if (!trace_print_context(iter))
2667                                 goto partial;
2668                 }
2669         }
2670
2671         if (event)
2672                 return event->funcs->trace(iter, sym_flags, event);
2673
2674         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2675                 goto partial;
2676
2677         return TRACE_TYPE_HANDLED;
2678 partial:
2679         return TRACE_TYPE_PARTIAL_LINE;
2680 }
2681
2682 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2683 {
2684         struct trace_seq *s = &iter->seq;
2685         struct trace_entry *entry;
2686         struct trace_event *event;
2687
2688         entry = iter->ent;
2689
2690         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2691                 if (!trace_seq_printf(s, "%d %d %llu ",
2692                                       entry->pid, iter->cpu, iter->ts))
2693                         goto partial;
2694         }
2695
2696         event = ftrace_find_event(entry->type);
2697         if (event)
2698                 return event->funcs->raw(iter, 0, event);
2699
2700         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2701                 goto partial;
2702
2703         return TRACE_TYPE_HANDLED;
2704 partial:
2705         return TRACE_TYPE_PARTIAL_LINE;
2706 }
2707
2708 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2709 {
2710         struct trace_seq *s = &iter->seq;
2711         unsigned char newline = '\n';
2712         struct trace_entry *entry;
2713         struct trace_event *event;
2714
2715         entry = iter->ent;
2716
2717         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2718                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2719                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2720                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2721         }
2722
2723         event = ftrace_find_event(entry->type);
2724         if (event) {
2725                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2726                 if (ret != TRACE_TYPE_HANDLED)
2727                         return ret;
2728         }
2729
2730         SEQ_PUT_FIELD_RET(s, newline);
2731
2732         return TRACE_TYPE_HANDLED;
2733 }
2734
2735 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2736 {
2737         struct trace_seq *s = &iter->seq;
2738         struct trace_entry *entry;
2739         struct trace_event *event;
2740
2741         entry = iter->ent;
2742
2743         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2744                 SEQ_PUT_FIELD_RET(s, entry->pid);
2745                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2746                 SEQ_PUT_FIELD_RET(s, iter->ts);
2747         }
2748
2749         event = ftrace_find_event(entry->type);
2750         return event ? event->funcs->binary(iter, 0, event) :
2751                 TRACE_TYPE_HANDLED;
2752 }
2753
2754 int trace_empty(struct trace_iterator *iter)
2755 {
2756         struct ring_buffer_iter *buf_iter;
2757         int cpu;
2758
2759         /* If we are looking at one CPU buffer, only check that one */
2760         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2761                 cpu = iter->cpu_file;
2762                 buf_iter = trace_buffer_iter(iter, cpu);
2763                 if (buf_iter) {
2764                         if (!ring_buffer_iter_empty(buf_iter))
2765                                 return 0;
2766                 } else {
2767                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2768                                 return 0;
2769                 }
2770                 return 1;
2771         }
2772
2773         for_each_tracing_cpu(cpu) {
2774                 buf_iter = trace_buffer_iter(iter, cpu);
2775                 if (buf_iter) {
2776                         if (!ring_buffer_iter_empty(buf_iter))
2777                                 return 0;
2778                 } else {
2779                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2780                                 return 0;
2781                 }
2782         }
2783
2784         return 1;
2785 }
2786
2787 /*  Called with trace_event_read_lock() held. */
2788 enum print_line_t print_trace_line(struct trace_iterator *iter)
2789 {
2790         enum print_line_t ret;
2791
2792         if (iter->lost_events &&
2793             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2794                                  iter->cpu, iter->lost_events))
2795                 return TRACE_TYPE_PARTIAL_LINE;
2796
2797         if (iter->trace && iter->trace->print_line) {
2798                 ret = iter->trace->print_line(iter);
2799                 if (ret != TRACE_TYPE_UNHANDLED)
2800                         return ret;
2801         }
2802
2803         if (iter->ent->type == TRACE_BPUTS &&
2804                         trace_flags & TRACE_ITER_PRINTK &&
2805                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2806                 return trace_print_bputs_msg_only(iter);
2807
2808         if (iter->ent->type == TRACE_BPRINT &&
2809                         trace_flags & TRACE_ITER_PRINTK &&
2810                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2811                 return trace_print_bprintk_msg_only(iter);
2812
2813         if (iter->ent->type == TRACE_PRINT &&
2814                         trace_flags & TRACE_ITER_PRINTK &&
2815                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2816                 return trace_print_printk_msg_only(iter);
2817
2818         if (trace_flags & TRACE_ITER_BIN)
2819                 return print_bin_fmt(iter);
2820
2821         if (trace_flags & TRACE_ITER_HEX)
2822                 return print_hex_fmt(iter);
2823
2824         if (trace_flags & TRACE_ITER_RAW)
2825                 return print_raw_fmt(iter);
2826
2827         return print_trace_fmt(iter);
2828 }
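
/*
 * Output precedence in print_trace_line() above: a lost-events note comes
 * first, then a tracer-specific print_line() hook, then the msg-only
 * printk formats (when both printk flags are set), and finally the bin,
 * hex, raw or default output selected by the trace_flags options.
 */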
2829
2830 void trace_latency_header(struct seq_file *m)
2831 {
2832         struct trace_iterator *iter = m->private;
2833
2834         /* print nothing if the buffers are empty */
2835         if (trace_empty(iter))
2836                 return;
2837
2838         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2839                 print_trace_header(m, iter);
2840
2841         if (!(trace_flags & TRACE_ITER_VERBOSE))
2842                 print_lat_help_header(m);
2843 }
2844
2845 void trace_default_header(struct seq_file *m)
2846 {
2847         struct trace_iterator *iter = m->private;
2848
2849         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2850                 return;
2851
2852         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2853                 /* print nothing if the buffers are empty */
2854                 if (trace_empty(iter))
2855                         return;
2856                 print_trace_header(m, iter);
2857                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2858                         print_lat_help_header(m);
2859         } else {
2860                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2861                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2862                                 print_func_help_header_irq(iter->trace_buffer, m);
2863                         else
2864                                 print_func_help_header(iter->trace_buffer, m);
2865                 }
2866         }
2867 }
2868
2869 static void test_ftrace_alive(struct seq_file *m)
2870 {
2871         if (!ftrace_is_dead())
2872                 return;
2873         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2874         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2875 }
2876
2877 #ifdef CONFIG_TRACER_MAX_TRACE
2878 static void show_snapshot_main_help(struct seq_file *m)
2879 {
2880         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2881         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2882         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2883         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2884         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2885         seq_printf(m, "#                       is not a '0' or '1')\n");
2886 }
2887
2888 static void show_snapshot_percpu_help(struct seq_file *m)
2889 {
2890         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2891 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2892         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2893         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2894 #else
2895         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2896         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2897 #endif
2898         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2899         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2900         seq_printf(m, "#                       is not a '0' or '1')\n");
2901 }
2902
2903 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2904 {
2905         if (iter->tr->allocated_snapshot)
2906                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2907         else
2908                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2909
2910         seq_printf(m, "# Snapshot commands:\n");
2911         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2912                 show_snapshot_main_help(m);
2913         else
2914                 show_snapshot_percpu_help(m);
2915 }
2916 #else
2917 /* Should never be called */
2918 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2919 #endif
2920
2921 static int s_show(struct seq_file *m, void *v)
2922 {
2923         struct trace_iterator *iter = v;
2924         int ret;
2925
2926         if (iter->ent == NULL) {
2927                 if (iter->tr) {
2928                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2929                         seq_puts(m, "#\n");
2930                         test_ftrace_alive(m);
2931                 }
2932                 if (iter->snapshot && trace_empty(iter))
2933                         print_snapshot_help(m, iter);
2934                 else if (iter->trace && iter->trace->print_header)
2935                         iter->trace->print_header(m);
2936                 else
2937                         trace_default_header(m);
2938
2939         } else if (iter->leftover) {
2940                 /*
2941                  * If we filled the seq_file buffer earlier, we
2942                  * want to just show it now.
2943                  */
2944                 ret = trace_print_seq(m, &iter->seq);
2945
2946                 /* ret should this time be zero, but you never know */
2947                 iter->leftover = ret;
2948
2949         } else {
2950                 print_trace_line(iter);
2951                 ret = trace_print_seq(m, &iter->seq);
2952                 /*
2953                  * If we overflow the seq_file buffer, then it will
2954                  * ask us for this data again at start up.
2955                  * Use that instead.
2956                  *  ret is 0 if seq_file write succeeded.
2957                  *        -1 otherwise.
2958                  */
2959                 iter->leftover = ret;
2960         }
2961
2962         return 0;
2963 }
2964
2965 /*
2966  * Should be used after trace_array_get(); trace_types_lock
2967  * ensures that i_cdev was already initialized.
2968  */
2969 static inline int tracing_get_cpu(struct inode *inode)
2970 {
2971         if (inode->i_cdev) /* See trace_create_cpu_file() */
2972                 return (long)inode->i_cdev - 1;
2973         return RING_BUFFER_ALL_CPUS;
2974 }
2975
2976 static const struct seq_operations tracer_seq_ops = {
2977         .start          = s_start,
2978         .next           = s_next,
2979         .stop           = s_stop,
2980         .show           = s_show,
2981 };
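
/*
 * tracer_seq_ops plugs the iterator into the generic seq_file engine: for
 * each read, seq_file calls s_start(), then s_show()/s_next() per record,
 * and s_stop() once the output buffer is full or the records run out.
 * s_start()/s_stop() also take and drop the locks that keep the buffers
 * stable while they are printed.
 */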
2982
2983 static struct trace_iterator *
2984 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2985 {
2986         struct trace_array *tr = inode->i_private;
2987         struct trace_iterator *iter;
2988         int cpu;
2989
2990         if (tracing_disabled)
2991                 return ERR_PTR(-ENODEV);
2992
2993         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2994         if (!iter)
2995                 return ERR_PTR(-ENOMEM);
2996
2997         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2998                                     GFP_KERNEL);
2999         if (!iter->buffer_iter)
3000                 goto release;
3001
3002         /*
3003          * We make a copy of the current tracer to avoid concurrent
3004          * changes on it while we are reading.
3005          */
3006         mutex_lock(&trace_types_lock);
3007         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3008         if (!iter->trace)
3009                 goto fail;
3010
3011         *iter->trace = *tr->current_trace;
3012
3013         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3014                 goto fail;
3015
3016         iter->tr = tr;
3017
3018 #ifdef CONFIG_TRACER_MAX_TRACE
3019         /* Currently only the top directory has a snapshot */
3020         if (tr->current_trace->print_max || snapshot)
3021                 iter->trace_buffer = &tr->max_buffer;
3022         else
3023 #endif
3024                 iter->trace_buffer = &tr->trace_buffer;
3025         iter->snapshot = snapshot;
3026         iter->pos = -1;
3027         iter->cpu_file = tracing_get_cpu(inode);
3028         mutex_init(&iter->mutex);
3029
3030         /* Notify the tracer early; before we stop tracing. */
3031         if (iter->trace && iter->trace->open)
3032                 iter->trace->open(iter);
3033
3034         /* Annotate start of buffers if we had overruns */
3035         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3036                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3037
3038         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3039         if (trace_clocks[tr->clock_id].in_ns)
3040                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3041
3042         /* stop the trace while dumping if we are not opening "snapshot" */
3043         if (!iter->snapshot)
3044                 tracing_stop_tr(tr);
3045
3046         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3047                 for_each_tracing_cpu(cpu) {
3048                         iter->buffer_iter[cpu] =
3049                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3050                 }
3051                 ring_buffer_read_prepare_sync();
3052                 for_each_tracing_cpu(cpu) {
3053                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3054                         tracing_iter_reset(iter, cpu);
3055                 }
3056         } else {
3057                 cpu = iter->cpu_file;
3058                 iter->buffer_iter[cpu] =
3059                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3060                 ring_buffer_read_prepare_sync();
3061                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3062                 tracing_iter_reset(iter, cpu);
3063         }
3064
3065         mutex_unlock(&trace_types_lock);
3066
3067         return iter;
3068
3069  fail:
3070         mutex_unlock(&trace_types_lock);
3071         kfree(iter->trace);
3072         kfree(iter->buffer_iter);
3073 release:
3074         seq_release_private(inode, file);
3075         return ERR_PTR(-ENOMEM);
3076 }
3077
3078 int tracing_open_generic(struct inode *inode, struct file *filp)
3079 {
3080         if (tracing_disabled)
3081                 return -ENODEV;
3082
3083         filp->private_data = inode->i_private;
3084         return 0;
3085 }
3086
3087 bool tracing_is_disabled(void)
3088 {
3089         return tracing_disabled ? true : false;
3090 }
3091
3092 /*
3093  * Open and update trace_array ref count.
3094  * Must have the current trace_array passed to it.
3095  */
3096 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3097 {
3098         struct trace_array *tr = inode->i_private;
3099
3100         if (tracing_disabled)
3101                 return -ENODEV;
3102
3103         if (trace_array_get(tr) < 0)
3104                 return -ENODEV;
3105
3106         filp->private_data = inode->i_private;
3107
3108         return 0;
3109 }
3110
3111 static int tracing_release(struct inode *inode, struct file *file)
3112 {
3113         struct trace_array *tr = inode->i_private;
3114         struct seq_file *m = file->private_data;
3115         struct trace_iterator *iter;
3116         int cpu;
3117
3118         if (!(file->f_mode & FMODE_READ)) {
3119                 trace_array_put(tr);
3120                 return 0;
3121         }
3122
3123         /* Writes do not use seq_file */
3124         iter = m->private;
3125         mutex_lock(&trace_types_lock);
3126
3127         for_each_tracing_cpu(cpu) {
3128                 if (iter->buffer_iter[cpu])
3129                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3130         }
3131
3132         if (iter->trace && iter->trace->close)
3133                 iter->trace->close(iter);
3134
3135         if (!iter->snapshot)
3136                 /* reenable tracing if it was previously enabled */
3137                 tracing_start_tr(tr);
3138
3139         __trace_array_put(tr);
3140
3141         mutex_unlock(&trace_types_lock);
3142
3143         mutex_destroy(&iter->mutex);
3144         free_cpumask_var(iter->started);
3145         kfree(iter->trace);
3146         kfree(iter->buffer_iter);
3147         seq_release_private(inode, file);
3148
3149         return 0;
3150 }
3151
3152 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3153 {
3154         struct trace_array *tr = inode->i_private;
3155
3156         trace_array_put(tr);
3157         return 0;
3158 }
3159
3160 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3161 {
3162         struct trace_array *tr = inode->i_private;
3163
3164         trace_array_put(tr);
3165
3166         return single_release(inode, file);
3167 }
3168
3169 static int tracing_open(struct inode *inode, struct file *file)
3170 {
3171         struct trace_array *tr = inode->i_private;
3172         struct trace_iterator *iter;
3173         int ret = 0;
3174
3175         if (trace_array_get(tr) < 0)
3176                 return -ENODEV;
3177
3178         /* If this file was open for write, then erase contents */
3179         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3180                 int cpu = tracing_get_cpu(inode);
3181
3182                 if (cpu == RING_BUFFER_ALL_CPUS)
3183                         tracing_reset_online_cpus(&tr->trace_buffer);
3184                 else
3185                         tracing_reset(&tr->trace_buffer, cpu);
3186         }
3187
3188         if (file->f_mode & FMODE_READ) {
3189                 iter = __tracing_open(inode, file, false);
3190                 if (IS_ERR(iter))
3191                         ret = PTR_ERR(iter);
3192                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3193                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3194         }
3195
3196         if (ret < 0)
3197                 trace_array_put(tr);
3198
3199         return ret;
3200 }
3201
3202 /*
3203  * Some tracers are not suitable for instance buffers.
3204  * A tracer is always available for the global array (toplevel),
3205  * and for other arrays only if it explicitly allows instances.
3206  */
3207 static bool
3208 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3209 {
3210         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3211 }
3212
3213 /* Find the next tracer that this trace array may use */
3214 static struct tracer *
3215 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3216 {
3217         while (t && !trace_ok_for_array(t, tr))
3218                 t = t->next;
3219
3220         return t;
3221 }
3222
3223 static void *
3224 t_next(struct seq_file *m, void *v, loff_t *pos)
3225 {
3226         struct trace_array *tr = m->private;
3227         struct tracer *t = v;
3228
3229         (*pos)++;
3230
3231         if (t)
3232                 t = get_tracer_for_array(tr, t->next);
3233
3234         return t;
3235 }
3236
3237 static void *t_start(struct seq_file *m, loff_t *pos)
3238 {
3239         struct trace_array *tr = m->private;
3240         struct tracer *t;
3241         loff_t l = 0;
3242
3243         mutex_lock(&trace_types_lock);
3244
3245         t = get_tracer_for_array(tr, trace_types);
3246         for (; t && l < *pos; t = t_next(m, t, &l))
3247                 ;
3248
3249         return t;
3250 }
3251
3252 static void t_stop(struct seq_file *m, void *p)
3253 {
3254         mutex_unlock(&trace_types_lock);
3255 }
3256
3257 static int t_show(struct seq_file *m, void *v)
3258 {
3259         struct tracer *t = v;
3260
3261         if (!t)
3262                 return 0;
3263
3264         seq_printf(m, "%s", t->name);
3265         if (t->next)
3266                 seq_putc(m, ' ');
3267         else
3268                 seq_putc(m, '\n');
3269
3270         return 0;
3271 }
3272
3273 static const struct seq_operations show_traces_seq_ops = {
3274         .start          = t_start,
3275         .next           = t_next,
3276         .stop           = t_stop,
3277         .show           = t_show,
3278 };
3279
3280 static int show_traces_open(struct inode *inode, struct file *file)
3281 {
3282         struct trace_array *tr = inode->i_private;
3283         struct seq_file *m;
3284         int ret;
3285
3286         if (tracing_disabled)
3287                 return -ENODEV;
3288
3289         ret = seq_open(file, &show_traces_seq_ops);
3290         if (ret)
3291                 return ret;
3292
3293         m = file->private_data;
3294         m->private = tr;
3295
3296         return 0;
3297 }
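/*
 * Illustrative note (not part of the original source): t_start()/t_next()
 * walk the global trace_types list, skipping tracers that are not allowed
 * for this instance, and t_show() prints the names separated by spaces
 * with a trailing newline. From user space that is simply:
 *
 *	# cat /sys/kernel/debug/tracing/available_tracers
 *	function_graph function nop
 *
 * The list shown is only an example; it depends on which tracers are
 * configured into the kernel.
 */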
3298
3299 static ssize_t
3300 tracing_write_stub(struct file *filp, const char __user *ubuf,
3301                    size_t count, loff_t *ppos)
3302 {
3303         return count;
3304 }
3305
3306 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3307 {
3308         int ret;
3309
3310         if (file->f_mode & FMODE_READ)
3311                 ret = seq_lseek(file, offset, whence);
3312         else
3313                 file->f_pos = ret = 0;
3314
3315         return ret;
3316 }
3317
3318 static const struct file_operations tracing_fops = {
3319         .open           = tracing_open,
3320         .read           = seq_read,
3321         .write          = tracing_write_stub,
3322         .llseek         = tracing_lseek,
3323         .release        = tracing_release,
3324 };
3325
3326 static const struct file_operations show_traces_fops = {
3327         .open           = show_traces_open,
3328         .read           = seq_read,
3329         .release        = seq_release,
3330         .llseek         = seq_lseek,
3331 };
3332
3333 /*
3334  * The tracer itself will not take this lock, but still we want
3335  * to provide a consistent cpumask to user-space:
3336  */
3337 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3338
3339 /*
3340  * Temporary storage for the character representation of the
3341  * CPU bitmask (and one more byte for the newline):
3342  */
3343 static char mask_str[NR_CPUS + 1];
3344
3345 static ssize_t
3346 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3347                      size_t count, loff_t *ppos)
3348 {
3349         struct trace_array *tr = file_inode(filp)->i_private;
3350         int len;
3351
3352         mutex_lock(&tracing_cpumask_update_lock);
3353
3354         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3355         if (count - len < 2) {
3356                 count = -EINVAL;
3357                 goto out_err;
3358         }
3359         len += sprintf(mask_str + len, "\n");
3360         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3361
3362 out_err:
3363         mutex_unlock(&tracing_cpumask_update_lock);
3364
3365         return count;
3366 }
3367
3368 static ssize_t
3369 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3370                       size_t count, loff_t *ppos)
3371 {
3372         struct trace_array *tr = file_inode(filp)->i_private;
3373         cpumask_var_t tracing_cpumask_new;
3374         int err, cpu;
3375
3376         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3377                 return -ENOMEM;
3378
3379         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3380         if (err)
3381                 goto err_unlock;
3382
3383         mutex_lock(&tracing_cpumask_update_lock);
3384
3385         local_irq_disable();
3386         arch_spin_lock(&tr->max_lock);
3387         for_each_tracing_cpu(cpu) {
3388                 /*
3389                  * Increase/decrease the disabled counter if we are
3390                  * about to flip a bit in the cpumask:
3391                  */
3392                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3393                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3394                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3395                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3396                 }
3397                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3398                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3399                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3400                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3401                 }
3402         }
3403         arch_spin_unlock(&tr->max_lock);
3404         local_irq_enable();
3405
3406         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3407
3408         mutex_unlock(&tracing_cpumask_update_lock);
3409         free_cpumask_var(tracing_cpumask_new);
3410
3411         return count;
3412
3413 err_unlock:
3414         free_cpumask_var(tracing_cpumask_new);
3415
3416         return err;
3417 }
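/*
 * Illustrative usage (assumption, not from the original source): the mask
 * written here is parsed by cpumask_parse_user(), i.e. the usual
 * hexadecimal cpumask format. For example, restricting tracing to CPUs 0
 * and 1 and then widening it to CPUs 0-3 again:
 *
 *	# echo 3 > /sys/kernel/debug/tracing/tracing_cpumask
 *	# echo f > /sys/kernel/debug/tracing/tracing_cpumask
 *
 * CPUs whose bit is cleared get their per-CPU "disabled" count bumped and
 * their ring buffer recording switched off by the loop above.
 */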
3418
3419 static const struct file_operations tracing_cpumask_fops = {
3420         .open           = tracing_open_generic_tr,
3421         .read           = tracing_cpumask_read,
3422         .write          = tracing_cpumask_write,
3423         .release        = tracing_release_generic_tr,
3424         .llseek         = generic_file_llseek,
3425 };
3426
3427 static int tracing_trace_options_show(struct seq_file *m, void *v)
3428 {
3429         struct tracer_opt *trace_opts;
3430         struct trace_array *tr = m->private;
3431         u32 tracer_flags;
3432         int i;
3433
3434         mutex_lock(&trace_types_lock);
3435         tracer_flags = tr->current_trace->flags->val;
3436         trace_opts = tr->current_trace->flags->opts;
3437
3438         for (i = 0; trace_options[i]; i++) {
3439                 if (trace_flags & (1 << i))
3440                         seq_printf(m, "%s\n", trace_options[i]);
3441                 else
3442                         seq_printf(m, "no%s\n", trace_options[i]);
3443         }
3444
3445         for (i = 0; trace_opts[i].name; i++) {
3446                 if (tracer_flags & trace_opts[i].bit)
3447                         seq_printf(m, "%s\n", trace_opts[i].name);
3448                 else
3449                         seq_printf(m, "no%s\n", trace_opts[i].name);
3450         }
3451         mutex_unlock(&trace_types_lock);
3452
3453         return 0;
3454 }
3455
3456 static int __set_tracer_option(struct trace_array *tr,
3457                                struct tracer_flags *tracer_flags,
3458                                struct tracer_opt *opts, int neg)
3459 {
3460         struct tracer *trace = tr->current_trace;
3461         int ret;
3462
3463         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3464         if (ret)
3465                 return ret;
3466
3467         if (neg)
3468                 tracer_flags->val &= ~opts->bit;
3469         else
3470                 tracer_flags->val |= opts->bit;
3471         return 0;
3472 }
3473
3474 /* Try to assign a tracer specific option */
3475 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3476 {
3477         struct tracer *trace = tr->current_trace;
3478         struct tracer_flags *tracer_flags = trace->flags;
3479         struct tracer_opt *opts = NULL;
3480         int i;
3481
3482         for (i = 0; tracer_flags->opts[i].name; i++) {
3483                 opts = &tracer_flags->opts[i];
3484
3485                 if (strcmp(cmp, opts->name) == 0)
3486                         return __set_tracer_option(tr, trace->flags, opts, neg);
3487         }
3488
3489         return -EINVAL;
3490 }
3491
3492 /* Some tracers require overwrite to stay enabled */
3493 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3494 {
3495         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3496                 return -1;
3497
3498         return 0;
3499 }
3500
3501 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3502 {
3503         /* do nothing if flag is already set */
3504         if (!!(trace_flags & mask) == !!enabled)
3505                 return 0;
3506
3507         /* Give the tracer a chance to approve the change */
3508         if (tr->current_trace->flag_changed)
3509                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3510                         return -EINVAL;
3511
3512         if (enabled)
3513                 trace_flags |= mask;
3514         else
3515                 trace_flags &= ~mask;
3516
3517         if (mask == TRACE_ITER_RECORD_CMD)
3518                 trace_event_enable_cmd_record(enabled);
3519
3520         if (mask == TRACE_ITER_OVERWRITE) {
3521                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3522 #ifdef CONFIG_TRACER_MAX_TRACE
3523                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3524 #endif
3525         }
3526
3527         if (mask == TRACE_ITER_PRINTK)
3528                 trace_printk_start_stop_comm(enabled);
3529
3530         return 0;
3531 }
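/*
 * Illustrative sketch (not part of the original source): a tracer that
 * must veto certain flag changes points its ->flag_changed callback at a
 * checker, and set_tracer_flag() above then rejects the change with
 * -EINVAL. A hypothetical wrapper around trace_keep_overwrite() from
 * above, matching the callback signature used at the call site:
 *
 *	static int example_flag_changed(struct trace_array *tr,
 *					u32 mask, int set)
 *	{
 *		/* refuse clearing overwrite while this tracer runs */
 *		return trace_keep_overwrite(tr->current_trace, mask, set);
 *	}
 */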
3532
3533 static int trace_set_options(struct trace_array *tr, char *option)
3534 {
3535         char *cmp;
3536         int neg = 0;
3537         int ret = -ENODEV;
3538         int i;
3539
3540         cmp = strstrip(option);
3541
3542         if (strncmp(cmp, "no", 2) == 0) {
3543                 neg = 1;
3544                 cmp += 2;
3545         }
3546
3547         mutex_lock(&trace_types_lock);
3548
3549         for (i = 0; trace_options[i]; i++) {
3550                 if (strcmp(cmp, trace_options[i]) == 0) {
3551                         ret = set_tracer_flag(tr, 1 << i, !neg);
3552                         break;
3553                 }
3554         }
3555
3556         /* If no option could be set, test the specific tracer options */
3557         if (!trace_options[i])
3558                 ret = set_tracer_option(tr, cmp, neg);
3559
3560         mutex_unlock(&trace_types_lock);
3561
3562         return ret;
3563 }
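/*
 * Illustrative usage (not part of the original source): options written
 * here are first matched against the global trace_options[] names and
 * then against the current tracer's private options; a "no" prefix clears
 * instead of sets. For example (option names are examples only):
 *
 *	# echo print-parent > /sys/kernel/debug/tracing/trace_options
 *	# echo nosym-offset > /sys/kernel/debug/tracing/trace_options
 */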
3564
3565 static ssize_t
3566 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3567                         size_t cnt, loff_t *ppos)
3568 {
3569         struct seq_file *m = filp->private_data;
3570         struct trace_array *tr = m->private;
3571         char buf[64];
3572         int ret;
3573
3574         if (cnt >= sizeof(buf))
3575                 return -EINVAL;
3576
3577         if (copy_from_user(&buf, ubuf, cnt))
3578                 return -EFAULT;
3579
3580         buf[cnt] = 0;
3581
3582         ret = trace_set_options(tr, buf);
3583         if (ret < 0)
3584                 return ret;
3585
3586         *ppos += cnt;
3587
3588         return cnt;
3589 }
3590
3591 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3592 {
3593         struct trace_array *tr = inode->i_private;
3594         int ret;
3595
3596         if (tracing_disabled)
3597                 return -ENODEV;
3598
3599         if (trace_array_get(tr) < 0)
3600                 return -ENODEV;
3601
3602         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3603         if (ret < 0)
3604                 trace_array_put(tr);
3605
3606         return ret;
3607 }
3608
3609 static const struct file_operations tracing_iter_fops = {
3610         .open           = tracing_trace_options_open,
3611         .read           = seq_read,
3612         .llseek         = seq_lseek,
3613         .release        = tracing_single_release_tr,
3614         .write          = tracing_trace_options_write,
3615 };
3616
3617 static const char readme_msg[] =
3618         "tracing mini-HOWTO:\n\n"
3619         "# echo 0 > tracing_on : quick way to disable tracing\n"
3620         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3621         " Important files:\n"
3622         "  trace\t\t\t- The static contents of the buffer\n"
3623         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3624         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3625         "  current_tracer\t- function and latency tracers\n"
3626         "  available_tracers\t- list of configured tracers for current_tracer\n"
3627         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3628         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3629         "  trace_clock\t\t- change the clock used to order events\n"
3630         "       local:   Per cpu clock but may not be synced across CPUs\n"
3631         "      global:   Synced across CPUs but slows tracing down.\n"
3632         "     counter:   Not a clock, but just an increment\n"
3633         "      uptime:   Jiffy counter from time of boot\n"
3634         "        perf:   Same clock that perf events use\n"
3635 #ifdef CONFIG_X86_64
3636         "     x86-tsc:   TSC cycle counter\n"
3637 #endif
3638         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
3639         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3640         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3641         "\t\t\t  Remove sub-buffer with rmdir\n"
3642         "  trace_options\t\t- Set format or modify how tracing happens\n"
3643         "\t\t\t  Disable an option by prefixing 'no' to the\n"
3644         "\t\t\t  option name\n"
3645         "  saved_cmdlines_size\t- echo the number of comm-pid entries to store in here\n"
3646 #ifdef CONFIG_DYNAMIC_FTRACE
3647         "\n  available_filter_functions - list of functions that can be filtered on\n"
3648         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3649         "\t\t\t  functions\n"
3650         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3651         "\t     modules: Can select a group via module\n"
3652         "\t      Format: :mod:<module-name>\n"
3653         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3654         "\t    triggers: a command to perform when function is hit\n"
3655         "\t      Format: <function>:<trigger>[:count]\n"
3656         "\t     trigger: traceon, traceoff\n"
3657         "\t\t      enable_event:<system>:<event>\n"
3658         "\t\t      disable_event:<system>:<event>\n"
3659 #ifdef CONFIG_STACKTRACE
3660         "\t\t      stacktrace\n"
3661 #endif
3662 #ifdef CONFIG_TRACER_SNAPSHOT
3663         "\t\t      snapshot\n"
3664 #endif
3665         "\t\t      dump\n"
3666         "\t\t      cpudump\n"
3667         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3668         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3669         "\t     The first one will disable tracing every time do_fault is hit\n"
3670         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3671         "\t       The first time do_trap is hit and it disables tracing, the\n"
3672         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3673         "\t       the counter will not decrement. It only decrements when the\n"
3674         "\t       trigger did work\n"
3675         "\t     To remove trigger without count:\n"
3676         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3677         "\t     To remove trigger with a count:\n"
3678         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3679         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3680         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3681         "\t    modules: Can select a group via module command :mod:\n"
3682         "\t    Does not accept triggers\n"
3683 #endif /* CONFIG_DYNAMIC_FTRACE */
3684 #ifdef CONFIG_FUNCTION_TRACER
3685         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3686         "\t\t    (function)\n"
3687 #endif
3688 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3689         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3690         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3691 #endif
3692 #ifdef CONFIG_TRACER_SNAPSHOT
3693         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3694         "\t\t\t  snapshot buffer. Read the contents for more\n"
3695         "\t\t\t  information\n"
3696 #endif
3697 #ifdef CONFIG_STACK_TRACER
3698         "  stack_trace\t\t- Shows the max stack trace when active\n"
3699         "  stack_max_size\t- Shows current max stack size that was traced\n"
3700         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3701         "\t\t\t  new trace)\n"
3702 #ifdef CONFIG_DYNAMIC_FTRACE
3703         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3704         "\t\t\t  traces\n"
3705 #endif
3706 #endif /* CONFIG_STACK_TRACER */
3707         "  events/\t\t- Directory containing all trace event subsystems:\n"
3708         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3709         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3710         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3711         "\t\t\t  events\n"
3712         "      filter\t\t- If set, only events passing filter are traced\n"
3713         "  events/<system>/<event>/\t- Directory containing control files for\n"
3714         "\t\t\t  <event>:\n"
3715         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3716         "      filter\t\t- If set, only events passing filter are traced\n"
3717         "      trigger\t\t- If set, a command to perform when event is hit\n"
3718         "\t    Format: <trigger>[:count][if <filter>]\n"
3719         "\t   trigger: traceon, traceoff\n"
3720         "\t            enable_event:<system>:<event>\n"
3721         "\t            disable_event:<system>:<event>\n"
3722 #ifdef CONFIG_STACKTRACE
3723         "\t\t    stacktrace\n"
3724 #endif
3725 #ifdef CONFIG_TRACER_SNAPSHOT
3726         "\t\t    snapshot\n"
3727 #endif
3728         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3729         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3730         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3731         "\t                  events/block/block_unplug/trigger\n"
3732         "\t   The first disables tracing every time block_unplug is hit.\n"
3733         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3734         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3735         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3736         "\t   Like function triggers, the counter is only decremented if it\n"
3737         "\t    enabled or disabled tracing.\n"
3738         "\t   To remove a trigger without a count:\n"
3739         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3740         "\t   To remove a trigger with a count:\n"
3741         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3742         "\t   Filters can be ignored when removing a trigger.\n"
3743 ;
3744
3745 static ssize_t
3746 tracing_readme_read(struct file *filp, char __user *ubuf,
3747                        size_t cnt, loff_t *ppos)
3748 {
3749         return simple_read_from_buffer(ubuf, cnt, ppos,
3750                                         readme_msg, strlen(readme_msg));
3751 }
3752
3753 static const struct file_operations tracing_readme_fops = {
3754         .open           = tracing_open_generic,
3755         .read           = tracing_readme_read,
3756         .llseek         = generic_file_llseek,
3757 };
3758
3759 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3760 {
3761         unsigned int *ptr = v;
3762
3763         if (*pos || m->count)
3764                 ptr++;
3765
3766         (*pos)++;
3767
3768         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3769              ptr++) {
3770                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3771                         continue;
3772
3773                 return ptr;
3774         }
3775
3776         return NULL;
3777 }
3778
3779 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3780 {
3781         void *v;
3782         loff_t l = 0;
3783
3784         preempt_disable();
3785         arch_spin_lock(&trace_cmdline_lock);
3786
3787         v = &savedcmd->map_cmdline_to_pid[0];
3788         while (l <= *pos) {
3789                 v = saved_cmdlines_next(m, v, &l);
3790                 if (!v)
3791                         return NULL;
3792         }
3793
3794         return v;
3795 }
3796
3797 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3798 {
3799         arch_spin_unlock(&trace_cmdline_lock);
3800         preempt_enable();
3801 }
3802
3803 static int saved_cmdlines_show(struct seq_file *m, void *v)
3804 {
3805         char buf[TASK_COMM_LEN];
3806         unsigned int *pid = v;
3807
3808         __trace_find_cmdline(*pid, buf);
3809         seq_printf(m, "%d %s\n", *pid, buf);
3810         return 0;
3811 }
3812
3813 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3814         .start          = saved_cmdlines_start,
3815         .next           = saved_cmdlines_next,
3816         .stop           = saved_cmdlines_stop,
3817         .show           = saved_cmdlines_show,
3818 };
3819
3820 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3821 {
3822         if (tracing_disabled)
3823                 return -ENODEV;
3824
3825         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3826 }
3827
3828 static const struct file_operations tracing_saved_cmdlines_fops = {
3829         .open           = tracing_saved_cmdlines_open,
3830         .read           = seq_read,
3831         .llseek         = seq_lseek,
3832         .release        = seq_release,
3833 };
3834
3835 static ssize_t
3836 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3837                                  size_t cnt, loff_t *ppos)
3838 {
3839         char buf[64];
3840         int r;
3841
3842         arch_spin_lock(&trace_cmdline_lock);
3843         r = sprintf(buf, "%u\n", savedcmd->cmdline_num);
3844         arch_spin_unlock(&trace_cmdline_lock);
3845
3846         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3847 }
3848
3849 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3850 {
3851         kfree(s->saved_cmdlines);
3852         kfree(s->map_cmdline_to_pid);
3853         kfree(s);
3854 }
3855
3856 static int tracing_resize_saved_cmdlines(unsigned int val)
3857 {
3858         struct saved_cmdlines_buffer *s, *savedcmd_temp;
3859
3860         s = kmalloc(sizeof(struct saved_cmdlines_buffer), GFP_KERNEL);
3861         if (!s)
3862                 return -ENOMEM;
3863
3864         if (allocate_cmdlines_buffer(val, s) < 0) {
3865                 kfree(s);
3866                 return -ENOMEM;
3867         }
3868
3869         arch_spin_lock(&trace_cmdline_lock);
3870         savedcmd_temp = savedcmd;
3871         savedcmd = s;
3872         arch_spin_unlock(&trace_cmdline_lock);
3873         free_saved_cmdlines_buffer(savedcmd_temp);
3874
3875         return 0;
3876 }
3877
3878 static ssize_t
3879 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3880                                   size_t cnt, loff_t *ppos)
3881 {
3882         unsigned long val;
3883         int ret;
3884
3885         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3886         if (ret)
3887                 return ret;
3888
3889         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
3890         if (!val || val > PID_MAX_DEFAULT)
3891                 return -EINVAL;
3892
3893         ret = tracing_resize_saved_cmdlines((unsigned int)val);
3894         if (ret < 0)
3895                 return ret;
3896
3897         *ppos += cnt;
3898
3899         return cnt;
3900 }
3901
3902 static const struct file_operations tracing_saved_cmdlines_size_fops = {
3903         .open           = tracing_open_generic,
3904         .read           = tracing_saved_cmdlines_size_read,
3905         .write          = tracing_saved_cmdlines_size_write,
3906 };
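/*
 * Illustrative usage (assumption, not from the original source): the
 * resize path above backs the saved_cmdlines_size file, and the cached
 * pid<->comm mappings themselves are readable via saved_cmdlines:
 *
 *	# echo 1024 > /sys/kernel/debug/tracing/saved_cmdlines_size
 *	# cat /sys/kernel/debug/tracing/saved_cmdlines
 *
 * Each output line is "<pid> <comm>", as produced by
 * saved_cmdlines_show() above.
 */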
3907
3908 static ssize_t
3909 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3910                        size_t cnt, loff_t *ppos)
3911 {
3912         struct trace_array *tr = filp->private_data;
3913         char buf[MAX_TRACER_SIZE+2];
3914         int r;
3915
3916         mutex_lock(&trace_types_lock);
3917         r = sprintf(buf, "%s\n", tr->current_trace->name);
3918         mutex_unlock(&trace_types_lock);
3919
3920         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3921 }
3922
3923 int tracer_init(struct tracer *t, struct trace_array *tr)
3924 {
3925         tracing_reset_online_cpus(&tr->trace_buffer);
3926         return t->init(tr);
3927 }
3928
3929 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3930 {
3931         int cpu;
3932
3933         for_each_tracing_cpu(cpu)
3934                 per_cpu_ptr(buf->data, cpu)->entries = val;
3935 }
3936
3937 #ifdef CONFIG_TRACER_MAX_TRACE
3938 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3939 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3940                                         struct trace_buffer *size_buf, int cpu_id)
3941 {
3942         int cpu, ret = 0;
3943
3944         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3945                 for_each_tracing_cpu(cpu) {
3946                         ret = ring_buffer_resize(trace_buf->buffer,
3947                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3948                         if (ret < 0)
3949                                 break;
3950                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3951                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3952                 }
3953         } else {
3954                 ret = ring_buffer_resize(trace_buf->buffer,
3955                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3956                 if (ret == 0)
3957                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3958                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3959         }
3960
3961         return ret;
3962 }
3963 #endif /* CONFIG_TRACER_MAX_TRACE */
3964
3965 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3966                                         unsigned long size, int cpu)
3967 {
3968         int ret;
3969
3970         /*
3971          * If kernel or user changes the size of the ring buffer
3972          * we use the size that was given, and we can forget about
3973          * expanding it later.
3974          */
3975         ring_buffer_expanded = true;
3976
3977         /* May be called before buffers are initialized */
3978         if (!tr->trace_buffer.buffer)
3979                 return 0;
3980
3981         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3982         if (ret < 0)
3983                 return ret;
3984
3985 #ifdef CONFIG_TRACER_MAX_TRACE
3986         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3987             !tr->current_trace->use_max_tr)
3988                 goto out;
3989
3990         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3991         if (ret < 0) {
3992                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3993                                                      &tr->trace_buffer, cpu);
3994                 if (r < 0) {
3995                         /*
3996                          * AARGH! We are left with a max buffer
3997                          * of a different size!!!!
3998                          * The max buffer is our "snapshot" buffer.
3999                          * When a tracer needs a snapshot (one of the
4000                          * latency tracers), it swaps the max buffer
4001                          * with the saved snapshot. We succeeded in
4002                          * updating the size of the main buffer, but failed to
4003                          * update the size of the max buffer. But when we tried
4004                          * to reset the main buffer to the original size, we
4005                          * failed there too. This is very unlikely to
4006                          * happen, but if it does, warn and kill all
4007                          * tracing.
4008                          */
4009                         WARN_ON(1);
4010                         tracing_disabled = 1;
4011                 }
4012                 return ret;
4013         }
4014
4015         if (cpu == RING_BUFFER_ALL_CPUS)
4016                 set_buffer_entries(&tr->max_buffer, size);
4017         else
4018                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4019
4020  out:
4021 #endif /* CONFIG_TRACER_MAX_TRACE */
4022
4023         if (cpu == RING_BUFFER_ALL_CPUS)
4024                 set_buffer_entries(&tr->trace_buffer, size);
4025         else
4026                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4027
4028         return ret;
4029 }
4030
4031 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4032                                           unsigned long size, int cpu_id)
4033 {
4034         int ret = size;
4035
4036         mutex_lock(&trace_types_lock);
4037
4038         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4039                 /* make sure this cpu is enabled in the mask */
4040                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4041                         ret = -EINVAL;
4042                         goto out;
4043                 }
4044         }
4045
4046         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4047         if (ret < 0)
4048                 ret = -ENOMEM;
4049
4050 out:
4051         mutex_unlock(&trace_types_lock);
4052
4053         return ret;
4054 }
4055
4056
4057 /**
4058  * tracing_update_buffers - used by tracing facility to expand ring buffers
4059  *
4060  * To save memory when tracing is configured in but never used, the
4061  * ring buffers are initially set to a minimum size. Once a user starts
4062  * to use the tracing facility, they need to grow to their default
4063  * size.
4064  *
4065  * This function is to be called when a tracer is about to be used.
4066  */
4067 int tracing_update_buffers(void)
4068 {
4069         int ret = 0;
4070
4071         mutex_lock(&trace_types_lock);
4072         if (!ring_buffer_expanded)
4073                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4074                                                 RING_BUFFER_ALL_CPUS);
4075         mutex_unlock(&trace_types_lock);
4076
4077         return ret;
4078 }
4079
4080 struct trace_option_dentry;
4081
4082 static struct trace_option_dentry *
4083 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4084
4085 static void
4086 destroy_trace_option_files(struct trace_option_dentry *topts);
4087
4088 /*
4089  * Used to clear out the tracer before deletion of an instance.
4090  * Must have trace_types_lock held.
4091  */
4092 static void tracing_set_nop(struct trace_array *tr)
4093 {
4094         if (tr->current_trace == &nop_trace)
4095                 return;
4096
4097         tr->current_trace->enabled--;
4098
4099         if (tr->current_trace->reset)
4100                 tr->current_trace->reset(tr);
4101
4102         tr->current_trace = &nop_trace;
4103 }
4104
4105 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4106 {
4107         static struct trace_option_dentry *topts;
4108         struct tracer *t;
4109 #ifdef CONFIG_TRACER_MAX_TRACE
4110         bool had_max_tr;
4111 #endif
4112         int ret = 0;
4113
4114         mutex_lock(&trace_types_lock);
4115
4116         if (!ring_buffer_expanded) {
4117                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4118                                                 RING_BUFFER_ALL_CPUS);
4119                 if (ret < 0)
4120                         goto out;
4121                 ret = 0;
4122         }
4123
4124         for (t = trace_types; t; t = t->next) {
4125                 if (strcmp(t->name, buf) == 0)
4126                         break;
4127         }
4128         if (!t) {
4129                 ret = -EINVAL;
4130                 goto out;
4131         }
4132         if (t == tr->current_trace)
4133                 goto out;
4134
4135         /* Some tracers are only allowed for the top level buffer */
4136         if (!trace_ok_for_array(t, tr)) {
4137                 ret = -EINVAL;
4138                 goto out;
4139         }
4140
4141         trace_branch_disable();
4142
4143         tr->current_trace->enabled--;
4144
4145         if (tr->current_trace->reset)
4146                 tr->current_trace->reset(tr);
4147
4148         /* Current trace needs to be nop_trace before synchronize_sched */
4149         tr->current_trace = &nop_trace;
4150
4151 #ifdef CONFIG_TRACER_MAX_TRACE
4152         had_max_tr = tr->allocated_snapshot;
4153
4154         if (had_max_tr && !t->use_max_tr) {
4155                 /*
4156                  * We need to make sure that the update_max_tr sees that
4157                  * current_trace changed to nop_trace to keep it from
4158                  * swapping the buffers after we resize it.
4159                  * The update_max_tr is called with interrupts disabled,
4160                  * so a synchronize_sched() is sufficient.
4161                  */
4162                 synchronize_sched();
4163                 free_snapshot(tr);
4164         }
4165 #endif
4166         /* Currently, only the top instance has options */
4167         if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
4168                 destroy_trace_option_files(topts);
4169                 topts = create_trace_option_files(tr, t);
4170         }
4171
4172 #ifdef CONFIG_TRACER_MAX_TRACE
4173         if (t->use_max_tr && !had_max_tr) {
4174                 ret = alloc_snapshot(tr);
4175                 if (ret < 0)
4176                         goto out;
4177         }
4178 #endif
4179
4180         if (t->init) {
4181                 ret = tracer_init(t, tr);
4182                 if (ret)
4183                         goto out;
4184         }
4185
4186         tr->current_trace = t;
4187         tr->current_trace->enabled++;
4188         trace_branch_enable(tr);
4189  out:
4190         mutex_unlock(&trace_types_lock);
4191
4192         return ret;
4193 }
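/*
 * Illustrative usage (not part of the original source): this is the
 * backend of the current_tracer file, so switching tracers from user
 * space is just:
 *
 *	# cat /sys/kernel/debug/tracing/available_tracers
 *	# echo function > /sys/kernel/debug/tracing/current_tracer
 *	# echo nop > /sys/kernel/debug/tracing/current_tracer
 *
 * Writing "nop" tears the previous tracer down much like
 * tracing_set_nop() above, and the first non-nop write also triggers the
 * lazy ring buffer expansion seen at the top of the function.
 */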
4194
4195 static ssize_t
4196 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4197                         size_t cnt, loff_t *ppos)
4198 {
4199         struct trace_array *tr = filp->private_data;
4200         char buf[MAX_TRACER_SIZE+1];
4201         int i;
4202         size_t ret;
4203         int err;
4204
4205         ret = cnt;
4206
4207         if (cnt > MAX_TRACER_SIZE)
4208                 cnt = MAX_TRACER_SIZE;
4209
4210         if (copy_from_user(&buf, ubuf, cnt))
4211                 return -EFAULT;
4212
4213         buf[cnt] = 0;
4214
4215         /* strip ending whitespace. */
4216         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4217                 buf[i] = 0;
4218
4219         err = tracing_set_tracer(tr, buf);
4220         if (err)
4221                 return err;
4222
4223         *ppos += ret;
4224
4225         return ret;
4226 }
4227
4228 static ssize_t
4229 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4230                      size_t cnt, loff_t *ppos)
4231 {
4232         unsigned long *ptr = filp->private_data;
4233         char buf[64];
4234         int r;
4235
4236         r = snprintf(buf, sizeof(buf), "%ld\n",
4237                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4238         if (r > sizeof(buf))
4239                 r = sizeof(buf);
4240         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4241 }
4242
4243 static ssize_t
4244 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4245                       size_t cnt, loff_t *ppos)
4246 {
4247         unsigned long *ptr = filp->private_data;
4248         unsigned long val;
4249         int ret;
4250
4251         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4252         if (ret)
4253                 return ret;
4254
4255         *ptr = val * 1000;
4256
4257         return cnt;
4258 }
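/*
 * Illustrative note (assumption, not from the original source): the value
 * exchanged through these two helpers is in microseconds on the user side
 * (hence the "* 1000" and nsecs_to_usecs() conversions) while the backing
 * variable holds nanoseconds. Resetting the recorded maximum latency
 * before a new measurement would look like:
 *
 *	# echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 */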
4259
4260 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4261 {
4262         struct trace_array *tr = inode->i_private;
4263         struct trace_iterator *iter;
4264         int ret = 0;
4265
4266         if (tracing_disabled)
4267                 return -ENODEV;
4268
4269         if (trace_array_get(tr) < 0)
4270                 return -ENODEV;
4271
4272         mutex_lock(&trace_types_lock);
4273
4274         /* create a buffer to store the information to pass to userspace */
4275         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4276         if (!iter) {
4277                 ret = -ENOMEM;
4278                 __trace_array_put(tr);
4279                 goto out;
4280         }
4281
4282         /*
4283          * We make a copy of the current tracer to avoid concurrent
4284          * changes on it while we are reading.
4285          */
4286         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4287         if (!iter->trace) {
4288                 ret = -ENOMEM;
4289                 goto fail;
4290         }
4291         *iter->trace = *tr->current_trace;
4292
4293         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4294                 ret = -ENOMEM;
4295                 goto fail;
4296         }
4297
4298         /* trace pipe does not show start of buffer */
4299         cpumask_setall(iter->started);
4300
4301         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4302                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4303
4304         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4305         if (trace_clocks[tr->clock_id].in_ns)
4306                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4307
4308         iter->tr = tr;
4309         iter->trace_buffer = &tr->trace_buffer;
4310         iter->cpu_file = tracing_get_cpu(inode);
4311         mutex_init(&iter->mutex);
4312         filp->private_data = iter;
4313
4314         if (iter->trace->pipe_open)
4315                 iter->trace->pipe_open(iter);
4316
4317         nonseekable_open(inode, filp);
4318 out:
4319         mutex_unlock(&trace_types_lock);
4320         return ret;
4321
4322 fail:
4323         kfree(iter->trace);
4324         kfree(iter);
4325         __trace_array_put(tr);
4326         mutex_unlock(&trace_types_lock);
4327         return ret;
4328 }
4329
4330 static int tracing_release_pipe(struct inode *inode, struct file *file)
4331 {
4332         struct trace_iterator *iter = file->private_data;
4333         struct trace_array *tr = inode->i_private;
4334
4335         mutex_lock(&trace_types_lock);
4336
4337         if (iter->trace->pipe_close)
4338                 iter->trace->pipe_close(iter);
4339
4340         mutex_unlock(&trace_types_lock);
4341
4342         free_cpumask_var(iter->started);
4343         mutex_destroy(&iter->mutex);
4344         kfree(iter->trace);
4345         kfree(iter);
4346
4347         trace_array_put(tr);
4348
4349         return 0;
4350 }
4351
4352 static unsigned int
4353 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4354 {
4355         /* Iterators are static, they should be filled or empty */
4356         if (trace_buffer_iter(iter, iter->cpu_file))
4357                 return POLLIN | POLLRDNORM;
4358
4359         if (trace_flags & TRACE_ITER_BLOCK)
4360                 /*
4361                  * Always select as readable when in blocking mode
4362                  */
4363                 return POLLIN | POLLRDNORM;
4364         else
4365                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4366                                              filp, poll_table);
4367 }
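/*
 * Illustrative user-space sketch (assumption, not from the original
 * source): trace_poll() is what makes select()/poll() on trace_pipe work,
 * waking the reader when data is available, or immediately when the
 * "block" trace option (TRACE_ITER_BLOCK) is set:
 *
 *	int fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *		read(fd, buf, sizeof(buf));
 */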
4368
4369 static unsigned int
4370 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4371 {
4372         struct trace_iterator *iter = filp->private_data;
4373
4374         return trace_poll(iter, filp, poll_table);
4375 }
4376
4377 /* Must be called with iter->mutex held. */
4378 static int tracing_wait_pipe(struct file *filp)
4379 {
4380         struct trace_iterator *iter = filp->private_data;
4381
4382         while (trace_empty(iter)) {
4383
4384                 if ((filp->f_flags & O_NONBLOCK)) {
4385                         return -EAGAIN;
4386                 }
4387
4388                 /*
4389                  * We block until we read something and tracing is disabled.
4390                  * We still block if tracing is disabled, but we have never
4391                  * read anything. This allows a user to cat this file, and
4392                  * then enable tracing. But after we have read something,
4393                  * we give an EOF when tracing is again disabled.
4394                  *
4395                  * iter->pos will be 0 if we haven't read anything.
4396                  */
4397                 if (!tracing_is_on() && iter->pos)
4398                         break;
4399
4400                 mutex_unlock(&iter->mutex);
4401
4402                 wait_on_pipe(iter);
4403
4404                 mutex_lock(&iter->mutex);
4405
4406                 if (signal_pending(current))
4407                         return -EINTR;
4408         }
4409
4410         return 1;
4411 }
4412
4413 /*
4414  * Consumer reader.
4415  */
4416 static ssize_t
4417 tracing_read_pipe(struct file *filp, char __user *ubuf,
4418                   size_t cnt, loff_t *ppos)
4419 {
4420         struct trace_iterator *iter = filp->private_data;
4421         struct trace_array *tr = iter->tr;
4422         ssize_t sret;
4423
4424         /* return any leftover data */
4425         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4426         if (sret != -EBUSY)
4427                 return sret;
4428
4429         trace_seq_init(&iter->seq);
4430
4431         /* copy the tracer to avoid using a global lock all around */
4432         mutex_lock(&trace_types_lock);
4433         if (unlikely(iter->trace->name != tr->current_trace->name))
4434                 *iter->trace = *tr->current_trace;
4435         mutex_unlock(&trace_types_lock);
4436
4437         /*
4438          * Avoid more than one consumer on a single file descriptor
4439          * This is just a matter of trace coherency; the ring buffer itself
4440          * is protected.
4441          */
4442         mutex_lock(&iter->mutex);
4443         if (iter->trace->read) {
4444                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4445                 if (sret)
4446                         goto out;
4447         }
4448
4449 waitagain:
4450         sret = tracing_wait_pipe(filp);
4451         if (sret <= 0)
4452                 goto out;
4453
4454         /* stop when tracing is finished */
4455         if (trace_empty(iter)) {
4456                 sret = 0;
4457                 goto out;
4458         }
4459
4460         if (cnt >= PAGE_SIZE)
4461                 cnt = PAGE_SIZE - 1;
4462
4463         /* reset all but tr, trace, and overruns */
4464         memset(&iter->seq, 0,
4465                sizeof(struct trace_iterator) -
4466                offsetof(struct trace_iterator, seq));
4467         cpumask_clear(iter->started);
4468         iter->pos = -1;
4469
4470         trace_event_read_lock();
4471         trace_access_lock(iter->cpu_file);
4472         while (trace_find_next_entry_inc(iter) != NULL) {
4473                 enum print_line_t ret;
4474                 int len = iter->seq.len;
4475
4476                 ret = print_trace_line(iter);
4477                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4478                         /* don't print partial lines */
4479                         iter->seq.len = len;
4480                         break;
4481                 }
4482                 if (ret != TRACE_TYPE_NO_CONSUME)
4483                         trace_consume(iter);
4484
4485                 if (iter->seq.len >= cnt)
4486                         break;
4487
4488                 /*
4489                  * Setting the full flag means we reached the trace_seq buffer
4490                  * size and we should leave by partial output condition above.
4491                  * One of the trace_seq_* functions is not used properly.
4492                  */
4493                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4494                           iter->ent->type);
4495         }
4496         trace_access_unlock(iter->cpu_file);
4497         trace_event_read_unlock();
4498
4499         /* Now copy what we have to the user */
4500         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4501         if (iter->seq.readpos >= iter->seq.len)
4502                 trace_seq_init(&iter->seq);
4503
4504         /*
4505          * If there was nothing to send to user, in spite of consuming trace
4506          * entries, go back to wait for more entries.
4507          */
4508         if (sret == -EBUSY)
4509                 goto waitagain;
4510
4511 out:
4512         mutex_unlock(&iter->mutex);
4513
4514         return sret;
4515 }
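/*
 * Illustrative note (not part of the original source): because this is a
 * consuming read, the iter->mutex above keeps more than one consumer off
 * a single file descriptor. The usual interactive use is simply:
 *
 *	# cat /sys/kernel/debug/tracing/trace_pipe
 *
 * which blocks in tracing_wait_pipe() until entries arrive and removes
 * them from the ring buffer via trace_consume() as they are printed.
 */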
4516
4517 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4518                                      unsigned int idx)
4519 {
4520         __free_page(spd->pages[idx]);
4521 }
4522
4523 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4524         .can_merge              = 0,
4525         .confirm                = generic_pipe_buf_confirm,
4526         .release                = generic_pipe_buf_release,
4527         .steal                  = generic_pipe_buf_steal,
4528         .get                    = generic_pipe_buf_get,
4529 };
4530
4531 static size_t
4532 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4533 {
4534         size_t count;
4535         int ret;
4536
4537         /* Seq buffer is page-sized, exactly what we need. */
4538         for (;;) {
4539                 count = iter->seq.len;
4540                 ret = print_trace_line(iter);
4541                 count = iter->seq.len - count;
4542                 if (rem < count) {
4543                         rem = 0;
4544                         iter->seq.len -= count;
4545                         break;
4546                 }
4547                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4548                         iter->seq.len -= count;
4549                         break;
4550                 }
4551
4552                 if (ret != TRACE_TYPE_NO_CONSUME)
4553                         trace_consume(iter);
4554                 rem -= count;
4555                 if (!trace_find_next_entry_inc(iter))   {
4556                         rem = 0;
4557                         iter->ent = NULL;
4558                         break;
4559                 }
4560         }
4561
4562         return rem;
4563 }
4564
4565 static ssize_t tracing_splice_read_pipe(struct file *filp,
4566                                         loff_t *ppos,
4567                                         struct pipe_inode_info *pipe,
4568                                         size_t len,
4569                                         unsigned int flags)
4570 {
4571         struct page *pages_def[PIPE_DEF_BUFFERS];
4572         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4573         struct trace_iterator *iter = filp->private_data;
4574         struct splice_pipe_desc spd = {
4575                 .pages          = pages_def,
4576                 .partial        = partial_def,
4577                 .nr_pages       = 0, /* This gets updated below. */
4578                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4579                 .flags          = flags,
4580                 .ops            = &tracing_pipe_buf_ops,
4581                 .spd_release    = tracing_spd_release_pipe,
4582         };
4583         struct trace_array *tr = iter->tr;
4584         ssize_t ret;
4585         size_t rem;
4586         unsigned int i;
4587
4588         if (splice_grow_spd(pipe, &spd))
4589                 return -ENOMEM;
4590
4591         /* copy the tracer to avoid using a global lock all around */
4592         mutex_lock(&trace_types_lock);
4593         if (unlikely(iter->trace->name != tr->current_trace->name))
4594                 *iter->trace = *tr->current_trace;
4595         mutex_unlock(&trace_types_lock);
4596
4597         mutex_lock(&iter->mutex);
4598
4599         if (iter->trace->splice_read) {
4600                 ret = iter->trace->splice_read(iter, filp,
4601                                                ppos, pipe, len, flags);
4602                 if (ret)
4603                         goto out_err;
4604         }
4605
4606         ret = tracing_wait_pipe(filp);
4607         if (ret <= 0)
4608                 goto out_err;
4609
4610         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4611                 ret = -EFAULT;
4612                 goto out_err;
4613         }
4614
4615         trace_event_read_lock();
4616         trace_access_lock(iter->cpu_file);
4617
4618         /* Fill as many pages as possible. */
4619         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4620                 spd.pages[i] = alloc_page(GFP_KERNEL);
4621                 if (!spd.pages[i])
4622                         break;
4623
4624                 rem = tracing_fill_pipe_page(rem, iter);
4625
4626                 /* Copy the data into the page, so we can start over. */
4627                 ret = trace_seq_to_buffer(&iter->seq,
4628                                           page_address(spd.pages[i]),
4629                                           iter->seq.len);
4630                 if (ret < 0) {
4631                         __free_page(spd.pages[i]);
4632                         break;
4633                 }
4634                 spd.partial[i].offset = 0;
4635                 spd.partial[i].len = iter->seq.len;
4636
4637                 trace_seq_init(&iter->seq);
4638         }
4639
4640         trace_access_unlock(iter->cpu_file);
4641         trace_event_read_unlock();
4642         mutex_unlock(&iter->mutex);
4643
4644         spd.nr_pages = i;
4645
4646         ret = splice_to_pipe(pipe, &spd);
4647 out:
4648         splice_shrink_spd(&spd);
4649         return ret;
4650
4651 out_err:
4652         mutex_unlock(&iter->mutex);
4653         goto out;
4654 }
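/*
 * Illustrative user-space sketch (assumption, not from the original
 * source): the splice path above lets a reader move rendered trace text
 * straight into a pipe without staging it in a user-space buffer:
 *
 *	int fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
 *	int p[2];
 *
 *	pipe(p);
 *	splice(fd, NULL, p[1], NULL, 65536, 0);
 *	splice(p[0], NULL, STDOUT_FILENO, NULL, 65536, 0);
 */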
4655
4656 static ssize_t
4657 tracing_entries_read(struct file *filp, char __user *ubuf,
4658                      size_t cnt, loff_t *ppos)
4659 {
4660         struct inode *inode = file_inode(filp);
4661         struct trace_array *tr = inode->i_private;
4662         int cpu = tracing_get_cpu(inode);
4663         char buf[64];
4664         int r = 0;
4665         ssize_t ret;
4666
4667         mutex_lock(&trace_types_lock);
4668
4669         if (cpu == RING_BUFFER_ALL_CPUS) {
4670                 int cpu, buf_size_same;
4671                 unsigned long size;
4672
4673                 size = 0;
4674                 buf_size_same = 1;
4675                 /* check if all cpu sizes are same */
4676                 for_each_tracing_cpu(cpu) {
4677                         /* fill in the size from first enabled cpu */
4678                         if (size == 0)
4679                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4680                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4681                                 buf_size_same = 0;
4682                                 break;
4683                         }
4684                 }
4685
4686                 if (buf_size_same) {
4687                         if (!ring_buffer_expanded)
4688                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4689                                             size >> 10,
4690                                             trace_buf_size >> 10);
4691                         else
4692                                 r = sprintf(buf, "%lu\n", size >> 10);
4693                 } else
4694                         r = sprintf(buf, "X\n");
4695         } else
4696                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4697
4698         mutex_unlock(&trace_types_lock);
4699
4700         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4701         return ret;
4702 }
4703
4704 static ssize_t
4705 tracing_entries_write(struct file *filp, const char __user *ubuf,
4706                       size_t cnt, loff_t *ppos)
4707 {
4708         struct inode *inode = file_inode(filp);
4709         struct trace_array *tr = inode->i_private;
4710         unsigned long val;
4711         int ret;
4712
4713         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4714         if (ret)
4715                 return ret;
4716
4717         /* must have at least 1 entry */
4718         if (!val)
4719                 return -EINVAL;
4720
4721         /* value is in KB */
4722         val <<= 10;
4723         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4724         if (ret < 0)
4725                 return ret;
4726
4727         *ppos += cnt;
4728
4729         return cnt;
4730 }
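/*
 * Illustrative usage (not part of the original source): the value is
 * taken in kilobytes (note the "val <<= 10" above) and applies either to
 * all CPUs or to one CPU, depending on which buffer_size_kb file was
 * opened, matching the mini-HOWTO entries above:
 *
 *	# echo 4096 > /sys/kernel/debug/tracing/buffer_size_kb
 *
 * The per-CPU variant (per_cpu/cpuN/buffer_size_kb) is an assumption
 * based on tracing_get_cpu() resolving a single CPU here.
 */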
4731
4732 static ssize_t
4733 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4734                                 size_t cnt, loff_t *ppos)
4735 {
4736         struct trace_array *tr = filp->private_data;
4737         char buf[64];
4738         int r, cpu;
4739         unsigned long size = 0, expanded_size = 0;
4740
4741         mutex_lock(&trace_types_lock);
4742         for_each_tracing_cpu(cpu) {
4743                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4744                 if (!ring_buffer_expanded)
4745                         expanded_size += trace_buf_size >> 10;
4746         }
4747         if (ring_buffer_expanded)
4748                 r = sprintf(buf, "%lu\n", size);
4749         else
4750                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4751         mutex_unlock(&trace_types_lock);
4752
4753         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4754 }
4755
4756 static ssize_t
4757 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4758                           size_t cnt, loff_t *ppos)
4759 {
4760         /*
4761          * There is no need to read what the user has written; this function
4762          * only exists so that using "echo" on this file does not return an error
4763          */
4764
4765         *ppos += cnt;
4766
4767         return cnt;
4768 }
4769
4770 static int
4771 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4772 {
4773         struct trace_array *tr = inode->i_private;
4774
4775         /* disable tracing? */
4776         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4777                 tracer_tracing_off(tr);
4778         /* resize the ring buffer to 0 */
4779         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4780
4781         trace_array_put(tr);
4782
4783         return 0;
4784 }
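/*
 * Illustrative note (assumption, not from the original source): this pair
 * backs the free_buffer file; the write is accepted and ignored, and it
 * is the final close that shrinks the ring buffer to zero (and, with
 * TRACE_ITER_STOP_ON_FREE set, turns tracing off first):
 *
 *	# echo anything > /sys/kernel/debug/tracing/free_buffer
 */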
4785
4786 static ssize_t
4787 tracing_mark_write(struct file *filp, const char __user *ubuf,
4788                                         size_t cnt, loff_t *fpos)
4789 {
4790         unsigned long addr = (unsigned long)ubuf;
4791         struct trace_array *tr = filp->private_data;
4792         struct ring_buffer_event *event;
4793         struct ring_buffer *buffer;
4794         struct print_entry *entry;
4795         unsigned long irq_flags;
4796         struct page *pages[2];
4797         void *map_page[2];
4798         int nr_pages = 1;
4799         ssize_t written;
4800         int offset;
4801         int size;
4802         int len;
4803         int ret;
4804         int i;
4805
4806         if (tracing_disabled)
4807                 return -EINVAL;
4808
4809         if (!(trace_flags & TRACE_ITER_MARKERS))
4810                 return -EINVAL;
4811
4812         if (cnt > TRACE_BUF_SIZE)
4813                 cnt = TRACE_BUF_SIZE;
4814
4815         /*
4816          * Userspace is injecting traces into the kernel trace buffer.
4817          * We want to be as non-intrusive as possible.
4818          * To do so, we do not want to allocate any special buffers
4819          * or take any locks, but instead write the userspace data
4820          * straight into the ring buffer.
4821          *
4822          * First we need to pin the userspace buffer into memory. It most
4823          * likely already is resident, since the task just referenced it,
4824          * but there is no guarantee. By using get_user_pages_fast()
4825          * and kmap_atomic/kunmap_atomic() we can get access to the
4826          * pages directly. We then write the data directly into the
4827          * ring buffer.
4828          */
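	/*
	 * A minimal userspace sketch (an illustration, assuming debugfs is
	 * mounted at /sys/kernel/debug): this handler backs the write-only
	 * "trace_marker" file, so annotating the trace is simply:
	 *
	 *	int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);
	 *	write(fd, "hello from userspace", 20);
	 *
	 * The string is copied out of the pinned user pages below and lands
	 * in the ring buffer as a TRACE_PRINT entry.
	 */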
4829         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4830
4831         /* check if we cross pages */
4832         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4833                 nr_pages = 2;
4834
4835         offset = addr & (PAGE_SIZE - 1);
4836         addr &= PAGE_MASK;
4837
4838         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4839         if (ret < nr_pages) {
4840                 while (--ret >= 0)
4841                         put_page(pages[ret]);
4842                 written = -EFAULT;
4843                 goto out;
4844         }
4845
4846         for (i = 0; i < nr_pages; i++)
4847                 map_page[i] = kmap_atomic(pages[i]);
4848
4849         local_save_flags(irq_flags);
4850         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
4851         buffer = tr->trace_buffer.buffer;
4852         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4853                                           irq_flags, preempt_count());
4854         if (!event) {
4855                 /* Ring buffer disabled, return as if not open for write */
4856                 written = -EBADF;
4857                 goto out_unlock;
4858         }
4859
4860         entry = ring_buffer_event_data(event);
4861         entry->ip = _THIS_IP_;
4862
4863         if (nr_pages == 2) {
4864                 len = PAGE_SIZE - offset;
4865                 memcpy(&entry->buf, map_page[0] + offset, len);
4866                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4867         } else
4868                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4869
4870         if (entry->buf[cnt - 1] != '\n') {
4871                 entry->buf[cnt] = '\n';
4872                 entry->buf[cnt + 1] = '\0';
4873         } else
4874                 entry->buf[cnt] = '\0';
4875
4876         __buffer_unlock_commit(buffer, event);
4877
4878         written = cnt;
4879
4880         *fpos += written;
4881
4882  out_unlock:
4883         for (i = 0; i < nr_pages; i++) {
4884                 kunmap_atomic(map_page[i]);
4885                 put_page(pages[i]);
4886         }
4887  out:
4888         return written;
4889 }
4890
4891 static int tracing_clock_show(struct seq_file *m, void *v)
4892 {
4893         struct trace_array *tr = m->private;
4894         int i;
4895
4896         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4897                 seq_printf(m,
4898                         "%s%s%s%s", i ? " " : "",
4899                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4900                         i == tr->clock_id ? "]" : "");
4901         seq_putc(m, '\n');
4902
4903         return 0;
4904 }
4905
4906 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
4907 {
4908         int i;
4909
4910         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4911                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4912                         break;
4913         }
4914         if (i == ARRAY_SIZE(trace_clocks))
4915                 return -EINVAL;
4916
4917         mutex_lock(&trace_types_lock);
4918
4919         tr->clock_id = i;
4920
4921         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4922
4923         /*
4924          * New clock may not be consistent with the previous clock.
4925          * Reset the buffer so that it doesn't have incomparable timestamps.
4926          */
4927         tracing_reset_online_cpus(&tr->trace_buffer);
4928
4929 #ifdef CONFIG_TRACER_MAX_TRACE
4930         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4931                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4932         tracing_reset_online_cpus(&tr->max_buffer);
4933 #endif
4934
4935         mutex_unlock(&trace_types_lock);
4936
4937         return 0;
4938 }
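/*
 * A minimal userspace sketch (an illustration, assuming debugfs is mounted
 * at /sys/kernel/debug): tracing_set_clock() is driven from the
 * "trace_clock" file.  Reading it lists the available clocks with the
 * current one in brackets (see tracing_clock_show() above); writing a
 * clock name switches to it and, as noted above, clears the buffers:
 *
 *	int fd = open("/sys/kernel/debug/tracing/trace_clock", O_WRONLY);
 *	write(fd, "global", 6);
 */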
4939
4940 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4941                                    size_t cnt, loff_t *fpos)
4942 {
4943         struct seq_file *m = filp->private_data;
4944         struct trace_array *tr = m->private;
4945         char buf[64];
4946         const char *clockstr;
4947         int ret;
4948
4949         if (cnt >= sizeof(buf))
4950                 return -EINVAL;
4951
4952         if (copy_from_user(&buf, ubuf, cnt))
4953                 return -EFAULT;
4954
4955         buf[cnt] = 0;
4956
4957         clockstr = strstrip(buf);
4958
4959         ret = tracing_set_clock(tr, clockstr);
4960         if (ret)
4961                 return ret;
4962
4963         *fpos += cnt;
4964
4965         return cnt;
4966 }
4967
4968 static int tracing_clock_open(struct inode *inode, struct file *file)
4969 {
4970         struct trace_array *tr = inode->i_private;
4971         int ret;
4972
4973         if (tracing_disabled)
4974                 return -ENODEV;
4975
4976         if (trace_array_get(tr))
4977                 return -ENODEV;
4978
4979         ret = single_open(file, tracing_clock_show, inode->i_private);
4980         if (ret < 0)
4981                 trace_array_put(tr);
4982
4983         return ret;
4984 }
4985
4986 struct ftrace_buffer_info {
4987         struct trace_iterator   iter;
4988         void                    *spare;
4989         unsigned int            read;
4990 };
4991
4992 #ifdef CONFIG_TRACER_SNAPSHOT
4993 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4994 {
4995         struct trace_array *tr = inode->i_private;
4996         struct trace_iterator *iter;
4997         struct seq_file *m;
4998         int ret = 0;
4999
5000         if (trace_array_get(tr) < 0)
5001                 return -ENODEV;
5002
5003         if (file->f_mode & FMODE_READ) {
5004                 iter = __tracing_open(inode, file, true);
5005                 if (IS_ERR(iter))
5006                         ret = PTR_ERR(iter);
5007         } else {
5008                 /* Writes still need the seq_file to hold the private data */
5009                 ret = -ENOMEM;
5010                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5011                 if (!m)
5012                         goto out;
5013                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5014                 if (!iter) {
5015                         kfree(m);
5016                         goto out;
5017                 }
5018                 ret = 0;
5019
5020                 iter->tr = tr;
5021                 iter->trace_buffer = &tr->max_buffer;
5022                 iter->cpu_file = tracing_get_cpu(inode);
5023                 m->private = iter;
5024                 file->private_data = m;
5025         }
5026 out:
5027         if (ret < 0)
5028                 trace_array_put(tr);
5029
5030         return ret;
5031 }
5032
5033 static ssize_t
5034 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5035                        loff_t *ppos)
5036 {
5037         struct seq_file *m = filp->private_data;
5038         struct trace_iterator *iter = m->private;
5039         struct trace_array *tr = iter->tr;
5040         unsigned long val;
5041         int ret;
5042
5043         ret = tracing_update_buffers();
5044         if (ret < 0)
5045                 return ret;
5046
5047         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5048         if (ret)
5049                 return ret;
5050
5051         mutex_lock(&trace_types_lock);
5052
5053         if (tr->current_trace->use_max_tr) {
5054                 ret = -EBUSY;
5055                 goto out;
5056         }
5057
5058         switch (val) {
5059         case 0:
5060                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5061                         ret = -EINVAL;
5062                         break;
5063                 }
5064                 if (tr->allocated_snapshot)
5065                         free_snapshot(tr);
5066                 break;
5067         case 1:
5068 /* Only allow per-cpu swap if the ring buffer supports it */
5069 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5070                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5071                         ret = -EINVAL;
5072                         break;
5073                 }
5074 #endif
5075                 if (!tr->allocated_snapshot) {
5076                         ret = alloc_snapshot(tr);
5077                         if (ret < 0)
5078                                 break;
5079                 }
5080                 local_irq_disable();
5081                 /* Now, we're going to swap */
5082                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5083                         update_max_tr(tr, current, smp_processor_id());
5084                 else
5085                         update_max_tr_single(tr, current, iter->cpu_file);
5086                 local_irq_enable();
5087                 break;
5088         default:
5089                 if (tr->allocated_snapshot) {
5090                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5091                                 tracing_reset_online_cpus(&tr->max_buffer);
5092                         else
5093                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5094                 }
5095                 break;
5096         }
5097
5098         if (ret >= 0) {
5099                 *ppos += cnt;
5100                 ret = cnt;
5101         }
5102 out:
5103         mutex_unlock(&trace_types_lock);
5104         return ret;
5105 }
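/*
 * A short summary plus usage sketch (an illustration, assuming debugfs is
 * mounted at /sys/kernel/debug): the switch above gives the "snapshot"
 * file its semantics -- writing 0 frees the snapshot buffer (whole-buffer
 * files only), writing 1 allocates it if needed and swaps it with the live
 * buffer, and any other value just clears the snapshot buffer:
 *
 *	int fd = open("/sys/kernel/debug/tracing/snapshot", O_WRONLY);
 *	write(fd, "1", 1);		-- take a snapshot
 *	write(fd, "0", 1);		-- free the snapshot buffer again
 */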
5106
5107 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5108 {
5109         struct seq_file *m = file->private_data;
5110         int ret;
5111
5112         ret = tracing_release(inode, file);
5113
5114         if (file->f_mode & FMODE_READ)
5115                 return ret;
5116
5117         /* If write only, the seq_file is just a stub */
5118         if (m)
5119                 kfree(m->private);
5120         kfree(m);
5121
5122         return 0;
5123 }
5124
5125 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5126 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5127                                     size_t count, loff_t *ppos);
5128 static int tracing_buffers_release(struct inode *inode, struct file *file);
5129 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5130                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5131
5132 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5133 {
5134         struct ftrace_buffer_info *info;
5135         int ret;
5136
5137         ret = tracing_buffers_open(inode, filp);
5138         if (ret < 0)
5139                 return ret;
5140
5141         info = filp->private_data;
5142
5143         if (info->iter.trace->use_max_tr) {
5144                 tracing_buffers_release(inode, filp);
5145                 return -EBUSY;
5146         }
5147
5148         info->iter.snapshot = true;
5149         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5150
5151         return ret;
5152 }
5153
5154 #endif /* CONFIG_TRACER_SNAPSHOT */
5155
5156
5157 static const struct file_operations tracing_max_lat_fops = {
5158         .open           = tracing_open_generic,
5159         .read           = tracing_max_lat_read,
5160         .write          = tracing_max_lat_write,
5161         .llseek         = generic_file_llseek,
5162 };
5163
5164 static const struct file_operations set_tracer_fops = {
5165         .open           = tracing_open_generic,
5166         .read           = tracing_set_trace_read,
5167         .write          = tracing_set_trace_write,
5168         .llseek         = generic_file_llseek,
5169 };
5170
5171 static const struct file_operations tracing_pipe_fops = {
5172         .open           = tracing_open_pipe,
5173         .poll           = tracing_poll_pipe,
5174         .read           = tracing_read_pipe,
5175         .splice_read    = tracing_splice_read_pipe,
5176         .release        = tracing_release_pipe,
5177         .llseek         = no_llseek,
5178 };
5179
5180 static const struct file_operations tracing_entries_fops = {
5181         .open           = tracing_open_generic_tr,
5182         .read           = tracing_entries_read,
5183         .write          = tracing_entries_write,
5184         .llseek         = generic_file_llseek,
5185         .release        = tracing_release_generic_tr,
5186 };
5187
5188 static const struct file_operations tracing_total_entries_fops = {
5189         .open           = tracing_open_generic_tr,
5190         .read           = tracing_total_entries_read,
5191         .llseek         = generic_file_llseek,
5192         .release        = tracing_release_generic_tr,
5193 };
5194
5195 static const struct file_operations tracing_free_buffer_fops = {
5196         .open           = tracing_open_generic_tr,
5197         .write          = tracing_free_buffer_write,
5198         .release        = tracing_free_buffer_release,
5199 };
5200
5201 static const struct file_operations tracing_mark_fops = {
5202         .open           = tracing_open_generic_tr,
5203         .write          = tracing_mark_write,
5204         .llseek         = generic_file_llseek,
5205         .release        = tracing_release_generic_tr,
5206 };
5207
5208 static const struct file_operations trace_clock_fops = {
5209         .open           = tracing_clock_open,
5210         .read           = seq_read,
5211         .llseek         = seq_lseek,
5212         .release        = tracing_single_release_tr,
5213         .write          = tracing_clock_write,
5214 };
5215
5216 #ifdef CONFIG_TRACER_SNAPSHOT
5217 static const struct file_operations snapshot_fops = {
5218         .open           = tracing_snapshot_open,
5219         .read           = seq_read,
5220         .write          = tracing_snapshot_write,
5221         .llseek         = tracing_lseek,
5222         .release        = tracing_snapshot_release,
5223 };
5224
5225 static const struct file_operations snapshot_raw_fops = {
5226         .open           = snapshot_raw_open,
5227         .read           = tracing_buffers_read,
5228         .release        = tracing_buffers_release,
5229         .splice_read    = tracing_buffers_splice_read,
5230         .llseek         = no_llseek,
5231 };
5232
5233 #endif /* CONFIG_TRACER_SNAPSHOT */
5234
5235 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5236 {
5237         struct trace_array *tr = inode->i_private;
5238         struct ftrace_buffer_info *info;
5239         int ret;
5240
5241         if (tracing_disabled)
5242                 return -ENODEV;
5243
5244         if (trace_array_get(tr) < 0)
5245                 return -ENODEV;
5246
5247         info = kzalloc(sizeof(*info), GFP_KERNEL);
5248         if (!info) {
5249                 trace_array_put(tr);
5250                 return -ENOMEM;
5251         }
5252
5253         mutex_lock(&trace_types_lock);
5254
5255         info->iter.tr           = tr;
5256         info->iter.cpu_file     = tracing_get_cpu(inode);
5257         info->iter.trace        = tr->current_trace;
5258         info->iter.trace_buffer = &tr->trace_buffer;
5259         info->spare             = NULL;
5260         /* Force reading ring buffer for first read */
5261         info->read              = (unsigned int)-1;
5262
5263         filp->private_data = info;
5264
5265         mutex_unlock(&trace_types_lock);
5266
5267         ret = nonseekable_open(inode, filp);
5268         if (ret < 0)
5269                 trace_array_put(tr);
5270
5271         return ret;
5272 }
5273
5274 static unsigned int
5275 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5276 {
5277         struct ftrace_buffer_info *info = filp->private_data;
5278         struct trace_iterator *iter = &info->iter;
5279
5280         return trace_poll(iter, filp, poll_table);
5281 }
5282
5283 static ssize_t
5284 tracing_buffers_read(struct file *filp, char __user *ubuf,
5285                      size_t count, loff_t *ppos)
5286 {
5287         struct ftrace_buffer_info *info = filp->private_data;
5288         struct trace_iterator *iter = &info->iter;
5289         ssize_t ret;
5290         ssize_t size;
5291
5292         if (!count)
5293                 return 0;
5294
5295         mutex_lock(&trace_types_lock);
5296
5297 #ifdef CONFIG_TRACER_MAX_TRACE
5298         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5299                 size = -EBUSY;
5300                 goto out_unlock;
5301         }
5302 #endif
5303
5304         if (!info->spare)
5305                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5306                                                           iter->cpu_file);
5307         size = -ENOMEM;
5308         if (!info->spare)
5309                 goto out_unlock;
5310
5311         /* Do we have previous read data to read? */
5312         if (info->read < PAGE_SIZE)
5313                 goto read;
5314
5315  again:
5316         trace_access_lock(iter->cpu_file);
5317         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5318                                     &info->spare,
5319                                     count,
5320                                     iter->cpu_file, 0);
5321         trace_access_unlock(iter->cpu_file);
5322
5323         if (ret < 0) {
5324                 if (trace_empty(iter)) {
5325                         if ((filp->f_flags & O_NONBLOCK)) {
5326                                 size = -EAGAIN;
5327                                 goto out_unlock;
5328                         }
5329                         mutex_unlock(&trace_types_lock);
5330                         wait_on_pipe(iter);
5331                         mutex_lock(&trace_types_lock);
5332                         if (signal_pending(current)) {
5333                                 size = -EINTR;
5334                                 goto out_unlock;
5335                         }
5336                         goto again;
5337                 }
5338                 size = 0;
5339                 goto out_unlock;
5340         }
5341
5342         info->read = 0;
5343  read:
5344         size = PAGE_SIZE - info->read;
5345         if (size > count)
5346                 size = count;
5347
5348         ret = copy_to_user(ubuf, info->spare + info->read, size);
5349         if (ret == size) {
5350                 size = -EFAULT;
5351                 goto out_unlock;
5352         }
5353         size -= ret;
5354
5355         *ppos += size;
5356         info->read += size;
5357
5358  out_unlock:
5359         mutex_unlock(&trace_types_lock);
5360
5361         return size;
5362 }
5363
5364 static int tracing_buffers_release(struct inode *inode, struct file *file)
5365 {
5366         struct ftrace_buffer_info *info = file->private_data;
5367         struct trace_iterator *iter = &info->iter;
5368
5369         mutex_lock(&trace_types_lock);
5370
5371         __trace_array_put(iter->tr);
5372
5373         if (info->spare)
5374                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5375         kfree(info);
5376
5377         mutex_unlock(&trace_types_lock);
5378
5379         return 0;
5380 }
5381
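/*
 * A ring buffer read page handed out to splice.  @ref is a simple
 * reference count: buffer_pipe_buf_get() takes a reference, and
 * buffer_pipe_buf_release()/buffer_spd_release() drop it, returning
 * the page to the ring buffer once it reaches zero.
 */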
5382 struct buffer_ref {
5383         struct ring_buffer      *buffer;
5384         void                    *page;
5385         int                     ref;
5386 };
5387
5388 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5389                                     struct pipe_buffer *buf)
5390 {
5391         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5392
5393         if (--ref->ref)
5394                 return;
5395
5396         ring_buffer_free_read_page(ref->buffer, ref->page);
5397         kfree(ref);
5398         buf->private = 0;
5399 }
5400
5401 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5402                                 struct pipe_buffer *buf)
5403 {
5404         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5405
5406         ref->ref++;
5407 }
5408
5409 /* Pipe buffer operations for a buffer. */
5410 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5411         .can_merge              = 0,
5412         .confirm                = generic_pipe_buf_confirm,
5413         .release                = buffer_pipe_buf_release,
5414         .steal                  = generic_pipe_buf_steal,
5415         .get                    = buffer_pipe_buf_get,
5416 };
5417
5418 /*
5419  * Callback from splice_to_pipe(), if we need to release some pages
5420  * at the end of the spd in case we errored out while filling the pipe.
5421  */
5422 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5423 {
5424         struct buffer_ref *ref =
5425                 (struct buffer_ref *)spd->partial[i].private;
5426
5427         if (--ref->ref)
5428                 return;
5429
5430         ring_buffer_free_read_page(ref->buffer, ref->page);
5431         kfree(ref);
5432         spd->partial[i].private = 0;
5433 }
5434
5435 static ssize_t
5436 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5437                             struct pipe_inode_info *pipe, size_t len,
5438                             unsigned int flags)
5439 {
5440         struct ftrace_buffer_info *info = file->private_data;
5441         struct trace_iterator *iter = &info->iter;
5442         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5443         struct page *pages_def[PIPE_DEF_BUFFERS];
5444         struct splice_pipe_desc spd = {
5445                 .pages          = pages_def,
5446                 .partial        = partial_def,
5447                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5448                 .flags          = flags,
5449                 .ops            = &buffer_pipe_buf_ops,
5450                 .spd_release    = buffer_spd_release,
5451         };
5452         struct buffer_ref *ref;
5453         int entries, size, i;
5454         ssize_t ret;
5455
5456         mutex_lock(&trace_types_lock);
5457
5458 #ifdef CONFIG_TRACER_MAX_TRACE
5459         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5460                 ret = -EBUSY;
5461                 goto out;
5462         }
5463 #endif
5464
5465         if (splice_grow_spd(pipe, &spd)) {
5466                 ret = -ENOMEM;
5467                 goto out;
5468         }
5469
5470         if (*ppos & (PAGE_SIZE - 1)) {
5471                 ret = -EINVAL;
5472                 goto out;
5473         }
5474
5475         if (len & (PAGE_SIZE - 1)) {
5476                 if (len < PAGE_SIZE) {
5477                         ret = -EINVAL;
5478                         goto out;
5479                 }
5480                 len &= PAGE_MASK;
5481         }
5482
5483  again:
5484         trace_access_lock(iter->cpu_file);
5485         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5486
5487         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5488                 struct page *page;
5489                 int r;
5490
5491                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5492                 if (!ref)
5493                         break;
5494
5495                 ref->ref = 1;
5496                 ref->buffer = iter->trace_buffer->buffer;
5497                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5498                 if (!ref->page) {
5499                         kfree(ref);
5500                         break;
5501                 }
5502
5503                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5504                                           len, iter->cpu_file, 1);
5505                 if (r < 0) {
5506                         ring_buffer_free_read_page(ref->buffer, ref->page);
5507                         kfree(ref);
5508                         break;
5509                 }
5510
5511                 /*
5512                  * Zero out any leftover data, since this page is
5513                  * going out to user land.
5514                  */
5515                 size = ring_buffer_page_len(ref->page);
5516                 if (size < PAGE_SIZE)
5517                         memset(ref->page + size, 0, PAGE_SIZE - size);
5518
5519                 page = virt_to_page(ref->page);
5520
5521                 spd.pages[i] = page;
5522                 spd.partial[i].len = PAGE_SIZE;
5523                 spd.partial[i].offset = 0;
5524                 spd.partial[i].private = (unsigned long)ref;
5525                 spd.nr_pages++;
5526                 *ppos += PAGE_SIZE;
5527
5528                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5529         }
5530
5531         trace_access_unlock(iter->cpu_file);
5532         spd.nr_pages = i;
5533
5534         /* did we read anything? */
5535         if (!spd.nr_pages) {
5536                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5537                         ret = -EAGAIN;
5538                         goto out;
5539                 }
5540                 mutex_unlock(&trace_types_lock);
5541                 wait_on_pipe(iter);
5542                 mutex_lock(&trace_types_lock);
5543                 if (signal_pending(current)) {
5544                         ret = -EINTR;
5545                         goto out;
5546                 }
5547                 goto again;
5548         }
5549
5550         ret = splice_to_pipe(pipe, &spd);
5551         splice_shrink_spd(&spd);
5552 out:
5553         mutex_unlock(&trace_types_lock);
5554
5555         return ret;
5556 }
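/*
 * A minimal consumer sketch (an illustration, assuming debugfs is mounted
 * at /sys/kernel/debug, a 4K PAGE_SIZE, and pipe_wr_fd being the write end
 * of a pipe(2) set up by the caller): the per-cpu "trace_pipe_raw" files
 * use these fops, so whole ring-buffer pages can be spliced out without
 * copying.  Per the checks above, len must be at least one page:
 *
 *	int fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	splice(fd, NULL, pipe_wr_fd, NULL, 4096, 0);
 */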
5557
5558 static const struct file_operations tracing_buffers_fops = {
5559         .open           = tracing_buffers_open,
5560         .read           = tracing_buffers_read,
5561         .poll           = tracing_buffers_poll,
5562         .release        = tracing_buffers_release,
5563         .splice_read    = tracing_buffers_splice_read,
5564         .llseek         = no_llseek,
5565 };
5566
5567 static ssize_t
5568 tracing_stats_read(struct file *filp, char __user *ubuf,
5569                    size_t count, loff_t *ppos)
5570 {
5571         struct inode *inode = file_inode(filp);
5572         struct trace_array *tr = inode->i_private;
5573         struct trace_buffer *trace_buf = &tr->trace_buffer;
5574         int cpu = tracing_get_cpu(inode);
5575         struct trace_seq *s;
5576         unsigned long cnt;
5577         unsigned long long t;
5578         unsigned long usec_rem;
5579
5580         s = kmalloc(sizeof(*s), GFP_KERNEL);
5581         if (!s)
5582                 return -ENOMEM;
5583
5584         trace_seq_init(s);
5585
5586         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5587         trace_seq_printf(s, "entries: %ld\n", cnt);
5588
5589         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5590         trace_seq_printf(s, "overrun: %ld\n", cnt);
5591
5592         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5593         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5594
5595         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5596         trace_seq_printf(s, "bytes: %ld\n", cnt);
5597
5598         if (trace_clocks[tr->clock_id].in_ns) {
5599                 /* local or global for trace_clock */
5600                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5601                 usec_rem = do_div(t, USEC_PER_SEC);
5602                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5603                                                                 t, usec_rem);
5604
5605                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5606                 usec_rem = do_div(t, USEC_PER_SEC);
5607                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5608         } else {
5609                 /* counter or tsc mode for trace_clock */
5610                 trace_seq_printf(s, "oldest event ts: %llu\n",
5611                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5612
5613                 trace_seq_printf(s, "now ts: %llu\n",
5614                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5615         }
5616
5617         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5618         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5619
5620         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5621         trace_seq_printf(s, "read events: %ld\n", cnt);
5622
5623         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5624
5625         kfree(s);
5626
5627         return count;
5628 }
5629
5630 static const struct file_operations tracing_stats_fops = {
5631         .open           = tracing_open_generic_tr,
5632         .read           = tracing_stats_read,
5633         .llseek         = generic_file_llseek,
5634         .release        = tracing_release_generic_tr,
5635 };
5636
5637 #ifdef CONFIG_DYNAMIC_FTRACE
5638
5639 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5640 {
5641         return 0;
5642 }
5643
5644 static ssize_t
5645 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5646                   size_t cnt, loff_t *ppos)
5647 {
5648         static char ftrace_dyn_info_buffer[1024];
5649         static DEFINE_MUTEX(dyn_info_mutex);
5650         unsigned long *p = filp->private_data;
5651         char *buf = ftrace_dyn_info_buffer;
5652         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5653         int r;
5654
5655         mutex_lock(&dyn_info_mutex);
5656         r = sprintf(buf, "%ld ", *p);
5657
5658         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5659         buf[r++] = '\n';
5660
5661         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5662
5663         mutex_unlock(&dyn_info_mutex);
5664
5665         return r;
5666 }
5667
5668 static const struct file_operations tracing_dyn_info_fops = {
5669         .open           = tracing_open_generic,
5670         .read           = tracing_read_dyn_info,
5671         .llseek         = generic_file_llseek,
5672 };
5673 #endif /* CONFIG_DYNAMIC_FTRACE */
5674
5675 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5676 static void
5677 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5678 {
5679         tracing_snapshot();
5680 }
5681
5682 static void
5683 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5684 {
5685         unsigned long *count = (unsigned long *)data;
5686
5687         if (!*count)
5688                 return;
5689
5690         if (*count != -1)
5691                 (*count)--;
5692
5693         tracing_snapshot();
5694 }
5695
5696 static int
5697 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5698                       struct ftrace_probe_ops *ops, void *data)
5699 {
5700         long count = (long)data;
5701
5702         seq_printf(m, "%ps:", (void *)ip);
5703
5704         seq_puts(m, "snapshot");
5705
5706         if (count == -1)
5707                 seq_puts(m, ":unlimited\n");
5708         else
5709                 seq_printf(m, ":count=%ld\n", count);
5710
5711         return 0;
5712 }
5713
5714 static struct ftrace_probe_ops snapshot_probe_ops = {
5715         .func                   = ftrace_snapshot,
5716         .print                  = ftrace_snapshot_print,
5717 };
5718
5719 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5720         .func                   = ftrace_count_snapshot,
5721         .print                  = ftrace_snapshot_print,
5722 };
5723
5724 static int
5725 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5726                                char *glob, char *cmd, char *param, int enable)
5727 {
5728         struct ftrace_probe_ops *ops;
5729         void *count = (void *)-1;
5730         char *number;
5731         int ret;
5732
5733         /* hash funcs only work with set_ftrace_filter */
5734         if (!enable)
5735                 return -EINVAL;
5736
5737         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5738
5739         if (glob[0] == '!') {
5740                 unregister_ftrace_function_probe_func(glob+1, ops);
5741                 return 0;
5742         }
5743
5744         if (!param)
5745                 goto out_reg;
5746
5747         number = strsep(&param, ":");
5748
5749         if (!strlen(number))
5750                 goto out_reg;
5751
5752         /*
5753          * We use the callback data field (which is a pointer)
5754          * as our counter.
5755          */
5756         ret = kstrtoul(number, 0, (unsigned long *)&count);
5757         if (ret)
5758                 return ret;
5759
5760  out_reg:
5761         ret = register_ftrace_function_probe(glob, ops, count);
5762
5763         if (ret >= 0)
5764                 alloc_snapshot(&global_trace);
5765
5766         return ret < 0 ? ret : 0;
5767 }
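/*
 * A minimal usage sketch (an illustration, assuming debugfs is mounted at
 * /sys/kernel/debug; "do_fork" is just an arbitrary traced function): this
 * callback implements the "snapshot" command of set_ftrace_filter.
 * "<func>:snapshot" arms an unlimited probe, "<func>:snapshot:<count>"
 * limits how many times it fires, and a leading '!' removes it:
 *
 *	int fd = open("/sys/kernel/debug/tracing/set_ftrace_filter", O_WRONLY);
 *	write(fd, "do_fork:snapshot:1", 18);
 */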
5768
5769 static struct ftrace_func_command ftrace_snapshot_cmd = {
5770         .name                   = "snapshot",
5771         .func                   = ftrace_trace_snapshot_callback,
5772 };
5773
5774 static __init int register_snapshot_cmd(void)
5775 {
5776         return register_ftrace_command(&ftrace_snapshot_cmd);
5777 }
5778 #else
5779 static inline __init int register_snapshot_cmd(void) { return 0; }
5780 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5781
5782 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5783 {
5784         if (tr->dir)
5785                 return tr->dir;
5786
5787         if (!debugfs_initialized())
5788                 return NULL;
5789
5790         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5791                 tr->dir = debugfs_create_dir("tracing", NULL);
5792
5793         if (!tr->dir)
5794                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5795
5796         return tr->dir;
5797 }
5798
5799 struct dentry *tracing_init_dentry(void)
5800 {
5801         return tracing_init_dentry_tr(&global_trace);
5802 }
5803
5804 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5805 {
5806         struct dentry *d_tracer;
5807
5808         if (tr->percpu_dir)
5809                 return tr->percpu_dir;
5810
5811         d_tracer = tracing_init_dentry_tr(tr);
5812         if (!d_tracer)
5813                 return NULL;
5814
5815         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5816
5817         WARN_ONCE(!tr->percpu_dir,
5818                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5819
5820         return tr->percpu_dir;
5821 }
5822
5823 static struct dentry *
5824 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5825                       void *data, long cpu, const struct file_operations *fops)
5826 {
5827         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5828
5829         if (ret) /* See tracing_get_cpu() */
5830                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5831         return ret;
5832 }
5833
5834 static void
5835 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5836 {
5837         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5838         struct dentry *d_cpu;
5839         char cpu_dir[30]; /* 30 characters should be more than enough */
5840
5841         if (!d_percpu)
5842                 return;
5843
5844         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5845         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5846         if (!d_cpu) {
5847                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5848                 return;
5849         }
5850
5851         /* per cpu trace_pipe */
5852         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5853                                 tr, cpu, &tracing_pipe_fops);
5854
5855         /* per cpu trace */
5856         trace_create_cpu_file("trace", 0644, d_cpu,
5857                                 tr, cpu, &tracing_fops);
5858
5859         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5860                                 tr, cpu, &tracing_buffers_fops);
5861
5862         trace_create_cpu_file("stats", 0444, d_cpu,
5863                                 tr, cpu, &tracing_stats_fops);
5864
5865         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5866                                 tr, cpu, &tracing_entries_fops);
5867
5868 #ifdef CONFIG_TRACER_SNAPSHOT
5869         trace_create_cpu_file("snapshot", 0644, d_cpu,
5870                                 tr, cpu, &snapshot_fops);
5871
5872         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5873                                 tr, cpu, &snapshot_raw_fops);
5874 #endif
5875 }
5876
5877 #ifdef CONFIG_FTRACE_SELFTEST
5878 /* Let selftest have access to static functions in this file */
5879 #include "trace_selftest.c"
5880 #endif
5881
5882 struct trace_option_dentry {
5883         struct tracer_opt               *opt;
5884         struct tracer_flags             *flags;
5885         struct trace_array              *tr;
5886         struct dentry                   *entry;
5887 };
5888
5889 static ssize_t
5890 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5891                         loff_t *ppos)
5892 {
5893         struct trace_option_dentry *topt = filp->private_data;
5894         char *buf;
5895
5896         if (topt->flags->val & topt->opt->bit)
5897                 buf = "1\n";
5898         else
5899                 buf = "0\n";
5900
5901         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5902 }
5903
5904 static ssize_t
5905 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5906                          loff_t *ppos)
5907 {
5908         struct trace_option_dentry *topt = filp->private_data;
5909         unsigned long val;
5910         int ret;
5911
5912         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5913         if (ret)
5914                 return ret;
5915
5916         if (val != 0 && val != 1)
5917                 return -EINVAL;
5918
5919         if (!!(topt->flags->val & topt->opt->bit) != val) {
5920                 mutex_lock(&trace_types_lock);
5921                 ret = __set_tracer_option(topt->tr, topt->flags,
5922                                           topt->opt, !val);
5923                 mutex_unlock(&trace_types_lock);
5924                 if (ret)
5925                         return ret;
5926         }
5927
5928         *ppos += cnt;
5929
5930         return cnt;
5931 }
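/*
 * A minimal usage sketch (an illustration, assuming debugfs is mounted at
 * /sys/kernel/debug; OPTION is a placeholder for one of the per-tracer
 * option files created under options/ by create_trace_option_file()
 * below): the handlers above accept only "0" or "1" to clear or set the
 * corresponding flag bit:
 *
 *	int fd = open("/sys/kernel/debug/tracing/options/OPTION", O_WRONLY);
 *	write(fd, "1", 1);
 */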
5932
5933
5934 static const struct file_operations trace_options_fops = {
5935         .open = tracing_open_generic,
5936         .read = trace_options_read,
5937         .write = trace_options_write,
5938         .llseek = generic_file_llseek,
5939 };
5940
5941 static ssize_t
5942 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5943                         loff_t *ppos)
5944 {
5945         long index = (long)filp->private_data;
5946         char *buf;
5947
5948         if (trace_flags & (1 << index))
5949                 buf = "1\n";
5950         else
5951                 buf = "0\n";
5952
5953         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5954 }
5955
5956 static ssize_t
5957 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5958                          loff_t *ppos)
5959 {
5960         struct trace_array *tr = &global_trace;
5961         long index = (long)filp->private_data;
5962         unsigned long val;
5963         int ret;
5964
5965         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5966         if (ret)
5967                 return ret;
5968
5969         if (val != 0 && val != 1)
5970                 return -EINVAL;
5971
5972         mutex_lock(&trace_types_lock);
5973         ret = set_tracer_flag(tr, 1 << index, val);
5974         mutex_unlock(&trace_types_lock);
5975
5976         if (ret < 0)
5977                 return ret;
5978
5979         *ppos += cnt;
5980
5981         return cnt;
5982 }
5983
5984 static const struct file_operations trace_options_core_fops = {
5985         .open = tracing_open_generic,
5986         .read = trace_options_core_read,
5987         .write = trace_options_core_write,
5988         .llseek = generic_file_llseek,
5989 };
5990
5991 struct dentry *trace_create_file(const char *name,
5992                                  umode_t mode,
5993                                  struct dentry *parent,
5994                                  void *data,
5995                                  const struct file_operations *fops)
5996 {
5997         struct dentry *ret;
5998
5999         ret = debugfs_create_file(name, mode, parent, data, fops);
6000         if (!ret)
6001                 pr_warning("Could not create debugfs '%s' entry\n", name);
6002
6003         return ret;
6004 }
6005
6006
6007 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6008 {
6009         struct dentry *d_tracer;
6010
6011         if (tr->options)
6012                 return tr->options;
6013
6014         d_tracer = tracing_init_dentry_tr(tr);
6015         if (!d_tracer)
6016                 return NULL;
6017
6018         tr->options = debugfs_create_dir("options", d_tracer);
6019         if (!tr->options) {
6020                 pr_warning("Could not create debugfs directory 'options'\n");
6021                 return NULL;
6022         }
6023
6024         return tr->options;
6025 }
6026
6027 static void
6028 create_trace_option_file(struct trace_array *tr,
6029                          struct trace_option_dentry *topt,
6030                          struct tracer_flags *flags,
6031                          struct tracer_opt *opt)
6032 {
6033         struct dentry *t_options;
6034
6035         t_options = trace_options_init_dentry(tr);
6036         if (!t_options)
6037                 return;
6038
6039         topt->flags = flags;
6040         topt->opt = opt;
6041         topt->tr = tr;
6042
6043         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6044                                     &trace_options_fops);
6045
6046 }
6047
6048 static struct trace_option_dentry *
6049 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6050 {
6051         struct trace_option_dentry *topts;
6052         struct tracer_flags *flags;
6053         struct tracer_opt *opts;
6054         int cnt;
6055
6056         if (!tracer)
6057                 return NULL;
6058
6059         flags = tracer->flags;
6060
6061         if (!flags || !flags->opts)
6062                 return NULL;
6063
6064         opts = flags->opts;
6065
6066         for (cnt = 0; opts[cnt].name; cnt++)
6067                 ;
6068
6069         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6070         if (!topts)
6071                 return NULL;
6072
6073         for (cnt = 0; opts[cnt].name; cnt++)
6074                 create_trace_option_file(tr, &topts[cnt], flags,
6075                                          &opts[cnt]);
6076
6077         return topts;
6078 }
6079
6080 static void
6081 destroy_trace_option_files(struct trace_option_dentry *topts)
6082 {
6083         int cnt;
6084
6085         if (!topts)
6086                 return;
6087
6088         for (cnt = 0; topts[cnt].opt; cnt++) {
6089                 if (topts[cnt].entry)
6090                         debugfs_remove(topts[cnt].entry);
6091         }
6092
6093         kfree(topts);
6094 }
6095
6096 static struct dentry *
6097 create_trace_option_core_file(struct trace_array *tr,
6098                               const char *option, long index)
6099 {
6100         struct dentry *t_options;
6101
6102         t_options = trace_options_init_dentry(tr);
6103         if (!t_options)
6104                 return NULL;
6105
6106         return trace_create_file(option, 0644, t_options, (void *)index,
6107                                     &trace_options_core_fops);
6108 }
6109
6110 static __init void create_trace_options_dir(struct trace_array *tr)
6111 {
6112         struct dentry *t_options;
6113         int i;
6114
6115         t_options = trace_options_init_dentry(tr);
6116         if (!t_options)
6117                 return;
6118
6119         for (i = 0; trace_options[i]; i++)
6120                 create_trace_option_core_file(tr, trace_options[i], i);
6121 }
6122
6123 static ssize_t
6124 rb_simple_read(struct file *filp, char __user *ubuf,
6125                size_t cnt, loff_t *ppos)
6126 {
6127         struct trace_array *tr = filp->private_data;
6128         char buf[64];
6129         int r;
6130
6131         r = tracer_tracing_is_on(tr);
6132         r = sprintf(buf, "%d\n", r);
6133
6134         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6135 }
6136
6137 static ssize_t
6138 rb_simple_write(struct file *filp, const char __user *ubuf,
6139                 size_t cnt, loff_t *ppos)
6140 {
6141         struct trace_array *tr = filp->private_data;
6142         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6143         unsigned long val;
6144         int ret;
6145
6146         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6147         if (ret)
6148                 return ret;
6149
6150         if (buffer) {
6151                 mutex_lock(&trace_types_lock);
6152                 if (val) {
6153                         tracer_tracing_on(tr);
6154                         if (tr->current_trace->start)
6155                                 tr->current_trace->start(tr);
6156                 } else {
6157                         tracer_tracing_off(tr);
6158                         if (tr->current_trace->stop)
6159                                 tr->current_trace->stop(tr);
6160                 }
6161                 mutex_unlock(&trace_types_lock);
6162         }
6163
6164         (*ppos)++;
6165
6166         return cnt;
6167 }
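/*
 * A minimal usage sketch (an illustration, assuming debugfs is mounted at
 * /sys/kernel/debug): these handlers back the "tracing_on" file, so
 * tracing can be paused and resumed without any other reconfiguration:
 *
 *	int fd = open("/sys/kernel/debug/tracing/tracing_on", O_WRONLY);
 *	write(fd, "0", 1);		-- ring buffer off, tracer stopped
 *	write(fd, "1", 1);		-- back on again
 */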
6168
6169 static const struct file_operations rb_simple_fops = {
6170         .open           = tracing_open_generic_tr,
6171         .read           = rb_simple_read,
6172         .write          = rb_simple_write,
6173         .release        = tracing_release_generic_tr,
6174         .llseek         = default_llseek,
6175 };
6176
6177 struct dentry *trace_instance_dir;
6178
6179 static void
6180 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
6181
6182 static int
6183 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6184 {
6185         enum ring_buffer_flags rb_flags;
6186
6187         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6188
6189         buf->tr = tr;
6190
6191         buf->buffer = ring_buffer_alloc(size, rb_flags);
6192         if (!buf->buffer)
6193                 return -ENOMEM;
6194
6195         buf->data = alloc_percpu(struct trace_array_cpu);
6196         if (!buf->data) {
6197                 ring_buffer_free(buf->buffer);
6198                 return -ENOMEM;
6199         }
6200
6201         /* Allocate the first page for all buffers */
6202         set_buffer_entries(buf,
6203                            ring_buffer_size(buf->buffer, 0));
6204
6205         return 0;
6206 }
6207
6208 static int allocate_trace_buffers(struct trace_array *tr, int size)
6209 {
6210         int ret;
6211
6212         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6213         if (ret)
6214                 return ret;
6215
6216 #ifdef CONFIG_TRACER_MAX_TRACE
6217         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6218                                     allocate_snapshot ? size : 1);
6219         if (WARN_ON(ret)) {
6220                 ring_buffer_free(tr->trace_buffer.buffer);
6221                 tr->trace_buffer.buffer = NULL; /* avoid a second free in free_trace_buffers() */
6222                 free_percpu(tr->trace_buffer.data);
6223                 return -ENOMEM;
6223         }
6224         tr->allocated_snapshot = allocate_snapshot;
6225
6226         /*
6227          * Only the top level trace array gets its snapshot allocated
6228          * from the kernel command line.
6229          */
6230         allocate_snapshot = false;
6231 #endif
6232         return 0;
6233 }
6234
6235 static void free_trace_buffers(struct trace_array *tr)
6236 {
6237         if (!tr)
6238                 return;
6239
6240         if (tr->trace_buffer.buffer) {
6241                 ring_buffer_free(tr->trace_buffer.buffer);
6242                 tr->trace_buffer.buffer = NULL;
6243                 free_percpu(tr->trace_buffer.data);
6244         }
6245
6246 #ifdef CONFIG_TRACER_MAX_TRACE
6247         if (tr->max_buffer.buffer) {
6248                 ring_buffer_free(tr->max_buffer.buffer);
6249                 tr->max_buffer.buffer = NULL;
6250         }
6251 #endif
6252 }
6253
6254 static int new_instance_create(const char *name)
6255 {
6256         struct trace_array *tr;
6257         int ret;
6258
6259         mutex_lock(&trace_types_lock);
6260
6261         ret = -EEXIST;
6262         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6263                 if (tr->name && strcmp(tr->name, name) == 0)
6264                         goto out_unlock;
6265         }
6266
6267         ret = -ENOMEM;
6268         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6269         if (!tr)
6270                 goto out_unlock;
6271
6272         tr->name = kstrdup(name, GFP_KERNEL);
6273         if (!tr->name)
6274                 goto out_free_tr;
6275
6276         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6277                 goto out_free_tr;
6278
6279         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6280
6281         raw_spin_lock_init(&tr->start_lock);
6282
6283         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6284
6285         tr->current_trace = &nop_trace;
6286
6287         INIT_LIST_HEAD(&tr->systems);
6288         INIT_LIST_HEAD(&tr->events);
6289
6290         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6291                 goto out_free_tr;
6292
6293         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6294         if (!tr->dir)
6295                 goto out_free_tr;
6296
6297         ret = event_trace_add_tracer(tr->dir, tr);
6298         if (ret) {
6299                 debugfs_remove_recursive(tr->dir);
6300                 goto out_free_tr;
6301         }
6302
6303         init_tracer_debugfs(tr, tr->dir);
6304
6305         list_add(&tr->list, &ftrace_trace_arrays);
6306
6307         mutex_unlock(&trace_types_lock);
6308
6309         return 0;
6310
6311  out_free_tr:
6312         free_trace_buffers(tr);
6313         free_cpumask_var(tr->tracing_cpumask);
6314         kfree(tr->name);
6315         kfree(tr);
6316
6317  out_unlock:
6318         mutex_unlock(&trace_types_lock);
6319
6320         return ret;
6321
6322 }
6323
6324 static int instance_delete(const char *name)
6325 {
6326         struct trace_array *tr;
6327         int found = 0;
6328         int ret;
6329
6330         mutex_lock(&trace_types_lock);
6331
6332         ret = -ENODEV;
6333         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6334                 if (tr->name && strcmp(tr->name, name) == 0) {
6335                         found = 1;
6336                         break;
6337                 }
6338         }
6339         if (!found)
6340                 goto out_unlock;
6341
6342         ret = -EBUSY;
6343         if (tr->ref)
6344                 goto out_unlock;
6345
6346         list_del(&tr->list);
6347
6348         tracing_set_nop(tr);
6349         event_trace_del_tracer(tr);
6350         ftrace_destroy_function_files(tr);
6351         debugfs_remove_recursive(tr->dir);
6352         free_trace_buffers(tr);
6353
6354         kfree(tr->name);
6355         kfree(tr);
6356
6357         ret = 0;
6358
6359  out_unlock:
6360         mutex_unlock(&trace_types_lock);
6361
6362         return ret;
6363 }
6364
6365 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6366 {
6367         struct dentry *parent;
6368         int ret;
6369
6370         /* Paranoid: Make sure the parent is the "instances" directory */
6371         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6372         if (WARN_ON_ONCE(parent != trace_instance_dir))
6373                 return -ENOENT;
6374
6375         /*
6376          * The inode mutex is locked, but debugfs_create_dir() will also
6377          * take the mutex. As the instances directory cannot be destroyed
6378          * or changed in any other way, it is safe to unlock it, and
6379          * let the dentry try. If two users try to make the same dir at
6380          * the same time, then new_instance_create() will determine the
6381          * winner.
6382          */
6383         mutex_unlock(&inode->i_mutex);
6384
6385         ret = new_instance_create(dentry->d_iname);
6386
6387         mutex_lock(&inode->i_mutex);
6388
6389         return ret;
6390 }
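/*
 * A minimal usage sketch (an illustration, assuming debugfs is mounted at
 * /sys/kernel/debug; "foo" is an arbitrary instance name): because the
 * instances directory's inode operations point at instance_mkdir() and
 * instance_rmdir() (see create_trace_instances() below), a new trace
 * array with its own buffers and event files is created and destroyed
 * with plain mkdir(2)/rmdir(2):
 *
 *	mkdir("/sys/kernel/debug/tracing/instances/foo", 0755);
 *	rmdir("/sys/kernel/debug/tracing/instances/foo");
 */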
6391
6392 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6393 {
6394         struct dentry *parent;
6395         int ret;
6396
6397         /* Paranoid: Make sure the parent is the "instances" directory */
6398         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6399         if (WARN_ON_ONCE(parent != trace_instance_dir))
6400                 return -ENOENT;
6401
6402         /* The caller did a dget() on dentry */
6403         mutex_unlock(&dentry->d_inode->i_mutex);
6404
6405         /*
6406          * The inode mutex is locked, but debugfs_remove_recursive() will
6407          * also take the mutex. As the instances directory cannot be destroyed
6408          * or changed in any other way, it is safe to unlock it, and
6409          * let the dentry try. If two users try to remove the same dir at
6410          * the same time, then instance_delete() will determine the
6411          * winner.
6412          */
6413         mutex_unlock(&inode->i_mutex);
6414
6415         ret = instance_delete(dentry->d_iname);
6416
6417         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6418         mutex_lock(&dentry->d_inode->i_mutex);
6419
6420         return ret;
6421 }
6422
6423 static const struct inode_operations instance_dir_inode_operations = {
6424         .lookup         = simple_lookup,
6425         .mkdir          = instance_mkdir,
6426         .rmdir          = instance_rmdir,
6427 };
6428
6429 static __init void create_trace_instances(struct dentry *d_tracer)
6430 {
6431         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6432         if (WARN_ON(!trace_instance_dir))
6433                 return;
6434
6435         /* Hijack the dir inode operations to allow mkdir/rmdir */
6436         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6437 }
6438
6439 static void
6440 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6441 {
6442         int cpu;
6443
6444         trace_create_file("available_tracers", 0444, d_tracer,
6445                         tr, &show_traces_fops);
6446
6447         trace_create_file("current_tracer", 0644, d_tracer,
6448                         tr, &set_tracer_fops);
6449
6450         trace_create_file("tracing_cpumask", 0644, d_tracer,
6451                           tr, &tracing_cpumask_fops);
6452
6453         trace_create_file("trace_options", 0644, d_tracer,
6454                           tr, &tracing_iter_fops);
6455
6456         trace_create_file("trace", 0644, d_tracer,
6457                           tr, &tracing_fops);
6458
6459         trace_create_file("trace_pipe", 0444, d_tracer,
6460                           tr, &tracing_pipe_fops);
6461
6462         trace_create_file("buffer_size_kb", 0644, d_tracer,
6463                           tr, &tracing_entries_fops);
6464
6465         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6466                           tr, &tracing_total_entries_fops);
6467
6468         trace_create_file("free_buffer", 0200, d_tracer,
6469                           tr, &tracing_free_buffer_fops);
6470
6471         trace_create_file("trace_marker", 0220, d_tracer,
6472                           tr, &tracing_mark_fops);
6473
6474         trace_create_file("trace_clock", 0644, d_tracer, tr,
6475                           &trace_clock_fops);
6476
6477         trace_create_file("tracing_on", 0644, d_tracer,
6478                           tr, &rb_simple_fops);
6479
6480 #ifdef CONFIG_TRACER_MAX_TRACE
6481         trace_create_file("tracing_max_latency", 0644, d_tracer,
6482                         &tr->max_latency, &tracing_max_lat_fops);
6483 #endif
6484
6485         if (ftrace_create_function_files(tr, d_tracer))
6486                 WARN(1, "Could not allocate function filter files");
6487
6488 #ifdef CONFIG_TRACER_SNAPSHOT
6489         trace_create_file("snapshot", 0644, d_tracer,
6490                           tr, &snapshot_fops);
6491 #endif
6492
6493         for_each_tracing_cpu(cpu)
6494                 tracing_init_debugfs_percpu(tr, cpu);
6495
6496 }
6497
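     /*
      * fs_initcall that populates the top level tracing directory with
      * the global trace array's files plus the files that are not
      * per-instance (README, saved_cmdlines, instances, ...).
      */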
6498 static __init int tracer_init_debugfs(void)
6499 {
6500         struct dentry *d_tracer;
6501
6502         trace_access_lock_init();
6503
6504         d_tracer = tracing_init_dentry();
6505         if (!d_tracer)
6506                 return 0;
6507
6508         init_tracer_debugfs(&global_trace, d_tracer);
6509
6510         trace_create_file("tracing_thresh", 0644, d_tracer,
6511                         &tracing_thresh, &tracing_max_lat_fops);
6512
6513         trace_create_file("README", 0444, d_tracer,
6514                         NULL, &tracing_readme_fops);
6515
6516         trace_create_file("saved_cmdlines", 0444, d_tracer,
6517                         NULL, &tracing_saved_cmdlines_fops);
6518
6519         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
6520                           NULL, &tracing_saved_cmdlines_size_fops);
6521
6522 #ifdef CONFIG_DYNAMIC_FTRACE
6523         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6524                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6525 #endif
6526
6527         create_trace_instances(d_tracer);
6528
6529         create_trace_options_dir(&global_trace);
6530
6531         return 0;
6532 }
6533
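     /*
      * Panic and die notifiers: when ftrace_dump_on_oops is set, dump
      * the ring buffer contents to the console before the system goes
      * down.
      */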
6534 static int trace_panic_handler(struct notifier_block *this,
6535                                unsigned long event, void *unused)
6536 {
6537         if (ftrace_dump_on_oops)
6538                 ftrace_dump(ftrace_dump_on_oops);
6539         return NOTIFY_OK;
6540 }
6541
6542 static struct notifier_block trace_panic_notifier = {
6543         .notifier_call  = trace_panic_handler,
6544         .next           = NULL,
6545         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6546 };
6547
6548 static int trace_die_handler(struct notifier_block *self,
6549                              unsigned long val,
6550                              void *data)
6551 {
6552         switch (val) {
6553         case DIE_OOPS:
6554                 if (ftrace_dump_on_oops)
6555                         ftrace_dump(ftrace_dump_on_oops);
6556                 break;
6557         default:
6558                 break;
6559         }
6560         return NOTIFY_OK;
6561 }
6562
6563 static struct notifier_block trace_die_notifier = {
6564         .notifier_call = trace_die_handler,
6565         .priority = 200
6566 };
6567
6568 /*
6569  * printk() is limited to a 1024 byte buffer; we really don't need it that
6570  * big here, as nothing should be printing 1000 characters anyway.
6571  */
6572 #define TRACE_MAX_PRINT         1000
6573
6574 /*
6575  * Define KERN_TRACE here so that we have a single place to modify
6576  * if we ever decide to change the log level that the ftrace dump
6577  * should be printed at.
6578  */
6579 #define KERN_TRACE              KERN_EMERG
6580
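     /*
      * Print the contents of a trace_seq to the console at KERN_TRACE
      * level, truncated to TRACE_MAX_PRINT bytes, and reset the
      * sequence for reuse.
      */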
6581 void
6582 trace_printk_seq(struct trace_seq *s)
6583 {
6584         /* Probably should print a warning here. */
6585         if (s->len >= TRACE_MAX_PRINT)
6586                 s->len = TRACE_MAX_PRINT;
6587
6588         /* Should already be NUL-terminated, but we are paranoid. */
6589         s->buffer[s->len] = 0;
6590
6591         printk(KERN_TRACE "%s", s->buffer);
6592
6593         trace_seq_init(s);
6594 }
6595
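     /*
      * Set up an iterator over the global trace buffer, covering all
      * CPUs. Used by ftrace_dump() below instead of allocating a full
      * iterator the way the debugfs readers do.
      */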
6596 void trace_init_global_iter(struct trace_iterator *iter)
6597 {
6598         iter->tr = &global_trace;
6599         iter->trace = iter->tr->current_trace;
6600         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6601         iter->trace_buffer = &global_trace.trace_buffer;
6602
6603         if (iter->trace && iter->trace->open)
6604                 iter->trace->open(iter);
6605
6606         /* Annotate start of buffers if we had overruns */
6607         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6608                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6609
6610         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6611         if (trace_clocks[iter->tr->clock_id].in_ns)
6612                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6613 }
6614
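     /*
      * Dump the ring buffer contents of the global trace array to the
      * console. Called from the panic/die notifiers above (and from
      * sysrq-z); oops_dump_mode selects whether all CPUs or only the
      * current CPU gets dumped.
      */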
6615 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6616 {
6617         /* use static because iter can be a bit big for the stack */
6618         static struct trace_iterator iter;
6619         static atomic_t dump_running;
6620         unsigned int old_userobj;
6621         unsigned long flags;
6622         int cnt = 0, cpu;
6623
6624         /* Only allow one dump user at a time. */
6625         if (atomic_inc_return(&dump_running) != 1) {
6626                 atomic_dec(&dump_running);
6627                 return;
6628         }
6629
6630         /*
6631          * Always turn off tracing when we dump.
6632          * We don't need to show trace output of what happens
6633          * between multiple crashes.
6634          *
6635          * If the user does a sysrq-z, then they can re-enable
6636          * tracing with echo 1 > tracing_on.
6637          */
6638         tracing_off();
6639
6640         local_irq_save(flags);
6641
6642         /* Simulate the iterator */
6643         trace_init_global_iter(&iter);
6644
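             /*
              * Bump the per-cpu "disabled" counters so that no new events
              * are recorded into the buffers while we read them out.
              */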
6645         for_each_tracing_cpu(cpu) {
6646                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6647         }
6648
6649         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6650
6651         /* don't look at user memory in panic mode */
6652         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6653
6654         switch (oops_dump_mode) {
6655         case DUMP_ALL:
6656                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6657                 break;
6658         case DUMP_ORIG:
6659                 iter.cpu_file = raw_smp_processor_id();
6660                 break;
6661         case DUMP_NONE:
6662                 goto out_enable;
6663         default:
6664                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6665                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6666         }
6667
6668         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6669
6670         /* Did function tracer already get disabled? */
6671         if (ftrace_is_dead()) {
6672                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6673                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6674         }
6675
6676         /*
6677          * We need to stop all tracing on all CPUs to read
6678          * the next buffer. This is a bit expensive, but it is
6679          * not done often. We print everything we can read,
6680          * and then release the locks again.
6681          */
6682
6683         while (!trace_empty(&iter)) {
6684
6685                 if (!cnt)
6686                         printk(KERN_TRACE "---------------------------------\n");
6687
6688                 cnt++;
6689
6690                 /* reset all but tr, trace, and overruns */
6691                 memset(&iter.seq, 0,
6692                        sizeof(struct trace_iterator) -
6693                        offsetof(struct trace_iterator, seq));
6694                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6695                 iter.pos = -1;
6696
6697                 if (trace_find_next_entry_inc(&iter) != NULL) {
6698                         int ret;
6699
6700                         ret = print_trace_line(&iter);
6701                         if (ret != TRACE_TYPE_NO_CONSUME)
6702                                 trace_consume(&iter);
6703                 }
6704                 touch_nmi_watchdog();
6705
6706                 trace_printk_seq(&iter.seq);
6707         }
6708
6709         if (!cnt)
6710                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6711         else
6712                 printk(KERN_TRACE "---------------------------------\n");
6713
6714  out_enable:
6715         trace_flags |= old_userobj;
6716
6717         for_each_tracing_cpu(cpu) {
6718                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6719         }
6720         atomic_dec(&dump_running);
6721         local_irq_restore(flags);
6722 }
6723 EXPORT_SYMBOL_GPL(ftrace_dump);
6724
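     /*
      * Early boot setup: allocate the cpumasks and the global ring
      * buffer, register the nop tracer as the bootstrap current tracer,
      * and hook up the panic/die notifiers. Runs as an early_initcall().
      */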
6725 __init static int tracer_alloc_buffers(void)
6726 {
6727         int ring_buf_size;
6728         int ret = -ENOMEM;
6729
6730
6731         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6732                 goto out;
6733
6734         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6735                 goto out_free_buffer_mask;
6736
6737         /* Only allocate trace_printk buffers if a trace_printk exists */
6738         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6739                 /* Must be called before global_trace.buffer is allocated */
6740                 trace_printk_init_buffers();
6741
6742         /* To save memory, keep the ring buffer at its minimum size */
6743         if (ring_buffer_expanded)
6744                 ring_buf_size = trace_buf_size;
6745         else
6746                 ring_buf_size = 1;
6747
6748         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6749         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6750
6751         raw_spin_lock_init(&global_trace.start_lock);
6752
6753         /* Used for event triggers */
6754         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6755         if (!temp_buffer)
6756                 goto out_free_cpumask;
6757
6758         if (trace_create_savedcmd() < 0)
6759                 goto out_free_temp_buffer;
6760
6761         /* TODO: make the number of buffers hot pluggable with CPUs */
6762         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6763                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6764                 WARN_ON(1);
6765                 goto out_free_savedcmd;
6766         }
6767
6768         if (global_trace.buffer_disabled)
6769                 tracing_off();
6770
6771         if (trace_boot_clock) {
6772                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
6773                 if (ret < 0)
6774                         pr_warning("Trace clock %s not defined, going back to default\n",
6775                                    trace_boot_clock);
6776         }
6777
6778         /*
6779          * register_tracer() might reference current_trace, so it
6780          * needs to be set before we register anything. This is
6781          * just a bootstrap of current_trace anyway.
6782          */
6783         global_trace.current_trace = &nop_trace;
6784
6785         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6786
6787         ftrace_init_global_array_ops(&global_trace);
6788
6789         register_tracer(&nop_trace);
6790
6791         /* All seems OK, enable tracing */
6792         tracing_disabled = 0;
6793
6794         atomic_notifier_chain_register(&panic_notifier_list,
6795                                        &trace_panic_notifier);
6796
6797         register_die_notifier(&trace_die_notifier);
6798
6799         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6800
6801         INIT_LIST_HEAD(&global_trace.systems);
6802         INIT_LIST_HEAD(&global_trace.events);
6803         list_add(&global_trace.list, &ftrace_trace_arrays);
6804
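             /*
              * Apply any default trace options that were passed on the
              * kernel command line (the trace_options= boot parameter).
              */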
6805         while (trace_boot_options) {
6806                 char *option;
6807
6808                 option = strsep(&trace_boot_options, ",");
6809                 trace_set_options(&global_trace, option);
6810         }
6811
6812         register_snapshot_cmd();
6813
6814         return 0;
6815
6816 out_free_savedcmd:
6817         free_saved_cmdlines_buffer(savedcmd);
6818 out_free_temp_buffer:
6819         ring_buffer_free(temp_buffer);
6820 out_free_cpumask:
6821         free_cpumask_var(global_trace.tracing_cpumask);
6822 out_free_buffer_mask:
6823         free_cpumask_var(tracing_buffer_mask);
6824 out:
6825         return ret;
6826 }
6827
6828 __init static int clear_boot_tracer(void)
6829 {
6830         /*
6831          * The default bootup tracer name points into an init section
6832          * buffer. This function runs as a late_initcall; if the boot
6833          * tracer has not been registered by then, clear the pointer out
6834          * to prevent a later registration from accessing a buffer that
6835          * is about to be freed.
6836          */
6837         if (!default_bootup_tracer)
6838                 return 0;
6839
6840         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6841                default_bootup_tracer);
6842         default_bootup_tracer = NULL;
6843
6844         return 0;
6845 }
6846
6847 early_initcall(tracer_alloc_buffers);
6848 fs_initcall(tracer_init_debugfs);
6849 late_initcall(clear_boot_tracer);