ftrace: Provide trace clocks monotonic
[cascardo/linux.git] / kernel / trace / trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will lurk into the ring-buffer to count the
55  * entries inserted during the selftest although some concurrent
56  * insertions into the ring-buffer such as trace_printk could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int
77 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
78 {
79         return 0;
80 }
81
82 /*
83  * To prevent the comm cache from being overwritten when no
84  * tracing is active, only save the comm when a trace event
85  * occurred.
86  */
87 static DEFINE_PER_CPU(bool, trace_cmdline_save);
88
89 /*
90  * Kill all tracing for good (never come back).
91  * It is initialized to 1 but will turn to zero if the initialization
92  * of the tracer is successful. But that is the only place that sets
93  * this back to zero.
94  */
95 static int tracing_disabled = 1;
96
97 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
98
99 cpumask_var_t __read_mostly     tracing_buffer_mask;
100
101 /*
102  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
103  *
104  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
105  * is set, then ftrace_dump is called. This will output the contents
106  * of the ftrace buffers to the console.  This is very useful for
107  * capturing traces that lead to crashes and outputting them to a
108  * serial console.
109  *
110  * It is default off, but you can enable it with either specifying
111  * "ftrace_dump_on_oops" in the kernel command line, or setting
112  * /proc/sys/kernel/ftrace_dump_on_oops
113  * Set 1 if you want to dump buffers of all CPUs
114  * Set 2 if you want to dump the buffer of the CPU that triggered oops
115  */
116
117 enum ftrace_dump_mode ftrace_dump_on_oops;
118
119 /* When set, tracing will stop when a WARN*() is hit */
120 int __disable_trace_on_warning;
121
122 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
123
124 #define MAX_TRACER_SIZE         100
125 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
126 static char *default_bootup_tracer;
127
128 static bool allocate_snapshot;
129
130 static int __init set_cmdline_ftrace(char *str)
131 {
132         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
133         default_bootup_tracer = bootup_tracer_buf;
134         /* We are using ftrace early, expand it */
135         ring_buffer_expanded = true;
136         return 1;
137 }
138 __setup("ftrace=", set_cmdline_ftrace);
139
140 static int __init set_ftrace_dump_on_oops(char *str)
141 {
142         if (*str++ != '=' || !*str) {
143                 ftrace_dump_on_oops = DUMP_ALL;
144                 return 1;
145         }
146
147         if (!strcmp("orig_cpu", str)) {
148                 ftrace_dump_on_oops = DUMP_ORIG;
149                 return 1;
150         }
151
152         return 0;
153 }
154 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
155
156 static int __init stop_trace_on_warning(char *str)
157 {
158         __disable_trace_on_warning = 1;
159         return 1;
160 }
161 __setup("traceoff_on_warning=", stop_trace_on_warning);
162
163 static int __init boot_alloc_snapshot(char *str)
164 {
165         allocate_snapshot = true;
166         /* We also need the main ring buffer expanded */
167         ring_buffer_expanded = true;
168         return 1;
169 }
170 __setup("alloc_snapshot", boot_alloc_snapshot);
171
172
173 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
174 static char *trace_boot_options __initdata;
175
176 static int __init set_trace_boot_options(char *str)
177 {
178         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
179         trace_boot_options = trace_boot_options_buf;
180         return 0;
181 }
182 __setup("trace_options=", set_trace_boot_options);
183
184 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
185 static char *trace_boot_clock __initdata;
186
187 static int __init set_trace_boot_clock(char *str)
188 {
189         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
190         trace_boot_clock = trace_boot_clock_buf;
191         return 0;
192 }
193 __setup("trace_clock=", set_trace_boot_clock);
194
195
196 unsigned long long ns2usecs(cycle_t nsec)
197 {
198         nsec += 500;
199         do_div(nsec, 1000);
200         return nsec;
201 }
202
203 /*
204  * The global_trace is the descriptor that holds the tracing
205  * buffers for the live tracing. For each CPU, it contains
206  * a link list of pages that will store trace entries. The
207  * page descriptor of the pages in the memory is used to hold
208  * the link list by linking the lru item in the page descriptor
209  * to each of the pages in the buffer per CPU.
210  *
211  * For each active CPU there is a data field that holds the
212  * pages for the buffer for that CPU. Each CPU has the same number
213  * of pages allocated for its buffer.
214  */
215 static struct trace_array       global_trace;
216
217 LIST_HEAD(ftrace_trace_arrays);
218
219 int trace_array_get(struct trace_array *this_tr)
220 {
221         struct trace_array *tr;
222         int ret = -ENODEV;
223
224         mutex_lock(&trace_types_lock);
225         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
226                 if (tr == this_tr) {
227                         tr->ref++;
228                         ret = 0;
229                         break;
230                 }
231         }
232         mutex_unlock(&trace_types_lock);
233
234         return ret;
235 }
236
237 static void __trace_array_put(struct trace_array *this_tr)
238 {
239         WARN_ON(!this_tr->ref);
240         this_tr->ref--;
241 }
242
243 void trace_array_put(struct trace_array *this_tr)
244 {
245         mutex_lock(&trace_types_lock);
246         __trace_array_put(this_tr);
247         mutex_unlock(&trace_types_lock);
248 }
249
250 int filter_check_discard(struct ftrace_event_file *file, void *rec,
251                          struct ring_buffer *buffer,
252                          struct ring_buffer_event *event)
253 {
254         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
255             !filter_match_preds(file->filter, rec)) {
256                 ring_buffer_discard_commit(buffer, event);
257                 return 1;
258         }
259
260         return 0;
261 }
262 EXPORT_SYMBOL_GPL(filter_check_discard);
263
264 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
265                               struct ring_buffer *buffer,
266                               struct ring_buffer_event *event)
267 {
268         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
269             !filter_match_preds(call->filter, rec)) {
270                 ring_buffer_discard_commit(buffer, event);
271                 return 1;
272         }
273
274         return 0;
275 }
276 EXPORT_SYMBOL_GPL(call_filter_check_discard);
277
278 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
279 {
280         u64 ts;
281
282         /* Early boot up does not have a buffer yet */
283         if (!buf->buffer)
284                 return trace_clock_local();
285
286         ts = ring_buffer_time_stamp(buf->buffer, cpu);
287         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
288
289         return ts;
290 }
291
292 cycle_t ftrace_now(int cpu)
293 {
294         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
295 }
296
297 /**
298  * tracing_is_enabled - Show if global_trace has been disabled
299  *
300  * Shows if the global trace has been enabled or not. It uses the
301  * mirror flag "buffer_disabled" to be used in fast paths such as for
302  * the irqsoff tracer. But it may be inaccurate due to races. If you
303  * need to know the accurate state, use tracing_is_on() which is a little
304  * slower, but accurate.
305  */
306 int tracing_is_enabled(void)
307 {
308         /*
309          * For quick access (irqsoff uses this in fast path), just
310          * return the mirror variable of the state of the ring buffer.
311          * It's a little racy, but we don't really care.
312          */
313         smp_rmb();
314         return !global_trace.buffer_disabled;
315 }
316
317 /*
318  * trace_buf_size is the size in bytes that is allocated
319  * for a buffer. Note, the number of bytes is always rounded
320  * to page size.
321  *
322  * This number is purposely set to a low number of 16384.
323  * If the dump on oops happens, it will be much appreciated
324  * to not have to wait for all that output. Anyway this can be
325  * boot time and run time configurable.
326  */
327 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
328
329 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
330
331 /* trace_types holds a link list of available tracers. */
332 static struct tracer            *trace_types __read_mostly;
333
334 /*
335  * trace_types_lock is used to protect the trace_types list.
336  */
337 DEFINE_MUTEX(trace_types_lock);
338
339 /*
340  * serialize the access of the ring buffer
341  *
342  * ring buffer serializes readers, but it is low level protection.
343  * The validity of the events (which returns by ring_buffer_peek() ..etc)
344  * are not protected by ring buffer.
345  *
346  * The content of events may become garbage if we allow other processes to consume
347  * these events concurrently:
348  *   A) the page of the consumed events may become a normal page
349  *      (not reader page) in ring buffer, and this page will be rewritten
350  *      by events producer.
351  *   B) The page of the consumed events may become a page for splice_read,
352  *      and this page will be returned to system.
353  *
354  * These primitives allow multi process access to different cpu ring buffer
355  * concurrently.
356  *
357  * These primitives don't distinguish read-only and read-consume access.
358  * Multi read-only access are also serialized.
359  */
360
361 #ifdef CONFIG_SMP
362 static DECLARE_RWSEM(all_cpu_access_lock);
363 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
364
365 static inline void trace_access_lock(int cpu)
366 {
367         if (cpu == RING_BUFFER_ALL_CPUS) {
368                 /* gain it for accessing the whole ring buffer. */
369                 down_write(&all_cpu_access_lock);
370         } else {
371                 /* gain it for accessing a cpu ring buffer. */
372
373                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
374                 down_read(&all_cpu_access_lock);
375
376                 /* Secondly block other access to this @cpu ring buffer. */
377                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
378         }
379 }
380
381 static inline void trace_access_unlock(int cpu)
382 {
383         if (cpu == RING_BUFFER_ALL_CPUS) {
384                 up_write(&all_cpu_access_lock);
385         } else {
386                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
387                 up_read(&all_cpu_access_lock);
388         }
389 }
390
391 static inline void trace_access_lock_init(void)
392 {
393         int cpu;
394
395         for_each_possible_cpu(cpu)
396                 mutex_init(&per_cpu(cpu_access_lock, cpu));
397 }
398
399 #else
400
401 static DEFINE_MUTEX(access_lock);
402
403 static inline void trace_access_lock(int cpu)
404 {
405         (void)cpu;
406         mutex_lock(&access_lock);
407 }
408
409 static inline void trace_access_unlock(int cpu)
410 {
411         (void)cpu;
412         mutex_unlock(&access_lock);
413 }
414
/* !CONFIG_SMP: access_lock is statically defined, nothing to set up. */
static inline void trace_access_lock_init(void)
{
}
418
419 #endif
420
421 /* trace_flags holds trace_options default values */
422 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
423         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
424         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
425         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
426
427 static void tracer_tracing_on(struct trace_array *tr)
428 {
429         if (tr->trace_buffer.buffer)
430                 ring_buffer_record_on(tr->trace_buffer.buffer);
431         /*
432          * This flag is looked at when buffers haven't been allocated
433          * yet, or by some tracers (like irqsoff), that just want to
434          * know if the ring buffer has been disabled, but it can handle
435          * races of where it gets disabled but we still do a record.
436          * As the check is in the fast path of the tracers, it is more
437          * important to be fast than accurate.
438          */
439         tr->buffer_disabled = 0;
440         /* Make the flag seen by readers */
441         smp_wmb();
442 }
443
444 /**
445  * tracing_on - enable tracing buffers
446  *
447  * This function enables tracing buffers that may have been
448  * disabled with tracing_off.
449  */
450 void tracing_on(void)
451 {
452         tracer_tracing_on(&global_trace);
453 }
454 EXPORT_SYMBOL_GPL(tracing_on);
455
456 /**
457  * __trace_puts - write a constant string into the trace buffer.
458  * @ip:    The address of the caller
459  * @str:   The constant string to write
460  * @size:  The size of the string.
461  */
462 int __trace_puts(unsigned long ip, const char *str, int size)
463 {
464         struct ring_buffer_event *event;
465         struct ring_buffer *buffer;
466         struct print_entry *entry;
467         unsigned long irq_flags;
468         int alloc;
469
470         if (unlikely(tracing_selftest_running || tracing_disabled))
471                 return 0;
472
473         alloc = sizeof(*entry) + size + 2; /* possible \n added */
474
475         local_save_flags(irq_flags);
476         buffer = global_trace.trace_buffer.buffer;
477         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
478                                           irq_flags, preempt_count());
479         if (!event)
480                 return 0;
481
482         entry = ring_buffer_event_data(event);
483         entry->ip = ip;
484
485         memcpy(&entry->buf, str, size);
486
487         /* Add a newline if necessary */
488         if (entry->buf[size - 1] != '\n') {
489                 entry->buf[size] = '\n';
490                 entry->buf[size + 1] = '\0';
491         } else
492                 entry->buf[size] = '\0';
493
494         __buffer_unlock_commit(buffer, event);
495
496         return size;
497 }
498 EXPORT_SYMBOL_GPL(__trace_puts);
499
500 /**
501  * __trace_bputs - write the pointer to a constant string into trace buffer
502  * @ip:    The address of the caller
503  * @str:   The constant string to write to the buffer to
504  */
505 int __trace_bputs(unsigned long ip, const char *str)
506 {
507         struct ring_buffer_event *event;
508         struct ring_buffer *buffer;
509         struct bputs_entry *entry;
510         unsigned long irq_flags;
511         int size = sizeof(struct bputs_entry);
512
513         if (unlikely(tracing_selftest_running || tracing_disabled))
514                 return 0;
515
516         local_save_flags(irq_flags);
517         buffer = global_trace.trace_buffer.buffer;
518         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
519                                           irq_flags, preempt_count());
520         if (!event)
521                 return 0;
522
523         entry = ring_buffer_event_data(event);
524         entry->ip                       = ip;
525         entry->str                      = str;
526
527         __buffer_unlock_commit(buffer, event);
528
529         return 1;
530 }
531 EXPORT_SYMBOL_GPL(__trace_bputs);
532
533 #ifdef CONFIG_TRACER_SNAPSHOT
534 /**
535  * trace_snapshot - take a snapshot of the current buffer.
536  *
537  * This causes a swap between the snapshot buffer and the current live
538  * tracing buffer. You can use this to take snapshots of the live
539  * trace when some condition is triggered, but continue to trace.
540  *
541  * Note, make sure to allocate the snapshot with either
542  * a tracing_snapshot_alloc(), or by doing it manually
543  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
544  *
545  * If the snapshot buffer is not allocated, it will stop tracing.
546  * Basically making a permanent snapshot.
547  */
548 void tracing_snapshot(void)
549 {
550         struct trace_array *tr = &global_trace;
551         struct tracer *tracer = tr->current_trace;
552         unsigned long flags;
553
554         if (in_nmi()) {
555                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
556                 internal_trace_puts("*** snapshot is being ignored        ***\n");
557                 return;
558         }
559
560         if (!tr->allocated_snapshot) {
561                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
562                 internal_trace_puts("*** stopping trace here!   ***\n");
563                 tracing_off();
564                 return;
565         }
566
567         /* Note, snapshot can not be used when the tracer uses it */
568         if (tracer->use_max_tr) {
569                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
570                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
571                 return;
572         }
573
574         local_irq_save(flags);
575         update_max_tr(tr, current, smp_processor_id());
576         local_irq_restore(flags);
577 }
578 EXPORT_SYMBOL_GPL(tracing_snapshot);
579
580 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
581                                         struct trace_buffer *size_buf, int cpu_id);
582 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
583
584 static int alloc_snapshot(struct trace_array *tr)
585 {
586         int ret;
587
588         if (!tr->allocated_snapshot) {
589
590                 /* allocate spare buffer */
591                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
592                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
593                 if (ret < 0)
594                         return ret;
595
596                 tr->allocated_snapshot = true;
597         }
598
599         return 0;
600 }
601
602 static void free_snapshot(struct trace_array *tr)
603 {
604         /*
605          * We don't free the ring buffer. instead, resize it because
606          * The max_tr ring buffer has some state (e.g. ring->clock) and
607          * we want preserve it.
608          */
609         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
610         set_buffer_entries(&tr->max_buffer, 1);
611         tracing_reset_online_cpus(&tr->max_buffer);
612         tr->allocated_snapshot = false;
613 }
614
615 /**
616  * tracing_alloc_snapshot - allocate snapshot buffer.
617  *
618  * This only allocates the snapshot buffer if it isn't already
619  * allocated - it doesn't also take a snapshot.
620  *
621  * This is meant to be used in cases where the snapshot buffer needs
622  * to be set up for events that can't sleep but need to be able to
623  * trigger a snapshot.
624  */
625 int tracing_alloc_snapshot(void)
626 {
627         struct trace_array *tr = &global_trace;
628         int ret;
629
630         ret = alloc_snapshot(tr);
631         WARN_ON(ret < 0);
632
633         return ret;
634 }
635 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
636
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * Like tracing_snapshot(), except that the snapshot buffer is allocated
 * first when needed.  Use this only where it is safe to sleep, as the
 * allocation may sleep.
 *
 * The snapshot swaps the snapshot buffer with the live tracing buffer,
 * so the live trace can be captured when some condition is triggered
 * while tracing continues.  Nothing is snapshotted if the allocation
 * fails.
 */
void tracing_snapshot_alloc(void)
{
	if (tracing_alloc_snapshot() < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
659 #else
/* !CONFIG_TRACER_SNAPSHOT stub: only warns (once) that snapshots are unavailable. */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
665 int tracing_alloc_snapshot(void)
666 {
667         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
668         return -ENODEV;
669 }
670 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
/* !CONFIG_TRACER_SNAPSHOT stub: defers to tracing_snapshot() for the warning. */
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
677 #endif /* CONFIG_TRACER_SNAPSHOT */
678
679 static void tracer_tracing_off(struct trace_array *tr)
680 {
681         if (tr->trace_buffer.buffer)
682                 ring_buffer_record_off(tr->trace_buffer.buffer);
683         /*
684          * This flag is looked at when buffers haven't been allocated
685          * yet, or by some tracers (like irqsoff), that just want to
686          * know if the ring buffer has been disabled, but it can handle
687          * races of where it gets disabled but we still do a record.
688          * As the check is in the fast path of the tracers, it is more
689          * important to be fast than accurate.
690          */
691         tr->buffer_disabled = 1;
692         /* Make the flag seen by readers */
693         smp_wmb();
694 }
695
696 /**
697  * tracing_off - turn off tracing buffers
698  *
699  * This function stops the tracing buffers from recording data.
700  * It does not disable any overhead the tracers themselves may
701  * be causing. This function simply causes all recording to
702  * the ring buffers to fail.
703  */
704 void tracing_off(void)
705 {
706         tracer_tracing_off(&global_trace);
707 }
708 EXPORT_SYMBOL_GPL(tracing_off);
709
710 void disable_trace_on_warning(void)
711 {
712         if (__disable_trace_on_warning)
713                 tracing_off();
714 }
715
716 /**
717  * tracer_tracing_is_on - show real state of ring buffer enabled
718  * @tr : the trace array to know if ring buffer is enabled
719  *
720  * Shows real state of the ring buffer if it is enabled or not.
721  */
722 static int tracer_tracing_is_on(struct trace_array *tr)
723 {
724         if (tr->trace_buffer.buffer)
725                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
726         return !tr->buffer_disabled;
727 }
728
729 /**
730  * tracing_is_on - show state of ring buffers enabled
731  */
732 int tracing_is_on(void)
733 {
734         return tracer_tracing_is_on(&global_trace);
735 }
736 EXPORT_SYMBOL_GPL(tracing_is_on);
737
738 static int __init set_buf_size(char *str)
739 {
740         unsigned long buf_size;
741
742         if (!str)
743                 return 0;
744         buf_size = memparse(str, &str);
745         /* nr_entries can not be zero */
746         if (buf_size == 0)
747                 return 0;
748         trace_buf_size = buf_size;
749         return 1;
750 }
751 __setup("trace_buf_size=", set_buf_size);
752
753 static int __init set_tracing_thresh(char *str)
754 {
755         unsigned long threshold;
756         int ret;
757
758         if (!str)
759                 return 0;
760         ret = kstrtoul(str, 0, &threshold);
761         if (ret < 0)
762                 return 0;
763         tracing_thresh = threshold * 1000;
764         return 1;
765 }
766 __setup("tracing_thresh=", set_tracing_thresh);
767
/*
 * Convert @nsecs to microseconds.  The division truncates; no rounding
 * is applied.
 */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	unsigned long usecs = nsecs / 1000;

	return usecs;
}
772
/*
 * Names of the trace options.  These must match the bit positions in
 * trace_iterator_flags; the list is NULL terminated.
 */
static const char *trace_options[] = {
	"print-parent",		/*  0 */
	"sym-offset",		/*  1 */
	"sym-addr",		/*  2 */
	"verbose",		/*  3 */
	"raw",			/*  4 */
	"hex",			/*  5 */
	"bin",			/*  6 */
	"block",		/*  7 */
	"stacktrace",		/*  8 */
	"trace_printk",		/*  9 */
	"ftrace_preempt",	/* 10 */
	"branch",		/* 11 */
	"annotate",		/* 12 */
	"userstacktrace",	/* 13 */
	"sym-userobj",		/* 14 */
	"printk-msg-only",	/* 15 */
	"context-info",		/* 16 */
	"latency-format",	/* 17 */
	"sleep-time",		/* 18 */
	"graph-time",		/* 19 */
	"record-cmd",		/* 20 */
	"overwrite",		/* 21 */
	"disable_on_free",	/* 22 */
	"irq-info",		/* 23 */
	"markers",		/* 24 */
	"function-trace",	/* 25 */
	NULL
};
803
804 static struct {
805         u64 (*func)(void);
806         const char *name;
807         int in_ns;              /* is this clock in nanoseconds? */
808 } trace_clocks[] = {
809         { trace_clock_local,            "local",        1 },
810         { trace_clock_global,           "global",       1 },
811         { trace_clock_counter,          "counter",      0 },
812         { trace_clock_jiffies,          "uptime",       1 },
813         { trace_clock,                  "perf",         1 },
814         { ktime_get_mono_fast_ns,       "mono",         1 },
815         ARCH_TRACE_CLOCKS
816 };
817
818 /*
819  * trace_parser_get_init - gets the buffer for trace parser
820  */
821 int trace_parser_get_init(struct trace_parser *parser, int size)
822 {
823         memset(parser, 0, sizeof(*parser));
824
825         parser->buffer = kmalloc(size, GFP_KERNEL);
826         if (!parser->buffer)
827                 return 1;
828
829         parser->size = size;
830         return 0;
831 }
832
833 /*
834  * trace_parser_put - frees the buffer for trace parser
835  */
836 void trace_parser_put(struct trace_parser *parser)
837 {
838         kfree(parser->buffer);
839 }
840
841 /*
842  * trace_get_user - reads the user input string separated by  space
843  * (matched by isspace(ch))
844  *
845  * For each string found the 'struct trace_parser' is updated,
846  * and the function returns.
847  *
848  * Returns number of bytes read.
849  *
850  * See kernel/trace/trace.h for 'struct trace_parser' details.
851  */
852 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
853         size_t cnt, loff_t *ppos)
854 {
855         char ch;
856         size_t read = 0;
857         ssize_t ret;
858
859         if (!*ppos)
860                 trace_parser_clear(parser);
861
862         ret = get_user(ch, ubuf++);
863         if (ret)
864                 goto out;
865
866         read++;
867         cnt--;
868
869         /*
870          * The parser is not finished with the last write,
871          * continue reading the user input without skipping spaces.
872          */
873         if (!parser->cont) {
874                 /* skip white space */
875                 while (cnt && isspace(ch)) {
876                         ret = get_user(ch, ubuf++);
877                         if (ret)
878                                 goto out;
879                         read++;
880                         cnt--;
881                 }
882
883                 /* only spaces were written */
884                 if (isspace(ch)) {
885                         *ppos += read;
886                         ret = read;
887                         goto out;
888                 }
889
890                 parser->idx = 0;
891         }
892
893         /* read the non-space input */
894         while (cnt && !isspace(ch)) {
895                 if (parser->idx < parser->size - 1)
896                         parser->buffer[parser->idx++] = ch;
897                 else {
898                         ret = -EINVAL;
899                         goto out;
900                 }
901                 ret = get_user(ch, ubuf++);
902                 if (ret)
903                         goto out;
904                 read++;
905                 cnt--;
906         }
907
908         /* We either got finished input or we have to wait for another call. */
909         if (isspace(ch)) {
910                 parser->buffer[parser->idx] = 0;
911                 parser->cont = false;
912         } else if (parser->idx < parser->size - 1) {
913                 parser->cont = true;
914                 parser->buffer[parser->idx++] = ch;
915         } else {
916                 ret = -EINVAL;
917                 goto out;
918         }
919
920         *ppos += read;
921         ret = read;
922
923 out:
924         return ret;
925 }
926
927 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
928 {
929         int len;
930         int ret;
931
932         if (!cnt)
933                 return 0;
934
935         if (s->len <= s->readpos)
936                 return -EBUSY;
937
938         len = s->len - s->readpos;
939         if (cnt > len)
940                 cnt = len;
941         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
942         if (ret == cnt)
943                 return -EFAULT;
944
945         cnt -= ret;
946
947         s->readpos += cnt;
948         return cnt;
949 }
950
951 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
952 {
953         int len;
954
955         if (s->len <= s->readpos)
956                 return -EBUSY;
957
958         len = s->len - s->readpos;
959         if (cnt > len)
960                 cnt = len;
961         memcpy(buf, s->buffer + s->readpos, cnt);
962
963         s->readpos += cnt;
964         return cnt;
965 }
966
967 unsigned long __read_mostly     tracing_thresh;
968
969 #ifdef CONFIG_TRACER_MAX_TRACE
970 /*
971  * Copy the new maximum trace into the separate maximum-trace
972  * structure. (this way the maximum trace is permanently saved,
973  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
974  */
975 static void
976 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
977 {
978         struct trace_buffer *trace_buf = &tr->trace_buffer;
979         struct trace_buffer *max_buf = &tr->max_buffer;
980         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
981         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
982
983         max_buf->cpu = cpu;
984         max_buf->time_start = data->preempt_timestamp;
985
986         max_data->saved_latency = tr->max_latency;
987         max_data->critical_start = data->critical_start;
988         max_data->critical_end = data->critical_end;
989
990         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
991         max_data->pid = tsk->pid;
992         /*
993          * If tsk == current, then use current_uid(), as that does not use
994          * RCU. The irq tracer can be called out of RCU scope.
995          */
996         if (tsk == current)
997                 max_data->uid = current_uid();
998         else
999                 max_data->uid = task_uid(tsk);
1000
1001         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1002         max_data->policy = tsk->policy;
1003         max_data->rt_priority = tsk->rt_priority;
1004
1005         /* record this tasks comm */
1006         tracing_record_cmdline(tsk);
1007 }
1008
1009 /**
1010  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1011  * @tr: tracer
1012  * @tsk: the task with the latency
1013  * @cpu: The cpu that initiated the trace.
1014  *
1015  * Flip the buffers between the @tr and the max_tr and record information
1016  * about which task was the cause of this latency.
1017  */
1018 void
1019 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1020 {
1021         struct ring_buffer *buf;
1022
1023         if (tr->stop_count)
1024                 return;
1025
1026         WARN_ON_ONCE(!irqs_disabled());
1027
1028         if (!tr->allocated_snapshot) {
1029                 /* Only the nop tracer should hit this when disabling */
1030                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1031                 return;
1032         }
1033
1034         arch_spin_lock(&tr->max_lock);
1035
1036         buf = tr->trace_buffer.buffer;
1037         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1038         tr->max_buffer.buffer = buf;
1039
1040         __update_max_tr(tr, tsk, cpu);
1041         arch_spin_unlock(&tr->max_lock);
1042 }
1043
1044 /**
1045  * update_max_tr_single - only copy one trace over, and reset the rest
1046  * @tr - tracer
1047  * @tsk - task with the latency
1048  * @cpu - the cpu of the buffer to copy.
1049  *
1050  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1051  */
1052 void
1053 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1054 {
1055         int ret;
1056
1057         if (tr->stop_count)
1058                 return;
1059
1060         WARN_ON_ONCE(!irqs_disabled());
1061         if (!tr->allocated_snapshot) {
1062                 /* Only the nop tracer should hit this when disabling */
1063                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1064                 return;
1065         }
1066
1067         arch_spin_lock(&tr->max_lock);
1068
1069         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1070
1071         if (ret == -EBUSY) {
1072                 /*
1073                  * We failed to swap the buffer due to a commit taking
1074                  * place on this CPU. We fail to record, but we reset
1075                  * the max trace buffer (no one writes directly to it)
1076                  * and flag that it failed.
1077                  */
1078                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1079                         "Failed to swap buffers due to commit in progress\n");
1080         }
1081
1082         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1083
1084         __update_max_tr(tr, tsk, cpu);
1085         arch_spin_unlock(&tr->max_lock);
1086 }
1087 #endif /* CONFIG_TRACER_MAX_TRACE */
1088
1089 static int wait_on_pipe(struct trace_iterator *iter)
1090 {
1091         /* Iterators are static, they should be filled or empty */
1092         if (trace_buffer_iter(iter, iter->cpu_file))
1093                 return 0;
1094
1095         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1096 }
1097
1098 #ifdef CONFIG_FTRACE_STARTUP_TEST
1099 static int run_tracer_selftest(struct tracer *type)
1100 {
1101         struct trace_array *tr = &global_trace;
1102         struct tracer *saved_tracer = tr->current_trace;
1103         int ret;
1104
1105         if (!type->selftest || tracing_selftest_disabled)
1106                 return 0;
1107
1108         /*
1109          * Run a selftest on this tracer.
1110          * Here we reset the trace buffer, and set the current
1111          * tracer to be this tracer. The tracer can then run some
1112          * internal tracing to verify that everything is in order.
1113          * If we fail, we do not register this tracer.
1114          */
1115         tracing_reset_online_cpus(&tr->trace_buffer);
1116
1117         tr->current_trace = type;
1118
1119 #ifdef CONFIG_TRACER_MAX_TRACE
1120         if (type->use_max_tr) {
1121                 /* If we expanded the buffers, make sure the max is expanded too */
1122                 if (ring_buffer_expanded)
1123                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1124                                            RING_BUFFER_ALL_CPUS);
1125                 tr->allocated_snapshot = true;
1126         }
1127 #endif
1128
1129         /* the test is responsible for initializing and enabling */
1130         pr_info("Testing tracer %s: ", type->name);
1131         ret = type->selftest(type, tr);
1132         /* the test is responsible for resetting too */
1133         tr->current_trace = saved_tracer;
1134         if (ret) {
1135                 printk(KERN_CONT "FAILED!\n");
1136                 /* Add the warning after printing 'FAILED' */
1137                 WARN_ON(1);
1138                 return -1;
1139         }
1140         /* Only reset on passing, to avoid touching corrupted buffers */
1141         tracing_reset_online_cpus(&tr->trace_buffer);
1142
1143 #ifdef CONFIG_TRACER_MAX_TRACE
1144         if (type->use_max_tr) {
1145                 tr->allocated_snapshot = false;
1146
1147                 /* Shrink the max buffer again */
1148                 if (ring_buffer_expanded)
1149                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1150                                            RING_BUFFER_ALL_CPUS);
1151         }
1152 #endif
1153
1154         printk(KERN_CONT "PASSED\n");
1155         return 0;
1156 }
1157 #else
/* Startup selftests are not built in: every tracer trivially passes. */
static inline int run_tracer_selftest(struct tracer *type)
{
        const int passed = 0;

        return passed;
}
1162 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1163
1164 /**
1165  * register_tracer - register a tracer with the ftrace system.
1166  * @type - the plugin for the tracer
1167  *
1168  * Register a new plugin tracer.
1169  */
1170 int register_tracer(struct tracer *type)
1171 {
1172         struct tracer *t;
1173         int ret = 0;
1174
1175         if (!type->name) {
1176                 pr_info("Tracer must have a name\n");
1177                 return -1;
1178         }
1179
1180         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1181                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1182                 return -1;
1183         }
1184
1185         mutex_lock(&trace_types_lock);
1186
1187         tracing_selftest_running = true;
1188
1189         for (t = trace_types; t; t = t->next) {
1190                 if (strcmp(type->name, t->name) == 0) {
1191                         /* already found */
1192                         pr_info("Tracer %s already registered\n",
1193                                 type->name);
1194                         ret = -1;
1195                         goto out;
1196                 }
1197         }
1198
1199         if (!type->set_flag)
1200                 type->set_flag = &dummy_set_flag;
1201         if (!type->flags)
1202                 type->flags = &dummy_tracer_flags;
1203         else
1204                 if (!type->flags->opts)
1205                         type->flags->opts = dummy_tracer_opt;
1206
1207         ret = run_tracer_selftest(type);
1208         if (ret < 0)
1209                 goto out;
1210
1211         type->next = trace_types;
1212         trace_types = type;
1213
1214  out:
1215         tracing_selftest_running = false;
1216         mutex_unlock(&trace_types_lock);
1217
1218         if (ret || !default_bootup_tracer)
1219                 goto out_unlock;
1220
1221         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1222                 goto out_unlock;
1223
1224         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1225         /* Do we want this tracer to start on bootup? */
1226         tracing_set_tracer(&global_trace, type->name);
1227         default_bootup_tracer = NULL;
1228         /* disable other selftests, since this will break it. */
1229         tracing_selftest_disabled = true;
1230 #ifdef CONFIG_FTRACE_STARTUP_TEST
1231         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1232                type->name);
1233 #endif
1234
1235  out_unlock:
1236         return ret;
1237 }
1238
1239 void tracing_reset(struct trace_buffer *buf, int cpu)
1240 {
1241         struct ring_buffer *buffer = buf->buffer;
1242
1243         if (!buffer)
1244                 return;
1245
1246         ring_buffer_record_disable(buffer);
1247
1248         /* Make sure all commits have finished */
1249         synchronize_sched();
1250         ring_buffer_reset_cpu(buffer, cpu);
1251
1252         ring_buffer_record_enable(buffer);
1253 }
1254
1255 void tracing_reset_online_cpus(struct trace_buffer *buf)
1256 {
1257         struct ring_buffer *buffer = buf->buffer;
1258         int cpu;
1259
1260         if (!buffer)
1261                 return;
1262
1263         ring_buffer_record_disable(buffer);
1264
1265         /* Make sure all commits have finished */
1266         synchronize_sched();
1267
1268         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1269
1270         for_each_online_cpu(cpu)
1271                 ring_buffer_reset_cpu(buffer, cpu);
1272
1273         ring_buffer_record_enable(buffer);
1274 }
1275
1276 /* Must have trace_types_lock held */
1277 void tracing_reset_all_online_cpus(void)
1278 {
1279         struct trace_array *tr;
1280
1281         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1282                 tracing_reset_online_cpus(&tr->trace_buffer);
1283 #ifdef CONFIG_TRACER_MAX_TRACE
1284                 tracing_reset_online_cpus(&tr->max_buffer);
1285 #endif
1286         }
1287 }
1288
1289 #define SAVED_CMDLINES_DEFAULT 128
1290 #define NO_CMDLINE_MAP UINT_MAX
1291 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1292 struct saved_cmdlines_buffer {
1293         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1294         unsigned *map_cmdline_to_pid;
1295         unsigned cmdline_num;
1296         int cmdline_idx;
1297         char *saved_cmdlines;
1298 };
1299 static struct saved_cmdlines_buffer *savedcmd;
1300
1301 /* temporary disable recording */
1302 static atomic_t trace_record_cmdline_disabled __read_mostly;
1303
1304 static inline char *get_saved_cmdlines(int idx)
1305 {
1306         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1307 }
1308
1309 static inline void set_cmdline(int idx, const char *cmdline)
1310 {
1311         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1312 }
1313
1314 static int allocate_cmdlines_buffer(unsigned int val,
1315                                     struct saved_cmdlines_buffer *s)
1316 {
1317         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1318                                         GFP_KERNEL);
1319         if (!s->map_cmdline_to_pid)
1320                 return -ENOMEM;
1321
1322         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1323         if (!s->saved_cmdlines) {
1324                 kfree(s->map_cmdline_to_pid);
1325                 return -ENOMEM;
1326         }
1327
1328         s->cmdline_idx = 0;
1329         s->cmdline_num = val;
1330         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1331                sizeof(s->map_pid_to_cmdline));
1332         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1333                val * sizeof(*s->map_cmdline_to_pid));
1334
1335         return 0;
1336 }
1337
1338 static int trace_create_savedcmd(void)
1339 {
1340         int ret;
1341
1342         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1343         if (!savedcmd)
1344                 return -ENOMEM;
1345
1346         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1347         if (ret < 0) {
1348                 kfree(savedcmd);
1349                 savedcmd = NULL;
1350                 return -ENOMEM;
1351         }
1352
1353         return 0;
1354 }
1355
1356 int is_tracing_stopped(void)
1357 {
1358         return global_trace.stop_count;
1359 }
1360
1361 /**
1362  * tracing_start - quick start of the tracer
1363  *
1364  * If tracing is enabled but was stopped by tracing_stop,
1365  * this will start the tracer back up.
1366  */
1367 void tracing_start(void)
1368 {
1369         struct ring_buffer *buffer;
1370         unsigned long flags;
1371
1372         if (tracing_disabled)
1373                 return;
1374
1375         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1376         if (--global_trace.stop_count) {
1377                 if (global_trace.stop_count < 0) {
1378                         /* Someone screwed up their debugging */
1379                         WARN_ON_ONCE(1);
1380                         global_trace.stop_count = 0;
1381                 }
1382                 goto out;
1383         }
1384
1385         /* Prevent the buffers from switching */
1386         arch_spin_lock(&global_trace.max_lock);
1387
1388         buffer = global_trace.trace_buffer.buffer;
1389         if (buffer)
1390                 ring_buffer_record_enable(buffer);
1391
1392 #ifdef CONFIG_TRACER_MAX_TRACE
1393         buffer = global_trace.max_buffer.buffer;
1394         if (buffer)
1395                 ring_buffer_record_enable(buffer);
1396 #endif
1397
1398         arch_spin_unlock(&global_trace.max_lock);
1399
1400  out:
1401         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1402 }
1403
1404 static void tracing_start_tr(struct trace_array *tr)
1405 {
1406         struct ring_buffer *buffer;
1407         unsigned long flags;
1408
1409         if (tracing_disabled)
1410                 return;
1411
1412         /* If global, we need to also start the max tracer */
1413         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1414                 return tracing_start();
1415
1416         raw_spin_lock_irqsave(&tr->start_lock, flags);
1417
1418         if (--tr->stop_count) {
1419                 if (tr->stop_count < 0) {
1420                         /* Someone screwed up their debugging */
1421                         WARN_ON_ONCE(1);
1422                         tr->stop_count = 0;
1423                 }
1424                 goto out;
1425         }
1426
1427         buffer = tr->trace_buffer.buffer;
1428         if (buffer)
1429                 ring_buffer_record_enable(buffer);
1430
1431  out:
1432         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1433 }
1434
1435 /**
1436  * tracing_stop - quick stop of the tracer
1437  *
1438  * Light weight way to stop tracing. Use in conjunction with
1439  * tracing_start.
1440  */
1441 void tracing_stop(void)
1442 {
1443         struct ring_buffer *buffer;
1444         unsigned long flags;
1445
1446         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1447         if (global_trace.stop_count++)
1448                 goto out;
1449
1450         /* Prevent the buffers from switching */
1451         arch_spin_lock(&global_trace.max_lock);
1452
1453         buffer = global_trace.trace_buffer.buffer;
1454         if (buffer)
1455                 ring_buffer_record_disable(buffer);
1456
1457 #ifdef CONFIG_TRACER_MAX_TRACE
1458         buffer = global_trace.max_buffer.buffer;
1459         if (buffer)
1460                 ring_buffer_record_disable(buffer);
1461 #endif
1462
1463         arch_spin_unlock(&global_trace.max_lock);
1464
1465  out:
1466         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1467 }
1468
1469 static void tracing_stop_tr(struct trace_array *tr)
1470 {
1471         struct ring_buffer *buffer;
1472         unsigned long flags;
1473
1474         /* If global, we need to also stop the max tracer */
1475         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1476                 return tracing_stop();
1477
1478         raw_spin_lock_irqsave(&tr->start_lock, flags);
1479         if (tr->stop_count++)
1480                 goto out;
1481
1482         buffer = tr->trace_buffer.buffer;
1483         if (buffer)
1484                 ring_buffer_record_disable(buffer);
1485
1486  out:
1487         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1488 }
1489
1490 void trace_stop_cmdline_recording(void);
1491
1492 static int trace_save_cmdline(struct task_struct *tsk)
1493 {
1494         unsigned pid, idx;
1495
1496         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1497                 return 0;
1498
1499         /*
1500          * It's not the end of the world if we don't get
1501          * the lock, but we also don't want to spin
1502          * nor do we want to disable interrupts,
1503          * so if we miss here, then better luck next time.
1504          */
1505         if (!arch_spin_trylock(&trace_cmdline_lock))
1506                 return 0;
1507
1508         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1509         if (idx == NO_CMDLINE_MAP) {
1510                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1511
1512                 /*
1513                  * Check whether the cmdline buffer at idx has a pid
1514                  * mapped. We are going to overwrite that entry so we
1515                  * need to clear the map_pid_to_cmdline. Otherwise we
1516                  * would read the new comm for the old pid.
1517                  */
1518                 pid = savedcmd->map_cmdline_to_pid[idx];
1519                 if (pid != NO_CMDLINE_MAP)
1520                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1521
1522                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1523                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1524
1525                 savedcmd->cmdline_idx = idx;
1526         }
1527
1528         set_cmdline(idx, tsk->comm);
1529
1530         arch_spin_unlock(&trace_cmdline_lock);
1531
1532         return 1;
1533 }
1534
1535 static void __trace_find_cmdline(int pid, char comm[])
1536 {
1537         unsigned map;
1538
1539         if (!pid) {
1540                 strcpy(comm, "<idle>");
1541                 return;
1542         }
1543
1544         if (WARN_ON_ONCE(pid < 0)) {
1545                 strcpy(comm, "<XXX>");
1546                 return;
1547         }
1548
1549         if (pid > PID_MAX_DEFAULT) {
1550                 strcpy(comm, "<...>");
1551                 return;
1552         }
1553
1554         map = savedcmd->map_pid_to_cmdline[pid];
1555         if (map != NO_CMDLINE_MAP)
1556                 strcpy(comm, get_saved_cmdlines(map));
1557         else
1558                 strcpy(comm, "<...>");
1559 }
1560
1561 void trace_find_cmdline(int pid, char comm[])
1562 {
1563         preempt_disable();
1564         arch_spin_lock(&trace_cmdline_lock);
1565
1566         __trace_find_cmdline(pid, comm);
1567
1568         arch_spin_unlock(&trace_cmdline_lock);
1569         preempt_enable();
1570 }
1571
1572 void tracing_record_cmdline(struct task_struct *tsk)
1573 {
1574         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1575                 return;
1576
1577         if (!__this_cpu_read(trace_cmdline_save))
1578                 return;
1579
1580         if (trace_save_cmdline(tsk))
1581                 __this_cpu_write(trace_cmdline_save, false);
1582 }
1583
1584 void
1585 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1586                              int pc)
1587 {
1588         struct task_struct *tsk = current;
1589
1590         entry->preempt_count            = pc & 0xff;
1591         entry->pid                      = (tsk) ? tsk->pid : 0;
1592         entry->flags =
1593 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1594                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1595 #else
1596                 TRACE_FLAG_IRQS_NOSUPPORT |
1597 #endif
1598                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1599                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1600                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1601                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1602 }
1603 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1604
1605 struct ring_buffer_event *
1606 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1607                           int type,
1608                           unsigned long len,
1609                           unsigned long flags, int pc)
1610 {
1611         struct ring_buffer_event *event;
1612
1613         event = ring_buffer_lock_reserve(buffer, len);
1614         if (event != NULL) {
1615                 struct trace_entry *ent = ring_buffer_event_data(event);
1616
1617                 tracing_generic_entry_update(ent, flags, pc);
1618                 ent->type = type;
1619         }
1620
1621         return event;
1622 }
1623
1624 void
1625 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1626 {
1627         __this_cpu_write(trace_cmdline_save, true);
1628         ring_buffer_unlock_commit(buffer, event);
1629 }
1630
/*
 * Commit @event to @buffer, then hand over to the kernel and user
 * stack trace recorders (each of them checks whether it is enabled).
 */
static inline void
__trace_buffer_unlock_commit(struct ring_buffer *buffer,
                             struct ring_buffer_event *event,
                             unsigned long flags, int pc)
{
        __buffer_unlock_commit(buffer, event);

        /* NOTE(review): 6 is the skip count handed to the stack tracer */
        ftrace_trace_stack(buffer, flags, 6, pc);

        ftrace_trace_userstack(buffer, flags, pc);
}
1641
/* Exported wrapper around __trace_buffer_unlock_commit(). */
void
trace_buffer_unlock_commit(struct ring_buffer *buffer,
                           struct ring_buffer_event *event,
                           unsigned long flags, int pc)
{
        __trace_buffer_unlock_commit(buffer, event, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1649
1650 static struct ring_buffer *temp_buffer;
1651
1652 struct ring_buffer_event *
1653 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1654                           struct ftrace_event_file *ftrace_file,
1655                           int type, unsigned long len,
1656                           unsigned long flags, int pc)
1657 {
1658         struct ring_buffer_event *entry;
1659
1660         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1661         entry = trace_buffer_lock_reserve(*current_rb,
1662                                          type, len, flags, pc);
1663         /*
1664          * If tracing is off, but we have triggers enabled
1665          * we still need to look at the event data. Use the temp_buffer
1666          * to store the trace event for the tigger to use. It's recusive
1667          * safe and will not be recorded anywhere.
1668          */
1669         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1670                 *current_rb = temp_buffer;
1671                 entry = trace_buffer_lock_reserve(*current_rb,
1672                                                   type, len, flags, pc);
1673         }
1674         return entry;
1675 }
1676 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1677
1678 struct ring_buffer_event *
1679 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1680                                   int type, unsigned long len,
1681                                   unsigned long flags, int pc)
1682 {
1683         *current_rb = global_trace.trace_buffer.buffer;
1684         return trace_buffer_lock_reserve(*current_rb,
1685                                          type, len, flags, pc);
1686 }
1687 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1688
/* Exported wrapper around __trace_buffer_unlock_commit(). */
void
trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
                                   struct ring_buffer_event *event,
                                   unsigned long flags, int pc)
{
        __trace_buffer_unlock_commit(buffer, event, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1696
/*
 * Commit @event to @buffer, then hand over to the stack trace
 * recorders. The kernel stack trace is taken from @regs, with no
 * entries skipped.
 */
void
trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
                                struct ring_buffer_event *event,
                                unsigned long flags, int pc,
                                struct pt_regs *regs)
{
        __buffer_unlock_commit(buffer, event);

        ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);

        ftrace_trace_userstack(buffer, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1708
/* Exported wrapper around ring_buffer_discard_commit(). */
void
trace_current_buffer_discard_commit(struct ring_buffer *buffer,
                                    struct ring_buffer_event *event)
{
        ring_buffer_discard_commit(buffer, event);
}
EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1715
1716 void
1717 trace_function(struct trace_array *tr,
1718                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1719                int pc)
1720 {
1721         struct ftrace_event_call *call = &event_function;
1722         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1723         struct ring_buffer_event *event;
1724         struct ftrace_entry *entry;
1725
1726         /* If we are reading the ring buffer, don't trace */
1727         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1728                 return;
1729
1730         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1731                                           flags, pc);
1732         if (!event)
1733                 return;
1734         entry   = ring_buffer_event_data(event);
1735         entry->ip                       = ip;
1736         entry->parent_ip                = parent_ip;
1737
1738         if (!call_filter_check_discard(call, entry, buffer, event))
1739                 __buffer_unlock_commit(buffer, event);
1740 }
1741
1742 #ifdef CONFIG_STACKTRACE
1743
1744 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1745 struct ftrace_stack {
1746         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1747 };
1748
1749 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1750 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1751
1752 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1753                                  unsigned long flags,
1754                                  int skip, int pc, struct pt_regs *regs)
1755 {
1756         struct ftrace_event_call *call = &event_kernel_stack;
1757         struct ring_buffer_event *event;
1758         struct stack_entry *entry;
1759         struct stack_trace trace;
1760         int use_stack;
1761         int size = FTRACE_STACK_ENTRIES;
1762
1763         trace.nr_entries        = 0;
1764         trace.skip              = skip;
1765
1766         /*
1767          * Since events can happen in NMIs there's no safe way to
1768          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1769          * or NMI comes in, it will just have to use the default
1770          * FTRACE_STACK_SIZE.
1771          */
1772         preempt_disable_notrace();
1773
1774         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1775         /*
1776          * We don't need any atomic variables, just a barrier.
1777          * If an interrupt comes in, we don't care, because it would
1778          * have exited and put the counter back to what we want.
1779          * We just need a barrier to keep gcc from moving things
1780          * around.
1781          */
1782         barrier();
1783         if (use_stack == 1) {
1784                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1785                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1786
1787                 if (regs)
1788                         save_stack_trace_regs(regs, &trace);
1789                 else
1790                         save_stack_trace(&trace);
1791
1792                 if (trace.nr_entries > size)
1793                         size = trace.nr_entries;
1794         } else
1795                 /* From now on, use_stack is a boolean */
1796                 use_stack = 0;
1797
1798         size *= sizeof(unsigned long);
1799
1800         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1801                                           sizeof(*entry) + size, flags, pc);
1802         if (!event)
1803                 goto out;
1804         entry = ring_buffer_event_data(event);
1805
1806         memset(&entry->caller, 0, size);
1807
1808         if (use_stack)
1809                 memcpy(&entry->caller, trace.entries,
1810                        trace.nr_entries * sizeof(unsigned long));
1811         else {
1812                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1813                 trace.entries           = entry->caller;
1814                 if (regs)
1815                         save_stack_trace_regs(regs, &trace);
1816                 else
1817                         save_stack_trace(&trace);
1818         }
1819
1820         entry->size = trace.nr_entries;
1821
1822         if (!call_filter_check_discard(call, entry, buffer, event))
1823                 __buffer_unlock_commit(buffer, event);
1824
1825  out:
1826         /* Again, don't let gcc optimize things here */
1827         barrier();
1828         __this_cpu_dec(ftrace_stack_reserve);
1829         preempt_enable_notrace();
1830
1831 }
1832
1833 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1834                              int skip, int pc, struct pt_regs *regs)
1835 {
1836         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1837                 return;
1838
1839         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1840 }
1841
1842 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1843                         int skip, int pc)
1844 {
1845         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1846                 return;
1847
1848         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1849 }
1850
1851 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1852                    int pc)
1853 {
1854         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1855 }
1856
1857 /**
1858  * trace_dump_stack - record a stack back trace in the trace buffer
1859  * @skip: Number of functions to skip (helper handlers)
1860  */
1861 void trace_dump_stack(int skip)
1862 {
1863         unsigned long flags;
1864
1865         if (tracing_disabled || tracing_selftest_running)
1866                 return;
1867
1868         local_save_flags(flags);
1869
1870         /*
1871          * Skip 3 more, seems to get us at the caller of
1872          * this function.
1873          */
1874         skip += 3;
1875         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1876                              flags, skip, preempt_count(), NULL);
1877 }
1878
1879 static DEFINE_PER_CPU(int, user_stack_count);
1880
1881 void
1882 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1883 {
1884         struct ftrace_event_call *call = &event_user_stack;
1885         struct ring_buffer_event *event;
1886         struct userstack_entry *entry;
1887         struct stack_trace trace;
1888
1889         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1890                 return;
1891
1892         /*
1893          * NMIs can not handle page faults, even with fix ups.
1894          * The save user stack can (and often does) fault.
1895          */
1896         if (unlikely(in_nmi()))
1897                 return;
1898
1899         /*
1900          * prevent recursion, since the user stack tracing may
1901          * trigger other kernel events.
1902          */
1903         preempt_disable();
1904         if (__this_cpu_read(user_stack_count))
1905                 goto out;
1906
1907         __this_cpu_inc(user_stack_count);
1908
1909         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1910                                           sizeof(*entry), flags, pc);
1911         if (!event)
1912                 goto out_drop_count;
1913         entry   = ring_buffer_event_data(event);
1914
1915         entry->tgid             = current->tgid;
1916         memset(&entry->caller, 0, sizeof(entry->caller));
1917
1918         trace.nr_entries        = 0;
1919         trace.max_entries       = FTRACE_STACK_ENTRIES;
1920         trace.skip              = 0;
1921         trace.entries           = entry->caller;
1922
1923         save_stack_trace_user(&trace);
1924         if (!call_filter_check_discard(call, entry, buffer, event))
1925                 __buffer_unlock_commit(buffer, event);
1926
1927  out_drop_count:
1928         __this_cpu_dec(user_stack_count);
1929  out:
1930         preempt_enable();
1931 }
1932
#ifdef UNUSED
/*
 * Record the current task's user stack into the ring buffer of @tr.
 *
 * ftrace_trace_userstack() takes a struct ring_buffer pointer, not a
 * struct trace_array pointer, so the buffer that belongs to @tr is
 * passed (the same way __trace_stack() does it).
 */
static void __trace_userstack(struct trace_array *tr, unsigned long flags)
{
	ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
}
#endif /* UNUSED */
1939
1940 #endif /* CONFIG_STACKTRACE */
1941
1942 /* created for use with alloc_percpu */
1943 struct trace_buffer_struct {
1944         char buffer[TRACE_BUF_SIZE];
1945 };
1946
1947 static struct trace_buffer_struct *trace_percpu_buffer;
1948 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1949 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1950 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1951
1952 /*
1953  * The buffer used is dependent on the context. There is a per cpu
1954  * buffer for normal context, softirq contex, hard irq context and
1955  * for NMI context. Thise allows for lockless recording.
1956  *
1957  * Note, if the buffers failed to be allocated, then this returns NULL
1958  */
1959 static char *get_trace_buf(void)
1960 {
1961         struct trace_buffer_struct *percpu_buffer;
1962
1963         /*
1964          * If we have allocated per cpu buffers, then we do not
1965          * need to do any locking.
1966          */
1967         if (in_nmi())
1968                 percpu_buffer = trace_percpu_nmi_buffer;
1969         else if (in_irq())
1970                 percpu_buffer = trace_percpu_irq_buffer;
1971         else if (in_softirq())
1972                 percpu_buffer = trace_percpu_sirq_buffer;
1973         else
1974                 percpu_buffer = trace_percpu_buffer;
1975
1976         if (!percpu_buffer)
1977                 return NULL;
1978
1979         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1980 }
1981
1982 static int alloc_percpu_trace_buffer(void)
1983 {
1984         struct trace_buffer_struct *buffers;
1985         struct trace_buffer_struct *sirq_buffers;
1986         struct trace_buffer_struct *irq_buffers;
1987         struct trace_buffer_struct *nmi_buffers;
1988
1989         buffers = alloc_percpu(struct trace_buffer_struct);
1990         if (!buffers)
1991                 goto err_warn;
1992
1993         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1994         if (!sirq_buffers)
1995                 goto err_sirq;
1996
1997         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1998         if (!irq_buffers)
1999                 goto err_irq;
2000
2001         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2002         if (!nmi_buffers)
2003                 goto err_nmi;
2004
2005         trace_percpu_buffer = buffers;
2006         trace_percpu_sirq_buffer = sirq_buffers;
2007         trace_percpu_irq_buffer = irq_buffers;
2008         trace_percpu_nmi_buffer = nmi_buffers;
2009
2010         return 0;
2011
2012  err_nmi:
2013         free_percpu(irq_buffers);
2014  err_irq:
2015         free_percpu(sirq_buffers);
2016  err_sirq:
2017         free_percpu(buffers);
2018  err_warn:
2019         WARN(1, "Could not allocate percpu trace_printk buffer");
2020         return -ENOMEM;
2021 }
2022
2023 static int buffers_allocated;
2024
2025 void trace_printk_init_buffers(void)
2026 {
2027         if (buffers_allocated)
2028                 return;
2029
2030         if (alloc_percpu_trace_buffer())
2031                 return;
2032
2033         /* trace_printk() is for debug use only. Don't use it in production. */
2034
2035         pr_warning("\n**********************************************************\n");
2036         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2037         pr_warning("**                                                      **\n");
2038         pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2039         pr_warning("**                                                      **\n");
2040         pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2041         pr_warning("** unsafe for produciton use.                           **\n");
2042         pr_warning("**                                                      **\n");
2043         pr_warning("** If you see this message and you are not debugging    **\n");
2044         pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2045         pr_warning("**                                                      **\n");
2046         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2047         pr_warning("**********************************************************\n");
2048
2049         /* Expand the buffers to set size */
2050         tracing_update_buffers();
2051
2052         buffers_allocated = 1;
2053
2054         /*
2055          * trace_printk_init_buffers() can be called by modules.
2056          * If that happens, then we need to start cmdline recording
2057          * directly here. If the global_trace.buffer is already
2058          * allocated here, then this was called by module code.
2059          */
2060         if (global_trace.trace_buffer.buffer)
2061                 tracing_start_cmdline_record();
2062 }
2063
2064 void trace_printk_start_comm(void)
2065 {
2066         /* Start tracing comms if trace printk is set */
2067         if (!buffers_allocated)
2068                 return;
2069         tracing_start_cmdline_record();
2070 }
2071
2072 static void trace_printk_start_stop_comm(int enabled)
2073 {
2074         if (!buffers_allocated)
2075                 return;
2076
2077         if (enabled)
2078                 tracing_start_cmdline_record();
2079         else
2080                 tracing_stop_cmdline_record();
2081 }
2082
2083 /**
2084  * trace_vbprintk - write binary msg to tracing buffer
2085  *
2086  */
2087 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2088 {
2089         struct ftrace_event_call *call = &event_bprint;
2090         struct ring_buffer_event *event;
2091         struct ring_buffer *buffer;
2092         struct trace_array *tr = &global_trace;
2093         struct bprint_entry *entry;
2094         unsigned long flags;
2095         char *tbuffer;
2096         int len = 0, size, pc;
2097
2098         if (unlikely(tracing_selftest_running || tracing_disabled))
2099                 return 0;
2100
2101         /* Don't pollute graph traces with trace_vprintk internals */
2102         pause_graph_tracing();
2103
2104         pc = preempt_count();
2105         preempt_disable_notrace();
2106
2107         tbuffer = get_trace_buf();
2108         if (!tbuffer) {
2109                 len = 0;
2110                 goto out;
2111         }
2112
2113         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2114
2115         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2116                 goto out;
2117
2118         local_save_flags(flags);
2119         size = sizeof(*entry) + sizeof(u32) * len;
2120         buffer = tr->trace_buffer.buffer;
2121         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2122                                           flags, pc);
2123         if (!event)
2124                 goto out;
2125         entry = ring_buffer_event_data(event);
2126         entry->ip                       = ip;
2127         entry->fmt                      = fmt;
2128
2129         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2130         if (!call_filter_check_discard(call, entry, buffer, event)) {
2131                 __buffer_unlock_commit(buffer, event);
2132                 ftrace_trace_stack(buffer, flags, 6, pc);
2133         }
2134
2135 out:
2136         preempt_enable_notrace();
2137         unpause_graph_tracing();
2138
2139         return len;
2140 }
2141 EXPORT_SYMBOL_GPL(trace_vbprintk);
2142
2143 static int
2144 __trace_array_vprintk(struct ring_buffer *buffer,
2145                       unsigned long ip, const char *fmt, va_list args)
2146 {
2147         struct ftrace_event_call *call = &event_print;
2148         struct ring_buffer_event *event;
2149         int len = 0, size, pc;
2150         struct print_entry *entry;
2151         unsigned long flags;
2152         char *tbuffer;
2153
2154         if (tracing_disabled || tracing_selftest_running)
2155                 return 0;
2156
2157         /* Don't pollute graph traces with trace_vprintk internals */
2158         pause_graph_tracing();
2159
2160         pc = preempt_count();
2161         preempt_disable_notrace();
2162
2163
2164         tbuffer = get_trace_buf();
2165         if (!tbuffer) {
2166                 len = 0;
2167                 goto out;
2168         }
2169
2170         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2171         if (len > TRACE_BUF_SIZE)
2172                 goto out;
2173
2174         local_save_flags(flags);
2175         size = sizeof(*entry) + len + 1;
2176         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2177                                           flags, pc);
2178         if (!event)
2179                 goto out;
2180         entry = ring_buffer_event_data(event);
2181         entry->ip = ip;
2182
2183         memcpy(&entry->buf, tbuffer, len);
2184         entry->buf[len] = '\0';
2185         if (!call_filter_check_discard(call, entry, buffer, event)) {
2186                 __buffer_unlock_commit(buffer, event);
2187                 ftrace_trace_stack(buffer, flags, 6, pc);
2188         }
2189  out:
2190         preempt_enable_notrace();
2191         unpause_graph_tracing();
2192
2193         return len;
2194 }
2195
2196 int trace_array_vprintk(struct trace_array *tr,
2197                         unsigned long ip, const char *fmt, va_list args)
2198 {
2199         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2200 }
2201
2202 int trace_array_printk(struct trace_array *tr,
2203                        unsigned long ip, const char *fmt, ...)
2204 {
2205         int ret;
2206         va_list ap;
2207
2208         if (!(trace_flags & TRACE_ITER_PRINTK))
2209                 return 0;
2210
2211         va_start(ap, fmt);
2212         ret = trace_array_vprintk(tr, ip, fmt, ap);
2213         va_end(ap);
2214         return ret;
2215 }
2216
2217 int trace_array_printk_buf(struct ring_buffer *buffer,
2218                            unsigned long ip, const char *fmt, ...)
2219 {
2220         int ret;
2221         va_list ap;
2222
2223         if (!(trace_flags & TRACE_ITER_PRINTK))
2224                 return 0;
2225
2226         va_start(ap, fmt);
2227         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2228         va_end(ap);
2229         return ret;
2230 }
2231
2232 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2233 {
2234         return trace_array_vprintk(&global_trace, ip, fmt, args);
2235 }
2236 EXPORT_SYMBOL_GPL(trace_vprintk);
2237
2238 static void trace_iterator_increment(struct trace_iterator *iter)
2239 {
2240         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2241
2242         iter->idx++;
2243         if (buf_iter)
2244                 ring_buffer_read(buf_iter, NULL);
2245 }
2246
2247 static struct trace_entry *
2248 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2249                 unsigned long *lost_events)
2250 {
2251         struct ring_buffer_event *event;
2252         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2253
2254         if (buf_iter)
2255                 event = ring_buffer_iter_peek(buf_iter, ts);
2256         else
2257                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2258                                          lost_events);
2259
2260         if (event) {
2261                 iter->ent_size = ring_buffer_event_length(event);
2262                 return ring_buffer_event_data(event);
2263         }
2264         iter->ent_size = 0;
2265         return NULL;
2266 }
2267
2268 static struct trace_entry *
2269 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2270                   unsigned long *missing_events, u64 *ent_ts)
2271 {
2272         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2273         struct trace_entry *ent, *next = NULL;
2274         unsigned long lost_events = 0, next_lost = 0;
2275         int cpu_file = iter->cpu_file;
2276         u64 next_ts = 0, ts;
2277         int next_cpu = -1;
2278         int next_size = 0;
2279         int cpu;
2280
2281         /*
2282          * If we are in a per_cpu trace file, don't bother by iterating over
2283          * all cpu and peek directly.
2284          */
2285         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2286                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2287                         return NULL;
2288                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2289                 if (ent_cpu)
2290                         *ent_cpu = cpu_file;
2291
2292                 return ent;
2293         }
2294
2295         for_each_tracing_cpu(cpu) {
2296
2297                 if (ring_buffer_empty_cpu(buffer, cpu))
2298                         continue;
2299
2300                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2301
2302                 /*
2303                  * Pick the entry with the smallest timestamp:
2304                  */
2305                 if (ent && (!next || ts < next_ts)) {
2306                         next = ent;
2307                         next_cpu = cpu;
2308                         next_ts = ts;
2309                         next_lost = lost_events;
2310                         next_size = iter->ent_size;
2311                 }
2312         }
2313
2314         iter->ent_size = next_size;
2315
2316         if (ent_cpu)
2317                 *ent_cpu = next_cpu;
2318
2319         if (ent_ts)
2320                 *ent_ts = next_ts;
2321
2322         if (missing_events)
2323                 *missing_events = next_lost;
2324
2325         return next;
2326 }
2327
2328 /* Find the next real entry, without updating the iterator itself */
2329 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2330                                           int *ent_cpu, u64 *ent_ts)
2331 {
2332         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2333 }
2334
2335 /* Find the next real entry, and increment the iterator to the next entry */
2336 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2337 {
2338         iter->ent = __find_next_entry(iter, &iter->cpu,
2339                                       &iter->lost_events, &iter->ts);
2340
2341         if (iter->ent)
2342                 trace_iterator_increment(iter);
2343
2344         return iter->ent ? iter : NULL;
2345 }
2346
2347 static void trace_consume(struct trace_iterator *iter)
2348 {
2349         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2350                             &iter->lost_events);
2351 }
2352
2353 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2354 {
2355         struct trace_iterator *iter = m->private;
2356         int i = (int)*pos;
2357         void *ent;
2358
2359         WARN_ON_ONCE(iter->leftover);
2360
2361         (*pos)++;
2362
2363         /* can't go backwards */
2364         if (iter->idx > i)
2365                 return NULL;
2366
2367         if (iter->idx < 0)
2368                 ent = trace_find_next_entry_inc(iter);
2369         else
2370                 ent = iter;
2371
2372         while (ent && iter->idx < i)
2373                 ent = trace_find_next_entry_inc(iter);
2374
2375         iter->pos = *pos;
2376
2377         return ent;
2378 }
2379
2380 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2381 {
2382         struct ring_buffer_event *event;
2383         struct ring_buffer_iter *buf_iter;
2384         unsigned long entries = 0;
2385         u64 ts;
2386
2387         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2388
2389         buf_iter = trace_buffer_iter(iter, cpu);
2390         if (!buf_iter)
2391                 return;
2392
2393         ring_buffer_iter_reset(buf_iter);
2394
2395         /*
2396          * We could have the case with the max latency tracers
2397          * that a reset never took place on a cpu. This is evident
2398          * by the timestamp being before the start of the buffer.
2399          */
2400         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2401                 if (ts >= iter->trace_buffer->time_start)
2402                         break;
2403                 entries++;
2404                 ring_buffer_read(buf_iter, NULL);
2405         }
2406
2407         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2408 }
2409
2410 /*
2411  * The current tracer is copied to avoid a global locking
2412  * all around.
2413  */
2414 static void *s_start(struct seq_file *m, loff_t *pos)
2415 {
2416         struct trace_iterator *iter = m->private;
2417         struct trace_array *tr = iter->tr;
2418         int cpu_file = iter->cpu_file;
2419         void *p = NULL;
2420         loff_t l = 0;
2421         int cpu;
2422
2423         /*
2424          * copy the tracer to avoid using a global lock all around.
2425          * iter->trace is a copy of current_trace, the pointer to the
2426          * name may be used instead of a strcmp(), as iter->trace->name
2427          * will point to the same string as current_trace->name.
2428          */
2429         mutex_lock(&trace_types_lock);
2430         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2431                 *iter->trace = *tr->current_trace;
2432         mutex_unlock(&trace_types_lock);
2433
2434 #ifdef CONFIG_TRACER_MAX_TRACE
2435         if (iter->snapshot && iter->trace->use_max_tr)
2436                 return ERR_PTR(-EBUSY);
2437 #endif
2438
2439         if (!iter->snapshot)
2440                 atomic_inc(&trace_record_cmdline_disabled);
2441
2442         if (*pos != iter->pos) {
2443                 iter->ent = NULL;
2444                 iter->cpu = 0;
2445                 iter->idx = -1;
2446
2447                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2448                         for_each_tracing_cpu(cpu)
2449                                 tracing_iter_reset(iter, cpu);
2450                 } else
2451                         tracing_iter_reset(iter, cpu_file);
2452
2453                 iter->leftover = 0;
2454                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2455                         ;
2456
2457         } else {
2458                 /*
2459                  * If we overflowed the seq_file before, then we want
2460                  * to just reuse the trace_seq buffer again.
2461                  */
2462                 if (iter->leftover)
2463                         p = iter;
2464                 else {
2465                         l = *pos - 1;
2466                         p = s_next(m, p, &l);
2467                 }
2468         }
2469
2470         trace_event_read_lock();
2471         trace_access_lock(cpu_file);
2472         return p;
2473 }
2474
2475 static void s_stop(struct seq_file *m, void *p)
2476 {
2477         struct trace_iterator *iter = m->private;
2478
2479 #ifdef CONFIG_TRACER_MAX_TRACE
2480         if (iter->snapshot && iter->trace->use_max_tr)
2481                 return;
2482 #endif
2483
2484         if (!iter->snapshot)
2485                 atomic_dec(&trace_record_cmdline_disabled);
2486
2487         trace_access_unlock(iter->cpu_file);
2488         trace_event_read_unlock();
2489 }
2490
2491 static void
2492 get_total_entries(struct trace_buffer *buf,
2493                   unsigned long *total, unsigned long *entries)
2494 {
2495         unsigned long count;
2496         int cpu;
2497
2498         *total = 0;
2499         *entries = 0;
2500
2501         for_each_tracing_cpu(cpu) {
2502                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2503                 /*
2504                  * If this buffer has skipped entries, then we hold all
2505                  * entries for the trace and we need to ignore the
2506                  * ones before the time stamp.
2507                  */
2508                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2509                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2510                         /* total is the same as the entries */
2511                         *total += count;
2512                 } else
2513                         *total += count +
2514                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2515                 *entries += count;
2516         }
2517 }
2518
2519 static void print_lat_help_header(struct seq_file *m)
2520 {
2521         seq_puts(m, "#                  _------=> CPU#            \n");
2522         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2523         seq_puts(m, "#                | / _----=> need-resched    \n");
2524         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2525         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2526         seq_puts(m, "#                |||| /     delay             \n");
2527         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2528         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2529 }
2530
/*
 * Emit the "entries-in-buffer/entries-written" summary line for @buf
 * together with the number of online cpus.
 */
static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
{
	unsigned long written;
	unsigned long in_buffer;

	get_total_entries(buf, &written, &in_buffer);

	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   in_buffer, written, num_online_cpus());
	seq_puts(m, "#\n");
}
2541
/* Emit the event summary followed by the plain column headings. */
static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
{
	static const char heading[] =
		"#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n";
	static const char markers[] =
		"#              | |       |          |         |\n";

	print_event_info(buf, m);
	seq_puts(m, heading);
	seq_puts(m, markers);
}
2548
2549 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2550 {
2551         print_event_info(buf, m);
2552         seq_puts(m, "#                              _-----=> irqs-off\n");
2553         seq_puts(m, "#                             / _----=> need-resched\n");
2554         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2555         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2556         seq_puts(m, "#                            ||| /     delay\n");
2557         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2558         seq_puts(m, "#              | |       |   ||||       |         |\n");
2559 }
2560
2561 void
2562 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2563 {
2564         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2565         struct trace_buffer *buf = iter->trace_buffer;
2566         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2567         struct tracer *type = iter->trace;
2568         unsigned long entries;
2569         unsigned long total;
2570         const char *name = "preemption";
2571
2572         name = type->name;
2573
2574         get_total_entries(buf, &total, &entries);
2575
2576         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2577                    name, UTS_RELEASE);
2578         seq_puts(m, "# -----------------------------------"
2579                  "---------------------------------\n");
2580         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2581                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2582                    nsecs_to_usecs(data->saved_latency),
2583                    entries,
2584                    total,
2585                    buf->cpu,
2586 #if defined(CONFIG_PREEMPT_NONE)
2587                    "server",
2588 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2589                    "desktop",
2590 #elif defined(CONFIG_PREEMPT)
2591                    "preempt",
2592 #else
2593                    "unknown",
2594 #endif
2595                    /* These are reserved for later use */
2596                    0, 0, 0, 0);
2597 #ifdef CONFIG_SMP
2598         seq_printf(m, " #P:%d)\n", num_online_cpus());
2599 #else
2600         seq_puts(m, ")\n");
2601 #endif
2602         seq_puts(m, "#    -----------------\n");
2603         seq_printf(m, "#    | task: %.16s-%d "
2604                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2605                    data->comm, data->pid,
2606                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2607                    data->policy, data->rt_priority);
2608         seq_puts(m, "#    -----------------\n");
2609
2610         if (data->critical_start) {
2611                 seq_puts(m, "#  => started at: ");
2612                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2613                 trace_print_seq(m, &iter->seq);
2614                 seq_puts(m, "\n#  => ended at:   ");
2615                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2616                 trace_print_seq(m, &iter->seq);
2617                 seq_puts(m, "\n#\n");
2618         }
2619
2620         seq_puts(m, "#\n");
2621 }
2622
2623 static void test_cpu_buff_start(struct trace_iterator *iter)
2624 {
2625         struct trace_seq *s = &iter->seq;
2626
2627         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2628                 return;
2629
2630         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2631                 return;
2632
2633         if (cpumask_test_cpu(iter->cpu, iter->started))
2634                 return;
2635
2636         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2637                 return;
2638
2639         cpumask_set_cpu(iter->cpu, iter->started);
2640
2641         /* Don't print started cpu buffer for the first entry of the trace */
2642         if (iter->idx > 1)
2643                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2644                                 iter->cpu);
2645 }
2646
2647 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2648 {
2649         struct trace_seq *s = &iter->seq;
2650         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2651         struct trace_entry *entry;
2652         struct trace_event *event;
2653
2654         entry = iter->ent;
2655
2656         test_cpu_buff_start(iter);
2657
2658         event = ftrace_find_event(entry->type);
2659
2660         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2661                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2662                         if (!trace_print_lat_context(iter))
2663                                 goto partial;
2664                 } else {
2665                         if (!trace_print_context(iter))
2666                                 goto partial;
2667                 }
2668         }
2669
2670         if (event)
2671                 return event->funcs->trace(iter, sym_flags, event);
2672
2673         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2674                 goto partial;
2675
2676         return TRACE_TYPE_HANDLED;
2677 partial:
2678         return TRACE_TYPE_PARTIAL_LINE;
2679 }
2680
2681 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2682 {
2683         struct trace_seq *s = &iter->seq;
2684         struct trace_entry *entry;
2685         struct trace_event *event;
2686
2687         entry = iter->ent;
2688
2689         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2690                 if (!trace_seq_printf(s, "%d %d %llu ",
2691                                       entry->pid, iter->cpu, iter->ts))
2692                         goto partial;
2693         }
2694
2695         event = ftrace_find_event(entry->type);
2696         if (event)
2697                 return event->funcs->raw(iter, 0, event);
2698
2699         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2700                 goto partial;
2701
2702         return TRACE_TYPE_HANDLED;
2703 partial:
2704         return TRACE_TYPE_PARTIAL_LINE;
2705 }
2706
2707 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2708 {
2709         struct trace_seq *s = &iter->seq;
2710         unsigned char newline = '\n';
2711         struct trace_entry *entry;
2712         struct trace_event *event;
2713
2714         entry = iter->ent;
2715
2716         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2717                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2718                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2719                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2720         }
2721
2722         event = ftrace_find_event(entry->type);
2723         if (event) {
2724                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2725                 if (ret != TRACE_TYPE_HANDLED)
2726                         return ret;
2727         }
2728
2729         SEQ_PUT_FIELD_RET(s, newline);
2730
2731         return TRACE_TYPE_HANDLED;
2732 }
2733
2734 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2735 {
2736         struct trace_seq *s = &iter->seq;
2737         struct trace_entry *entry;
2738         struct trace_event *event;
2739
2740         entry = iter->ent;
2741
2742         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2743                 SEQ_PUT_FIELD_RET(s, entry->pid);
2744                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2745                 SEQ_PUT_FIELD_RET(s, iter->ts);
2746         }
2747
2748         event = ftrace_find_event(entry->type);
2749         return event ? event->funcs->binary(iter, 0, event) :
2750                 TRACE_TYPE_HANDLED;
2751 }
2752
2753 int trace_empty(struct trace_iterator *iter)
2754 {
2755         struct ring_buffer_iter *buf_iter;
2756         int cpu;
2757
2758         /* If we are looking at one CPU buffer, only check that one */
2759         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2760                 cpu = iter->cpu_file;
2761                 buf_iter = trace_buffer_iter(iter, cpu);
2762                 if (buf_iter) {
2763                         if (!ring_buffer_iter_empty(buf_iter))
2764                                 return 0;
2765                 } else {
2766                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2767                                 return 0;
2768                 }
2769                 return 1;
2770         }
2771
2772         for_each_tracing_cpu(cpu) {
2773                 buf_iter = trace_buffer_iter(iter, cpu);
2774                 if (buf_iter) {
2775                         if (!ring_buffer_iter_empty(buf_iter))
2776                                 return 0;
2777                 } else {
2778                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2779                                 return 0;
2780                 }
2781         }
2782
2783         return 1;
2784 }
2785
2786 /*  Called with trace_event_read_lock() held. */
2787 enum print_line_t print_trace_line(struct trace_iterator *iter)
2788 {
2789         enum print_line_t ret;
2790
2791         if (iter->lost_events &&
2792             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2793                                  iter->cpu, iter->lost_events))
2794                 return TRACE_TYPE_PARTIAL_LINE;
2795
2796         if (iter->trace && iter->trace->print_line) {
2797                 ret = iter->trace->print_line(iter);
2798                 if (ret != TRACE_TYPE_UNHANDLED)
2799                         return ret;
2800         }
2801
2802         if (iter->ent->type == TRACE_BPUTS &&
2803                         trace_flags & TRACE_ITER_PRINTK &&
2804                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2805                 return trace_print_bputs_msg_only(iter);
2806
2807         if (iter->ent->type == TRACE_BPRINT &&
2808                         trace_flags & TRACE_ITER_PRINTK &&
2809                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2810                 return trace_print_bprintk_msg_only(iter);
2811
2812         if (iter->ent->type == TRACE_PRINT &&
2813                         trace_flags & TRACE_ITER_PRINTK &&
2814                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2815                 return trace_print_printk_msg_only(iter);
2816
2817         if (trace_flags & TRACE_ITER_BIN)
2818                 return print_bin_fmt(iter);
2819
2820         if (trace_flags & TRACE_ITER_HEX)
2821                 return print_hex_fmt(iter);
2822
2823         if (trace_flags & TRACE_ITER_RAW)
2824                 return print_raw_fmt(iter);
2825
2826         return print_trace_fmt(iter);
2827 }
2828
2829 void trace_latency_header(struct seq_file *m)
2830 {
2831         struct trace_iterator *iter = m->private;
2832
2833         /* print nothing if the buffers are empty */
2834         if (trace_empty(iter))
2835                 return;
2836
2837         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2838                 print_trace_header(m, iter);
2839
2840         if (!(trace_flags & TRACE_ITER_VERBOSE))
2841                 print_lat_help_header(m);
2842 }
2843
2844 void trace_default_header(struct seq_file *m)
2845 {
2846         struct trace_iterator *iter = m->private;
2847
2848         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2849                 return;
2850
2851         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2852                 /* print nothing if the buffers are empty */
2853                 if (trace_empty(iter))
2854                         return;
2855                 print_trace_header(m, iter);
2856                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2857                         print_lat_help_header(m);
2858         } else {
2859                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2860                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2861                                 print_func_help_header_irq(iter->trace_buffer, m);
2862                         else
2863                                 print_func_help_header(iter->trace_buffer, m);
2864                 }
2865         }
2866 }
2867
/* Warn in the trace header when ftrace_is_dead() reports a problem. */
static void test_ftrace_alive(struct seq_file *m)
{
        if (ftrace_is_dead()) {
                seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
                seq_puts(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
        }
}
2875
2876 #ifdef CONFIG_TRACER_MAX_TRACE
/* Usage text for the top-level (all CPUs) snapshot file. */
static void show_snapshot_main_help(struct seq_file *m)
{
        seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
        seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
        seq_puts(m, "#                      Takes a snapshot of the main buffer.\n");
        seq_puts(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
        seq_puts(m, "#                      (Doesn't have to be '2' works with any number that\n");
        seq_puts(m, "#                       is not a '0' or '1')\n");
}
2886
/*
 * Usage text for a per_cpu snapshot file. The "echo 1" text depends on
 * whether CONFIG_RING_BUFFER_ALLOW_SWAP is enabled.
 */
static void show_snapshot_percpu_help(struct seq_file *m)
{
        seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
        seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
        seq_puts(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
#else
        seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
        seq_puts(m, "#                     Must use main snapshot file to allocate.\n");
#endif
        seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
        seq_puts(m, "#                      (Doesn't have to be '2' works with any number that\n");
        seq_puts(m, "#                       is not a '0' or '1')\n");
}
2901
2902 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2903 {
2904         if (iter->tr->allocated_snapshot)
2905                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2906         else
2907                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2908
2909         seq_printf(m, "# Snapshot commands:\n");
2910         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2911                 show_snapshot_main_help(m);
2912         else
2913                 show_snapshot_percpu_help(m);
2914 }
2915 #else
/*
 * Empty stand-in used when CONFIG_TRACER_MAX_TRACE is not set.
 * Should never be called.
 */
static inline void
print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
}
2918 #endif
2919
2920 static int s_show(struct seq_file *m, void *v)
2921 {
2922         struct trace_iterator *iter = v;
2923         int ret;
2924
2925         if (iter->ent == NULL) {
2926                 if (iter->tr) {
2927                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2928                         seq_puts(m, "#\n");
2929                         test_ftrace_alive(m);
2930                 }
2931                 if (iter->snapshot && trace_empty(iter))
2932                         print_snapshot_help(m, iter);
2933                 else if (iter->trace && iter->trace->print_header)
2934                         iter->trace->print_header(m);
2935                 else
2936                         trace_default_header(m);
2937
2938         } else if (iter->leftover) {
2939                 /*
2940                  * If we filled the seq_file buffer earlier, we
2941                  * want to just show it now.
2942                  */
2943                 ret = trace_print_seq(m, &iter->seq);
2944
2945                 /* ret should this time be zero, but you never know */
2946                 iter->leftover = ret;
2947
2948         } else {
2949                 print_trace_line(iter);
2950                 ret = trace_print_seq(m, &iter->seq);
2951                 /*
2952                  * If we overflow the seq_file buffer, then it will
2953                  * ask us for this data again at start up.
2954                  * Use that instead.
2955                  *  ret is 0 if seq_file write succeeded.
2956                  *        -1 otherwise.
2957                  */
2958                 iter->leftover = ret;
2959         }
2960
2961         return 0;
2962 }
2963
2964 /*
2965  * Should be used after trace_array_get(), trace_types_lock
2966  * ensures that i_cdev was already initialized.
2967  */
2968 static inline int tracing_get_cpu(struct inode *inode)
2969 {
2970         if (inode->i_cdev) /* See trace_create_cpu_file() */
2971                 return (long)inode->i_cdev - 1;
2972         return RING_BUFFER_ALL_CPUS;
2973 }
2974
2975 static const struct seq_operations tracer_seq_ops = {
2976         .start          = s_start,
2977         .next           = s_next,
2978         .stop           = s_stop,
2979         .show           = s_show,
2980 };
2981
2982 static struct trace_iterator *
2983 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2984 {
2985         struct trace_array *tr = inode->i_private;
2986         struct trace_iterator *iter;
2987         int cpu;
2988
2989         if (tracing_disabled)
2990                 return ERR_PTR(-ENODEV);
2991
2992         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2993         if (!iter)
2994                 return ERR_PTR(-ENOMEM);
2995
2996         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2997                                     GFP_KERNEL);
2998         if (!iter->buffer_iter)
2999                 goto release;
3000
3001         /*
3002          * We make a copy of the current tracer to avoid concurrent
3003          * changes on it while we are reading.
3004          */
3005         mutex_lock(&trace_types_lock);
3006         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3007         if (!iter->trace)
3008                 goto fail;
3009
3010         *iter->trace = *tr->current_trace;
3011
3012         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3013                 goto fail;
3014
3015         iter->tr = tr;
3016
3017 #ifdef CONFIG_TRACER_MAX_TRACE
3018         /* Currently only the top directory has a snapshot */
3019         if (tr->current_trace->print_max || snapshot)
3020                 iter->trace_buffer = &tr->max_buffer;
3021         else
3022 #endif
3023                 iter->trace_buffer = &tr->trace_buffer;
3024         iter->snapshot = snapshot;
3025         iter->pos = -1;
3026         iter->cpu_file = tracing_get_cpu(inode);
3027         mutex_init(&iter->mutex);
3028
3029         /* Notify the tracer early; before we stop tracing. */
3030         if (iter->trace && iter->trace->open)
3031                 iter->trace->open(iter);
3032
3033         /* Annotate start of buffers if we had overruns */
3034         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3035                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3036
3037         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3038         if (trace_clocks[tr->clock_id].in_ns)
3039                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3040
3041         /* stop the trace while dumping if we are not opening "snapshot" */
3042         if (!iter->snapshot)
3043                 tracing_stop_tr(tr);
3044
3045         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3046                 for_each_tracing_cpu(cpu) {
3047                         iter->buffer_iter[cpu] =
3048                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3049                 }
3050                 ring_buffer_read_prepare_sync();
3051                 for_each_tracing_cpu(cpu) {
3052                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3053                         tracing_iter_reset(iter, cpu);
3054                 }
3055         } else {
3056                 cpu = iter->cpu_file;
3057                 iter->buffer_iter[cpu] =
3058                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3059                 ring_buffer_read_prepare_sync();
3060                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3061                 tracing_iter_reset(iter, cpu);
3062         }
3063
3064         mutex_unlock(&trace_types_lock);
3065
3066         return iter;
3067
3068  fail:
3069         mutex_unlock(&trace_types_lock);
3070         kfree(iter->trace);
3071         kfree(iter->buffer_iter);
3072 release:
3073         seq_release_private(inode, file);
3074         return ERR_PTR(-ENOMEM);
3075 }
3076
3077 int tracing_open_generic(struct inode *inode, struct file *filp)
3078 {
3079         if (tracing_disabled)
3080                 return -ENODEV;
3081
3082         filp->private_data = inode->i_private;
3083         return 0;
3084 }
3085
3086 bool tracing_is_disabled(void)
3087 {
3088         return (tracing_disabled) ? true: false;
3089 }
3090
3091 /*
3092  * Open and update trace_array ref count.
3093  * Must have the current trace_array passed to it.
3094  */
3095 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3096 {
3097         struct trace_array *tr = inode->i_private;
3098
3099         if (tracing_disabled)
3100                 return -ENODEV;
3101
3102         if (trace_array_get(tr) < 0)
3103                 return -ENODEV;
3104
3105         filp->private_data = inode->i_private;
3106
3107         return 0;
3108 }
3109
3110 static int tracing_release(struct inode *inode, struct file *file)
3111 {
3112         struct trace_array *tr = inode->i_private;
3113         struct seq_file *m = file->private_data;
3114         struct trace_iterator *iter;
3115         int cpu;
3116
3117         if (!(file->f_mode & FMODE_READ)) {
3118                 trace_array_put(tr);
3119                 return 0;
3120         }
3121
3122         /* Writes do not use seq_file */
3123         iter = m->private;
3124         mutex_lock(&trace_types_lock);
3125
3126         for_each_tracing_cpu(cpu) {
3127                 if (iter->buffer_iter[cpu])
3128                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3129         }
3130
3131         if (iter->trace && iter->trace->close)
3132                 iter->trace->close(iter);
3133
3134         if (!iter->snapshot)
3135                 /* reenable tracing if it was previously enabled */
3136                 tracing_start_tr(tr);
3137
3138         __trace_array_put(tr);
3139
3140         mutex_unlock(&trace_types_lock);
3141
3142         mutex_destroy(&iter->mutex);
3143         free_cpumask_var(iter->started);
3144         kfree(iter->trace);
3145         kfree(iter->buffer_iter);
3146         seq_release_private(inode, file);
3147
3148         return 0;
3149 }
3150
3151 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3152 {
3153         struct trace_array *tr = inode->i_private;
3154
3155         trace_array_put(tr);
3156         return 0;
3157 }
3158
3159 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3160 {
3161         struct trace_array *tr = inode->i_private;
3162
3163         trace_array_put(tr);
3164
3165         return single_release(inode, file);
3166 }
3167
3168 static int tracing_open(struct inode *inode, struct file *file)
3169 {
3170         struct trace_array *tr = inode->i_private;
3171         struct trace_iterator *iter;
3172         int ret = 0;
3173
3174         if (trace_array_get(tr) < 0)
3175                 return -ENODEV;
3176
3177         /* If this file was open for write, then erase contents */
3178         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3179                 int cpu = tracing_get_cpu(inode);
3180
3181                 if (cpu == RING_BUFFER_ALL_CPUS)
3182                         tracing_reset_online_cpus(&tr->trace_buffer);
3183                 else
3184                         tracing_reset(&tr->trace_buffer, cpu);
3185         }
3186
3187         if (file->f_mode & FMODE_READ) {
3188                 iter = __tracing_open(inode, file, false);
3189                 if (IS_ERR(iter))
3190                         ret = PTR_ERR(iter);
3191                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3192                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3193         }
3194
3195         if (ret < 0)
3196                 trace_array_put(tr);
3197
3198         return ret;
3199 }
3200
3201 /*
3202  * Some tracers are not suitable for instance buffers.
3203  * A tracer is always available for the global array (toplevel)
3204  * or if it explicitly states that it is.
3205  */
3206 static bool
3207 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3208 {
3209         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3210 }
3211
3212 /* Find the next tracer that this trace array may use */
3213 static struct tracer *
3214 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3215 {
3216         while (t && !trace_ok_for_array(t, tr))
3217                 t = t->next;
3218
3219         return t;
3220 }
3221
3222 static void *
3223 t_next(struct seq_file *m, void *v, loff_t *pos)
3224 {
3225         struct trace_array *tr = m->private;
3226         struct tracer *t = v;
3227
3228         (*pos)++;
3229
3230         if (t)
3231                 t = get_tracer_for_array(tr, t->next);
3232
3233         return t;
3234 }
3235
3236 static void *t_start(struct seq_file *m, loff_t *pos)
3237 {
3238         struct trace_array *tr = m->private;
3239         struct tracer *t;
3240         loff_t l = 0;
3241
3242         mutex_lock(&trace_types_lock);
3243
3244         t = get_tracer_for_array(tr, trace_types);
3245         for (; t && l < *pos; t = t_next(m, t, &l))
3246                         ;
3247
3248         return t;
3249 }
3250
3251 static void t_stop(struct seq_file *m, void *p)
3252 {
3253         mutex_unlock(&trace_types_lock);
3254 }
3255
3256 static int t_show(struct seq_file *m, void *v)
3257 {
3258         struct tracer *t = v;
3259
3260         if (!t)
3261                 return 0;
3262
3263         seq_printf(m, "%s", t->name);
3264         if (t->next)
3265                 seq_putc(m, ' ');
3266         else
3267                 seq_putc(m, '\n');
3268
3269         return 0;
3270 }
3271
3272 static const struct seq_operations show_traces_seq_ops = {
3273         .start          = t_start,
3274         .next           = t_next,
3275         .stop           = t_stop,
3276         .show           = t_show,
3277 };
3278
3279 static int show_traces_open(struct inode *inode, struct file *file)
3280 {
3281         struct trace_array *tr = inode->i_private;
3282         struct seq_file *m;
3283         int ret;
3284
3285         if (tracing_disabled)
3286                 return -ENODEV;
3287
3288         ret = seq_open(file, &show_traces_seq_ops);
3289         if (ret)
3290                 return ret;
3291
3292         m = file->private_data;
3293         m->private = tr;
3294
3295         return 0;
3296 }
3297
3298 static ssize_t
3299 tracing_write_stub(struct file *filp, const char __user *ubuf,
3300                    size_t count, loff_t *ppos)
3301 {
3302         return count;
3303 }
3304
3305 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3306 {
3307         int ret;
3308
3309         if (file->f_mode & FMODE_READ)
3310                 ret = seq_lseek(file, offset, whence);
3311         else
3312                 file->f_pos = ret = 0;
3313
3314         return ret;
3315 }
3316
3317 static const struct file_operations tracing_fops = {
3318         .open           = tracing_open,
3319         .read           = seq_read,
3320         .write          = tracing_write_stub,
3321         .llseek         = tracing_lseek,
3322         .release        = tracing_release,
3323 };
3324
3325 static const struct file_operations show_traces_fops = {
3326         .open           = show_traces_open,
3327         .read           = seq_read,
3328         .release        = seq_release,
3329         .llseek         = seq_lseek,
3330 };
3331
3332 /*
3333  * The tracer itself will not take this lock, but still we want
3334  * to provide a consistent cpumask to user-space:
3335  */
3336 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3337
3338 /*
3339  * Temporary storage for the character representation of the
3340  * CPU bitmask (and one more byte for the newline):
3341  */
3342 static char mask_str[NR_CPUS + 1];
3343
3344 static ssize_t
3345 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3346                      size_t count, loff_t *ppos)
3347 {
3348         struct trace_array *tr = file_inode(filp)->i_private;
3349         int len;
3350
3351         mutex_lock(&tracing_cpumask_update_lock);
3352
3353         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3354         if (count - len < 2) {
3355                 count = -EINVAL;
3356                 goto out_err;
3357         }
3358         len += sprintf(mask_str + len, "\n");
3359         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3360
3361 out_err:
3362         mutex_unlock(&tracing_cpumask_update_lock);
3363
3364         return count;
3365 }
3366
3367 static ssize_t
3368 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3369                       size_t count, loff_t *ppos)
3370 {
3371         struct trace_array *tr = file_inode(filp)->i_private;
3372         cpumask_var_t tracing_cpumask_new;
3373         int err, cpu;
3374
3375         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3376                 return -ENOMEM;
3377
3378         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3379         if (err)
3380                 goto err_unlock;
3381
3382         mutex_lock(&tracing_cpumask_update_lock);
3383
3384         local_irq_disable();
3385         arch_spin_lock(&tr->max_lock);
3386         for_each_tracing_cpu(cpu) {
3387                 /*
3388                  * Increase/decrease the disabled counter if we are
3389                  * about to flip a bit in the cpumask:
3390                  */
3391                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3392                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3393                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3394                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3395                 }
3396                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3397                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3398                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3399                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3400                 }
3401         }
3402         arch_spin_unlock(&tr->max_lock);
3403         local_irq_enable();
3404
3405         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3406
3407         mutex_unlock(&tracing_cpumask_update_lock);
3408         free_cpumask_var(tracing_cpumask_new);
3409
3410         return count;
3411
3412 err_unlock:
3413         free_cpumask_var(tracing_cpumask_new);
3414
3415         return err;
3416 }
3417
3418 static const struct file_operations tracing_cpumask_fops = {
3419         .open           = tracing_open_generic_tr,
3420         .read           = tracing_cpumask_read,
3421         .write          = tracing_cpumask_write,
3422         .release        = tracing_release_generic_tr,
3423         .llseek         = generic_file_llseek,
3424 };
3425
3426 static int tracing_trace_options_show(struct seq_file *m, void *v)
3427 {
3428         struct tracer_opt *trace_opts;
3429         struct trace_array *tr = m->private;
3430         u32 tracer_flags;
3431         int i;
3432
3433         mutex_lock(&trace_types_lock);
3434         tracer_flags = tr->current_trace->flags->val;
3435         trace_opts = tr->current_trace->flags->opts;
3436
3437         for (i = 0; trace_options[i]; i++) {
3438                 if (trace_flags & (1 << i))
3439                         seq_printf(m, "%s\n", trace_options[i]);
3440                 else
3441                         seq_printf(m, "no%s\n", trace_options[i]);
3442         }
3443
3444         for (i = 0; trace_opts[i].name; i++) {
3445                 if (tracer_flags & trace_opts[i].bit)
3446                         seq_printf(m, "%s\n", trace_opts[i].name);
3447                 else
3448                         seq_printf(m, "no%s\n", trace_opts[i].name);
3449         }
3450         mutex_unlock(&trace_types_lock);
3451
3452         return 0;
3453 }
3454
3455 static int __set_tracer_option(struct trace_array *tr,
3456                                struct tracer_flags *tracer_flags,
3457                                struct tracer_opt *opts, int neg)
3458 {
3459         struct tracer *trace = tr->current_trace;
3460         int ret;
3461
3462         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3463         if (ret)
3464                 return ret;
3465
3466         if (neg)
3467                 tracer_flags->val &= ~opts->bit;
3468         else
3469                 tracer_flags->val |= opts->bit;
3470         return 0;
3471 }
3472
3473 /* Try to assign a tracer specific option */
3474 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3475 {
3476         struct tracer *trace = tr->current_trace;
3477         struct tracer_flags *tracer_flags = trace->flags;
3478         struct tracer_opt *opts = NULL;
3479         int i;
3480
3481         for (i = 0; tracer_flags->opts[i].name; i++) {
3482                 opts = &tracer_flags->opts[i];
3483
3484                 if (strcmp(cmp, opts->name) == 0)
3485                         return __set_tracer_option(tr, trace->flags, opts, neg);
3486         }
3487
3488         return -EINVAL;
3489 }
3490
3491 /* Some tracers require overwrite to stay enabled */
3492 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3493 {
3494         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3495                 return -1;
3496
3497         return 0;
3498 }
3499
3500 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3501 {
3502         /* do nothing if flag is already set */
3503         if (!!(trace_flags & mask) == !!enabled)
3504                 return 0;
3505
3506         /* Give the tracer a chance to approve the change */
3507         if (tr->current_trace->flag_changed)
3508                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3509                         return -EINVAL;
3510
3511         if (enabled)
3512                 trace_flags |= mask;
3513         else
3514                 trace_flags &= ~mask;
3515
3516         if (mask == TRACE_ITER_RECORD_CMD)
3517                 trace_event_enable_cmd_record(enabled);
3518
3519         if (mask == TRACE_ITER_OVERWRITE) {
3520                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3521 #ifdef CONFIG_TRACER_MAX_TRACE
3522                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3523 #endif
3524         }
3525
3526         if (mask == TRACE_ITER_PRINTK)
3527                 trace_printk_start_stop_comm(enabled);
3528
3529         return 0;
3530 }
3531
3532 static int trace_set_options(struct trace_array *tr, char *option)
3533 {
3534         char *cmp;
3535         int neg = 0;
3536         int ret = -ENODEV;
3537         int i;
3538
3539         cmp = strstrip(option);
3540
3541         if (strncmp(cmp, "no", 2) == 0) {
3542                 neg = 1;
3543                 cmp += 2;
3544         }
3545
3546         mutex_lock(&trace_types_lock);
3547
3548         for (i = 0; trace_options[i]; i++) {
3549                 if (strcmp(cmp, trace_options[i]) == 0) {
3550                         ret = set_tracer_flag(tr, 1 << i, !neg);
3551                         break;
3552                 }
3553         }
3554
3555         /* If no option could be set, test the specific tracer options */
3556         if (!trace_options[i])
3557                 ret = set_tracer_option(tr, cmp, neg);
3558
3559         mutex_unlock(&trace_types_lock);
3560
3561         return ret;
3562 }
3563
3564 static ssize_t
3565 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3566                         size_t cnt, loff_t *ppos)
3567 {
3568         struct seq_file *m = filp->private_data;
3569         struct trace_array *tr = m->private;
3570         char buf[64];
3571         int ret;
3572
3573         if (cnt >= sizeof(buf))
3574                 return -EINVAL;
3575
3576         if (copy_from_user(&buf, ubuf, cnt))
3577                 return -EFAULT;
3578
3579         buf[cnt] = 0;
3580
3581         ret = trace_set_options(tr, buf);
3582         if (ret < 0)
3583                 return ret;
3584
3585         *ppos += cnt;
3586
3587         return cnt;
3588 }
3589
3590 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3591 {
3592         struct trace_array *tr = inode->i_private;
3593         int ret;
3594
3595         if (tracing_disabled)
3596                 return -ENODEV;
3597
3598         if (trace_array_get(tr) < 0)
3599                 return -ENODEV;
3600
3601         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3602         if (ret < 0)
3603                 trace_array_put(tr);
3604
3605         return ret;
3606 }
3607
3608 static const struct file_operations tracing_iter_fops = {
3609         .open           = tracing_trace_options_open,
3610         .read           = seq_read,
3611         .llseek         = seq_lseek,
3612         .release        = tracing_single_release_tr,
3613         .write          = tracing_trace_options_write,
3614 };
3615
/*
 * Text returned verbatim when the README file is read (see
 * tracing_readme_read()). Sections guarded by CONFIG_* options are only
 * included when the corresponding feature is compiled in.
 *
 * The example commands are meant to be copy-and-pasted into a shell, so
 * every quoted argument must have a matching closing quote.
 */
static const char readme_msg[] =
        "tracing mini-HOWTO:\n\n"
        "# echo 0 > tracing_on : quick way to disable tracing\n"
        "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
        " Important files:\n"
        "  trace\t\t\t- The static contents of the buffer\n"
        "\t\t\t  To clear the buffer write into this file: echo > trace\n"
        "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
        "  current_tracer\t- function and latency tracers\n"
        "  available_tracers\t- list of configured tracers for current_tracer\n"
        "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
        "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
        "  trace_clock\t\t- change the clock used to order events\n"
        "       local:   Per cpu clock but may not be synced across CPUs\n"
        "      global:   Synced across CPUs but slows tracing down.\n"
        "     counter:   Not a clock, but just an increment\n"
        "      uptime:   Jiffy counter from time of boot\n"
        "        perf:   Same clock that perf events use\n"
#ifdef CONFIG_X86_64
        "     x86-tsc:   TSC cycle counter\n"
#endif
        "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
        "  tracing_cpumask\t- Limit which CPUs to trace\n"
        "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
        "\t\t\t  Remove sub-buffer with rmdir\n"
        "  trace_options\t\t- Set format or modify how tracing happens\n"
        "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
        "\t\t\t  option name\n"
        "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
        "\n  available_filter_functions - list of functions that can be filtered on\n"
        "  set_ftrace_filter\t- echo function name in here to only trace these\n"
        "\t\t\t  functions\n"
        "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
        "\t     modules: Can select a group via module\n"
        "\t      Format: :mod:<module-name>\n"
        "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
        "\t    triggers: a command to perform when function is hit\n"
        "\t      Format: <function>:<trigger>[:count]\n"
        "\t     trigger: traceon, traceoff\n"
        "\t\t      enable_event:<system>:<event>\n"
        "\t\t      disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
        "\t\t      stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
        "\t\t      snapshot\n"
#endif
        "\t\t      dump\n"
        "\t\t      cpudump\n"
        "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
        "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
        "\t     The first one will disable tracing every time do_fault is hit\n"
        "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
        "\t       The first time do_trap is hit and it disables tracing, the\n"
        "\t       counter will decrement to 2. If tracing is already disabled,\n"
        "\t       the counter will not decrement. It only decrements when the\n"
        "\t       trigger did work\n"
        "\t     To remove trigger without count:\n"
        "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
        "\t     To remove trigger with a count:\n"
        "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
        "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
        "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
        "\t    modules: Can select a group via module command :mod:\n"
        "\t    Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
        "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
        "\t\t    (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
        "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
        "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
        "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
        "\t\t\t  snapshot buffer. Read the contents for more\n"
        "\t\t\t  information\n"
#endif
#ifdef CONFIG_STACK_TRACER
        "  stack_trace\t\t- Shows the max stack trace when active\n"
        "  stack_max_size\t- Shows current max stack size that was traced\n"
        "\t\t\t  Write into this file to reset the max size (trigger a\n"
        "\t\t\t  new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
        "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
        "\t\t\t  traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
        "  events/\t\t- Directory containing all trace event subsystems:\n"
        "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
        "  events/<system>/\t- Directory containing all trace events for <system>:\n"
        "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
        "\t\t\t  events\n"
        "      filter\t\t- If set, only events passing filter are traced\n"
        "  events/<system>/<event>/\t- Directory containing control files for\n"
        "\t\t\t  <event>:\n"
        "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
        "      filter\t\t- If set, only events passing filter are traced\n"
        "      trigger\t\t- If set, a command to perform when event is hit\n"
        "\t    Format: <trigger>[:count][if <filter>]\n"
        "\t   trigger: traceon, traceoff\n"
        "\t            enable_event:<system>:<event>\n"
        "\t            disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
        "\t\t    stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
        "\t\t    snapshot\n"
#endif
        "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
        "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
        "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
        "\t                  events/block/block_unplug/trigger\n"
        "\t   The first disables tracing every time block_unplug is hit.\n"
        "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
        "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
        "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
        "\t   Like function triggers, the counter is only decremented if it\n"
        "\t    enabled or disabled tracing.\n"
        "\t   To remove a trigger without a count:\n"
        "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
        "\t   To remove a trigger with a count:\n"
        "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
        "\t   Filters can be ignored when removing a trigger.\n"
;
3743
3744 static ssize_t
3745 tracing_readme_read(struct file *filp, char __user *ubuf,
3746                        size_t cnt, loff_t *ppos)
3747 {
3748         return simple_read_from_buffer(ubuf, cnt, ppos,
3749                                         readme_msg, strlen(readme_msg));
3750 }
3751
/* File operations for the read-only, seekable README help file. */
static const struct file_operations tracing_readme_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_readme_read,
        .llseek         = generic_file_llseek,
};
3757
3758 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3759 {
3760         unsigned int *ptr = v;
3761
3762         if (*pos || m->count)
3763                 ptr++;
3764
3765         (*pos)++;
3766
3767         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3768              ptr++) {
3769                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3770                         continue;
3771
3772                 return ptr;
3773         }
3774
3775         return NULL;
3776 }
3777
3778 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3779 {
3780         void *v;
3781         loff_t l = 0;
3782
3783         preempt_disable();
3784         arch_spin_lock(&trace_cmdline_lock);
3785
3786         v = &savedcmd->map_cmdline_to_pid[0];
3787         while (l <= *pos) {
3788                 v = saved_cmdlines_next(m, v, &l);
3789                 if (!v)
3790                         return NULL;
3791         }
3792
3793         return v;
3794 }
3795
/*
 * seq_file ->stop callback for saved_cmdlines: drops trace_cmdline_lock
 * and re-enables preemption, undoing what saved_cmdlines_start() did.
 */
static void saved_cmdlines_stop(struct seq_file *m, void *v)
{
        arch_spin_unlock(&trace_cmdline_lock);
        preempt_enable();
}
3801
3802 static int saved_cmdlines_show(struct seq_file *m, void *v)
3803 {
3804         char buf[TASK_COMM_LEN];
3805         unsigned int *pid = v;
3806
3807         __trace_find_cmdline(*pid, buf);
3808         seq_printf(m, "%d %s\n", *pid, buf);
3809         return 0;
3810 }
3811
/*
 * seq_file iterator over the saved pid -> comm map. ->start takes
 * trace_cmdline_lock and ->stop releases it.
 */
static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
        .start          = saved_cmdlines_start,
        .next           = saved_cmdlines_next,
        .stop           = saved_cmdlines_stop,
        .show           = saved_cmdlines_show,
};
3818
3819 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3820 {
3821         if (tracing_disabled)
3822                 return -ENODEV;
3823
3824         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3825 }
3826
/* File operations for the read-only saved_cmdlines seq_file. */
static const struct file_operations tracing_saved_cmdlines_fops = {
        .open           = tracing_saved_cmdlines_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};
3833
3834 static ssize_t
3835 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3836                                  size_t cnt, loff_t *ppos)
3837 {
3838         char buf[64];
3839         int r;
3840
3841         arch_spin_lock(&trace_cmdline_lock);
3842         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3843         arch_spin_unlock(&trace_cmdline_lock);
3844
3845         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3846 }
3847
/*
 * Free a saved cmdlines buffer: both member arrays first, then the
 * descriptor itself.
 */
static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
{
        kfree(s->saved_cmdlines);
        kfree(s->map_cmdline_to_pid);
        kfree(s);
}
3854
3855 static int tracing_resize_saved_cmdlines(unsigned int val)
3856 {
3857         struct saved_cmdlines_buffer *s, *savedcmd_temp;
3858
3859         s = kmalloc(sizeof(*s), GFP_KERNEL);
3860         if (!s)
3861                 return -ENOMEM;
3862
3863         if (allocate_cmdlines_buffer(val, s) < 0) {
3864                 kfree(s);
3865                 return -ENOMEM;
3866         }
3867
3868         arch_spin_lock(&trace_cmdline_lock);
3869         savedcmd_temp = savedcmd;
3870         savedcmd = s;
3871         arch_spin_unlock(&trace_cmdline_lock);
3872         free_saved_cmdlines_buffer(savedcmd_temp);
3873
3874         return 0;
3875 }
3876
3877 static ssize_t
3878 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3879                                   size_t cnt, loff_t *ppos)
3880 {
3881         unsigned long val;
3882         int ret;
3883
3884         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3885         if (ret)
3886                 return ret;
3887
3888         /* must have at least 1 entry or less than PID_MAX_DEFAULT */
3889         if (!val || val > PID_MAX_DEFAULT)
3890                 return -EINVAL;
3891
3892         ret = tracing_resize_saved_cmdlines((unsigned int)val);
3893         if (ret < 0)
3894                 return ret;
3895
3896         *ppos += cnt;
3897
3898         return cnt;
3899 }
3900
/* File operations for saved_cmdlines_size (read current size / resize). */
static const struct file_operations tracing_saved_cmdlines_size_fops = {
        .open           = tracing_open_generic,
        .read           = tracing_saved_cmdlines_size_read,
        .write          = tracing_saved_cmdlines_size_write,
};
3906
3907 static ssize_t
3908 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3909                        size_t cnt, loff_t *ppos)
3910 {
3911         struct trace_array *tr = filp->private_data;
3912         char buf[MAX_TRACER_SIZE+2];
3913         int r;
3914
3915         mutex_lock(&trace_types_lock);
3916         r = sprintf(buf, "%s\n", tr->current_trace->name);
3917         mutex_unlock(&trace_types_lock);
3918
3919         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3920 }
3921
/*
 * Reset @tr's trace buffer for the online CPUs and then run tracer @t's
 * ->init() callback on @tr, returning its result.
 *
 * t->init is called unconditionally, so callers must make sure it is
 * set (tracing_set_tracer() checks before calling).
 */
int tracer_init(struct tracer *t, struct trace_array *tr)
{
        tracing_reset_online_cpus(&tr->trace_buffer);
        return t->init(tr);
}
3927
3928 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3929 {
3930         int cpu;
3931
3932         for_each_tracing_cpu(cpu)
3933                 per_cpu_ptr(buf->data, cpu)->entries = val;
3934 }
3935
#ifdef CONFIG_TRACER_MAX_TRACE
/*
 * Resize @trace_buf's ring buffer so that it matches the per-cpu entry
 * counts recorded in @size_buf.
 *
 * With @cpu_id == RING_BUFFER_ALL_CPUS every tracing CPU is resized in
 * turn, stopping at the first failure; otherwise only @cpu_id is
 * touched. The recorded entry count of @trace_buf is updated for each
 * CPU whose resize is accepted. Returns the last ring_buffer_resize()
 * result.
 */
static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
                                        struct trace_buffer *size_buf, int cpu_id)
{
        int cpu, ret = 0;

        if (cpu_id != RING_BUFFER_ALL_CPUS) {
                ret = ring_buffer_resize(trace_buf->buffer,
                                 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
                if (ret == 0)
                        per_cpu_ptr(trace_buf->data, cpu_id)->entries =
                                per_cpu_ptr(size_buf->data, cpu_id)->entries;
                return ret;
        }

        for_each_tracing_cpu(cpu) {
                ret = ring_buffer_resize(trace_buf->buffer,
                                 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
                if (ret < 0)
                        return ret;
                per_cpu_ptr(trace_buf->data, cpu)->entries =
                        per_cpu_ptr(size_buf->data, cpu)->entries;
        }

        return ret;
}
#endif /* CONFIG_TRACER_MAX_TRACE */
3963
/*
 * Resize the ring buffer(s) of @tr to @size for @cpu (or for every CPU
 * when @cpu is RING_BUFFER_ALL_CPUS).
 *
 * Always marks the ring buffer as expanded. If the trace buffer has not
 * been allocated yet, returns 0 without doing anything else. With
 * CONFIG_TRACER_MAX_TRACE, the max (snapshot) buffer is resized too, but
 * only for the global trace array while its current tracer uses it.
 *
 * Returns a negative value if a resize failed, otherwise the result of
 * the last ring_buffer_resize() call. The callers in this part of the
 * file all hold trace_types_lock.
 */
static int __tracing_resize_ring_buffer(struct trace_array *tr,
                                        unsigned long size, int cpu)
{
        int ret;

        /*
         * If kernel or user changes the size of the ring buffer
         * we use the size that was given, and we can forget about
         * expanding it later.
         */
        ring_buffer_expanded = true;

        /* May be called before buffers are initialized */
        if (!tr->trace_buffer.buffer)
                return 0;

        ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
        if (ret < 0)
                return ret;

#ifdef CONFIG_TRACER_MAX_TRACE
        if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
            !tr->current_trace->use_max_tr)
                goto out;

        ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
        if (ret < 0) {
                /*
                 * The recorded ->entries of the main buffer have not been
                 * updated yet (that only happens at "out"), so resizing
                 * the main buffer to its own recorded size rolls it back
                 * to what it was before this call.
                 */
                int r = resize_buffer_duplicate_size(&tr->trace_buffer,
                                                     &tr->trace_buffer, cpu);
                if (r < 0) {
                        /*
                         * AARGH! We are left with different
                         * size max buffer!!!!
                         * The max buffer is our "snapshot" buffer.
                         * When a tracer needs a snapshot (one of the
                         * latency tracers), it swaps the max buffer
                         * with the saved snap shot. We succeeded to
                         * update the size of the main buffer, but failed to
                         * update the size of the max buffer. But when we tried
                         * to reset the main buffer to the original size, we
                         * failed there too. This is very unlikely to
                         * happen, but if it does, warn and kill all
                         * tracing.
                         */
                        WARN_ON(1);
                        tracing_disabled = 1;
                }
                return ret;
        }

        /* Both buffers now have the new size: record it for the max buffer */
        if (cpu == RING_BUFFER_ALL_CPUS)
                set_buffer_entries(&tr->max_buffer, size);
        else
                per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;

 out:
#endif /* CONFIG_TRACER_MAX_TRACE */

        /* Record the new size for the main trace buffer */
        if (cpu == RING_BUFFER_ALL_CPUS)
                set_buffer_entries(&tr->trace_buffer, size);
        else
                per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;

        return ret;
}
4029
4030 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4031                                           unsigned long size, int cpu_id)
4032 {
4033         int ret = size;
4034
4035         mutex_lock(&trace_types_lock);
4036
4037         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4038                 /* make sure, this cpu is enabled in the mask */
4039                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4040                         ret = -EINVAL;
4041                         goto out;
4042                 }
4043         }
4044
4045         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4046         if (ret < 0)
4047                 ret = -ENOMEM;
4048
4049 out:
4050         mutex_unlock(&trace_types_lock);
4051
4052         return ret;
4053 }
4054
4055
4056 /**
4057  * tracing_update_buffers - used by tracing facility to expand ring buffers
4058  *
4059  * To save on memory when the tracing is never used on a system with it
4060  * configured in. The ring buffers are set to a minimum size. But once
4061  * a user starts to use the tracing facility, then they need to grow
4062  * to their default size.
4063  *
4064  * This function is to be called when a tracer is about to be used.
4065  */
4066 int tracing_update_buffers(void)
4067 {
4068         int ret = 0;
4069
4070         mutex_lock(&trace_types_lock);
4071         if (!ring_buffer_expanded)
4072                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4073                                                 RING_BUFFER_ALL_CPUS);
4074         mutex_unlock(&trace_types_lock);
4075
4076         return ret;
4077 }
4078
/*
 * Forward declarations for the per-tracer option files; they are used by
 * tracing_set_tracer() before their definitions appear.
 */
struct trace_option_dentry;

static struct trace_option_dentry *
create_trace_option_files(struct trace_array *tr, struct tracer *tracer);

static void
destroy_trace_option_files(struct trace_option_dentry *topts);
4086
4087 /*
4088  * Used to clear out the tracer before deletion of an instance.
4089  * Must have trace_types_lock held.
4090  */
4091 static void tracing_set_nop(struct trace_array *tr)
4092 {
4093         if (tr->current_trace == &nop_trace)
4094                 return;
4095         
4096         tr->current_trace->enabled--;
4097
4098         if (tr->current_trace->reset)
4099                 tr->current_trace->reset(tr);
4100
4101         tr->current_trace = &nop_trace;
4102 }
4103
/*
 * Make the tracer named @buf the current tracer of @tr.
 *
 * Runs under trace_types_lock. The ring buffer is expanded to its
 * default size first if that has not happened yet. The old tracer is
 * torn down and replaced by nop_trace before the new one is set up, so
 * if snapshot allocation or the new tracer's init fails, @tr is left
 * running nop_trace.
 *
 * Returns 0 on success (including when the tracer is already current),
 * -EINVAL if no tracer has that name or it is not allowed for @tr, or
 * the error from the resize, snapshot allocation or tracer init.
 */
static int tracing_set_tracer(struct trace_array *tr, const char *buf)
{
        /* function-static: option files of the last tracer set on the top instance */
        static struct trace_option_dentry *topts;
        struct tracer *t;
#ifdef CONFIG_TRACER_MAX_TRACE
        bool had_max_tr;
#endif
        int ret = 0;

        mutex_lock(&trace_types_lock);

        if (!ring_buffer_expanded) {
                ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
                                                RING_BUFFER_ALL_CPUS);
                if (ret < 0)
                        goto out;
                /* a non-negative resize result must not leak out as our return value */
                ret = 0;
        }

        /* look the tracer up by name in the registered list */
        for (t = trace_types; t; t = t->next) {
                if (strcmp(t->name, buf) == 0)
                        break;
        }
        if (!t) {
                ret = -EINVAL;
                goto out;
        }
        /* nothing to do if it is already the current tracer */
        if (t == tr->current_trace)
                goto out;

        /* Some tracers are only allowed for the top level buffer */
        if (!trace_ok_for_array(t, tr)) {
                ret = -EINVAL;
                goto out;
        }

        trace_branch_disable();

        tr->current_trace->enabled--;

        if (tr->current_trace->reset)
                tr->current_trace->reset(tr);

        /* Current trace needs to be nop_trace before synchronize_sched */
        tr->current_trace = &nop_trace;

#ifdef CONFIG_TRACER_MAX_TRACE
        had_max_tr = tr->allocated_snapshot;

        if (had_max_tr && !t->use_max_tr) {
                /*
                 * We need to make sure that the update_max_tr sees that
                 * current_trace changed to nop_trace to keep it from
                 * swapping the buffers after we resize it.
                 * The update_max_tr is called from interrupts disabled
                 * so a synchronized_sched() is sufficient.
                 */
                synchronize_sched();
                free_snapshot(tr);
        }
#endif
        /* Currently, only the top instance has options */
        if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
                destroy_trace_option_files(topts);
                topts = create_trace_option_files(tr, t);
        }

#ifdef CONFIG_TRACER_MAX_TRACE
        /* the new tracer needs a snapshot buffer that is not there yet */
        if (t->use_max_tr && !had_max_tr) {
                ret = alloc_snapshot(tr);
                if (ret < 0)
                        goto out;
        }
#endif

        if (t->init) {
                ret = tracer_init(t, tr);
                if (ret)
                        goto out;
        }

        tr->current_trace = t;
        tr->current_trace->enabled++;
        trace_branch_enable(tr);
 out:
        mutex_unlock(&trace_types_lock);

        return ret;
}
4193
4194 static ssize_t
4195 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4196                         size_t cnt, loff_t *ppos)
4197 {
4198         struct trace_array *tr = filp->private_data;
4199         char buf[MAX_TRACER_SIZE+1];
4200         int i;
4201         size_t ret;
4202         int err;
4203
4204         ret = cnt;
4205
4206         if (cnt > MAX_TRACER_SIZE)
4207                 cnt = MAX_TRACER_SIZE;
4208
4209         if (copy_from_user(&buf, ubuf, cnt))
4210                 return -EFAULT;
4211
4212         buf[cnt] = 0;
4213
4214         /* strip ending whitespace. */
4215         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4216                 buf[i] = 0;
4217
4218         err = tracing_set_tracer(tr, buf);
4219         if (err)
4220                 return err;
4221
4222         *ppos += ret;
4223
4224         return ret;
4225 }
4226
4227 static ssize_t
4228 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4229                      size_t cnt, loff_t *ppos)
4230 {
4231         unsigned long *ptr = filp->private_data;
4232         char buf[64];
4233         int r;
4234
4235         r = snprintf(buf, sizeof(buf), "%ld\n",
4236                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4237         if (r > sizeof(buf))
4238                 r = sizeof(buf);
4239         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4240 }
4241
4242 static ssize_t
4243 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4244                       size_t cnt, loff_t *ppos)
4245 {
4246         unsigned long *ptr = filp->private_data;
4247         unsigned long val;
4248         int ret;
4249
4250         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4251         if (ret)
4252                 return ret;
4253
4254         *ptr = val * 1000;
4255
4256         return cnt;
4257 }
4258
/*
 * open() handler for trace_pipe.
 *
 * Takes a reference on the trace array stored in the inode and, under
 * trace_types_lock, allocates a trace_iterator with a private copy of
 * the current tracer and stores it in filp->private_data.
 *
 * Returns 0 on success, -ENODEV if tracing is disabled or the trace
 * array reference cannot be taken, or -ENOMEM on allocation failure
 * (in which case the reference is dropped again).
 */
static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
        struct trace_array *tr = inode->i_private;
        struct trace_iterator *iter;
        int ret = 0;

        if (tracing_disabled)
                return -ENODEV;

        if (trace_array_get(tr) < 0)
                return -ENODEV;

        mutex_lock(&trace_types_lock);

        /* create a buffer to store the information to pass to userspace */
        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
        if (!iter) {
                ret = -ENOMEM;
                __trace_array_put(tr);
                goto out;
        }

        /*
         * We make a copy of the current tracer to avoid concurrent
         * changes on it while we are reading.
         */
        iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
        if (!iter->trace) {
                ret = -ENOMEM;
                goto fail;
        }
        *iter->trace = *tr->current_trace;

        if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
                ret = -ENOMEM;
                goto fail;
        }

        /* trace pipe does not show start of buffer */
        cpumask_setall(iter->started);

        if (trace_flags & TRACE_ITER_LATENCY_FMT)
                iter->iter_flags |= TRACE_FILE_LAT_FMT;

        /* Output in nanoseconds only if we are using a clock in nanoseconds. */
        if (trace_clocks[tr->clock_id].in_ns)
                iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

        iter->tr = tr;
        iter->trace_buffer = &tr->trace_buffer;
        iter->cpu_file = tracing_get_cpu(inode);
        mutex_init(&iter->mutex);
        filp->private_data = iter;

        /* let the tracer set up its own per-pipe state, if it has any */
        if (iter->trace->pipe_open)
                iter->trace->pipe_open(iter);

        nonseekable_open(inode, filp);
out:
        mutex_unlock(&trace_types_lock);
        return ret;

fail:
        /* iter->trace may still be NULL here; kfree() accepts that */
        kfree(iter->trace);
        kfree(iter);
        __trace_array_put(tr);
        mutex_unlock(&trace_types_lock);
        return ret;
}
4328
/*
 * release() handler for trace_pipe: undoes tracing_open_pipe().
 *
 * Calls the tracer's ->pipe_close() callback (if any) under
 * trace_types_lock, frees the iterator and everything it owns, and
 * drops the trace array reference taken at open time. Always returns 0.
 */
static int tracing_release_pipe(struct inode *inode, struct file *file)
{
        struct trace_iterator *iter = file->private_data;
        struct trace_array *tr = inode->i_private;

        mutex_lock(&trace_types_lock);

        if (iter->trace->pipe_close)
                iter->trace->pipe_close(iter);

        mutex_unlock(&trace_types_lock);

        free_cpumask_var(iter->started);
        mutex_destroy(&iter->mutex);
        kfree(iter->trace);
        kfree(iter);

        trace_array_put(tr);

        return 0;
}
4350
4351 static unsigned int
4352 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4353 {
4354         /* Iterators are static, they should be filled or empty */
4355         if (trace_buffer_iter(iter, iter->cpu_file))
4356                 return POLLIN | POLLRDNORM;
4357
4358         if (trace_flags & TRACE_ITER_BLOCK)
4359                 /*
4360                  * Always select as readable when in blocking mode
4361                  */
4362                 return POLLIN | POLLRDNORM;
4363         else
4364                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4365                                              filp, poll_table);
4366 }
4367
4368 static unsigned int
4369 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4370 {
4371         struct trace_iterator *iter = filp->private_data;
4372
4373         return trace_poll(iter, filp, poll_table);
4374 }
4375
/*
 * Wait until the pipe iterator has something to read.
 *
 * Must be called with iter->mutex held: the mutex is dropped while
 * sleeping in wait_on_pipe() and re-taken before returning.
 * (tracing_read_pipe() calls this holding only iter->mutex, after it has
 * already released trace_types_lock.)
 *
 * Returns 1 when the caller should go on and look at the buffer,
 * -EAGAIN if the buffer is empty and the file is non-blocking, -EINTR if
 * a signal is pending, or the non-zero result of wait_on_pipe().
 */
static int tracing_wait_pipe(struct file *filp)
{
        struct trace_iterator *iter = filp->private_data;
        int ret;

        while (trace_empty(iter)) {

                if ((filp->f_flags & O_NONBLOCK)) {
                        return -EAGAIN;
                }

                /*
                 * We block until we read something and tracing is disabled.
                 * We still block if tracing is disabled, but we have never
                 * read anything. This allows a user to cat this file, and
                 * then enable tracing. But after we have read something,
                 * we give an EOF when tracing is again disabled.
                 *
                 * iter->pos will be 0 if we haven't read anything.
                 */
                if (!tracing_is_on() && iter->pos)
                        break;

                mutex_unlock(&iter->mutex);

                ret = wait_on_pipe(iter);

                mutex_lock(&iter->mutex);

                if (ret)
                        return ret;

                if (signal_pending(current))
                        return -EINTR;
        }

        return 1;
}
4415
/*
 * Consumer reader.
 *
 * read() handler for trace_pipe. Data left over in iter->seq from a
 * previous call is returned first. Otherwise the function waits for
 * trace entries, formats as many complete lines as fit into the seq
 * buffer (consuming them from the ring buffer) and copies the result to
 * user space.
 *
 * Returns the number of bytes copied, 0 at end of trace, or a negative
 * error from the wait/copy helpers or from the tracer's own ->read().
 */
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf,
                  size_t cnt, loff_t *ppos)
{
        struct trace_iterator *iter = filp->private_data;
        struct trace_array *tr = iter->tr;
        ssize_t sret;

        /* return any leftover data */
        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
        if (sret != -EBUSY)
                return sret;

        trace_seq_init(&iter->seq);

        /* copy the tracer to avoid using a global lock all around */
        mutex_lock(&trace_types_lock);
        if (unlikely(iter->trace->name != tr->current_trace->name))
                *iter->trace = *tr->current_trace;
        mutex_unlock(&trace_types_lock);

        /*
         * Avoid more than one consumer on a single file descriptor
         * This is just a matter of traces coherency, the ring buffer itself
         * is protected.
         */
        mutex_lock(&iter->mutex);
        /* a tracer-specific ->read() takes over if it returns non-zero */
        if (iter->trace->read) {
                sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
                if (sret)
                        goto out;
        }

waitagain:
        sret = tracing_wait_pipe(filp);
        if (sret <= 0)
                goto out;

        /* stop when tracing is finished */
        if (trace_empty(iter)) {
                sret = 0;
                goto out;
        }

        if (cnt >= PAGE_SIZE)
                cnt = PAGE_SIZE - 1;

        /* reset all but tr, trace, and overruns */
        memset(&iter->seq, 0,
               sizeof(struct trace_iterator) -
               offsetof(struct trace_iterator, seq));
        cpumask_clear(iter->started);
        iter->pos = -1;

        trace_event_read_lock();
        trace_access_lock(iter->cpu_file);
        while (trace_find_next_entry_inc(iter) != NULL) {
                enum print_line_t ret;
                /* remember the length so a partial line can be rolled back */
                int len = iter->seq.len;

                ret = print_trace_line(iter);
                if (ret == TRACE_TYPE_PARTIAL_LINE) {
                        /* don't print partial lines */
                        iter->seq.len = len;
                        break;
                }
                if (ret != TRACE_TYPE_NO_CONSUME)
                        trace_consume(iter);

                if (iter->seq.len >= cnt)
                        break;

                /*
                 * Setting the full flag means we reached the trace_seq buffer
                 * size and we should leave by partial output condition above.
                 * One of the trace_seq_* functions is not used properly.
                 */
                WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
                          iter->ent->type);
        }
        trace_access_unlock(iter->cpu_file);
        trace_event_read_unlock();

        /* Now copy what we have to the user */
        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
        if (iter->seq.readpos >= iter->seq.len)
                trace_seq_init(&iter->seq);

        /*
         * If there was nothing to send to user, in spite of consuming trace
         * entries, go back to wait for more entries.
         */
        if (sret == -EBUSY)
                goto waitagain;

out:
        mutex_unlock(&iter->mutex);

        return sret;
}
4519
4520 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4521                                      unsigned int idx)
4522 {
4523         __free_page(spd->pages[idx]);
4524 }
4525
4526 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4527         .can_merge              = 0,
4528         .confirm                = generic_pipe_buf_confirm,
4529         .release                = generic_pipe_buf_release,
4530         .steal                  = generic_pipe_buf_steal,
4531         .get                    = generic_pipe_buf_get,
4532 };
4533
4534 static size_t
4535 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4536 {
4537         size_t count;
4538         int ret;
4539
4540         /* Seq buffer is page-sized, exactly what we need. */
4541         for (;;) {
4542                 count = iter->seq.len;
4543                 ret = print_trace_line(iter);
4544                 count = iter->seq.len - count;
4545                 if (rem < count) {
4546                         rem = 0;
4547                         iter->seq.len -= count;
4548                         break;
4549                 }
4550                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4551                         iter->seq.len -= count;
4552                         break;
4553                 }
4554
4555                 if (ret != TRACE_TYPE_NO_CONSUME)
4556                         trace_consume(iter);
4557                 rem -= count;
4558                 if (!trace_find_next_entry_inc(iter))   {
4559                         rem = 0;
4560                         iter->ent = NULL;
4561                         break;
4562                 }
4563         }
4564
4565         return rem;
4566 }
4567
4568 static ssize_t tracing_splice_read_pipe(struct file *filp,
4569                                         loff_t *ppos,
4570                                         struct pipe_inode_info *pipe,
4571                                         size_t len,
4572                                         unsigned int flags)
4573 {
4574         struct page *pages_def[PIPE_DEF_BUFFERS];
4575         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4576         struct trace_iterator *iter = filp->private_data;
4577         struct splice_pipe_desc spd = {
4578                 .pages          = pages_def,
4579                 .partial        = partial_def,
4580                 .nr_pages       = 0, /* This gets updated below. */
4581                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4582                 .flags          = flags,
4583                 .ops            = &tracing_pipe_buf_ops,
4584                 .spd_release    = tracing_spd_release_pipe,
4585         };
4586         struct trace_array *tr = iter->tr;
4587         ssize_t ret;
4588         size_t rem;
4589         unsigned int i;
4590
4591         if (splice_grow_spd(pipe, &spd))
4592                 return -ENOMEM;
4593
4594         /* copy the tracer to avoid using a global lock all around */
4595         mutex_lock(&trace_types_lock);
4596         if (unlikely(iter->trace->name != tr->current_trace->name))
4597                 *iter->trace = *tr->current_trace;
4598         mutex_unlock(&trace_types_lock);
4599
4600         mutex_lock(&iter->mutex);
4601
4602         if (iter->trace->splice_read) {
4603                 ret = iter->trace->splice_read(iter, filp,
4604                                                ppos, pipe, len, flags);
4605                 if (ret)
4606                         goto out_err;
4607         }
4608
4609         ret = tracing_wait_pipe(filp);
4610         if (ret <= 0)
4611                 goto out_err;
4612
4613         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4614                 ret = -EFAULT;
4615                 goto out_err;
4616         }
4617
4618         trace_event_read_lock();
4619         trace_access_lock(iter->cpu_file);
4620
4621         /* Fill as many pages as possible. */
4622         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4623                 spd.pages[i] = alloc_page(GFP_KERNEL);
4624                 if (!spd.pages[i])
4625                         break;
4626
4627                 rem = tracing_fill_pipe_page(rem, iter);
4628
4629                 /* Copy the data into the page, so we can start over. */
4630                 ret = trace_seq_to_buffer(&iter->seq,
4631                                           page_address(spd.pages[i]),
4632                                           iter->seq.len);
4633                 if (ret < 0) {
4634                         __free_page(spd.pages[i]);
4635                         break;
4636                 }
4637                 spd.partial[i].offset = 0;
4638                 spd.partial[i].len = iter->seq.len;
4639
4640                 trace_seq_init(&iter->seq);
4641         }
4642
4643         trace_access_unlock(iter->cpu_file);
4644         trace_event_read_unlock();
4645         mutex_unlock(&iter->mutex);
4646
4647         spd.nr_pages = i;
4648
4649         ret = splice_to_pipe(pipe, &spd);
4650 out:
4651         splice_shrink_spd(&spd);
4652         return ret;
4653
4654 out_err:
4655         mutex_unlock(&iter->mutex);
4656         goto out;
4657 }
4658
4659 static ssize_t
4660 tracing_entries_read(struct file *filp, char __user *ubuf,
4661                      size_t cnt, loff_t *ppos)
4662 {
4663         struct inode *inode = file_inode(filp);
4664         struct trace_array *tr = inode->i_private;
4665         int cpu = tracing_get_cpu(inode);
4666         char buf[64];
4667         int r = 0;
4668         ssize_t ret;
4669
4670         mutex_lock(&trace_types_lock);
4671
4672         if (cpu == RING_BUFFER_ALL_CPUS) {
4673                 int cpu, buf_size_same;
4674                 unsigned long size;
4675
4676                 size = 0;
4677                 buf_size_same = 1;
4678                 /* check if all cpu sizes are same */
4679                 for_each_tracing_cpu(cpu) {
4680                         /* fill in the size from first enabled cpu */
4681                         if (size == 0)
4682                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4683                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4684                                 buf_size_same = 0;
4685                                 break;
4686                         }
4687                 }
4688
4689                 if (buf_size_same) {
4690                         if (!ring_buffer_expanded)
4691                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4692                                             size >> 10,
4693                                             trace_buf_size >> 10);
4694                         else
4695                                 r = sprintf(buf, "%lu\n", size >> 10);
4696                 } else
4697                         r = sprintf(buf, "X\n");
4698         } else
4699                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4700
4701         mutex_unlock(&trace_types_lock);
4702
4703         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4704         return ret;
4705 }
4706
4707 static ssize_t
4708 tracing_entries_write(struct file *filp, const char __user *ubuf,
4709                       size_t cnt, loff_t *ppos)
4710 {
4711         struct inode *inode = file_inode(filp);
4712         struct trace_array *tr = inode->i_private;
4713         unsigned long val;
4714         int ret;
4715
4716         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4717         if (ret)
4718                 return ret;
4719
4720         /* must have at least 1 entry */
4721         if (!val)
4722                 return -EINVAL;
4723
4724         /* value is in KB */
4725         val <<= 10;
4726         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4727         if (ret < 0)
4728                 return ret;
4729
4730         *ppos += cnt;
4731
4732         return cnt;
4733 }
4734
4735 static ssize_t
4736 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4737                                 size_t cnt, loff_t *ppos)
4738 {
4739         struct trace_array *tr = filp->private_data;
4740         char buf[64];
4741         int r, cpu;
4742         unsigned long size = 0, expanded_size = 0;
4743
4744         mutex_lock(&trace_types_lock);
4745         for_each_tracing_cpu(cpu) {
4746                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4747                 if (!ring_buffer_expanded)
4748                         expanded_size += trace_buf_size >> 10;
4749         }
4750         if (ring_buffer_expanded)
4751                 r = sprintf(buf, "%lu\n", size);
4752         else
4753                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4754         mutex_unlock(&trace_types_lock);
4755
4756         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4757 }
4758
4759 static ssize_t
4760 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4761                           size_t cnt, loff_t *ppos)
4762 {
4763         /*
4764          * There is no need to read what the user has written, this function
4765          * is just to make sure that there is no error when "echo" is used
4766          */
4767
4768         *ppos += cnt;
4769
4770         return cnt;
4771 }
4772
4773 static int
4774 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4775 {
4776         struct trace_array *tr = inode->i_private;
4777
4778         /* disable tracing ? */
4779         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4780                 tracer_tracing_off(tr);
4781         /* resize the ring buffer to 0 */
4782         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4783
4784         trace_array_put(tr);
4785
4786         return 0;
4787 }
4788
4789 static ssize_t
4790 tracing_mark_write(struct file *filp, const char __user *ubuf,
4791                                         size_t cnt, loff_t *fpos)
4792 {
4793         unsigned long addr = (unsigned long)ubuf;
4794         struct trace_array *tr = filp->private_data;
4795         struct ring_buffer_event *event;
4796         struct ring_buffer *buffer;
4797         struct print_entry *entry;
4798         unsigned long irq_flags;
4799         struct page *pages[2];
4800         void *map_page[2];
4801         int nr_pages = 1;
4802         ssize_t written;
4803         int offset;
4804         int size;
4805         int len;
4806         int ret;
4807         int i;
4808
4809         if (tracing_disabled)
4810                 return -EINVAL;
4811
4812         if (!(trace_flags & TRACE_ITER_MARKERS))
4813                 return -EINVAL;
4814
4815         if (cnt > TRACE_BUF_SIZE)
4816                 cnt = TRACE_BUF_SIZE;
4817
4818         /*
4819          * Userspace is injecting traces into the kernel trace buffer.
4820          * We want to be as non intrusive as possible.
4821          * To do so, we do not want to allocate any special buffers
4822          * or take any locks, but instead write the userspace data
4823          * straight into the ring buffer.
4824          *
4825          * First we need to pin the userspace buffer into memory,
4826          * which, most likely it is, because it just referenced it.
4827          * But there's no guarantee that it is. By using get_user_pages_fast()
4828          * and kmap_atomic/kunmap_atomic() we can get access to the
4829          * pages directly. We then write the data directly into the
4830          * ring buffer.
4831          */
4832         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4833
4834         /* check if we cross pages */
4835         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4836                 nr_pages = 2;
4837
4838         offset = addr & (PAGE_SIZE - 1);
4839         addr &= PAGE_MASK;
4840
4841         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4842         if (ret < nr_pages) {
4843                 while (--ret >= 0)
4844                         put_page(pages[ret]);
4845                 written = -EFAULT;
4846                 goto out;
4847         }
4848
4849         for (i = 0; i < nr_pages; i++)
4850                 map_page[i] = kmap_atomic(pages[i]);
4851
4852         local_save_flags(irq_flags);
4853         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4854         buffer = tr->trace_buffer.buffer;
4855         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4856                                           irq_flags, preempt_count());
4857         if (!event) {
4858                 /* Ring buffer disabled, return as if not open for write */
4859                 written = -EBADF;
4860                 goto out_unlock;
4861         }
4862
4863         entry = ring_buffer_event_data(event);
4864         entry->ip = _THIS_IP_;
4865
4866         if (nr_pages == 2) {
4867                 len = PAGE_SIZE - offset;
4868                 memcpy(&entry->buf, map_page[0] + offset, len);
4869                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4870         } else
4871                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4872
4873         if (entry->buf[cnt - 1] != '\n') {
4874                 entry->buf[cnt] = '\n';
4875                 entry->buf[cnt + 1] = '\0';
4876         } else
4877                 entry->buf[cnt] = '\0';
4878
4879         __buffer_unlock_commit(buffer, event);
4880
4881         written = cnt;
4882
4883         *fpos += written;
4884
4885  out_unlock:
4886         for (i = 0; i < nr_pages; i++){
4887                 kunmap_atomic(map_page[i]);
4888                 put_page(pages[i]);
4889         }
4890  out:
4891         return written;
4892 }
4893
4894 static int tracing_clock_show(struct seq_file *m, void *v)
4895 {
4896         struct trace_array *tr = m->private;
4897         int i;
4898
4899         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4900                 seq_printf(m,
4901                         "%s%s%s%s", i ? " " : "",
4902                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4903                         i == tr->clock_id ? "]" : "");
4904         seq_putc(m, '\n');
4905
4906         return 0;
4907 }
4908
4909 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
4910 {
4911         int i;
4912
4913         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4914                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4915                         break;
4916         }
4917         if (i == ARRAY_SIZE(trace_clocks))
4918                 return -EINVAL;
4919
4920         mutex_lock(&trace_types_lock);
4921
4922         tr->clock_id = i;
4923
4924         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4925
4926         /*
4927          * New clock may not be consistent with the previous clock.
4928          * Reset the buffer so that it doesn't have incomparable timestamps.
4929          */
4930         tracing_reset_online_cpus(&tr->trace_buffer);
4931
4932 #ifdef CONFIG_TRACER_MAX_TRACE
4933         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4934                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4935         tracing_reset_online_cpus(&tr->max_buffer);
4936 #endif
4937
4938         mutex_unlock(&trace_types_lock);
4939
4940         return 0;
4941 }
4942
4943 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4944                                    size_t cnt, loff_t *fpos)
4945 {
4946         struct seq_file *m = filp->private_data;
4947         struct trace_array *tr = m->private;
4948         char buf[64];
4949         const char *clockstr;
4950         int ret;
4951
4952         if (cnt >= sizeof(buf))
4953                 return -EINVAL;
4954
4955         if (copy_from_user(&buf, ubuf, cnt))
4956                 return -EFAULT;
4957
4958         buf[cnt] = 0;
4959
4960         clockstr = strstrip(buf);
4961
4962         ret = tracing_set_clock(tr, clockstr);
4963         if (ret)
4964                 return ret;
4965
4966         *fpos += cnt;
4967
4968         return cnt;
4969 }
4970
4971 static int tracing_clock_open(struct inode *inode, struct file *file)
4972 {
4973         struct trace_array *tr = inode->i_private;
4974         int ret;
4975
4976         if (tracing_disabled)
4977                 return -ENODEV;
4978
4979         if (trace_array_get(tr))
4980                 return -ENODEV;
4981
4982         ret = single_open(file, tracing_clock_show, inode->i_private);
4983         if (ret < 0)
4984                 trace_array_put(tr);
4985
4986         return ret;
4987 }
4988
4989 struct ftrace_buffer_info {
4990         struct trace_iterator   iter;
4991         void                    *spare;
4992         unsigned int            read;
4993 };
4994
4995 #ifdef CONFIG_TRACER_SNAPSHOT
4996 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4997 {
4998         struct trace_array *tr = inode->i_private;
4999         struct trace_iterator *iter;
5000         struct seq_file *m;
5001         int ret = 0;
5002
5003         if (trace_array_get(tr) < 0)
5004                 return -ENODEV;
5005
5006         if (file->f_mode & FMODE_READ) {
5007                 iter = __tracing_open(inode, file, true);
5008                 if (IS_ERR(iter))
5009                         ret = PTR_ERR(iter);
5010         } else {
5011                 /* Writes still need the seq_file to hold the private data */
5012                 ret = -ENOMEM;
5013                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5014                 if (!m)
5015                         goto out;
5016                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5017                 if (!iter) {
5018                         kfree(m);
5019                         goto out;
5020                 }
5021                 ret = 0;
5022
5023                 iter->tr = tr;
5024                 iter->trace_buffer = &tr->max_buffer;
5025                 iter->cpu_file = tracing_get_cpu(inode);
5026                 m->private = iter;
5027                 file->private_data = m;
5028         }
5029 out:
5030         if (ret < 0)
5031                 trace_array_put(tr);
5032
5033         return ret;
5034 }
5035
5036 static ssize_t
5037 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5038                        loff_t *ppos)
5039 {
5040         struct seq_file *m = filp->private_data;
5041         struct trace_iterator *iter = m->private;
5042         struct trace_array *tr = iter->tr;
5043         unsigned long val;
5044         int ret;
5045
5046         ret = tracing_update_buffers();
5047         if (ret < 0)
5048                 return ret;
5049
5050         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5051         if (ret)
5052                 return ret;
5053
5054         mutex_lock(&trace_types_lock);
5055
5056         if (tr->current_trace->use_max_tr) {
5057                 ret = -EBUSY;
5058                 goto out;
5059         }
5060
5061         switch (val) {
5062         case 0:
5063                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5064                         ret = -EINVAL;
5065                         break;
5066                 }
5067                 if (tr->allocated_snapshot)
5068                         free_snapshot(tr);
5069                 break;
5070         case 1:
5071 /* Only allow per-cpu swap if the ring buffer supports it */
5072 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5073                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5074                         ret = -EINVAL;
5075                         break;
5076                 }
5077 #endif
5078                 if (!tr->allocated_snapshot) {
5079                         ret = alloc_snapshot(tr);
5080                         if (ret < 0)
5081                                 break;
5082                 }
5083                 local_irq_disable();
5084                 /* Now, we're going to swap */
5085                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5086                         update_max_tr(tr, current, smp_processor_id());
5087                 else
5088                         update_max_tr_single(tr, current, iter->cpu_file);
5089                 local_irq_enable();
5090                 break;
5091         default:
5092                 if (tr->allocated_snapshot) {
5093                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5094                                 tracing_reset_online_cpus(&tr->max_buffer);
5095                         else
5096                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5097                 }
5098                 break;
5099         }
5100
5101         if (ret >= 0) {
5102                 *ppos += cnt;
5103                 ret = cnt;
5104         }
5105 out:
5106         mutex_unlock(&trace_types_lock);
5107         return ret;
5108 }
5109
5110 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5111 {
5112         struct seq_file *m = file->private_data;
5113         int ret;
5114
5115         ret = tracing_release(inode, file);
5116
5117         if (file->f_mode & FMODE_READ)
5118                 return ret;
5119
5120         /* If write only, the seq_file is just a stub */
5121         if (m)
5122                 kfree(m->private);
5123         kfree(m);
5124
5125         return 0;
5126 }
5127
5128 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5129 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5130                                     size_t count, loff_t *ppos);
5131 static int tracing_buffers_release(struct inode *inode, struct file *file);
5132 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5133                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5134
5135 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5136 {
5137         struct ftrace_buffer_info *info;
5138         int ret;
5139
5140         ret = tracing_buffers_open(inode, filp);
5141         if (ret < 0)
5142                 return ret;
5143
5144         info = filp->private_data;
5145
5146         if (info->iter.trace->use_max_tr) {
5147                 tracing_buffers_release(inode, filp);
5148                 return -EBUSY;
5149         }
5150
5151         info->iter.snapshot = true;
5152         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5153
5154         return ret;
5155 }
5156
5157 #endif /* CONFIG_TRACER_SNAPSHOT */
5158
5159
5160 static const struct file_operations tracing_max_lat_fops = {
5161         .open           = tracing_open_generic,
5162         .read           = tracing_max_lat_read,
5163         .write          = tracing_max_lat_write,
5164         .llseek         = generic_file_llseek,
5165 };
5166
5167 static const struct file_operations set_tracer_fops = {
5168         .open           = tracing_open_generic,
5169         .read           = tracing_set_trace_read,
5170         .write          = tracing_set_trace_write,
5171         .llseek         = generic_file_llseek,
5172 };
5173
5174 static const struct file_operations tracing_pipe_fops = {
5175         .open           = tracing_open_pipe,
5176         .poll           = tracing_poll_pipe,
5177         .read           = tracing_read_pipe,
5178         .splice_read    = tracing_splice_read_pipe,
5179         .release        = tracing_release_pipe,
5180         .llseek         = no_llseek,
5181 };
5182
5183 static const struct file_operations tracing_entries_fops = {
5184         .open           = tracing_open_generic_tr,
5185         .read           = tracing_entries_read,
5186         .write          = tracing_entries_write,
5187         .llseek         = generic_file_llseek,
5188         .release        = tracing_release_generic_tr,
5189 };
5190
5191 static const struct file_operations tracing_total_entries_fops = {
5192         .open           = tracing_open_generic_tr,
5193         .read           = tracing_total_entries_read,
5194         .llseek         = generic_file_llseek,
5195         .release        = tracing_release_generic_tr,
5196 };
5197
5198 static const struct file_operations tracing_free_buffer_fops = {
5199         .open           = tracing_open_generic_tr,
5200         .write          = tracing_free_buffer_write,
5201         .release        = tracing_free_buffer_release,
5202 };
5203
5204 static const struct file_operations tracing_mark_fops = {
5205         .open           = tracing_open_generic_tr,
5206         .write          = tracing_mark_write,
5207         .llseek         = generic_file_llseek,
5208         .release        = tracing_release_generic_tr,
5209 };
5210
5211 static const struct file_operations trace_clock_fops = {
5212         .open           = tracing_clock_open,
5213         .read           = seq_read,
5214         .llseek         = seq_lseek,
5215         .release        = tracing_single_release_tr,
5216         .write          = tracing_clock_write,
5217 };
5218
5219 #ifdef CONFIG_TRACER_SNAPSHOT
5220 static const struct file_operations snapshot_fops = {
5221         .open           = tracing_snapshot_open,
5222         .read           = seq_read,
5223         .write          = tracing_snapshot_write,
5224         .llseek         = tracing_lseek,
5225         .release        = tracing_snapshot_release,
5226 };
5227
5228 static const struct file_operations snapshot_raw_fops = {
5229         .open           = snapshot_raw_open,
5230         .read           = tracing_buffers_read,
5231         .release        = tracing_buffers_release,
5232         .splice_read    = tracing_buffers_splice_read,
5233         .llseek         = no_llseek,
5234 };
5235
5236 #endif /* CONFIG_TRACER_SNAPSHOT */
5237
5238 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5239 {
5240         struct trace_array *tr = inode->i_private;
5241         struct ftrace_buffer_info *info;
5242         int ret;
5243
5244         if (tracing_disabled)
5245                 return -ENODEV;
5246
5247         if (trace_array_get(tr) < 0)
5248                 return -ENODEV;
5249
5250         info = kzalloc(sizeof(*info), GFP_KERNEL);
5251         if (!info) {
5252                 trace_array_put(tr);
5253                 return -ENOMEM;
5254         }
5255
5256         mutex_lock(&trace_types_lock);
5257
5258         info->iter.tr           = tr;
5259         info->iter.cpu_file     = tracing_get_cpu(inode);
5260         info->iter.trace        = tr->current_trace;
5261         info->iter.trace_buffer = &tr->trace_buffer;
5262         info->spare             = NULL;
5263         /* Force reading ring buffer for first read */
5264         info->read              = (unsigned int)-1;
5265
5266         filp->private_data = info;
5267
5268         mutex_unlock(&trace_types_lock);
5269
5270         ret = nonseekable_open(inode, filp);
5271         if (ret < 0)
5272                 trace_array_put(tr);
5273
5274         return ret;
5275 }
5276
5277 static unsigned int
5278 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5279 {
5280         struct ftrace_buffer_info *info = filp->private_data;
5281         struct trace_iterator *iter = &info->iter;
5282
5283         return trace_poll(iter, filp, poll_table);
5284 }
5285
5286 static ssize_t
5287 tracing_buffers_read(struct file *filp, char __user *ubuf,
5288                      size_t count, loff_t *ppos)
5289 {
5290         struct ftrace_buffer_info *info = filp->private_data;
5291         struct trace_iterator *iter = &info->iter;
5292         ssize_t ret;
5293         ssize_t size;
5294
5295         if (!count)
5296                 return 0;
5297
5298         mutex_lock(&trace_types_lock);
5299
5300 #ifdef CONFIG_TRACER_MAX_TRACE
5301         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5302                 size = -EBUSY;
5303                 goto out_unlock;
5304         }
5305 #endif
5306
5307         if (!info->spare)
5308                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5309                                                           iter->cpu_file);
5310         size = -ENOMEM;
5311         if (!info->spare)
5312                 goto out_unlock;
5313
5314         /* Do we have previous read data to read? */
5315         if (info->read < PAGE_SIZE)
5316                 goto read;
5317
5318  again:
5319         trace_access_lock(iter->cpu_file);
5320         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5321                                     &info->spare,
5322                                     count,
5323                                     iter->cpu_file, 0);
5324         trace_access_unlock(iter->cpu_file);
5325
5326         if (ret < 0) {
5327                 if (trace_empty(iter)) {
5328                         if ((filp->f_flags & O_NONBLOCK)) {
5329                                 size = -EAGAIN;
5330                                 goto out_unlock;
5331                         }
5332                         mutex_unlock(&trace_types_lock);
5333                         ret = wait_on_pipe(iter);
5334                         mutex_lock(&trace_types_lock);
5335                         if (ret) {
5336                                 size = ret;
5337                                 goto out_unlock;
5338                         }
5339                         if (signal_pending(current)) {
5340                                 size = -EINTR;
5341                                 goto out_unlock;
5342                         }
5343                         goto again;
5344                 }
5345                 size = 0;
5346                 goto out_unlock;
5347         }
5348
5349         info->read = 0;
5350  read:
5351         size = PAGE_SIZE - info->read;
5352         if (size > count)
5353                 size = count;
5354
5355         ret = copy_to_user(ubuf, info->spare + info->read, size);
5356         if (ret == size) {
5357                 size = -EFAULT;
5358                 goto out_unlock;
5359         }
5360         size -= ret;
5361
5362         *ppos += size;
5363         info->read += size;
5364
5365  out_unlock:
5366         mutex_unlock(&trace_types_lock);
5367
5368         return size;
5369 }
5370
5371 static int tracing_buffers_release(struct inode *inode, struct file *file)
5372 {
5373         struct ftrace_buffer_info *info = file->private_data;
5374         struct trace_iterator *iter = &info->iter;
5375
5376         mutex_lock(&trace_types_lock);
5377
5378         __trace_array_put(iter->tr);
5379
5380         if (info->spare)
5381                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5382         kfree(info);
5383
5384         mutex_unlock(&trace_types_lock);
5385
5386         return 0;
5387 }
5388
/*
 * Reference counted handle on one ring buffer read page that has been
 * handed to a pipe.
 */
struct buffer_ref {
	/* Ring buffer @page is returned to when the last user is gone. */
	struct ring_buffer *buffer;
	/* The read page itself. */
	void *page;
	/* Number of users; the page is freed when this drops to zero. */
	int ref;
};
5394
5395 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5396                                     struct pipe_buffer *buf)
5397 {
5398         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5399
5400         if (--ref->ref)
5401                 return;
5402
5403         ring_buffer_free_read_page(ref->buffer, ref->page);
5404         kfree(ref);
5405         buf->private = 0;
5406 }
5407
5408 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5409                                 struct pipe_buffer *buf)
5410 {
5411         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5412
5413         ref->ref++;
5414 }
5415
5416 /* Pipe buffer operations for a buffer. */
5417 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5418         .can_merge              = 0,
5419         .confirm                = generic_pipe_buf_confirm,
5420         .release                = buffer_pipe_buf_release,
5421         .steal                  = generic_pipe_buf_steal,
5422         .get                    = buffer_pipe_buf_get,
5423 };
5424
5425 /*
5426  * Callback from splice_to_pipe(), if we need to release some pages
5427  * at the end of the spd in case we error'ed out in filling the pipe.
5428  */
5429 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5430 {
5431         struct buffer_ref *ref =
5432                 (struct buffer_ref *)spd->partial[i].private;
5433
5434         if (--ref->ref)
5435                 return;
5436
5437         ring_buffer_free_read_page(ref->buffer, ref->page);
5438         kfree(ref);
5439         spd->partial[i].private = 0;
5440 }
5441
5442 static ssize_t
5443 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5444                             struct pipe_inode_info *pipe, size_t len,
5445                             unsigned int flags)
5446 {
5447         struct ftrace_buffer_info *info = file->private_data;
5448         struct trace_iterator *iter = &info->iter;
5449         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5450         struct page *pages_def[PIPE_DEF_BUFFERS];
5451         struct splice_pipe_desc spd = {
5452                 .pages          = pages_def,
5453                 .partial        = partial_def,
5454                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5455                 .flags          = flags,
5456                 .ops            = &buffer_pipe_buf_ops,
5457                 .spd_release    = buffer_spd_release,
5458         };
5459         struct buffer_ref *ref;
5460         int entries, size, i;
5461         ssize_t ret;
5462
5463         mutex_lock(&trace_types_lock);
5464
5465 #ifdef CONFIG_TRACER_MAX_TRACE
5466         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5467                 ret = -EBUSY;
5468                 goto out;
5469         }
5470 #endif
5471
5472         if (splice_grow_spd(pipe, &spd)) {
5473                 ret = -ENOMEM;
5474                 goto out;
5475         }
5476
5477         if (*ppos & (PAGE_SIZE - 1)) {
5478                 ret = -EINVAL;
5479                 goto out;
5480         }
5481
5482         if (len & (PAGE_SIZE - 1)) {
5483                 if (len < PAGE_SIZE) {
5484                         ret = -EINVAL;
5485                         goto out;
5486                 }
5487                 len &= PAGE_MASK;
5488         }
5489
5490  again:
5491         trace_access_lock(iter->cpu_file);
5492         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5493
5494         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5495                 struct page *page;
5496                 int r;
5497
5498                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5499                 if (!ref)
5500                         break;
5501
5502                 ref->ref = 1;
5503                 ref->buffer = iter->trace_buffer->buffer;
5504                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5505                 if (!ref->page) {
5506                         kfree(ref);
5507                         break;
5508                 }
5509
5510                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5511                                           len, iter->cpu_file, 1);
5512                 if (r < 0) {
5513                         ring_buffer_free_read_page(ref->buffer, ref->page);
5514                         kfree(ref);
5515                         break;
5516                 }
5517
5518                 /*
5519                  * zero out any left over data, this is going to
5520                  * user land.
5521                  */
5522                 size = ring_buffer_page_len(ref->page);
5523                 if (size < PAGE_SIZE)
5524                         memset(ref->page + size, 0, PAGE_SIZE - size);
5525
5526                 page = virt_to_page(ref->page);
5527
5528                 spd.pages[i] = page;
5529                 spd.partial[i].len = PAGE_SIZE;
5530                 spd.partial[i].offset = 0;
5531                 spd.partial[i].private = (unsigned long)ref;
5532                 spd.nr_pages++;
5533                 *ppos += PAGE_SIZE;
5534
5535                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5536         }
5537
5538         trace_access_unlock(iter->cpu_file);
5539         spd.nr_pages = i;
5540
5541         /* did we read anything? */
5542         if (!spd.nr_pages) {
5543                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5544                         ret = -EAGAIN;
5545                         goto out;
5546                 }
5547                 mutex_unlock(&trace_types_lock);
5548                 ret = wait_on_pipe(iter);
5549                 mutex_lock(&trace_types_lock);
5550                 if (ret)
5551                         goto out;
5552                 if (signal_pending(current)) {
5553                         ret = -EINTR;
5554                         goto out;
5555                 }
5556                 goto again;
5557         }
5558
5559         ret = splice_to_pipe(pipe, &spd);
5560         splice_shrink_spd(&spd);
5561 out:
5562         mutex_unlock(&trace_types_lock);
5563
5564         return ret;
5565 }
5566
5567 static const struct file_operations tracing_buffers_fops = {
5568         .open           = tracing_buffers_open,
5569         .read           = tracing_buffers_read,
5570         .poll           = tracing_buffers_poll,
5571         .release        = tracing_buffers_release,
5572         .splice_read    = tracing_buffers_splice_read,
5573         .llseek         = no_llseek,
5574 };
5575
5576 static ssize_t
5577 tracing_stats_read(struct file *filp, char __user *ubuf,
5578                    size_t count, loff_t *ppos)
5579 {
5580         struct inode *inode = file_inode(filp);
5581         struct trace_array *tr = inode->i_private;
5582         struct trace_buffer *trace_buf = &tr->trace_buffer;
5583         int cpu = tracing_get_cpu(inode);
5584         struct trace_seq *s;
5585         unsigned long cnt;
5586         unsigned long long t;
5587         unsigned long usec_rem;
5588
5589         s = kmalloc(sizeof(*s), GFP_KERNEL);
5590         if (!s)
5591                 return -ENOMEM;
5592
5593         trace_seq_init(s);
5594
5595         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5596         trace_seq_printf(s, "entries: %ld\n", cnt);
5597
5598         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5599         trace_seq_printf(s, "overrun: %ld\n", cnt);
5600
5601         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5602         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5603
5604         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5605         trace_seq_printf(s, "bytes: %ld\n", cnt);
5606
5607         if (trace_clocks[tr->clock_id].in_ns) {
5608                 /* local or global for trace_clock */
5609                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5610                 usec_rem = do_div(t, USEC_PER_SEC);
5611                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5612                                                                 t, usec_rem);
5613
5614                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5615                 usec_rem = do_div(t, USEC_PER_SEC);
5616                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5617         } else {
5618                 /* counter or tsc mode for trace_clock */
5619                 trace_seq_printf(s, "oldest event ts: %llu\n",
5620                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5621
5622                 trace_seq_printf(s, "now ts: %llu\n",
5623                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5624         }
5625
5626         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5627         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5628
5629         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5630         trace_seq_printf(s, "read events: %ld\n", cnt);
5631
5632         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5633
5634         kfree(s);
5635
5636         return count;
5637 }
5638
5639 static const struct file_operations tracing_stats_fops = {
5640         .open           = tracing_open_generic_tr,
5641         .read           = tracing_stats_read,
5642         .llseek         = generic_file_llseek,
5643         .release        = tracing_release_generic_tr,
5644 };
5645
5646 #ifdef CONFIG_DYNAMIC_FTRACE
5647
5648 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5649 {
5650         return 0;
5651 }
5652
5653 static ssize_t
5654 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5655                   size_t cnt, loff_t *ppos)
5656 {
5657         static char ftrace_dyn_info_buffer[1024];
5658         static DEFINE_MUTEX(dyn_info_mutex);
5659         unsigned long *p = filp->private_data;
5660         char *buf = ftrace_dyn_info_buffer;
5661         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5662         int r;
5663
5664         mutex_lock(&dyn_info_mutex);
5665         r = sprintf(buf, "%ld ", *p);
5666
5667         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5668         buf[r++] = '\n';
5669
5670         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5671
5672         mutex_unlock(&dyn_info_mutex);
5673
5674         return r;
5675 }
5676
5677 static const struct file_operations tracing_dyn_info_fops = {
5678         .open           = tracing_open_generic,
5679         .read           = tracing_read_dyn_info,
5680         .llseek         = generic_file_llseek,
5681 };
5682 #endif /* CONFIG_DYNAMIC_FTRACE */
5683
5684 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
/* Function probe callback: take a snapshot on every hit; arguments unused. */
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
{
	tracing_snapshot();
}
5690
/*
 * Function probe callback for the counted snapshot trigger.
 *
 * The probe's data slot itself is used as the counter (see
 * ftrace_trace_snapshot_callback()), so @data is reinterpreted as a
 * pointer to an unsigned long. A counter of 0 means the budget is used
 * up and nothing happens; an all-ones counter means "unlimited" and is
 * never decremented; any other value is decremented before the
 * snapshot is taken.
 */
static void
ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
{
	unsigned long *count = (unsigned long *)data;

	if (!*count)
		return;

	if (*count != -1UL)
		(*count)--;

	tracing_snapshot();
}
5704
/*
 * ->print for both snapshot probe ops: emits "<func>:snapshot" followed
 * by ":unlimited" when the data word is -1, or ":count=<n>" otherwise.
 * Always returns 0.
 */
static int
ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
		      struct ftrace_probe_ops *ops, void *data)
{
	long remaining = (long)data;

	seq_printf(m, "%ps:", (void *)ip);
	seq_printf(m, "snapshot");

	if (remaining != -1)
		seq_printf(m, ":count=%ld\n", remaining);
	else
		seq_printf(m, ":unlimited\n");

	return 0;
}
5722
5723 static struct ftrace_probe_ops snapshot_probe_ops = {
5724         .func                   = ftrace_snapshot,
5725         .print                  = ftrace_snapshot_print,
5726 };
5727
5728 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5729         .func                   = ftrace_count_snapshot,
5730         .print                  = ftrace_snapshot_print,
5731 };
5732
5733 static int
5734 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5735                                char *glob, char *cmd, char *param, int enable)
5736 {
5737         struct ftrace_probe_ops *ops;
5738         void *count = (void *)-1;
5739         char *number;
5740         int ret;
5741
5742         /* hash funcs only work with set_ftrace_filter */
5743         if (!enable)
5744                 return -EINVAL;
5745
5746         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
5747
5748         if (glob[0] == '!') {
5749                 unregister_ftrace_function_probe_func(glob+1, ops);
5750                 return 0;
5751         }
5752
5753         if (!param)
5754                 goto out_reg;
5755
5756         number = strsep(&param, ":");
5757
5758         if (!strlen(number))
5759                 goto out_reg;
5760
5761         /*
5762          * We use the callback data field (which is a pointer)
5763          * as our counter.
5764          */
5765         ret = kstrtoul(number, 0, (unsigned long *)&count);
5766         if (ret)
5767                 return ret;
5768
5769  out_reg:
5770         ret = register_ftrace_function_probe(glob, ops, count);
5771
5772         if (ret >= 0)
5773                 alloc_snapshot(&global_trace);
5774
5775         return ret < 0 ? ret : 0;
5776 }
5777
5778 static struct ftrace_func_command ftrace_snapshot_cmd = {
5779         .name                   = "snapshot",
5780         .func                   = ftrace_trace_snapshot_callback,
5781 };
5782
5783 static __init int register_snapshot_cmd(void)
5784 {
5785         return register_ftrace_command(&ftrace_snapshot_cmd);
5786 }
5787 #else
5788 static inline __init int register_snapshot_cmd(void) { return 0; }
5789 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5790
5791 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5792 {
5793         if (tr->dir)
5794                 return tr->dir;
5795
5796         if (!debugfs_initialized())
5797                 return NULL;
5798
5799         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5800                 tr->dir = debugfs_create_dir("tracing", NULL);
5801
5802         if (!tr->dir)
5803                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5804
5805         return tr->dir;
5806 }
5807
5808 struct dentry *tracing_init_dentry(void)
5809 {
5810         return tracing_init_dentry_tr(&global_trace);
5811 }
5812
5813 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5814 {
5815         struct dentry *d_tracer;
5816
5817         if (tr->percpu_dir)
5818                 return tr->percpu_dir;
5819
5820         d_tracer = tracing_init_dentry_tr(tr);
5821         if (!d_tracer)
5822                 return NULL;
5823
5824         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5825
5826         WARN_ONCE(!tr->percpu_dir,
5827                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5828
5829         return tr->percpu_dir;
5830 }
5831
5832 static struct dentry *
5833 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5834                       void *data, long cpu, const struct file_operations *fops)
5835 {
5836         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5837
5838         if (ret) /* See tracing_get_cpu() */
5839                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5840         return ret;
5841 }
5842
5843 static void
5844 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5845 {
5846         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5847         struct dentry *d_cpu;
5848         char cpu_dir[30]; /* 30 characters should be more than enough */
5849
5850         if (!d_percpu)
5851                 return;
5852
5853         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5854         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5855         if (!d_cpu) {
5856                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5857                 return;
5858         }
5859
5860         /* per cpu trace_pipe */
5861         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5862                                 tr, cpu, &tracing_pipe_fops);
5863
5864         /* per cpu trace */
5865         trace_create_cpu_file("trace", 0644, d_cpu,
5866                                 tr, cpu, &tracing_fops);
5867
5868         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5869                                 tr, cpu, &tracing_buffers_fops);
5870
5871         trace_create_cpu_file("stats", 0444, d_cpu,
5872                                 tr, cpu, &tracing_stats_fops);
5873
5874         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5875                                 tr, cpu, &tracing_entries_fops);
5876
5877 #ifdef CONFIG_TRACER_SNAPSHOT
5878         trace_create_cpu_file("snapshot", 0644, d_cpu,
5879                                 tr, cpu, &snapshot_fops);
5880
5881         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5882                                 tr, cpu, &snapshot_raw_fops);
5883 #endif
5884 }
5885
5886 #ifdef CONFIG_FTRACE_SELFTEST
5887 /* Let selftest have access to static functions in this file */
5888 #include "trace_selftest.c"
5889 #endif
5890
/*
 * One tracer-specific option file. Arrays of these are terminated by an
 * element whose 'opt' is NULL (see destroy_trace_option_files()).
 */
struct trace_option_dentry {
	struct tracer_opt *opt;		/* the option this file controls */
	struct tracer_flags *flags;	/* flag word the option bit lives in */
	struct trace_array *tr;		/* trace array the option applies to */
	struct dentry *entry;		/* debugfs file, NULL if not created */
};
5897
5898 static ssize_t
5899 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5900                         loff_t *ppos)
5901 {
5902         struct trace_option_dentry *topt = filp->private_data;
5903         char *buf;
5904
5905         if (topt->flags->val & topt->opt->bit)
5906                 buf = "1\n";
5907         else
5908                 buf = "0\n";
5909
5910         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5911 }
5912
5913 static ssize_t
5914 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5915                          loff_t *ppos)
5916 {
5917         struct trace_option_dentry *topt = filp->private_data;
5918         unsigned long val;
5919         int ret;
5920
5921         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5922         if (ret)
5923                 return ret;
5924
5925         if (val != 0 && val != 1)
5926                 return -EINVAL;
5927
5928         if (!!(topt->flags->val & topt->opt->bit) != val) {
5929                 mutex_lock(&trace_types_lock);
5930                 ret = __set_tracer_option(topt->tr, topt->flags,
5931                                           topt->opt, !val);
5932                 mutex_unlock(&trace_types_lock);
5933                 if (ret)
5934                         return ret;
5935         }
5936
5937         *ppos += cnt;
5938
5939         return cnt;
5940 }
5941
5942
5943 static const struct file_operations trace_options_fops = {
5944         .open = tracing_open_generic,
5945         .read = trace_options_read,
5946         .write = trace_options_write,
5947         .llseek = generic_file_llseek,
5948 };
5949
5950 static ssize_t
5951 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5952                         loff_t *ppos)
5953 {
5954         long index = (long)filp->private_data;
5955         char *buf;
5956
5957         if (trace_flags & (1 << index))
5958                 buf = "1\n";
5959         else
5960                 buf = "0\n";
5961
5962         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5963 }
5964
5965 static ssize_t
5966 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5967                          loff_t *ppos)
5968 {
5969         struct trace_array *tr = &global_trace;
5970         long index = (long)filp->private_data;
5971         unsigned long val;
5972         int ret;
5973
5974         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5975         if (ret)
5976                 return ret;
5977
5978         if (val != 0 && val != 1)
5979                 return -EINVAL;
5980
5981         mutex_lock(&trace_types_lock);
5982         ret = set_tracer_flag(tr, 1 << index, val);
5983         mutex_unlock(&trace_types_lock);
5984
5985         if (ret < 0)
5986                 return ret;
5987
5988         *ppos += cnt;
5989
5990         return cnt;
5991 }
5992
5993 static const struct file_operations trace_options_core_fops = {
5994         .open = tracing_open_generic,
5995         .read = trace_options_core_read,
5996         .write = trace_options_core_write,
5997         .llseek = generic_file_llseek,
5998 };
5999
6000 struct dentry *trace_create_file(const char *name,
6001                                  umode_t mode,
6002                                  struct dentry *parent,
6003                                  void *data,
6004                                  const struct file_operations *fops)
6005 {
6006         struct dentry *ret;
6007
6008         ret = debugfs_create_file(name, mode, parent, data, fops);
6009         if (!ret)
6010                 pr_warning("Could not create debugfs '%s' entry\n", name);
6011
6012         return ret;
6013 }
6014
6015
6016 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6017 {
6018         struct dentry *d_tracer;
6019
6020         if (tr->options)
6021                 return tr->options;
6022
6023         d_tracer = tracing_init_dentry_tr(tr);
6024         if (!d_tracer)
6025                 return NULL;
6026
6027         tr->options = debugfs_create_dir("options", d_tracer);
6028         if (!tr->options) {
6029                 pr_warning("Could not create debugfs directory 'options'\n");
6030                 return NULL;
6031         }
6032
6033         return tr->options;
6034 }
6035
6036 static void
6037 create_trace_option_file(struct trace_array *tr,
6038                          struct trace_option_dentry *topt,
6039                          struct tracer_flags *flags,
6040                          struct tracer_opt *opt)
6041 {
6042         struct dentry *t_options;
6043
6044         t_options = trace_options_init_dentry(tr);
6045         if (!t_options)
6046                 return;
6047
6048         topt->flags = flags;
6049         topt->opt = opt;
6050         topt->tr = tr;
6051
6052         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6053                                     &trace_options_fops);
6054
6055 }
6056
6057 static struct trace_option_dentry *
6058 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6059 {
6060         struct trace_option_dentry *topts;
6061         struct tracer_flags *flags;
6062         struct tracer_opt *opts;
6063         int cnt;
6064
6065         if (!tracer)
6066                 return NULL;
6067
6068         flags = tracer->flags;
6069
6070         if (!flags || !flags->opts)
6071                 return NULL;
6072
6073         opts = flags->opts;
6074
6075         for (cnt = 0; opts[cnt].name; cnt++)
6076                 ;
6077
6078         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6079         if (!topts)
6080                 return NULL;
6081
6082         for (cnt = 0; opts[cnt].name; cnt++)
6083                 create_trace_option_file(tr, &topts[cnt], flags,
6084                                          &opts[cnt]);
6085
6086         return topts;
6087 }
6088
6089 static void
6090 destroy_trace_option_files(struct trace_option_dentry *topts)
6091 {
6092         int cnt;
6093
6094         if (!topts)
6095                 return;
6096
6097         for (cnt = 0; topts[cnt].opt; cnt++) {
6098                 if (topts[cnt].entry)
6099                         debugfs_remove(topts[cnt].entry);
6100         }
6101
6102         kfree(topts);
6103 }
6104
6105 static struct dentry *
6106 create_trace_option_core_file(struct trace_array *tr,
6107                               const char *option, long index)
6108 {
6109         struct dentry *t_options;
6110
6111         t_options = trace_options_init_dentry(tr);
6112         if (!t_options)
6113                 return NULL;
6114
6115         return trace_create_file(option, 0644, t_options, (void *)index,
6116                                     &trace_options_core_fops);
6117 }
6118
6119 static __init void create_trace_options_dir(struct trace_array *tr)
6120 {
6121         struct dentry *t_options;
6122         int i;
6123
6124         t_options = trace_options_init_dentry(tr);
6125         if (!t_options)
6126                 return;
6127
6128         for (i = 0; trace_options[i]; i++)
6129                 create_trace_option_core_file(tr, trace_options[i], i);
6130 }
6131
6132 static ssize_t
6133 rb_simple_read(struct file *filp, char __user *ubuf,
6134                size_t cnt, loff_t *ppos)
6135 {
6136         struct trace_array *tr = filp->private_data;
6137         char buf[64];
6138         int r;
6139
6140         r = tracer_tracing_is_on(tr);
6141         r = sprintf(buf, "%d\n", r);
6142
6143         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6144 }
6145
6146 static ssize_t
6147 rb_simple_write(struct file *filp, const char __user *ubuf,
6148                 size_t cnt, loff_t *ppos)
6149 {
6150         struct trace_array *tr = filp->private_data;
6151         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6152         unsigned long val;
6153         int ret;
6154
6155         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6156         if (ret)
6157                 return ret;
6158
6159         if (buffer) {
6160                 mutex_lock(&trace_types_lock);
6161                 if (val) {
6162                         tracer_tracing_on(tr);
6163                         if (tr->current_trace->start)
6164                                 tr->current_trace->start(tr);
6165                 } else {
6166                         tracer_tracing_off(tr);
6167                         if (tr->current_trace->stop)
6168                                 tr->current_trace->stop(tr);
6169                 }
6170                 mutex_unlock(&trace_types_lock);
6171         }
6172
6173         (*ppos)++;
6174
6175         return cnt;
6176 }
6177
6178 static const struct file_operations rb_simple_fops = {
6179         .open           = tracing_open_generic_tr,
6180         .read           = rb_simple_read,
6181         .write          = rb_simple_write,
6182         .release        = tracing_release_generic_tr,
6183         .llseek         = default_llseek,
6184 };
6185
/* Parent debugfs directory under which instance directories are created. */
struct dentry *trace_instance_dir;

/* Forward declaration; used by new_instance_create(). */
static void init_tracer_debugfs(struct trace_array *tr,
				struct dentry *d_tracer);
6190
6191 static int
6192 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6193 {
6194         enum ring_buffer_flags rb_flags;
6195
6196         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6197
6198         buf->tr = tr;
6199
6200         buf->buffer = ring_buffer_alloc(size, rb_flags);
6201         if (!buf->buffer)
6202                 return -ENOMEM;
6203
6204         buf->data = alloc_percpu(struct trace_array_cpu);
6205         if (!buf->data) {
6206                 ring_buffer_free(buf->buffer);
6207                 return -ENOMEM;
6208         }
6209
6210         /* Allocate the first page for all buffers */
6211         set_buffer_entries(&tr->trace_buffer,
6212                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6213
6214         return 0;
6215 }
6216
6217 static int allocate_trace_buffers(struct trace_array *tr, int size)
6218 {
6219         int ret;
6220
6221         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6222         if (ret)
6223                 return ret;
6224
6225 #ifdef CONFIG_TRACER_MAX_TRACE
6226         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6227                                     allocate_snapshot ? size : 1);
6228         if (WARN_ON(ret)) {
6229                 ring_buffer_free(tr->trace_buffer.buffer);
6230                 free_percpu(tr->trace_buffer.data);
6231                 return -ENOMEM;
6232         }
6233         tr->allocated_snapshot = allocate_snapshot;
6234
6235         /*
6236          * Only the top level trace array gets its snapshot allocated
6237          * from the kernel command line.
6238          */
6239         allocate_snapshot = false;
6240 #endif
6241         return 0;
6242 }
6243
6244 static void free_trace_buffer(struct trace_buffer *buf)
6245 {
6246         if (buf->buffer) {
6247                 ring_buffer_free(buf->buffer);
6248                 buf->buffer = NULL;
6249                 free_percpu(buf->data);
6250                 buf->data = NULL;
6251         }
6252 }
6253
6254 static void free_trace_buffers(struct trace_array *tr)
6255 {
6256         if (!tr)
6257                 return;
6258
6259         free_trace_buffer(&tr->trace_buffer);
6260
6261 #ifdef CONFIG_TRACER_MAX_TRACE
6262         free_trace_buffer(&tr->max_buffer);
6263 #endif
6264 }
6265
6266 static int new_instance_create(const char *name)
6267 {
6268         struct trace_array *tr;
6269         int ret;
6270
6271         mutex_lock(&trace_types_lock);
6272
6273         ret = -EEXIST;
6274         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6275                 if (tr->name && strcmp(tr->name, name) == 0)
6276                         goto out_unlock;
6277         }
6278
6279         ret = -ENOMEM;
6280         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6281         if (!tr)
6282                 goto out_unlock;
6283
6284         tr->name = kstrdup(name, GFP_KERNEL);
6285         if (!tr->name)
6286                 goto out_free_tr;
6287
6288         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6289                 goto out_free_tr;
6290
6291         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6292
6293         raw_spin_lock_init(&tr->start_lock);
6294
6295         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6296
6297         tr->current_trace = &nop_trace;
6298
6299         INIT_LIST_HEAD(&tr->systems);
6300         INIT_LIST_HEAD(&tr->events);
6301
6302         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6303                 goto out_free_tr;
6304
6305         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6306         if (!tr->dir)
6307                 goto out_free_tr;
6308
6309         ret = event_trace_add_tracer(tr->dir, tr);
6310         if (ret) {
6311                 debugfs_remove_recursive(tr->dir);
6312                 goto out_free_tr;
6313         }
6314
6315         init_tracer_debugfs(tr, tr->dir);
6316
6317         list_add(&tr->list, &ftrace_trace_arrays);
6318
6319         mutex_unlock(&trace_types_lock);
6320
6321         return 0;
6322
6323  out_free_tr:
6324         free_trace_buffers(tr);
6325         free_cpumask_var(tr->tracing_cpumask);
6326         kfree(tr->name);
6327         kfree(tr);
6328
6329  out_unlock:
6330         mutex_unlock(&trace_types_lock);
6331
6332         return ret;
6333
6334 }
6335
6336 static int instance_delete(const char *name)
6337 {
6338         struct trace_array *tr;
6339         int found = 0;
6340         int ret;
6341
6342         mutex_lock(&trace_types_lock);
6343
6344         ret = -ENODEV;
6345         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6346                 if (tr->name && strcmp(tr->name, name) == 0) {
6347                         found = 1;
6348                         break;
6349                 }
6350         }
6351         if (!found)
6352                 goto out_unlock;
6353
6354         ret = -EBUSY;
6355         if (tr->ref)
6356                 goto out_unlock;
6357
6358         list_del(&tr->list);
6359
6360         tracing_set_nop(tr);
6361         event_trace_del_tracer(tr);
6362         ftrace_destroy_function_files(tr);
6363         debugfs_remove_recursive(tr->dir);
6364         free_trace_buffers(tr);
6365
6366         kfree(tr->name);
6367         kfree(tr);
6368
6369         ret = 0;
6370
6371  out_unlock:
6372         mutex_unlock(&trace_types_lock);
6373
6374         return ret;
6375 }
6376
6377 static int instance_mkdir (struct inode *inode, struct dentry *dentry, umode_t mode)
6378 {
6379         struct dentry *parent;
6380         int ret;
6381
6382         /* Paranoid: Make sure the parent is the "instances" directory */
6383         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6384         if (WARN_ON_ONCE(parent != trace_instance_dir))
6385                 return -ENOENT;
6386
6387         /*
6388          * The inode mutex is locked, but debugfs_create_dir() will also
6389          * take the mutex. As the instances directory can not be destroyed
6390          * or changed in any other way, it is safe to unlock it, and
6391          * let the dentry try. If two users try to make the same dir at
6392          * the same time, then the new_instance_create() will determine the
6393          * winner.
6394          */
6395         mutex_unlock(&inode->i_mutex);
6396
6397         ret = new_instance_create(dentry->d_iname);
6398
6399         mutex_lock(&inode->i_mutex);
6400
6401         return ret;
6402 }
6403
6404 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6405 {
6406         struct dentry *parent;
6407         int ret;
6408
6409         /* Paranoid: Make sure the parent is the "instances" directory */
6410         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6411         if (WARN_ON_ONCE(parent != trace_instance_dir))
6412                 return -ENOENT;
6413
6414         /* The caller did a dget() on dentry */
6415         mutex_unlock(&dentry->d_inode->i_mutex);
6416
6417         /*
6418          * The inode mutex is locked, but debugfs_create_dir() will also
6419          * take the mutex. As the instances directory can not be destroyed
6420          * or changed in any other way, it is safe to unlock it, and
6421          * let the dentry try. If two users try to make the same dir at
6422          * the same time, then the instance_delete() will determine the
6423          * winner.
6424          */
6425         mutex_unlock(&inode->i_mutex);
6426
6427         ret = instance_delete(dentry->d_iname);
6428
6429         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6430         mutex_lock(&dentry->d_inode->i_mutex);
6431
6432         return ret;
6433 }
6434
6435 static const struct inode_operations instance_dir_inode_operations = {
6436         .lookup         = simple_lookup,
6437         .mkdir          = instance_mkdir,
6438         .rmdir          = instance_rmdir,
6439 };
6440
6441 static __init void create_trace_instances(struct dentry *d_tracer)
6442 {
6443         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6444         if (WARN_ON(!trace_instance_dir))
6445                 return;
6446
6447         /* Hijack the dir inode operations, to allow mkdir */
6448         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6449 }
6450
6451 static void
6452 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6453 {
6454         int cpu;
6455
6456         trace_create_file("available_tracers", 0444, d_tracer,
6457                         tr, &show_traces_fops);
6458
6459         trace_create_file("current_tracer", 0644, d_tracer,
6460                         tr, &set_tracer_fops);
6461
6462         trace_create_file("tracing_cpumask", 0644, d_tracer,
6463                           tr, &tracing_cpumask_fops);
6464
6465         trace_create_file("trace_options", 0644, d_tracer,
6466                           tr, &tracing_iter_fops);
6467
6468         trace_create_file("trace", 0644, d_tracer,
6469                           tr, &tracing_fops);
6470
6471         trace_create_file("trace_pipe", 0444, d_tracer,
6472                           tr, &tracing_pipe_fops);
6473
6474         trace_create_file("buffer_size_kb", 0644, d_tracer,
6475                           tr, &tracing_entries_fops);
6476
6477         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6478                           tr, &tracing_total_entries_fops);
6479
6480         trace_create_file("free_buffer", 0200, d_tracer,
6481                           tr, &tracing_free_buffer_fops);
6482
6483         trace_create_file("trace_marker", 0220, d_tracer,
6484                           tr, &tracing_mark_fops);
6485
6486         trace_create_file("trace_clock", 0644, d_tracer, tr,
6487                           &trace_clock_fops);
6488
6489         trace_create_file("tracing_on", 0644, d_tracer,
6490                           tr, &rb_simple_fops);
6491
6492 #ifdef CONFIG_TRACER_MAX_TRACE
6493         trace_create_file("tracing_max_latency", 0644, d_tracer,
6494                         &tr->max_latency, &tracing_max_lat_fops);
6495 #endif
6496
6497         if (ftrace_create_function_files(tr, d_tracer))
6498                 WARN(1, "Could not allocate function filter files");
6499
6500 #ifdef CONFIG_TRACER_SNAPSHOT
6501         trace_create_file("snapshot", 0644, d_tracer,
6502                           tr, &snapshot_fops);
6503 #endif
6504
6505         for_each_tracing_cpu(cpu)
6506                 tracing_init_debugfs_percpu(tr, cpu);
6507
6508 }
6509
6510 static __init int tracer_init_debugfs(void)
6511 {
6512         struct dentry *d_tracer;
6513
6514         trace_access_lock_init();
6515
6516         d_tracer = tracing_init_dentry();
6517         if (!d_tracer)
6518                 return 0;
6519
6520         init_tracer_debugfs(&global_trace, d_tracer);
6521
6522         trace_create_file("tracing_thresh", 0644, d_tracer,
6523                         &tracing_thresh, &tracing_max_lat_fops);
6524
6525         trace_create_file("README", 0444, d_tracer,
6526                         NULL, &tracing_readme_fops);
6527
6528         trace_create_file("saved_cmdlines", 0444, d_tracer,
6529                         NULL, &tracing_saved_cmdlines_fops);
6530
6531         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
6532                           NULL, &tracing_saved_cmdlines_size_fops);
6533
6534 #ifdef CONFIG_DYNAMIC_FTRACE
6535         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6536                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6537 #endif
6538
6539         create_trace_instances(d_tracer);
6540
6541         create_trace_options_dir(&global_trace);
6542
6543         return 0;
6544 }
6545
6546 static int trace_panic_handler(struct notifier_block *this,
6547                                unsigned long event, void *unused)
6548 {
6549         if (ftrace_dump_on_oops)
6550                 ftrace_dump(ftrace_dump_on_oops);
6551         return NOTIFY_OK;
6552 }
6553
6554 static struct notifier_block trace_panic_notifier = {
6555         .notifier_call  = trace_panic_handler,
6556         .next           = NULL,
6557         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6558 };
6559
6560 static int trace_die_handler(struct notifier_block *self,
6561                              unsigned long val,
6562                              void *data)
6563 {
6564         switch (val) {
6565         case DIE_OOPS:
6566                 if (ftrace_dump_on_oops)
6567                         ftrace_dump(ftrace_dump_on_oops);
6568                 break;
6569         default:
6570                 break;
6571         }
6572         return NOTIFY_OK;
6573 }
6574
6575 static struct notifier_block trace_die_notifier = {
6576         .notifier_call = trace_die_handler,
6577         .priority = 200
6578 };
6579
6580 /*
6581  * printk is set to max of 1024, we really don't need it that big.
6582  * Nothing should be printing 1000 characters anyway.
6583  */
6584 #define TRACE_MAX_PRINT         1000
6585
6586 /*
6587  * Define here KERN_TRACE so that we have one place to modify
6588  * it if we decide to change what log level the ftrace dump
6589  * should be at.
6590  */
6591 #define KERN_TRACE              KERN_EMERG
6592
6593 void
6594 trace_printk_seq(struct trace_seq *s)
6595 {
6596         /* Probably should print a warning here. */
6597         if (s->len >= TRACE_MAX_PRINT)
6598                 s->len = TRACE_MAX_PRINT;
6599
6600         /* should be zero ended, but we are paranoid. */
6601         s->buffer[s->len] = 0;
6602
6603         printk(KERN_TRACE "%s", s->buffer);
6604
6605         trace_seq_init(s);
6606 }
6607
6608 void trace_init_global_iter(struct trace_iterator *iter)
6609 {
6610         iter->tr = &global_trace;
6611         iter->trace = iter->tr->current_trace;
6612         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6613         iter->trace_buffer = &global_trace.trace_buffer;
6614
6615         if (iter->trace && iter->trace->open)
6616                 iter->trace->open(iter);
6617
6618         /* Annotate start of buffers if we had overruns */
6619         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6620                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6621
6622         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6623         if (trace_clocks[iter->tr->clock_id].in_ns)
6624                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6625 }
6626
6627 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6628 {
6629         /* use static because iter can be a bit big for the stack */
6630         static struct trace_iterator iter;
6631         static atomic_t dump_running;
6632         unsigned int old_userobj;
6633         unsigned long flags;
6634         int cnt = 0, cpu;
6635
6636         /* Only allow one dump user at a time. */
6637         if (atomic_inc_return(&dump_running) != 1) {
6638                 atomic_dec(&dump_running);
6639                 return;
6640         }
6641
6642         /*
6643          * Always turn off tracing when we dump.
6644          * We don't need to show trace output of what happens
6645          * between multiple crashes.
6646          *
6647          * If the user does a sysrq-z, then they can re-enable
6648          * tracing with echo 1 > tracing_on.
6649          */
6650         tracing_off();
6651
6652         local_irq_save(flags);
6653
6654         /* Simulate the iterator */
6655         trace_init_global_iter(&iter);
6656
6657         for_each_tracing_cpu(cpu) {
6658                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6659         }
6660
6661         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6662
6663         /* don't look at user memory in panic mode */
6664         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6665
6666         switch (oops_dump_mode) {
6667         case DUMP_ALL:
6668                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6669                 break;
6670         case DUMP_ORIG:
6671                 iter.cpu_file = raw_smp_processor_id();
6672                 break;
6673         case DUMP_NONE:
6674                 goto out_enable;
6675         default:
6676                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6677                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6678         }
6679
6680         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6681
6682         /* Did function tracer already get disabled? */
6683         if (ftrace_is_dead()) {
6684                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6685                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6686         }
6687
6688         /*
6689          * We need to stop all tracing on all CPUS to read the
6690          * the next buffer. This is a bit expensive, but is
6691          * not done often. We fill all what we can read,
6692          * and then release the locks again.
6693          */
6694
6695         while (!trace_empty(&iter)) {
6696
6697                 if (!cnt)
6698                         printk(KERN_TRACE "---------------------------------\n");
6699
6700                 cnt++;
6701
6702                 /* reset all but tr, trace, and overruns */
6703                 memset(&iter.seq, 0,
6704                        sizeof(struct trace_iterator) -
6705                        offsetof(struct trace_iterator, seq));
6706                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6707                 iter.pos = -1;
6708
6709                 if (trace_find_next_entry_inc(&iter) != NULL) {
6710                         int ret;
6711
6712                         ret = print_trace_line(&iter);
6713                         if (ret != TRACE_TYPE_NO_CONSUME)
6714                                 trace_consume(&iter);
6715                 }
6716                 touch_nmi_watchdog();
6717
6718                 trace_printk_seq(&iter.seq);
6719         }
6720
6721         if (!cnt)
6722                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6723         else
6724                 printk(KERN_TRACE "---------------------------------\n");
6725
6726  out_enable:
6727         trace_flags |= old_userobj;
6728
6729         for_each_tracing_cpu(cpu) {
6730                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6731         }
6732         atomic_dec(&dump_running);
6733         local_irq_restore(flags);
6734 }
6735 EXPORT_SYMBOL_GPL(ftrace_dump);
6736
6737 __init static int tracer_alloc_buffers(void)
6738 {
6739         int ring_buf_size;
6740         int ret = -ENOMEM;
6741
6742
6743         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6744                 goto out;
6745
6746         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6747                 goto out_free_buffer_mask;
6748
6749         /* Only allocate trace_printk buffers if a trace_printk exists */
6750         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6751                 /* Must be called before global_trace.buffer is allocated */
6752                 trace_printk_init_buffers();
6753
6754         /* To save memory, keep the ring buffer size to its minimum */
6755         if (ring_buffer_expanded)
6756                 ring_buf_size = trace_buf_size;
6757         else
6758                 ring_buf_size = 1;
6759
6760         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6761         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6762
6763         raw_spin_lock_init(&global_trace.start_lock);
6764
6765         /* Used for event triggers */
6766         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6767         if (!temp_buffer)
6768                 goto out_free_cpumask;
6769
6770         if (trace_create_savedcmd() < 0)
6771                 goto out_free_temp_buffer;
6772
6773         /* TODO: make the number of buffers hot pluggable with CPUS */
6774         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6775                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6776                 WARN_ON(1);
6777                 goto out_free_savedcmd;
6778         }
6779
6780         if (global_trace.buffer_disabled)
6781                 tracing_off();
6782
6783         if (trace_boot_clock) {
6784                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
6785                 if (ret < 0)
6786                         pr_warning("Trace clock %s not defined, going back to default\n",
6787                                    trace_boot_clock);
6788         }
6789
6790         /*
6791          * register_tracer() might reference current_trace, so it
6792          * needs to be set before we register anything. This is
6793          * just a bootstrap of current_trace anyway.
6794          */
6795         global_trace.current_trace = &nop_trace;
6796
6797         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6798
6799         ftrace_init_global_array_ops(&global_trace);
6800
6801         register_tracer(&nop_trace);
6802
6803         /* All seems OK, enable tracing */
6804         tracing_disabled = 0;
6805
6806         atomic_notifier_chain_register(&panic_notifier_list,
6807                                        &trace_panic_notifier);
6808
6809         register_die_notifier(&trace_die_notifier);
6810
6811         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6812
6813         INIT_LIST_HEAD(&global_trace.systems);
6814         INIT_LIST_HEAD(&global_trace.events);
6815         list_add(&global_trace.list, &ftrace_trace_arrays);
6816
6817         while (trace_boot_options) {
6818                 char *option;
6819
6820                 option = strsep(&trace_boot_options, ",");
6821                 trace_set_options(&global_trace, option);
6822         }
6823
6824         register_snapshot_cmd();
6825
6826         return 0;
6827
6828 out_free_savedcmd:
6829         free_saved_cmdlines_buffer(savedcmd);
6830 out_free_temp_buffer:
6831         ring_buffer_free(temp_buffer);
6832 out_free_cpumask:
6833         free_cpumask_var(global_trace.tracing_cpumask);
6834 out_free_buffer_mask:
6835         free_cpumask_var(tracing_buffer_mask);
6836 out:
6837         return ret;
6838 }
6839
6840 __init static int clear_boot_tracer(void)
6841 {
6842         /*
6843          * The default tracer at boot buffer is an init section.
6844          * This function is called in lateinit. If we did not
6845          * find the boot tracer, then clear it out, to prevent
6846          * later registration from accessing the buffer that is
6847          * about to be freed.
6848          */
6849         if (!default_bootup_tracer)
6850                 return 0;
6851
6852         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6853                default_bootup_tracer);
6854         default_bootup_tracer = NULL;
6855
6856         return 0;
6857 }
6858
/*
 * Boot-time hooks: buffers are allocated as an early initcall, the
 * debugfs files are created at fs_initcall level, and the check for an
 * unregistered bootup tracer runs as a late initcall.
 */
early_initcall(tracer_alloc_buffers);
fs_initcall(tracer_init_debugfs);
late_initcall(clear_boot_tracer);