1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring buffer, such as trace_printk(), could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int
77 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
78 {
79         return 0;
80 }
81
82 /*
83  * To prevent the comm cache from being overwritten when no
84  * tracing is active, only save the comm when a trace event
85  * occurred.
86  */
87 static DEFINE_PER_CPU(bool, trace_cmdline_save);
88
89 /*
90  * Kill all tracing for good (never come back).
91  * It is initialized to 1 but will turn to zero if the initialization
92  * of the tracer is successful. But that is the only place that sets
93  * this back to zero.
94  */
95 static int tracing_disabled = 1;
96
97 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
98
99 cpumask_var_t __read_mostly     tracing_buffer_mask;
100
101 /*
102  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
103  *
104  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
105  * is set, then ftrace_dump is called. This will output the contents
106  * of the ftrace buffers to the console.  This is very useful for
107  * capturing traces that lead to crashes and outputting them to a
108  * serial console.
109  *
110  * It is off by default, but you can enable it either by specifying
111  * "ftrace_dump_on_oops" on the kernel command line, or by setting
112  * /proc/sys/kernel/ftrace_dump_on_oops.
113  * Set it to 1 to dump the buffers of all CPUs.
114  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
115  */
116
117 enum ftrace_dump_mode ftrace_dump_on_oops;
118
119 /* When set, tracing will stop when a WARN*() is hit */
120 int __disable_trace_on_warning;
121
122 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
123
124 #define MAX_TRACER_SIZE         100
125 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
126 static char *default_bootup_tracer;
127
128 static bool allocate_snapshot;
129
130 static int __init set_cmdline_ftrace(char *str)
131 {
132         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
133         default_bootup_tracer = bootup_tracer_buf;
134         /* We are using ftrace early, expand it */
135         ring_buffer_expanded = true;
136         return 1;
137 }
138 __setup("ftrace=", set_cmdline_ftrace);
139
140 static int __init set_ftrace_dump_on_oops(char *str)
141 {
142         if (*str++ != '=' || !*str) {
143                 ftrace_dump_on_oops = DUMP_ALL;
144                 return 1;
145         }
146
147         if (!strcmp("orig_cpu", str)) {
148                 ftrace_dump_on_oops = DUMP_ORIG;
149                 return 1;
150         }
151
152         return 0;
153 }
154 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
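/*
 * Usage sketch (illustrative, based on the parser above): dumping can be
 * enabled on the kernel command line with either
 *
 *	ftrace_dump_on_oops              (dump the buffers of all CPUs)
 *	ftrace_dump_on_oops=orig_cpu     (dump only the CPU that oopsed)
 *
 * or at run time, assuming the sysctl named in the comment above is
 * available on the running kernel:
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */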
155
156 static int __init stop_trace_on_warning(char *str)
157 {
158         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
159                 __disable_trace_on_warning = 1;
160         return 1;
161 }
162 __setup("traceoff_on_warning", stop_trace_on_warning);
163
164 static int __init boot_alloc_snapshot(char *str)
165 {
166         allocate_snapshot = true;
167         /* We also need the main ring buffer expanded */
168         ring_buffer_expanded = true;
169         return 1;
170 }
171 __setup("alloc_snapshot", boot_alloc_snapshot);
172
173
174 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
175 static char *trace_boot_options __initdata;
176
177 static int __init set_trace_boot_options(char *str)
178 {
179         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
180         trace_boot_options = trace_boot_options_buf;
181         return 0;
182 }
183 __setup("trace_options=", set_trace_boot_options);
184
185 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
186 static char *trace_boot_clock __initdata;
187
188 static int __init set_trace_boot_clock(char *str)
189 {
190         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
191         trace_boot_clock = trace_boot_clock_buf;
192         return 0;
193 }
194 __setup("trace_clock=", set_trace_boot_clock);
195
196
197 unsigned long long ns2usecs(cycle_t nsec)
198 {
199         nsec += 500;
200         do_div(nsec, 1000);
201         return nsec;
202 }
203
204 /*
205  * The global_trace is the descriptor that holds the tracing
206  * buffers for the live tracing. For each CPU, it contains
207  * a linked list of pages that will store trace entries. The
208  * page descriptors of the pages in memory are used to hold
209  * the linked list by linking the lru item in each page descriptor
210  * to each of the pages in the per-CPU buffer.
211  *
212  * For each active CPU there is a data field that holds the
213  * pages for the buffer for that CPU. Each CPU has the same number
214  * of pages allocated for its buffer.
215  */
216 static struct trace_array       global_trace;
217
218 LIST_HEAD(ftrace_trace_arrays);
219
220 int trace_array_get(struct trace_array *this_tr)
221 {
222         struct trace_array *tr;
223         int ret = -ENODEV;
224
225         mutex_lock(&trace_types_lock);
226         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
227                 if (tr == this_tr) {
228                         tr->ref++;
229                         ret = 0;
230                         break;
231                 }
232         }
233         mutex_unlock(&trace_types_lock);
234
235         return ret;
236 }
237
238 static void __trace_array_put(struct trace_array *this_tr)
239 {
240         WARN_ON(!this_tr->ref);
241         this_tr->ref--;
242 }
243
244 void trace_array_put(struct trace_array *this_tr)
245 {
246         mutex_lock(&trace_types_lock);
247         __trace_array_put(this_tr);
248         mutex_unlock(&trace_types_lock);
249 }
250
251 int filter_check_discard(struct ftrace_event_file *file, void *rec,
252                          struct ring_buffer *buffer,
253                          struct ring_buffer_event *event)
254 {
255         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
256             !filter_match_preds(file->filter, rec)) {
257                 ring_buffer_discard_commit(buffer, event);
258                 return 1;
259         }
260
261         return 0;
262 }
263 EXPORT_SYMBOL_GPL(filter_check_discard);
264
265 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
266                               struct ring_buffer *buffer,
267                               struct ring_buffer_event *event)
268 {
269         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
270             !filter_match_preds(call->filter, rec)) {
271                 ring_buffer_discard_commit(buffer, event);
272                 return 1;
273         }
274
275         return 0;
276 }
277 EXPORT_SYMBOL_GPL(call_filter_check_discard);
278
279 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
280 {
281         u64 ts;
282
283         /* Early boot up does not have a buffer yet */
284         if (!buf->buffer)
285                 return trace_clock_local();
286
287         ts = ring_buffer_time_stamp(buf->buffer, cpu);
288         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
289
290         return ts;
291 }
292
293 cycle_t ftrace_now(int cpu)
294 {
295         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
296 }
297
298 /**
299  * tracing_is_enabled - Show if global_trace has been enabled or not
300  *
301  * Shows if the global trace has been enabled or not. It uses the
302  * mirror flag "buffer_disabled" to be used in fast paths such as for
303  * the irqsoff tracer. But it may be inaccurate due to races. If you
304  * need to know the accurate state, use tracing_is_on() which is a little
305  * slower, but accurate.
306  */
307 int tracing_is_enabled(void)
308 {
309         /*
310          * For quick access (irqsoff uses this in fast path), just
311          * return the mirror variable of the state of the ring buffer.
312          * It's a little racy, but we don't really care.
313          */
314         smp_rmb();
315         return !global_trace.buffer_disabled;
316 }
317
318 /*
319  * trace_buf_size is the size in bytes that is allocated
320  * for a buffer. Note, the number of bytes is always rounded
321  * to page size.
322  *
323  * This number is purposely set to a low value of 16384.
324  * If a dump on oops happens, it is much appreciated
325  * not to have to wait for all that output. In any case, this is
326  * configurable at both boot time and run time.
327  */
328 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
329
330 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
331
332 /* trace_types holds a link list of available tracers. */
333 static struct tracer            *trace_types __read_mostly;
334
335 /*
336  * trace_types_lock is used to protect the trace_types list.
337  */
338 DEFINE_MUTEX(trace_types_lock);
339
340 /*
341  * serialize the access of the ring buffer
342  *
343  * The ring buffer serializes readers, but that is only low-level protection.
344  * The validity of the events (returned by ring_buffer_peek() etc.)
345  * is not protected by the ring buffer.
346  *
347  * The content of events may become garbage if we allow other processes to
348  * consume these events concurrently:
349  *   A) the page of the consumed events may become a normal page
350  *      (not a reader page) in the ring buffer, and this page will be rewritten
351  *      by the event producer.
352  *   B) the page of the consumed events may become a page for splice_read,
353  *      and this page will be returned to the system.
354  *
355  * These primitives allow multiple processes to access different per-cpu
356  * ring buffers concurrently.
357  *
358  * These primitives don't distinguish read-only and read-consume access.
359  * Multiple read-only accesses are also serialized.
360  */
361
362 #ifdef CONFIG_SMP
363 static DECLARE_RWSEM(all_cpu_access_lock);
364 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
365
366 static inline void trace_access_lock(int cpu)
367 {
368         if (cpu == RING_BUFFER_ALL_CPUS) {
369                 /* gain it for accessing the whole ring buffer. */
370                 down_write(&all_cpu_access_lock);
371         } else {
372                 /* gain it for accessing a cpu ring buffer. */
373
374                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
375                 down_read(&all_cpu_access_lock);
376
377                 /* Secondly block other access to this @cpu ring buffer. */
378                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
379         }
380 }
381
382 static inline void trace_access_unlock(int cpu)
383 {
384         if (cpu == RING_BUFFER_ALL_CPUS) {
385                 up_write(&all_cpu_access_lock);
386         } else {
387                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
388                 up_read(&all_cpu_access_lock);
389         }
390 }
391
392 static inline void trace_access_lock_init(void)
393 {
394         int cpu;
395
396         for_each_possible_cpu(cpu)
397                 mutex_init(&per_cpu(cpu_access_lock, cpu));
398 }
399
400 #else
401
402 static DEFINE_MUTEX(access_lock);
403
404 static inline void trace_access_lock(int cpu)
405 {
406         (void)cpu;
407         mutex_lock(&access_lock);
408 }
409
410 static inline void trace_access_unlock(int cpu)
411 {
412         (void)cpu;
413         mutex_unlock(&access_lock);
414 }
415
416 static inline void trace_access_lock_init(void)
417 {
418 }
419
420 #endif
421
422 /* trace_flags holds trace_options default values */
423 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
424         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
425         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
426         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
427
428 static void tracer_tracing_on(struct trace_array *tr)
429 {
430         if (tr->trace_buffer.buffer)
431                 ring_buffer_record_on(tr->trace_buffer.buffer);
432         /*
433          * This flag is looked at when buffers haven't been allocated
434          * yet, or by some tracers (like irqsoff) that just want to
435          * know if the ring buffer has been disabled, but can handle
436          * races where it gets disabled while we still do a record.
437          * As the check is in the fast path of the tracers, it is more
438          * important to be fast than accurate.
439          */
440         tr->buffer_disabled = 0;
441         /* Make the flag seen by readers */
442         smp_wmb();
443 }
444
445 /**
446  * tracing_on - enable tracing buffers
447  *
448  * This function enables tracing buffers that may have been
449  * disabled with tracing_off.
450  */
451 void tracing_on(void)
452 {
453         tracer_tracing_on(&global_trace);
454 }
455 EXPORT_SYMBOL_GPL(tracing_on);
456
457 /**
458  * __trace_puts - write a constant string into the trace buffer.
459  * @ip:    The address of the caller
460  * @str:   The constant string to write
461  * @size:  The size of the string.
462  */
463 int __trace_puts(unsigned long ip, const char *str, int size)
464 {
465         struct ring_buffer_event *event;
466         struct ring_buffer *buffer;
467         struct print_entry *entry;
468         unsigned long irq_flags;
469         int alloc;
470         int pc;
471
472         if (!(trace_flags & TRACE_ITER_PRINTK))
473                 return 0;
474
475         pc = preempt_count();
476
477         if (unlikely(tracing_selftest_running || tracing_disabled))
478                 return 0;
479
480         alloc = sizeof(*entry) + size + 2; /* possible \n added */
481
482         local_save_flags(irq_flags);
483         buffer = global_trace.trace_buffer.buffer;
484         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
485                                           irq_flags, pc);
486         if (!event)
487                 return 0;
488
489         entry = ring_buffer_event_data(event);
490         entry->ip = ip;
491
492         memcpy(&entry->buf, str, size);
493
494         /* Add a newline if necessary */
495         if (entry->buf[size - 1] != '\n') {
496                 entry->buf[size] = '\n';
497                 entry->buf[size + 1] = '\0';
498         } else
499                 entry->buf[size] = '\0';
500
501         __buffer_unlock_commit(buffer, event);
502         ftrace_trace_stack(buffer, irq_flags, 4, pc);
503
504         return size;
505 }
506 EXPORT_SYMBOL_GPL(__trace_puts);
507
508 /**
509  * __trace_bputs - write the pointer to a constant string into trace buffer
510  * @ip:    The address of the caller
511  * @str:   The constant string to write to the buffer to
512  */
513 int __trace_bputs(unsigned long ip, const char *str)
514 {
515         struct ring_buffer_event *event;
516         struct ring_buffer *buffer;
517         struct bputs_entry *entry;
518         unsigned long irq_flags;
519         int size = sizeof(struct bputs_entry);
520         int pc;
521
522         if (!(trace_flags & TRACE_ITER_PRINTK))
523                 return 0;
524
525         pc = preempt_count();
526
527         if (unlikely(tracing_selftest_running || tracing_disabled))
528                 return 0;
529
530         local_save_flags(irq_flags);
531         buffer = global_trace.trace_buffer.buffer;
532         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
533                                           irq_flags, pc);
534         if (!event)
535                 return 0;
536
537         entry = ring_buffer_event_data(event);
538         entry->ip                       = ip;
539         entry->str                      = str;
540
541         __buffer_unlock_commit(buffer, event);
542         ftrace_trace_stack(buffer, irq_flags, 4, pc);
543
544         return 1;
545 }
546 EXPORT_SYMBOL_GPL(__trace_bputs);
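/*
 * Usage sketch (illustrative): kernel code normally reaches __trace_puts()
 * and __trace_bputs() through the trace_puts() macro, which is expected to
 * pick the lighter-weight __trace_bputs() for string literals and fall back
 * to __trace_puts() otherwise, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 *
 * The string shows up in the trace buffer much like a trace_printk() with
 * no arguments, but without any formatting cost at the call site.
 */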
547
548 #ifdef CONFIG_TRACER_SNAPSHOT
549 /**
550  * tracing_snapshot - take a snapshot of the current buffer.
551  *
552  * This causes a swap between the snapshot buffer and the current live
553  * tracing buffer. You can use this to take snapshots of the live
554  * trace when some condition is triggered, but continue to trace.
555  *
556  * Note, make sure to allocate the snapshot with either
557  * a tracing_snapshot_alloc(), or by doing it manually
558  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
559  *
560  * If the snapshot buffer is not allocated, it will stop tracing.
561  * Basically making a permanent snapshot.
562  */
563 void tracing_snapshot(void)
564 {
565         struct trace_array *tr = &global_trace;
566         struct tracer *tracer = tr->current_trace;
567         unsigned long flags;
568
569         if (in_nmi()) {
570                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
571                 internal_trace_puts("*** snapshot is being ignored        ***\n");
572                 return;
573         }
574
575         if (!tr->allocated_snapshot) {
576                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
577                 internal_trace_puts("*** stopping trace here!   ***\n");
578                 tracing_off();
579                 return;
580         }
581
582         /* Note, snapshot can not be used when the tracer uses it */
583         if (tracer->use_max_tr) {
584                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
585                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
586                 return;
587         }
588
589         local_irq_save(flags);
590         update_max_tr(tr, current, smp_processor_id());
591         local_irq_restore(flags);
592 }
593 EXPORT_SYMBOL_GPL(tracing_snapshot);
594
595 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
596                                         struct trace_buffer *size_buf, int cpu_id);
597 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
598
599 static int alloc_snapshot(struct trace_array *tr)
600 {
601         int ret;
602
603         if (!tr->allocated_snapshot) {
604
605                 /* allocate spare buffer */
606                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
607                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
608                 if (ret < 0)
609                         return ret;
610
611                 tr->allocated_snapshot = true;
612         }
613
614         return 0;
615 }
616
617 static void free_snapshot(struct trace_array *tr)
618 {
619         /*
620          * We don't free the ring buffer; instead, we resize it because
621          * the max_tr ring buffer has some state (e.g. ring->clock) and
622          * we want to preserve it.
623          */
624         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
625         set_buffer_entries(&tr->max_buffer, 1);
626         tracing_reset_online_cpus(&tr->max_buffer);
627         tr->allocated_snapshot = false;
628 }
629
630 /**
631  * tracing_alloc_snapshot - allocate snapshot buffer.
632  *
633  * This only allocates the snapshot buffer if it isn't already
634  * allocated - it doesn't also take a snapshot.
635  *
636  * This is meant to be used in cases where the snapshot buffer needs
637  * to be set up for events that can't sleep but need to be able to
638  * trigger a snapshot.
639  */
640 int tracing_alloc_snapshot(void)
641 {
642         struct trace_array *tr = &global_trace;
643         int ret;
644
645         ret = alloc_snapshot(tr);
646         WARN_ON(ret < 0);
647
648         return ret;
649 }
650 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
651
652 /**
653  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
654  *
655  * This is similar to tracing_snapshot(), but it will allocate the
656  * snapshot buffer if it isn't already allocated. Use this only
657  * where it is safe to sleep, as the allocation may sleep.
658  *
659  * This causes a swap between the snapshot buffer and the current live
660  * tracing buffer. You can use this to take snapshots of the live
661  * trace when some condition is triggered, but continue to trace.
662  */
663 void tracing_snapshot_alloc(void)
664 {
665         int ret;
666
667         ret = tracing_alloc_snapshot();
668         if (ret < 0)
669                 return;
670
671         tracing_snapshot();
672 }
673 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
674 #else
675 void tracing_snapshot(void)
676 {
677         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
678 }
679 EXPORT_SYMBOL_GPL(tracing_snapshot);
680 int tracing_alloc_snapshot(void)
681 {
682         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
683         return -ENODEV;
684 }
685 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
686 void tracing_snapshot_alloc(void)
687 {
688         /* Give warning */
689         tracing_snapshot();
690 }
691 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
692 #endif /* CONFIG_TRACER_SNAPSHOT */
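/*
 * Usage sketch (illustrative, assuming CONFIG_TRACER_SNAPSHOT=y): allocate
 * the spare buffer up front from a context that may sleep, then freeze a
 * copy of the live trace when some condition of interest fires.
 * "my_error_condition" and both functions are hypothetical names.
 *
 *	static int __init my_debug_init(void)
 *	{
 *		return tracing_alloc_snapshot();
 *	}
 *
 *	static void my_event_handler(void)
 *	{
 *		if (my_error_condition())
 *			tracing_snapshot();
 *	}
 *
 * Alternatively, booting with "alloc_snapshot" on the command line (see
 * boot_alloc_snapshot() above) expands and allocates the buffers at boot.
 */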
693
694 static void tracer_tracing_off(struct trace_array *tr)
695 {
696         if (tr->trace_buffer.buffer)
697                 ring_buffer_record_off(tr->trace_buffer.buffer);
698         /*
699          * This flag is looked at when buffers haven't been allocated
700          * yet, or by some tracers (like irqsoff) that just want to
701          * know if the ring buffer has been disabled, but can handle
702          * races where it gets disabled while we still do a record.
703          * As the check is in the fast path of the tracers, it is more
704          * important to be fast than accurate.
705          */
706         tr->buffer_disabled = 1;
707         /* Make the flag seen by readers */
708         smp_wmb();
709 }
710
711 /**
712  * tracing_off - turn off tracing buffers
713  *
714  * This function stops the tracing buffers from recording data.
715  * It does not disable any overhead the tracers themselves may
716  * be causing. This function simply causes all recording to
717  * the ring buffers to fail.
718  */
719 void tracing_off(void)
720 {
721         tracer_tracing_off(&global_trace);
722 }
723 EXPORT_SYMBOL_GPL(tracing_off);
724
725 void disable_trace_on_warning(void)
726 {
727         if (__disable_trace_on_warning)
728                 tracing_off();
729 }
730
731 /**
732  * tracer_tracing_is_on - show real state of ring buffer enabled
733  * @tr : the trace array to know if ring buffer is enabled
734  *
735  * Shows real state of the ring buffer if it is enabled or not.
736  */
737 static int tracer_tracing_is_on(struct trace_array *tr)
738 {
739         if (tr->trace_buffer.buffer)
740                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
741         return !tr->buffer_disabled;
742 }
743
744 /**
745  * tracing_is_on - show state of ring buffers enabled
746  */
747 int tracing_is_on(void)
748 {
749         return tracer_tracing_is_on(&global_trace);
750 }
751 EXPORT_SYMBOL_GPL(tracing_is_on);
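/*
 * Usage sketch (illustrative): tracing_off() only stops recording into the
 * ring buffer, so it can be used to freeze the events leading up to a
 * problem while the system keeps running, similar in spirit to the
 * traceoff_on_warning option above. "some_bad_state" is a hypothetical
 * condition:
 *
 *	if (WARN_ON(some_bad_state))
 *		tracing_off();
 *
 * The buffer can later be read out and recording re-enabled with
 * tracing_on().
 */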
752
753 static int __init set_buf_size(char *str)
754 {
755         unsigned long buf_size;
756
757         if (!str)
758                 return 0;
759         buf_size = memparse(str, &str);
760         /* nr_entries can not be zero */
761         if (buf_size == 0)
762                 return 0;
763         trace_buf_size = buf_size;
764         return 1;
765 }
766 __setup("trace_buf_size=", set_buf_size);
767
768 static int __init set_tracing_thresh(char *str)
769 {
770         unsigned long threshold;
771         int ret;
772
773         if (!str)
774                 return 0;
775         ret = kstrtoul(str, 0, &threshold);
776         if (ret < 0)
777                 return 0;
778         tracing_thresh = threshold * 1000;
779         return 1;
780 }
781 __setup("tracing_thresh=", set_tracing_thresh);
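/*
 * Usage sketch (illustrative, based on the two parsers above): the buffer
 * size is parsed with memparse(), so size suffixes work, and the threshold
 * is given in microseconds and stored internally in nanoseconds, e.g.
 *
 *	trace_buf_size=16384k tracing_thresh=100
 *
 * on the kernel command line asks for 16 MB trace buffers and a 100 us
 * latency threshold.
 */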
782
783 unsigned long nsecs_to_usecs(unsigned long nsecs)
784 {
785         return nsecs / 1000;
786 }
787
788 /* These must match the bit positions in trace_iterator_flags */
789 static const char *trace_options[] = {
790         "print-parent",
791         "sym-offset",
792         "sym-addr",
793         "verbose",
794         "raw",
795         "hex",
796         "bin",
797         "block",
798         "stacktrace",
799         "trace_printk",
800         "ftrace_preempt",
801         "branch",
802         "annotate",
803         "userstacktrace",
804         "sym-userobj",
805         "printk-msg-only",
806         "context-info",
807         "latency-format",
808         "sleep-time",
809         "graph-time",
810         "record-cmd",
811         "overwrite",
812         "disable_on_free",
813         "irq-info",
814         "markers",
815         "function-trace",
816         NULL
817 };
818
819 static struct {
820         u64 (*func)(void);
821         const char *name;
822         int in_ns;              /* is this clock in nanoseconds? */
823 } trace_clocks[] = {
824         { trace_clock_local,            "local",        1 },
825         { trace_clock_global,           "global",       1 },
826         { trace_clock_counter,          "counter",      0 },
827         { trace_clock_jiffies,          "uptime",       0 },
828         { trace_clock,                  "perf",         1 },
829         { ktime_get_mono_fast_ns,       "mono",         1 },
830         ARCH_TRACE_CLOCKS
831 };
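/*
 * Usage sketch (illustrative): one of the clocks above can be selected at
 * boot via the "trace_clock=" parameter handled earlier in this file, e.g.
 *
 *	trace_clock=global
 *
 * which trades some overhead for timestamps that are coherent across CPUs.
 */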
832
833 /*
834  * trace_parser_get_init - gets the buffer for trace parser
835  */
836 int trace_parser_get_init(struct trace_parser *parser, int size)
837 {
838         memset(parser, 0, sizeof(*parser));
839
840         parser->buffer = kmalloc(size, GFP_KERNEL);
841         if (!parser->buffer)
842                 return 1;
843
844         parser->size = size;
845         return 0;
846 }
847
848 /*
849  * trace_parser_put - frees the buffer for trace parser
850  */
851 void trace_parser_put(struct trace_parser *parser)
852 {
853         kfree(parser->buffer);
854 }
855
856 /*
857  * trace_get_user - reads the user input string separated by space
858  * (matched by isspace(ch))
859  *
860  * For each string found the 'struct trace_parser' is updated,
861  * and the function returns.
862  *
863  * Returns number of bytes read.
864  *
865  * See kernel/trace/trace.h for 'struct trace_parser' details.
866  */
867 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
868         size_t cnt, loff_t *ppos)
869 {
870         char ch;
871         size_t read = 0;
872         ssize_t ret;
873
874         if (!*ppos)
875                 trace_parser_clear(parser);
876
877         ret = get_user(ch, ubuf++);
878         if (ret)
879                 goto out;
880
881         read++;
882         cnt--;
883
884         /*
885          * If the parser is not finished with the last write,
886          * continue reading the user input without skipping spaces.
887          */
888         if (!parser->cont) {
889                 /* skip white space */
890                 while (cnt && isspace(ch)) {
891                         ret = get_user(ch, ubuf++);
892                         if (ret)
893                                 goto out;
894                         read++;
895                         cnt--;
896                 }
897
898                 /* only spaces were written */
899                 if (isspace(ch)) {
900                         *ppos += read;
901                         ret = read;
902                         goto out;
903                 }
904
905                 parser->idx = 0;
906         }
907
908         /* read the non-space input */
909         while (cnt && !isspace(ch)) {
910                 if (parser->idx < parser->size - 1)
911                         parser->buffer[parser->idx++] = ch;
912                 else {
913                         ret = -EINVAL;
914                         goto out;
915                 }
916                 ret = get_user(ch, ubuf++);
917                 if (ret)
918                         goto out;
919                 read++;
920                 cnt--;
921         }
922
923         /* We either got finished input or we have to wait for another call. */
924         if (isspace(ch)) {
925                 parser->buffer[parser->idx] = 0;
926                 parser->cont = false;
927         } else if (parser->idx < parser->size - 1) {
928                 parser->cont = true;
929                 parser->buffer[parser->idx++] = ch;
930         } else {
931                 ret = -EINVAL;
932                 goto out;
933         }
934
935         *ppos += read;
936         ret = read;
937
938 out:
939         return ret;
940 }
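/*
 * Usage sketch (illustrative): a debugfs write handler can feed user input
 * through the parser one whitespace-separated token at a time. Error
 * handling is omitted and "process_token" is a hypothetical callback.
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	trace_parser_get_init(&parser, PAGE_SIZE);
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read > 0 && !parser.cont)
 *		process_token(parser.buffer, parser.idx);
 *	trace_parser_put(&parser);
 */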
941
942 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
943 {
944         int len;
945
946         if (s->len <= s->readpos)
947                 return -EBUSY;
948
949         len = s->len - s->readpos;
950         if (cnt > len)
951                 cnt = len;
952         memcpy(buf, s->buffer + s->readpos, cnt);
953
954         s->readpos += cnt;
955         return cnt;
956 }
957
958 unsigned long __read_mostly     tracing_thresh;
959
960 #ifdef CONFIG_TRACER_MAX_TRACE
961 /*
962  * Copy the new maximum trace into the separate maximum-trace
963  * structure. (this way the maximum trace is permanently saved,
964  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
965  */
966 static void
967 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
968 {
969         struct trace_buffer *trace_buf = &tr->trace_buffer;
970         struct trace_buffer *max_buf = &tr->max_buffer;
971         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
972         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
973
974         max_buf->cpu = cpu;
975         max_buf->time_start = data->preempt_timestamp;
976
977         max_data->saved_latency = tr->max_latency;
978         max_data->critical_start = data->critical_start;
979         max_data->critical_end = data->critical_end;
980
981         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
982         max_data->pid = tsk->pid;
983         /*
984          * If tsk == current, then use current_uid(), as that does not use
985          * RCU. The irq tracer can be called out of RCU scope.
986          */
987         if (tsk == current)
988                 max_data->uid = current_uid();
989         else
990                 max_data->uid = task_uid(tsk);
991
992         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
993         max_data->policy = tsk->policy;
994         max_data->rt_priority = tsk->rt_priority;
995
996         /* record this task's comm */
997         tracing_record_cmdline(tsk);
998 }
999
1000 /**
1001  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1002  * @tr: tracer
1003  * @tsk: the task with the latency
1004  * @cpu: The cpu that initiated the trace.
1005  *
1006  * Flip the buffers between the @tr and the max_tr and record information
1007  * about which task was the cause of this latency.
1008  */
1009 void
1010 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1011 {
1012         struct ring_buffer *buf;
1013
1014         if (tr->stop_count)
1015                 return;
1016
1017         WARN_ON_ONCE(!irqs_disabled());
1018
1019         if (!tr->allocated_snapshot) {
1020                 /* Only the nop tracer should hit this when disabling */
1021                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1022                 return;
1023         }
1024
1025         arch_spin_lock(&tr->max_lock);
1026
1027         buf = tr->trace_buffer.buffer;
1028         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1029         tr->max_buffer.buffer = buf;
1030
1031         __update_max_tr(tr, tsk, cpu);
1032         arch_spin_unlock(&tr->max_lock);
1033 }
1034
1035 /**
1036  * update_max_tr_single - only copy one trace over, and reset the rest
1037  * @tr: tracer
1038  * @tsk: task with the latency
1039  * @cpu: the cpu of the buffer to copy.
1040  *
1041  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1042  */
1043 void
1044 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1045 {
1046         int ret;
1047
1048         if (tr->stop_count)
1049                 return;
1050
1051         WARN_ON_ONCE(!irqs_disabled());
1052         if (!tr->allocated_snapshot) {
1053                 /* Only the nop tracer should hit this when disabling */
1054                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1055                 return;
1056         }
1057
1058         arch_spin_lock(&tr->max_lock);
1059
1060         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1061
1062         if (ret == -EBUSY) {
1063                 /*
1064                  * We failed to swap the buffer due to a commit taking
1065                  * place on this CPU. We fail to record, but we reset
1066                  * the max trace buffer (no one writes directly to it)
1067                  * and flag that it failed.
1068                  */
1069                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1070                         "Failed to swap buffers due to commit in progress\n");
1071         }
1072
1073         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1074
1075         __update_max_tr(tr, tsk, cpu);
1076         arch_spin_unlock(&tr->max_lock);
1077 }
1078 #endif /* CONFIG_TRACER_MAX_TRACE */
1079
1080 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1081 {
1082         /* Iterators are static, they should be filled or empty */
1083         if (trace_buffer_iter(iter, iter->cpu_file))
1084                 return 0;
1085
1086         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1087                                 full);
1088 }
1089
1090 #ifdef CONFIG_FTRACE_STARTUP_TEST
1091 static int run_tracer_selftest(struct tracer *type)
1092 {
1093         struct trace_array *tr = &global_trace;
1094         struct tracer *saved_tracer = tr->current_trace;
1095         int ret;
1096
1097         if (!type->selftest || tracing_selftest_disabled)
1098                 return 0;
1099
1100         /*
1101          * Run a selftest on this tracer.
1102          * Here we reset the trace buffer, and set the current
1103          * tracer to be this tracer. The tracer can then run some
1104          * internal tracing to verify that everything is in order.
1105          * If we fail, we do not register this tracer.
1106          */
1107         tracing_reset_online_cpus(&tr->trace_buffer);
1108
1109         tr->current_trace = type;
1110
1111 #ifdef CONFIG_TRACER_MAX_TRACE
1112         if (type->use_max_tr) {
1113                 /* If we expanded the buffers, make sure the max is expanded too */
1114                 if (ring_buffer_expanded)
1115                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1116                                            RING_BUFFER_ALL_CPUS);
1117                 tr->allocated_snapshot = true;
1118         }
1119 #endif
1120
1121         /* the test is responsible for initializing and enabling */
1122         pr_info("Testing tracer %s: ", type->name);
1123         ret = type->selftest(type, tr);
1124         /* the test is responsible for resetting too */
1125         tr->current_trace = saved_tracer;
1126         if (ret) {
1127                 printk(KERN_CONT "FAILED!\n");
1128                 /* Add the warning after printing 'FAILED' */
1129                 WARN_ON(1);
1130                 return -1;
1131         }
1132         /* Only reset on passing, to avoid touching corrupted buffers */
1133         tracing_reset_online_cpus(&tr->trace_buffer);
1134
1135 #ifdef CONFIG_TRACER_MAX_TRACE
1136         if (type->use_max_tr) {
1137                 tr->allocated_snapshot = false;
1138
1139                 /* Shrink the max buffer again */
1140                 if (ring_buffer_expanded)
1141                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1142                                            RING_BUFFER_ALL_CPUS);
1143         }
1144 #endif
1145
1146         printk(KERN_CONT "PASSED\n");
1147         return 0;
1148 }
1149 #else
1150 static inline int run_tracer_selftest(struct tracer *type)
1151 {
1152         return 0;
1153 }
1154 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1155
1156 /**
1157  * register_tracer - register a tracer with the ftrace system.
1158  * @type: the plugin for the tracer
1159  *
1160  * Register a new plugin tracer.
1161  */
1162 int register_tracer(struct tracer *type)
1163 {
1164         struct tracer *t;
1165         int ret = 0;
1166
1167         if (!type->name) {
1168                 pr_info("Tracer must have a name\n");
1169                 return -1;
1170         }
1171
1172         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1173                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1174                 return -1;
1175         }
1176
1177         mutex_lock(&trace_types_lock);
1178
1179         tracing_selftest_running = true;
1180
1181         for (t = trace_types; t; t = t->next) {
1182                 if (strcmp(type->name, t->name) == 0) {
1183                         /* already found */
1184                         pr_info("Tracer %s already registered\n",
1185                                 type->name);
1186                         ret = -1;
1187                         goto out;
1188                 }
1189         }
1190
1191         if (!type->set_flag)
1192                 type->set_flag = &dummy_set_flag;
1193         if (!type->flags)
1194                 type->flags = &dummy_tracer_flags;
1195         else
1196                 if (!type->flags->opts)
1197                         type->flags->opts = dummy_tracer_opt;
1198
1199         ret = run_tracer_selftest(type);
1200         if (ret < 0)
1201                 goto out;
1202
1203         type->next = trace_types;
1204         trace_types = type;
1205
1206  out:
1207         tracing_selftest_running = false;
1208         mutex_unlock(&trace_types_lock);
1209
1210         if (ret || !default_bootup_tracer)
1211                 goto out_unlock;
1212
1213         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1214                 goto out_unlock;
1215
1216         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1217         /* Do we want this tracer to start on bootup? */
1218         tracing_set_tracer(&global_trace, type->name);
1219         default_bootup_tracer = NULL;
1220         /* disable other selftests, since this will break them. */
1221         tracing_selftest_disabled = true;
1222 #ifdef CONFIG_FTRACE_STARTUP_TEST
1223         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1224                type->name);
1225 #endif
1226
1227  out_unlock:
1228         return ret;
1229 }
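/*
 * Usage sketch (illustrative, not part of this file): a minimal tracer
 * registration. "example_tracer" and "example_trace_init" are hypothetical
 * names; a real tracer would also supply reset/start/stop callbacks and a
 * selftest as needed.
 *
 *	static int example_trace_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_trace_init,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */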
1230
1231 void tracing_reset(struct trace_buffer *buf, int cpu)
1232 {
1233         struct ring_buffer *buffer = buf->buffer;
1234
1235         if (!buffer)
1236                 return;
1237
1238         ring_buffer_record_disable(buffer);
1239
1240         /* Make sure all commits have finished */
1241         synchronize_sched();
1242         ring_buffer_reset_cpu(buffer, cpu);
1243
1244         ring_buffer_record_enable(buffer);
1245 }
1246
1247 void tracing_reset_online_cpus(struct trace_buffer *buf)
1248 {
1249         struct ring_buffer *buffer = buf->buffer;
1250         int cpu;
1251
1252         if (!buffer)
1253                 return;
1254
1255         ring_buffer_record_disable(buffer);
1256
1257         /* Make sure all commits have finished */
1258         synchronize_sched();
1259
1260         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1261
1262         for_each_online_cpu(cpu)
1263                 ring_buffer_reset_cpu(buffer, cpu);
1264
1265         ring_buffer_record_enable(buffer);
1266 }
1267
1268 /* Must have trace_types_lock held */
1269 void tracing_reset_all_online_cpus(void)
1270 {
1271         struct trace_array *tr;
1272
1273         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1274                 tracing_reset_online_cpus(&tr->trace_buffer);
1275 #ifdef CONFIG_TRACER_MAX_TRACE
1276                 tracing_reset_online_cpus(&tr->max_buffer);
1277 #endif
1278         }
1279 }
1280
1281 #define SAVED_CMDLINES_DEFAULT 128
1282 #define NO_CMDLINE_MAP UINT_MAX
1283 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1284 struct saved_cmdlines_buffer {
1285         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1286         unsigned *map_cmdline_to_pid;
1287         unsigned cmdline_num;
1288         int cmdline_idx;
1289         char *saved_cmdlines;
1290 };
1291 static struct saved_cmdlines_buffer *savedcmd;
1292
1293 /* temporarily disable recording */
1294 static atomic_t trace_record_cmdline_disabled __read_mostly;
1295
1296 static inline char *get_saved_cmdlines(int idx)
1297 {
1298         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1299 }
1300
1301 static inline void set_cmdline(int idx, const char *cmdline)
1302 {
1303         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1304 }
1305
1306 static int allocate_cmdlines_buffer(unsigned int val,
1307                                     struct saved_cmdlines_buffer *s)
1308 {
1309         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1310                                         GFP_KERNEL);
1311         if (!s->map_cmdline_to_pid)
1312                 return -ENOMEM;
1313
1314         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1315         if (!s->saved_cmdlines) {
1316                 kfree(s->map_cmdline_to_pid);
1317                 return -ENOMEM;
1318         }
1319
1320         s->cmdline_idx = 0;
1321         s->cmdline_num = val;
1322         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1323                sizeof(s->map_pid_to_cmdline));
1324         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1325                val * sizeof(*s->map_cmdline_to_pid));
1326
1327         return 0;
1328 }
1329
1330 static int trace_create_savedcmd(void)
1331 {
1332         int ret;
1333
1334         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1335         if (!savedcmd)
1336                 return -ENOMEM;
1337
1338         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1339         if (ret < 0) {
1340                 kfree(savedcmd);
1341                 savedcmd = NULL;
1342                 return -ENOMEM;
1343         }
1344
1345         return 0;
1346 }
1347
1348 int is_tracing_stopped(void)
1349 {
1350         return global_trace.stop_count;
1351 }
1352
1353 /**
1354  * tracing_start - quick start of the tracer
1355  *
1356  * If tracing is enabled but was stopped by tracing_stop,
1357  * this will start the tracer back up.
1358  */
1359 void tracing_start(void)
1360 {
1361         struct ring_buffer *buffer;
1362         unsigned long flags;
1363
1364         if (tracing_disabled)
1365                 return;
1366
1367         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1368         if (--global_trace.stop_count) {
1369                 if (global_trace.stop_count < 0) {
1370                         /* Someone screwed up their debugging */
1371                         WARN_ON_ONCE(1);
1372                         global_trace.stop_count = 0;
1373                 }
1374                 goto out;
1375         }
1376
1377         /* Prevent the buffers from switching */
1378         arch_spin_lock(&global_trace.max_lock);
1379
1380         buffer = global_trace.trace_buffer.buffer;
1381         if (buffer)
1382                 ring_buffer_record_enable(buffer);
1383
1384 #ifdef CONFIG_TRACER_MAX_TRACE
1385         buffer = global_trace.max_buffer.buffer;
1386         if (buffer)
1387                 ring_buffer_record_enable(buffer);
1388 #endif
1389
1390         arch_spin_unlock(&global_trace.max_lock);
1391
1392  out:
1393         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1394 }
1395
1396 static void tracing_start_tr(struct trace_array *tr)
1397 {
1398         struct ring_buffer *buffer;
1399         unsigned long flags;
1400
1401         if (tracing_disabled)
1402                 return;
1403
1404         /* If global, we need to also start the max tracer */
1405         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1406                 return tracing_start();
1407
1408         raw_spin_lock_irqsave(&tr->start_lock, flags);
1409
1410         if (--tr->stop_count) {
1411                 if (tr->stop_count < 0) {
1412                         /* Someone screwed up their debugging */
1413                         WARN_ON_ONCE(1);
1414                         tr->stop_count = 0;
1415                 }
1416                 goto out;
1417         }
1418
1419         buffer = tr->trace_buffer.buffer;
1420         if (buffer)
1421                 ring_buffer_record_enable(buffer);
1422
1423  out:
1424         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1425 }
1426
1427 /**
1428  * tracing_stop - quick stop of the tracer
1429  *
1430  * Light weight way to stop tracing. Use in conjunction with
1431  * tracing_start.
1432  */
1433 void tracing_stop(void)
1434 {
1435         struct ring_buffer *buffer;
1436         unsigned long flags;
1437
1438         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1439         if (global_trace.stop_count++)
1440                 goto out;
1441
1442         /* Prevent the buffers from switching */
1443         arch_spin_lock(&global_trace.max_lock);
1444
1445         buffer = global_trace.trace_buffer.buffer;
1446         if (buffer)
1447                 ring_buffer_record_disable(buffer);
1448
1449 #ifdef CONFIG_TRACER_MAX_TRACE
1450         buffer = global_trace.max_buffer.buffer;
1451         if (buffer)
1452                 ring_buffer_record_disable(buffer);
1453 #endif
1454
1455         arch_spin_unlock(&global_trace.max_lock);
1456
1457  out:
1458         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1459 }
1460
1461 static void tracing_stop_tr(struct trace_array *tr)
1462 {
1463         struct ring_buffer *buffer;
1464         unsigned long flags;
1465
1466         /* If global, we need to also stop the max tracer */
1467         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1468                 return tracing_stop();
1469
1470         raw_spin_lock_irqsave(&tr->start_lock, flags);
1471         if (tr->stop_count++)
1472                 goto out;
1473
1474         buffer = tr->trace_buffer.buffer;
1475         if (buffer)
1476                 ring_buffer_record_disable(buffer);
1477
1478  out:
1479         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1480 }
1481
1482 void trace_stop_cmdline_recording(void);
1483
1484 static int trace_save_cmdline(struct task_struct *tsk)
1485 {
1486         unsigned pid, idx;
1487
1488         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1489                 return 0;
1490
1491         /*
1492          * It's not the end of the world if we don't get
1493          * the lock, but we also don't want to spin
1494          * nor do we want to disable interrupts,
1495          * so if we miss here, then better luck next time.
1496          */
1497         if (!arch_spin_trylock(&trace_cmdline_lock))
1498                 return 0;
1499
1500         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1501         if (idx == NO_CMDLINE_MAP) {
1502                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1503
1504                 /*
1505                  * Check whether the cmdline buffer at idx has a pid
1506                  * mapped. We are going to overwrite that entry so we
1507                  * need to clear the map_pid_to_cmdline. Otherwise we
1508                  * would read the new comm for the old pid.
1509                  */
1510                 pid = savedcmd->map_cmdline_to_pid[idx];
1511                 if (pid != NO_CMDLINE_MAP)
1512                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1513
1514                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1515                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1516
1517                 savedcmd->cmdline_idx = idx;
1518         }
1519
1520         set_cmdline(idx, tsk->comm);
1521
1522         arch_spin_unlock(&trace_cmdline_lock);
1523
1524         return 1;
1525 }
1526
1527 static void __trace_find_cmdline(int pid, char comm[])
1528 {
1529         unsigned map;
1530
1531         if (!pid) {
1532                 strcpy(comm, "<idle>");
1533                 return;
1534         }
1535
1536         if (WARN_ON_ONCE(pid < 0)) {
1537                 strcpy(comm, "<XXX>");
1538                 return;
1539         }
1540
1541         if (pid > PID_MAX_DEFAULT) {
1542                 strcpy(comm, "<...>");
1543                 return;
1544         }
1545
1546         map = savedcmd->map_pid_to_cmdline[pid];
1547         if (map != NO_CMDLINE_MAP)
1548                 strcpy(comm, get_saved_cmdlines(map));
1549         else
1550                 strcpy(comm, "<...>");
1551 }
1552
1553 void trace_find_cmdline(int pid, char comm[])
1554 {
1555         preempt_disable();
1556         arch_spin_lock(&trace_cmdline_lock);
1557
1558         __trace_find_cmdline(pid, comm);
1559
1560         arch_spin_unlock(&trace_cmdline_lock);
1561         preempt_enable();
1562 }
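/*
 * Usage sketch (illustrative): output code resolves a recorded pid back to
 * a command name with a fixed-size buffer, e.g.
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *
 * If the pid was never recorded (or its slot has been recycled), comm is
 * set to "<...>" as implemented above.
 */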
1563
1564 void tracing_record_cmdline(struct task_struct *tsk)
1565 {
1566         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1567                 return;
1568
1569         if (!__this_cpu_read(trace_cmdline_save))
1570                 return;
1571
1572         if (trace_save_cmdline(tsk))
1573                 __this_cpu_write(trace_cmdline_save, false);
1574 }
1575
1576 void
1577 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1578                              int pc)
1579 {
1580         struct task_struct *tsk = current;
1581
1582         entry->preempt_count            = pc & 0xff;
1583         entry->pid                      = (tsk) ? tsk->pid : 0;
1584         entry->flags =
1585 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1586                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1587 #else
1588                 TRACE_FLAG_IRQS_NOSUPPORT |
1589 #endif
1590                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1591                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1592                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1593                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1594 }
1595 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1596
1597 struct ring_buffer_event *
1598 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1599                           int type,
1600                           unsigned long len,
1601                           unsigned long flags, int pc)
1602 {
1603         struct ring_buffer_event *event;
1604
1605         event = ring_buffer_lock_reserve(buffer, len);
1606         if (event != NULL) {
1607                 struct trace_entry *ent = ring_buffer_event_data(event);
1608
1609                 tracing_generic_entry_update(ent, flags, pc);
1610                 ent->type = type;
1611         }
1612
1613         return event;
1614 }
1615
1616 void
1617 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1618 {
1619         __this_cpu_write(trace_cmdline_save, true);
1620         ring_buffer_unlock_commit(buffer, event);
1621 }
1622
1623 static inline void
1624 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1625                              struct ring_buffer_event *event,
1626                              unsigned long flags, int pc)
1627 {
1628         __buffer_unlock_commit(buffer, event);
1629
1630         ftrace_trace_stack(buffer, flags, 6, pc);
1631         ftrace_trace_userstack(buffer, flags, pc);
1632 }
1633
1634 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1635                                 struct ring_buffer_event *event,
1636                                 unsigned long flags, int pc)
1637 {
1638         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1639 }
1640 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1641
1642 static struct ring_buffer *temp_buffer;
1643
1644 struct ring_buffer_event *
1645 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1646                           struct ftrace_event_file *ftrace_file,
1647                           int type, unsigned long len,
1648                           unsigned long flags, int pc)
1649 {
1650         struct ring_buffer_event *entry;
1651
1652         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1653         entry = trace_buffer_lock_reserve(*current_rb,
1654                                          type, len, flags, pc);
1655         /*
1656          * If tracing is off, but we have triggers enabled,
1657          * we still need to look at the event data. Use the temp_buffer
1658          * to store the trace event for the trigger to use. It's recursion
1659          * safe and will not be recorded anywhere.
1660          */
1661         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1662                 *current_rb = temp_buffer;
1663                 entry = trace_buffer_lock_reserve(*current_rb,
1664                                                   type, len, flags, pc);
1665         }
1666         return entry;
1667 }
1668 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1669
1670 struct ring_buffer_event *
1671 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1672                                   int type, unsigned long len,
1673                                   unsigned long flags, int pc)
1674 {
1675         *current_rb = global_trace.trace_buffer.buffer;
1676         return trace_buffer_lock_reserve(*current_rb,
1677                                          type, len, flags, pc);
1678 }
1679 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1680
1681 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1682                                         struct ring_buffer_event *event,
1683                                         unsigned long flags, int pc)
1684 {
1685         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1686 }
1687 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1688
1689 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1690                                      struct ring_buffer_event *event,
1691                                      unsigned long flags, int pc,
1692                                      struct pt_regs *regs)
1693 {
1694         __buffer_unlock_commit(buffer, event);
1695
1696         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1697         ftrace_trace_userstack(buffer, flags, pc);
1698 }
1699 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1700
1701 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1702                                          struct ring_buffer_event *event)
1703 {
1704         ring_buffer_discard_commit(buffer, event);
1705 }
1706 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1707
1708 void
1709 trace_function(struct trace_array *tr,
1710                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1711                int pc)
1712 {
1713         struct ftrace_event_call *call = &event_function;
1714         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1715         struct ring_buffer_event *event;
1716         struct ftrace_entry *entry;
1717
1718         /* If we are reading the ring buffer, don't trace */
1719         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1720                 return;
1721
1722         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1723                                           flags, pc);
1724         if (!event)
1725                 return;
1726         entry   = ring_buffer_event_data(event);
1727         entry->ip                       = ip;
1728         entry->parent_ip                = parent_ip;
1729
1730         if (!call_filter_check_discard(call, entry, buffer, event))
1731                 __buffer_unlock_commit(buffer, event);
1732 }
1733
1734 #ifdef CONFIG_STACKTRACE
1735
1736 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1737 struct ftrace_stack {
1738         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1739 };
1740
1741 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1742 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1743
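     /*
      * Record a kernel stack trace into the ring buffer. The per-cpu
      * ftrace_stack scratch area above is used to collect a deep trace;
      * ftrace_stack_reserve counts nesting so that only the outermost
      * user on a CPU gets the scratch area, while nested users (irq/NMI)
      * fall back to the fixed-size array inside the stack_entry itself.
      */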
1744 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1745                                  unsigned long flags,
1746                                  int skip, int pc, struct pt_regs *regs)
1747 {
1748         struct ftrace_event_call *call = &event_kernel_stack;
1749         struct ring_buffer_event *event;
1750         struct stack_entry *entry;
1751         struct stack_trace trace;
1752         int use_stack;
1753         int size = FTRACE_STACK_ENTRIES;
1754
1755         trace.nr_entries        = 0;
1756         trace.skip              = skip;
1757
1758         /*
1759          * Since events can happen in NMIs, there's no safe way to
1760          * use the per cpu ftrace_stacks. We reserve it, and if an interrupt
1761          * or NMI comes in, it will just have to use the default
1762          * FTRACE_STACK_ENTRIES depth.
1763          */
1764         preempt_disable_notrace();
1765
1766         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1767         /*
1768          * We don't need any atomic variables, just a barrier.
1769          * If an interrupt comes in, we don't care, because it would
1770          * have exited and put the counter back to what we want.
1771          * We just need a barrier to keep gcc from moving things
1772          * around.
1773          */
1774         barrier();
1775         if (use_stack == 1) {
1776                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1777                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1778
1779                 if (regs)
1780                         save_stack_trace_regs(regs, &trace);
1781                 else
1782                         save_stack_trace(&trace);
1783
1784                 if (trace.nr_entries > size)
1785                         size = trace.nr_entries;
1786         } else
1787                 /* From now on, use_stack is a boolean */
1788                 use_stack = 0;
1789
1790         size *= sizeof(unsigned long);
1791
1792         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1793                                           sizeof(*entry) + size, flags, pc);
1794         if (!event)
1795                 goto out;
1796         entry = ring_buffer_event_data(event);
1797
1798         memset(&entry->caller, 0, size);
1799
1800         if (use_stack)
1801                 memcpy(&entry->caller, trace.entries,
1802                        trace.nr_entries * sizeof(unsigned long));
1803         else {
1804                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1805                 trace.entries           = entry->caller;
1806                 if (regs)
1807                         save_stack_trace_regs(regs, &trace);
1808                 else
1809                         save_stack_trace(&trace);
1810         }
1811
1812         entry->size = trace.nr_entries;
1813
1814         if (!call_filter_check_discard(call, entry, buffer, event))
1815                 __buffer_unlock_commit(buffer, event);
1816
1817  out:
1818         /* Again, don't let gcc optimize things here */
1819         barrier();
1820         __this_cpu_dec(ftrace_stack_reserve);
1821         preempt_enable_notrace();
1822
1823 }
1824
1825 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1826                              int skip, int pc, struct pt_regs *regs)
1827 {
1828         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1829                 return;
1830
1831         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1832 }
1833
1834 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1835                         int skip, int pc)
1836 {
1837         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1838                 return;
1839
1840         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1841 }
1842
1843 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1844                    int pc)
1845 {
1846         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1847 }
1848
1849 /**
1850  * trace_dump_stack - record a stack back trace in the trace buffer
1851  * @skip: Number of functions to skip (helper handlers)
1852  */
1853 void trace_dump_stack(int skip)
1854 {
1855         unsigned long flags;
1856
1857         if (tracing_disabled || tracing_selftest_running)
1858                 return;
1859
1860         local_save_flags(flags);
1861
1862         /*
1863          * Skip 3 more; that seems to get us to the caller of
1864          * this function.
1865          */
1866         skip += 3;
1867         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1868                              flags, skip, preempt_count(), NULL);
1869 }
1870
1871 static DEFINE_PER_CPU(int, user_stack_count);
1872
1873 void
1874 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1875 {
1876         struct ftrace_event_call *call = &event_user_stack;
1877         struct ring_buffer_event *event;
1878         struct userstack_entry *entry;
1879         struct stack_trace trace;
1880
1881         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1882                 return;
1883
1884         /*
1885          * NMIs cannot handle page faults, even with fixups.
1886          * Saving the user stack can (and often does) fault.
1887          */
1888         if (unlikely(in_nmi()))
1889                 return;
1890
1891         /*
1892          * Prevent recursion, since the user stack tracing may
1893          * trigger other kernel events.
1894          */
1895         preempt_disable();
1896         if (__this_cpu_read(user_stack_count))
1897                 goto out;
1898
1899         __this_cpu_inc(user_stack_count);
1900
1901         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1902                                           sizeof(*entry), flags, pc);
1903         if (!event)
1904                 goto out_drop_count;
1905         entry   = ring_buffer_event_data(event);
1906
1907         entry->tgid             = current->tgid;
1908         memset(&entry->caller, 0, sizeof(entry->caller));
1909
1910         trace.nr_entries        = 0;
1911         trace.max_entries       = FTRACE_STACK_ENTRIES;
1912         trace.skip              = 0;
1913         trace.entries           = entry->caller;
1914
1915         save_stack_trace_user(&trace);
1916         if (!call_filter_check_discard(call, entry, buffer, event))
1917                 __buffer_unlock_commit(buffer, event);
1918
1919  out_drop_count:
1920         __this_cpu_dec(user_stack_count);
1921  out:
1922         preempt_enable();
1923 }
1924
1925 #ifdef UNUSED
1926 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1927 {
1928         ftrace_trace_userstack(tr, flags, preempt_count());
1929 }
1930 #endif /* UNUSED */
1931
1932 #endif /* CONFIG_STACKTRACE */
1933
1934 /* created for use with alloc_percpu */
1935 struct trace_buffer_struct {
1936         char buffer[TRACE_BUF_SIZE];
1937 };
1938
1939 static struct trace_buffer_struct *trace_percpu_buffer;
1940 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1941 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1942 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1943
1944 /*
1945  * The buffer used is dependent on the context. There is a per cpu
1946  * buffer for normal context, softirq context, hard irq context and
1947  * for NMI context. This allows for lockless recording.
1948  *
1949  * Note: if the buffers failed to be allocated, then this returns NULL.
1950  */
1951 static char *get_trace_buf(void)
1952 {
1953         struct trace_buffer_struct *percpu_buffer;
1954
1955         /*
1956          * If we have allocated per cpu buffers, then we do not
1957          * need to do any locking.
1958          */
1959         if (in_nmi())
1960                 percpu_buffer = trace_percpu_nmi_buffer;
1961         else if (in_irq())
1962                 percpu_buffer = trace_percpu_irq_buffer;
1963         else if (in_softirq())
1964                 percpu_buffer = trace_percpu_sirq_buffer;
1965         else
1966                 percpu_buffer = trace_percpu_buffer;
1967
1968         if (!percpu_buffer)
1969                 return NULL;
1970
1971         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1972 }
1973
1974 static int alloc_percpu_trace_buffer(void)
1975 {
1976         struct trace_buffer_struct *buffers;
1977         struct trace_buffer_struct *sirq_buffers;
1978         struct trace_buffer_struct *irq_buffers;
1979         struct trace_buffer_struct *nmi_buffers;
1980
1981         buffers = alloc_percpu(struct trace_buffer_struct);
1982         if (!buffers)
1983                 goto err_warn;
1984
1985         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1986         if (!sirq_buffers)
1987                 goto err_sirq;
1988
1989         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1990         if (!irq_buffers)
1991                 goto err_irq;
1992
1993         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1994         if (!nmi_buffers)
1995                 goto err_nmi;
1996
1997         trace_percpu_buffer = buffers;
1998         trace_percpu_sirq_buffer = sirq_buffers;
1999         trace_percpu_irq_buffer = irq_buffers;
2000         trace_percpu_nmi_buffer = nmi_buffers;
2001
2002         return 0;
2003
2004  err_nmi:
2005         free_percpu(irq_buffers);
2006  err_irq:
2007         free_percpu(sirq_buffers);
2008  err_sirq:
2009         free_percpu(buffers);
2010  err_warn:
2011         WARN(1, "Could not allocate percpu trace_printk buffer");
2012         return -ENOMEM;
2013 }
2014
2015 static int buffers_allocated;
2016
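     /*
      * Prepare for trace_printk() use: allocate the per-cpu buffers, expand
      * the ring buffer to its set size and, if the top level trace buffer
      * already exists, start recording task comms.
      */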
2017 void trace_printk_init_buffers(void)
2018 {
2019         if (buffers_allocated)
2020                 return;
2021
2022         if (alloc_percpu_trace_buffer())
2023                 return;
2024
2025         /* trace_printk() is for debug use only. Don't use it in production. */
2026
2027         pr_warning("\n**********************************************************\n");
2028         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2029         pr_warning("**                                                      **\n");
2030         pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2031         pr_warning("**                                                      **\n");
2032         pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2033         pr_warning("** unsafe for produciton use.                           **\n");
2034         pr_warning("**                                                      **\n");
2035         pr_warning("** If you see this message and you are not debugging    **\n");
2036         pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2037         pr_warning("**                                                      **\n");
2038         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2039         pr_warning("**********************************************************\n");
2040
2041         /* Expand the buffers to the set size */
2042         tracing_update_buffers();
2043
2044         buffers_allocated = 1;
2045
2046         /*
2047          * trace_printk_init_buffers() can be called by modules.
2048          * If that happens, then we need to start cmdline recording
2049          * directly here. If the global_trace.buffer is already
2050          * allocated here, then this was called by module code.
2051          */
2052         if (global_trace.trace_buffer.buffer)
2053                 tracing_start_cmdline_record();
2054 }
2055
2056 void trace_printk_start_comm(void)
2057 {
2058         /* Start tracing comms if trace printk is set */
2059         if (!buffers_allocated)
2060                 return;
2061         tracing_start_cmdline_record();
2062 }
2063
2064 static void trace_printk_start_stop_comm(int enabled)
2065 {
2066         if (!buffers_allocated)
2067                 return;
2068
2069         if (enabled)
2070                 tracing_start_cmdline_record();
2071         else
2072                 tracing_stop_cmdline_record();
2073 }
2074
2075 /**
2076  * trace_vbprintk - write binary msg to tracing buffer
2077  * @ip: the address of the caller
      * @fmt: the format string to write in binary form
      * @args: the va_list of arguments for @fmt
2078  */
2079 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2080 {
2081         struct ftrace_event_call *call = &event_bprint;
2082         struct ring_buffer_event *event;
2083         struct ring_buffer *buffer;
2084         struct trace_array *tr = &global_trace;
2085         struct bprint_entry *entry;
2086         unsigned long flags;
2087         char *tbuffer;
2088         int len = 0, size, pc;
2089
2090         if (unlikely(tracing_selftest_running || tracing_disabled))
2091                 return 0;
2092
2093         /* Don't pollute graph traces with trace_vprintk internals */
2094         pause_graph_tracing();
2095
2096         pc = preempt_count();
2097         preempt_disable_notrace();
2098
2099         tbuffer = get_trace_buf();
2100         if (!tbuffer) {
2101                 len = 0;
2102                 goto out;
2103         }
2104
2105         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2106
2107         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2108                 goto out;
2109
2110         local_save_flags(flags);
2111         size = sizeof(*entry) + sizeof(u32) * len;
2112         buffer = tr->trace_buffer.buffer;
2113         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2114                                           flags, pc);
2115         if (!event)
2116                 goto out;
2117         entry = ring_buffer_event_data(event);
2118         entry->ip                       = ip;
2119         entry->fmt                      = fmt;
2120
2121         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2122         if (!call_filter_check_discard(call, entry, buffer, event)) {
2123                 __buffer_unlock_commit(buffer, event);
2124                 ftrace_trace_stack(buffer, flags, 6, pc);
2125         }
2126
2127 out:
2128         preempt_enable_notrace();
2129         unpause_graph_tracing();
2130
2131         return len;
2132 }
2133 EXPORT_SYMBOL_GPL(trace_vbprintk);
2134
2135 static int
2136 __trace_array_vprintk(struct ring_buffer *buffer,
2137                       unsigned long ip, const char *fmt, va_list args)
2138 {
2139         struct ftrace_event_call *call = &event_print;
2140         struct ring_buffer_event *event;
2141         int len = 0, size, pc;
2142         struct print_entry *entry;
2143         unsigned long flags;
2144         char *tbuffer;
2145
2146         if (tracing_disabled || tracing_selftest_running)
2147                 return 0;
2148
2149         /* Don't pollute graph traces with trace_vprintk internals */
2150         pause_graph_tracing();
2151
2152         pc = preempt_count();
2153         preempt_disable_notrace();
2154
2155
2156         tbuffer = get_trace_buf();
2157         if (!tbuffer) {
2158                 len = 0;
2159                 goto out;
2160         }
2161
2162         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2163
2164         local_save_flags(flags);
2165         size = sizeof(*entry) + len + 1;
2166         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2167                                           flags, pc);
2168         if (!event)
2169                 goto out;
2170         entry = ring_buffer_event_data(event);
2171         entry->ip = ip;
2172
2173         memcpy(&entry->buf, tbuffer, len + 1);
2174         if (!call_filter_check_discard(call, entry, buffer, event)) {
2175                 __buffer_unlock_commit(buffer, event);
2176                 ftrace_trace_stack(buffer, flags, 6, pc);
2177         }
2178  out:
2179         preempt_enable_notrace();
2180         unpause_graph_tracing();
2181
2182         return len;
2183 }
2184
2185 int trace_array_vprintk(struct trace_array *tr,
2186                         unsigned long ip, const char *fmt, va_list args)
2187 {
2188         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2189 }
2190
2191 int trace_array_printk(struct trace_array *tr,
2192                        unsigned long ip, const char *fmt, ...)
2193 {
2194         int ret;
2195         va_list ap;
2196
2197         if (!(trace_flags & TRACE_ITER_PRINTK))
2198                 return 0;
2199
2200         va_start(ap, fmt);
2201         ret = trace_array_vprintk(tr, ip, fmt, ap);
2202         va_end(ap);
2203         return ret;
2204 }
2205
2206 int trace_array_printk_buf(struct ring_buffer *buffer,
2207                            unsigned long ip, const char *fmt, ...)
2208 {
2209         int ret;
2210         va_list ap;
2211
2212         if (!(trace_flags & TRACE_ITER_PRINTK))
2213                 return 0;
2214
2215         va_start(ap, fmt);
2216         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2217         va_end(ap);
2218         return ret;
2219 }
2220
2221 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2222 {
2223         return trace_array_vprintk(&global_trace, ip, fmt, args);
2224 }
2225 EXPORT_SYMBOL_GPL(trace_vprintk);
2226
2227 static void trace_iterator_increment(struct trace_iterator *iter)
2228 {
2229         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2230
2231         iter->idx++;
2232         if (buf_iter)
2233                 ring_buffer_read(buf_iter, NULL);
2234 }
2235
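     /*
      * Peek at the next entry for @cpu without consuming it: use the ring
      * buffer iterator when one exists, otherwise peek directly at the live
      * buffer (which also reports lost events). The size of the entry is
      * saved in iter->ent_size.
      */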
2236 static struct trace_entry *
2237 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2238                 unsigned long *lost_events)
2239 {
2240         struct ring_buffer_event *event;
2241         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2242
2243         if (buf_iter)
2244                 event = ring_buffer_iter_peek(buf_iter, ts);
2245         else
2246                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2247                                          lost_events);
2248
2249         if (event) {
2250                 iter->ent_size = ring_buffer_event_length(event);
2251                 return ring_buffer_event_data(event);
2252         }
2253         iter->ent_size = 0;
2254         return NULL;
2255 }
2256
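     /*
      * Find the oldest entry among the per-cpu buffers by comparing
      * timestamps, or peek at a single cpu when the iterator is bound to
      * one. Optionally returns the entry's cpu, timestamp and the number
      * of lost events.
      */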
2257 static struct trace_entry *
2258 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2259                   unsigned long *missing_events, u64 *ent_ts)
2260 {
2261         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2262         struct trace_entry *ent, *next = NULL;
2263         unsigned long lost_events = 0, next_lost = 0;
2264         int cpu_file = iter->cpu_file;
2265         u64 next_ts = 0, ts;
2266         int next_cpu = -1;
2267         int next_size = 0;
2268         int cpu;
2269
2270         /*
2271          * If we are in a per_cpu trace file, don't bother iterating over
2272          * all the cpus; peek at that cpu directly.
2273          */
2274         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2275                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2276                         return NULL;
2277                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2278                 if (ent_cpu)
2279                         *ent_cpu = cpu_file;
2280
2281                 return ent;
2282         }
2283
2284         for_each_tracing_cpu(cpu) {
2285
2286                 if (ring_buffer_empty_cpu(buffer, cpu))
2287                         continue;
2288
2289                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2290
2291                 /*
2292                  * Pick the entry with the smallest timestamp:
2293                  */
2294                 if (ent && (!next || ts < next_ts)) {
2295                         next = ent;
2296                         next_cpu = cpu;
2297                         next_ts = ts;
2298                         next_lost = lost_events;
2299                         next_size = iter->ent_size;
2300                 }
2301         }
2302
2303         iter->ent_size = next_size;
2304
2305         if (ent_cpu)
2306                 *ent_cpu = next_cpu;
2307
2308         if (ent_ts)
2309                 *ent_ts = next_ts;
2310
2311         if (missing_events)
2312                 *missing_events = next_lost;
2313
2314         return next;
2315 }
2316
2317 /* Find the next real entry, without updating the iterator itself */
2318 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2319                                           int *ent_cpu, u64 *ent_ts)
2320 {
2321         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2322 }
2323
2324 /* Find the next real entry, and increment the iterator to the next entry */
2325 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2326 {
2327         iter->ent = __find_next_entry(iter, &iter->cpu,
2328                                       &iter->lost_events, &iter->ts);
2329
2330         if (iter->ent)
2331                 trace_iterator_increment(iter);
2332
2333         return iter->ent ? iter : NULL;
2334 }
2335
2336 static void trace_consume(struct trace_iterator *iter)
2337 {
2338         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2339                             &iter->lost_events);
2340 }
2341
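     /*
      * seq_file .next callback: advance the trace iterator to the entry
      * at the requested position.
      */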
2342 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2343 {
2344         struct trace_iterator *iter = m->private;
2345         int i = (int)*pos;
2346         void *ent;
2347
2348         WARN_ON_ONCE(iter->leftover);
2349
2350         (*pos)++;
2351
2352         /* can't go backwards */
2353         if (iter->idx > i)
2354                 return NULL;
2355
2356         if (iter->idx < 0)
2357                 ent = trace_find_next_entry_inc(iter);
2358         else
2359                 ent = iter;
2360
2361         while (ent && iter->idx < i)
2362                 ent = trace_find_next_entry_inc(iter);
2363
2364         iter->pos = *pos;
2365
2366         return ent;
2367 }
2368
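     /*
      * Reset the ring buffer iterator for @cpu and skip over any entries
      * recorded before the buffer's time_start, counting them as skipped.
      */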
2369 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2370 {
2371         struct ring_buffer_event *event;
2372         struct ring_buffer_iter *buf_iter;
2373         unsigned long entries = 0;
2374         u64 ts;
2375
2376         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2377
2378         buf_iter = trace_buffer_iter(iter, cpu);
2379         if (!buf_iter)
2380                 return;
2381
2382         ring_buffer_iter_reset(buf_iter);
2383
2384         /*
2385          * With the max latency tracers we could have the case
2386          * that a reset never took place on a cpu. This is evident
2387          * when the timestamp is before the start of the buffer.
2388          */
2389         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2390                 if (ts >= iter->trace_buffer->time_start)
2391                         break;
2392                 entries++;
2393                 ring_buffer_read(buf_iter, NULL);
2394         }
2395
2396         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2397 }
2398
2399 /*
2400  * The current tracer is copied to avoid taking a global lock
2401  * all around.
2402  */
2403 static void *s_start(struct seq_file *m, loff_t *pos)
2404 {
2405         struct trace_iterator *iter = m->private;
2406         struct trace_array *tr = iter->tr;
2407         int cpu_file = iter->cpu_file;
2408         void *p = NULL;
2409         loff_t l = 0;
2410         int cpu;
2411
2412         /*
2413          * Copy the tracer to avoid using a global lock all around.
2414          * iter->trace is a copy of current_trace; the pointer to the
2415          * name may be used instead of a strcmp(), as iter->trace->name
2416          * will point to the same string as current_trace->name.
2417          */
2418         mutex_lock(&trace_types_lock);
2419         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2420                 *iter->trace = *tr->current_trace;
2421         mutex_unlock(&trace_types_lock);
2422
2423 #ifdef CONFIG_TRACER_MAX_TRACE
2424         if (iter->snapshot && iter->trace->use_max_tr)
2425                 return ERR_PTR(-EBUSY);
2426 #endif
2427
2428         if (!iter->snapshot)
2429                 atomic_inc(&trace_record_cmdline_disabled);
2430
2431         if (*pos != iter->pos) {
2432                 iter->ent = NULL;
2433                 iter->cpu = 0;
2434                 iter->idx = -1;
2435
2436                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2437                         for_each_tracing_cpu(cpu)
2438                                 tracing_iter_reset(iter, cpu);
2439                 } else
2440                         tracing_iter_reset(iter, cpu_file);
2441
2442                 iter->leftover = 0;
2443                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2444                         ;
2445
2446         } else {
2447                 /*
2448                  * If we overflowed the seq_file before, then we want
2449                  * to just reuse the trace_seq buffer again.
2450                  */
2451                 if (iter->leftover)
2452                         p = iter;
2453                 else {
2454                         l = *pos - 1;
2455                         p = s_next(m, p, &l);
2456                 }
2457         }
2458
2459         trace_event_read_lock();
2460         trace_access_lock(cpu_file);
2461         return p;
2462 }
2463
2464 static void s_stop(struct seq_file *m, void *p)
2465 {
2466         struct trace_iterator *iter = m->private;
2467
2468 #ifdef CONFIG_TRACER_MAX_TRACE
2469         if (iter->snapshot && iter->trace->use_max_tr)
2470                 return;
2471 #endif
2472
2473         if (!iter->snapshot)
2474                 atomic_dec(&trace_record_cmdline_disabled);
2475
2476         trace_access_unlock(iter->cpu_file);
2477         trace_event_read_unlock();
2478 }
2479
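     /*
      * Sum up the entries of a trace buffer across all cpus: @entries is
      * what is still held in the buffer, @total also counts overruns,
      * except on cpus with skipped entries where both count only what is
      * held in the buffer.
      */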
2480 static void
2481 get_total_entries(struct trace_buffer *buf,
2482                   unsigned long *total, unsigned long *entries)
2483 {
2484         unsigned long count;
2485         int cpu;
2486
2487         *total = 0;
2488         *entries = 0;
2489
2490         for_each_tracing_cpu(cpu) {
2491                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2492                 /*
2493                  * If this buffer has skipped entries, then we hold all
2494                  * entries for the trace and we need to ignore the
2495                  * ones before the time stamp.
2496                  */
2497                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2498                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2499                         /* total is the same as the entries */
2500                         *total += count;
2501                 } else
2502                         *total += count +
2503                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2504                 *entries += count;
2505         }
2506 }
2507
2508 static void print_lat_help_header(struct seq_file *m)
2509 {
2510         seq_puts(m, "#                  _------=> CPU#            \n"
2511                     "#                 / _-----=> irqs-off        \n"
2512                     "#                | / _----=> need-resched    \n"
2513                     "#                || / _---=> hardirq/softirq \n"
2514                     "#                ||| / _--=> preempt-depth   \n"
2515                     "#                |||| /     delay            \n"
2516                     "#  cmd     pid   ||||| time  |   caller      \n"
2517                     "#     \\   /      |||||  \\    |   /         \n");
2518 }
2519
2520 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2521 {
2522         unsigned long total;
2523         unsigned long entries;
2524
2525         get_total_entries(buf, &total, &entries);
2526         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2527                    entries, total, num_online_cpus());
2528         seq_puts(m, "#\n");
2529 }
2530
2531 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2532 {
2533         print_event_info(buf, m);
2534         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2535                     "#              | |       |          |         |\n");
2536 }
2537
2538 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2539 {
2540         print_event_info(buf, m);
2541         seq_puts(m, "#                              _-----=> irqs-off\n"
2542                     "#                             / _----=> need-resched\n"
2543                     "#                            | / _---=> hardirq/softirq\n"
2544                     "#                            || / _--=> preempt-depth\n"
2545                     "#                            ||| /     delay\n"
2546                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2547                     "#              | |       |   ||||       |         |\n");
2548 }
2549
2550 void
2551 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2552 {
2553         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2554         struct trace_buffer *buf = iter->trace_buffer;
2555         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2556         struct tracer *type = iter->trace;
2557         unsigned long entries;
2558         unsigned long total;
2559         const char *name = "preemption";
2560
2561         name = type->name;
2562
2563         get_total_entries(buf, &total, &entries);
2564
2565         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2566                    name, UTS_RELEASE);
2567         seq_puts(m, "# -----------------------------------"
2568                  "---------------------------------\n");
2569         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2570                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2571                    nsecs_to_usecs(data->saved_latency),
2572                    entries,
2573                    total,
2574                    buf->cpu,
2575 #if defined(CONFIG_PREEMPT_NONE)
2576                    "server",
2577 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2578                    "desktop",
2579 #elif defined(CONFIG_PREEMPT)
2580                    "preempt",
2581 #else
2582                    "unknown",
2583 #endif
2584                    /* These are reserved for later use */
2585                    0, 0, 0, 0);
2586 #ifdef CONFIG_SMP
2587         seq_printf(m, " #P:%d)\n", num_online_cpus());
2588 #else
2589         seq_puts(m, ")\n");
2590 #endif
2591         seq_puts(m, "#    -----------------\n");
2592         seq_printf(m, "#    | task: %.16s-%d "
2593                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2594                    data->comm, data->pid,
2595                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2596                    data->policy, data->rt_priority);
2597         seq_puts(m, "#    -----------------\n");
2598
2599         if (data->critical_start) {
2600                 seq_puts(m, "#  => started at: ");
2601                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2602                 trace_print_seq(m, &iter->seq);
2603                 seq_puts(m, "\n#  => ended at:   ");
2604                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2605                 trace_print_seq(m, &iter->seq);
2606                 seq_puts(m, "\n#\n");
2607         }
2608
2609         seq_puts(m, "#\n");
2610 }
2611
2612 static void test_cpu_buff_start(struct trace_iterator *iter)
2613 {
2614         struct trace_seq *s = &iter->seq;
2615
2616         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2617                 return;
2618
2619         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2620                 return;
2621
2622         if (cpumask_test_cpu(iter->cpu, iter->started))
2623                 return;
2624
2625         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2626                 return;
2627
2628         cpumask_set_cpu(iter->cpu, iter->started);
2629
2630         /* Don't print started cpu buffer for the first entry of the trace */
2631         if (iter->idx > 1)
2632                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2633                                 iter->cpu);
2634 }
2635
2636 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2637 {
2638         struct trace_seq *s = &iter->seq;
2639         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2640         struct trace_entry *entry;
2641         struct trace_event *event;
2642
2643         entry = iter->ent;
2644
2645         test_cpu_buff_start(iter);
2646
2647         event = ftrace_find_event(entry->type);
2648
2649         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2650                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2651                         trace_print_lat_context(iter);
2652                 else
2653                         trace_print_context(iter);
2654         }
2655
2656         if (trace_seq_has_overflowed(s))
2657                 return TRACE_TYPE_PARTIAL_LINE;
2658
2659         if (event)
2660                 return event->funcs->trace(iter, sym_flags, event);
2661
2662         trace_seq_printf(s, "Unknown type %d\n", entry->type);
2663
2664         return trace_handle_return(s);
2665 }
2666
2667 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2668 {
2669         struct trace_seq *s = &iter->seq;
2670         struct trace_entry *entry;
2671         struct trace_event *event;
2672
2673         entry = iter->ent;
2674
2675         if (trace_flags & TRACE_ITER_CONTEXT_INFO)
2676                 trace_seq_printf(s, "%d %d %llu ",
2677                                  entry->pid, iter->cpu, iter->ts);
2678
2679         if (trace_seq_has_overflowed(s))
2680                 return TRACE_TYPE_PARTIAL_LINE;
2681
2682         event = ftrace_find_event(entry->type);
2683         if (event)
2684                 return event->funcs->raw(iter, 0, event);
2685
2686         trace_seq_printf(s, "%d ?\n", entry->type);
2687
2688         return trace_handle_return(s);
2689 }
2690
2691 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2692 {
2693         struct trace_seq *s = &iter->seq;
2694         unsigned char newline = '\n';
2695         struct trace_entry *entry;
2696         struct trace_event *event;
2697
2698         entry = iter->ent;
2699
2700         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2701                 SEQ_PUT_HEX_FIELD(s, entry->pid);
2702                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
2703                 SEQ_PUT_HEX_FIELD(s, iter->ts);
2704                 if (trace_seq_has_overflowed(s))
2705                         return TRACE_TYPE_PARTIAL_LINE;
2706         }
2707
2708         event = ftrace_find_event(entry->type);
2709         if (event) {
2710                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2711                 if (ret != TRACE_TYPE_HANDLED)
2712                         return ret;
2713         }
2714
2715         SEQ_PUT_FIELD(s, newline);
2716
2717         return trace_handle_return(s);
2718 }
2719
2720 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2721 {
2722         struct trace_seq *s = &iter->seq;
2723         struct trace_entry *entry;
2724         struct trace_event *event;
2725
2726         entry = iter->ent;
2727
2728         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2729                 SEQ_PUT_FIELD(s, entry->pid);
2730                 SEQ_PUT_FIELD(s, iter->cpu);
2731                 SEQ_PUT_FIELD(s, iter->ts);
2732                 if (trace_seq_has_overflowed(s))
2733                         return TRACE_TYPE_PARTIAL_LINE;
2734         }
2735
2736         event = ftrace_find_event(entry->type);
2737         return event ? event->funcs->binary(iter, 0, event) :
2738                 TRACE_TYPE_HANDLED;
2739 }
2740
2741 int trace_empty(struct trace_iterator *iter)
2742 {
2743         struct ring_buffer_iter *buf_iter;
2744         int cpu;
2745
2746         /* If we are looking at one CPU buffer, only check that one */
2747         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2748                 cpu = iter->cpu_file;
2749                 buf_iter = trace_buffer_iter(iter, cpu);
2750                 if (buf_iter) {
2751                         if (!ring_buffer_iter_empty(buf_iter))
2752                                 return 0;
2753                 } else {
2754                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2755                                 return 0;
2756                 }
2757                 return 1;
2758         }
2759
2760         for_each_tracing_cpu(cpu) {
2761                 buf_iter = trace_buffer_iter(iter, cpu);
2762                 if (buf_iter) {
2763                         if (!ring_buffer_iter_empty(buf_iter))
2764                                 return 0;
2765                 } else {
2766                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2767                                 return 0;
2768                 }
2769         }
2770
2771         return 1;
2772 }
2773
2774 /*  Called with trace_event_read_lock() held. */
2775 enum print_line_t print_trace_line(struct trace_iterator *iter)
2776 {
2777         enum print_line_t ret;
2778
2779         if (iter->lost_events) {
2780                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2781                                  iter->cpu, iter->lost_events);
2782                 if (trace_seq_has_overflowed(&iter->seq))
2783                         return TRACE_TYPE_PARTIAL_LINE;
2784         }
2785
2786         if (iter->trace && iter->trace->print_line) {
2787                 ret = iter->trace->print_line(iter);
2788                 if (ret != TRACE_TYPE_UNHANDLED)
2789                         return ret;
2790         }
2791
2792         if (iter->ent->type == TRACE_BPUTS &&
2793                         trace_flags & TRACE_ITER_PRINTK &&
2794                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2795                 return trace_print_bputs_msg_only(iter);
2796
2797         if (iter->ent->type == TRACE_BPRINT &&
2798                         trace_flags & TRACE_ITER_PRINTK &&
2799                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2800                 return trace_print_bprintk_msg_only(iter);
2801
2802         if (iter->ent->type == TRACE_PRINT &&
2803                         trace_flags & TRACE_ITER_PRINTK &&
2804                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2805                 return trace_print_printk_msg_only(iter);
2806
2807         if (trace_flags & TRACE_ITER_BIN)
2808                 return print_bin_fmt(iter);
2809
2810         if (trace_flags & TRACE_ITER_HEX)
2811                 return print_hex_fmt(iter);
2812
2813         if (trace_flags & TRACE_ITER_RAW)
2814                 return print_raw_fmt(iter);
2815
2816         return print_trace_fmt(iter);
2817 }
2818
2819 void trace_latency_header(struct seq_file *m)
2820 {
2821         struct trace_iterator *iter = m->private;
2822
2823         /* print nothing if the buffers are empty */
2824         if (trace_empty(iter))
2825                 return;
2826
2827         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2828                 print_trace_header(m, iter);
2829
2830         if (!(trace_flags & TRACE_ITER_VERBOSE))
2831                 print_lat_help_header(m);
2832 }
2833
2834 void trace_default_header(struct seq_file *m)
2835 {
2836         struct trace_iterator *iter = m->private;
2837
2838         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2839                 return;
2840
2841         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2842                 /* print nothing if the buffers are empty */
2843                 if (trace_empty(iter))
2844                         return;
2845                 print_trace_header(m, iter);
2846                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2847                         print_lat_help_header(m);
2848         } else {
2849                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2850                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2851                                 print_func_help_header_irq(iter->trace_buffer, m);
2852                         else
2853                                 print_func_help_header(iter->trace_buffer, m);
2854                 }
2855         }
2856 }
2857
2858 static void test_ftrace_alive(struct seq_file *m)
2859 {
2860         if (!ftrace_is_dead())
2861                 return;
2862         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
2863                     "#          MAY BE MISSING FUNCTION EVENTS\n");
2864 }
2865
2866 #ifdef CONFIG_TRACER_MAX_TRACE
2867 static void show_snapshot_main_help(struct seq_file *m)
2868 {
2869         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
2870                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2871                     "#                      Takes a snapshot of the main buffer.\n"
2872                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
2873                     "#                      (Doesn't have to be '2' works with any number that\n"
2874                     "#                       is not a '0' or '1')\n");
2875 }
2876
2877 static void show_snapshot_percpu_help(struct seq_file *m)
2878 {
2879         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2880 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2881         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2882                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
2883 #else
2884         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
2885                     "#                     Must use main snapshot file to allocate.\n");
2886 #endif
2887         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
2888                     "#                      (Doesn't have to be '2' works with any number that\n"
2889                     "#                       is not a '0' or '1')\n");
2890 }
2891
2892 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2893 {
2894         if (iter->tr->allocated_snapshot)
2895                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
2896         else
2897                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
2898
2899         seq_puts(m, "# Snapshot commands:\n");
2900         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2901                 show_snapshot_main_help(m);
2902         else
2903                 show_snapshot_percpu_help(m);
2904 }
2905 #else
2906 /* Should never be called */
2907 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2908 #endif
2909
2910 static int s_show(struct seq_file *m, void *v)
2911 {
2912         struct trace_iterator *iter = v;
2913         int ret;
2914
2915         if (iter->ent == NULL) {
2916                 if (iter->tr) {
2917                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2918                         seq_puts(m, "#\n");
2919                         test_ftrace_alive(m);
2920                 }
2921                 if (iter->snapshot && trace_empty(iter))
2922                         print_snapshot_help(m, iter);
2923                 else if (iter->trace && iter->trace->print_header)
2924                         iter->trace->print_header(m);
2925                 else
2926                         trace_default_header(m);
2927
2928         } else if (iter->leftover) {
2929                 /*
2930                  * If we filled the seq_file buffer earlier, we
2931                  * want to just show it now.
2932                  */
2933                 ret = trace_print_seq(m, &iter->seq);
2934
2935                 /* ret should this time be zero, but you never know */
2936                 iter->leftover = ret;
2937
2938         } else {
2939                 print_trace_line(iter);
2940                 ret = trace_print_seq(m, &iter->seq);
2941                 /*
2942                  * If we overflow the seq_file buffer, then it will
2943                  * ask us for this data again at start up.
2944                  * Use that instead.
2945                  *  ret is 0 if seq_file write succeeded.
2946                  *        -1 otherwise.
2947                  */
2948                 iter->leftover = ret;
2949         }
2950
2951         return 0;
2952 }
2953
2954 /*
2955  * Should be used after trace_array_get(); trace_types_lock
2956  * ensures that i_cdev was already initialized.
2957  */
2958 static inline int tracing_get_cpu(struct inode *inode)
2959 {
2960         if (inode->i_cdev) /* See trace_create_cpu_file() */
2961                 return (long)inode->i_cdev - 1;
2962         return RING_BUFFER_ALL_CPUS;
2963 }
2964
2965 static const struct seq_operations tracer_seq_ops = {
2966         .start          = s_start,
2967         .next           = s_next,
2968         .stop           = s_stop,
2969         .show           = s_show,
2970 };
2971
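     /*
      * Set up a trace_iterator for reading the trace: copy the current
      * tracer, pick the main or max/snapshot buffer, stop tracing unless
      * the snapshot file is being opened, and prepare ring buffer
      * iterators for the requested cpu(s).
      */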
2972 static struct trace_iterator *
2973 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2974 {
2975         struct trace_array *tr = inode->i_private;
2976         struct trace_iterator *iter;
2977         int cpu;
2978
2979         if (tracing_disabled)
2980                 return ERR_PTR(-ENODEV);
2981
2982         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2983         if (!iter)
2984                 return ERR_PTR(-ENOMEM);
2985
2986         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2987                                     GFP_KERNEL);
2988         if (!iter->buffer_iter)
2989                 goto release;
2990
2991         /*
2992          * We make a copy of the current tracer to avoid concurrent
2993          * changes on it while we are reading.
2994          */
2995         mutex_lock(&trace_types_lock);
2996         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2997         if (!iter->trace)
2998                 goto fail;
2999
3000         *iter->trace = *tr->current_trace;
3001
3002         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3003                 goto fail;
3004
3005         iter->tr = tr;
3006
3007 #ifdef CONFIG_TRACER_MAX_TRACE
3008         /* Currently only the top directory has a snapshot */
3009         if (tr->current_trace->print_max || snapshot)
3010                 iter->trace_buffer = &tr->max_buffer;
3011         else
3012 #endif
3013                 iter->trace_buffer = &tr->trace_buffer;
3014         iter->snapshot = snapshot;
3015         iter->pos = -1;
3016         iter->cpu_file = tracing_get_cpu(inode);
3017         mutex_init(&iter->mutex);
3018
3019         /* Notify the tracer early; before we stop tracing. */
3020         if (iter->trace && iter->trace->open)
3021                 iter->trace->open(iter);
3022
3023         /* Annotate start of buffers if we had overruns */
3024         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3025                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3026
3027         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3028         if (trace_clocks[tr->clock_id].in_ns)
3029                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3030
3031         /* stop the trace while dumping if we are not opening "snapshot" */
3032         if (!iter->snapshot)
3033                 tracing_stop_tr(tr);
3034
3035         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3036                 for_each_tracing_cpu(cpu) {
3037                         iter->buffer_iter[cpu] =
3038                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3039                 }
3040                 ring_buffer_read_prepare_sync();
3041                 for_each_tracing_cpu(cpu) {
3042                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3043                         tracing_iter_reset(iter, cpu);
3044                 }
3045         } else {
3046                 cpu = iter->cpu_file;
3047                 iter->buffer_iter[cpu] =
3048                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3049                 ring_buffer_read_prepare_sync();
3050                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3051                 tracing_iter_reset(iter, cpu);
3052         }
3053
3054         mutex_unlock(&trace_types_lock);
3055
3056         return iter;
3057
3058  fail:
3059         mutex_unlock(&trace_types_lock);
3060         kfree(iter->trace);
3061         kfree(iter->buffer_iter);
3062 release:
3063         seq_release_private(inode, file);
3064         return ERR_PTR(-ENOMEM);
3065 }
3066
3067 int tracing_open_generic(struct inode *inode, struct file *filp)
3068 {
3069         if (tracing_disabled)
3070                 return -ENODEV;
3071
3072         filp->private_data = inode->i_private;
3073         return 0;
3074 }
3075
3076 bool tracing_is_disabled(void)
3077 {
3078         return (tracing_disabled) ? true : false;
3079 }
3080
3081 /*
3082  * Open and update trace_array ref count.
3083  * Must have the current trace_array passed to it.
3084  */
3085 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3086 {
3087         struct trace_array *tr = inode->i_private;
3088
3089         if (tracing_disabled)
3090                 return -ENODEV;
3091
3092         if (trace_array_get(tr) < 0)
3093                 return -ENODEV;
3094
3095         filp->private_data = inode->i_private;
3096
3097         return 0;
3098 }
3099
3100 static int tracing_release(struct inode *inode, struct file *file)
3101 {
3102         struct trace_array *tr = inode->i_private;
3103         struct seq_file *m = file->private_data;
3104         struct trace_iterator *iter;
3105         int cpu;
3106
3107         if (!(file->f_mode & FMODE_READ)) {
3108                 trace_array_put(tr);
3109                 return 0;
3110         }
3111
3112         /* Writes do not use seq_file */
3113         iter = m->private;
3114         mutex_lock(&trace_types_lock);
3115
3116         for_each_tracing_cpu(cpu) {
3117                 if (iter->buffer_iter[cpu])
3118                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3119         }
3120
3121         if (iter->trace && iter->trace->close)
3122                 iter->trace->close(iter);
3123
3124         if (!iter->snapshot)
3125                 /* reenable tracing if it was previously enabled */
3126                 tracing_start_tr(tr);
3127
3128         __trace_array_put(tr);
3129
3130         mutex_unlock(&trace_types_lock);
3131
3132         mutex_destroy(&iter->mutex);
3133         free_cpumask_var(iter->started);
3134         kfree(iter->trace);
3135         kfree(iter->buffer_iter);
3136         seq_release_private(inode, file);
3137
3138         return 0;
3139 }
3140
3141 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3142 {
3143         struct trace_array *tr = inode->i_private;
3144
3145         trace_array_put(tr);
3146         return 0;
3147 }
3148
3149 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3150 {
3151         struct trace_array *tr = inode->i_private;
3152
3153         trace_array_put(tr);
3154
3155         return single_release(inode, file);
3156 }
3157
3158 static int tracing_open(struct inode *inode, struct file *file)
3159 {
3160         struct trace_array *tr = inode->i_private;
3161         struct trace_iterator *iter;
3162         int ret = 0;
3163
3164         if (trace_array_get(tr) < 0)
3165                 return -ENODEV;
3166
3167         /* If this file was open for write, then erase contents */
3168         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3169                 int cpu = tracing_get_cpu(inode);
3170
3171                 if (cpu == RING_BUFFER_ALL_CPUS)
3172                         tracing_reset_online_cpus(&tr->trace_buffer);
3173                 else
3174                         tracing_reset(&tr->trace_buffer, cpu);
3175         }
3176
3177         if (file->f_mode & FMODE_READ) {
3178                 iter = __tracing_open(inode, file, false);
3179                 if (IS_ERR(iter))
3180                         ret = PTR_ERR(iter);
3181                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3182                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3183         }
3184
3185         if (ret < 0)
3186                 trace_array_put(tr);
3187
3188         return ret;
3189 }
3190
3191 /*
3192  * Some tracers are not suitable for instance buffers.
3193  * A tracer is always available for the global array (toplevel)
3194  * or if it explicitly states that it is.
3195  */
3196 static bool
3197 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3198 {
3199         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3200 }
3201
3202 /* Find the next tracer that this trace array may use */
3203 static struct tracer *
3204 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3205 {
3206         while (t && !trace_ok_for_array(t, tr))
3207                 t = t->next;
3208
3209         return t;
3210 }
3211
3212 static void *
3213 t_next(struct seq_file *m, void *v, loff_t *pos)
3214 {
3215         struct trace_array *tr = m->private;
3216         struct tracer *t = v;
3217
3218         (*pos)++;
3219
3220         if (t)
3221                 t = get_tracer_for_array(tr, t->next);
3222
3223         return t;
3224 }
3225
3226 static void *t_start(struct seq_file *m, loff_t *pos)
3227 {
3228         struct trace_array *tr = m->private;
3229         struct tracer *t;
3230         loff_t l = 0;
3231
3232         mutex_lock(&trace_types_lock);
3233
3234         t = get_tracer_for_array(tr, trace_types);
3235         for (; t && l < *pos; t = t_next(m, t, &l))
3236                 ;
3237
3238         return t;
3239 }
3240
3241 static void t_stop(struct seq_file *m, void *p)
3242 {
3243         mutex_unlock(&trace_types_lock);
3244 }
3245
3246 static int t_show(struct seq_file *m, void *v)
3247 {
3248         struct tracer *t = v;
3249
3250         if (!t)
3251                 return 0;
3252
3253         seq_puts(m, t->name);
3254         if (t->next)
3255                 seq_putc(m, ' ');
3256         else
3257                 seq_putc(m, '\n');
3258
3259         return 0;
3260 }
3261
3262 static const struct seq_operations show_traces_seq_ops = {
3263         .start          = t_start,
3264         .next           = t_next,
3265         .stop           = t_stop,
3266         .show           = t_show,
3267 };
3268
3269 static int show_traces_open(struct inode *inode, struct file *file)
3270 {
3271         struct trace_array *tr = inode->i_private;
3272         struct seq_file *m;
3273         int ret;
3274
3275         if (tracing_disabled)
3276                 return -ENODEV;
3277
3278         ret = seq_open(file, &show_traces_seq_ops);
3279         if (ret)
3280                 return ret;
3281
3282         m = file->private_data;
3283         m->private = tr;
3284
3285         return 0;
3286 }
3287
3288 static ssize_t
3289 tracing_write_stub(struct file *filp, const char __user *ubuf,
3290                    size_t count, loff_t *ppos)
3291 {
3292         return count;
3293 }
3294
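/*
 * Common llseek for tracing files: use seq_lseek() when the file was
 * opened for read, otherwise simply reset the file position to zero.
 */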
3295 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3296 {
3297         int ret;
3298
3299         if (file->f_mode & FMODE_READ)
3300                 ret = seq_lseek(file, offset, whence);
3301         else
3302                 file->f_pos = ret = 0;
3303
3304         return ret;
3305 }
3306
3307 static const struct file_operations tracing_fops = {
3308         .open           = tracing_open,
3309         .read           = seq_read,
3310         .write          = tracing_write_stub,
3311         .llseek         = tracing_lseek,
3312         .release        = tracing_release,
3313 };
3314
3315 static const struct file_operations show_traces_fops = {
3316         .open           = show_traces_open,
3317         .read           = seq_read,
3318         .release        = seq_release,
3319         .llseek         = seq_lseek,
3320 };
3321
3322 /*
3323  * The tracer itself will not take this lock, but still we want
3324  * to provide a consistent cpumask to user-space:
3325  */
3326 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3327
3328 /*
3329  * Temporary storage for the character representation of the
3330  * CPU bitmask (and one more byte for the newline):
3331  */
3332 static char mask_str[NR_CPUS + 1];
3333
3334 static ssize_t
3335 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3336                      size_t count, loff_t *ppos)
3337 {
3338         struct trace_array *tr = file_inode(filp)->i_private;
3339         int len;
3340
3341         mutex_lock(&tracing_cpumask_update_lock);
3342
3343         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3344         if (count - len < 2) {
3345                 count = -EINVAL;
3346                 goto out_err;
3347         }
3348         len += sprintf(mask_str + len, "\n");
3349         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3350
3351 out_err:
3352         mutex_unlock(&tracing_cpumask_update_lock);
3353
3354         return count;
3355 }
3356
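/*
 * Write handler for "tracing_cpumask": parse the new mask and, for every
 * CPU whose bit changes, enable or disable recording on that CPU's ring
 * buffer. For example, "echo 3 > tracing_cpumask" limits tracing to
 * CPUs 0 and 1.
 */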
3357 static ssize_t
3358 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3359                       size_t count, loff_t *ppos)
3360 {
3361         struct trace_array *tr = file_inode(filp)->i_private;
3362         cpumask_var_t tracing_cpumask_new;
3363         int err, cpu;
3364
3365         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3366                 return -ENOMEM;
3367
3368         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3369         if (err)
3370                 goto err_unlock;
3371
3372         mutex_lock(&tracing_cpumask_update_lock);
3373
3374         local_irq_disable();
3375         arch_spin_lock(&tr->max_lock);
3376         for_each_tracing_cpu(cpu) {
3377                 /*
3378                  * Increase/decrease the disabled counter if we are
3379                  * about to flip a bit in the cpumask:
3380                  */
3381                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3382                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3383                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3384                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3385                 }
3386                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3387                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3388                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3389                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3390                 }
3391         }
3392         arch_spin_unlock(&tr->max_lock);
3393         local_irq_enable();
3394
3395         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3396
3397         mutex_unlock(&tracing_cpumask_update_lock);
3398         free_cpumask_var(tracing_cpumask_new);
3399
3400         return count;
3401
3402 err_unlock:
3403         free_cpumask_var(tracing_cpumask_new);
3404
3405         return err;
3406 }
3407
3408 static const struct file_operations tracing_cpumask_fops = {
3409         .open           = tracing_open_generic_tr,
3410         .read           = tracing_cpumask_read,
3411         .write          = tracing_cpumask_write,
3412         .release        = tracing_release_generic_tr,
3413         .llseek         = generic_file_llseek,
3414 };
3415
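/*
 * Show every global trace option followed by the options of the current
 * tracer, one per line, prefixed with "no" when the option is disabled.
 */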
3416 static int tracing_trace_options_show(struct seq_file *m, void *v)
3417 {
3418         struct tracer_opt *trace_opts;
3419         struct trace_array *tr = m->private;
3420         u32 tracer_flags;
3421         int i;
3422
3423         mutex_lock(&trace_types_lock);
3424         tracer_flags = tr->current_trace->flags->val;
3425         trace_opts = tr->current_trace->flags->opts;
3426
3427         for (i = 0; trace_options[i]; i++) {
3428                 if (trace_flags & (1 << i))
3429                         seq_printf(m, "%s\n", trace_options[i]);
3430                 else
3431                         seq_printf(m, "no%s\n", trace_options[i]);
3432         }
3433
3434         for (i = 0; trace_opts[i].name; i++) {
3435                 if (tracer_flags & trace_opts[i].bit)
3436                         seq_printf(m, "%s\n", trace_opts[i].name);
3437                 else
3438                         seq_printf(m, "no%s\n", trace_opts[i].name);
3439         }
3440         mutex_unlock(&trace_types_lock);
3441
3442         return 0;
3443 }
3444
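/*
 * Apply a single tracer-specific option: the tracer may veto the change
 * through its set_flag() callback; on success the flag bit is updated.
 */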
3445 static int __set_tracer_option(struct trace_array *tr,
3446                                struct tracer_flags *tracer_flags,
3447                                struct tracer_opt *opts, int neg)
3448 {
3449         struct tracer *trace = tr->current_trace;
3450         int ret;
3451
3452         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3453         if (ret)
3454                 return ret;
3455
3456         if (neg)
3457                 tracer_flags->val &= ~opts->bit;
3458         else
3459                 tracer_flags->val |= opts->bit;
3460         return 0;
3461 }
3462
3463 /* Try to assign a tracer specific option */
3464 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3465 {
3466         struct tracer *trace = tr->current_trace;
3467         struct tracer_flags *tracer_flags = trace->flags;
3468         struct tracer_opt *opts = NULL;
3469         int i;
3470
3471         for (i = 0; tracer_flags->opts[i].name; i++) {
3472                 opts = &tracer_flags->opts[i];
3473
3474                 if (strcmp(cmp, opts->name) == 0)
3475                         return __set_tracer_option(tr, trace->flags, opts, neg);
3476         }
3477
3478         return -EINVAL;
3479 }
3480
3481 /* Some tracers require overwrite to stay enabled */
3482 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3483 {
3484         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3485                 return -1;
3486
3487         return 0;
3488 }
3489
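/*
 * Set or clear one of the global trace option flags. The current tracer
 * may reject the change, and some flags carry side effects: cmdline
 * recording, ring buffer overwrite mode and trace_printk comm saving are
 * switched along with their bits.
 */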
3490 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3491 {
3492         /* do nothing if flag is already set */
3493         if (!!(trace_flags & mask) == !!enabled)
3494                 return 0;
3495
3496         /* Give the tracer a chance to approve the change */
3497         if (tr->current_trace->flag_changed)
3498                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3499                         return -EINVAL;
3500
3501         if (enabled)
3502                 trace_flags |= mask;
3503         else
3504                 trace_flags &= ~mask;
3505
3506         if (mask == TRACE_ITER_RECORD_CMD)
3507                 trace_event_enable_cmd_record(enabled);
3508
3509         if (mask == TRACE_ITER_OVERWRITE) {
3510                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3511 #ifdef CONFIG_TRACER_MAX_TRACE
3512                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3513 #endif
3514         }
3515
3516         if (mask == TRACE_ITER_PRINTK)
3517                 trace_printk_start_stop_comm(enabled);
3518
3519         return 0;
3520 }
3521
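/*
 * Parse a single option name (a leading "no" negates it) and apply it as
 * a global trace option or, if it does not match one, as a
 * tracer-specific option.
 */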
3522 static int trace_set_options(struct trace_array *tr, char *option)
3523 {
3524         char *cmp;
3525         int neg = 0;
3526         int ret = -ENODEV;
3527         int i;
3528
3529         cmp = strstrip(option);
3530
3531         if (strncmp(cmp, "no", 2) == 0) {
3532                 neg = 1;
3533                 cmp += 2;
3534         }
3535
3536         mutex_lock(&trace_types_lock);
3537
3538         for (i = 0; trace_options[i]; i++) {
3539                 if (strcmp(cmp, trace_options[i]) == 0) {
3540                         ret = set_tracer_flag(tr, 1 << i, !neg);
3541                         break;
3542                 }
3543         }
3544
3545         /* If no global option matched, try the tracer-specific options */
3546         if (!trace_options[i])
3547                 ret = set_tracer_option(tr, cmp, neg);
3548
3549         mutex_unlock(&trace_types_lock);
3550
3551         return ret;
3552 }
3553
3554 static ssize_t
3555 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3556                         size_t cnt, loff_t *ppos)
3557 {
3558         struct seq_file *m = filp->private_data;
3559         struct trace_array *tr = m->private;
3560         char buf[64];
3561         int ret;
3562
3563         if (cnt >= sizeof(buf))
3564                 return -EINVAL;
3565
3566         if (copy_from_user(&buf, ubuf, cnt))
3567                 return -EFAULT;
3568
3569         buf[cnt] = 0;
3570
3571         ret = trace_set_options(tr, buf);
3572         if (ret < 0)
3573                 return ret;
3574
3575         *ppos += cnt;
3576
3577         return cnt;
3578 }
3579
3580 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3581 {
3582         struct trace_array *tr = inode->i_private;
3583         int ret;
3584
3585         if (tracing_disabled)
3586                 return -ENODEV;
3587
3588         if (trace_array_get(tr) < 0)
3589                 return -ENODEV;
3590
3591         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3592         if (ret < 0)
3593                 trace_array_put(tr);
3594
3595         return ret;
3596 }
3597
3598 static const struct file_operations tracing_iter_fops = {
3599         .open           = tracing_trace_options_open,
3600         .read           = seq_read,
3601         .llseek         = seq_lseek,
3602         .release        = tracing_single_release_tr,
3603         .write          = tracing_trace_options_write,
3604 };
3605
3606 static const char readme_msg[] =
3607         "tracing mini-HOWTO:\n\n"
3608         "# echo 0 > tracing_on : quick way to disable tracing\n"
3609         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3610         " Important files:\n"
3611         "  trace\t\t\t- The static contents of the buffer\n"
3612         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3613         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3614         "  current_tracer\t- function and latency tracers\n"
3615         "  available_tracers\t- list of configured tracers for current_tracer\n"
3616         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3617         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3618         "  trace_clock\t\t- change the clock used to order events\n"
3619         "       local:   Per cpu clock but may not be synced across CPUs\n"
3620         "      global:   Synced across CPUs but slows tracing down.\n"
3621         "     counter:   Not a clock, but just an increment\n"
3622         "      uptime:   Jiffy counter from time of boot\n"
3623         "        perf:   Same clock that perf events use\n"
3624 #ifdef CONFIG_X86_64
3625         "     x86-tsc:   TSC cycle counter\n"
3626 #endif
3627         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
3628         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3629         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3630         "\t\t\t  Remove sub-buffer with rmdir\n"
3631         "  trace_options\t\t- Set format or modify how tracing happens\n"
3632         "\t\t\t  Disable an option by prefixing 'no' to the\n"
3633         "\t\t\t  option name\n"
3634         "  saved_cmdlines_size\t- echo the number of comm-pid entries to store in here\n"
3635 #ifdef CONFIG_DYNAMIC_FTRACE
3636         "\n  available_filter_functions - list of functions that can be filtered on\n"
3637         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3638         "\t\t\t  functions\n"
3639         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3640         "\t     modules: Can select a group via module\n"
3641         "\t      Format: :mod:<module-name>\n"
3642         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3643         "\t    triggers: a command to perform when function is hit\n"
3644         "\t      Format: <function>:<trigger>[:count]\n"
3645         "\t     trigger: traceon, traceoff\n"
3646         "\t\t      enable_event:<system>:<event>\n"
3647         "\t\t      disable_event:<system>:<event>\n"
3648 #ifdef CONFIG_STACKTRACE
3649         "\t\t      stacktrace\n"
3650 #endif
3651 #ifdef CONFIG_TRACER_SNAPSHOT
3652         "\t\t      snapshot\n"
3653 #endif
3654         "\t\t      dump\n"
3655         "\t\t      cpudump\n"
3656         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3657         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3658         "\t     The first one will disable tracing every time do_fault is hit\n"
3659         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3660         "\t       The first time do_trap is hit and it disables tracing, the\n"
3661         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3662         "\t       the counter will not decrement. It only decrements when the\n"
3663         "\t       trigger did work\n"
3664         "\t     To remove a trigger without a count:\n"
3665         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3666         "\t     To remove a trigger with a count:\n"
3667         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3668         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3669         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3670         "\t    modules: Can select a group via module command :mod:\n"
3671         "\t    Does not accept triggers\n"
3672 #endif /* CONFIG_DYNAMIC_FTRACE */
3673 #ifdef CONFIG_FUNCTION_TRACER
3674         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3675         "\t\t    (function)\n"
3676 #endif
3677 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3678         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3679         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3680         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3681 #endif
3682 #ifdef CONFIG_TRACER_SNAPSHOT
3683         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3684         "\t\t\t  snapshot buffer. Read the contents for more\n"
3685         "\t\t\t  information\n"
3686 #endif
3687 #ifdef CONFIG_STACK_TRACER
3688         "  stack_trace\t\t- Shows the max stack trace when active\n"
3689         "  stack_max_size\t- Shows current max stack size that was traced\n"
3690         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3691         "\t\t\t  new trace)\n"
3692 #ifdef CONFIG_DYNAMIC_FTRACE
3693         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3694         "\t\t\t  traces\n"
3695 #endif
3696 #endif /* CONFIG_STACK_TRACER */
3697         "  events/\t\t- Directory containing all trace event subsystems:\n"
3698         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3699         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3700         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3701         "\t\t\t  events\n"
3702         "      filter\t\t- If set, only events passing filter are traced\n"
3703         "  events/<system>/<event>/\t- Directory containing control files for\n"
3704         "\t\t\t  <event>:\n"
3705         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3706         "      filter\t\t- If set, only events passing filter are traced\n"
3707         "      trigger\t\t- If set, a command to perform when event is hit\n"
3708         "\t    Format: <trigger>[:count][if <filter>]\n"
3709         "\t   trigger: traceon, traceoff\n"
3710         "\t            enable_event:<system>:<event>\n"
3711         "\t            disable_event:<system>:<event>\n"
3712 #ifdef CONFIG_STACKTRACE
3713         "\t\t    stacktrace\n"
3714 #endif
3715 #ifdef CONFIG_TRACER_SNAPSHOT
3716         "\t\t    snapshot\n"
3717 #endif
3718         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3719         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3720         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3721         "\t                  events/block/block_unplug/trigger\n"
3722         "\t   The first disables tracing every time block_unplug is hit.\n"
3723         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3724         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3725         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
3726         "\t   Like function triggers, the counter is only decremented if it\n"
3727         "\t    enabled or disabled tracing.\n"
3728         "\t   To remove a trigger without a count:\n"
3729         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3730         "\t   To remove a trigger with a count:\n"
3731         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3732         "\t   Filters can be ignored when removing a trigger.\n"
3733 ;
3734
3735 static ssize_t
3736 tracing_readme_read(struct file *filp, char __user *ubuf,
3737                        size_t cnt, loff_t *ppos)
3738 {
3739         return simple_read_from_buffer(ubuf, cnt, ppos,
3740                                         readme_msg, strlen(readme_msg));
3741 }
3742
3743 static const struct file_operations tracing_readme_fops = {
3744         .open           = tracing_open_generic,
3745         .read           = tracing_readme_read,
3746         .llseek         = generic_file_llseek,
3747 };
3748
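/*
 * seq_file iterators used by tracing_saved_cmdlines_open() below. They
 * walk the map_cmdline_to_pid array under trace_cmdline_lock and print
 * each recorded pid together with the comm that was saved for it.
 */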
3749 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3750 {
3751         unsigned int *ptr = v;
3752
3753         if (*pos || m->count)
3754                 ptr++;
3755
3756         (*pos)++;
3757
3758         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3759              ptr++) {
3760                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3761                         continue;
3762
3763                 return ptr;
3764         }
3765
3766         return NULL;
3767 }
3768
3769 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3770 {
3771         void *v;
3772         loff_t l = 0;
3773
3774         preempt_disable();
3775         arch_spin_lock(&trace_cmdline_lock);
3776
3777         v = &savedcmd->map_cmdline_to_pid[0];
3778         while (l <= *pos) {
3779                 v = saved_cmdlines_next(m, v, &l);
3780                 if (!v)
3781                         return NULL;
3782         }
3783
3784         return v;
3785 }
3786
3787 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3788 {
3789         arch_spin_unlock(&trace_cmdline_lock);
3790         preempt_enable();
3791 }
3792
3793 static int saved_cmdlines_show(struct seq_file *m, void *v)
3794 {
3795         char buf[TASK_COMM_LEN];
3796         unsigned int *pid = v;
3797
3798         __trace_find_cmdline(*pid, buf);
3799         seq_printf(m, "%d %s\n", *pid, buf);
3800         return 0;
3801 }
3802
3803 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3804         .start          = saved_cmdlines_start,
3805         .next           = saved_cmdlines_next,
3806         .stop           = saved_cmdlines_stop,
3807         .show           = saved_cmdlines_show,
3808 };
3809
3810 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3811 {
3812         if (tracing_disabled)
3813                 return -ENODEV;
3814
3815         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3816 }
3817
3818 static const struct file_operations tracing_saved_cmdlines_fops = {
3819         .open           = tracing_saved_cmdlines_open,
3820         .read           = seq_read,
3821         .llseek         = seq_lseek,
3822         .release        = seq_release,
3823 };
3824
3825 static ssize_t
3826 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3827                                  size_t cnt, loff_t *ppos)
3828 {
3829         char buf[64];
3830         int r;
3831
3832         arch_spin_lock(&trace_cmdline_lock);
3833         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3834         arch_spin_unlock(&trace_cmdline_lock);
3835
3836         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3837 }
3838
3839 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3840 {
3841         kfree(s->saved_cmdlines);
3842         kfree(s->map_cmdline_to_pid);
3843         kfree(s);
3844 }
3845
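/*
 * Allocate a new saved_cmdlines buffer with @val entries, swap it in
 * under trace_cmdline_lock and free the old one, so readers never see a
 * half-initialized buffer.
 */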
3846 static int tracing_resize_saved_cmdlines(unsigned int val)
3847 {
3848         struct saved_cmdlines_buffer *s, *savedcmd_temp;
3849
3850         s = kmalloc(sizeof(*s), GFP_KERNEL);
3851         if (!s)
3852                 return -ENOMEM;
3853
3854         if (allocate_cmdlines_buffer(val, s) < 0) {
3855                 kfree(s);
3856                 return -ENOMEM;
3857         }
3858
3859         arch_spin_lock(&trace_cmdline_lock);
3860         savedcmd_temp = savedcmd;
3861         savedcmd = s;
3862         arch_spin_unlock(&trace_cmdline_lock);
3863         free_saved_cmdlines_buffer(savedcmd_temp);
3864
3865         return 0;
3866 }
3867
3868 static ssize_t
3869 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3870                                   size_t cnt, loff_t *ppos)
3871 {
3872         unsigned long val;
3873         int ret;
3874
3875         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3876         if (ret)
3877                 return ret;
3878
3879         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
3880         if (!val || val > PID_MAX_DEFAULT)
3881                 return -EINVAL;
3882
3883         ret = tracing_resize_saved_cmdlines((unsigned int)val);
3884         if (ret < 0)
3885                 return ret;
3886
3887         *ppos += cnt;
3888
3889         return cnt;
3890 }
3891
3892 static const struct file_operations tracing_saved_cmdlines_size_fops = {
3893         .open           = tracing_open_generic,
3894         .read           = tracing_saved_cmdlines_size_read,
3895         .write          = tracing_saved_cmdlines_size_write,
3896 };
3897
3898 static ssize_t
3899 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3900                        size_t cnt, loff_t *ppos)
3901 {
3902         struct trace_array *tr = filp->private_data;
3903         char buf[MAX_TRACER_SIZE+2];
3904         int r;
3905
3906         mutex_lock(&trace_types_lock);
3907         r = sprintf(buf, "%s\n", tr->current_trace->name);
3908         mutex_unlock(&trace_types_lock);
3909
3910         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3911 }
3912
3913 int tracer_init(struct tracer *t, struct trace_array *tr)
3914 {
3915         tracing_reset_online_cpus(&tr->trace_buffer);
3916         return t->init(tr);
3917 }
3918
3919 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3920 {
3921         int cpu;
3922
3923         for_each_tracing_cpu(cpu)
3924                 per_cpu_ptr(buf->data, cpu)->entries = val;
3925 }
3926
3927 #ifdef CONFIG_TRACER_MAX_TRACE
3928 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3929 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3930                                         struct trace_buffer *size_buf, int cpu_id)
3931 {
3932         int cpu, ret = 0;
3933
3934         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3935                 for_each_tracing_cpu(cpu) {
3936                         ret = ring_buffer_resize(trace_buf->buffer,
3937                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3938                         if (ret < 0)
3939                                 break;
3940                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3941                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3942                 }
3943         } else {
3944                 ret = ring_buffer_resize(trace_buf->buffer,
3945                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3946                 if (ret == 0)
3947                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3948                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3949         }
3950
3951         return ret;
3952 }
3953 #endif /* CONFIG_TRACER_MAX_TRACE */
3954
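/*
 * Resize the main ring buffer, and the max/snapshot buffer when the
 * global array is running a tracer that uses it, to @size bytes for a
 * single CPU or for all CPUs when @cpu is RING_BUFFER_ALL_CPUS.
 */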
3955 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3956                                         unsigned long size, int cpu)
3957 {
3958         int ret;
3959
3960         /*
3961          * If kernel or user changes the size of the ring buffer
3962          * we use the size that was given, and we can forget about
3963          * expanding it later.
3964          */
3965         ring_buffer_expanded = true;
3966
3967         /* May be called before buffers are initialized */
3968         if (!tr->trace_buffer.buffer)
3969                 return 0;
3970
3971         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3972         if (ret < 0)
3973                 return ret;
3974
3975 #ifdef CONFIG_TRACER_MAX_TRACE
3976         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3977             !tr->current_trace->use_max_tr)
3978                 goto out;
3979
3980         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3981         if (ret < 0) {
3982                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3983                                                      &tr->trace_buffer, cpu);
3984                 if (r < 0) {
3985                         /*
3986                          * AARGH! We are left with a differently
3987                          * sized max buffer!
3988                          * The max buffer is our "snapshot" buffer.
3989                          * When a tracer needs a snapshot (one of the
3990                          * latency tracers), it swaps the max buffer
3991                          * with the saved snapshot. We succeeded in
3992                          * updating the size of the main buffer, but failed to
3993                          * update the size of the max buffer. But when we tried
3994                          * to reset the main buffer to the original size, we
3995                          * failed there too. This is very unlikely to
3996                          * happen, but if it does, warn and kill all
3997                          * tracing.
3998                          */
3999                         WARN_ON(1);
4000                         tracing_disabled = 1;
4001                 }
4002                 return ret;
4003         }
4004
4005         if (cpu == RING_BUFFER_ALL_CPUS)
4006                 set_buffer_entries(&tr->max_buffer, size);
4007         else
4008                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4009
4010  out:
4011 #endif /* CONFIG_TRACER_MAX_TRACE */
4012
4013         if (cpu == RING_BUFFER_ALL_CPUS)
4014                 set_buffer_entries(&tr->trace_buffer, size);
4015         else
4016                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4017
4018         return ret;
4019 }
4020
4021 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4022                                           unsigned long size, int cpu_id)
4023 {
4024         int ret = size;
4025
4026         mutex_lock(&trace_types_lock);
4027
4028         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4029                 /* make sure this cpu is enabled in the mask */
4030                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4031                         ret = -EINVAL;
4032                         goto out;
4033                 }
4034         }
4035
4036         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4037         if (ret < 0)
4038                 ret = -ENOMEM;
4039
4040 out:
4041         mutex_unlock(&trace_types_lock);
4042
4043         return ret;
4044 }
4045
4046
4047 /**
4048  * tracing_update_buffers - used by tracing facility to expand ring buffers
4049  *
4050  * To save memory when tracing is configured in but never used, the
4051  * ring buffers are set to a minimum size. Once a user starts to use
4052  * the tracing facility, the buffers need to grow to their default
4053  * size.
4054  *
4055  * This function is to be called when a tracer is about to be used.
4056  */
4057 int tracing_update_buffers(void)
4058 {
4059         int ret = 0;
4060
4061         mutex_lock(&trace_types_lock);
4062         if (!ring_buffer_expanded)
4063                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4064                                                 RING_BUFFER_ALL_CPUS);
4065         mutex_unlock(&trace_types_lock);
4066
4067         return ret;
4068 }
4069
4070 struct trace_option_dentry;
4071
4072 static struct trace_option_dentry *
4073 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4074
4075 static void
4076 destroy_trace_option_files(struct trace_option_dentry *topts);
4077
4078 /*
4079  * Used to clear out the tracer before deletion of an instance.
4080  * Must have trace_types_lock held.
4081  */
4082 static void tracing_set_nop(struct trace_array *tr)
4083 {
4084         if (tr->current_trace == &nop_trace)
4085                 return;
4086
4087         tr->current_trace->enabled--;
4088
4089         if (tr->current_trace->reset)
4090                 tr->current_trace->reset(tr);
4091
4092         tr->current_trace = &nop_trace;
4093 }
4094
4095 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
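/*
 * Switch @tr to the tracer named @buf: expand the ring buffer if it has
 * not been expanded yet, shut down the current tracer, allocate or free
 * the snapshot buffer as the new tracer requires, and finally run the
 * new tracer's init callback.
 */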
4096 {
4097         static struct trace_option_dentry *topts;
4098         struct tracer *t;
4099 #ifdef CONFIG_TRACER_MAX_TRACE
4100         bool had_max_tr;
4101 #endif
4102         int ret = 0;
4103
4104         mutex_lock(&trace_types_lock);
4105
4106         if (!ring_buffer_expanded) {
4107                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4108                                                 RING_BUFFER_ALL_CPUS);
4109                 if (ret < 0)
4110                         goto out;
4111                 ret = 0;
4112         }
4113
4114         for (t = trace_types; t; t = t->next) {
4115                 if (strcmp(t->name, buf) == 0)
4116                         break;
4117         }
4118         if (!t) {
4119                 ret = -EINVAL;
4120                 goto out;
4121         }
4122         if (t == tr->current_trace)
4123                 goto out;
4124
4125         /* Some tracers are only allowed for the top level buffer */
4126         if (!trace_ok_for_array(t, tr)) {
4127                 ret = -EINVAL;
4128                 goto out;
4129         }
4130
4131         trace_branch_disable();
4132
4133         tr->current_trace->enabled--;
4134
4135         if (tr->current_trace->reset)
4136                 tr->current_trace->reset(tr);
4137
4138         /* Current trace needs to be nop_trace before synchronize_sched */
4139         tr->current_trace = &nop_trace;
4140
4141 #ifdef CONFIG_TRACER_MAX_TRACE
4142         had_max_tr = tr->allocated_snapshot;
4143
4144         if (had_max_tr && !t->use_max_tr) {
4145                 /*
4146                  * We need to make sure that the update_max_tr sees that
4147                  * current_trace changed to nop_trace to keep it from
4148                  * swapping the buffers after we resize it.
4149                  * The update_max_tr is called with interrupts disabled,
4150                  * so a synchronize_sched() is sufficient.
4151                  */
4152                 synchronize_sched();
4153                 free_snapshot(tr);
4154         }
4155 #endif
4156         /* Currently, only the top instance has options */
4157         if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
4158                 destroy_trace_option_files(topts);
4159                 topts = create_trace_option_files(tr, t);
4160         }
4161
4162 #ifdef CONFIG_TRACER_MAX_TRACE
4163         if (t->use_max_tr && !had_max_tr) {
4164                 ret = alloc_snapshot(tr);
4165                 if (ret < 0)
4166                         goto out;
4167         }
4168 #endif
4169
4170         if (t->init) {
4171                 ret = tracer_init(t, tr);
4172                 if (ret)
4173                         goto out;
4174         }
4175
4176         tr->current_trace = t;
4177         tr->current_trace->enabled++;
4178         trace_branch_enable(tr);
4179  out:
4180         mutex_unlock(&trace_types_lock);
4181
4182         return ret;
4183 }
4184
4185 static ssize_t
4186 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4187                         size_t cnt, loff_t *ppos)
4188 {
4189         struct trace_array *tr = filp->private_data;
4190         char buf[MAX_TRACER_SIZE+1];
4191         int i;
4192         size_t ret;
4193         int err;
4194
4195         ret = cnt;
4196
4197         if (cnt > MAX_TRACER_SIZE)
4198                 cnt = MAX_TRACER_SIZE;
4199
4200         if (copy_from_user(&buf, ubuf, cnt))
4201                 return -EFAULT;
4202
4203         buf[cnt] = 0;
4204
4205         /* strip ending whitespace. */
4206         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4207                 buf[i] = 0;
4208
4209         err = tracing_set_tracer(tr, buf);
4210         if (err)
4211                 return err;
4212
4213         *ppos += ret;
4214
4215         return ret;
4216 }
4217
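/*
 * Helpers shared by the tracing_thresh and max latency files: values are
 * stored in nanoseconds but read and written as microseconds (a stored
 * value of -1 reads back as -1).
 */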
4218 static ssize_t
4219 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4220                    size_t cnt, loff_t *ppos)
4221 {
4222         char buf[64];
4223         int r;
4224
4225         r = snprintf(buf, sizeof(buf), "%ld\n",
4226                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4227         if (r > sizeof(buf))
4228                 r = sizeof(buf);
4229         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4230 }
4231
4232 static ssize_t
4233 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4234                     size_t cnt, loff_t *ppos)
4235 {
4236         unsigned long val;
4237         int ret;
4238
4239         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4240         if (ret)
4241                 return ret;
4242
4243         *ptr = val * 1000;
4244
4245         return cnt;
4246 }
4247
4248 static ssize_t
4249 tracing_thresh_read(struct file *filp, char __user *ubuf,
4250                     size_t cnt, loff_t *ppos)
4251 {
4252         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4253 }
4254
4255 static ssize_t
4256 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4257                      size_t cnt, loff_t *ppos)
4258 {
4259         struct trace_array *tr = filp->private_data;
4260         int ret;
4261
4262         mutex_lock(&trace_types_lock);
4263         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4264         if (ret < 0)
4265                 goto out;
4266
4267         if (tr->current_trace->update_thresh) {
4268                 ret = tr->current_trace->update_thresh(tr);
4269                 if (ret < 0)
4270                         goto out;
4271         }
4272
4273         ret = cnt;
4274 out:
4275         mutex_unlock(&trace_types_lock);
4276
4277         return ret;
4278 }
4279
4280 static ssize_t
4281 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4282                      size_t cnt, loff_t *ppos)
4283 {
4284         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4285 }
4286
4287 static ssize_t
4288 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4289                       size_t cnt, loff_t *ppos)
4290 {
4291         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4292 }
4293
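/*
 * Open handler for "trace_pipe": allocate a consuming iterator, copy the
 * current tracer so the reader is not disturbed by tracer changes, and
 * mark every CPU as already started since the pipe does not show the
 * start of the buffer.
 */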
4294 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4295 {
4296         struct trace_array *tr = inode->i_private;
4297         struct trace_iterator *iter;
4298         int ret = 0;
4299
4300         if (tracing_disabled)
4301                 return -ENODEV;
4302
4303         if (trace_array_get(tr) < 0)
4304                 return -ENODEV;
4305
4306         mutex_lock(&trace_types_lock);
4307
4308         /* create a buffer to store the information to pass to userspace */
4309         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4310         if (!iter) {
4311                 ret = -ENOMEM;
4312                 __trace_array_put(tr);
4313                 goto out;
4314         }
4315
4316         /*
4317          * We make a copy of the current tracer to avoid concurrent
4318          * changes on it while we are reading.
4319          */
4320         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4321         if (!iter->trace) {
4322                 ret = -ENOMEM;
4323                 goto fail;
4324         }
4325         *iter->trace = *tr->current_trace;
4326
4327         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4328                 ret = -ENOMEM;
4329                 goto fail;
4330         }
4331
4332         /* trace pipe does not show start of buffer */
4333         cpumask_setall(iter->started);
4334
4335         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4336                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4337
4338         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4339         if (trace_clocks[tr->clock_id].in_ns)
4340                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4341
4342         iter->tr = tr;
4343         iter->trace_buffer = &tr->trace_buffer;
4344         iter->cpu_file = tracing_get_cpu(inode);
4345         mutex_init(&iter->mutex);
4346         filp->private_data = iter;
4347
4348         if (iter->trace->pipe_open)
4349                 iter->trace->pipe_open(iter);
4350
4351         nonseekable_open(inode, filp);
4352 out:
4353         mutex_unlock(&trace_types_lock);
4354         return ret;
4355
4356 fail:
4357         kfree(iter->trace);
4358         kfree(iter);
4359         __trace_array_put(tr);
4360         mutex_unlock(&trace_types_lock);
4361         return ret;
4362 }
4363
4364 static int tracing_release_pipe(struct inode *inode, struct file *file)
4365 {
4366         struct trace_iterator *iter = file->private_data;
4367         struct trace_array *tr = inode->i_private;
4368
4369         mutex_lock(&trace_types_lock);
4370
4371         if (iter->trace->pipe_close)
4372                 iter->trace->pipe_close(iter);
4373
4374         mutex_unlock(&trace_types_lock);
4375
4376         free_cpumask_var(iter->started);
4377         mutex_destroy(&iter->mutex);
4378         kfree(iter->trace);
4379         kfree(iter);
4380
4381         trace_array_put(tr);
4382
4383         return 0;
4384 }
4385
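/*
 * Poll support: an iterator with per-cpu buffer iterators (a static,
 * non-consuming read) is always readable, blocking mode always reports
 * readable, and otherwise the ring buffer is asked whether data is
 * available on this CPU.
 */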
4386 static unsigned int
4387 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4388 {
4389         /* Iterators are static, they should be filled or empty */
4390         if (trace_buffer_iter(iter, iter->cpu_file))
4391                 return POLLIN | POLLRDNORM;
4392
4393         if (trace_flags & TRACE_ITER_BLOCK)
4394                 /*
4395                  * Always select as readable when in blocking mode
4396                  */
4397                 return POLLIN | POLLRDNORM;
4398         else
4399                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4400                                              filp, poll_table);
4401 }
4402
4403 static unsigned int
4404 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4405 {
4406         struct trace_iterator *iter = filp->private_data;
4407
4408         return trace_poll(iter, filp, poll_table);
4409 }
4410
4411 /* Must be called with iter->mutex held. */
4412 static int tracing_wait_pipe(struct file *filp)
4413 {
4414         struct trace_iterator *iter = filp->private_data;
4415         int ret;
4416
4417         while (trace_empty(iter)) {
4418
4419                 if ((filp->f_flags & O_NONBLOCK)) {
4420                         return -EAGAIN;
4421                 }
4422
4423                 /*
4424                  * We block until we read something and tracing is disabled.
4425                  * We still block if tracing is disabled, but we have never
4426                  * read anything. This allows a user to cat this file, and
4427                  * then enable tracing. But after we have read something,
4428                  * we give an EOF when tracing is again disabled.
4429                  *
4430                  * iter->pos will be 0 if we haven't read anything.
4431                  */
4432                 if (!tracing_is_on() && iter->pos)
4433                         break;
4434
4435                 mutex_unlock(&iter->mutex);
4436
4437                 ret = wait_on_pipe(iter, false);
4438
4439                 mutex_lock(&iter->mutex);
4440
4441                 if (ret)
4442                         return ret;
4443         }
4444
4445         return 1;
4446 }
4447
4448 /*
4449  * Consumer reader.
4450  */
4451 static ssize_t
4452 tracing_read_pipe(struct file *filp, char __user *ubuf,
4453                   size_t cnt, loff_t *ppos)
4454 {
4455         struct trace_iterator *iter = filp->private_data;
4456         struct trace_array *tr = iter->tr;
4457         ssize_t sret;
4458
4459         /* return any leftover data */
4460         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4461         if (sret != -EBUSY)
4462                 return sret;
4463
4464         trace_seq_init(&iter->seq);
4465
4466         /* copy the tracer to avoid using a global lock all around */
4467         mutex_lock(&trace_types_lock);
4468         if (unlikely(iter->trace->name != tr->current_trace->name))
4469                 *iter->trace = *tr->current_trace;
4470         mutex_unlock(&trace_types_lock);
4471
4472         /*
4473          * Avoid more than one consumer on a single file descriptor
4474          * This is just a matter of traces coherency, the ring buffer itself
4475          * is protected.
4476          */
4477         mutex_lock(&iter->mutex);
4478         if (iter->trace->read) {
4479                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4480                 if (sret)
4481                         goto out;
4482         }
4483
4484 waitagain:
4485         sret = tracing_wait_pipe(filp);
4486         if (sret <= 0)
4487                 goto out;
4488
4489         /* stop when tracing is finished */
4490         if (trace_empty(iter)) {
4491                 sret = 0;
4492                 goto out;
4493         }
4494
4495         if (cnt >= PAGE_SIZE)
4496                 cnt = PAGE_SIZE - 1;
4497
4498         /* reset all but tr, trace, and overruns */
4499         memset(&iter->seq, 0,
4500                sizeof(struct trace_iterator) -
4501                offsetof(struct trace_iterator, seq));
4502         cpumask_clear(iter->started);
4503         iter->pos = -1;
4504
4505         trace_event_read_lock();
4506         trace_access_lock(iter->cpu_file);
4507         while (trace_find_next_entry_inc(iter) != NULL) {
4508                 enum print_line_t ret;
4509                 int len = iter->seq.len;
4510
4511                 ret = print_trace_line(iter);
4512                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4513                         /* don't print partial lines */
4514                         iter->seq.len = len;
4515                         break;
4516                 }
4517                 if (ret != TRACE_TYPE_NO_CONSUME)
4518                         trace_consume(iter);
4519
4520                 if (iter->seq.len >= cnt)
4521                         break;
4522
4523                 /*
4524                  * Setting the full flag means we reached the trace_seq buffer
4525                  * size and we should leave by partial output condition above.
4526                  * One of the trace_seq_* functions is not used properly.
4527                  */
4528                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4529                           iter->ent->type);
4530         }
4531         trace_access_unlock(iter->cpu_file);
4532         trace_event_read_unlock();
4533
4534         /* Now copy what we have to the user */
4535         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4536         if (iter->seq.readpos >= iter->seq.len)
4537                 trace_seq_init(&iter->seq);
4538
4539         /*
4540          * If there was nothing to send to user, in spite of consuming trace
4541          * entries, go back to wait for more entries.
4542          */
4543         if (sret == -EBUSY)
4544                 goto waitagain;
4545
4546 out:
4547         mutex_unlock(&iter->mutex);
4548
4549         return sret;
4550 }
4551
4552 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4553                                      unsigned int idx)
4554 {
4555         __free_page(spd->pages[idx]);
4556 }
4557
4558 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4559         .can_merge              = 0,
4560         .confirm                = generic_pipe_buf_confirm,
4561         .release                = generic_pipe_buf_release,
4562         .steal                  = generic_pipe_buf_steal,
4563         .get                    = generic_pipe_buf_get,
4564 };
4565
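/*
 * Format trace entries into iter->seq until a page (or the remaining
 * splice length @rem) is filled, consuming entries along the way, and
 * return how much of @rem is left.
 */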
4566 static size_t
4567 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4568 {
4569         size_t count;
4570         int ret;
4571
4572         /* Seq buffer is page-sized, exactly what we need. */
4573         for (;;) {
4574                 count = iter->seq.len;
4575                 ret = print_trace_line(iter);
4576                 count = iter->seq.len - count;
4577                 if (rem < count) {
4578                         rem = 0;
4579                         iter->seq.len -= count;
4580                         break;
4581                 }
4582                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4583                         iter->seq.len -= count;
4584                         break;
4585                 }
4586
4587                 if (ret != TRACE_TYPE_NO_CONSUME)
4588                         trace_consume(iter);
4589                 rem -= count;
4590                 if (!trace_find_next_entry_inc(iter))   {
4591                         rem = 0;
4592                         iter->ent = NULL;
4593                         break;
4594                 }
4595         }
4596
4597         return rem;
4598 }
4599
4600 static ssize_t tracing_splice_read_pipe(struct file *filp,
4601                                         loff_t *ppos,
4602                                         struct pipe_inode_info *pipe,
4603                                         size_t len,
4604                                         unsigned int flags)
4605 {
4606         struct page *pages_def[PIPE_DEF_BUFFERS];
4607         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4608         struct trace_iterator *iter = filp->private_data;
4609         struct splice_pipe_desc spd = {
4610                 .pages          = pages_def,
4611                 .partial        = partial_def,
4612                 .nr_pages       = 0, /* This gets updated below. */
4613                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4614                 .flags          = flags,
4615                 .ops            = &tracing_pipe_buf_ops,
4616                 .spd_release    = tracing_spd_release_pipe,
4617         };
4618         struct trace_array *tr = iter->tr;
4619         ssize_t ret;
4620         size_t rem;
4621         unsigned int i;
4622
4623         if (splice_grow_spd(pipe, &spd))
4624                 return -ENOMEM;
4625
4626         /* copy the tracer to avoid using a global lock all around */
4627         mutex_lock(&trace_types_lock);
4628         if (unlikely(iter->trace->name != tr->current_trace->name))
4629                 *iter->trace = *tr->current_trace;
4630         mutex_unlock(&trace_types_lock);
4631
4632         mutex_lock(&iter->mutex);
4633
4634         if (iter->trace->splice_read) {
4635                 ret = iter->trace->splice_read(iter, filp,
4636                                                ppos, pipe, len, flags);
4637                 if (ret)
4638                         goto out_err;
4639         }
4640
4641         ret = tracing_wait_pipe(filp);
4642         if (ret <= 0)
4643                 goto out_err;
4644
4645         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4646                 ret = -EFAULT;
4647                 goto out_err;
4648         }
4649
4650         trace_event_read_lock();
4651         trace_access_lock(iter->cpu_file);
4652
4653         /* Fill as many pages as possible. */
4654         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4655                 spd.pages[i] = alloc_page(GFP_KERNEL);
4656                 if (!spd.pages[i])
4657                         break;
4658
4659                 rem = tracing_fill_pipe_page(rem, iter);
4660
4661                 /* Copy the data into the page, so we can start over. */
4662                 ret = trace_seq_to_buffer(&iter->seq,
4663                                           page_address(spd.pages[i]),
4664                                           iter->seq.len);
4665                 if (ret < 0) {
4666                         __free_page(spd.pages[i]);
4667                         break;
4668                 }
4669                 spd.partial[i].offset = 0;
4670                 spd.partial[i].len = iter->seq.len;
4671
4672                 trace_seq_init(&iter->seq);
4673         }
4674
4675         trace_access_unlock(iter->cpu_file);
4676         trace_event_read_unlock();
4677         mutex_unlock(&iter->mutex);
4678
4679         spd.nr_pages = i;
4680
4681         ret = splice_to_pipe(pipe, &spd);
4682 out:
4683         splice_shrink_spd(&spd);
4684         return ret;
4685
4686 out_err:
4687         mutex_unlock(&iter->mutex);
4688         goto out;
4689 }
4690
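/*
 * Report the per-cpu buffer size in KB. For the all-CPUs file the size
 * is printed only when every CPU has the same size (otherwise "X"), and
 * "(expanded: ...)" shows what the buffer will grow to once expanded.
 */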
4691 static ssize_t
4692 tracing_entries_read(struct file *filp, char __user *ubuf,
4693                      size_t cnt, loff_t *ppos)
4694 {
4695         struct inode *inode = file_inode(filp);
4696         struct trace_array *tr = inode->i_private;
4697         int cpu = tracing_get_cpu(inode);
4698         char buf[64];
4699         int r = 0;
4700         ssize_t ret;
4701
4702         mutex_lock(&trace_types_lock);
4703
4704         if (cpu == RING_BUFFER_ALL_CPUS) {
4705                 int cpu, buf_size_same;
4706                 unsigned long size;
4707
4708                 size = 0;
4709                 buf_size_same = 1;
4710                 /* check if all cpu sizes are same */
4711                 for_each_tracing_cpu(cpu) {
4712                         /* fill in the size from first enabled cpu */
4713                         if (size == 0)
4714                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4715                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4716                                 buf_size_same = 0;
4717                                 break;
4718                         }
4719                 }
4720
4721                 if (buf_size_same) {
4722                         if (!ring_buffer_expanded)
4723                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4724                                             size >> 10,
4725                                             trace_buf_size >> 10);
4726                         else
4727                                 r = sprintf(buf, "%lu\n", size >> 10);
4728                 } else
4729                         r = sprintf(buf, "X\n");
4730         } else
4731                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4732
4733         mutex_unlock(&trace_types_lock);
4734
4735         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4736         return ret;
4737 }
4738
4739 static ssize_t
4740 tracing_entries_write(struct file *filp, const char __user *ubuf,
4741                       size_t cnt, loff_t *ppos)
4742 {
4743         struct inode *inode = file_inode(filp);
4744         struct trace_array *tr = inode->i_private;
4745         unsigned long val;
4746         int ret;
4747
4748         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4749         if (ret)
4750                 return ret;
4751
4752         /* must have at least 1 entry */
4753         if (!val)
4754                 return -EINVAL;
4755
4756         /* value is in KB */
4757         val <<= 10;
4758         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4759         if (ret < 0)
4760                 return ret;
4761
4762         *ppos += cnt;
4763
4764         return cnt;
4765 }
4766
4767 static ssize_t
4768 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4769                                 size_t cnt, loff_t *ppos)
4770 {
4771         struct trace_array *tr = filp->private_data;
4772         char buf[64];
4773         int r, cpu;
4774         unsigned long size = 0, expanded_size = 0;
4775
4776         mutex_lock(&trace_types_lock);
4777         for_each_tracing_cpu(cpu) {
4778                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4779                 if (!ring_buffer_expanded)
4780                         expanded_size += trace_buf_size >> 10;
4781         }
4782         if (ring_buffer_expanded)
4783                 r = sprintf(buf, "%lu\n", size);
4784         else
4785                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4786         mutex_unlock(&trace_types_lock);
4787
4788         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4789 }
4790
4791 static ssize_t
4792 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4793                           size_t cnt, loff_t *ppos)
4794 {
4795         /*
4796          * There is no need to read what the user has written; this function
4797          * only exists so that writing here (e.g. with "echo") does not fail.
4798          */
4799
4800         *ppos += cnt;
4801
4802         return cnt;
4803 }
4804
4805 static int
4806 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4807 {
4808         struct trace_array *tr = inode->i_private;
4809
4810         /* Disable tracing? */
4811         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4812                 tracer_tracing_off(tr);
4813         /* resize the ring buffer to 0 */
4814         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4815
4816         trace_array_put(tr);
4817
4818         return 0;
4819 }
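/*
 * Illustrative userspace sketch (not part of this file): any write to the
 * "free_buffer" file is accepted (tracing_free_buffer_write() above), and
 * closing the file shrinks the ring buffer to zero; if the
 * TRACE_ITER_STOP_ON_FREE option is set, tracing is turned off first.
 * Path assumes debugfs at /sys/kernel/debug; needs <fcntl.h> and <unistd.h>.
 *
 *	int fd = open("/sys/kernel/debug/tracing/free_buffer", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "1", 1);	// content is ignored
 *		close(fd);		// buffer is freed on release
 *	}
 */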
4820
4821 static ssize_t
4822 tracing_mark_write(struct file *filp, const char __user *ubuf,
4823                                         size_t cnt, loff_t *fpos)
4824 {
4825         unsigned long addr = (unsigned long)ubuf;
4826         struct trace_array *tr = filp->private_data;
4827         struct ring_buffer_event *event;
4828         struct ring_buffer *buffer;
4829         struct print_entry *entry;
4830         unsigned long irq_flags;
4831         struct page *pages[2];
4832         void *map_page[2];
4833         int nr_pages = 1;
4834         ssize_t written;
4835         int offset;
4836         int size;
4837         int len;
4838         int ret;
4839         int i;
4840
4841         if (tracing_disabled)
4842                 return -EINVAL;
4843
4844         if (!(trace_flags & TRACE_ITER_MARKERS))
4845                 return -EINVAL;
4846
4847         if (cnt > TRACE_BUF_SIZE)
4848                 cnt = TRACE_BUF_SIZE;
4849
4850         /*
4851          * Userspace is injecting traces into the kernel trace buffer.
4852          * We want to be as non-intrusive as possible.
4853          * To do so, we do not want to allocate any special buffers
4854          * or take any locks, but instead write the userspace data
4855          * straight into the ring buffer.
4856          *
4857          * First we need to pin the userspace buffer into memory.
4858          * Most likely it already is resident, since userspace just
4859          * referenced it, but there is no guarantee. By using
4860          * get_user_pages_fast() and kmap_atomic()/kunmap_atomic() we can
4861          * access the pages directly and write the data straight into
4862          * the ring buffer.
4863          */
4864         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4865
4866         /* check if we cross pages */
4867         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4868                 nr_pages = 2;
4869
4870         offset = addr & (PAGE_SIZE - 1);
4871         addr &= PAGE_MASK;
4872
4873         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4874         if (ret < nr_pages) {
4875                 while (--ret >= 0)
4876                         put_page(pages[ret]);
4877                 written = -EFAULT;
4878                 goto out;
4879         }
4880
4881         for (i = 0; i < nr_pages; i++)
4882                 map_page[i] = kmap_atomic(pages[i]);
4883
4884         local_save_flags(irq_flags);
4885         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4886         buffer = tr->trace_buffer.buffer;
4887         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4888                                           irq_flags, preempt_count());
4889         if (!event) {
4890                 /* Ring buffer disabled, return as if not open for write */
4891                 written = -EBADF;
4892                 goto out_unlock;
4893         }
4894
4895         entry = ring_buffer_event_data(event);
4896         entry->ip = _THIS_IP_;
4897
4898         if (nr_pages == 2) {
4899                 len = PAGE_SIZE - offset;
4900                 memcpy(&entry->buf, map_page[0] + offset, len);
4901                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4902         } else
4903                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4904
4905         if (entry->buf[cnt - 1] != '\n') {
4906                 entry->buf[cnt] = '\n';
4907                 entry->buf[cnt + 1] = '\0';
4908         } else
4909                 entry->buf[cnt] = '\0';
4910
4911         __buffer_unlock_commit(buffer, event);
4912
4913         written = cnt;
4914
4915         *fpos += written;
4916
4917  out_unlock:
4918         for (i = 0; i < nr_pages; i++) {
4919                 kunmap_atomic(map_page[i]);
4920                 put_page(pages[i]);
4921         }
4922  out:
4923         return written;
4924 }
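/*
 * Illustrative userspace sketch (not part of this file): injecting a
 * message into the trace via the "trace_marker" file, serviced by
 * tracing_mark_write() above.  Path assumes debugfs at /sys/kernel/debug;
 * needs <fcntl.h>, <string.h> and <unistd.h>.
 *
 *	const char *msg = "hello from userspace\n";
 *	int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, msg, strlen(msg));
 *		close(fd);
 *	}
 */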
4925
4926 static int tracing_clock_show(struct seq_file *m, void *v)
4927 {
4928         struct trace_array *tr = m->private;
4929         int i;
4930
4931         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4932                 seq_printf(m,
4933                         "%s%s%s%s", i ? " " : "",
4934                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4935                         i == tr->clock_id ? "]" : "");
4936         seq_putc(m, '\n');
4937
4938         return 0;
4939 }
4940
4941 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
4942 {
4943         int i;
4944
4945         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4946                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4947                         break;
4948         }
4949         if (i == ARRAY_SIZE(trace_clocks))
4950                 return -EINVAL;
4951
4952         mutex_lock(&trace_types_lock);
4953
4954         tr->clock_id = i;
4955
4956         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4957
4958         /*
4959          * New clock may not be consistent with the previous clock.
4960          * Reset the buffer so that it doesn't have incomparable timestamps.
4961          */
4962         tracing_reset_online_cpus(&tr->trace_buffer);
4963
4964 #ifdef CONFIG_TRACER_MAX_TRACE
4965         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4966                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4967         tracing_reset_online_cpus(&tr->max_buffer);
4968 #endif
4969
4970         mutex_unlock(&trace_types_lock);
4971
4972         return 0;
4973 }
4974
4975 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4976                                    size_t cnt, loff_t *fpos)
4977 {
4978         struct seq_file *m = filp->private_data;
4979         struct trace_array *tr = m->private;
4980         char buf[64];
4981         const char *clockstr;
4982         int ret;
4983
4984         if (cnt >= sizeof(buf))
4985                 return -EINVAL;
4986
4987         if (copy_from_user(&buf, ubuf, cnt))
4988                 return -EFAULT;
4989
4990         buf[cnt] = 0;
4991
4992         clockstr = strstrip(buf);
4993
4994         ret = tracing_set_clock(tr, clockstr);
4995         if (ret)
4996                 return ret;
4997
4998         *fpos += cnt;
4999
5000         return cnt;
5001 }
5002
5003 static int tracing_clock_open(struct inode *inode, struct file *file)
5004 {
5005         struct trace_array *tr = inode->i_private;
5006         int ret;
5007
5008         if (tracing_disabled)
5009                 return -ENODEV;
5010
5011         if (trace_array_get(tr))
5012                 return -ENODEV;
5013
5014         ret = single_open(file, tracing_clock_show, inode->i_private);
5015         if (ret < 0)
5016                 trace_array_put(tr);
5017
5018         return ret;
5019 }
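/*
 * Illustrative userspace sketch (not part of this file): switching trace
 * clocks via the "trace_clock" file.  Reading it (tracing_clock_show())
 * lists the available clocks with the current one in brackets; writing a
 * clock name (tracing_clock_write() -> tracing_set_clock()) selects it and
 * resets the buffers, since timestamps from different clocks are not
 * comparable.  Path assumes debugfs at /sys/kernel/debug.
 *
 *	int fd = open("/sys/kernel/debug/tracing/trace_clock", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "global", 6);
 *		close(fd);
 *	}
 */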
5020
5021 struct ftrace_buffer_info {
5022         struct trace_iterator   iter;
5023         void                    *spare;
5024         unsigned int            read;
5025 };
5026
5027 #ifdef CONFIG_TRACER_SNAPSHOT
5028 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5029 {
5030         struct trace_array *tr = inode->i_private;
5031         struct trace_iterator *iter;
5032         struct seq_file *m;
5033         int ret = 0;
5034
5035         if (trace_array_get(tr) < 0)
5036                 return -ENODEV;
5037
5038         if (file->f_mode & FMODE_READ) {
5039                 iter = __tracing_open(inode, file, true);
5040                 if (IS_ERR(iter))
5041                         ret = PTR_ERR(iter);
5042         } else {
5043                 /* Writes still need the seq_file to hold the private data */
5044                 ret = -ENOMEM;
5045                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5046                 if (!m)
5047                         goto out;
5048                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5049                 if (!iter) {
5050                         kfree(m);
5051                         goto out;
5052                 }
5053                 ret = 0;
5054
5055                 iter->tr = tr;
5056                 iter->trace_buffer = &tr->max_buffer;
5057                 iter->cpu_file = tracing_get_cpu(inode);
5058                 m->private = iter;
5059                 file->private_data = m;
5060         }
5061 out:
5062         if (ret < 0)
5063                 trace_array_put(tr);
5064
5065         return ret;
5066 }
5067
5068 static ssize_t
5069 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5070                        loff_t *ppos)
5071 {
5072         struct seq_file *m = filp->private_data;
5073         struct trace_iterator *iter = m->private;
5074         struct trace_array *tr = iter->tr;
5075         unsigned long val;
5076         int ret;
5077
5078         ret = tracing_update_buffers();
5079         if (ret < 0)
5080                 return ret;
5081
5082         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5083         if (ret)
5084                 return ret;
5085
5086         mutex_lock(&trace_types_lock);
5087
5088         if (tr->current_trace->use_max_tr) {
5089                 ret = -EBUSY;
5090                 goto out;
5091         }
5092
5093         switch (val) {
5094         case 0:
5095                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5096                         ret = -EINVAL;
5097                         break;
5098                 }
5099                 if (tr->allocated_snapshot)
5100                         free_snapshot(tr);
5101                 break;
5102         case 1:
5103 /* Only allow per-cpu swap if the ring buffer supports it */
5104 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5105                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5106                         ret = -EINVAL;
5107                         break;
5108                 }
5109 #endif
5110                 if (!tr->allocated_snapshot) {
5111                         ret = alloc_snapshot(tr);
5112                         if (ret < 0)
5113                                 break;
5114                 }
5115                 local_irq_disable();
5116                 /* Now, we're going to swap */
5117                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5118                         update_max_tr(tr, current, smp_processor_id());
5119                 else
5120                         update_max_tr_single(tr, current, iter->cpu_file);
5121                 local_irq_enable();
5122                 break;
5123         default:
5124                 if (tr->allocated_snapshot) {
5125                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5126                                 tracing_reset_online_cpus(&tr->max_buffer);
5127                         else
5128                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5129                 }
5130                 break;
5131         }
5132
5133         if (ret >= 0) {
5134                 *ppos += cnt;
5135                 ret = cnt;
5136         }
5137 out:
5138         mutex_unlock(&trace_types_lock);
5139         return ret;
5140 }
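/*
 * Illustrative userspace sketch (not part of this file): driving the
 * "snapshot" file handled by tracing_snapshot_write() above.  Writing "1"
 * allocates the max buffer if needed and swaps it with the live buffer,
 * "0" frees the snapshot buffer, and any other value clears its contents.
 * Path assumes debugfs at /sys/kernel/debug.
 *
 *	int fd = open("/sys/kernel/debug/tracing/snapshot", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "1", 1);	// take a snapshot now
 *		close(fd);
 *	}
 */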
5141
5142 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5143 {
5144         struct seq_file *m = file->private_data;
5145         int ret;
5146
5147         ret = tracing_release(inode, file);
5148
5149         if (file->f_mode & FMODE_READ)
5150                 return ret;
5151
5152         /* If write only, the seq_file is just a stub */
5153         if (m)
5154                 kfree(m->private);
5155         kfree(m);
5156
5157         return 0;
5158 }
5159
5160 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5161 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5162                                     size_t count, loff_t *ppos);
5163 static int tracing_buffers_release(struct inode *inode, struct file *file);
5164 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5165                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5166
5167 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5168 {
5169         struct ftrace_buffer_info *info;
5170         int ret;
5171
5172         ret = tracing_buffers_open(inode, filp);
5173         if (ret < 0)
5174                 return ret;
5175
5176         info = filp->private_data;
5177
5178         if (info->iter.trace->use_max_tr) {
5179                 tracing_buffers_release(inode, filp);
5180                 return -EBUSY;
5181         }
5182
5183         info->iter.snapshot = true;
5184         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5185
5186         return ret;
5187 }
5188
5189 #endif /* CONFIG_TRACER_SNAPSHOT */
5190
5191
5192 static const struct file_operations tracing_thresh_fops = {
5193         .open           = tracing_open_generic,
5194         .read           = tracing_thresh_read,
5195         .write          = tracing_thresh_write,
5196         .llseek         = generic_file_llseek,
5197 };
5198
5199 static const struct file_operations tracing_max_lat_fops = {
5200         .open           = tracing_open_generic,
5201         .read           = tracing_max_lat_read,
5202         .write          = tracing_max_lat_write,
5203         .llseek         = generic_file_llseek,
5204 };
5205
5206 static const struct file_operations set_tracer_fops = {
5207         .open           = tracing_open_generic,
5208         .read           = tracing_set_trace_read,
5209         .write          = tracing_set_trace_write,
5210         .llseek         = generic_file_llseek,
5211 };
5212
5213 static const struct file_operations tracing_pipe_fops = {
5214         .open           = tracing_open_pipe,
5215         .poll           = tracing_poll_pipe,
5216         .read           = tracing_read_pipe,
5217         .splice_read    = tracing_splice_read_pipe,
5218         .release        = tracing_release_pipe,
5219         .llseek         = no_llseek,
5220 };
5221
5222 static const struct file_operations tracing_entries_fops = {
5223         .open           = tracing_open_generic_tr,
5224         .read           = tracing_entries_read,
5225         .write          = tracing_entries_write,
5226         .llseek         = generic_file_llseek,
5227         .release        = tracing_release_generic_tr,
5228 };
5229
5230 static const struct file_operations tracing_total_entries_fops = {
5231         .open           = tracing_open_generic_tr,
5232         .read           = tracing_total_entries_read,
5233         .llseek         = generic_file_llseek,
5234         .release        = tracing_release_generic_tr,
5235 };
5236
5237 static const struct file_operations tracing_free_buffer_fops = {
5238         .open           = tracing_open_generic_tr,
5239         .write          = tracing_free_buffer_write,
5240         .release        = tracing_free_buffer_release,
5241 };
5242
5243 static const struct file_operations tracing_mark_fops = {
5244         .open           = tracing_open_generic_tr,
5245         .write          = tracing_mark_write,
5246         .llseek         = generic_file_llseek,
5247         .release        = tracing_release_generic_tr,
5248 };
5249
5250 static const struct file_operations trace_clock_fops = {
5251         .open           = tracing_clock_open,
5252         .read           = seq_read,
5253         .llseek         = seq_lseek,
5254         .release        = tracing_single_release_tr,
5255         .write          = tracing_clock_write,
5256 };
5257
5258 #ifdef CONFIG_TRACER_SNAPSHOT
5259 static const struct file_operations snapshot_fops = {
5260         .open           = tracing_snapshot_open,
5261         .read           = seq_read,
5262         .write          = tracing_snapshot_write,
5263         .llseek         = tracing_lseek,
5264         .release        = tracing_snapshot_release,
5265 };
5266
5267 static const struct file_operations snapshot_raw_fops = {
5268         .open           = snapshot_raw_open,
5269         .read           = tracing_buffers_read,
5270         .release        = tracing_buffers_release,
5271         .splice_read    = tracing_buffers_splice_read,
5272         .llseek         = no_llseek,
5273 };
5274
5275 #endif /* CONFIG_TRACER_SNAPSHOT */
5276
5277 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5278 {
5279         struct trace_array *tr = inode->i_private;
5280         struct ftrace_buffer_info *info;
5281         int ret;
5282
5283         if (tracing_disabled)
5284                 return -ENODEV;
5285
5286         if (trace_array_get(tr) < 0)
5287                 return -ENODEV;
5288
5289         info = kzalloc(sizeof(*info), GFP_KERNEL);
5290         if (!info) {
5291                 trace_array_put(tr);
5292                 return -ENOMEM;
5293         }
5294
5295         mutex_lock(&trace_types_lock);
5296
5297         info->iter.tr           = tr;
5298         info->iter.cpu_file     = tracing_get_cpu(inode);
5299         info->iter.trace        = tr->current_trace;
5300         info->iter.trace_buffer = &tr->trace_buffer;
5301         info->spare             = NULL;
5302         /* Force reading ring buffer for first read */
5303         info->read              = (unsigned int)-1;
5304
5305         filp->private_data = info;
5306
5307         mutex_unlock(&trace_types_lock);
5308
5309         ret = nonseekable_open(inode, filp);
5310         if (ret < 0)
5311                 trace_array_put(tr);
5312
5313         return ret;
5314 }
5315
5316 static unsigned int
5317 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5318 {
5319         struct ftrace_buffer_info *info = filp->private_data;
5320         struct trace_iterator *iter = &info->iter;
5321
5322         return trace_poll(iter, filp, poll_table);
5323 }
5324
5325 static ssize_t
5326 tracing_buffers_read(struct file *filp, char __user *ubuf,
5327                      size_t count, loff_t *ppos)
5328 {
5329         struct ftrace_buffer_info *info = filp->private_data;
5330         struct trace_iterator *iter = &info->iter;
5331         ssize_t ret;
5332         ssize_t size;
5333
5334         if (!count)
5335                 return 0;
5336
5337         mutex_lock(&trace_types_lock);
5338
5339 #ifdef CONFIG_TRACER_MAX_TRACE
5340         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5341                 size = -EBUSY;
5342                 goto out_unlock;
5343         }
5344 #endif
5345
5346         if (!info->spare)
5347                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5348                                                           iter->cpu_file);
5349         size = -ENOMEM;
5350         if (!info->spare)
5351                 goto out_unlock;
5352
5353         /* Do we have previous read data to read? */
5354         if (info->read < PAGE_SIZE)
5355                 goto read;
5356
5357  again:
5358         trace_access_lock(iter->cpu_file);
5359         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5360                                     &info->spare,
5361                                     count,
5362                                     iter->cpu_file, 0);
5363         trace_access_unlock(iter->cpu_file);
5364
5365         if (ret < 0) {
5366                 if (trace_empty(iter)) {
5367                         if (filp->f_flags & O_NONBLOCK) {
5368                                 size = -EAGAIN;
5369                                 goto out_unlock;
5370                         }
5371                         mutex_unlock(&trace_types_lock);
5372                         ret = wait_on_pipe(iter, false);
5373                         mutex_lock(&trace_types_lock);
5374                         if (ret) {
5375                                 size = ret;
5376                                 goto out_unlock;
5377                         }
5378                         goto again;
5379                 }
5380                 size = 0;
5381                 goto out_unlock;
5382         }
5383
5384         info->read = 0;
5385  read:
5386         size = PAGE_SIZE - info->read;
5387         if (size > count)
5388                 size = count;
5389
5390         ret = copy_to_user(ubuf, info->spare + info->read, size);
5391         if (ret == size) {
5392                 size = -EFAULT;
5393                 goto out_unlock;
5394         }
5395         size -= ret;
5396
5397         *ppos += size;
5398         info->read += size;
5399
5400  out_unlock:
5401         mutex_unlock(&trace_types_lock);
5402
5403         return size;
5404 }
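/*
 * Illustrative userspace sketch (not part of this file): reading raw,
 * binary ring-buffer pages from a per-cpu "trace_pipe_raw" file, which is
 * backed by tracing_buffers_read() above.  Each read returns at most one
 * page of data.  Path assumes debugfs at /sys/kernel/debug; needs
 * <fcntl.h> and <unistd.h>.
 *
 *	char page[4096];
 *	int fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	if (fd >= 0) {
 *		read(fd, page, sizeof(page));
 *		close(fd);
 *	}
 */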
5405
5406 static int tracing_buffers_release(struct inode *inode, struct file *file)
5407 {
5408         struct ftrace_buffer_info *info = file->private_data;
5409         struct trace_iterator *iter = &info->iter;
5410
5411         mutex_lock(&trace_types_lock);
5412
5413         __trace_array_put(iter->tr);
5414
5415         if (info->spare)
5416                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5417         kfree(info);
5418
5419         mutex_unlock(&trace_types_lock);
5420
5421         return 0;
5422 }
5423
5424 struct buffer_ref {
5425         struct ring_buffer      *buffer;
5426         void                    *page;
5427         int                     ref;
5428 };
5429
5430 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5431                                     struct pipe_buffer *buf)
5432 {
5433         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5434
5435         if (--ref->ref)
5436                 return;
5437
5438         ring_buffer_free_read_page(ref->buffer, ref->page);
5439         kfree(ref);
5440         buf->private = 0;
5441 }
5442
5443 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5444                                 struct pipe_buffer *buf)
5445 {
5446         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5447
5448         ref->ref++;
5449 }
5450
5451 /* Pipe buffer operations for spliced ring buffer pages. */
5452 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5453         .can_merge              = 0,
5454         .confirm                = generic_pipe_buf_confirm,
5455         .release                = buffer_pipe_buf_release,
5456         .steal                  = generic_pipe_buf_steal,
5457         .get                    = buffer_pipe_buf_get,
5458 };
5459
5460 /*
5461  * Callback from splice_to_pipe(): release any pages left in the spd
5462  * in case we errored out while filling the pipe.
5463  */
5464 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5465 {
5466         struct buffer_ref *ref =
5467                 (struct buffer_ref *)spd->partial[i].private;
5468
5469         if (--ref->ref)
5470                 return;
5471
5472         ring_buffer_free_read_page(ref->buffer, ref->page);
5473         kfree(ref);
5474         spd->partial[i].private = 0;
5475 }
5476
5477 static ssize_t
5478 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5479                             struct pipe_inode_info *pipe, size_t len,
5480                             unsigned int flags)
5481 {
5482         struct ftrace_buffer_info *info = file->private_data;
5483         struct trace_iterator *iter = &info->iter;
5484         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5485         struct page *pages_def[PIPE_DEF_BUFFERS];
5486         struct splice_pipe_desc spd = {
5487                 .pages          = pages_def,
5488                 .partial        = partial_def,
5489                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5490                 .flags          = flags,
5491                 .ops            = &buffer_pipe_buf_ops,
5492                 .spd_release    = buffer_spd_release,
5493         };
5494         struct buffer_ref *ref;
5495         int entries, size, i;
5496         ssize_t ret = 0;
5497
5498         mutex_lock(&trace_types_lock);
5499
5500 #ifdef CONFIG_TRACER_MAX_TRACE
5501         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5502                 ret = -EBUSY;
5503                 goto out;
5504         }
5505 #endif
5506
5507         if (splice_grow_spd(pipe, &spd)) {
5508                 ret = -ENOMEM;
5509                 goto out;
5510         }
5511
5512         if (*ppos & (PAGE_SIZE - 1)) {
5513                 ret = -EINVAL;
5514                 goto out;
5515         }
5516
5517         if (len & (PAGE_SIZE - 1)) {
5518                 if (len < PAGE_SIZE) {
5519                         ret = -EINVAL;
5520                         goto out;
5521                 }
5522                 len &= PAGE_MASK;
5523         }
5524
5525  again:
5526         trace_access_lock(iter->cpu_file);
5527         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5528
5529         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5530                 struct page *page;
5531                 int r;
5532
5533                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5534                 if (!ref) {
5535                         ret = -ENOMEM;
5536                         break;
5537                 }
5538
5539                 ref->ref = 1;
5540                 ref->buffer = iter->trace_buffer->buffer;
5541                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5542                 if (!ref->page) {
5543                         ret = -ENOMEM;
5544                         kfree(ref);
5545                         break;
5546                 }
5547
5548                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5549                                           len, iter->cpu_file, 1);
5550                 if (r < 0) {
5551                         ring_buffer_free_read_page(ref->buffer, ref->page);
5552                         kfree(ref);
5553                         break;
5554                 }
5555
5556                 /*
5557                  * Zero out any leftover data; this page is going
5558                  * to user land.
5559                  */
5560                 size = ring_buffer_page_len(ref->page);
5561                 if (size < PAGE_SIZE)
5562                         memset(ref->page + size, 0, PAGE_SIZE - size);
5563
5564                 page = virt_to_page(ref->page);
5565
5566                 spd.pages[i] = page;
5567                 spd.partial[i].len = PAGE_SIZE;
5568                 spd.partial[i].offset = 0;
5569                 spd.partial[i].private = (unsigned long)ref;
5570                 spd.nr_pages++;
5571                 *ppos += PAGE_SIZE;
5572
5573                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5574         }
5575
5576         trace_access_unlock(iter->cpu_file);
5577         spd.nr_pages = i;
5578
5579         /* did we read anything? */
5580         if (!spd.nr_pages) {
5581                 if (ret)
5582                         goto out;
5583
5584                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5585                         ret = -EAGAIN;
5586                         goto out;
5587                 }
5588                 mutex_unlock(&trace_types_lock);
5589                 ret = wait_on_pipe(iter, true);
5590                 mutex_lock(&trace_types_lock);
5591                 if (ret)
5592                         goto out;
5593
5594                 goto again;
5595         }
5596
5597         ret = splice_to_pipe(pipe, &spd);
5598         splice_shrink_spd(&spd);
5599 out:
5600         mutex_unlock(&trace_types_lock);
5601
5602         return ret;
5603 }
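/*
 * Illustrative userspace sketch (not part of this file): moving ring
 * buffer pages into a pipe with splice(2), which ends up in
 * tracing_buffers_splice_read() above and avoids copying the data through
 * userspace.  Path assumes debugfs at /sys/kernel/debug; needs <fcntl.h>
 * and <unistd.h> built with _GNU_SOURCE for splice().
 *
 *	int pfd[2];
 *	int fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	if (fd >= 0 && pipe(pfd) == 0)
 *		splice(fd, NULL, pfd[1], NULL, 4096, SPLICE_F_NONBLOCK);
 */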
5604
5605 static const struct file_operations tracing_buffers_fops = {
5606         .open           = tracing_buffers_open,
5607         .read           = tracing_buffers_read,
5608         .poll           = tracing_buffers_poll,
5609         .release        = tracing_buffers_release,
5610         .splice_read    = tracing_buffers_splice_read,
5611         .llseek         = no_llseek,
5612 };
5613
5614 static ssize_t
5615 tracing_stats_read(struct file *filp, char __user *ubuf,
5616                    size_t count, loff_t *ppos)
5617 {
5618         struct inode *inode = file_inode(filp);
5619         struct trace_array *tr = inode->i_private;
5620         struct trace_buffer *trace_buf = &tr->trace_buffer;
5621         int cpu = tracing_get_cpu(inode);
5622         struct trace_seq *s;
5623         unsigned long cnt;
5624         unsigned long long t;
5625         unsigned long usec_rem;
5626
5627         s = kmalloc(sizeof(*s), GFP_KERNEL);
5628         if (!s)
5629                 return -ENOMEM;
5630
5631         trace_seq_init(s);
5632
5633         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5634         trace_seq_printf(s, "entries: %ld\n", cnt);
5635
5636         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5637         trace_seq_printf(s, "overrun: %ld\n", cnt);
5638
5639         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5640         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5641
5642         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5643         trace_seq_printf(s, "bytes: %ld\n", cnt);
5644
5645         if (trace_clocks[tr->clock_id].in_ns) {
5646                 /* local or global for trace_clock */
5647                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5648                 usec_rem = do_div(t, USEC_PER_SEC);
5649                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5650                                                                 t, usec_rem);
5651
5652                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5653                 usec_rem = do_div(t, USEC_PER_SEC);
5654                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5655         } else {
5656                 /* counter or tsc mode for trace_clock */
5657                 trace_seq_printf(s, "oldest event ts: %llu\n",
5658                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5659
5660                 trace_seq_printf(s, "now ts: %llu\n",
5661                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5662         }
5663
5664         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5665         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5666
5667         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5668         trace_seq_printf(s, "read events: %ld\n", cnt);
5669
5670         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5671
5672         kfree(s);
5673
5674         return count;
5675 }
5676
5677 static const struct file_operations tracing_stats_fops = {
5678         .open           = tracing_open_generic_tr,
5679         .read           = tracing_stats_read,
5680         .llseek         = generic_file_llseek,
5681         .release        = tracing_release_generic_tr,
5682 };
5683
5684 #ifdef CONFIG_DYNAMIC_FTRACE
5685
5686 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5687 {
5688         return 0;
5689 }
5690
5691 static ssize_t
5692 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5693                   size_t cnt, loff_t *ppos)
5694 {
5695         static char ftrace_dyn_info_buffer[1024];
5696         static DEFINE_MUTEX(dyn_info_mutex);
5697         unsigned long *p = filp->private_data;
5698         char *buf = ftrace_dyn_info_buffer;
5699         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5700         int r;
5701
5702         mutex_lock(&dyn_info_mutex);
5703         r = sprintf(buf, "%ld ", *p);
5704
5705         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5706         buf[r++] = '\n';
5707
5708         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5709
5710         mutex_unlock(&dyn_info_mutex);
5711
5712         return r;
5713 }
5714
5715 static const struct file_operations tracing_dyn_info_fops = {
5716         .open           = tracing_open_generic,
5717         .read           = tracing_read_dyn_info,
5718         .llseek         = generic_file_llseek,
5719 };
5720 #endif /* CONFIG_DYNAMIC_FTRACE */
5721
5722 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5723 static void
5724 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5725 {
5726         tracing_snapshot();
5727 }
5728
5729 static void
5730 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5731 {
5732         unsigned long *count = (unsigned long *)data;
5733
5734         if (!*count)
5735                 return;
5736
5737         if (*count != -1)
5738                 (*count)--;
5739
5740         tracing_snapshot();
5741 }
5742
5743 static int
5744 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5745                       struct ftrace_probe_ops *ops, void *data)
5746 {
5747         long count = (long)data;
5748
5749         seq_printf(m, "%ps:", (void *)ip);
5750
5751         seq_puts(m, "snapshot");
5752
5753         if (count == -1)
5754                 seq_puts(m, ":unlimited\n");
5755         else
5756                 seq_printf(m, ":count=%ld\n", count);
5757
5758         return 0;
5759 }
5760
5761 static struct ftrace_probe_ops snapshot_probe_ops = {
5762         .func                   = ftrace_snapshot,
5763         .print                  = ftrace_snapshot_print,
5764 };
5765
5766 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5767         .func                   = ftrace_count_snapshot,
5768         .print                  = ftrace_snapshot_print,
5769 };
5770
5771 static int
5772 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5773                                char *glob, char *cmd, char *param, int enable)
5774 {
5775         struct ftrace_probe_ops *ops;
5776         void *count = (void *)-1;
5777         char *number;
5778         int ret;
5779
5780         /* hash funcs only work with set_ftrace_filter */
5781         if (!enable)
5782                 return -EINVAL;
5783
5784         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5785
5786         if (glob[0] == '!') {
5787                 unregister_ftrace_function_probe_func(glob+1, ops);
5788                 return 0;
5789         }
5790
5791         if (!param)
5792                 goto out_reg;
5793
5794         number = strsep(&param, ":");
5795
5796         if (!strlen(number))
5797                 goto out_reg;
5798
5799         /*
5800          * We use the callback data field (which is a pointer)
5801          * as our counter.
5802          */
5803         ret = kstrtoul(number, 0, (unsigned long *)&count);
5804         if (ret)
5805                 return ret;
5806
5807  out_reg:
5808         ret = register_ftrace_function_probe(glob, ops, count);
5809
5810         if (ret >= 0)
5811                 alloc_snapshot(&global_trace);
5812
5813         return ret < 0 ? ret : 0;
5814 }
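/*
 * Illustrative userspace sketch (not part of this file): the "snapshot"
 * function command registered below is written into set_ftrace_filter.
 * A trailing ":count" limits how many snapshots are taken, as parsed in
 * ftrace_trace_snapshot_callback() above; the function name used here is
 * only an example.  Path assumes debugfs at /sys/kernel/debug.
 *
 *	int fd = open("/sys/kernel/debug/tracing/set_ftrace_filter", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "schedule:snapshot:1", 19);
 *		close(fd);
 *	}
 */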
5815
5816 static struct ftrace_func_command ftrace_snapshot_cmd = {
5817         .name                   = "snapshot",
5818         .func                   = ftrace_trace_snapshot_callback,
5819 };
5820
5821 static __init int register_snapshot_cmd(void)
5822 {
5823         return register_ftrace_command(&ftrace_snapshot_cmd);
5824 }
5825 #else
5826 static inline __init int register_snapshot_cmd(void) { return 0; }
5827 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5828
5829 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5830 {
5831         if (tr->dir)
5832                 return tr->dir;
5833
5834         if (!debugfs_initialized())
5835                 return NULL;
5836
5837         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5838                 tr->dir = debugfs_create_dir("tracing", NULL);
5839
5840         if (!tr->dir)
5841                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5842
5843         return tr->dir;
5844 }
5845
5846 struct dentry *tracing_init_dentry(void)
5847 {
5848         return tracing_init_dentry_tr(&global_trace);
5849 }
5850
5851 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5852 {
5853         struct dentry *d_tracer;
5854
5855         if (tr->percpu_dir)
5856                 return tr->percpu_dir;
5857
5858         d_tracer = tracing_init_dentry_tr(tr);
5859         if (!d_tracer)
5860                 return NULL;
5861
5862         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5863
5864         WARN_ONCE(!tr->percpu_dir,
5865                   "Could not create debugfs directory 'per_cpu' for cpu %d\n", cpu);
5866
5867         return tr->percpu_dir;
5868 }
5869
5870 static struct dentry *
5871 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5872                       void *data, long cpu, const struct file_operations *fops)
5873 {
5874         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5875
5876         if (ret) /* See tracing_get_cpu() */
5877                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5878         return ret;
5879 }
5880
5881 static void
5882 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5883 {
5884         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5885         struct dentry *d_cpu;
5886         char cpu_dir[30]; /* 30 characters should be more than enough */
5887
5888         if (!d_percpu)
5889                 return;
5890
5891         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5892         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5893         if (!d_cpu) {
5894                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5895                 return;
5896         }
5897
5898         /* per cpu trace_pipe */
5899         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5900                                 tr, cpu, &tracing_pipe_fops);
5901
5902         /* per cpu trace */
5903         trace_create_cpu_file("trace", 0644, d_cpu,
5904                                 tr, cpu, &tracing_fops);
5905
5906         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5907                                 tr, cpu, &tracing_buffers_fops);
5908
5909         trace_create_cpu_file("stats", 0444, d_cpu,
5910                                 tr, cpu, &tracing_stats_fops);
5911
5912         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5913                                 tr, cpu, &tracing_entries_fops);
5914
5915 #ifdef CONFIG_TRACER_SNAPSHOT
5916         trace_create_cpu_file("snapshot", 0644, d_cpu,
5917                                 tr, cpu, &snapshot_fops);
5918
5919         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5920                                 tr, cpu, &snapshot_raw_fops);
5921 #endif
5922 }
5923
5924 #ifdef CONFIG_FTRACE_SELFTEST
5925 /* Let selftest have access to static functions in this file */
5926 #include "trace_selftest.c"
5927 #endif
5928
5929 struct trace_option_dentry {
5930         struct tracer_opt               *opt;
5931         struct tracer_flags             *flags;
5932         struct trace_array              *tr;
5933         struct dentry                   *entry;
5934 };
5935
5936 static ssize_t
5937 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5938                         loff_t *ppos)
5939 {
5940         struct trace_option_dentry *topt = filp->private_data;
5941         char *buf;
5942
5943         if (topt->flags->val & topt->opt->bit)
5944                 buf = "1\n";
5945         else
5946                 buf = "0\n";
5947
5948         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5949 }
5950
5951 static ssize_t
5952 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5953                          loff_t *ppos)
5954 {
5955         struct trace_option_dentry *topt = filp->private_data;
5956         unsigned long val;
5957         int ret;
5958
5959         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5960         if (ret)
5961                 return ret;
5962
5963         if (val != 0 && val != 1)
5964                 return -EINVAL;
5965
5966         if (!!(topt->flags->val & topt->opt->bit) != val) {
5967                 mutex_lock(&trace_types_lock);
5968                 ret = __set_tracer_option(topt->tr, topt->flags,
5969                                           topt->opt, !val);
5970                 mutex_unlock(&trace_types_lock);
5971                 if (ret)
5972                         return ret;
5973         }
5974
5975         *ppos += cnt;
5976
5977         return cnt;
5978 }
5979
5980
5981 static const struct file_operations trace_options_fops = {
5982         .open = tracing_open_generic,
5983         .read = trace_options_read,
5984         .write = trace_options_write,
5985         .llseek = generic_file_llseek,
5986 };
5987
5988 static ssize_t
5989 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5990                         loff_t *ppos)
5991 {
5992         long index = (long)filp->private_data;
5993         char *buf;
5994
5995         if (trace_flags & (1 << index))
5996                 buf = "1\n";
5997         else
5998                 buf = "0\n";
5999
6000         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6001 }
6002
6003 static ssize_t
6004 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6005                          loff_t *ppos)
6006 {
6007         struct trace_array *tr = &global_trace;
6008         long index = (long)filp->private_data;
6009         unsigned long val;
6010         int ret;
6011
6012         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6013         if (ret)
6014                 return ret;
6015
6016         if (val != 0 && val != 1)
6017                 return -EINVAL;
6018
6019         mutex_lock(&trace_types_lock);
6020         ret = set_tracer_flag(tr, 1 << index, val);
6021         mutex_unlock(&trace_types_lock);
6022
6023         if (ret < 0)
6024                 return ret;
6025
6026         *ppos += cnt;
6027
6028         return cnt;
6029 }
6030
6031 static const struct file_operations trace_options_core_fops = {
6032         .open = tracing_open_generic,
6033         .read = trace_options_core_read,
6034         .write = trace_options_core_write,
6035         .llseek = generic_file_llseek,
6036 };
6037
6038 struct dentry *trace_create_file(const char *name,
6039                                  umode_t mode,
6040                                  struct dentry *parent,
6041                                  void *data,
6042                                  const struct file_operations *fops)
6043 {
6044         struct dentry *ret;
6045
6046         ret = debugfs_create_file(name, mode, parent, data, fops);
6047         if (!ret)
6048                 pr_warning("Could not create debugfs '%s' entry\n", name);
6049
6050         return ret;
6051 }
6052
6053
6054 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6055 {
6056         struct dentry *d_tracer;
6057
6058         if (tr->options)
6059                 return tr->options;
6060
6061         d_tracer = tracing_init_dentry_tr(tr);
6062         if (!d_tracer)
6063                 return NULL;
6064
6065         tr->options = debugfs_create_dir("options", d_tracer);
6066         if (!tr->options) {
6067                 pr_warning("Could not create debugfs directory 'options'\n");
6068                 return NULL;
6069         }
6070
6071         return tr->options;
6072 }
6073
6074 static void
6075 create_trace_option_file(struct trace_array *tr,
6076                          struct trace_option_dentry *topt,
6077                          struct tracer_flags *flags,
6078                          struct tracer_opt *opt)
6079 {
6080         struct dentry *t_options;
6081
6082         t_options = trace_options_init_dentry(tr);
6083         if (!t_options)
6084                 return;
6085
6086         topt->flags = flags;
6087         topt->opt = opt;
6088         topt->tr = tr;
6089
6090         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6091                                     &trace_options_fops);
6092
6093 }
6094
6095 static struct trace_option_dentry *
6096 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6097 {
6098         struct trace_option_dentry *topts;
6099         struct tracer_flags *flags;
6100         struct tracer_opt *opts;
6101         int cnt;
6102
6103         if (!tracer)
6104                 return NULL;
6105
6106         flags = tracer->flags;
6107
6108         if (!flags || !flags->opts)
6109                 return NULL;
6110
6111         opts = flags->opts;
6112
6113         for (cnt = 0; opts[cnt].name; cnt++)
6114                 ;
6115
6116         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6117         if (!topts)
6118                 return NULL;
6119
6120         for (cnt = 0; opts[cnt].name; cnt++)
6121                 create_trace_option_file(tr, &topts[cnt], flags,
6122                                          &opts[cnt]);
6123
6124         return topts;
6125 }
6126
6127 static void
6128 destroy_trace_option_files(struct trace_option_dentry *topts)
6129 {
6130         int cnt;
6131
6132         if (!topts)
6133                 return;
6134
6135         for (cnt = 0; topts[cnt].opt; cnt++)
6136                 debugfs_remove(topts[cnt].entry);
6137
6138         kfree(topts);
6139 }
6140
6141 static struct dentry *
6142 create_trace_option_core_file(struct trace_array *tr,
6143                               const char *option, long index)
6144 {
6145         struct dentry *t_options;
6146
6147         t_options = trace_options_init_dentry(tr);
6148         if (!t_options)
6149                 return NULL;
6150
6151         return trace_create_file(option, 0644, t_options, (void *)index,
6152                                     &trace_options_core_fops);
6153 }
6154
6155 static __init void create_trace_options_dir(struct trace_array *tr)
6156 {
6157         struct dentry *t_options;
6158         int i;
6159
6160         t_options = trace_options_init_dentry(tr);
6161         if (!t_options)
6162                 return;
6163
6164         for (i = 0; trace_options[i]; i++)
6165                 create_trace_option_core_file(tr, trace_options[i], i);
6166 }
6167
6168 static ssize_t
6169 rb_simple_read(struct file *filp, char __user *ubuf,
6170                size_t cnt, loff_t *ppos)
6171 {
6172         struct trace_array *tr = filp->private_data;
6173         char buf[64];
6174         int r;
6175
6176         r = tracer_tracing_is_on(tr);
6177         r = sprintf(buf, "%d\n", r);
6178
6179         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6180 }
6181
6182 static ssize_t
6183 rb_simple_write(struct file *filp, const char __user *ubuf,
6184                 size_t cnt, loff_t *ppos)
6185 {
6186         struct trace_array *tr = filp->private_data;
6187         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6188         unsigned long val;
6189         int ret;
6190
6191         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6192         if (ret)
6193                 return ret;
6194
6195         if (buffer) {
6196                 mutex_lock(&trace_types_lock);
6197                 if (val) {
6198                         tracer_tracing_on(tr);
6199                         if (tr->current_trace->start)
6200                                 tr->current_trace->start(tr);
6201                 } else {
6202                         tracer_tracing_off(tr);
6203                         if (tr->current_trace->stop)
6204                                 tr->current_trace->stop(tr);
6205                 }
6206                 mutex_unlock(&trace_types_lock);
6207         }
6208
6209         (*ppos)++;
6210
6211         return cnt;
6212 }
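/*
 * Illustrative userspace sketch (not part of this file): the "tracing_on"
 * file is backed by rb_simple_read()/rb_simple_write() above.  Writing "0"
 * turns the ring buffer off (and calls the current tracer's ->stop()),
 * writing "1" turns it back on.  Path assumes debugfs at /sys/kernel/debug.
 *
 *	int fd = open("/sys/kernel/debug/tracing/tracing_on", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "0", 1);	// stop recording, keep the buffer
 *		close(fd);
 *	}
 */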
6213
6214 static const struct file_operations rb_simple_fops = {
6215         .open           = tracing_open_generic_tr,
6216         .read           = rb_simple_read,
6217         .write          = rb_simple_write,
6218         .release        = tracing_release_generic_tr,
6219         .llseek         = default_llseek,
6220 };
6221
6222 struct dentry *trace_instance_dir;
6223
6224 static void
6225 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
6226
6227 static int
6228 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6229 {
6230         enum ring_buffer_flags rb_flags;
6231
6232         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6233
6234         buf->tr = tr;
6235
6236         buf->buffer = ring_buffer_alloc(size, rb_flags);
6237         if (!buf->buffer)
6238                 return -ENOMEM;
6239
6240         buf->data = alloc_percpu(struct trace_array_cpu);
6241         if (!buf->data) {
6242                 ring_buffer_free(buf->buffer);
6243                 return -ENOMEM;
6244         }
6245
6246         /* Allocate the first page for all buffers */
6247         set_buffer_entries(&tr->trace_buffer,
6248                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6249
6250         return 0;
6251 }
6252
6253 static int allocate_trace_buffers(struct trace_array *tr, int size)
6254 {
6255         int ret;
6256
6257         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6258         if (ret)
6259                 return ret;
6260
6261 #ifdef CONFIG_TRACER_MAX_TRACE
6262         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6263                                     allocate_snapshot ? size : 1);
6264         if (WARN_ON(ret)) {
6265                 ring_buffer_free(tr->trace_buffer.buffer);
6266                 free_percpu(tr->trace_buffer.data);
6267                 return -ENOMEM;
6268         }
6269         tr->allocated_snapshot = allocate_snapshot;
6270
6271         /*
6272          * Only the top level trace array gets its snapshot allocated
6273          * from the kernel command line.
6274          */
6275         allocate_snapshot = false;
6276 #endif
6277         return 0;
6278 }
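/*
 * Note (hedged): the "allocate_snapshot" flag consumed above is normally
 * set by the "alloc_snapshot" kernel boot parameter (handled elsewhere in
 * this file), so only the top-level trace array gets a full-sized max
 * buffer at boot, e.g. on the kernel command line:
 *
 *	ftrace=function alloc_snapshot
 *
 * Instances created later start with a one-page max buffer until a
 * snapshot is actually requested.
 */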
6279
6280 static void free_trace_buffer(struct trace_buffer *buf)
6281 {
6282         if (buf->buffer) {
6283                 ring_buffer_free(buf->buffer);
6284                 buf->buffer = NULL;
6285                 free_percpu(buf->data);
6286                 buf->data = NULL;
6287         }
6288 }
6289
6290 static void free_trace_buffers(struct trace_array *tr)
6291 {
6292         if (!tr)
6293                 return;
6294
6295         free_trace_buffer(&tr->trace_buffer);
6296
6297 #ifdef CONFIG_TRACER_MAX_TRACE
6298         free_trace_buffer(&tr->max_buffer);
6299 #endif
6300 }
6301
6302 static int new_instance_create(const char *name)
6303 {
6304         struct trace_array *tr;
6305         int ret;
6306
6307         mutex_lock(&trace_types_lock);
6308
6309         ret = -EEXIST;
6310         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6311                 if (tr->name && strcmp(tr->name, name) == 0)
6312                         goto out_unlock;
6313         }
6314
6315         ret = -ENOMEM;
6316         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6317         if (!tr)
6318                 goto out_unlock;
6319
6320         tr->name = kstrdup(name, GFP_KERNEL);
6321         if (!tr->name)
6322                 goto out_free_tr;
6323
6324         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6325                 goto out_free_tr;
6326
6327         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6328
6329         raw_spin_lock_init(&tr->start_lock);
6330
6331         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6332
6333         tr->current_trace = &nop_trace;
6334
6335         INIT_LIST_HEAD(&tr->systems);
6336         INIT_LIST_HEAD(&tr->events);
6337
6338         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6339                 goto out_free_tr;
6340
6341         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6342         if (!tr->dir)
6343                 goto out_free_tr;
6344
6345         ret = event_trace_add_tracer(tr->dir, tr);
6346         if (ret) {
6347                 debugfs_remove_recursive(tr->dir);
6348                 goto out_free_tr;
6349         }
6350
6351         init_tracer_debugfs(tr, tr->dir);
6352
6353         list_add(&tr->list, &ftrace_trace_arrays);
6354
6355         mutex_unlock(&trace_types_lock);
6356
6357         return 0;
6358
6359  out_free_tr:
6360         free_trace_buffers(tr);
6361         free_cpumask_var(tr->tracing_cpumask);
6362         kfree(tr->name);
6363         kfree(tr);
6364
6365  out_unlock:
6366         mutex_unlock(&trace_types_lock);
6367
6368         return ret;
6369
6370 }
6371
6372 static int instance_delete(const char *name)
6373 {
6374         struct trace_array *tr;
6375         int found = 0;
6376         int ret;
6377
6378         mutex_lock(&trace_types_lock);
6379
6380         ret = -ENODEV;
6381         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6382                 if (tr->name && strcmp(tr->name, name) == 0) {
6383                         found = 1;
6384                         break;
6385                 }
6386         }
6387         if (!found)
6388                 goto out_unlock;
6389
6390         ret = -EBUSY;
6391         if (tr->ref)
6392                 goto out_unlock;
6393
6394         list_del(&tr->list);
6395
6396         tracing_set_nop(tr);
6397         event_trace_del_tracer(tr);
6398         ftrace_destroy_function_files(tr);
6399         debugfs_remove_recursive(tr->dir);
6400         free_trace_buffers(tr);
6401
6402         kfree(tr->name);
6403         kfree(tr);
6404
6405         ret = 0;
6406
6407  out_unlock:
6408         mutex_unlock(&trace_types_lock);
6409
6410         return ret;
6411 }
6412
6413 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6414 {
6415         struct dentry *parent;
6416         int ret;
6417
6418         /* Paranoid: Make sure the parent is the "instances" directory */
6419         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
6420         if (WARN_ON_ONCE(parent != trace_instance_dir))
6421                 return -ENOENT;
6422
6423         /*
6424          * The inode mutex is locked, but debugfs_create_dir() will also
6425          * take the mutex. As the instances directory can not be destroyed
6426          * or changed in any other way, it is safe to unlock it, and
6427          * let the dentry try. If two users try to make the same dir at
6428          * the same time, then the new_instance_create() will determine the
6429          * winner.
6430          */
6431         mutex_unlock(&inode->i_mutex);
6432
6433         ret = new_instance_create(dentry->d_iname);
6434
6435         mutex_lock(&inode->i_mutex);
6436
6437         return ret;
6438 }
6439
6440 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6441 {
6442         struct dentry *parent;
6443         int ret;
6444
6445         /* Paranoid: Make sure the parent is the "instances" directory */
6446         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
6447         if (WARN_ON_ONCE(parent != trace_instance_dir))
6448                 return -ENOENT;
6449
6450         /* The caller did a dget() on dentry */
6451         mutex_unlock(&dentry->d_inode->i_mutex);
6452
6453         /*
6454          * The inode mutex is locked, but debugfs_remove_recursive() will
6455          * also take the mutex. As the instances directory can not be
6456          * destroyed or changed in any other way, it is safe to unlock it,
6457          * and let the dentry try. If two users try to remove the same dir
6458          * at the same time, then instance_delete() will determine the
6459          * winner.
6460          */
6461         mutex_unlock(&inode->i_mutex);
6462
6463         ret = instance_delete(dentry->d_iname);
6464
6465         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6466         mutex_lock(&dentry->d_inode->i_mutex);
6467
6468         return ret;
6469 }
6470
6471 static const struct inode_operations instance_dir_inode_operations = {
6472         .lookup         = simple_lookup,
6473         .mkdir          = instance_mkdir,
6474         .rmdir          = instance_rmdir,
6475 };
6476
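/*
 * Example of driving the instances directory from user space (assuming
 * debugfs is mounted at the usual /sys/kernel/debug location):
 *
 *	# mkdir /sys/kernel/debug/tracing/instances/foo
 *	# echo 1 > /sys/kernel/debug/tracing/instances/foo/tracing_on
 *	...
 *	# rmdir /sys/kernel/debug/tracing/instances/foo
 *
 * The mkdir/rmdir calls land in instance_mkdir()/instance_rmdir() above.
 */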
6477 static __init void create_trace_instances(struct dentry *d_tracer)
6478 {
6479         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6480         if (WARN_ON(!trace_instance_dir))
6481                 return;
6482
6483         /* Hijack the dir inode operations, to allow mkdir */
6484         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6485 }
6486
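/*
 * Create the control files that every trace_array gets, both the global
 * one and each instance: tracer selection, the trace/trace_pipe output
 * files, buffer sizing, trace_marker, clock selection, tracing_on, the
 * optional max-latency and snapshot files, and the per-CPU files via
 * tracing_init_debugfs_percpu().
 */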
6487 static void
6488 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6489 {
6490         int cpu;
6491
6492         trace_create_file("available_tracers", 0444, d_tracer,
6493                         tr, &show_traces_fops);
6494
6495         trace_create_file("current_tracer", 0644, d_tracer,
6496                         tr, &set_tracer_fops);
6497
6498         trace_create_file("tracing_cpumask", 0644, d_tracer,
6499                           tr, &tracing_cpumask_fops);
6500
6501         trace_create_file("trace_options", 0644, d_tracer,
6502                           tr, &tracing_iter_fops);
6503
6504         trace_create_file("trace", 0644, d_tracer,
6505                           tr, &tracing_fops);
6506
6507         trace_create_file("trace_pipe", 0444, d_tracer,
6508                           tr, &tracing_pipe_fops);
6509
6510         trace_create_file("buffer_size_kb", 0644, d_tracer,
6511                           tr, &tracing_entries_fops);
6512
6513         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6514                           tr, &tracing_total_entries_fops);
6515
6516         trace_create_file("free_buffer", 0200, d_tracer,
6517                           tr, &tracing_free_buffer_fops);
6518
6519         trace_create_file("trace_marker", 0220, d_tracer,
6520                           tr, &tracing_mark_fops);
6521
6522         trace_create_file("trace_clock", 0644, d_tracer, tr,
6523                           &trace_clock_fops);
6524
6525         trace_create_file("tracing_on", 0644, d_tracer,
6526                           tr, &rb_simple_fops);
6527
6528 #ifdef CONFIG_TRACER_MAX_TRACE
6529         trace_create_file("tracing_max_latency", 0644, d_tracer,
6530                         &tr->max_latency, &tracing_max_lat_fops);
6531 #endif
6532
6533         if (ftrace_create_function_files(tr, d_tracer))
6534                 WARN(1, "Could not allocate function filter files");
6535
6536 #ifdef CONFIG_TRACER_SNAPSHOT
6537         trace_create_file("snapshot", 0644, d_tracer,
6538                           tr, &snapshot_fops);
6539 #endif
6540
6541         for_each_tracing_cpu(cpu)
6542                 tracing_init_debugfs_percpu(tr, cpu);
6543
6544 }
6545
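/*
 * fs_initcall: set up the top-level tracing debugfs directory.  Files
 * that only make sense for the global trace (tracing_thresh, README,
 * saved_cmdlines, dyn_ftrace_total_info) are created here, on top of
 * the common per-instance files from init_tracer_debugfs(), plus the
 * "instances" and trace options directories.
 */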
6546 static __init int tracer_init_debugfs(void)
6547 {
6548         struct dentry *d_tracer;
6549
6550         trace_access_lock_init();
6551
6552         d_tracer = tracing_init_dentry();
6553         if (!d_tracer)
6554                 return 0;
6555
6556         init_tracer_debugfs(&global_trace, d_tracer);
6557
6558         trace_create_file("tracing_thresh", 0644, d_tracer,
6559                         &global_trace, &tracing_thresh_fops);
6560
6561         trace_create_file("README", 0444, d_tracer,
6562                         NULL, &tracing_readme_fops);
6563
6564         trace_create_file("saved_cmdlines", 0444, d_tracer,
6565                         NULL, &tracing_saved_cmdlines_fops);
6566
6567         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
6568                           NULL, &tracing_saved_cmdlines_size_fops);
6569
6570 #ifdef CONFIG_DYNAMIC_FTRACE
6571         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6572                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6573 #endif
6574
6575         create_trace_instances(d_tracer);
6576
6577         create_trace_options_dir(&global_trace);
6578
6579         return 0;
6580 }
6581
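/*
 * Panic and die notifiers, registered from tracer_alloc_buffers().
 * When ftrace_dump_on_oops is set they dump the ring buffer to the
 * console via ftrace_dump() before the system goes down.
 */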
6582 static int trace_panic_handler(struct notifier_block *this,
6583                                unsigned long event, void *unused)
6584 {
6585         if (ftrace_dump_on_oops)
6586                 ftrace_dump(ftrace_dump_on_oops);
6587         return NOTIFY_OK;
6588 }
6589
6590 static struct notifier_block trace_panic_notifier = {
6591         .notifier_call  = trace_panic_handler,
6592         .next           = NULL,
6593         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6594 };
6595
6596 static int trace_die_handler(struct notifier_block *self,
6597                              unsigned long val,
6598                              void *data)
6599 {
6600         switch (val) {
6601         case DIE_OOPS:
6602                 if (ftrace_dump_on_oops)
6603                         ftrace_dump(ftrace_dump_on_oops);
6604                 break;
6605         default:
6606                 break;
6607         }
6608         return NOTIFY_OK;
6609 }
6610
6611 static struct notifier_block trace_die_notifier = {
6612         .notifier_call = trace_die_handler,
6613         .priority = 200
6614 };
6615
6616 /*
6617  * printk() is limited to a 1024 byte buffer; we really don't need
6618  * that much here.  Nothing should be printing 1000 characters anyway.
6619  */
6620 #define TRACE_MAX_PRINT         1000
6621
6622 /*
6623  * Define here KERN_TRACE so that we have one place to modify
6624  * it if we decide to change what log level the ftrace dump
6625  * should be at.
6626  */
6627 #define KERN_TRACE              KERN_EMERG
6628
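/*
 * Write one trace_seq worth of output to the console at KERN_TRACE
 * level, clamping it to TRACE_MAX_PRINT, and reset the seq for reuse.
 * Used by ftrace_dump() below.
 */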
6629 void
6630 trace_printk_seq(struct trace_seq *s)
6631 {
6632         /* Probably should print a warning here. */
6633         if (s->len >= TRACE_MAX_PRINT)
6634                 s->len = TRACE_MAX_PRINT;
6635
6636         /* should already be NUL terminated, but we are paranoid. */
6637         s->buffer[s->len] = 0;
6638
6639         printk(KERN_TRACE "%s", s->buffer);
6640
6641         trace_seq_init(s);
6642 }
6643
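/*
 * Initialize an iterator over the global trace buffer, covering all
 * CPUs, for callers such as ftrace_dump() that cannot go through the
 * normal debugfs open path.
 */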
6644 void trace_init_global_iter(struct trace_iterator *iter)
6645 {
6646         iter->tr = &global_trace;
6647         iter->trace = iter->tr->current_trace;
6648         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6649         iter->trace_buffer = &global_trace.trace_buffer;
6650
6651         if (iter->trace && iter->trace->open)
6652                 iter->trace->open(iter);
6653
6654         /* Annotate start of buffers if we had overruns */
6655         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6656                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6657
6658         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6659         if (trace_clocks[iter->tr->clock_id].in_ns)
6660                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6661 }
6662
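/*
 * Dump the ring buffer(s) to the console.  Only one dumper may run at
 * a time; tracing is turned off first and per-cpu recording disabled
 * while the buffers are read.  oops_dump_mode selects all CPUs
 * (DUMP_ALL) or just the current one (DUMP_ORIG).
 *
 * Exported, so (for example) a module debugging a hang could call:
 *
 *	ftrace_dump(DUMP_ALL);
 *
 * though the usual entry points are the panic/die notifiers and sysrq-z.
 */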
6663 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6664 {
6665         /* use static because iter can be a bit big for the stack */
6666         static struct trace_iterator iter;
6667         static atomic_t dump_running;
6668         unsigned int old_userobj;
6669         unsigned long flags;
6670         int cnt = 0, cpu;
6671
6672         /* Only allow one dump user at a time. */
6673         if (atomic_inc_return(&dump_running) != 1) {
6674                 atomic_dec(&dump_running);
6675                 return;
6676         }
6677
6678         /*
6679          * Always turn off tracing when we dump.
6680          * We don't need to show trace output of what happens
6681          * between multiple crashes.
6682          *
6683          * If the user does a sysrq-z, then they can re-enable
6684          * tracing with echo 1 > tracing_on.
6685          */
6686         tracing_off();
6687
6688         local_irq_save(flags);
6689
6690         /* Simulate the iterator */
6691         trace_init_global_iter(&iter);
6692
6693         for_each_tracing_cpu(cpu) {
6694                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6695         }
6696
6697         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6698
6699         /* don't look at user memory in panic mode */
6700         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6701
6702         switch (oops_dump_mode) {
6703         case DUMP_ALL:
6704                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6705                 break;
6706         case DUMP_ORIG:
6707                 iter.cpu_file = raw_smp_processor_id();
6708                 break;
6709         case DUMP_NONE:
6710                 goto out_enable;
6711         default:
6712                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6713                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6714         }
6715
6716         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6717
6718         /* Did function tracer already get disabled? */
6719         if (ftrace_is_dead()) {
6720                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6721                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6722         }
6723
6724         /*
6725          * We need to stop all tracing on all CPUs to read
6726          * the next buffer. This is a bit expensive, but is
6727          * not done often. We fill all that we can read,
6728          * and then release the locks again.
6729          */
6730
6731         while (!trace_empty(&iter)) {
6732
6733                 if (!cnt)
6734                         printk(KERN_TRACE "---------------------------------\n");
6735
6736                 cnt++;
6737
6738                 /* reset all but tr, trace, and overruns */
6739                 memset(&iter.seq, 0,
6740                        sizeof(struct trace_iterator) -
6741                        offsetof(struct trace_iterator, seq));
6742                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6743                 iter.pos = -1;
6744
6745                 if (trace_find_next_entry_inc(&iter) != NULL) {
6746                         int ret;
6747
6748                         ret = print_trace_line(&iter);
6749                         if (ret != TRACE_TYPE_NO_CONSUME)
6750                                 trace_consume(&iter);
6751                 }
6752                 touch_nmi_watchdog();
6753
6754                 trace_printk_seq(&iter.seq);
6755         }
6756
6757         if (!cnt)
6758                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6759         else
6760                 printk(KERN_TRACE "---------------------------------\n");
6761
6762  out_enable:
6763         trace_flags |= old_userobj;
6764
6765         for_each_tracing_cpu(cpu) {
6766                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6767         }
6768         atomic_dec(&dump_running);
6769         local_irq_restore(flags);
6770 }
6771 EXPORT_SYMBOL_GPL(ftrace_dump);
6772
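/*
 * early_initcall: allocate the global trace buffers and supporting data
 * (cpumasks, saved cmdlines, the temp buffer used by event triggers),
 * install the nop tracer, register the panic/die notifiers, and apply
 * any boot-time clock and trace options stored in trace_boot_clock and
 * trace_boot_options.
 */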
6773 __init static int tracer_alloc_buffers(void)
6774 {
6775         int ring_buf_size;
6776         int ret = -ENOMEM;
6777
6779         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6780                 goto out;
6781
6782         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6783                 goto out_free_buffer_mask;
6784
6785         /* Only allocate trace_printk buffers if a trace_printk exists */
6786         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6787                 /* Must be called before global_trace.buffer is allocated */
6788                 trace_printk_init_buffers();
6789
6790         /* To save memory, keep the ring buffer size to its minimum */
6791         if (ring_buffer_expanded)
6792                 ring_buf_size = trace_buf_size;
6793         else
6794                 ring_buf_size = 1;
6795
6796         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6797         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6798
6799         raw_spin_lock_init(&global_trace.start_lock);
6800
6801         /* Used for event triggers */
6802         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6803         if (!temp_buffer)
6804                 goto out_free_cpumask;
6805
6806         if (trace_create_savedcmd() < 0)
6807                 goto out_free_temp_buffer;
6808
6809         /* TODO: make the number of buffers hot pluggable with CPUS */
6810         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6811                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6812                 WARN_ON(1);
6813                 goto out_free_savedcmd;
6814         }
6815
6816         if (global_trace.buffer_disabled)
6817                 tracing_off();
6818
6819         if (trace_boot_clock) {
6820                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
6821                 if (ret < 0)
6822                         pr_warning("Trace clock %s not defined, going back to default\n",
6823                                    trace_boot_clock);
6824         }
6825
6826         /*
6827          * register_tracer() might reference current_trace, so it
6828          * needs to be set before we register anything. This is
6829          * just a bootstrap of current_trace anyway.
6830          */
6831         global_trace.current_trace = &nop_trace;
6832
6833         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6834
6835         ftrace_init_global_array_ops(&global_trace);
6836
6837         register_tracer(&nop_trace);
6838
6839         /* All seems OK, enable tracing */
6840         tracing_disabled = 0;
6841
6842         atomic_notifier_chain_register(&panic_notifier_list,
6843                                        &trace_panic_notifier);
6844
6845         register_die_notifier(&trace_die_notifier);
6846
6847         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6848
6849         INIT_LIST_HEAD(&global_trace.systems);
6850         INIT_LIST_HEAD(&global_trace.events);
6851         list_add(&global_trace.list, &ftrace_trace_arrays);
6852
6853         while (trace_boot_options) {
6854                 char *option;
6855
6856                 option = strsep(&trace_boot_options, ",");
6857                 trace_set_options(&global_trace, option);
6858         }
6859
6860         register_snapshot_cmd();
6861
6862         return 0;
6863
6864 out_free_savedcmd:
6865         free_saved_cmdlines_buffer(savedcmd);
6866 out_free_temp_buffer:
6867         ring_buffer_free(temp_buffer);
6868 out_free_cpumask:
6869         free_cpumask_var(global_trace.tracing_cpumask);
6870 out_free_buffer_mask:
6871         free_cpumask_var(tracing_buffer_mask);
6872 out:
6873         return ret;
6874 }
6875
6876 __init static int clear_boot_tracer(void)
6877 {
6878         /*
6879          * The default bootup tracer name is stored in an init section
6880          * buffer. This function runs as a late_initcall. If we did not
6881          * find the boot tracer, then clear it out, to prevent
6882          * later registration from accessing the buffer that is
6883          * about to be freed.
6884          */
6885         if (!default_bootup_tracer)
6886                 return 0;
6887
6888         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6889                default_bootup_tracer);
6890         default_bootup_tracer = NULL;
6891
6892         return 0;
6893 }
6894
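/*
 * Initcall ordering: the buffers must exist early (early_initcall),
 * the debugfs files can only be created once the fs infrastructure is
 * up (fs_initcall), and the bootup-tracer cleanup runs last
 * (late_initcall), after the boot tracer has had its chance to register.
 */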
6895 early_initcall(tracer_alloc_buffers);
6896 fs_initcall(tracer_init_debugfs);
6897 late_initcall(clear_boot_tracer);