[cascardo/linux.git] / kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will peek into the ring buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring buffer, such as trace_printk, could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int
77 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
78 {
79         return 0;
80 }
81
82 /*
83  * To prevent the comm cache from being overwritten when no
84  * tracing is active, only save the comm when a trace event
85  * occurs.
86  */
87 static DEFINE_PER_CPU(bool, trace_cmdline_save);
88
89 /*
90  * Kill all tracing for good (never come back).
91  * It is initialized to 1 but will be set to zero if the initialization
92  * of the tracer is successful. That is the only place that sets
93  * it back to zero.
94  */
95 static int tracing_disabled = 1;
96
97 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
98
99 cpumask_var_t __read_mostly     tracing_buffer_mask;
100
101 /*
102  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
103  *
104  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
105  * is set, then ftrace_dump is called. This will output the contents
106  * of the ftrace buffers to the console.  This is very useful for
107  * capturing traces that lead to crashes and outputting them to a
108  * serial console.
109  *
110  * It is off by default, but you can enable it either by specifying
111  * "ftrace_dump_on_oops" on the kernel command line, or by setting
112  * /proc/sys/kernel/ftrace_dump_on_oops
113  * Set it to 1 to dump the buffers of all CPUs
114  * Set it to 2 to dump the buffer of the CPU that triggered the oops
115  */
116
117 enum ftrace_dump_mode ftrace_dump_on_oops;
118
119 /* When set, tracing will stop when a WARN*() is hit */
120 int __disable_trace_on_warning;
121
122 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
123
124 #define MAX_TRACER_SIZE         100
125 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
126 static char *default_bootup_tracer;
127
128 static bool allocate_snapshot;
129
130 static int __init set_cmdline_ftrace(char *str)
131 {
132         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
133         default_bootup_tracer = bootup_tracer_buf;
134         /* We are using ftrace early, expand it */
135         ring_buffer_expanded = true;
136         return 1;
137 }
138 __setup("ftrace=", set_cmdline_ftrace);
139
140 static int __init set_ftrace_dump_on_oops(char *str)
141 {
142         if (*str++ != '=' || !*str) {
143                 ftrace_dump_on_oops = DUMP_ALL;
144                 return 1;
145         }
146
147         if (!strcmp("orig_cpu", str)) {
148                 ftrace_dump_on_oops = DUMP_ORIG;
149                 return 1;
150         }
151
152         return 0;
153 }
154 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
155
156 static int __init stop_trace_on_warning(char *str)
157 {
158         __disable_trace_on_warning = 1;
159         return 1;
160 }
161 __setup("traceoff_on_warning=", stop_trace_on_warning);
162
163 static int __init boot_alloc_snapshot(char *str)
164 {
165         allocate_snapshot = true;
166         /* We also need the main ring buffer expanded */
167         ring_buffer_expanded = true;
168         return 1;
169 }
170 __setup("alloc_snapshot", boot_alloc_snapshot);
171
172
173 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
174 static char *trace_boot_options __initdata;
175
176 static int __init set_trace_boot_options(char *str)
177 {
178         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
179         trace_boot_options = trace_boot_options_buf;
180         return 0;
181 }
182 __setup("trace_options=", set_trace_boot_options);
183
184 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
185 static char *trace_boot_clock __initdata;
186
187 static int __init set_trace_boot_clock(char *str)
188 {
189         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
190         trace_boot_clock = trace_boot_clock_buf;
191         return 0;
192 }
193 __setup("trace_clock=", set_trace_boot_clock);
194
195
196 unsigned long long ns2usecs(cycle_t nsec)
197 {
198         nsec += 500;
199         do_div(nsec, 1000);
200         return nsec;
201 }
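/*
 * Editor's note (worked example, not part of the original file):
 * the "+ 500" rounds to the nearest microsecond before the integer
 * division, e.g.:
 *
 *	ns2usecs(1499)  == 1	// (1499 + 500) / 1000
 *	ns2usecs(1500)  == 2	// (1500 + 500) / 1000
 *	ns2usecs(12345) == 12	// (12345 + 500) / 1000
 */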
202
203 /*
204  * The global_trace is the descriptor that holds the tracing
205  * buffers for the live tracing. For each CPU, it contains
206  * a linked list of pages that will store trace entries. The
207  * page descriptor of the pages in memory is used to hold
208  * the linked list, by linking the lru item in the page descriptor
209  * to each of the pages in that CPU's buffer.
210  *
211  * For each active CPU there is a data field that holds the
212  * pages for the buffer for that CPU. Each CPU has the same number
213  * of pages allocated for its buffer.
214  */
215 static struct trace_array       global_trace;
216
217 LIST_HEAD(ftrace_trace_arrays);
218
219 int trace_array_get(struct trace_array *this_tr)
220 {
221         struct trace_array *tr;
222         int ret = -ENODEV;
223
224         mutex_lock(&trace_types_lock);
225         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
226                 if (tr == this_tr) {
227                         tr->ref++;
228                         ret = 0;
229                         break;
230                 }
231         }
232         mutex_unlock(&trace_types_lock);
233
234         return ret;
235 }
236
237 static void __trace_array_put(struct trace_array *this_tr)
238 {
239         WARN_ON(!this_tr->ref);
240         this_tr->ref--;
241 }
242
243 void trace_array_put(struct trace_array *this_tr)
244 {
245         mutex_lock(&trace_types_lock);
246         __trace_array_put(this_tr);
247         mutex_unlock(&trace_types_lock);
248 }
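/*
 * Editor's note (illustrative sketch, not part of the original file):
 * trace_array_get()/trace_array_put() form a reference count pair; a
 * caller that wants to keep a trace_array from going away uses them
 * like this (do_something_with() is a hypothetical helper):
 *
 *	if (trace_array_get(tr) == 0) {
 *		// tr is guaranteed to stay on ftrace_trace_arrays here
 *		do_something_with(tr);
 *		trace_array_put(tr);
 *	}
 */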
249
250 int filter_check_discard(struct ftrace_event_file *file, void *rec,
251                          struct ring_buffer *buffer,
252                          struct ring_buffer_event *event)
253 {
254         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
255             !filter_match_preds(file->filter, rec)) {
256                 ring_buffer_discard_commit(buffer, event);
257                 return 1;
258         }
259
260         return 0;
261 }
262 EXPORT_SYMBOL_GPL(filter_check_discard);
263
264 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
265                               struct ring_buffer *buffer,
266                               struct ring_buffer_event *event)
267 {
268         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
269             !filter_match_preds(call->filter, rec)) {
270                 ring_buffer_discard_commit(buffer, event);
271                 return 1;
272         }
273
274         return 0;
275 }
276 EXPORT_SYMBOL_GPL(call_filter_check_discard);
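/*
 * Editor's note (illustrative sketch, not part of the original file):
 * the usual pattern for these helpers is to commit the event only when
 * the filter did not discard it, exactly as trace_function() does later
 * in this file:
 *
 *	entry = ring_buffer_event_data(event);
 *	// ... fill in the entry fields ...
 *	if (!call_filter_check_discard(call, entry, buffer, event))
 *		__buffer_unlock_commit(buffer, event);
 */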
277
278 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
279 {
280         u64 ts;
281
282         /* Early boot up does not have a buffer yet */
283         if (!buf->buffer)
284                 return trace_clock_local();
285
286         ts = ring_buffer_time_stamp(buf->buffer, cpu);
287         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
288
289         return ts;
290 }
291
292 cycle_t ftrace_now(int cpu)
293 {
294         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
295 }
296
297 /**
298  * tracing_is_enabled - Show if global_trace has been disabled
299  *
300  * Shows if the global trace has been enabled or not. It uses the
301  * mirror flag "buffer_disabled", which can be used in fast paths such as
302  * the irqsoff tracer. But it may be inaccurate due to races. If you
303  * need to know the accurate state, use tracing_is_on() which is a little
304  * slower, but accurate.
305  */
306 int tracing_is_enabled(void)
307 {
308         /*
309          * For quick access (irqsoff uses this in fast path), just
310          * return the mirror variable of the state of the ring buffer.
311          * It's a little racy, but we don't really care.
312          */
313         smp_rmb();
314         return !global_trace.buffer_disabled;
315 }
316
317 /*
318  * trace_buf_size is the size in bytes that is allocated
319  * for a buffer. Note, the number of bytes is always rounded
320  * to page size.
321  *
322  * This number is purposely set to a low value of 16384.
323  * If a dump on oops happens, it is much appreciated not to have
324  * to wait for all that output. In any case, this is configurable
325  * at both boot time and run time.
326  */
327 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
328
329 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
330
331 /* trace_types holds a linked list of available tracers. */
332 static struct tracer            *trace_types __read_mostly;
333
334 /*
335  * trace_types_lock is used to protect the trace_types list.
336  */
337 DEFINE_MUTEX(trace_types_lock);
338
339 /*
340  * serialize the access of the ring buffer
341  *
342  * The ring buffer serializes readers, but that is only low level protection.
343  * The validity of the events (which are returned by ring_buffer_peek() etc.)
344  * is not protected by the ring buffer.
345  *
346  * The content of events may become garbage if we allow another process to
347  * consume these events concurrently:
348  *   A) the page of the consumed events may become a normal page
349  *      (not a reader page) in the ring buffer, and this page will be rewritten
350  *      by the events producer.
351  *   B) the page of the consumed events may become a page for splice_read,
352  *      and this page will be returned to the system.
353  *
354  * These primitives allow multiple processes to access different per-cpu ring
355  * buffers concurrently.
356  *
357  * These primitives don't distinguish between read-only and read-consume access.
358  * Multiple read-only accesses are also serialized.
359  */
360
361 #ifdef CONFIG_SMP
362 static DECLARE_RWSEM(all_cpu_access_lock);
363 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
364
365 static inline void trace_access_lock(int cpu)
366 {
367         if (cpu == RING_BUFFER_ALL_CPUS) {
368                 /* gain it for accessing the whole ring buffer. */
369                 down_write(&all_cpu_access_lock);
370         } else {
371                 /* gain it for accessing a cpu ring buffer. */
372
373                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
374                 down_read(&all_cpu_access_lock);
375
376                 /* Secondly block other access to this @cpu ring buffer. */
377                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
378         }
379 }
380
381 static inline void trace_access_unlock(int cpu)
382 {
383         if (cpu == RING_BUFFER_ALL_CPUS) {
384                 up_write(&all_cpu_access_lock);
385         } else {
386                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
387                 up_read(&all_cpu_access_lock);
388         }
389 }
390
391 static inline void trace_access_lock_init(void)
392 {
393         int cpu;
394
395         for_each_possible_cpu(cpu)
396                 mutex_init(&per_cpu(cpu_access_lock, cpu));
397 }
398
399 #else
400
401 static DEFINE_MUTEX(access_lock);
402
403 static inline void trace_access_lock(int cpu)
404 {
405         (void)cpu;
406         mutex_lock(&access_lock);
407 }
408
409 static inline void trace_access_unlock(int cpu)
410 {
411         (void)cpu;
412         mutex_unlock(&access_lock);
413 }
414
415 static inline void trace_access_lock_init(void)
416 {
417 }
418
419 #endif
420
421 /* trace_flags holds trace_options default values */
422 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
423         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
424         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
425         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
426
427 static void tracer_tracing_on(struct trace_array *tr)
428 {
429         if (tr->trace_buffer.buffer)
430                 ring_buffer_record_on(tr->trace_buffer.buffer);
431         /*
432          * This flag is looked at when buffers haven't been allocated
433          * yet, or by some tracers (like irqsoff), that just want to
434          * know if the ring buffer has been disabled, but it can handle
435          * races of where it gets disabled but we still do a record.
436          * races where it gets disabled but we still do a record.
437          * important to be fast than accurate.
438          */
439         tr->buffer_disabled = 0;
440         /* Make the flag seen by readers */
441         smp_wmb();
442 }
443
444 /**
445  * tracing_on - enable tracing buffers
446  *
447  * This function enables tracing buffers that may have been
448  * disabled with tracing_off.
449  */
450 void tracing_on(void)
451 {
452         tracer_tracing_on(&global_trace);
453 }
454 EXPORT_SYMBOL_GPL(tracing_on);
455
456 /**
457  * __trace_puts - write a constant string into the trace buffer.
458  * @ip:    The address of the caller
459  * @str:   The constant string to write
460  * @size:  The size of the string.
461  */
462 int __trace_puts(unsigned long ip, const char *str, int size)
463 {
464         struct ring_buffer_event *event;
465         struct ring_buffer *buffer;
466         struct print_entry *entry;
467         unsigned long irq_flags;
468         int alloc;
469         int pc;
470
471         if (!(trace_flags & TRACE_ITER_PRINTK))
472                 return 0;
473
474         pc = preempt_count();
475
476         if (unlikely(tracing_selftest_running || tracing_disabled))
477                 return 0;
478
479         alloc = sizeof(*entry) + size + 2; /* possible \n added */
480
481         local_save_flags(irq_flags);
482         buffer = global_trace.trace_buffer.buffer;
483         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
484                                           irq_flags, pc);
485         if (!event)
486                 return 0;
487
488         entry = ring_buffer_event_data(event);
489         entry->ip = ip;
490
491         memcpy(&entry->buf, str, size);
492
493         /* Add a newline if necessary */
494         if (entry->buf[size - 1] != '\n') {
495                 entry->buf[size] = '\n';
496                 entry->buf[size + 1] = '\0';
497         } else
498                 entry->buf[size] = '\0';
499
500         __buffer_unlock_commit(buffer, event);
501         ftrace_trace_stack(buffer, irq_flags, 4, pc);
502
503         return size;
504 }
505 EXPORT_SYMBOL_GPL(__trace_puts);
506
507 /**
508  * __trace_bputs - write the pointer to a constant string into trace buffer
509  * @ip:    The address of the caller
510  * @str:   The constant string to write to the buffer
511  */
512 int __trace_bputs(unsigned long ip, const char *str)
513 {
514         struct ring_buffer_event *event;
515         struct ring_buffer *buffer;
516         struct bputs_entry *entry;
517         unsigned long irq_flags;
518         int size = sizeof(struct bputs_entry);
519         int pc;
520
521         if (!(trace_flags & TRACE_ITER_PRINTK))
522                 return 0;
523
524         pc = preempt_count();
525
526         if (unlikely(tracing_selftest_running || tracing_disabled))
527                 return 0;
528
529         local_save_flags(irq_flags);
530         buffer = global_trace.trace_buffer.buffer;
531         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
532                                           irq_flags, pc);
533         if (!event)
534                 return 0;
535
536         entry = ring_buffer_event_data(event);
537         entry->ip                       = ip;
538         entry->str                      = str;
539
540         __buffer_unlock_commit(buffer, event);
541         ftrace_trace_stack(buffer, irq_flags, 4, pc);
542
543         return 1;
544 }
545 EXPORT_SYMBOL_GPL(__trace_bputs);
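/*
 * Editor's note (illustrative sketch, not part of the original file):
 * callers normally reach these two helpers through the trace_puts() macro
 * in linux/kernel.h, which picks __trace_bputs() when the compiler can
 * prove the string is a constant (only the pointer is stored) and falls
 * back to __trace_puts() otherwise (the string is copied into the event):
 *
 *	trace_puts("entering the slow path\n");
 */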
546
547 #ifdef CONFIG_TRACER_SNAPSHOT
548 /**
549  * tracing_snapshot - take a snapshot of the current buffer.
550  *
551  * This causes a swap between the snapshot buffer and the current live
552  * tracing buffer. You can use this to take snapshots of the live
553  * trace when some condition is triggered, but continue to trace.
554  *
555  * Note, make sure to allocate the snapshot either with
556  * tracing_snapshot_alloc(), or manually
557  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
558  *
559  * If the snapshot buffer is not allocated, this will stop tracing,
560  * basically making a permanent snapshot.
561  */
562 void tracing_snapshot(void)
563 {
564         struct trace_array *tr = &global_trace;
565         struct tracer *tracer = tr->current_trace;
566         unsigned long flags;
567
568         if (in_nmi()) {
569                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
570                 internal_trace_puts("*** snapshot is being ignored        ***\n");
571                 return;
572         }
573
574         if (!tr->allocated_snapshot) {
575                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
576                 internal_trace_puts("*** stopping trace here!   ***\n");
577                 tracing_off();
578                 return;
579         }
580
581         /* Note, snapshot can not be used when the tracer uses it */
582         if (tracer->use_max_tr) {
583                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
584                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
585                 return;
586         }
587
588         local_irq_save(flags);
589         update_max_tr(tr, current, smp_processor_id());
590         local_irq_restore(flags);
591 }
592 EXPORT_SYMBOL_GPL(tracing_snapshot);
593
594 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
595                                         struct trace_buffer *size_buf, int cpu_id);
596 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
597
598 static int alloc_snapshot(struct trace_array *tr)
599 {
600         int ret;
601
602         if (!tr->allocated_snapshot) {
603
604                 /* allocate spare buffer */
605                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
606                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
607                 if (ret < 0)
608                         return ret;
609
610                 tr->allocated_snapshot = true;
611         }
612
613         return 0;
614 }
615
616 static void free_snapshot(struct trace_array *tr)
617 {
618         /*
619  * We don't free the ring buffer; instead, we resize it, because
620  * the max_tr ring buffer has some state (e.g. ring->clock) and
621  * we want to preserve it.
622          */
623         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
624         set_buffer_entries(&tr->max_buffer, 1);
625         tracing_reset_online_cpus(&tr->max_buffer);
626         tr->allocated_snapshot = false;
627 }
628
629 /**
630  * tracing_alloc_snapshot - allocate snapshot buffer.
631  *
632  * This only allocates the snapshot buffer if it isn't already
633  * allocated - it doesn't also take a snapshot.
634  *
635  * This is meant to be used in cases where the snapshot buffer needs
636  * to be set up for events that can't sleep but need to be able to
637  * trigger a snapshot.
638  */
639 int tracing_alloc_snapshot(void)
640 {
641         struct trace_array *tr = &global_trace;
642         int ret;
643
644         ret = alloc_snapshot(tr);
645         WARN_ON(ret < 0);
646
647         return ret;
648 }
649 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
650
651 /**
652  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
653  *
654  * This is similar to tracing_snapshot(), but it will allocate the
655  * snapshot buffer if it isn't already allocated. Use this only
656  * where it is safe to sleep, as the allocation may sleep.
657  *
658  * This causes a swap between the snapshot buffer and the current live
659  * tracing buffer. You can use this to take snapshots of the live
660  * trace when some condition is triggered, but continue to trace.
661  */
662 void tracing_snapshot_alloc(void)
663 {
664         int ret;
665
666         ret = tracing_alloc_snapshot();
667         if (ret < 0)
668                 return;
669
670         tracing_snapshot();
671 }
672 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
673 #else
674 void tracing_snapshot(void)
675 {
676         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
677 }
678 EXPORT_SYMBOL_GPL(tracing_snapshot);
679 int tracing_alloc_snapshot(void)
680 {
681         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
682         return -ENODEV;
683 }
684 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
685 void tracing_snapshot_alloc(void)
686 {
687         /* Give warning */
688         tracing_snapshot();
689 }
690 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
691 #endif /* CONFIG_TRACER_SNAPSHOT */
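/*
 * Editor's note (illustrative sketch, not part of the original file):
 * a typical in-kernel use is to allocate the spare buffer once from a
 * context that may sleep, and then trigger snapshots from the point of
 * interest; my_condition() is a hypothetical predicate:
 *
 *	// setup, may sleep:
 *	if (tracing_alloc_snapshot() < 0)
 *		return;
 *
 *	// later, in the traced path:
 *	if (my_condition())
 *		tracing_snapshot();	// swap the live buffer with the spare one
 */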
692
693 static void tracer_tracing_off(struct trace_array *tr)
694 {
695         if (tr->trace_buffer.buffer)
696                 ring_buffer_record_off(tr->trace_buffer.buffer);
697         /*
698          * This flag is looked at when buffers haven't been allocated
699          * yet, or by some tracers (like irqsoff), that just want to
700          * know if the ring buffer has been disabled, but it can handle
701          * races of where it gets disabled but we still do a record.
702          * races where it gets disabled but we still do a record.
703          * important to be fast than accurate.
704          */
705         tr->buffer_disabled = 1;
706         /* Make the flag seen by readers */
707         smp_wmb();
708 }
709
710 /**
711  * tracing_off - turn off tracing buffers
712  *
713  * This function stops the tracing buffers from recording data.
714  * It does not disable any overhead the tracers themselves may
715  * be causing. This function simply causes all recording to
716  * the ring buffers to fail.
717  */
718 void tracing_off(void)
719 {
720         tracer_tracing_off(&global_trace);
721 }
722 EXPORT_SYMBOL_GPL(tracing_off);
723
724 void disable_trace_on_warning(void)
725 {
726         if (__disable_trace_on_warning)
727                 tracing_off();
728 }
729
730 /**
731  * tracer_tracing_is_on - show real state of ring buffer enabled
732  * @tr: the trace array to check whether its ring buffer is enabled
733  *
734  * Shows the real state of the ring buffer: whether it is enabled or not.
735  */
736 static int tracer_tracing_is_on(struct trace_array *tr)
737 {
738         if (tr->trace_buffer.buffer)
739                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
740         return !tr->buffer_disabled;
741 }
742
743 /**
744  * tracing_is_on - show state of ring buffers enabled
745  */
746 int tracing_is_on(void)
747 {
748         return tracer_tracing_is_on(&global_trace);
749 }
750 EXPORT_SYMBOL_GPL(tracing_is_on);
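/*
 * Editor's note (illustrative sketch, not part of the original file):
 * tracing_off()/tracing_on() only gate recording into the ring buffer.
 * A common debugging trick is to freeze the buffer the moment a problem
 * is detected, so the events leading up to it are preserved;
 * suspicious_condition() is a hypothetical predicate:
 *
 *	if (suspicious_condition()) {
 *		trace_printk("condition hit, freezing trace\n");
 *		tracing_off();
 *	}
 */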
751
752 static int __init set_buf_size(char *str)
753 {
754         unsigned long buf_size;
755
756         if (!str)
757                 return 0;
758         buf_size = memparse(str, &str);
759         /* nr_entries can not be zero */
760         if (buf_size == 0)
761                 return 0;
762         trace_buf_size = buf_size;
763         return 1;
764 }
765 __setup("trace_buf_size=", set_buf_size);
766
767 static int __init set_tracing_thresh(char *str)
768 {
769         unsigned long threshold;
770         int ret;
771
772         if (!str)
773                 return 0;
774         ret = kstrtoul(str, 0, &threshold);
775         if (ret < 0)
776                 return 0;
777         tracing_thresh = threshold * 1000;
778         return 1;
779 }
780 __setup("tracing_thresh=", set_tracing_thresh);
781
782 unsigned long nsecs_to_usecs(unsigned long nsecs)
783 {
784         return nsecs / 1000;
785 }
786
787 /* These must match the bit positions in trace_iterator_flags */
788 static const char *trace_options[] = {
789         "print-parent",
790         "sym-offset",
791         "sym-addr",
792         "verbose",
793         "raw",
794         "hex",
795         "bin",
796         "block",
797         "stacktrace",
798         "trace_printk",
799         "ftrace_preempt",
800         "branch",
801         "annotate",
802         "userstacktrace",
803         "sym-userobj",
804         "printk-msg-only",
805         "context-info",
806         "latency-format",
807         "sleep-time",
808         "graph-time",
809         "record-cmd",
810         "overwrite",
811         "disable_on_free",
812         "irq-info",
813         "markers",
814         "function-trace",
815         NULL
816 };
817
818 static struct {
819         u64 (*func)(void);
820         const char *name;
821         int in_ns;              /* is this clock in nanoseconds? */
822 } trace_clocks[] = {
823         { trace_clock_local,            "local",        1 },
824         { trace_clock_global,           "global",       1 },
825         { trace_clock_counter,          "counter",      0 },
826         { trace_clock_jiffies,          "uptime",       0 },
827         { trace_clock,                  "perf",         1 },
828         { ktime_get_mono_fast_ns,       "mono",         1 },
829         ARCH_TRACE_CLOCKS
830 };
831
832 /*
833  * trace_parser_get_init - gets the buffer for trace parser
834  */
835 int trace_parser_get_init(struct trace_parser *parser, int size)
836 {
837         memset(parser, 0, sizeof(*parser));
838
839         parser->buffer = kmalloc(size, GFP_KERNEL);
840         if (!parser->buffer)
841                 return 1;
842
843         parser->size = size;
844         return 0;
845 }
846
847 /*
848  * trace_parser_put - frees the buffer for trace parser
849  */
850 void trace_parser_put(struct trace_parser *parser)
851 {
852         kfree(parser->buffer);
853 }
854
855 /*
856  * trace_get_user - reads the user input string separated by space
857  * (matched by isspace(ch))
858  *
859  * For each string found the 'struct trace_parser' is updated,
860  * and the function returns.
861  *
862  * Returns number of bytes read.
863  *
864  * See kernel/trace/trace.h for 'struct trace_parser' details.
865  */
866 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
867         size_t cnt, loff_t *ppos)
868 {
869         char ch;
870         size_t read = 0;
871         ssize_t ret;
872
873         if (!*ppos)
874                 trace_parser_clear(parser);
875
876         ret = get_user(ch, ubuf++);
877         if (ret)
878                 goto out;
879
880         read++;
881         cnt--;
882
883         /*
884          * If the parser is continuing from the last write,
885          * keep reading the user input without skipping spaces.
886          */
887         if (!parser->cont) {
888                 /* skip white space */
889                 while (cnt && isspace(ch)) {
890                         ret = get_user(ch, ubuf++);
891                         if (ret)
892                                 goto out;
893                         read++;
894                         cnt--;
895                 }
896
897                 /* only spaces were written */
898                 if (isspace(ch)) {
899                         *ppos += read;
900                         ret = read;
901                         goto out;
902                 }
903
904                 parser->idx = 0;
905         }
906
907         /* read the non-space input */
908         while (cnt && !isspace(ch)) {
909                 if (parser->idx < parser->size - 1)
910                         parser->buffer[parser->idx++] = ch;
911                 else {
912                         ret = -EINVAL;
913                         goto out;
914                 }
915                 ret = get_user(ch, ubuf++);
916                 if (ret)
917                         goto out;
918                 read++;
919                 cnt--;
920         }
921
922         /* We either got finished input or we have to wait for another call. */
923         if (isspace(ch)) {
924                 parser->buffer[parser->idx] = 0;
925                 parser->cont = false;
926         } else if (parser->idx < parser->size - 1) {
927                 parser->cont = true;
928                 parser->buffer[parser->idx++] = ch;
929         } else {
930                 ret = -EINVAL;
931                 goto out;
932         }
933
934         *ppos += read;
935         ret = read;
936
937 out:
938         return ret;
939 }
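/*
 * Editor's note (illustrative sketch, not part of the original file):
 * this is roughly how the ftrace filter files consume the parser
 * (error handling trimmed; process_token() is a hypothetical helper):
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser) &&
 *	    !trace_parser_cont(&parser))
 *		process_token(parser.buffer);	// one NUL-terminated word
 *
 *	trace_parser_put(&parser);
 *	return read;
 */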
940
941 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
942 {
943         int len;
944
945         if (s->len <= s->readpos)
946                 return -EBUSY;
947
948         len = s->len - s->readpos;
949         if (cnt > len)
950                 cnt = len;
951         memcpy(buf, s->buffer + s->readpos, cnt);
952
953         s->readpos += cnt;
954         return cnt;
955 }
956
957 unsigned long __read_mostly     tracing_thresh;
958
959 #ifdef CONFIG_TRACER_MAX_TRACE
960 /*
961  * Copy the new maximum trace into the separate maximum-trace
962  * structure. (this way the maximum trace is permanently saved,
963  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
964  */
965 static void
966 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
967 {
968         struct trace_buffer *trace_buf = &tr->trace_buffer;
969         struct trace_buffer *max_buf = &tr->max_buffer;
970         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
971         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
972
973         max_buf->cpu = cpu;
974         max_buf->time_start = data->preempt_timestamp;
975
976         max_data->saved_latency = tr->max_latency;
977         max_data->critical_start = data->critical_start;
978         max_data->critical_end = data->critical_end;
979
980         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
981         max_data->pid = tsk->pid;
982         /*
983          * If tsk == current, then use current_uid(), as that does not use
984          * RCU. The irq tracer can be called out of RCU scope.
985          */
986         if (tsk == current)
987                 max_data->uid = current_uid();
988         else
989                 max_data->uid = task_uid(tsk);
990
991         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
992         max_data->policy = tsk->policy;
993         max_data->rt_priority = tsk->rt_priority;
994
995         /* record this task's comm */
996         tracing_record_cmdline(tsk);
997 }
998
999 /**
1000  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1001  * @tr: tracer
1002  * @tsk: the task with the latency
1003  * @cpu: The cpu that initiated the trace.
1004  *
1005  * Flip the buffers between the @tr and the max_tr and record information
1006  * about which task was the cause of this latency.
1007  */
1008 void
1009 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1010 {
1011         struct ring_buffer *buf;
1012
1013         if (tr->stop_count)
1014                 return;
1015
1016         WARN_ON_ONCE(!irqs_disabled());
1017
1018         if (!tr->allocated_snapshot) {
1019                 /* Only the nop tracer should hit this when disabling */
1020                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1021                 return;
1022         }
1023
1024         arch_spin_lock(&tr->max_lock);
1025
1026         buf = tr->trace_buffer.buffer;
1027         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1028         tr->max_buffer.buffer = buf;
1029
1030         __update_max_tr(tr, tsk, cpu);
1031         arch_spin_unlock(&tr->max_lock);
1032 }
1033
1034 /**
1035  * update_max_tr_single - only copy one trace over, and reset the rest
1036  * @tr: tracer
1037  * @tsk: task with the latency
1038  * @cpu: the cpu of the buffer to copy.
1039  *
1040  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1041  */
1042 void
1043 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1044 {
1045         int ret;
1046
1047         if (tr->stop_count)
1048                 return;
1049
1050         WARN_ON_ONCE(!irqs_disabled());
1051         if (!tr->allocated_snapshot) {
1052                 /* Only the nop tracer should hit this when disabling */
1053                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1054                 return;
1055         }
1056
1057         arch_spin_lock(&tr->max_lock);
1058
1059         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1060
1061         if (ret == -EBUSY) {
1062                 /*
1063                  * We failed to swap the buffer due to a commit taking
1064                  * place on this CPU. We fail to record, but we reset
1065                  * the max trace buffer (no one writes directly to it)
1066                  * and flag that it failed.
1067                  */
1068                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1069                         "Failed to swap buffers due to commit in progress\n");
1070         }
1071
1072         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1073
1074         __update_max_tr(tr, tsk, cpu);
1075         arch_spin_unlock(&tr->max_lock);
1076 }
1077 #endif /* CONFIG_TRACER_MAX_TRACE */
1078
1079 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1080 {
1081         /* Iterators are static, they should be filled or empty */
1082         if (trace_buffer_iter(iter, iter->cpu_file))
1083                 return 0;
1084
1085         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1086                                 full);
1087 }
1088
1089 #ifdef CONFIG_FTRACE_STARTUP_TEST
1090 static int run_tracer_selftest(struct tracer *type)
1091 {
1092         struct trace_array *tr = &global_trace;
1093         struct tracer *saved_tracer = tr->current_trace;
1094         int ret;
1095
1096         if (!type->selftest || tracing_selftest_disabled)
1097                 return 0;
1098
1099         /*
1100          * Run a selftest on this tracer.
1101          * Here we reset the trace buffer, and set the current
1102          * tracer to be this tracer. The tracer can then run some
1103          * internal tracing to verify that everything is in order.
1104          * If we fail, we do not register this tracer.
1105          */
1106         tracing_reset_online_cpus(&tr->trace_buffer);
1107
1108         tr->current_trace = type;
1109
1110 #ifdef CONFIG_TRACER_MAX_TRACE
1111         if (type->use_max_tr) {
1112                 /* If we expanded the buffers, make sure the max is expanded too */
1113                 if (ring_buffer_expanded)
1114                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1115                                            RING_BUFFER_ALL_CPUS);
1116                 tr->allocated_snapshot = true;
1117         }
1118 #endif
1119
1120         /* the test is responsible for initializing and enabling */
1121         pr_info("Testing tracer %s: ", type->name);
1122         ret = type->selftest(type, tr);
1123         /* the test is responsible for resetting too */
1124         tr->current_trace = saved_tracer;
1125         if (ret) {
1126                 printk(KERN_CONT "FAILED!\n");
1127                 /* Add the warning after printing 'FAILED' */
1128                 WARN_ON(1);
1129                 return -1;
1130         }
1131         /* Only reset on passing, to avoid touching corrupted buffers */
1132         tracing_reset_online_cpus(&tr->trace_buffer);
1133
1134 #ifdef CONFIG_TRACER_MAX_TRACE
1135         if (type->use_max_tr) {
1136                 tr->allocated_snapshot = false;
1137
1138                 /* Shrink the max buffer again */
1139                 if (ring_buffer_expanded)
1140                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1141                                            RING_BUFFER_ALL_CPUS);
1142         }
1143 #endif
1144
1145         printk(KERN_CONT "PASSED\n");
1146         return 0;
1147 }
1148 #else
1149 static inline int run_tracer_selftest(struct tracer *type)
1150 {
1151         return 0;
1152 }
1153 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1154
1155 /**
1156  * register_tracer - register a tracer with the ftrace system.
1157  * @type: the plugin for the tracer
1158  *
1159  * Register a new plugin tracer.
1160  */
1161 int register_tracer(struct tracer *type)
1162 {
1163         struct tracer *t;
1164         int ret = 0;
1165
1166         if (!type->name) {
1167                 pr_info("Tracer must have a name\n");
1168                 return -1;
1169         }
1170
1171         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1172                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1173                 return -1;
1174         }
1175
1176         mutex_lock(&trace_types_lock);
1177
1178         tracing_selftest_running = true;
1179
1180         for (t = trace_types; t; t = t->next) {
1181                 if (strcmp(type->name, t->name) == 0) {
1182                         /* already found */
1183                         pr_info("Tracer %s already registered\n",
1184                                 type->name);
1185                         ret = -1;
1186                         goto out;
1187                 }
1188         }
1189
1190         if (!type->set_flag)
1191                 type->set_flag = &dummy_set_flag;
1192         if (!type->flags)
1193                 type->flags = &dummy_tracer_flags;
1194         else
1195                 if (!type->flags->opts)
1196                         type->flags->opts = dummy_tracer_opt;
1197
1198         ret = run_tracer_selftest(type);
1199         if (ret < 0)
1200                 goto out;
1201
1202         type->next = trace_types;
1203         trace_types = type;
1204
1205  out:
1206         tracing_selftest_running = false;
1207         mutex_unlock(&trace_types_lock);
1208
1209         if (ret || !default_bootup_tracer)
1210                 goto out_unlock;
1211
1212         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1213                 goto out_unlock;
1214
1215         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1216         /* Do we want this tracer to start on bootup? */
1217         tracing_set_tracer(&global_trace, type->name);
1218         default_bootup_tracer = NULL;
1219         /* disable other selftests, since this will break them. */
1220         tracing_selftest_disabled = true;
1221 #ifdef CONFIG_FTRACE_STARTUP_TEST
1222         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1223                type->name);
1224 #endif
1225
1226  out_unlock:
1227         return ret;
1228 }
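/*
 * Editor's note (illustrative sketch, not part of the original file):
 * a minimal tracer only needs a name and an init/reset pair; all names
 * below are hypothetical (see the nop tracer in trace_nop.c for the real
 * minimal example):
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	// called from an __init function:
 *	register_tracer(&example_tracer);
 */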
1229
1230 void tracing_reset(struct trace_buffer *buf, int cpu)
1231 {
1232         struct ring_buffer *buffer = buf->buffer;
1233
1234         if (!buffer)
1235                 return;
1236
1237         ring_buffer_record_disable(buffer);
1238
1239         /* Make sure all commits have finished */
1240         synchronize_sched();
1241         ring_buffer_reset_cpu(buffer, cpu);
1242
1243         ring_buffer_record_enable(buffer);
1244 }
1245
1246 void tracing_reset_online_cpus(struct trace_buffer *buf)
1247 {
1248         struct ring_buffer *buffer = buf->buffer;
1249         int cpu;
1250
1251         if (!buffer)
1252                 return;
1253
1254         ring_buffer_record_disable(buffer);
1255
1256         /* Make sure all commits have finished */
1257         synchronize_sched();
1258
1259         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1260
1261         for_each_online_cpu(cpu)
1262                 ring_buffer_reset_cpu(buffer, cpu);
1263
1264         ring_buffer_record_enable(buffer);
1265 }
1266
1267 /* Must have trace_types_lock held */
1268 void tracing_reset_all_online_cpus(void)
1269 {
1270         struct trace_array *tr;
1271
1272         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1273                 tracing_reset_online_cpus(&tr->trace_buffer);
1274 #ifdef CONFIG_TRACER_MAX_TRACE
1275                 tracing_reset_online_cpus(&tr->max_buffer);
1276 #endif
1277         }
1278 }
1279
1280 #define SAVED_CMDLINES_DEFAULT 128
1281 #define NO_CMDLINE_MAP UINT_MAX
1282 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1283 struct saved_cmdlines_buffer {
1284         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1285         unsigned *map_cmdline_to_pid;
1286         unsigned cmdline_num;
1287         int cmdline_idx;
1288         char *saved_cmdlines;
1289 };
1290 static struct saved_cmdlines_buffer *savedcmd;
1291
1292 /* temporarily disable recording */
1293 static atomic_t trace_record_cmdline_disabled __read_mostly;
1294
1295 static inline char *get_saved_cmdlines(int idx)
1296 {
1297         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1298 }
1299
1300 static inline void set_cmdline(int idx, const char *cmdline)
1301 {
1302         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1303 }
1304
1305 static int allocate_cmdlines_buffer(unsigned int val,
1306                                     struct saved_cmdlines_buffer *s)
1307 {
1308         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1309                                         GFP_KERNEL);
1310         if (!s->map_cmdline_to_pid)
1311                 return -ENOMEM;
1312
1313         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1314         if (!s->saved_cmdlines) {
1315                 kfree(s->map_cmdline_to_pid);
1316                 return -ENOMEM;
1317         }
1318
1319         s->cmdline_idx = 0;
1320         s->cmdline_num = val;
1321         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1322                sizeof(s->map_pid_to_cmdline));
1323         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1324                val * sizeof(*s->map_cmdline_to_pid));
1325
1326         return 0;
1327 }
1328
1329 static int trace_create_savedcmd(void)
1330 {
1331         int ret;
1332
1333         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1334         if (!savedcmd)
1335                 return -ENOMEM;
1336
1337         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1338         if (ret < 0) {
1339                 kfree(savedcmd);
1340                 savedcmd = NULL;
1341                 return -ENOMEM;
1342         }
1343
1344         return 0;
1345 }
1346
1347 int is_tracing_stopped(void)
1348 {
1349         return global_trace.stop_count;
1350 }
1351
1352 /**
1353  * tracing_start - quick start of the tracer
1354  *
1355  * If tracing is enabled but was stopped by tracing_stop,
1356  * this will start the tracer back up.
1357  */
1358 void tracing_start(void)
1359 {
1360         struct ring_buffer *buffer;
1361         unsigned long flags;
1362
1363         if (tracing_disabled)
1364                 return;
1365
1366         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1367         if (--global_trace.stop_count) {
1368                 if (global_trace.stop_count < 0) {
1369                         /* Someone screwed up their debugging */
1370                         WARN_ON_ONCE(1);
1371                         global_trace.stop_count = 0;
1372                 }
1373                 goto out;
1374         }
1375
1376         /* Prevent the buffers from switching */
1377         arch_spin_lock(&global_trace.max_lock);
1378
1379         buffer = global_trace.trace_buffer.buffer;
1380         if (buffer)
1381                 ring_buffer_record_enable(buffer);
1382
1383 #ifdef CONFIG_TRACER_MAX_TRACE
1384         buffer = global_trace.max_buffer.buffer;
1385         if (buffer)
1386                 ring_buffer_record_enable(buffer);
1387 #endif
1388
1389         arch_spin_unlock(&global_trace.max_lock);
1390
1391  out:
1392         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1393 }
1394
1395 static void tracing_start_tr(struct trace_array *tr)
1396 {
1397         struct ring_buffer *buffer;
1398         unsigned long flags;
1399
1400         if (tracing_disabled)
1401                 return;
1402
1403         /* If global, we need to also start the max tracer */
1404         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1405                 return tracing_start();
1406
1407         raw_spin_lock_irqsave(&tr->start_lock, flags);
1408
1409         if (--tr->stop_count) {
1410                 if (tr->stop_count < 0) {
1411                         /* Someone screwed up their debugging */
1412                         WARN_ON_ONCE(1);
1413                         tr->stop_count = 0;
1414                 }
1415                 goto out;
1416         }
1417
1418         buffer = tr->trace_buffer.buffer;
1419         if (buffer)
1420                 ring_buffer_record_enable(buffer);
1421
1422  out:
1423         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1424 }
1425
1426 /**
1427  * tracing_stop - quick stop of the tracer
1428  *
1429  * Lightweight way to stop tracing. Use in conjunction with
1430  * tracing_start.
1431  */
1432 void tracing_stop(void)
1433 {
1434         struct ring_buffer *buffer;
1435         unsigned long flags;
1436
1437         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1438         if (global_trace.stop_count++)
1439                 goto out;
1440
1441         /* Prevent the buffers from switching */
1442         arch_spin_lock(&global_trace.max_lock);
1443
1444         buffer = global_trace.trace_buffer.buffer;
1445         if (buffer)
1446                 ring_buffer_record_disable(buffer);
1447
1448 #ifdef CONFIG_TRACER_MAX_TRACE
1449         buffer = global_trace.max_buffer.buffer;
1450         if (buffer)
1451                 ring_buffer_record_disable(buffer);
1452 #endif
1453
1454         arch_spin_unlock(&global_trace.max_lock);
1455
1456  out:
1457         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1458 }
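/*
 * Editor's note (illustrative sketch, not part of the original file):
 * tracing_stop()/tracing_start() nest: stop increments stop_count and
 * start decrements it, so recording only resumes when the last start
 * balances the first stop. A caller that wants a stable view of the
 * buffers brackets its work like this:
 *
 *	tracing_stop();
 *	// read or dump the buffers without new events racing in
 *	tracing_start();
 */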
1459
1460 static void tracing_stop_tr(struct trace_array *tr)
1461 {
1462         struct ring_buffer *buffer;
1463         unsigned long flags;
1464
1465         /* If global, we need to also stop the max tracer */
1466         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1467                 return tracing_stop();
1468
1469         raw_spin_lock_irqsave(&tr->start_lock, flags);
1470         if (tr->stop_count++)
1471                 goto out;
1472
1473         buffer = tr->trace_buffer.buffer;
1474         if (buffer)
1475                 ring_buffer_record_disable(buffer);
1476
1477  out:
1478         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1479 }
1480
1481 void trace_stop_cmdline_recording(void);
1482
1483 static int trace_save_cmdline(struct task_struct *tsk)
1484 {
1485         unsigned pid, idx;
1486
1487         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1488                 return 0;
1489
1490         /*
1491          * It's not the end of the world if we don't get
1492          * the lock, but we also don't want to spin
1493          * nor do we want to disable interrupts,
1494          * so if we miss here, then better luck next time.
1495          */
1496         if (!arch_spin_trylock(&trace_cmdline_lock))
1497                 return 0;
1498
1499         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1500         if (idx == NO_CMDLINE_MAP) {
1501                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1502
1503                 /*
1504                  * Check whether the cmdline buffer at idx has a pid
1505                  * mapped. We are going to overwrite that entry so we
1506                  * need to clear the map_pid_to_cmdline. Otherwise we
1507                  * would read the new comm for the old pid.
1508                  */
1509                 pid = savedcmd->map_cmdline_to_pid[idx];
1510                 if (pid != NO_CMDLINE_MAP)
1511                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1512
1513                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1514                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1515
1516                 savedcmd->cmdline_idx = idx;
1517         }
1518
1519         set_cmdline(idx, tsk->comm);
1520
1521         arch_spin_unlock(&trace_cmdline_lock);
1522
1523         return 1;
1524 }
1525
1526 static void __trace_find_cmdline(int pid, char comm[])
1527 {
1528         unsigned map;
1529
1530         if (!pid) {
1531                 strcpy(comm, "<idle>");
1532                 return;
1533         }
1534
1535         if (WARN_ON_ONCE(pid < 0)) {
1536                 strcpy(comm, "<XXX>");
1537                 return;
1538         }
1539
1540         if (pid > PID_MAX_DEFAULT) {
1541                 strcpy(comm, "<...>");
1542                 return;
1543         }
1544
1545         map = savedcmd->map_pid_to_cmdline[pid];
1546         if (map != NO_CMDLINE_MAP)
1547                 strcpy(comm, get_saved_cmdlines(map));
1548         else
1549                 strcpy(comm, "<...>");
1550 }
1551
1552 void trace_find_cmdline(int pid, char comm[])
1553 {
1554         preempt_disable();
1555         arch_spin_lock(&trace_cmdline_lock);
1556
1557         __trace_find_cmdline(pid, comm);
1558
1559         arch_spin_unlock(&trace_cmdline_lock);
1560         preempt_enable();
1561 }
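/*
 * Editor's note (illustrative sketch, not part of the original file):
 * readers resolve a recorded pid back to a comm roughly the way the
 * output code in trace_output.c does, given a struct trace_entry *entry
 * and a struct trace_seq *s:
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
 */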
1562
1563 void tracing_record_cmdline(struct task_struct *tsk)
1564 {
1565         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1566                 return;
1567
1568         if (!__this_cpu_read(trace_cmdline_save))
1569                 return;
1570
1571         if (trace_save_cmdline(tsk))
1572                 __this_cpu_write(trace_cmdline_save, false);
1573 }
1574
1575 void
1576 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1577                              int pc)
1578 {
1579         struct task_struct *tsk = current;
1580
1581         entry->preempt_count            = pc & 0xff;
1582         entry->pid                      = (tsk) ? tsk->pid : 0;
1583         entry->flags =
1584 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1585                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1586 #else
1587                 TRACE_FLAG_IRQS_NOSUPPORT |
1588 #endif
1589                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1590                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1591                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1592                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1593 }
1594 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1595
1596 struct ring_buffer_event *
1597 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1598                           int type,
1599                           unsigned long len,
1600                           unsigned long flags, int pc)
1601 {
1602         struct ring_buffer_event *event;
1603
1604         event = ring_buffer_lock_reserve(buffer, len);
1605         if (event != NULL) {
1606                 struct trace_entry *ent = ring_buffer_event_data(event);
1607
1608                 tracing_generic_entry_update(ent, flags, pc);
1609                 ent->type = type;
1610         }
1611
1612         return event;
1613 }
1614
1615 void
1616 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1617 {
1618         __this_cpu_write(trace_cmdline_save, true);
1619         ring_buffer_unlock_commit(buffer, event);
1620 }
1621
1622 static inline void
1623 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1624                              struct ring_buffer_event *event,
1625                              unsigned long flags, int pc)
1626 {
1627         __buffer_unlock_commit(buffer, event);
1628
1629         ftrace_trace_stack(buffer, flags, 6, pc);
1630         ftrace_trace_userstack(buffer, flags, pc);
1631 }
1632
1633 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1634                                 struct ring_buffer_event *event,
1635                                 unsigned long flags, int pc)
1636 {
1637         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1638 }
1639 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1640
1641 static struct ring_buffer *temp_buffer;
1642
1643 struct ring_buffer_event *
1644 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1645                           struct ftrace_event_file *ftrace_file,
1646                           int type, unsigned long len,
1647                           unsigned long flags, int pc)
1648 {
1649         struct ring_buffer_event *entry;
1650
1651         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1652         entry = trace_buffer_lock_reserve(*current_rb,
1653                                          type, len, flags, pc);
1654         /*
1655          * If tracing is off, but we have triggers enabled,
1656          * we still need to look at the event data. Use the temp_buffer
1657          * to store the trace event for the trigger to use. It's recursion
1658          * safe and will not be recorded anywhere.
1659          */
1660         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1661                 *current_rb = temp_buffer;
1662                 entry = trace_buffer_lock_reserve(*current_rb,
1663                                                   type, len, flags, pc);
1664         }
1665         return entry;
1666 }
1667 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1668
1669 struct ring_buffer_event *
1670 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1671                                   int type, unsigned long len,
1672                                   unsigned long flags, int pc)
1673 {
1674         *current_rb = global_trace.trace_buffer.buffer;
1675         return trace_buffer_lock_reserve(*current_rb,
1676                                          type, len, flags, pc);
1677 }
1678 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1679
1680 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1681                                         struct ring_buffer_event *event,
1682                                         unsigned long flags, int pc)
1683 {
1684         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1685 }
1686 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1687
1688 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1689                                      struct ring_buffer_event *event,
1690                                      unsigned long flags, int pc,
1691                                      struct pt_regs *regs)
1692 {
1693         __buffer_unlock_commit(buffer, event);
1694
1695         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1696         ftrace_trace_userstack(buffer, flags, pc);
1697 }
1698 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1699
1700 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1701                                          struct ring_buffer_event *event)
1702 {
1703         ring_buffer_discard_commit(buffer, event);
1704 }
1705 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1706
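/*
 * trace_function - record a function entry (ip and parent_ip) into the
 * trace buffer of @tr as a TRACE_FN event.
 */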
1707 void
1708 trace_function(struct trace_array *tr,
1709                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1710                int pc)
1711 {
1712         struct ftrace_event_call *call = &event_function;
1713         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1714         struct ring_buffer_event *event;
1715         struct ftrace_entry *entry;
1716
1717         /* If we are reading the ring buffer, don't trace */
1718         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1719                 return;
1720
1721         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1722                                           flags, pc);
1723         if (!event)
1724                 return;
1725         entry   = ring_buffer_event_data(event);
1726         entry->ip                       = ip;
1727         entry->parent_ip                = parent_ip;
1728
1729         if (!call_filter_check_discard(call, entry, buffer, event))
1730                 __buffer_unlock_commit(buffer, event);
1731 }
1732
1733 #ifdef CONFIG_STACKTRACE
1734
1735 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1736 struct ftrace_stack {
1737         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1738 };
1739
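/*
 * Per-cpu scratch area for saving kernel stack traces, plus a nesting
 * counter (ftrace_stack_reserve) that records whether the scratch area
 * is already in use on this cpu; see __ftrace_trace_stack() below.
 */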
1740 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1741 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1742
1743 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1744                                  unsigned long flags,
1745                                  int skip, int pc, struct pt_regs *regs)
1746 {
1747         struct ftrace_event_call *call = &event_kernel_stack;
1748         struct ring_buffer_event *event;
1749         struct stack_entry *entry;
1750         struct stack_trace trace;
1751         int use_stack;
1752         int size = FTRACE_STACK_ENTRIES;
1753
1754         trace.nr_entries        = 0;
1755         trace.skip              = skip;
1756
1757         /*
1758          * Since events can happen in NMIs, there's no safe way to
1759          * unconditionally use the per-cpu ftrace_stacks. We reserve one,
1760          * and if an interrupt or NMI comes in while it is held, that
1761          * context falls back to the default FTRACE_STACK_ENTRIES limit.
1762          */
1763         preempt_disable_notrace();
1764
1765         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1766         /*
1767          * We don't need any atomic variables, just a barrier.
1768          * If an interrupt comes in, we don't care, because it would
1769          * have exited and put the counter back to what we want.
1770          * We just need a barrier to keep gcc from moving things
1771          * around.
1772          */
1773         barrier();
1774         if (use_stack == 1) {
1775                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1776                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1777
1778                 if (regs)
1779                         save_stack_trace_regs(regs, &trace);
1780                 else
1781                         save_stack_trace(&trace);
1782
1783                 if (trace.nr_entries > size)
1784                         size = trace.nr_entries;
1785         } else
1786                 /* From now on, use_stack is a boolean */
1787                 use_stack = 0;
1788
1789         size *= sizeof(unsigned long);
1790
1791         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1792                                           sizeof(*entry) + size, flags, pc);
1793         if (!event)
1794                 goto out;
1795         entry = ring_buffer_event_data(event);
1796
1797         memset(&entry->caller, 0, size);
1798
1799         if (use_stack)
1800                 memcpy(&entry->caller, trace.entries,
1801                        trace.nr_entries * sizeof(unsigned long));
1802         else {
1803                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1804                 trace.entries           = entry->caller;
1805                 if (regs)
1806                         save_stack_trace_regs(regs, &trace);
1807                 else
1808                         save_stack_trace(&trace);
1809         }
1810
1811         entry->size = trace.nr_entries;
1812
1813         if (!call_filter_check_discard(call, entry, buffer, event))
1814                 __buffer_unlock_commit(buffer, event);
1815
1816  out:
1817         /* Again, don't let gcc optimize things here */
1818         barrier();
1819         __this_cpu_dec(ftrace_stack_reserve);
1820         preempt_enable_notrace();
1821
1822 }
1823
1824 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1825                              int skip, int pc, struct pt_regs *regs)
1826 {
1827         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1828                 return;
1829
1830         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1831 }
1832
1833 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1834                         int skip, int pc)
1835 {
1836         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1837                 return;
1838
1839         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1840 }
1841
1842 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1843                    int pc)
1844 {
1845         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1846 }
1847
1848 /**
1849  * trace_dump_stack - record a stack back trace in the trace buffer
1850  * @skip: Number of functions to skip (helper handlers)
1851  */
1852 void trace_dump_stack(int skip)
1853 {
1854         unsigned long flags;
1855
1856         if (tracing_disabled || tracing_selftest_running)
1857                 return;
1858
1859         local_save_flags(flags);
1860
1861         /*
1862          * Skip 3 more; that seems to get us to the caller of
1863          * this function.
1864          */
1865         skip += 3;
1866         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1867                              flags, skip, preempt_count(), NULL);
1868 }
1869
1870 static DEFINE_PER_CPU(int, user_stack_count);
1871
1872 void
1873 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1874 {
1875         struct ftrace_event_call *call = &event_user_stack;
1876         struct ring_buffer_event *event;
1877         struct userstack_entry *entry;
1878         struct stack_trace trace;
1879
1880         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1881                 return;
1882
1883         /*
1884          * NMIs can not handle page faults, even with fixups.
1885          * Saving the user stack can (and often does) fault.
1886          */
1887         if (unlikely(in_nmi()))
1888                 return;
1889
1890         /*
1891          * prevent recursion, since the user stack tracing may
1892          * trigger other kernel events.
1893          */
1894         preempt_disable();
1895         if (__this_cpu_read(user_stack_count))
1896                 goto out;
1897
1898         __this_cpu_inc(user_stack_count);
1899
1900         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1901                                           sizeof(*entry), flags, pc);
1902         if (!event)
1903                 goto out_drop_count;
1904         entry   = ring_buffer_event_data(event);
1905
1906         entry->tgid             = current->tgid;
1907         memset(&entry->caller, 0, sizeof(entry->caller));
1908
1909         trace.nr_entries        = 0;
1910         trace.max_entries       = FTRACE_STACK_ENTRIES;
1911         trace.skip              = 0;
1912         trace.entries           = entry->caller;
1913
1914         save_stack_trace_user(&trace);
1915         if (!call_filter_check_discard(call, entry, buffer, event))
1916                 __buffer_unlock_commit(buffer, event);
1917
1918  out_drop_count:
1919         __this_cpu_dec(user_stack_count);
1920  out:
1921         preempt_enable();
1922 }
1923
1924 #ifdef UNUSED
1925 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1926 {
1927         ftrace_trace_userstack(tr, flags, preempt_count());
1928 }
1929 #endif /* UNUSED */
1930
1931 #endif /* CONFIG_STACKTRACE */
1932
1933 /* created for use with alloc_percpu */
1934 struct trace_buffer_struct {
1935         char buffer[TRACE_BUF_SIZE];
1936 };
1937
1938 static struct trace_buffer_struct *trace_percpu_buffer;
1939 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1940 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1941 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1942
1943 /*
1944  * The buffer used depends on the context. There is a per-cpu
1945  * buffer for normal context, softirq context, hard irq context and
1946  * NMI context. This allows for lockless recording.
1947  *
1948  * Note, if the buffers failed to be allocated, then this returns NULL.
1949  */
1950 static char *get_trace_buf(void)
1951 {
1952         struct trace_buffer_struct *percpu_buffer;
1953
1954         /*
1955          * If we have allocated per cpu buffers, then we do not
1956          * need to do any locking.
1957          */
1958         if (in_nmi())
1959                 percpu_buffer = trace_percpu_nmi_buffer;
1960         else if (in_irq())
1961                 percpu_buffer = trace_percpu_irq_buffer;
1962         else if (in_softirq())
1963                 percpu_buffer = trace_percpu_sirq_buffer;
1964         else
1965                 percpu_buffer = trace_percpu_buffer;
1966
1967         if (!percpu_buffer)
1968                 return NULL;
1969
1970         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1971 }
1972
1973 static int alloc_percpu_trace_buffer(void)
1974 {
1975         struct trace_buffer_struct *buffers;
1976         struct trace_buffer_struct *sirq_buffers;
1977         struct trace_buffer_struct *irq_buffers;
1978         struct trace_buffer_struct *nmi_buffers;
1979
1980         buffers = alloc_percpu(struct trace_buffer_struct);
1981         if (!buffers)
1982                 goto err_warn;
1983
1984         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1985         if (!sirq_buffers)
1986                 goto err_sirq;
1987
1988         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1989         if (!irq_buffers)
1990                 goto err_irq;
1991
1992         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1993         if (!nmi_buffers)
1994                 goto err_nmi;
1995
1996         trace_percpu_buffer = buffers;
1997         trace_percpu_sirq_buffer = sirq_buffers;
1998         trace_percpu_irq_buffer = irq_buffers;
1999         trace_percpu_nmi_buffer = nmi_buffers;
2000
2001         return 0;
2002
2003  err_nmi:
2004         free_percpu(irq_buffers);
2005  err_irq:
2006         free_percpu(sirq_buffers);
2007  err_sirq:
2008         free_percpu(buffers);
2009  err_warn:
2010         WARN(1, "Could not allocate percpu trace_printk buffer");
2011         return -ENOMEM;
2012 }
2013
2014 static int buffers_allocated;
2015
2016 void trace_printk_init_buffers(void)
2017 {
2018         if (buffers_allocated)
2019                 return;
2020
2021         if (alloc_percpu_trace_buffer())
2022                 return;
2023
2024         /* trace_printk() is for debug use only. Don't use it in production. */
2025
2026         pr_warning("\n**********************************************************\n");
2027         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2028         pr_warning("**                                                      **\n");
2029         pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2030         pr_warning("**                                                      **\n");
2031         pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2032         pr_warning("** unsafe for production use.                           **\n");
2033         pr_warning("**                                                      **\n");
2034         pr_warning("** If you see this message and you are not debugging    **\n");
2035         pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2036         pr_warning("**                                                      **\n");
2037         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2038         pr_warning("**********************************************************\n");
2039
2040         /* Expand the buffers to set size */
2041         tracing_update_buffers();
2042
2043         buffers_allocated = 1;
2044
2045         /*
2046          * trace_printk_init_buffers() can be called by modules.
2047          * If that happens, then we need to start cmdline recording
2048          * directly here. If the global_trace.trace_buffer.buffer is
2049          * already allocated here, then this was called by module code.
2050          */
2051         if (global_trace.trace_buffer.buffer)
2052                 tracing_start_cmdline_record();
2053 }
2054
2055 void trace_printk_start_comm(void)
2056 {
2057         /* Start tracing comms if trace printk is set */
2058         if (!buffers_allocated)
2059                 return;
2060         tracing_start_cmdline_record();
2061 }
2062
2063 static void trace_printk_start_stop_comm(int enabled)
2064 {
2065         if (!buffers_allocated)
2066                 return;
2067
2068         if (enabled)
2069                 tracing_start_cmdline_record();
2070         else
2071                 tracing_stop_cmdline_record();
2072 }
2073
2074 /**
2075  * trace_vbprintk - write binary msg to tracing buffer
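 * @ip: instruction pointer of the calling site
 * @fmt: printf-style format string
 * @args: arguments for @fmt (stored in binary form via vbin_printf())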
2076  *
2077  */
2078 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2079 {
2080         struct ftrace_event_call *call = &event_bprint;
2081         struct ring_buffer_event *event;
2082         struct ring_buffer *buffer;
2083         struct trace_array *tr = &global_trace;
2084         struct bprint_entry *entry;
2085         unsigned long flags;
2086         char *tbuffer;
2087         int len = 0, size, pc;
2088
2089         if (unlikely(tracing_selftest_running || tracing_disabled))
2090                 return 0;
2091
2092         /* Don't pollute graph traces with trace_vprintk internals */
2093         pause_graph_tracing();
2094
2095         pc = preempt_count();
2096         preempt_disable_notrace();
2097
2098         tbuffer = get_trace_buf();
2099         if (!tbuffer) {
2100                 len = 0;
2101                 goto out;
2102         }
2103
2104         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2105
2106         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2107                 goto out;
2108
2109         local_save_flags(flags);
2110         size = sizeof(*entry) + sizeof(u32) * len;
2111         buffer = tr->trace_buffer.buffer;
2112         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2113                                           flags, pc);
2114         if (!event)
2115                 goto out;
2116         entry = ring_buffer_event_data(event);
2117         entry->ip                       = ip;
2118         entry->fmt                      = fmt;
2119
2120         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2121         if (!call_filter_check_discard(call, entry, buffer, event)) {
2122                 __buffer_unlock_commit(buffer, event);
2123                 ftrace_trace_stack(buffer, flags, 6, pc);
2124         }
2125
2126 out:
2127         preempt_enable_notrace();
2128         unpause_graph_tracing();
2129
2130         return len;
2131 }
2132 EXPORT_SYMBOL_GPL(trace_vbprintk);
2133
2134 static int
2135 __trace_array_vprintk(struct ring_buffer *buffer,
2136                       unsigned long ip, const char *fmt, va_list args)
2137 {
2138         struct ftrace_event_call *call = &event_print;
2139         struct ring_buffer_event *event;
2140         int len = 0, size, pc;
2141         struct print_entry *entry;
2142         unsigned long flags;
2143         char *tbuffer;
2144
2145         if (tracing_disabled || tracing_selftest_running)
2146                 return 0;
2147
2148         /* Don't pollute graph traces with trace_vprintk internals */
2149         pause_graph_tracing();
2150
2151         pc = preempt_count();
2152         preempt_disable_notrace();
2153
2154
2155         tbuffer = get_trace_buf();
2156         if (!tbuffer) {
2157                 len = 0;
2158                 goto out;
2159         }
2160
2161         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2162         if (len > TRACE_BUF_SIZE)
2163                 goto out;
2164
2165         local_save_flags(flags);
2166         size = sizeof(*entry) + len + 1;
2167         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2168                                           flags, pc);
2169         if (!event)
2170                 goto out;
2171         entry = ring_buffer_event_data(event);
2172         entry->ip = ip;
2173
2174         memcpy(&entry->buf, tbuffer, len);
2175         entry->buf[len] = '\0';
2176         if (!call_filter_check_discard(call, entry, buffer, event)) {
2177                 __buffer_unlock_commit(buffer, event);
2178                 ftrace_trace_stack(buffer, flags, 6, pc);
2179         }
2180  out:
2181         preempt_enable_notrace();
2182         unpause_graph_tracing();
2183
2184         return len;
2185 }
2186
2187 int trace_array_vprintk(struct trace_array *tr,
2188                         unsigned long ip, const char *fmt, va_list args)
2189 {
2190         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2191 }
2192
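/*
 * trace_array_printk - printf-style write into a specific trace_array
 * @tr: the trace array to write to
 * @ip: instruction pointer of the calling site
 * @fmt: printf-style format string
 *
 * Like trace_printk(), but directed at @tr rather than the global trace
 * buffer.  Returns the number of bytes written, or 0 when the "printk"
 * trace option is disabled.  A sketch of a call (the "hit %d" format and
 * the count variable are only an illustration):
 *
 *	trace_array_printk(tr, _THIS_IP_, "hit %d\n", count);
 */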
2193 int trace_array_printk(struct trace_array *tr,
2194                        unsigned long ip, const char *fmt, ...)
2195 {
2196         int ret;
2197         va_list ap;
2198
2199         if (!(trace_flags & TRACE_ITER_PRINTK))
2200                 return 0;
2201
2202         va_start(ap, fmt);
2203         ret = trace_array_vprintk(tr, ip, fmt, ap);
2204         va_end(ap);
2205         return ret;
2206 }
2207
2208 int trace_array_printk_buf(struct ring_buffer *buffer,
2209                            unsigned long ip, const char *fmt, ...)
2210 {
2211         int ret;
2212         va_list ap;
2213
2214         if (!(trace_flags & TRACE_ITER_PRINTK))
2215                 return 0;
2216
2217         va_start(ap, fmt);
2218         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2219         va_end(ap);
2220         return ret;
2221 }
2222
2223 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2224 {
2225         return trace_array_vprintk(&global_trace, ip, fmt, args);
2226 }
2227 EXPORT_SYMBOL_GPL(trace_vprintk);
2228
2229 static void trace_iterator_increment(struct trace_iterator *iter)
2230 {
2231         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2232
2233         iter->idx++;
2234         if (buf_iter)
2235                 ring_buffer_read(buf_iter, NULL);
2236 }
2237
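/*
 * Peek at the next entry for @cpu without consuming it.  When the
 * iterator has a per-cpu buffer iterator (static reads of the "trace"
 * file) that is used; otherwise the live ring buffer is peeked at
 * directly and lost-event counts are reported through @lost_events.
 */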
2238 static struct trace_entry *
2239 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2240                 unsigned long *lost_events)
2241 {
2242         struct ring_buffer_event *event;
2243         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2244
2245         if (buf_iter)
2246                 event = ring_buffer_iter_peek(buf_iter, ts);
2247         else
2248                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2249                                          lost_events);
2250
2251         if (event) {
2252                 iter->ent_size = ring_buffer_event_length(event);
2253                 return ring_buffer_event_data(event);
2254         }
2255         iter->ent_size = 0;
2256         return NULL;
2257 }
2258
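/*
 * Find the oldest entry among all cpus (or only @iter->cpu_file when the
 * iterator is bound to a single cpu) by comparing timestamps.  The cpu,
 * timestamp and lost-event count of the chosen entry are reported back
 * through the optional output parameters.
 */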
2259 static struct trace_entry *
2260 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2261                   unsigned long *missing_events, u64 *ent_ts)
2262 {
2263         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2264         struct trace_entry *ent, *next = NULL;
2265         unsigned long lost_events = 0, next_lost = 0;
2266         int cpu_file = iter->cpu_file;
2267         u64 next_ts = 0, ts;
2268         int next_cpu = -1;
2269         int next_size = 0;
2270         int cpu;
2271
2272         /*
2273          * If we are in a per_cpu trace file, don't bother iterating over
2274          * all cpus; just peek at that cpu directly.
2275          */
2276         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2277                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2278                         return NULL;
2279                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2280                 if (ent_cpu)
2281                         *ent_cpu = cpu_file;
2282
2283                 return ent;
2284         }
2285
2286         for_each_tracing_cpu(cpu) {
2287
2288                 if (ring_buffer_empty_cpu(buffer, cpu))
2289                         continue;
2290
2291                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2292
2293                 /*
2294                  * Pick the entry with the smallest timestamp:
2295                  */
2296                 if (ent && (!next || ts < next_ts)) {
2297                         next = ent;
2298                         next_cpu = cpu;
2299                         next_ts = ts;
2300                         next_lost = lost_events;
2301                         next_size = iter->ent_size;
2302                 }
2303         }
2304
2305         iter->ent_size = next_size;
2306
2307         if (ent_cpu)
2308                 *ent_cpu = next_cpu;
2309
2310         if (ent_ts)
2311                 *ent_ts = next_ts;
2312
2313         if (missing_events)
2314                 *missing_events = next_lost;
2315
2316         return next;
2317 }
2318
2319 /* Find the next real entry, without updating the iterator itself */
2320 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2321                                           int *ent_cpu, u64 *ent_ts)
2322 {
2323         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2324 }
2325
2326 /* Find the next real entry, and increment the iterator to the next entry */
2327 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2328 {
2329         iter->ent = __find_next_entry(iter, &iter->cpu,
2330                                       &iter->lost_events, &iter->ts);
2331
2332         if (iter->ent)
2333                 trace_iterator_increment(iter);
2334
2335         return iter->ent ? iter : NULL;
2336 }
2337
2338 static void trace_consume(struct trace_iterator *iter)
2339 {
2340         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2341                             &iter->lost_events);
2342 }
2343
2344 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2345 {
2346         struct trace_iterator *iter = m->private;
2347         int i = (int)*pos;
2348         void *ent;
2349
2350         WARN_ON_ONCE(iter->leftover);
2351
2352         (*pos)++;
2353
2354         /* can't go backwards */
2355         if (iter->idx > i)
2356                 return NULL;
2357
2358         if (iter->idx < 0)
2359                 ent = trace_find_next_entry_inc(iter);
2360         else
2361                 ent = iter;
2362
2363         while (ent && iter->idx < i)
2364                 ent = trace_find_next_entry_inc(iter);
2365
2366         iter->pos = *pos;
2367
2368         return ent;
2369 }
2370
2371 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2372 {
2373         struct ring_buffer_event *event;
2374         struct ring_buffer_iter *buf_iter;
2375         unsigned long entries = 0;
2376         u64 ts;
2377
2378         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2379
2380         buf_iter = trace_buffer_iter(iter, cpu);
2381         if (!buf_iter)
2382                 return;
2383
2384         ring_buffer_iter_reset(buf_iter);
2385
2386         /*
2387          * With the max latency tracers, a reset may never have taken
2388          * place on a cpu. This shows up as entries whose timestamp is
2389          * before the start of the buffer.
2390          */
2391         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2392                 if (ts >= iter->trace_buffer->time_start)
2393                         break;
2394                 entries++;
2395                 ring_buffer_read(buf_iter, NULL);
2396         }
2397
2398         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2399 }
2400
2401 /*
2402  * The current tracer is copied to avoid taking a global lock
2403  * all around.
2404  */
2405 static void *s_start(struct seq_file *m, loff_t *pos)
2406 {
2407         struct trace_iterator *iter = m->private;
2408         struct trace_array *tr = iter->tr;
2409         int cpu_file = iter->cpu_file;
2410         void *p = NULL;
2411         loff_t l = 0;
2412         int cpu;
2413
2414         /*
2415          * Copy the tracer to avoid using a global lock all around.
2416          * iter->trace is a copy of current_trace; the pointer to the
2417          * name may be used instead of a strcmp(), as iter->trace->name
2418          * will point to the same string as current_trace->name.
2419          */
2420         mutex_lock(&trace_types_lock);
2421         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2422                 *iter->trace = *tr->current_trace;
2423         mutex_unlock(&trace_types_lock);
2424
2425 #ifdef CONFIG_TRACER_MAX_TRACE
2426         if (iter->snapshot && iter->trace->use_max_tr)
2427                 return ERR_PTR(-EBUSY);
2428 #endif
2429
2430         if (!iter->snapshot)
2431                 atomic_inc(&trace_record_cmdline_disabled);
2432
2433         if (*pos != iter->pos) {
2434                 iter->ent = NULL;
2435                 iter->cpu = 0;
2436                 iter->idx = -1;
2437
2438                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2439                         for_each_tracing_cpu(cpu)
2440                                 tracing_iter_reset(iter, cpu);
2441                 } else
2442                         tracing_iter_reset(iter, cpu_file);
2443
2444                 iter->leftover = 0;
2445                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2446                         ;
2447
2448         } else {
2449                 /*
2450                  * If we overflowed the seq_file before, then we want
2451                  * to just reuse the trace_seq buffer again.
2452                  */
2453                 if (iter->leftover)
2454                         p = iter;
2455                 else {
2456                         l = *pos - 1;
2457                         p = s_next(m, p, &l);
2458                 }
2459         }
2460
2461         trace_event_read_lock();
2462         trace_access_lock(cpu_file);
2463         return p;
2464 }
2465
2466 static void s_stop(struct seq_file *m, void *p)
2467 {
2468         struct trace_iterator *iter = m->private;
2469
2470 #ifdef CONFIG_TRACER_MAX_TRACE
2471         if (iter->snapshot && iter->trace->use_max_tr)
2472                 return;
2473 #endif
2474
2475         if (!iter->snapshot)
2476                 atomic_dec(&trace_record_cmdline_disabled);
2477
2478         trace_access_unlock(iter->cpu_file);
2479         trace_event_read_unlock();
2480 }
2481
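/*
 * Sum the per-cpu counters of @buf: *entries is the number of entries
 * currently in the buffer, while *total also includes entries that were
 * overwritten (overruns), except on cpus where older entries were
 * deliberately skipped (see tracing_iter_reset()).
 */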
2482 static void
2483 get_total_entries(struct trace_buffer *buf,
2484                   unsigned long *total, unsigned long *entries)
2485 {
2486         unsigned long count;
2487         int cpu;
2488
2489         *total = 0;
2490         *entries = 0;
2491
2492         for_each_tracing_cpu(cpu) {
2493                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2494                 /*
2495                  * If this buffer has skipped entries, then we hold all
2496                  * entries for the trace and we need to ignore the
2497                  * ones before the time stamp.
2498                  */
2499                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2500                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2501                         /* total is the same as the entries */
2502                         *total += count;
2503                 } else
2504                         *total += count +
2505                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2506                 *entries += count;
2507         }
2508 }
2509
2510 static void print_lat_help_header(struct seq_file *m)
2511 {
2512         seq_puts(m, "#                  _------=> CPU#            \n");
2513         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2514         seq_puts(m, "#                | / _----=> need-resched    \n");
2515         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2516         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2517         seq_puts(m, "#                |||| /     delay             \n");
2518         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2519         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2520 }
2521
2522 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2523 {
2524         unsigned long total;
2525         unsigned long entries;
2526
2527         get_total_entries(buf, &total, &entries);
2528         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2529                    entries, total, num_online_cpus());
2530         seq_puts(m, "#\n");
2531 }
2532
2533 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2534 {
2535         print_event_info(buf, m);
2536         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2537         seq_puts(m, "#              | |       |          |         |\n");
2538 }
2539
2540 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2541 {
2542         print_event_info(buf, m);
2543         seq_puts(m, "#                              _-----=> irqs-off\n");
2544         seq_puts(m, "#                             / _----=> need-resched\n");
2545         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2546         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2547         seq_puts(m, "#                            ||| /     delay\n");
2548         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2549         seq_puts(m, "#              | |       |   ||||       |         |\n");
2550 }
2551
2552 void
2553 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2554 {
2555         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2556         struct trace_buffer *buf = iter->trace_buffer;
2557         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2558         struct tracer *type = iter->trace;
2559         unsigned long entries;
2560         unsigned long total;
2561         const char *name = "preemption";
2562
2563         name = type->name;
2564
2565         get_total_entries(buf, &total, &entries);
2566
2567         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2568                    name, UTS_RELEASE);
2569         seq_puts(m, "# -----------------------------------"
2570                  "---------------------------------\n");
2571         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2572                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2573                    nsecs_to_usecs(data->saved_latency),
2574                    entries,
2575                    total,
2576                    buf->cpu,
2577 #if defined(CONFIG_PREEMPT_NONE)
2578                    "server",
2579 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2580                    "desktop",
2581 #elif defined(CONFIG_PREEMPT)
2582                    "preempt",
2583 #else
2584                    "unknown",
2585 #endif
2586                    /* These are reserved for later use */
2587                    0, 0, 0, 0);
2588 #ifdef CONFIG_SMP
2589         seq_printf(m, " #P:%d)\n", num_online_cpus());
2590 #else
2591         seq_puts(m, ")\n");
2592 #endif
2593         seq_puts(m, "#    -----------------\n");
2594         seq_printf(m, "#    | task: %.16s-%d "
2595                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2596                    data->comm, data->pid,
2597                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2598                    data->policy, data->rt_priority);
2599         seq_puts(m, "#    -----------------\n");
2600
2601         if (data->critical_start) {
2602                 seq_puts(m, "#  => started at: ");
2603                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2604                 trace_print_seq(m, &iter->seq);
2605                 seq_puts(m, "\n#  => ended at:   ");
2606                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2607                 trace_print_seq(m, &iter->seq);
2608                 seq_puts(m, "\n#\n");
2609         }
2610
2611         seq_puts(m, "#\n");
2612 }
2613
2614 static void test_cpu_buff_start(struct trace_iterator *iter)
2615 {
2616         struct trace_seq *s = &iter->seq;
2617
2618         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2619                 return;
2620
2621         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2622                 return;
2623
2624         if (cpumask_test_cpu(iter->cpu, iter->started))
2625                 return;
2626
2627         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2628                 return;
2629
2630         cpumask_set_cpu(iter->cpu, iter->started);
2631
2632         /* Don't print started cpu buffer for the first entry of the trace */
2633         if (iter->idx > 1)
2634                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2635                                 iter->cpu);
2636 }
2637
2638 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2639 {
2640         struct trace_seq *s = &iter->seq;
2641         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2642         struct trace_entry *entry;
2643         struct trace_event *event;
2644
2645         entry = iter->ent;
2646
2647         test_cpu_buff_start(iter);
2648
2649         event = ftrace_find_event(entry->type);
2650
2651         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2652                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2653                         if (!trace_print_lat_context(iter))
2654                                 goto partial;
2655                 } else {
2656                         if (!trace_print_context(iter))
2657                                 goto partial;
2658                 }
2659         }
2660
2661         if (event)
2662                 return event->funcs->trace(iter, sym_flags, event);
2663
2664         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2665                 goto partial;
2666
2667         return TRACE_TYPE_HANDLED;
2668 partial:
2669         return TRACE_TYPE_PARTIAL_LINE;
2670 }
2671
2672 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2673 {
2674         struct trace_seq *s = &iter->seq;
2675         struct trace_entry *entry;
2676         struct trace_event *event;
2677
2678         entry = iter->ent;
2679
2680         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2681                 if (!trace_seq_printf(s, "%d %d %llu ",
2682                                       entry->pid, iter->cpu, iter->ts))
2683                         goto partial;
2684         }
2685
2686         event = ftrace_find_event(entry->type);
2687         if (event)
2688                 return event->funcs->raw(iter, 0, event);
2689
2690         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2691                 goto partial;
2692
2693         return TRACE_TYPE_HANDLED;
2694 partial:
2695         return TRACE_TYPE_PARTIAL_LINE;
2696 }
2697
2698 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2699 {
2700         struct trace_seq *s = &iter->seq;
2701         unsigned char newline = '\n';
2702         struct trace_entry *entry;
2703         struct trace_event *event;
2704
2705         entry = iter->ent;
2706
2707         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2708                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2709                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2710                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2711         }
2712
2713         event = ftrace_find_event(entry->type);
2714         if (event) {
2715                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2716                 if (ret != TRACE_TYPE_HANDLED)
2717                         return ret;
2718         }
2719
2720         SEQ_PUT_FIELD_RET(s, newline);
2721
2722         return TRACE_TYPE_HANDLED;
2723 }
2724
2725 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2726 {
2727         struct trace_seq *s = &iter->seq;
2728         struct trace_entry *entry;
2729         struct trace_event *event;
2730
2731         entry = iter->ent;
2732
2733         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2734                 SEQ_PUT_FIELD_RET(s, entry->pid);
2735                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2736                 SEQ_PUT_FIELD_RET(s, iter->ts);
2737         }
2738
2739         event = ftrace_find_event(entry->type);
2740         return event ? event->funcs->binary(iter, 0, event) :
2741                 TRACE_TYPE_HANDLED;
2742 }
2743
2744 int trace_empty(struct trace_iterator *iter)
2745 {
2746         struct ring_buffer_iter *buf_iter;
2747         int cpu;
2748
2749         /* If we are looking at one CPU buffer, only check that one */
2750         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2751                 cpu = iter->cpu_file;
2752                 buf_iter = trace_buffer_iter(iter, cpu);
2753                 if (buf_iter) {
2754                         if (!ring_buffer_iter_empty(buf_iter))
2755                                 return 0;
2756                 } else {
2757                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2758                                 return 0;
2759                 }
2760                 return 1;
2761         }
2762
2763         for_each_tracing_cpu(cpu) {
2764                 buf_iter = trace_buffer_iter(iter, cpu);
2765                 if (buf_iter) {
2766                         if (!ring_buffer_iter_empty(buf_iter))
2767                                 return 0;
2768                 } else {
2769                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2770                                 return 0;
2771                 }
2772         }
2773
2774         return 1;
2775 }
2776
2777 /*  Called with trace_event_read_lock() held. */
2778 enum print_line_t print_trace_line(struct trace_iterator *iter)
2779 {
2780         enum print_line_t ret;
2781
2782         if (iter->lost_events &&
2783             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2784                                  iter->cpu, iter->lost_events))
2785                 return TRACE_TYPE_PARTIAL_LINE;
2786
2787         if (iter->trace && iter->trace->print_line) {
2788                 ret = iter->trace->print_line(iter);
2789                 if (ret != TRACE_TYPE_UNHANDLED)
2790                         return ret;
2791         }
2792
2793         if (iter->ent->type == TRACE_BPUTS &&
2794                         trace_flags & TRACE_ITER_PRINTK &&
2795                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2796                 return trace_print_bputs_msg_only(iter);
2797
2798         if (iter->ent->type == TRACE_BPRINT &&
2799                         trace_flags & TRACE_ITER_PRINTK &&
2800                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2801                 return trace_print_bprintk_msg_only(iter);
2802
2803         if (iter->ent->type == TRACE_PRINT &&
2804                         trace_flags & TRACE_ITER_PRINTK &&
2805                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2806                 return trace_print_printk_msg_only(iter);
2807
2808         if (trace_flags & TRACE_ITER_BIN)
2809                 return print_bin_fmt(iter);
2810
2811         if (trace_flags & TRACE_ITER_HEX)
2812                 return print_hex_fmt(iter);
2813
2814         if (trace_flags & TRACE_ITER_RAW)
2815                 return print_raw_fmt(iter);
2816
2817         return print_trace_fmt(iter);
2818 }
2819
2820 void trace_latency_header(struct seq_file *m)
2821 {
2822         struct trace_iterator *iter = m->private;
2823
2824         /* print nothing if the buffers are empty */
2825         if (trace_empty(iter))
2826                 return;
2827
2828         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2829                 print_trace_header(m, iter);
2830
2831         if (!(trace_flags & TRACE_ITER_VERBOSE))
2832                 print_lat_help_header(m);
2833 }
2834
2835 void trace_default_header(struct seq_file *m)
2836 {
2837         struct trace_iterator *iter = m->private;
2838
2839         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2840                 return;
2841
2842         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2843                 /* print nothing if the buffers are empty */
2844                 if (trace_empty(iter))
2845                         return;
2846                 print_trace_header(m, iter);
2847                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2848                         print_lat_help_header(m);
2849         } else {
2850                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2851                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2852                                 print_func_help_header_irq(iter->trace_buffer, m);
2853                         else
2854                                 print_func_help_header(iter->trace_buffer, m);
2855                 }
2856         }
2857 }
2858
2859 static void test_ftrace_alive(struct seq_file *m)
2860 {
2861         if (!ftrace_is_dead())
2862                 return;
2863         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2864         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2865 }
2866
2867 #ifdef CONFIG_TRACER_MAX_TRACE
2868 static void show_snapshot_main_help(struct seq_file *m)
2869 {
2870         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2871         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2872         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2873         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2874         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2875         seq_printf(m, "#                       is not a '0' or '1')\n");
2876 }
2877
2878 static void show_snapshot_percpu_help(struct seq_file *m)
2879 {
2880         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2881 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2882         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2883         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2884 #else
2885         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2886         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2887 #endif
2888         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2889         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2890         seq_printf(m, "#                       is not a '0' or '1')\n");
2891 }
2892
2893 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2894 {
2895         if (iter->tr->allocated_snapshot)
2896                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2897         else
2898                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2899
2900         seq_printf(m, "# Snapshot commands:\n");
2901         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2902                 show_snapshot_main_help(m);
2903         else
2904                 show_snapshot_percpu_help(m);
2905 }
2906 #else
2907 /* Should never be called */
2908 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2909 #endif
2910
2911 static int s_show(struct seq_file *m, void *v)
2912 {
2913         struct trace_iterator *iter = v;
2914         int ret;
2915
2916         if (iter->ent == NULL) {
2917                 if (iter->tr) {
2918                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2919                         seq_puts(m, "#\n");
2920                         test_ftrace_alive(m);
2921                 }
2922                 if (iter->snapshot && trace_empty(iter))
2923                         print_snapshot_help(m, iter);
2924                 else if (iter->trace && iter->trace->print_header)
2925                         iter->trace->print_header(m);
2926                 else
2927                         trace_default_header(m);
2928
2929         } else if (iter->leftover) {
2930                 /*
2931                  * If we filled the seq_file buffer earlier, we
2932                  * want to just show it now.
2933                  */
2934                 ret = trace_print_seq(m, &iter->seq);
2935
2936                 /* ret should this time be zero, but you never know */
2937                 iter->leftover = ret;
2938
2939         } else {
2940                 print_trace_line(iter);
2941                 ret = trace_print_seq(m, &iter->seq);
2942                 /*
2943                  * If we overflow the seq_file buffer, then it will
2944                  * ask us for this data again at start up.
2945                  * Use that instead.
2946                  *  ret is 0 if seq_file write succeeded.
2947                  *        -1 otherwise.
2948                  */
2949                 iter->leftover = ret;
2950         }
2951
2952         return 0;
2953 }
2954
2955 /*
2956  * Should be used after trace_array_get(); trace_types_lock
2957  * ensures that i_cdev was already initialized.
2958  */
2959 static inline int tracing_get_cpu(struct inode *inode)
2960 {
2961         if (inode->i_cdev) /* See trace_create_cpu_file() */
2962                 return (long)inode->i_cdev - 1;
2963         return RING_BUFFER_ALL_CPUS;
2964 }
2965
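/*
 * seq_file operations backing the "trace" file: s_start() takes the
 * needed locks and positions the iterator, s_next() advances it via
 * trace_find_next_entry_inc(), s_show() formats one entry with
 * print_trace_line(), and s_stop() releases the locks.  They are wired
 * up by __tracing_open() below through __seq_open_private().
 */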
2966 static const struct seq_operations tracer_seq_ops = {
2967         .start          = s_start,
2968         .next           = s_next,
2969         .stop           = s_stop,
2970         .show           = s_show,
2971 };
2972
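/*
 * Set up a trace_iterator for reading the "trace" file: allocate a ring
 * buffer iterator per possible cpu, copy the current tracer, optionally
 * stop tracing while the buffer is being read (unless this is the
 * "snapshot" file), and prime each cpu with tracing_iter_reset().
 */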
2973 static struct trace_iterator *
2974 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2975 {
2976         struct trace_array *tr = inode->i_private;
2977         struct trace_iterator *iter;
2978         int cpu;
2979
2980         if (tracing_disabled)
2981                 return ERR_PTR(-ENODEV);
2982
2983         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2984         if (!iter)
2985                 return ERR_PTR(-ENOMEM);
2986
2987         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2988                                     GFP_KERNEL);
2989         if (!iter->buffer_iter)
2990                 goto release;
2991
2992         /*
2993          * We make a copy of the current tracer to avoid concurrent
2994          * changes on it while we are reading.
2995          */
2996         mutex_lock(&trace_types_lock);
2997         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2998         if (!iter->trace)
2999                 goto fail;
3000
3001         *iter->trace = *tr->current_trace;
3002
3003         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3004                 goto fail;
3005
3006         iter->tr = tr;
3007
3008 #ifdef CONFIG_TRACER_MAX_TRACE
3009         /* Currently only the top directory has a snapshot */
3010         if (tr->current_trace->print_max || snapshot)
3011                 iter->trace_buffer = &tr->max_buffer;
3012         else
3013 #endif
3014                 iter->trace_buffer = &tr->trace_buffer;
3015         iter->snapshot = snapshot;
3016         iter->pos = -1;
3017         iter->cpu_file = tracing_get_cpu(inode);
3018         mutex_init(&iter->mutex);
3019
3020         /* Notify the tracer early, before we stop tracing. */
3021         if (iter->trace && iter->trace->open)
3022                 iter->trace->open(iter);
3023
3024         /* Annotate start of buffers if we had overruns */
3025         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3026                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3027
3028         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3029         if (trace_clocks[tr->clock_id].in_ns)
3030                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3031
3032         /* stop the trace while dumping if we are not opening "snapshot" */
3033         if (!iter->snapshot)
3034                 tracing_stop_tr(tr);
3035
3036         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3037                 for_each_tracing_cpu(cpu) {
3038                         iter->buffer_iter[cpu] =
3039                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3040                 }
3041                 ring_buffer_read_prepare_sync();
3042                 for_each_tracing_cpu(cpu) {
3043                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3044                         tracing_iter_reset(iter, cpu);
3045                 }
3046         } else {
3047                 cpu = iter->cpu_file;
3048                 iter->buffer_iter[cpu] =
3049                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3050                 ring_buffer_read_prepare_sync();
3051                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3052                 tracing_iter_reset(iter, cpu);
3053         }
3054
3055         mutex_unlock(&trace_types_lock);
3056
3057         return iter;
3058
3059  fail:
3060         mutex_unlock(&trace_types_lock);
3061         kfree(iter->trace);
3062         kfree(iter->buffer_iter);
3063 release:
3064         seq_release_private(inode, file);
3065         return ERR_PTR(-ENOMEM);
3066 }
3067
3068 int tracing_open_generic(struct inode *inode, struct file *filp)
3069 {
3070         if (tracing_disabled)
3071                 return -ENODEV;
3072
3073         filp->private_data = inode->i_private;
3074         return 0;
3075 }
3076
3077 bool tracing_is_disabled(void)
3078 {
3079         return tracing_disabled ? true : false;
3080 }
3081
3082 /*
3083  * Open and update trace_array ref count.
3084  * Must have the current trace_array passed to it.
3085  */
3086 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3087 {
3088         struct trace_array *tr = inode->i_private;
3089
3090         if (tracing_disabled)
3091                 return -ENODEV;
3092
3093         if (trace_array_get(tr) < 0)
3094                 return -ENODEV;
3095
3096         filp->private_data = inode->i_private;
3097
3098         return 0;
3099 }
3100
3101 static int tracing_release(struct inode *inode, struct file *file)
3102 {
3103         struct trace_array *tr = inode->i_private;
3104         struct seq_file *m = file->private_data;
3105         struct trace_iterator *iter;
3106         int cpu;
3107
3108         if (!(file->f_mode & FMODE_READ)) {
3109                 trace_array_put(tr);
3110                 return 0;
3111         }
3112
3113         /* Writes do not use seq_file */
3114         iter = m->private;
3115         mutex_lock(&trace_types_lock);
3116
3117         for_each_tracing_cpu(cpu) {
3118                 if (iter->buffer_iter[cpu])
3119                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3120         }
3121
3122         if (iter->trace && iter->trace->close)
3123                 iter->trace->close(iter);
3124
3125         if (!iter->snapshot)
3126                 /* reenable tracing if it was previously enabled */
3127                 tracing_start_tr(tr);
3128
3129         __trace_array_put(tr);
3130
3131         mutex_unlock(&trace_types_lock);
3132
3133         mutex_destroy(&iter->mutex);
3134         free_cpumask_var(iter->started);
3135         kfree(iter->trace);
3136         kfree(iter->buffer_iter);
3137         seq_release_private(inode, file);
3138
3139         return 0;
3140 }
3141
3142 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3143 {
3144         struct trace_array *tr = inode->i_private;
3145
3146         trace_array_put(tr);
3147         return 0;
3148 }
3149
3150 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3151 {
3152         struct trace_array *tr = inode->i_private;
3153
3154         trace_array_put(tr);
3155
3156         return single_release(inode, file);
3157 }
3158
3159 static int tracing_open(struct inode *inode, struct file *file)
3160 {
3161         struct trace_array *tr = inode->i_private;
3162         struct trace_iterator *iter;
3163         int ret = 0;
3164
3165         if (trace_array_get(tr) < 0)
3166                 return -ENODEV;
3167
3168         /* If this file was opened for write, then erase its contents */
3169         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3170                 int cpu = tracing_get_cpu(inode);
3171
3172                 if (cpu == RING_BUFFER_ALL_CPUS)
3173                         tracing_reset_online_cpus(&tr->trace_buffer);
3174                 else
3175                         tracing_reset(&tr->trace_buffer, cpu);
3176         }
3177
3178         if (file->f_mode & FMODE_READ) {
3179                 iter = __tracing_open(inode, file, false);
3180                 if (IS_ERR(iter))
3181                         ret = PTR_ERR(iter);
3182                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3183                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3184         }
3185
3186         if (ret < 0)
3187                 trace_array_put(tr);
3188
3189         return ret;
3190 }
3191
3192 /*
3193  * Some tracers are not suitable for instance buffers.
3194  * A tracer is always available for the global array (toplevel)
3195  * or if it explicitly states that it is.
3196  */
3197 static bool
3198 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3199 {
3200         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3201 }
3202
3203 /* Find the next tracer that this trace array may use */
3204 static struct tracer *
3205 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3206 {
3207         while (t && !trace_ok_for_array(t, tr))
3208                 t = t->next;
3209
3210         return t;
3211 }
3212
3213 static void *
3214 t_next(struct seq_file *m, void *v, loff_t *pos)
3215 {
3216         struct trace_array *tr = m->private;
3217         struct tracer *t = v;
3218
3219         (*pos)++;
3220
3221         if (t)
3222                 t = get_tracer_for_array(tr, t->next);
3223
3224         return t;
3225 }
3226
3227 static void *t_start(struct seq_file *m, loff_t *pos)
3228 {
3229         struct trace_array *tr = m->private;
3230         struct tracer *t;
3231         loff_t l = 0;
3232
3233         mutex_lock(&trace_types_lock);
3234
3235         t = get_tracer_for_array(tr, trace_types);
3236         for (; t && l < *pos; t = t_next(m, t, &l))
3237                         ;
3238
3239         return t;
3240 }
3241
3242 static void t_stop(struct seq_file *m, void *p)
3243 {
3244         mutex_unlock(&trace_types_lock);
3245 }
3246
3247 static int t_show(struct seq_file *m, void *v)
3248 {
3249         struct tracer *t = v;
3250
3251         if (!t)
3252                 return 0;
3253
3254         seq_printf(m, "%s", t->name);
3255         if (t->next)
3256                 seq_putc(m, ' ');
3257         else
3258                 seq_putc(m, '\n');
3259
3260         return 0;
3261 }
3262
3263 static const struct seq_operations show_traces_seq_ops = {
3264         .start          = t_start,
3265         .next           = t_next,
3266         .stop           = t_stop,
3267         .show           = t_show,
3268 };
3269
3270 static int show_traces_open(struct inode *inode, struct file *file)
3271 {
3272         struct trace_array *tr = inode->i_private;
3273         struct seq_file *m;
3274         int ret;
3275
3276         if (tracing_disabled)
3277                 return -ENODEV;
3278
3279         ret = seq_open(file, &show_traces_seq_ops);
3280         if (ret)
3281                 return ret;
3282
3283         m = file->private_data;
3284         m->private = tr;
3285
3286         return 0;
3287 }
3288
3289 static ssize_t
3290 tracing_write_stub(struct file *filp, const char __user *ubuf,
3291                    size_t count, loff_t *ppos)
3292 {
3293         return count;
3294 }
3295
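/*
 * Generic lseek for trace files: opens with read go through seq_lseek(),
 * while write-only opens simply have their position reset to zero.
 */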
3296 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3297 {
3298         int ret;
3299
3300         if (file->f_mode & FMODE_READ)
3301                 ret = seq_lseek(file, offset, whence);
3302         else
3303                 file->f_pos = ret = 0;
3304
3305         return ret;
3306 }
3307
3308 static const struct file_operations tracing_fops = {
3309         .open           = tracing_open,
3310         .read           = seq_read,
3311         .write          = tracing_write_stub,
3312         .llseek         = tracing_lseek,
3313         .release        = tracing_release,
3314 };
3315
3316 static const struct file_operations show_traces_fops = {
3317         .open           = show_traces_open,
3318         .read           = seq_read,
3319         .release        = seq_release,
3320         .llseek         = seq_lseek,
3321 };
3322
3323 /*
3324  * The tracer itself will not take this lock, but we still want
3325  * to provide a consistent cpumask to user-space:
3326  */
3327 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3328
3329 /*
3330  * Temporary storage for the character representation of the
3331  * CPU bitmask (and one more byte for the newline):
3332  */
3333 static char mask_str[NR_CPUS + 1];
3334
3335 static ssize_t
3336 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3337                      size_t count, loff_t *ppos)
3338 {
3339         struct trace_array *tr = file_inode(filp)->i_private;
3340         int len;
3341
3342         mutex_lock(&tracing_cpumask_update_lock);
3343
3344         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3345         if (count - len < 2) {
3346                 count = -EINVAL;
3347                 goto out_err;
3348         }
3349         len += sprintf(mask_str + len, "\n");
3350         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3351
3352 out_err:
3353         mutex_unlock(&tracing_cpumask_update_lock);
3354
3355         return count;
3356 }
3357
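/*
 * Write a new tracing cpumask. For every CPU whose bit changes, the
 * per-cpu disabled counter and the ring buffer recording state are
 * updated so that only the requested CPUs are traced.
 */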
3358 static ssize_t
3359 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3360                       size_t count, loff_t *ppos)
3361 {
3362         struct trace_array *tr = file_inode(filp)->i_private;
3363         cpumask_var_t tracing_cpumask_new;
3364         int err, cpu;
3365
3366         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3367                 return -ENOMEM;
3368
3369         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3370         if (err)
3371                 goto err_unlock;
3372
3373         mutex_lock(&tracing_cpumask_update_lock);
3374
3375         local_irq_disable();
3376         arch_spin_lock(&tr->max_lock);
3377         for_each_tracing_cpu(cpu) {
3378                 /*
3379                  * Increase/decrease the disabled counter if we are
3380                  * about to flip a bit in the cpumask:
3381                  */
3382                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3383                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3384                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3385                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3386                 }
3387                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3388                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3389                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3390                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3391                 }
3392         }
3393         arch_spin_unlock(&tr->max_lock);
3394         local_irq_enable();
3395
3396         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3397
3398         mutex_unlock(&tracing_cpumask_update_lock);
3399         free_cpumask_var(tracing_cpumask_new);
3400
3401         return count;
3402
3403 err_unlock:
3404         free_cpumask_var(tracing_cpumask_new);
3405
3406         return err;
3407 }
3408
3409 static const struct file_operations tracing_cpumask_fops = {
3410         .open           = tracing_open_generic_tr,
3411         .read           = tracing_cpumask_read,
3412         .write          = tracing_cpumask_write,
3413         .release        = tracing_release_generic_tr,
3414         .llseek         = generic_file_llseek,
3415 };
3416
3417 static int tracing_trace_options_show(struct seq_file *m, void *v)
3418 {
3419         struct tracer_opt *trace_opts;
3420         struct trace_array *tr = m->private;
3421         u32 tracer_flags;
3422         int i;
3423
3424         mutex_lock(&trace_types_lock);
3425         tracer_flags = tr->current_trace->flags->val;
3426         trace_opts = tr->current_trace->flags->opts;
3427
3428         for (i = 0; trace_options[i]; i++) {
3429                 if (trace_flags & (1 << i))
3430                         seq_printf(m, "%s\n", trace_options[i]);
3431                 else
3432                         seq_printf(m, "no%s\n", trace_options[i]);
3433         }
3434
3435         for (i = 0; trace_opts[i].name; i++) {
3436                 if (tracer_flags & trace_opts[i].bit)
3437                         seq_printf(m, "%s\n", trace_opts[i].name);
3438                 else
3439                         seq_printf(m, "no%s\n", trace_opts[i].name);
3440         }
3441         mutex_unlock(&trace_types_lock);
3442
3443         return 0;
3444 }
3445
3446 static int __set_tracer_option(struct trace_array *tr,
3447                                struct tracer_flags *tracer_flags,
3448                                struct tracer_opt *opts, int neg)
3449 {
3450         struct tracer *trace = tr->current_trace;
3451         int ret;
3452
3453         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3454         if (ret)
3455                 return ret;
3456
3457         if (neg)
3458                 tracer_flags->val &= ~opts->bit;
3459         else
3460                 tracer_flags->val |= opts->bit;
3461         return 0;
3462 }
3463
3464 /* Try to assign a tracer specific option */
3465 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3466 {
3467         struct tracer *trace = tr->current_trace;
3468         struct tracer_flags *tracer_flags = trace->flags;
3469         struct tracer_opt *opts = NULL;
3470         int i;
3471
3472         for (i = 0; tracer_flags->opts[i].name; i++) {
3473                 opts = &tracer_flags->opts[i];
3474
3475                 if (strcmp(cmp, opts->name) == 0)
3476                         return __set_tracer_option(tr, trace->flags, opts, neg);
3477         }
3478
3479         return -EINVAL;
3480 }
3481
3482 /* Some tracers require overwrite to stay enabled */
3483 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3484 {
3485         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3486                 return -1;
3487
3488         return 0;
3489 }
3490
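/*
 * Set or clear a bit in trace_flags, giving the current tracer a chance
 * to veto the change and notifying the parts of tracing that depend on
 * the flag (cmdline recording, buffer overwrite, trace_printk).
 */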
3491 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3492 {
3493         /* do nothing if flag is already set */
3494         if (!!(trace_flags & mask) == !!enabled)
3495                 return 0;
3496
3497         /* Give the tracer a chance to approve the change */
3498         if (tr->current_trace->flag_changed)
3499                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3500                         return -EINVAL;
3501
3502         if (enabled)
3503                 trace_flags |= mask;
3504         else
3505                 trace_flags &= ~mask;
3506
3507         if (mask == TRACE_ITER_RECORD_CMD)
3508                 trace_event_enable_cmd_record(enabled);
3509
3510         if (mask == TRACE_ITER_OVERWRITE) {
3511                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3512 #ifdef CONFIG_TRACER_MAX_TRACE
3513                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3514 #endif
3515         }
3516
3517         if (mask == TRACE_ITER_PRINTK)
3518                 trace_printk_start_stop_comm(enabled);
3519
3520         return 0;
3521 }
3522
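/*
 * Parse one option name (optionally prefixed with "no" to negate it) and
 * apply it to the core trace flags, falling back to the current tracer's
 * own options if no core flag matches.
 */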
3523 static int trace_set_options(struct trace_array *tr, char *option)
3524 {
3525         char *cmp;
3526         int neg = 0;
3527         int ret = -ENODEV;
3528         int i;
3529
3530         cmp = strstrip(option);
3531
3532         if (strncmp(cmp, "no", 2) == 0) {
3533                 neg = 1;
3534                 cmp += 2;
3535         }
3536
3537         mutex_lock(&trace_types_lock);
3538
3539         for (i = 0; trace_options[i]; i++) {
3540                 if (strcmp(cmp, trace_options[i]) == 0) {
3541                         ret = set_tracer_flag(tr, 1 << i, !neg);
3542                         break;
3543                 }
3544         }
3545
3546         /* If no option could be set, test the specific tracer options */
3547         if (!trace_options[i])
3548                 ret = set_tracer_option(tr, cmp, neg);
3549
3550         mutex_unlock(&trace_types_lock);
3551
3552         return ret;
3553 }
3554
3555 static ssize_t
3556 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3557                         size_t cnt, loff_t *ppos)
3558 {
3559         struct seq_file *m = filp->private_data;
3560         struct trace_array *tr = m->private;
3561         char buf[64];
3562         int ret;
3563
3564         if (cnt >= sizeof(buf))
3565                 return -EINVAL;
3566
3567         if (copy_from_user(&buf, ubuf, cnt))
3568                 return -EFAULT;
3569
3570         buf[cnt] = 0;
3571
3572         ret = trace_set_options(tr, buf);
3573         if (ret < 0)
3574                 return ret;
3575
3576         *ppos += cnt;
3577
3578         return cnt;
3579 }
3580
3581 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3582 {
3583         struct trace_array *tr = inode->i_private;
3584         int ret;
3585
3586         if (tracing_disabled)
3587                 return -ENODEV;
3588
3589         if (trace_array_get(tr) < 0)
3590                 return -ENODEV;
3591
3592         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3593         if (ret < 0)
3594                 trace_array_put(tr);
3595
3596         return ret;
3597 }
3598
3599 static const struct file_operations tracing_iter_fops = {
3600         .open           = tracing_trace_options_open,
3601         .read           = seq_read,
3602         .llseek         = seq_lseek,
3603         .release        = tracing_single_release_tr,
3604         .write          = tracing_trace_options_write,
3605 };
3606
3607 static const char readme_msg[] =
3608         "tracing mini-HOWTO:\n\n"
3609         "# echo 0 > tracing_on : quick way to disable tracing\n"
3610         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3611         " Important files:\n"
3612         "  trace\t\t\t- The static contents of the buffer\n"
3613         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3614         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3615         "  current_tracer\t- function and latency tracers\n"
3616         "  available_tracers\t- list of configured tracers for current_tracer\n"
3617         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3618         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3619         "  trace_clock\t\t- change the clock used to order events\n"
3620         "       local:   Per cpu clock but may not be synced across CPUs\n"
3621         "      global:   Synced across CPUs but slows tracing down.\n"
3622         "     counter:   Not a clock, but just an increment\n"
3623         "      uptime:   Jiffy counter from time of boot\n"
3624         "        perf:   Same clock that perf events use\n"
3625 #ifdef CONFIG_X86_64
3626         "     x86-tsc:   TSC cycle counter\n"
3627 #endif
3628         "\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
3629         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3630         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3631         "\t\t\t  Remove sub-buffer with rmdir\n"
3632         "  trace_options\t\t- Set format or modify how tracing happens\n"
3633         "\t\t\t  Disable an option by prefixing 'no' to the\n"
3634         "\t\t\t  option name\n"
3635         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
3636 #ifdef CONFIG_DYNAMIC_FTRACE
3637         "\n  available_filter_functions - list of functions that can be filtered on\n"
3638         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3639         "\t\t\t  functions\n"
3640         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3641         "\t     modules: Can select a group via module\n"
3642         "\t      Format: :mod:<module-name>\n"
3643         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3644         "\t    triggers: a command to perform when function is hit\n"
3645         "\t      Format: <function>:<trigger>[:count]\n"
3646         "\t     trigger: traceon, traceoff\n"
3647         "\t\t      enable_event:<system>:<event>\n"
3648         "\t\t      disable_event:<system>:<event>\n"
3649 #ifdef CONFIG_STACKTRACE
3650         "\t\t      stacktrace\n"
3651 #endif
3652 #ifdef CONFIG_TRACER_SNAPSHOT
3653         "\t\t      snapshot\n"
3654 #endif
3655         "\t\t      dump\n"
3656         "\t\t      cpudump\n"
3657         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3658         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3659         "\t     The first one will disable tracing every time do_fault is hit\n"
3660         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3661         "\t       The first time do_trap is hit and it disables tracing, the\n"
3662         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3663         "\t       the counter will not decrement. It only decrements when the\n"
3664         "\t       trigger did work\n"
3665         "\t     To remove a trigger without a count:\n"
3666         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3667         "\t     To remove a trigger with a count:\n"
3668         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3669         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3670         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3671         "\t    modules: Can select a group via module command :mod:\n"
3672         "\t    Does not accept triggers\n"
3673 #endif /* CONFIG_DYNAMIC_FTRACE */
3674 #ifdef CONFIG_FUNCTION_TRACER
3675         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3676         "\t\t    (function)\n"
3677 #endif
3678 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3679         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3680         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3681         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3682 #endif
3683 #ifdef CONFIG_TRACER_SNAPSHOT
3684         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3685         "\t\t\t  snapshot buffer. Read the contents for more\n"
3686         "\t\t\t  information\n"
3687 #endif
3688 #ifdef CONFIG_STACK_TRACER
3689         "  stack_trace\t\t- Shows the max stack trace when active\n"
3690         "  stack_max_size\t- Shows current max stack size that was traced\n"
3691         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3692         "\t\t\t  new trace)\n"
3693 #ifdef CONFIG_DYNAMIC_FTRACE
3694         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3695         "\t\t\t  traces\n"
3696 #endif
3697 #endif /* CONFIG_STACK_TRACER */
3698         "  events/\t\t- Directory containing all trace event subsystems:\n"
3699         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3700         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3701         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3702         "\t\t\t  events\n"
3703         "      filter\t\t- If set, only events passing filter are traced\n"
3704         "  events/<system>/<event>/\t- Directory containing control files for\n"
3705         "\t\t\t  <event>:\n"
3706         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3707         "      filter\t\t- If set, only events passing filter are traced\n"
3708         "      trigger\t\t- If set, a command to perform when event is hit\n"
3709         "\t    Format: <trigger>[:count][if <filter>]\n"
3710         "\t   trigger: traceon, traceoff\n"
3711         "\t            enable_event:<system>:<event>\n"
3712         "\t            disable_event:<system>:<event>\n"
3713 #ifdef CONFIG_STACKTRACE
3714         "\t\t    stacktrace\n"
3715 #endif
3716 #ifdef CONFIG_TRACER_SNAPSHOT
3717         "\t\t    snapshot\n"
3718 #endif
3719         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3720         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3721         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3722         "\t                  events/block/block_unplug/trigger\n"
3723         "\t   The first disables tracing every time block_unplug is hit.\n"
3724         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3725         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3726         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
3727         "\t   Like function triggers, the counter is only decremented if it\n"
3728         "\t    enabled or disabled tracing.\n"
3729         "\t   To remove a trigger without a count:\n"
3730         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3731         "\t   To remove a trigger with a count:\n"
3732         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3733         "\t   Filters can be ignored when removing a trigger.\n"
3734 ;
3735
3736 static ssize_t
3737 tracing_readme_read(struct file *filp, char __user *ubuf,
3738                        size_t cnt, loff_t *ppos)
3739 {
3740         return simple_read_from_buffer(ubuf, cnt, ppos,
3741                                         readme_msg, strlen(readme_msg));
3742 }
3743
3744 static const struct file_operations tracing_readme_fops = {
3745         .open           = tracing_open_generic,
3746         .read           = tracing_readme_read,
3747         .llseek         = generic_file_llseek,
3748 };
3749
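/*
 * seq_file iterator callbacks for the saved_cmdlines file: walk the
 * map_cmdline_to_pid array under trace_cmdline_lock and print each
 * recorded pid together with its saved comm.
 */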
3750 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3751 {
3752         unsigned int *ptr = v;
3753
3754         if (*pos || m->count)
3755                 ptr++;
3756
3757         (*pos)++;
3758
3759         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3760              ptr++) {
3761                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3762                         continue;
3763
3764                 return ptr;
3765         }
3766
3767         return NULL;
3768 }
3769
3770 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3771 {
3772         void *v;
3773         loff_t l = 0;
3774
3775         preempt_disable();
3776         arch_spin_lock(&trace_cmdline_lock);
3777
3778         v = &savedcmd->map_cmdline_to_pid[0];
3779         while (l <= *pos) {
3780                 v = saved_cmdlines_next(m, v, &l);
3781                 if (!v)
3782                         return NULL;
3783         }
3784
3785         return v;
3786 }
3787
3788 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3789 {
3790         arch_spin_unlock(&trace_cmdline_lock);
3791         preempt_enable();
3792 }
3793
3794 static int saved_cmdlines_show(struct seq_file *m, void *v)
3795 {
3796         char buf[TASK_COMM_LEN];
3797         unsigned int *pid = v;
3798
3799         __trace_find_cmdline(*pid, buf);
3800         seq_printf(m, "%d %s\n", *pid, buf);
3801         return 0;
3802 }
3803
3804 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3805         .start          = saved_cmdlines_start,
3806         .next           = saved_cmdlines_next,
3807         .stop           = saved_cmdlines_stop,
3808         .show           = saved_cmdlines_show,
3809 };
3810
3811 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3812 {
3813         if (tracing_disabled)
3814                 return -ENODEV;
3815
3816         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3817 }
3818
3819 static const struct file_operations tracing_saved_cmdlines_fops = {
3820         .open           = tracing_saved_cmdlines_open,
3821         .read           = seq_read,
3822         .llseek         = seq_lseek,
3823         .release        = seq_release,
3824 };
3825
3826 static ssize_t
3827 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3828                                  size_t cnt, loff_t *ppos)
3829 {
3830         char buf[64];
3831         int r;
3832
3833         arch_spin_lock(&trace_cmdline_lock);
3834         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3835         arch_spin_unlock(&trace_cmdline_lock);
3836
3837         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3838 }
3839
3840 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3841 {
3842         kfree(s->saved_cmdlines);
3843         kfree(s->map_cmdline_to_pid);
3844         kfree(s);
3845 }
3846
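/*
 * Replace the saved_cmdlines buffer with a newly allocated one holding
 * @val entries; the old buffer is freed once the swap is done.
 */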
3847 static int tracing_resize_saved_cmdlines(unsigned int val)
3848 {
3849         struct saved_cmdlines_buffer *s, *savedcmd_temp;
3850
3851         s = kmalloc(sizeof(*s), GFP_KERNEL);
3852         if (!s)
3853                 return -ENOMEM;
3854
3855         if (allocate_cmdlines_buffer(val, s) < 0) {
3856                 kfree(s);
3857                 return -ENOMEM;
3858         }
3859
3860         arch_spin_lock(&trace_cmdline_lock);
3861         savedcmd_temp = savedcmd;
3862         savedcmd = s;
3863         arch_spin_unlock(&trace_cmdline_lock);
3864         free_saved_cmdlines_buffer(savedcmd_temp);
3865
3866         return 0;
3867 }
3868
3869 static ssize_t
3870 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3871                                   size_t cnt, loff_t *ppos)
3872 {
3873         unsigned long val;
3874         int ret;
3875
3876         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3877         if (ret)
3878                 return ret;
3879
3880         /* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
3881         if (!val || val > PID_MAX_DEFAULT)
3882                 return -EINVAL;
3883
3884         ret = tracing_resize_saved_cmdlines((unsigned int)val);
3885         if (ret < 0)
3886                 return ret;
3887
3888         *ppos += cnt;
3889
3890         return cnt;
3891 }
3892
3893 static const struct file_operations tracing_saved_cmdlines_size_fops = {
3894         .open           = tracing_open_generic,
3895         .read           = tracing_saved_cmdlines_size_read,
3896         .write          = tracing_saved_cmdlines_size_write,
3897 };
3898
3899 static ssize_t
3900 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3901                        size_t cnt, loff_t *ppos)
3902 {
3903         struct trace_array *tr = filp->private_data;
3904         char buf[MAX_TRACER_SIZE+2];
3905         int r;
3906
3907         mutex_lock(&trace_types_lock);
3908         r = sprintf(buf, "%s\n", tr->current_trace->name);
3909         mutex_unlock(&trace_types_lock);
3910
3911         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3912 }
3913
3914 int tracer_init(struct tracer *t, struct trace_array *tr)
3915 {
3916         tracing_reset_online_cpus(&tr->trace_buffer);
3917         return t->init(tr);
3918 }
3919
3920 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3921 {
3922         int cpu;
3923
3924         for_each_tracing_cpu(cpu)
3925                 per_cpu_ptr(buf->data, cpu)->entries = val;
3926 }
3927
3928 #ifdef CONFIG_TRACER_MAX_TRACE
3929 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3930 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3931                                         struct trace_buffer *size_buf, int cpu_id)
3932 {
3933         int cpu, ret = 0;
3934
3935         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3936                 for_each_tracing_cpu(cpu) {
3937                         ret = ring_buffer_resize(trace_buf->buffer,
3938                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3939                         if (ret < 0)
3940                                 break;
3941                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3942                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3943                 }
3944         } else {
3945                 ret = ring_buffer_resize(trace_buf->buffer,
3946                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3947                 if (ret == 0)
3948                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3949                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3950         }
3951
3952         return ret;
3953 }
3954 #endif /* CONFIG_TRACER_MAX_TRACE */
3955
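/*
 * Resize the ring buffer of @tr for one CPU (or all CPUs), keeping the
 * max/snapshot buffer the same size when the current tracer uses it.
 */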
3956 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3957                                         unsigned long size, int cpu)
3958 {
3959         int ret;
3960
3961         /*
3962          * If kernel or user changes the size of the ring buffer
3963          * we use the size that was given, and we can forget about
3964          * expanding it later.
3965          */
3966         ring_buffer_expanded = true;
3967
3968         /* May be called before buffers are initialized */
3969         if (!tr->trace_buffer.buffer)
3970                 return 0;
3971
3972         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3973         if (ret < 0)
3974                 return ret;
3975
3976 #ifdef CONFIG_TRACER_MAX_TRACE
3977         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3978             !tr->current_trace->use_max_tr)
3979                 goto out;
3980
3981         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3982         if (ret < 0) {
3983                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3984                                                      &tr->trace_buffer, cpu);
3985                 if (r < 0) {
3986                         /*
3987                          * AARGH! We are left with a different
3988                          * sized max buffer!!!!
3989                          * The max buffer is our "snapshot" buffer.
3990                          * When a tracer needs a snapshot (one of the
3991                          * latency tracers), it swaps the max buffer
3992                          * with the saved snapshot. We succeeded in
3993                          * updating the size of the main buffer, but failed to
3994                          * update the size of the max buffer. But when we tried
3995                          * to reset the main buffer to the original size, we
3996                          * failed there too. This is very unlikely to
3997                          * happen, but if it does, warn and kill all
3998                          * tracing.
3999                          */
4000                         WARN_ON(1);
4001                         tracing_disabled = 1;
4002                 }
4003                 return ret;
4004         }
4005
4006         if (cpu == RING_BUFFER_ALL_CPUS)
4007                 set_buffer_entries(&tr->max_buffer, size);
4008         else
4009                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4010
4011  out:
4012 #endif /* CONFIG_TRACER_MAX_TRACE */
4013
4014         if (cpu == RING_BUFFER_ALL_CPUS)
4015                 set_buffer_entries(&tr->trace_buffer, size);
4016         else
4017                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4018
4019         return ret;
4020 }
4021
4022 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4023                                           unsigned long size, int cpu_id)
4024 {
4025         int ret = size;
4026
4027         mutex_lock(&trace_types_lock);
4028
4029         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4030                 /* make sure, this cpu is enabled in the mask */
4031                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4032                         ret = -EINVAL;
4033                         goto out;
4034                 }
4035         }
4036
4037         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4038         if (ret < 0)
4039                 ret = -ENOMEM;
4040
4041 out:
4042         mutex_unlock(&trace_types_lock);
4043
4044         return ret;
4045 }
4046
4047
4048 /**
4049  * tracing_update_buffers - used by tracing facility to expand ring buffers
4050  *
4051  * To save memory when tracing is configured in but never used, the
4052  * ring buffers are initially set to a minimum size. Once a user
4053  * starts to use the tracing facility, the buffers need to grow to
4054  * their default size.
4055  *
4056  * This function is to be called when a tracer is about to be used.
4057  */
4058 int tracing_update_buffers(void)
4059 {
4060         int ret = 0;
4061
4062         mutex_lock(&trace_types_lock);
4063         if (!ring_buffer_expanded)
4064                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4065                                                 RING_BUFFER_ALL_CPUS);
4066         mutex_unlock(&trace_types_lock);
4067
4068         return ret;
4069 }
4070
4071 struct trace_option_dentry;
4072
4073 static struct trace_option_dentry *
4074 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4075
4076 static void
4077 destroy_trace_option_files(struct trace_option_dentry *topts);
4078
4079 /*
4080  * Used to clear out the tracer before deletion of an instance.
4081  * Must have trace_types_lock held.
4082  */
4083 static void tracing_set_nop(struct trace_array *tr)
4084 {
4085         if (tr->current_trace == &nop_trace)
4086                 return;
4087
4088         tr->current_trace->enabled--;
4089
4090         if (tr->current_trace->reset)
4091                 tr->current_trace->reset(tr);
4092
4093         tr->current_trace = &nop_trace;
4094 }
4095
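/*
 * Switch @tr to the tracer named @buf: shut down the current tracer,
 * allocate or free the snapshot buffer as the new tracer requires, and
 * initialize the new tracer.
 */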
4096 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4097 {
4098         static struct trace_option_dentry *topts;
4099         struct tracer *t;
4100 #ifdef CONFIG_TRACER_MAX_TRACE
4101         bool had_max_tr;
4102 #endif
4103         int ret = 0;
4104
4105         mutex_lock(&trace_types_lock);
4106
4107         if (!ring_buffer_expanded) {
4108                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4109                                                 RING_BUFFER_ALL_CPUS);
4110                 if (ret < 0)
4111                         goto out;
4112                 ret = 0;
4113         }
4114
4115         for (t = trace_types; t; t = t->next) {
4116                 if (strcmp(t->name, buf) == 0)
4117                         break;
4118         }
4119         if (!t) {
4120                 ret = -EINVAL;
4121                 goto out;
4122         }
4123         if (t == tr->current_trace)
4124                 goto out;
4125
4126         /* Some tracers are only allowed for the top level buffer */
4127         if (!trace_ok_for_array(t, tr)) {
4128                 ret = -EINVAL;
4129                 goto out;
4130         }
4131
4132         trace_branch_disable();
4133
4134         tr->current_trace->enabled--;
4135
4136         if (tr->current_trace->reset)
4137                 tr->current_trace->reset(tr);
4138
4139         /* Current trace needs to be nop_trace before synchronize_sched */
4140         tr->current_trace = &nop_trace;
4141
4142 #ifdef CONFIG_TRACER_MAX_TRACE
4143         had_max_tr = tr->allocated_snapshot;
4144
4145         if (had_max_tr && !t->use_max_tr) {
4146                 /*
4147                  * We need to make sure that the update_max_tr sees that
4148                  * current_trace changed to nop_trace to keep it from
4149                  * swapping the buffers after we resize it.
4150                  * update_max_tr() is called with interrupts disabled,
4151                  * so a synchronize_sched() is sufficient.
4152                  */
4153                 synchronize_sched();
4154                 free_snapshot(tr);
4155         }
4156 #endif
4157         /* Currently, only the top instance has options */
4158         if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
4159                 destroy_trace_option_files(topts);
4160                 topts = create_trace_option_files(tr, t);
4161         }
4162
4163 #ifdef CONFIG_TRACER_MAX_TRACE
4164         if (t->use_max_tr && !had_max_tr) {
4165                 ret = alloc_snapshot(tr);
4166                 if (ret < 0)
4167                         goto out;
4168         }
4169 #endif
4170
4171         if (t->init) {
4172                 ret = tracer_init(t, tr);
4173                 if (ret)
4174                         goto out;
4175         }
4176
4177         tr->current_trace = t;
4178         tr->current_trace->enabled++;
4179         trace_branch_enable(tr);
4180  out:
4181         mutex_unlock(&trace_types_lock);
4182
4183         return ret;
4184 }
4185
4186 static ssize_t
4187 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4188                         size_t cnt, loff_t *ppos)
4189 {
4190         struct trace_array *tr = filp->private_data;
4191         char buf[MAX_TRACER_SIZE+1];
4192         int i;
4193         size_t ret;
4194         int err;
4195
4196         ret = cnt;
4197
4198         if (cnt > MAX_TRACER_SIZE)
4199                 cnt = MAX_TRACER_SIZE;
4200
4201         if (copy_from_user(&buf, ubuf, cnt))
4202                 return -EFAULT;
4203
4204         buf[cnt] = 0;
4205
4206         /* strip trailing whitespace. */
4207         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4208                 buf[i] = 0;
4209
4210         err = tracing_set_tracer(tr, buf);
4211         if (err)
4212                 return err;
4213
4214         *ppos += ret;
4215
4216         return ret;
4217 }
4218
4219 static ssize_t
4220 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4221                    size_t cnt, loff_t *ppos)
4222 {
4223         char buf[64];
4224         int r;
4225
4226         r = snprintf(buf, sizeof(buf), "%ld\n",
4227                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4228         if (r > sizeof(buf))
4229                 r = sizeof(buf);
4230         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4231 }
4232
4233 static ssize_t
4234 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4235                     size_t cnt, loff_t *ppos)
4236 {
4237         unsigned long val;
4238         int ret;
4239
4240         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4241         if (ret)
4242                 return ret;
4243
4244         *ptr = val * 1000;
4245
4246         return cnt;
4247 }
4248
4249 static ssize_t
4250 tracing_thresh_read(struct file *filp, char __user *ubuf,
4251                     size_t cnt, loff_t *ppos)
4252 {
4253         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4254 }
4255
4256 static ssize_t
4257 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4258                      size_t cnt, loff_t *ppos)
4259 {
4260         struct trace_array *tr = filp->private_data;
4261         int ret;
4262
4263         mutex_lock(&trace_types_lock);
4264         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4265         if (ret < 0)
4266                 goto out;
4267
4268         if (tr->current_trace->update_thresh) {
4269                 ret = tr->current_trace->update_thresh(tr);
4270                 if (ret < 0)
4271                         goto out;
4272         }
4273
4274         ret = cnt;
4275 out:
4276         mutex_unlock(&trace_types_lock);
4277
4278         return ret;
4279 }
4280
4281 static ssize_t
4282 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4283                      size_t cnt, loff_t *ppos)
4284 {
4285         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4286 }
4287
4288 static ssize_t
4289 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4290                       size_t cnt, loff_t *ppos)
4291 {
4292         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4293 }
4294
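/*
 * Open a consuming reader (trace_pipe): allocate a trace_iterator and
 * copy the current tracer so that concurrent tracer changes do not
 * disturb this reader.
 */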
4295 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4296 {
4297         struct trace_array *tr = inode->i_private;
4298         struct trace_iterator *iter;
4299         int ret = 0;
4300
4301         if (tracing_disabled)
4302                 return -ENODEV;
4303
4304         if (trace_array_get(tr) < 0)
4305                 return -ENODEV;
4306
4307         mutex_lock(&trace_types_lock);
4308
4309         /* create a buffer to store the information to pass to userspace */
4310         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4311         if (!iter) {
4312                 ret = -ENOMEM;
4313                 __trace_array_put(tr);
4314                 goto out;
4315         }
4316
4317         /*
4318          * We make a copy of the current tracer to avoid concurrent
4319          * changes on it while we are reading.
4320          */
4321         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4322         if (!iter->trace) {
4323                 ret = -ENOMEM;
4324                 goto fail;
4325         }
4326         *iter->trace = *tr->current_trace;
4327
4328         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4329                 ret = -ENOMEM;
4330                 goto fail;
4331         }
4332
4333         /* trace pipe does not show start of buffer */
4334         cpumask_setall(iter->started);
4335
4336         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4337                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4338
4339         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4340         if (trace_clocks[tr->clock_id].in_ns)
4341                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4342
4343         iter->tr = tr;
4344         iter->trace_buffer = &tr->trace_buffer;
4345         iter->cpu_file = tracing_get_cpu(inode);
4346         mutex_init(&iter->mutex);
4347         filp->private_data = iter;
4348
4349         if (iter->trace->pipe_open)
4350                 iter->trace->pipe_open(iter);
4351
4352         nonseekable_open(inode, filp);
4353 out:
4354         mutex_unlock(&trace_types_lock);
4355         return ret;
4356
4357 fail:
4358         kfree(iter->trace);
4359         kfree(iter);
4360         __trace_array_put(tr);
4361         mutex_unlock(&trace_types_lock);
4362         return ret;
4363 }
4364
4365 static int tracing_release_pipe(struct inode *inode, struct file *file)
4366 {
4367         struct trace_iterator *iter = file->private_data;
4368         struct trace_array *tr = inode->i_private;
4369
4370         mutex_lock(&trace_types_lock);
4371
4372         if (iter->trace->pipe_close)
4373                 iter->trace->pipe_close(iter);
4374
4375         mutex_unlock(&trace_types_lock);
4376
4377         free_cpumask_var(iter->started);
4378         mutex_destroy(&iter->mutex);
4379         kfree(iter->trace);
4380         kfree(iter);
4381
4382         trace_array_put(tr);
4383
4384         return 0;
4385 }
4386
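/*
 * Poll helper for trace readers: report readable immediately for static
 * iterators (they are either filled or empty), always readable in
 * blocking mode, otherwise defer to ring_buffer_poll_wait().
 */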
4387 static unsigned int
4388 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4389 {
4390         /* Iterators are static, they should be filled or empty */
4391         if (trace_buffer_iter(iter, iter->cpu_file))
4392                 return POLLIN | POLLRDNORM;
4393
4394         if (trace_flags & TRACE_ITER_BLOCK)
4395                 /*
4396                  * Always select as readable when in blocking mode
4397                  */
4398                 return POLLIN | POLLRDNORM;
4399         else
4400                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4401                                              filp, poll_table);
4402 }
4403
4404 static unsigned int
4405 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4406 {
4407         struct trace_iterator *iter = filp->private_data;
4408
4409         return trace_poll(iter, filp, poll_table);
4410 }
4411
4412 /* Must be called with trace_types_lock mutex held. */
4413 static int tracing_wait_pipe(struct file *filp)
4414 {
4415         struct trace_iterator *iter = filp->private_data;
4416         int ret;
4417
4418         while (trace_empty(iter)) {
4419
4420                 if ((filp->f_flags & O_NONBLOCK)) {
4421                         return -EAGAIN;
4422                 }
4423
4424                 /*
4425                  * We block until we read something and tracing is disabled.
4426                  * We still block if tracing is disabled, but we have never
4427                  * read anything. This allows a user to cat this file, and
4428                  * then enable tracing. But after we have read something,
4429                  * we give an EOF when tracing is again disabled.
4430                  *
4431                  * iter->pos will be 0 if we haven't read anything.
4432                  */
4433                 if (!tracing_is_on() && iter->pos)
4434                         break;
4435
4436                 mutex_unlock(&iter->mutex);
4437
4438                 ret = wait_on_pipe(iter, false);
4439
4440                 mutex_lock(&iter->mutex);
4441
4442                 if (ret)
4443                         return ret;
4444         }
4445
4446         return 1;
4447 }
4448
4449 /*
4450  * Consumer reader.
4451  */
4452 static ssize_t
4453 tracing_read_pipe(struct file *filp, char __user *ubuf,
4454                   size_t cnt, loff_t *ppos)
4455 {
4456         struct trace_iterator *iter = filp->private_data;
4457         struct trace_array *tr = iter->tr;
4458         ssize_t sret;
4459
4460         /* return any leftover data */
4461         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4462         if (sret != -EBUSY)
4463                 return sret;
4464
4465         trace_seq_init(&iter->seq);
4466
4467         /* copy the tracer to avoid using a global lock all around */
4468         mutex_lock(&trace_types_lock);
4469         if (unlikely(iter->trace->name != tr->current_trace->name))
4470                 *iter->trace = *tr->current_trace;
4471         mutex_unlock(&trace_types_lock);
4472
4473         /*
4474          * Avoid more than one consumer on a single file descriptor.
4475          * This is just a matter of trace coherency; the ring buffer itself
4476          * is protected.
4477          */
4478         mutex_lock(&iter->mutex);
4479         if (iter->trace->read) {
4480                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4481                 if (sret)
4482                         goto out;
4483         }
4484
4485 waitagain:
4486         sret = tracing_wait_pipe(filp);
4487         if (sret <= 0)
4488                 goto out;
4489
4490         /* stop when tracing is finished */
4491         if (trace_empty(iter)) {
4492                 sret = 0;
4493                 goto out;
4494         }
4495
4496         if (cnt >= PAGE_SIZE)
4497                 cnt = PAGE_SIZE - 1;
4498
4499         /* reset all but tr, trace, and overruns */
4500         memset(&iter->seq, 0,
4501                sizeof(struct trace_iterator) -
4502                offsetof(struct trace_iterator, seq));
4503         cpumask_clear(iter->started);
4504         iter->pos = -1;
4505
4506         trace_event_read_lock();
4507         trace_access_lock(iter->cpu_file);
4508         while (trace_find_next_entry_inc(iter) != NULL) {
4509                 enum print_line_t ret;
4510                 int len = iter->seq.len;
4511
4512                 ret = print_trace_line(iter);
4513                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4514                         /* don't print partial lines */
4515                         iter->seq.len = len;
4516                         break;
4517                 }
4518                 if (ret != TRACE_TYPE_NO_CONSUME)
4519                         trace_consume(iter);
4520
4521                 if (iter->seq.len >= cnt)
4522                         break;
4523
4524                 /*
4525                  * Setting the full flag means we reached the trace_seq buffer
4526                  * size and we should leave by partial output condition above.
4527                  * One of the trace_seq_* functions is not used properly.
4528                  */
4529                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4530                           iter->ent->type);
4531         }
4532         trace_access_unlock(iter->cpu_file);
4533         trace_event_read_unlock();
4534
4535         /* Now copy what we have to the user */
4536         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4537         if (iter->seq.readpos >= iter->seq.len)
4538                 trace_seq_init(&iter->seq);
4539
4540         /*
4541          * If there was nothing to send to the user, in spite of consuming trace
4542          * entries, go back to wait for more entries.
4543          */
4544         if (sret == -EBUSY)
4545                 goto waitagain;
4546
4547 out:
4548         mutex_unlock(&iter->mutex);
4549
4550         return sret;
4551 }
4552
4553 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4554                                      unsigned int idx)
4555 {
4556         __free_page(spd->pages[idx]);
4557 }
4558
4559 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4560         .can_merge              = 0,
4561         .confirm                = generic_pipe_buf_confirm,
4562         .release                = generic_pipe_buf_release,
4563         .steal                  = generic_pipe_buf_steal,
4564         .get                    = generic_pipe_buf_get,
4565 };
4566
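/*
 * Format trace entries into the iterator's page-sized seq buffer for
 * splice, stopping when the page is full or the remaining splice length
 * is exhausted.
 */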
4567 static size_t
4568 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4569 {
4570         size_t count;
4571         int ret;
4572
4573         /* Seq buffer is page-sized, exactly what we need. */
4574         for (;;) {
4575                 count = iter->seq.len;
4576                 ret = print_trace_line(iter);
4577                 count = iter->seq.len - count;
4578                 if (rem < count) {
4579                         rem = 0;
4580                         iter->seq.len -= count;
4581                         break;
4582                 }
4583                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4584                         iter->seq.len -= count;
4585                         break;
4586                 }
4587
4588                 if (ret != TRACE_TYPE_NO_CONSUME)
4589                         trace_consume(iter);
4590                 rem -= count;
4591                 if (!trace_find_next_entry_inc(iter))   {
4592                         rem = 0;
4593                         iter->ent = NULL;
4594                         break;
4595                 }
4596         }
4597
4598         return rem;
4599 }
4600
4601 static ssize_t tracing_splice_read_pipe(struct file *filp,
4602                                         loff_t *ppos,
4603                                         struct pipe_inode_info *pipe,
4604                                         size_t len,
4605                                         unsigned int flags)
4606 {
4607         struct page *pages_def[PIPE_DEF_BUFFERS];
4608         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4609         struct trace_iterator *iter = filp->private_data;
4610         struct splice_pipe_desc spd = {
4611                 .pages          = pages_def,
4612                 .partial        = partial_def,
4613                 .nr_pages       = 0, /* This gets updated below. */
4614                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4615                 .flags          = flags,
4616                 .ops            = &tracing_pipe_buf_ops,
4617                 .spd_release    = tracing_spd_release_pipe,
4618         };
4619         struct trace_array *tr = iter->tr;
4620         ssize_t ret;
4621         size_t rem;
4622         unsigned int i;
4623
4624         if (splice_grow_spd(pipe, &spd))
4625                 return -ENOMEM;
4626
4627         /* copy the tracer to avoid using a global lock all around */
4628         mutex_lock(&trace_types_lock);
4629         if (unlikely(iter->trace->name != tr->current_trace->name))
4630                 *iter->trace = *tr->current_trace;
4631         mutex_unlock(&trace_types_lock);
4632
4633         mutex_lock(&iter->mutex);
4634
4635         if (iter->trace->splice_read) {
4636                 ret = iter->trace->splice_read(iter, filp,
4637                                                ppos, pipe, len, flags);
4638                 if (ret)
4639                         goto out_err;
4640         }
4641
4642         ret = tracing_wait_pipe(filp);
4643         if (ret <= 0)
4644                 goto out_err;
4645
4646         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4647                 ret = -EFAULT;
4648                 goto out_err;
4649         }
4650
4651         trace_event_read_lock();
4652         trace_access_lock(iter->cpu_file);
4653
4654         /* Fill as many pages as possible. */
4655         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4656                 spd.pages[i] = alloc_page(GFP_KERNEL);
4657                 if (!spd.pages[i])
4658                         break;
4659
4660                 rem = tracing_fill_pipe_page(rem, iter);
4661
4662                 /* Copy the data into the page, so we can start over. */
4663                 ret = trace_seq_to_buffer(&iter->seq,
4664                                           page_address(spd.pages[i]),
4665                                           iter->seq.len);
4666                 if (ret < 0) {
4667                         __free_page(spd.pages[i]);
4668                         break;
4669                 }
4670                 spd.partial[i].offset = 0;
4671                 spd.partial[i].len = iter->seq.len;
4672
4673                 trace_seq_init(&iter->seq);
4674         }
4675
4676         trace_access_unlock(iter->cpu_file);
4677         trace_event_read_unlock();
4678         mutex_unlock(&iter->mutex);
4679
4680         spd.nr_pages = i;
4681
4682         ret = splice_to_pipe(pipe, &spd);
4683 out:
4684         splice_shrink_spd(&spd);
4685         return ret;
4686
4687 out_err:
4688         mutex_unlock(&iter->mutex);
4689         goto out;
4690 }
4691
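/*
 * Read buffer_size_kb: report the per-cpu ring buffer size in KB, or
 * "X" when the individual CPU buffers have been resized to different
 * sizes.
 */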
4692 static ssize_t
4693 tracing_entries_read(struct file *filp, char __user *ubuf,
4694                      size_t cnt, loff_t *ppos)
4695 {
4696         struct inode *inode = file_inode(filp);
4697         struct trace_array *tr = inode->i_private;
4698         int cpu = tracing_get_cpu(inode);
4699         char buf[64];
4700         int r = 0;
4701         ssize_t ret;
4702
4703         mutex_lock(&trace_types_lock);
4704
4705         if (cpu == RING_BUFFER_ALL_CPUS) {
4706                 int cpu, buf_size_same;
4707                 unsigned long size;
4708
4709                 size = 0;
4710                 buf_size_same = 1;
4711                 /* check if all cpu sizes are the same */
4712                 for_each_tracing_cpu(cpu) {
4713                         /* fill in the size from the first enabled cpu */
4714                         if (size == 0)
4715                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4716                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4717                                 buf_size_same = 0;
4718                                 break;
4719                         }
4720                 }
4721
4722                 if (buf_size_same) {
4723                         if (!ring_buffer_expanded)
4724                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4725                                             size >> 10,
4726                                             trace_buf_size >> 10);
4727                         else
4728                                 r = sprintf(buf, "%lu\n", size >> 10);
4729                 } else
4730                         r = sprintf(buf, "X\n");
4731         } else
4732                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4733
4734         mutex_unlock(&trace_types_lock);
4735
4736         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4737         return ret;
4738 }
4739
4740 static ssize_t
4741 tracing_entries_write(struct file *filp, const char __user *ubuf,
4742                       size_t cnt, loff_t *ppos)
4743 {
4744         struct inode *inode = file_inode(filp);
4745         struct trace_array *tr = inode->i_private;
4746         unsigned long val;
4747         int ret;
4748
4749         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4750         if (ret)
4751                 return ret;
4752
4753         /* must have at least 1 entry */
4754         if (!val)
4755                 return -EINVAL;
4756
4757         /* value is in KB */
4758         val <<= 10;
4759         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4760         if (ret < 0)
4761                 return ret;
4762
4763         *ppos += cnt;
4764
4765         return cnt;
4766 }
4767
4768 static ssize_t
4769 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4770                                 size_t cnt, loff_t *ppos)
4771 {
4772         struct trace_array *tr = filp->private_data;
4773         char buf[64];
4774         int r, cpu;
4775         unsigned long size = 0, expanded_size = 0;
4776
4777         mutex_lock(&trace_types_lock);
4778         for_each_tracing_cpu(cpu) {
4779                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4780                 if (!ring_buffer_expanded)
4781                         expanded_size += trace_buf_size >> 10;
4782         }
4783         if (ring_buffer_expanded)
4784                 r = sprintf(buf, "%lu\n", size);
4785         else
4786                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4787         mutex_unlock(&trace_types_lock);
4788
4789         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4790 }
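/*
 * Example buffer_total_size_kb output (values are illustrative only):
 * "7168" once the ring buffer has been expanded, or "16 (expanded: 7168)"
 * while it is still at its boot-time minimum size.
 */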
4791
4792 static ssize_t
4793 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4794                           size_t cnt, loff_t *ppos)
4795 {
4796         /*
4797          * There is no need to read what the user has written; this function
4798          * exists only so that "echo" into free_buffer does not return an error.
4799          */
4800
4801         *ppos += cnt;
4802
4803         return cnt;
4804 }
4805
4806 static int
4807 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4808 {
4809         struct trace_array *tr = inode->i_private;
4810
4811         /* disable tracing? */
4812         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4813                 tracer_tracing_off(tr);
4814         /* resize the ring buffer to 0 */
4815         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4816
4817         trace_array_put(tr);
4818
4819         return 0;
4820 }
4821
4822 static ssize_t
4823 tracing_mark_write(struct file *filp, const char __user *ubuf,
4824                                         size_t cnt, loff_t *fpos)
4825 {
4826         unsigned long addr = (unsigned long)ubuf;
4827         struct trace_array *tr = filp->private_data;
4828         struct ring_buffer_event *event;
4829         struct ring_buffer *buffer;
4830         struct print_entry *entry;
4831         unsigned long irq_flags;
4832         struct page *pages[2];
4833         void *map_page[2];
4834         int nr_pages = 1;
4835         ssize_t written;
4836         int offset;
4837         int size;
4838         int len;
4839         int ret;
4840         int i;
4841
4842         if (tracing_disabled)
4843                 return -EINVAL;
4844
4845         if (!(trace_flags & TRACE_ITER_MARKERS))
4846                 return -EINVAL;
4847
4848         if (cnt > TRACE_BUF_SIZE)
4849                 cnt = TRACE_BUF_SIZE;
4850
4851         /*
4852          * Userspace is injecting traces into the kernel trace buffer.
4853          * We want to be as non-intrusive as possible.
4854          * To do so, we do not want to allocate any special buffers
4855          * or take any locks, but instead write the userspace data
4856          * straight into the ring buffer.
4857          *
4858          * First we need to pin the userspace buffer into memory. It is
4859          * most likely already resident, because userspace just referenced
4860          * it, but there is no guarantee. By using get_user_pages_fast()
4861          * and kmap_atomic/kunmap_atomic() we can get access to the
4862          * pages directly. We then write the data directly into the
4863          * ring buffer.
4864          */
4865         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4866
4867         /* check if we cross pages */
4868         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4869                 nr_pages = 2;
4870
4871         offset = addr & (PAGE_SIZE - 1);
4872         addr &= PAGE_MASK;
4873
4874         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4875         if (ret < nr_pages) {
4876                 while (--ret >= 0)
4877                         put_page(pages[ret]);
4878                 written = -EFAULT;
4879                 goto out;
4880         }
4881
4882         for (i = 0; i < nr_pages; i++)
4883                 map_page[i] = kmap_atomic(pages[i]);
4884
4885         local_save_flags(irq_flags);
4886         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4887         buffer = tr->trace_buffer.buffer;
4888         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4889                                           irq_flags, preempt_count());
4890         if (!event) {
4891                 /* Ring buffer disabled, return as if not open for write */
4892                 written = -EBADF;
4893                 goto out_unlock;
4894         }
4895
4896         entry = ring_buffer_event_data(event);
4897         entry->ip = _THIS_IP_;
4898
4899         if (nr_pages == 2) {
4900                 len = PAGE_SIZE - offset;
4901                 memcpy(&entry->buf, map_page[0] + offset, len);
4902                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4903         } else
4904                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4905
4906         if (entry->buf[cnt - 1] != '\n') {
4907                 entry->buf[cnt] = '\n';
4908                 entry->buf[cnt + 1] = '\0';
4909         } else
4910                 entry->buf[cnt] = '\0';
4911
4912         __buffer_unlock_commit(buffer, event);
4913
4914         written = cnt;
4915
4916         *fpos += written;
4917
4918  out_unlock:
4919         for (i = 0; i < nr_pages; i++) {
4920                 kunmap_atomic(map_page[i]);
4921                 put_page(pages[i]);
4922         }
4923  out:
4924         return written;
4925 }
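/*
 * Illustrative trace_marker usage from user space (assuming the
 * conventional /sys/kernel/debug mount point). The written text appears
 * in the trace as a print entry, with a '\n' appended if none was given:
 *
 *   echo "hit the interesting code path" > /sys/kernel/debug/tracing/trace_marker
 */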
4926
4927 static int tracing_clock_show(struct seq_file *m, void *v)
4928 {
4929         struct trace_array *tr = m->private;
4930         int i;
4931
4932         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4933                 seq_printf(m,
4934                         "%s%s%s%s", i ? " " : "",
4935                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4936                         i == tr->clock_id ? "]" : "");
4937         seq_putc(m, '\n');
4938
4939         return 0;
4940 }
4941
4942 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
4943 {
4944         int i;
4945
4946         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4947                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4948                         break;
4949         }
4950         if (i == ARRAY_SIZE(trace_clocks))
4951                 return -EINVAL;
4952
4953         mutex_lock(&trace_types_lock);
4954
4955         tr->clock_id = i;
4956
4957         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4958
4959         /*
4960          * New clock may not be consistent with the previous clock.
4961          * Reset the buffer so that it doesn't have incomparable timestamps.
4962          */
4963         tracing_reset_online_cpus(&tr->trace_buffer);
4964
4965 #ifdef CONFIG_TRACER_MAX_TRACE
4966         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4967                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4968         tracing_reset_online_cpus(&tr->max_buffer);
4969 #endif
4970
4971         mutex_unlock(&trace_types_lock);
4972
4973         return 0;
4974 }
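/*
 * Illustrative trace_clock usage (clock names come from the trace_clocks[]
 * table; "local" and "global" are the common ones):
 *
 *   cat /sys/kernel/debug/tracing/trace_clock        # current clock shown in [ ]
 *   echo global > /sys/kernel/debug/tracing/trace_clock
 *
 * As noted above, switching clocks resets the ring buffer, so previously
 * recorded events are discarded.
 */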
4975
4976 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4977                                    size_t cnt, loff_t *fpos)
4978 {
4979         struct seq_file *m = filp->private_data;
4980         struct trace_array *tr = m->private;
4981         char buf[64];
4982         const char *clockstr;
4983         int ret;
4984
4985         if (cnt >= sizeof(buf))
4986                 return -EINVAL;
4987
4988         if (copy_from_user(&buf, ubuf, cnt))
4989                 return -EFAULT;
4990
4991         buf[cnt] = 0;
4992
4993         clockstr = strstrip(buf);
4994
4995         ret = tracing_set_clock(tr, clockstr);
4996         if (ret)
4997                 return ret;
4998
4999         *fpos += cnt;
5000
5001         return cnt;
5002 }
5003
5004 static int tracing_clock_open(struct inode *inode, struct file *file)
5005 {
5006         struct trace_array *tr = inode->i_private;
5007         int ret;
5008
5009         if (tracing_disabled)
5010                 return -ENODEV;
5011
5012         if (trace_array_get(tr))
5013                 return -ENODEV;
5014
5015         ret = single_open(file, tracing_clock_show, inode->i_private);
5016         if (ret < 0)
5017                 trace_array_put(tr);
5018
5019         return ret;
5020 }
5021
5022 struct ftrace_buffer_info {
5023         struct trace_iterator   iter;
5024         void                    *spare;
5025         unsigned int            read;
5026 };
5027
5028 #ifdef CONFIG_TRACER_SNAPSHOT
5029 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5030 {
5031         struct trace_array *tr = inode->i_private;
5032         struct trace_iterator *iter;
5033         struct seq_file *m;
5034         int ret = 0;
5035
5036         if (trace_array_get(tr) < 0)
5037                 return -ENODEV;
5038
5039         if (file->f_mode & FMODE_READ) {
5040                 iter = __tracing_open(inode, file, true);
5041                 if (IS_ERR(iter))
5042                         ret = PTR_ERR(iter);
5043         } else {
5044                 /* Writes still need the seq_file to hold the private data */
5045                 ret = -ENOMEM;
5046                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5047                 if (!m)
5048                         goto out;
5049                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5050                 if (!iter) {
5051                         kfree(m);
5052                         goto out;
5053                 }
5054                 ret = 0;
5055
5056                 iter->tr = tr;
5057                 iter->trace_buffer = &tr->max_buffer;
5058                 iter->cpu_file = tracing_get_cpu(inode);
5059                 m->private = iter;
5060                 file->private_data = m;
5061         }
5062 out:
5063         if (ret < 0)
5064                 trace_array_put(tr);
5065
5066         return ret;
5067 }
5068
5069 static ssize_t
5070 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5071                        loff_t *ppos)
5072 {
5073         struct seq_file *m = filp->private_data;
5074         struct trace_iterator *iter = m->private;
5075         struct trace_array *tr = iter->tr;
5076         unsigned long val;
5077         int ret;
5078
5079         ret = tracing_update_buffers();
5080         if (ret < 0)
5081                 return ret;
5082
5083         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5084         if (ret)
5085                 return ret;
5086
5087         mutex_lock(&trace_types_lock);
5088
5089         if (tr->current_trace->use_max_tr) {
5090                 ret = -EBUSY;
5091                 goto out;
5092         }
5093
5094         switch (val) {
5095         case 0:
5096                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5097                         ret = -EINVAL;
5098                         break;
5099                 }
5100                 if (tr->allocated_snapshot)
5101                         free_snapshot(tr);
5102                 break;
5103         case 1:
5104 /* Only allow per-cpu swap if the ring buffer supports it */
5105 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5106                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5107                         ret = -EINVAL;
5108                         break;
5109                 }
5110 #endif
5111                 if (!tr->allocated_snapshot) {
5112                         ret = alloc_snapshot(tr);
5113                         if (ret < 0)
5114                                 break;
5115                 }
5116                 local_irq_disable();
5117                 /* Now, we're going to swap */
5118                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5119                         update_max_tr(tr, current, smp_processor_id());
5120                 else
5121                         update_max_tr_single(tr, current, iter->cpu_file);
5122                 local_irq_enable();
5123                 break;
5124         default:
5125                 if (tr->allocated_snapshot) {
5126                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5127                                 tracing_reset_online_cpus(&tr->max_buffer);
5128                         else
5129                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5130                 }
5131                 break;
5132         }
5133
5134         if (ret >= 0) {
5135                 *ppos += cnt;
5136                 ret = cnt;
5137         }
5138 out:
5139         mutex_unlock(&trace_types_lock);
5140         return ret;
5141 }
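/*
 * Summary of the values accepted by the snapshot file, matching the
 * switch statement above (example commands assume the conventional
 * debugfs mount point):
 *
 *   echo 0 > snapshot  - free the snapshot buffer (all-CPU file only)
 *   echo 1 > snapshot  - allocate the buffer if needed and swap it with
 *                        the live buffer (take a snapshot)
 *   echo 2 > snapshot  - any other value clears the snapshot buffer
 *                        without freeing it
 *   cat snapshot       - read the snapshotted events
 */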
5142
5143 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5144 {
5145         struct seq_file *m = file->private_data;
5146         int ret;
5147
5148         ret = tracing_release(inode, file);
5149
5150         if (file->f_mode & FMODE_READ)
5151                 return ret;
5152
5153         /* If write only, the seq_file is just a stub */
5154         if (m)
5155                 kfree(m->private);
5156         kfree(m);
5157
5158         return 0;
5159 }
5160
5161 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5162 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5163                                     size_t count, loff_t *ppos);
5164 static int tracing_buffers_release(struct inode *inode, struct file *file);
5165 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5166                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5167
5168 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5169 {
5170         struct ftrace_buffer_info *info;
5171         int ret;
5172
5173         ret = tracing_buffers_open(inode, filp);
5174         if (ret < 0)
5175                 return ret;
5176
5177         info = filp->private_data;
5178
5179         if (info->iter.trace->use_max_tr) {
5180                 tracing_buffers_release(inode, filp);
5181                 return -EBUSY;
5182         }
5183
5184         info->iter.snapshot = true;
5185         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5186
5187         return ret;
5188 }
5189
5190 #endif /* CONFIG_TRACER_SNAPSHOT */
5191
5192
5193 static const struct file_operations tracing_thresh_fops = {
5194         .open           = tracing_open_generic,
5195         .read           = tracing_thresh_read,
5196         .write          = tracing_thresh_write,
5197         .llseek         = generic_file_llseek,
5198 };
5199
5200 static const struct file_operations tracing_max_lat_fops = {
5201         .open           = tracing_open_generic,
5202         .read           = tracing_max_lat_read,
5203         .write          = tracing_max_lat_write,
5204         .llseek         = generic_file_llseek,
5205 };
5206
5207 static const struct file_operations set_tracer_fops = {
5208         .open           = tracing_open_generic,
5209         .read           = tracing_set_trace_read,
5210         .write          = tracing_set_trace_write,
5211         .llseek         = generic_file_llseek,
5212 };
5213
5214 static const struct file_operations tracing_pipe_fops = {
5215         .open           = tracing_open_pipe,
5216         .poll           = tracing_poll_pipe,
5217         .read           = tracing_read_pipe,
5218         .splice_read    = tracing_splice_read_pipe,
5219         .release        = tracing_release_pipe,
5220         .llseek         = no_llseek,
5221 };
5222
5223 static const struct file_operations tracing_entries_fops = {
5224         .open           = tracing_open_generic_tr,
5225         .read           = tracing_entries_read,
5226         .write          = tracing_entries_write,
5227         .llseek         = generic_file_llseek,
5228         .release        = tracing_release_generic_tr,
5229 };
5230
5231 static const struct file_operations tracing_total_entries_fops = {
5232         .open           = tracing_open_generic_tr,
5233         .read           = tracing_total_entries_read,
5234         .llseek         = generic_file_llseek,
5235         .release        = tracing_release_generic_tr,
5236 };
5237
5238 static const struct file_operations tracing_free_buffer_fops = {
5239         .open           = tracing_open_generic_tr,
5240         .write          = tracing_free_buffer_write,
5241         .release        = tracing_free_buffer_release,
5242 };
5243
5244 static const struct file_operations tracing_mark_fops = {
5245         .open           = tracing_open_generic_tr,
5246         .write          = tracing_mark_write,
5247         .llseek         = generic_file_llseek,
5248         .release        = tracing_release_generic_tr,
5249 };
5250
5251 static const struct file_operations trace_clock_fops = {
5252         .open           = tracing_clock_open,
5253         .read           = seq_read,
5254         .llseek         = seq_lseek,
5255         .release        = tracing_single_release_tr,
5256         .write          = tracing_clock_write,
5257 };
5258
5259 #ifdef CONFIG_TRACER_SNAPSHOT
5260 static const struct file_operations snapshot_fops = {
5261         .open           = tracing_snapshot_open,
5262         .read           = seq_read,
5263         .write          = tracing_snapshot_write,
5264         .llseek         = tracing_lseek,
5265         .release        = tracing_snapshot_release,
5266 };
5267
5268 static const struct file_operations snapshot_raw_fops = {
5269         .open           = snapshot_raw_open,
5270         .read           = tracing_buffers_read,
5271         .release        = tracing_buffers_release,
5272         .splice_read    = tracing_buffers_splice_read,
5273         .llseek         = no_llseek,
5274 };
5275
5276 #endif /* CONFIG_TRACER_SNAPSHOT */
5277
5278 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5279 {
5280         struct trace_array *tr = inode->i_private;
5281         struct ftrace_buffer_info *info;
5282         int ret;
5283
5284         if (tracing_disabled)
5285                 return -ENODEV;
5286
5287         if (trace_array_get(tr) < 0)
5288                 return -ENODEV;
5289
5290         info = kzalloc(sizeof(*info), GFP_KERNEL);
5291         if (!info) {
5292                 trace_array_put(tr);
5293                 return -ENOMEM;
5294         }
5295
5296         mutex_lock(&trace_types_lock);
5297
5298         info->iter.tr           = tr;
5299         info->iter.cpu_file     = tracing_get_cpu(inode);
5300         info->iter.trace        = tr->current_trace;
5301         info->iter.trace_buffer = &tr->trace_buffer;
5302         info->spare             = NULL;
5303         /* Force reading ring buffer for first read */
5304         info->read              = (unsigned int)-1;
5305
5306         filp->private_data = info;
5307
5308         mutex_unlock(&trace_types_lock);
5309
5310         ret = nonseekable_open(inode, filp);
5311         if (ret < 0)
5312                 trace_array_put(tr);
5313
5314         return ret;
5315 }
5316
5317 static unsigned int
5318 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5319 {
5320         struct ftrace_buffer_info *info = filp->private_data;
5321         struct trace_iterator *iter = &info->iter;
5322
5323         return trace_poll(iter, filp, poll_table);
5324 }
5325
5326 static ssize_t
5327 tracing_buffers_read(struct file *filp, char __user *ubuf,
5328                      size_t count, loff_t *ppos)
5329 {
5330         struct ftrace_buffer_info *info = filp->private_data;
5331         struct trace_iterator *iter = &info->iter;
5332         ssize_t ret;
5333         ssize_t size;
5334
5335         if (!count)
5336                 return 0;
5337
5338         mutex_lock(&trace_types_lock);
5339
5340 #ifdef CONFIG_TRACER_MAX_TRACE
5341         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5342                 size = -EBUSY;
5343                 goto out_unlock;
5344         }
5345 #endif
5346
5347         if (!info->spare)
5348                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5349                                                           iter->cpu_file);
5350         size = -ENOMEM;
5351         if (!info->spare)
5352                 goto out_unlock;
5353
5354         /* Do we have previous read data to read? */
5355         if (info->read < PAGE_SIZE)
5356                 goto read;
5357
5358  again:
5359         trace_access_lock(iter->cpu_file);
5360         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5361                                     &info->spare,
5362                                     count,
5363                                     iter->cpu_file, 0);
5364         trace_access_unlock(iter->cpu_file);
5365
5366         if (ret < 0) {
5367                 if (trace_empty(iter)) {
5368                         if ((filp->f_flags & O_NONBLOCK)) {
5369                                 size = -EAGAIN;
5370                                 goto out_unlock;
5371                         }
5372                         mutex_unlock(&trace_types_lock);
5373                         ret = wait_on_pipe(iter, false);
5374                         mutex_lock(&trace_types_lock);
5375                         if (ret) {
5376                                 size = ret;
5377                                 goto out_unlock;
5378                         }
5379                         goto again;
5380                 }
5381                 size = 0;
5382                 goto out_unlock;
5383         }
5384
5385         info->read = 0;
5386  read:
5387         size = PAGE_SIZE - info->read;
5388         if (size > count)
5389                 size = count;
5390
5391         ret = copy_to_user(ubuf, info->spare + info->read, size);
5392         if (ret == size) {
5393                 size = -EFAULT;
5394                 goto out_unlock;
5395         }
5396         size -= ret;
5397
5398         *ppos += size;
5399         info->read += size;
5400
5401  out_unlock:
5402         mutex_unlock(&trace_types_lock);
5403
5404         return size;
5405 }
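/*
 * tracing_buffers_read() backs the per_cpu/cpuN/trace_pipe_raw files.
 * Each read returns raw ring buffer page data, at most PAGE_SIZE at a
 * time from the spare page above, so consumers typically read in
 * page-sized chunks, e.g. (illustrative, assuming 4K pages):
 *
 *   dd if=/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw \
 *      of=cpu0.raw bs=4096
 */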
5406
5407 static int tracing_buffers_release(struct inode *inode, struct file *file)
5408 {
5409         struct ftrace_buffer_info *info = file->private_data;
5410         struct trace_iterator *iter = &info->iter;
5411
5412         mutex_lock(&trace_types_lock);
5413
5414         __trace_array_put(iter->tr);
5415
5416         if (info->spare)
5417                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5418         kfree(info);
5419
5420         mutex_unlock(&trace_types_lock);
5421
5422         return 0;
5423 }
5424
5425 struct buffer_ref {
5426         struct ring_buffer      *buffer;
5427         void                    *page;
5428         int                     ref;
5429 };
5430
5431 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5432                                     struct pipe_buffer *buf)
5433 {
5434         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5435
5436         if (--ref->ref)
5437                 return;
5438
5439         ring_buffer_free_read_page(ref->buffer, ref->page);
5440         kfree(ref);
5441         buf->private = 0;
5442 }
5443
5444 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5445                                 struct pipe_buffer *buf)
5446 {
5447         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5448
5449         ref->ref++;
5450 }
5451
5452 /* Pipe buffer operations for a buffer. */
5453 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5454         .can_merge              = 0,
5455         .confirm                = generic_pipe_buf_confirm,
5456         .release                = buffer_pipe_buf_release,
5457         .steal                  = generic_pipe_buf_steal,
5458         .get                    = buffer_pipe_buf_get,
5459 };
5460
5461 /*
5462  * Callback from splice_to_pipe(); releases any pages left in the spd
5463  * if we errored out while filling the pipe.
5464  */
5465 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5466 {
5467         struct buffer_ref *ref =
5468                 (struct buffer_ref *)spd->partial[i].private;
5469
5470         if (--ref->ref)
5471                 return;
5472
5473         ring_buffer_free_read_page(ref->buffer, ref->page);
5474         kfree(ref);
5475         spd->partial[i].private = 0;
5476 }
5477
5478 static ssize_t
5479 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5480                             struct pipe_inode_info *pipe, size_t len,
5481                             unsigned int flags)
5482 {
5483         struct ftrace_buffer_info *info = file->private_data;
5484         struct trace_iterator *iter = &info->iter;
5485         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5486         struct page *pages_def[PIPE_DEF_BUFFERS];
5487         struct splice_pipe_desc spd = {
5488                 .pages          = pages_def,
5489                 .partial        = partial_def,
5490                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5491                 .flags          = flags,
5492                 .ops            = &buffer_pipe_buf_ops,
5493                 .spd_release    = buffer_spd_release,
5494         };
5495         struct buffer_ref *ref;
5496         int entries, size, i;
5497         ssize_t ret = 0;
5498
5499         mutex_lock(&trace_types_lock);
5500
5501 #ifdef CONFIG_TRACER_MAX_TRACE
5502         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5503                 ret = -EBUSY;
5504                 goto out;
5505         }
5506 #endif
5507
5508         if (splice_grow_spd(pipe, &spd)) {
5509                 ret = -ENOMEM;
5510                 goto out;
5511         }
5512
5513         if (*ppos & (PAGE_SIZE - 1)) {
5514                 ret = -EINVAL;
5515                 goto out;
5516         }
5517
5518         if (len & (PAGE_SIZE - 1)) {
5519                 if (len < PAGE_SIZE) {
5520                         ret = -EINVAL;
5521                         goto out;
5522                 }
5523                 len &= PAGE_MASK;
5524         }
5525
5526  again:
5527         trace_access_lock(iter->cpu_file);
5528         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5529
5530         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5531                 struct page *page;
5532                 int r;
5533
5534                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5535                 if (!ref) {
5536                         ret = -ENOMEM;
5537                         break;
5538                 }
5539
5540                 ref->ref = 1;
5541                 ref->buffer = iter->trace_buffer->buffer;
5542                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5543                 if (!ref->page) {
5544                         ret = -ENOMEM;
5545                         kfree(ref);
5546                         break;
5547                 }
5548
5549                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5550                                           len, iter->cpu_file, 1);
5551                 if (r < 0) {
5552                         ring_buffer_free_read_page(ref->buffer, ref->page);
5553                         kfree(ref);
5554                         break;
5555                 }
5556
5557                 /*
5558                  * Zero out any left-over data, since this page is
5559                  * going to user land.
5560                  */
5561                 size = ring_buffer_page_len(ref->page);
5562                 if (size < PAGE_SIZE)
5563                         memset(ref->page + size, 0, PAGE_SIZE - size);
5564
5565                 page = virt_to_page(ref->page);
5566
5567                 spd.pages[i] = page;
5568                 spd.partial[i].len = PAGE_SIZE;
5569                 spd.partial[i].offset = 0;
5570                 spd.partial[i].private = (unsigned long)ref;
5571                 spd.nr_pages++;
5572                 *ppos += PAGE_SIZE;
5573
5574                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5575         }
5576
5577         trace_access_unlock(iter->cpu_file);
5578         spd.nr_pages = i;
5579
5580         /* did we read anything? */
5581         if (!spd.nr_pages) {
5582                 if (ret)
5583                         goto out;
5584
5585                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5586                         ret = -EAGAIN;
5587                         goto out;
5588                 }
5589                 mutex_unlock(&trace_types_lock);
5590                 ret = wait_on_pipe(iter, true);
5591                 mutex_lock(&trace_types_lock);
5592                 if (ret)
5593                         goto out;
5594
5595                 goto again;
5596         }
5597
5598         ret = splice_to_pipe(pipe, &spd);
5599         splice_shrink_spd(&spd);
5600 out:
5601         mutex_unlock(&trace_types_lock);
5602
5603         return ret;
5604 }
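/*
 * Note on the splice path above: *ppos must be page aligned and len is
 * rounded down to whole pages (a request smaller than one page is
 * rejected), because complete ring buffer pages are handed to the pipe.
 * Userspace tools such as trace-cmd use this to move trace data out of
 * the kernel without an extra copy through a read() buffer.
 */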
5605
5606 static const struct file_operations tracing_buffers_fops = {
5607         .open           = tracing_buffers_open,
5608         .read           = tracing_buffers_read,
5609         .poll           = tracing_buffers_poll,
5610         .release        = tracing_buffers_release,
5611         .splice_read    = tracing_buffers_splice_read,
5612         .llseek         = no_llseek,
5613 };
5614
5615 static ssize_t
5616 tracing_stats_read(struct file *filp, char __user *ubuf,
5617                    size_t count, loff_t *ppos)
5618 {
5619         struct inode *inode = file_inode(filp);
5620         struct trace_array *tr = inode->i_private;
5621         struct trace_buffer *trace_buf = &tr->trace_buffer;
5622         int cpu = tracing_get_cpu(inode);
5623         struct trace_seq *s;
5624         unsigned long cnt;
5625         unsigned long long t;
5626         unsigned long usec_rem;
5627
5628         s = kmalloc(sizeof(*s), GFP_KERNEL);
5629         if (!s)
5630                 return -ENOMEM;
5631
5632         trace_seq_init(s);
5633
5634         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5635         trace_seq_printf(s, "entries: %ld\n", cnt);
5636
5637         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5638         trace_seq_printf(s, "overrun: %ld\n", cnt);
5639
5640         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5641         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5642
5643         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5644         trace_seq_printf(s, "bytes: %ld\n", cnt);
5645
5646         if (trace_clocks[tr->clock_id].in_ns) {
5647                 /* local or global for trace_clock */
5648                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5649                 usec_rem = do_div(t, USEC_PER_SEC);
5650                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5651                                                                 t, usec_rem);
5652
5653                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5654                 usec_rem = do_div(t, USEC_PER_SEC);
5655                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5656         } else {
5657                 /* counter or tsc mode for trace_clock */
5658                 trace_seq_printf(s, "oldest event ts: %llu\n",
5659                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5660
5661                 trace_seq_printf(s, "now ts: %llu\n",
5662                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5663         }
5664
5665         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5666         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5667
5668         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5669         trace_seq_printf(s, "read events: %ld\n", cnt);
5670
5671         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5672
5673         kfree(s);
5674
5675         return count;
5676 }
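/*
 * A read of per_cpu/cpuN/stats produces one "name: value" line per
 * counter gathered above, for example (values are illustrative only):
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 47872
 *   oldest event ts:  2571.960382
 *   now ts:  2577.133204
 *   dropped events: 0
 *   read events: 128
 */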
5677
5678 static const struct file_operations tracing_stats_fops = {
5679         .open           = tracing_open_generic_tr,
5680         .read           = tracing_stats_read,
5681         .llseek         = generic_file_llseek,
5682         .release        = tracing_release_generic_tr,
5683 };
5684
5685 #ifdef CONFIG_DYNAMIC_FTRACE
5686
5687 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5688 {
5689         return 0;
5690 }
5691
5692 static ssize_t
5693 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5694                   size_t cnt, loff_t *ppos)
5695 {
5696         static char ftrace_dyn_info_buffer[1024];
5697         static DEFINE_MUTEX(dyn_info_mutex);
5698         unsigned long *p = filp->private_data;
5699         char *buf = ftrace_dyn_info_buffer;
5700         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5701         int r;
5702
5703         mutex_lock(&dyn_info_mutex);
5704         r = sprintf(buf, "%ld ", *p);
5705
5706         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5707         buf[r++] = '\n';
5708
5709         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5710
5711         mutex_unlock(&dyn_info_mutex);
5712
5713         return r;
5714 }
5715
5716 static const struct file_operations tracing_dyn_info_fops = {
5717         .open           = tracing_open_generic,
5718         .read           = tracing_read_dyn_info,
5719         .llseek         = generic_file_llseek,
5720 };
5721 #endif /* CONFIG_DYNAMIC_FTRACE */
5722
5723 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5724 static void
5725 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5726 {
5727         tracing_snapshot();
5728 }
5729
5730 static void
5731 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5732 {
5733         unsigned long *count = (unsigned long *)data;
5734
5735         if (!*count)
5736                 return;
5737
5738         if (*count != -1)
5739                 (*count)--;
5740
5741         tracing_snapshot();
5742 }
5743
5744 static int
5745 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5746                       struct ftrace_probe_ops *ops, void *data)
5747 {
5748         long count = (long)data;
5749
5750         seq_printf(m, "%ps:", (void *)ip);
5751
5752         seq_printf(m, "snapshot");
5753
5754         if (count == -1)
5755                 seq_printf(m, ":unlimited\n");
5756         else
5757                 seq_printf(m, ":count=%ld\n", count);
5758
5759         return 0;
5760 }
5761
5762 static struct ftrace_probe_ops snapshot_probe_ops = {
5763         .func                   = ftrace_snapshot,
5764         .print                  = ftrace_snapshot_print,
5765 };
5766
5767 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5768         .func                   = ftrace_count_snapshot,
5769         .print                  = ftrace_snapshot_print,
5770 };
5771
5772 static int
5773 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5774                                char *glob, char *cmd, char *param, int enable)
5775 {
5776         struct ftrace_probe_ops *ops;
5777         void *count = (void *)-1;
5778         char *number;
5779         int ret;
5780
5781         /* hash funcs only work with set_ftrace_filter */
5782         if (!enable)
5783                 return -EINVAL;
5784
5785         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5786
5787         if (glob[0] == '!') {
5788                 unregister_ftrace_function_probe_func(glob+1, ops);
5789                 return 0;
5790         }
5791
5792         if (!param)
5793                 goto out_reg;
5794
5795         number = strsep(&param, ":");
5796
5797         if (!strlen(number))
5798                 goto out_reg;
5799
5800         /*
5801          * We use the callback data field (which is a pointer)
5802          * as our counter.
5803          */
5804         ret = kstrtoul(number, 0, (unsigned long *)&count);
5805         if (ret)
5806                 return ret;
5807
5808  out_reg:
5809         ret = register_ftrace_function_probe(glob, ops, count);
5810
5811         if (ret >= 0)
5812                 alloc_snapshot(&global_trace);
5813
5814         return ret < 0 ? ret : 0;
5815 }
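/*
 * Illustrative usage of the "snapshot" command registered below; it is
 * written into set_ftrace_filter, and "schedule" is only an example
 * function name:
 *
 *   echo 'schedule:snapshot' > set_ftrace_filter      # snapshot on every hit
 *   echo '!schedule:snapshot' > set_ftrace_filter     # remove that probe
 *   echo 'schedule:snapshot:5' > set_ftrace_filter    # only the first 5 hits
 */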
5816
5817 static struct ftrace_func_command ftrace_snapshot_cmd = {
5818         .name                   = "snapshot",
5819         .func                   = ftrace_trace_snapshot_callback,
5820 };
5821
5822 static __init int register_snapshot_cmd(void)
5823 {
5824         return register_ftrace_command(&ftrace_snapshot_cmd);
5825 }
5826 #else
5827 static inline __init int register_snapshot_cmd(void) { return 0; }
5828 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5829
5830 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5831 {
5832         if (tr->dir)
5833                 return tr->dir;
5834
5835         if (!debugfs_initialized())
5836                 return NULL;
5837
5838         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5839                 tr->dir = debugfs_create_dir("tracing", NULL);
5840
5841         if (!tr->dir)
5842                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5843
5844         return tr->dir;
5845 }
5846
5847 struct dentry *tracing_init_dentry(void)
5848 {
5849         return tracing_init_dentry_tr(&global_trace);
5850 }
5851
5852 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5853 {
5854         struct dentry *d_tracer;
5855
5856         if (tr->percpu_dir)
5857                 return tr->percpu_dir;
5858
5859         d_tracer = tracing_init_dentry_tr(tr);
5860         if (!d_tracer)
5861                 return NULL;
5862
5863         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5864
5865         WARN_ONCE(!tr->percpu_dir,
5866                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5867
5868         return tr->percpu_dir;
5869 }
5870
5871 static struct dentry *
5872 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5873                       void *data, long cpu, const struct file_operations *fops)
5874 {
5875         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5876
5877         if (ret) /* See tracing_get_cpu() */
5878                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5879         return ret;
5880 }
5881
5882 static void
5883 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5884 {
5885         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5886         struct dentry *d_cpu;
5887         char cpu_dir[30]; /* 30 characters should be more than enough */
5888
5889         if (!d_percpu)
5890                 return;
5891
5892         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5893         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5894         if (!d_cpu) {
5895                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5896                 return;
5897         }
5898
5899         /* per cpu trace_pipe */
5900         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5901                                 tr, cpu, &tracing_pipe_fops);
5902
5903         /* per cpu trace */
5904         trace_create_cpu_file("trace", 0644, d_cpu,
5905                                 tr, cpu, &tracing_fops);
5906
5907         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5908                                 tr, cpu, &tracing_buffers_fops);
5909
5910         trace_create_cpu_file("stats", 0444, d_cpu,
5911                                 tr, cpu, &tracing_stats_fops);
5912
5913         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5914                                 tr, cpu, &tracing_entries_fops);
5915
5916 #ifdef CONFIG_TRACER_SNAPSHOT
5917         trace_create_cpu_file("snapshot", 0644, d_cpu,
5918                                 tr, cpu, &snapshot_fops);
5919
5920         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5921                                 tr, cpu, &snapshot_raw_fops);
5922 #endif
5923 }
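/*
 * After the calls above, each per_cpu/cpuN/ directory contains (snapshot
 * files only with CONFIG_TRACER_SNAPSHOT):
 *
 *   trace  trace_pipe  trace_pipe_raw  stats  buffer_size_kb
 *   snapshot  snapshot_raw
 */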
5924
5925 #ifdef CONFIG_FTRACE_SELFTEST
5926 /* Let selftest have access to static functions in this file */
5927 #include "trace_selftest.c"
5928 #endif
5929
5930 struct trace_option_dentry {
5931         struct tracer_opt               *opt;
5932         struct tracer_flags             *flags;
5933         struct trace_array              *tr;
5934         struct dentry                   *entry;
5935 };
5936
5937 static ssize_t
5938 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5939                         loff_t *ppos)
5940 {
5941         struct trace_option_dentry *topt = filp->private_data;
5942         char *buf;
5943
5944         if (topt->flags->val & topt->opt->bit)
5945                 buf = "1\n";
5946         else
5947                 buf = "0\n";
5948
5949         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5950 }
5951
5952 static ssize_t
5953 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5954                          loff_t *ppos)
5955 {
5956         struct trace_option_dentry *topt = filp->private_data;
5957         unsigned long val;
5958         int ret;
5959
5960         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5961         if (ret)
5962                 return ret;
5963
5964         if (val != 0 && val != 1)
5965                 return -EINVAL;
5966
5967         if (!!(topt->flags->val & topt->opt->bit) != val) {
5968                 mutex_lock(&trace_types_lock);
5969                 ret = __set_tracer_option(topt->tr, topt->flags,
5970                                           topt->opt, !val);
5971                 mutex_unlock(&trace_types_lock);
5972                 if (ret)
5973                         return ret;
5974         }
5975
5976         *ppos += cnt;
5977
5978         return cnt;
5979 }
5980
5981
5982 static const struct file_operations trace_options_fops = {
5983         .open = tracing_open_generic,
5984         .read = trace_options_read,
5985         .write = trace_options_write,
5986         .llseek = generic_file_llseek,
5987 };
5988
5989 static ssize_t
5990 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5991                         loff_t *ppos)
5992 {
5993         long index = (long)filp->private_data;
5994         char *buf;
5995
5996         if (trace_flags & (1 << index))
5997                 buf = "1\n";
5998         else
5999                 buf = "0\n";
6000
6001         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6002 }
6003
6004 static ssize_t
6005 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6006                          loff_t *ppos)
6007 {
6008         struct trace_array *tr = &global_trace;
6009         long index = (long)filp->private_data;
6010         unsigned long val;
6011         int ret;
6012
6013         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6014         if (ret)
6015                 return ret;
6016
6017         if (val != 0 && val != 1)
6018                 return -EINVAL;
6019
6020         mutex_lock(&trace_types_lock);
6021         ret = set_tracer_flag(tr, 1 << index, val);
6022         mutex_unlock(&trace_types_lock);
6023
6024         if (ret < 0)
6025                 return ret;
6026
6027         *ppos += cnt;
6028
6029         return cnt;
6030 }
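/*
 * The per-flag option files backed by trace_options_core_read/write()
 * above live under the "options" directory and accept only 0 or 1, e.g.
 * (the flag name is just one example entry from the trace_options[] table):
 *
 *   echo 1 > /sys/kernel/debug/tracing/options/sym-offset
 *   echo 0 > /sys/kernel/debug/tracing/options/sym-offset
 */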
6031
6032 static const struct file_operations trace_options_core_fops = {
6033         .open = tracing_open_generic,
6034         .read = trace_options_core_read,
6035         .write = trace_options_core_write,
6036         .llseek = generic_file_llseek,
6037 };
6038
6039 struct dentry *trace_create_file(const char *name,
6040                                  umode_t mode,
6041                                  struct dentry *parent,
6042                                  void *data,
6043                                  const struct file_operations *fops)
6044 {
6045         struct dentry *ret;
6046
6047         ret = debugfs_create_file(name, mode, parent, data, fops);
6048         if (!ret)
6049                 pr_warning("Could not create debugfs '%s' entry\n", name);
6050
6051         return ret;
6052 }
6053
6054
6055 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6056 {
6057         struct dentry *d_tracer;
6058
6059         if (tr->options)
6060                 return tr->options;
6061
6062         d_tracer = tracing_init_dentry_tr(tr);
6063         if (!d_tracer)
6064                 return NULL;
6065
6066         tr->options = debugfs_create_dir("options", d_tracer);
6067         if (!tr->options) {
6068                 pr_warning("Could not create debugfs directory 'options'\n");
6069                 return NULL;
6070         }
6071
6072         return tr->options;
6073 }
6074
6075 static void
6076 create_trace_option_file(struct trace_array *tr,
6077                          struct trace_option_dentry *topt,
6078                          struct tracer_flags *flags,
6079                          struct tracer_opt *opt)
6080 {
6081         struct dentry *t_options;
6082
6083         t_options = trace_options_init_dentry(tr);
6084         if (!t_options)
6085                 return;
6086
6087         topt->flags = flags;
6088         topt->opt = opt;
6089         topt->tr = tr;
6090
6091         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6092                                     &trace_options_fops);
6093
6094 }
6095
6096 static struct trace_option_dentry *
6097 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6098 {
6099         struct trace_option_dentry *topts;
6100         struct tracer_flags *flags;
6101         struct tracer_opt *opts;
6102         int cnt;
6103
6104         if (!tracer)
6105                 return NULL;
6106
6107         flags = tracer->flags;
6108
6109         if (!flags || !flags->opts)
6110                 return NULL;
6111
6112         opts = flags->opts;
6113
6114         for (cnt = 0; opts[cnt].name; cnt++)
6115                 ;
6116
6117         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6118         if (!topts)
6119                 return NULL;
6120
6121         for (cnt = 0; opts[cnt].name; cnt++)
6122                 create_trace_option_file(tr, &topts[cnt], flags,
6123                                          &opts[cnt]);
6124
6125         return topts;
6126 }
6127
6128 static void
6129 destroy_trace_option_files(struct trace_option_dentry *topts)
6130 {
6131         int cnt;
6132
6133         if (!topts)
6134                 return;
6135
6136         for (cnt = 0; topts[cnt].opt; cnt++)
6137                 debugfs_remove(topts[cnt].entry);
6138
6139         kfree(topts);
6140 }
6141
6142 static struct dentry *
6143 create_trace_option_core_file(struct trace_array *tr,
6144                               const char *option, long index)
6145 {
6146         struct dentry *t_options;
6147
6148         t_options = trace_options_init_dentry(tr);
6149         if (!t_options)
6150                 return NULL;
6151
6152         return trace_create_file(option, 0644, t_options, (void *)index,
6153                                     &trace_options_core_fops);
6154 }
6155
6156 static __init void create_trace_options_dir(struct trace_array *tr)
6157 {
6158         struct dentry *t_options;
6159         int i;
6160
6161         t_options = trace_options_init_dentry(tr);
6162         if (!t_options)
6163                 return;
6164
6165         for (i = 0; trace_options[i]; i++)
6166                 create_trace_option_core_file(tr, trace_options[i], i);
6167 }
6168
6169 static ssize_t
6170 rb_simple_read(struct file *filp, char __user *ubuf,
6171                size_t cnt, loff_t *ppos)
6172 {
6173         struct trace_array *tr = filp->private_data;
6174         char buf[64];
6175         int r;
6176
6177         r = tracer_tracing_is_on(tr);
6178         r = sprintf(buf, "%d\n", r);
6179
6180         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6181 }
6182
6183 static ssize_t
6184 rb_simple_write(struct file *filp, const char __user *ubuf,
6185                 size_t cnt, loff_t *ppos)
6186 {
6187         struct trace_array *tr = filp->private_data;
6188         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6189         unsigned long val;
6190         int ret;
6191
6192         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6193         if (ret)
6194                 return ret;
6195
6196         if (buffer) {
6197                 mutex_lock(&trace_types_lock);
6198                 if (val) {
6199                         tracer_tracing_on(tr);
6200                         if (tr->current_trace->start)
6201                                 tr->current_trace->start(tr);
6202                 } else {
6203                         tracer_tracing_off(tr);
6204                         if (tr->current_trace->stop)
6205                                 tr->current_trace->stop(tr);
6206                 }
6207                 mutex_unlock(&trace_types_lock);
6208         }
6209
6210         (*ppos)++;
6211
6212         return cnt;
6213 }
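/*
 * rb_simple_write() backs the "tracing_on" file, which is a quick way to
 * stop the ring buffer from recording without tearing anything down:
 *
 *   echo 0 > /sys/kernel/debug/tracing/tracing_on    # pause recording
 *   echo 1 > /sys/kernel/debug/tracing/tracing_on    # resume recording
 */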
6214
6215 static const struct file_operations rb_simple_fops = {
6216         .open           = tracing_open_generic_tr,
6217         .read           = rb_simple_read,
6218         .write          = rb_simple_write,
6219         .release        = tracing_release_generic_tr,
6220         .llseek         = default_llseek,
6221 };
6222
6223 struct dentry *trace_instance_dir;
6224
6225 static void
6226 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
6227
6228 static int
6229 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6230 {
6231         enum ring_buffer_flags rb_flags;
6232
6233         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6234
6235         buf->tr = tr;
6236
6237         buf->buffer = ring_buffer_alloc(size, rb_flags);
6238         if (!buf->buffer)
6239                 return -ENOMEM;
6240
6241         buf->data = alloc_percpu(struct trace_array_cpu);
6242         if (!buf->data) {
6243                 ring_buffer_free(buf->buffer);
6244                 return -ENOMEM;
6245         }
6246
6247         /* Allocate the first page for all buffers */
6248         set_buffer_entries(buf,
6249                            ring_buffer_size(buf->buffer, 0));
6250
6251         return 0;
6252 }
6253
6254 static int allocate_trace_buffers(struct trace_array *tr, int size)
6255 {
6256         int ret;
6257
6258         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6259         if (ret)
6260                 return ret;
6261
6262 #ifdef CONFIG_TRACER_MAX_TRACE
6263         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6264                                     allocate_snapshot ? size : 1);
6265         if (WARN_ON(ret)) {
6266                 ring_buffer_free(tr->trace_buffer.buffer);
6267                 free_percpu(tr->trace_buffer.data);
6268                 return -ENOMEM;
6269         }
6270         tr->allocated_snapshot = allocate_snapshot;
6271
6272         /*
6273          * Only the top level trace array gets its snapshot allocated
6274          * from the kernel command line.
6275          */
6276         allocate_snapshot = false;
6277 #endif
6278         return 0;
6279 }
6280
6281 static void free_trace_buffer(struct trace_buffer *buf)
6282 {
6283         if (buf->buffer) {
6284                 ring_buffer_free(buf->buffer);
6285                 buf->buffer = NULL;
6286                 free_percpu(buf->data);
6287                 buf->data = NULL;
6288         }
6289 }
6290
6291 static void free_trace_buffers(struct trace_array *tr)
6292 {
6293         if (!tr)
6294                 return;
6295
6296         free_trace_buffer(&tr->trace_buffer);
6297
6298 #ifdef CONFIG_TRACER_MAX_TRACE
6299         free_trace_buffer(&tr->max_buffer);
6300 #endif
6301 }
6302
6303 static int new_instance_create(const char *name)
6304 {
6305         struct trace_array *tr;
6306         int ret;
6307
6308         mutex_lock(&trace_types_lock);
6309
6310         ret = -EEXIST;
6311         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6312                 if (tr->name && strcmp(tr->name, name) == 0)
6313                         goto out_unlock;
6314         }
6315
6316         ret = -ENOMEM;
6317         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6318         if (!tr)
6319                 goto out_unlock;
6320
6321         tr->name = kstrdup(name, GFP_KERNEL);
6322         if (!tr->name)
6323                 goto out_free_tr;
6324
6325         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6326                 goto out_free_tr;
6327
6328         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6329
6330         raw_spin_lock_init(&tr->start_lock);
6331
6332         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6333
6334         tr->current_trace = &nop_trace;
6335
6336         INIT_LIST_HEAD(&tr->systems);
6337         INIT_LIST_HEAD(&tr->events);
6338
6339         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6340                 goto out_free_tr;
6341
6342         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6343         if (!tr->dir)
6344                 goto out_free_tr;
6345
6346         ret = event_trace_add_tracer(tr->dir, tr);
6347         if (ret) {
6348                 debugfs_remove_recursive(tr->dir);
6349                 goto out_free_tr;
6350         }
6351
6352         init_tracer_debugfs(tr, tr->dir);
6353
6354         list_add(&tr->list, &ftrace_trace_arrays);
6355
6356         mutex_unlock(&trace_types_lock);
6357
6358         return 0;
6359
6360  out_free_tr:
6361         free_trace_buffers(tr);
6362         free_cpumask_var(tr->tracing_cpumask);
6363         kfree(tr->name);
6364         kfree(tr);
6365
6366  out_unlock:
6367         mutex_unlock(&trace_types_lock);
6368
6369         return ret;
6370
6371 }
6372
6373 static int instance_delete(const char *name)
6374 {
6375         struct trace_array *tr;
6376         int found = 0;
6377         int ret;
6378
6379         mutex_lock(&trace_types_lock);
6380
6381         ret = -ENODEV;
6382         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6383                 if (tr->name && strcmp(tr->name, name) == 0) {
6384                         found = 1;
6385                         break;
6386                 }
6387         }
6388         if (!found)
6389                 goto out_unlock;
6390
6391         ret = -EBUSY;
6392         if (tr->ref)
6393                 goto out_unlock;
6394
6395         list_del(&tr->list);
6396
6397         tracing_set_nop(tr);
6398         event_trace_del_tracer(tr);
6399         ftrace_destroy_function_files(tr);
6400         debugfs_remove_recursive(tr->dir);
6401         free_trace_buffers(tr);
6402
6403         kfree(tr->name);
6404         kfree(tr);
6405
6406         ret = 0;
6407
6408  out_unlock:
6409         mutex_unlock(&trace_types_lock);
6410
6411         return ret;
6412 }
6413
6414 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6415 {
6416         struct dentry *parent;
6417         int ret;
6418
6419         /* Paranoid: Make sure the parent is the "instances" directory */
6420         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6421         if (WARN_ON_ONCE(parent != trace_instance_dir))
6422                 return -ENOENT;
6423
6424         /*
6425          * The inode mutex is locked, but debugfs_create_dir() will also
6426          * take the mutex. As the instances directory can not be destroyed
6427          * or changed in any other way, it is safe to unlock it, and
6428          * let the dentry try. If two users try to make the same dir at
6429          * the same time, then new_instance_create() will determine the
6430          * winner.
6431          */
6432         mutex_unlock(&inode->i_mutex);
6433
6434         ret = new_instance_create(dentry->d_iname);
6435
6436         mutex_lock(&inode->i_mutex);
6437
6438         return ret;
6439 }
6440
6441 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6442 {
6443         struct dentry *parent;
6444         int ret;
6445
6446         /* Paranoid: Make sure the parent is the "instances" directory */
6447         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6448         if (WARN_ON_ONCE(parent != trace_instance_dir))
6449                 return -ENOENT;
6450
6451         /* The caller did a dget() on dentry */
6452         mutex_unlock(&dentry->d_inode->i_mutex);
6453
6454         /*
6455          * The inode mutex is locked, but the debugfs code will also take
6456          * it when removing the directory. As the instances directory
6457          * cannot be destroyed or changed in any other way, it is safe to
6458          * unlock it and let the dentry try. If two users try to remove
6459          * the same dir at the same time, instance_delete() will determine the
6460          * winner.
6461          */
6462         mutex_unlock(&inode->i_mutex);
6463
6464         ret = instance_delete(dentry->d_iname);
6465
6466         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6467         mutex_lock(&dentry->d_inode->i_mutex);
6468
6469         return ret;
6470 }
6471
6472 static const struct inode_operations instance_dir_inode_operations = {
6473         .lookup         = simple_lookup,
6474         .mkdir          = instance_mkdir,
6475         .rmdir          = instance_rmdir,
6476 };
6477
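/*
 * Set up the "instances" directory.  Its inode operations are replaced
 * so that a mkdir/rmdir from userspace creates or deletes a trace_array
 * instance, e.g. (assuming debugfs is mounted at /sys/kernel/debug):
 *
 *   mkdir /sys/kernel/debug/tracing/instances/foo   # -> new_instance_create()
 *   rmdir /sys/kernel/debug/tracing/instances/foo   # -> instance_delete()
 */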
6478 static __init void create_trace_instances(struct dentry *d_tracer)
6479 {
6480         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6481         if (WARN_ON(!trace_instance_dir))
6482                 return;
6483
6484         /* Hijack the dir inode operations, to allow mkdir */
6485         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6486 }
6487
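/*
 * Create the per-instance control files (current_tracer, trace,
 * trace_pipe, buffer_size_kb, tracing_on, ...) and the per-CPU
 * directories for @tr under @d_tracer.
 */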
6488 static void
6489 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6490 {
6491         int cpu;
6492
6493         trace_create_file("available_tracers", 0444, d_tracer,
6494                         tr, &show_traces_fops);
6495
6496         trace_create_file("current_tracer", 0644, d_tracer,
6497                         tr, &set_tracer_fops);
6498
6499         trace_create_file("tracing_cpumask", 0644, d_tracer,
6500                           tr, &tracing_cpumask_fops);
6501
6502         trace_create_file("trace_options", 0644, d_tracer,
6503                           tr, &tracing_iter_fops);
6504
6505         trace_create_file("trace", 0644, d_tracer,
6506                           tr, &tracing_fops);
6507
6508         trace_create_file("trace_pipe", 0444, d_tracer,
6509                           tr, &tracing_pipe_fops);
6510
6511         trace_create_file("buffer_size_kb", 0644, d_tracer,
6512                           tr, &tracing_entries_fops);
6513
6514         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6515                           tr, &tracing_total_entries_fops);
6516
6517         trace_create_file("free_buffer", 0200, d_tracer,
6518                           tr, &tracing_free_buffer_fops);
6519
6520         trace_create_file("trace_marker", 0220, d_tracer,
6521                           tr, &tracing_mark_fops);
6522
6523         trace_create_file("trace_clock", 0644, d_tracer, tr,
6524                           &trace_clock_fops);
6525
6526         trace_create_file("tracing_on", 0644, d_tracer,
6527                           tr, &rb_simple_fops);
6528
6529 #ifdef CONFIG_TRACER_MAX_TRACE
6530         trace_create_file("tracing_max_latency", 0644, d_tracer,
6531                         &tr->max_latency, &tracing_max_lat_fops);
6532 #endif
6533
6534         if (ftrace_create_function_files(tr, d_tracer))
6535                 WARN(1, "Could not allocate function filter files");
6536
6537 #ifdef CONFIG_TRACER_SNAPSHOT
6538         trace_create_file("snapshot", 0644, d_tracer,
6539                           tr, &snapshot_fops);
6540 #endif
6541
6542         for_each_tracing_cpu(cpu)
6543                 tracing_init_debugfs_percpu(tr, cpu);
6544
6545 }
6546
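/*
 * Boot-time initcall: create the tracing debugfs files for the global
 * trace array, plus global-only files (tracing_thresh, README,
 * saved_cmdlines, ...), the "instances" directory and the trace
 * options directory.
 */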
6547 static __init int tracer_init_debugfs(void)
6548 {
6549         struct dentry *d_tracer;
6550
6551         trace_access_lock_init();
6552
6553         d_tracer = tracing_init_dentry();
6554         if (!d_tracer)
6555                 return 0;
6556
6557         init_tracer_debugfs(&global_trace, d_tracer);
6558
6559         trace_create_file("tracing_thresh", 0644, d_tracer,
6560                         &global_trace, &tracing_thresh_fops);
6561
6562         trace_create_file("README", 0444, d_tracer,
6563                         NULL, &tracing_readme_fops);
6564
6565         trace_create_file("saved_cmdlines", 0444, d_tracer,
6566                         NULL, &tracing_saved_cmdlines_fops);
6567
6568         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
6569                           NULL, &tracing_saved_cmdlines_size_fops);
6570
6571 #ifdef CONFIG_DYNAMIC_FTRACE
6572         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6573                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6574 #endif
6575
6576         create_trace_instances(d_tracer);
6577
6578         create_trace_options_dir(&global_trace);
6579
6580         return 0;
6581 }
6582
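/* On panic, dump the ftrace buffer to the console if the user asked for it. */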
6583 static int trace_panic_handler(struct notifier_block *this,
6584                                unsigned long event, void *unused)
6585 {
6586         if (ftrace_dump_on_oops)
6587                 ftrace_dump(ftrace_dump_on_oops);
6588         return NOTIFY_OK;
6589 }
6590
6591 static struct notifier_block trace_panic_notifier = {
6592         .notifier_call  = trace_panic_handler,
6593         .next           = NULL,
6594         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6595 };
6596
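/* Same as the panic handler, but invoked from the die notifier chain on an oops. */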
6597 static int trace_die_handler(struct notifier_block *self,
6598                              unsigned long val,
6599                              void *data)
6600 {
6601         switch (val) {
6602         case DIE_OOPS:
6603                 if (ftrace_dump_on_oops)
6604                         ftrace_dump(ftrace_dump_on_oops);
6605                 break;
6606         default:
6607                 break;
6608         }
6609         return NOTIFY_OK;
6610 }
6611
6612 static struct notifier_block trace_die_notifier = {
6613         .notifier_call = trace_die_handler,
6614         .priority = 200
6615 };
6616
6617 /*
6618  * printk is limited to a max of 1024 characters; we really don't need
6619  * it that big.  Nothing should be printing 1000 characters anyway.
6620  */
6621 #define TRACE_MAX_PRINT         1000
6622
6623 /*
6624  * Define here KERN_TRACE so that we have one place to modify
6625  * it if we decide to change what log level the ftrace dump
6626  * should be at.
6627  */
6628 #define KERN_TRACE              KERN_EMERG
6629
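/*
 * Write the contents of a trace_seq to the console at KERN_TRACE level
 * and reset the sequence for reuse.  Used by ftrace_dump() below.
 */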
6630 void
6631 trace_printk_seq(struct trace_seq *s)
6632 {
6633         /* Probably should print a warning here. */
6634         if (s->len >= TRACE_MAX_PRINT)
6635                 s->len = TRACE_MAX_PRINT;
6636
6637         /* should be zero terminated, but we are paranoid. */
6638         s->buffer[s->len] = 0;
6639
6640         printk(KERN_TRACE "%s", s->buffer);
6641
6642         trace_seq_init(s);
6643 }
6644
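/*
 * Prepare an iterator that walks the global trace buffer across all
 * CPUs; used by ftrace_dump() below.
 */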
6645 void trace_init_global_iter(struct trace_iterator *iter)
6646 {
6647         iter->tr = &global_trace;
6648         iter->trace = iter->tr->current_trace;
6649         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6650         iter->trace_buffer = &global_trace.trace_buffer;
6651
6652         if (iter->trace && iter->trace->open)
6653                 iter->trace->open(iter);
6654
6655         /* Annotate start of buffers if we had overruns */
6656         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6657                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6658
6659         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6660         if (trace_clocks[iter->tr->clock_id].in_ns)
6661                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6662 }
6663
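/*
 * Dump the ring buffer(s) to the console, typically on an oops or
 * panic.  Tracing is turned off first and only one dumper may run at
 * a time; @oops_dump_mode selects all CPUs or just the current one.
 */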
6664 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6665 {
6666         /* use static because iter can be a bit big for the stack */
6667         static struct trace_iterator iter;
6668         static atomic_t dump_running;
6669         unsigned int old_userobj;
6670         unsigned long flags;
6671         int cnt = 0, cpu;
6672
6673         /* Only allow one dump user at a time. */
6674         if (atomic_inc_return(&dump_running) != 1) {
6675                 atomic_dec(&dump_running);
6676                 return;
6677         }
6678
6679         /*
6680          * Always turn off tracing when we dump.
6681          * We don't need to show trace output of what happens
6682          * between multiple crashes.
6683          *
6684          * If the user does a sysrq-z, then they can re-enable
6685          * tracing with echo 1 > tracing_on.
6686          */
6687         tracing_off();
6688
6689         local_irq_save(flags);
6690
6691         /* Simulate the iterator */
6692         trace_init_global_iter(&iter);
6693
6694         for_each_tracing_cpu(cpu) {
6695                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6696         }
6697
6698         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6699
6700         /* don't look at user memory in panic mode */
6701         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6702
6703         switch (oops_dump_mode) {
6704         case DUMP_ALL:
6705                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6706                 break;
6707         case DUMP_ORIG:
6708                 iter.cpu_file = raw_smp_processor_id();
6709                 break;
6710         case DUMP_NONE:
6711                 goto out_enable;
6712         default:
6713                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6714                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6715         }
6716
6717         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6718
6719         /* Did function tracer already get disabled? */
6720         if (ftrace_is_dead()) {
6721                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6722                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6723         }
6724
6725         /*
6726          * We need to stop all tracing on all CPUs to read
6727          * the next buffer. This is a bit expensive, but is
6728          * not done often. We read all that we can, and then
6729          * release the locks again.
6730          */
6731
6732         while (!trace_empty(&iter)) {
6733
6734                 if (!cnt)
6735                         printk(KERN_TRACE "---------------------------------\n");
6736
6737                 cnt++;
6738
6739                 /* reset all but tr, trace, and overruns */
6740                 memset(&iter.seq, 0,
6741                        sizeof(struct trace_iterator) -
6742                        offsetof(struct trace_iterator, seq));
6743                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6744                 iter.pos = -1;
6745
6746                 if (trace_find_next_entry_inc(&iter) != NULL) {
6747                         int ret;
6748
6749                         ret = print_trace_line(&iter);
6750                         if (ret != TRACE_TYPE_NO_CONSUME)
6751                                 trace_consume(&iter);
6752                 }
6753                 touch_nmi_watchdog();
6754
6755                 trace_printk_seq(&iter.seq);
6756         }
6757
6758         if (!cnt)
6759                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6760         else
6761                 printk(KERN_TRACE "---------------------------------\n");
6762
6763  out_enable:
6764         trace_flags |= old_userobj;
6765
6766         for_each_tracing_cpu(cpu) {
6767                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6768         }
6769         atomic_dec(&dump_running);
6770         local_irq_restore(flags);
6771 }
6772 EXPORT_SYMBOL_GPL(ftrace_dump);
6773
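/*
 * Early boot initialization: allocate the cpumasks, the global ring
 * buffer and the saved-cmdlines buffer, register the nop tracer and
 * the panic/die notifiers, and apply any trace options and clock given
 * on the kernel command line.
 */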
6774 __init static int tracer_alloc_buffers(void)
6775 {
6776         int ring_buf_size;
6777         int ret = -ENOMEM;
6778
6779
6780         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6781                 goto out;
6782
6783         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6784                 goto out_free_buffer_mask;
6785
6786         /* Only allocate trace_printk buffers if a trace_printk exists */
6787         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6788                 /* Must be called before global_trace.buffer is allocated */
6789                 trace_printk_init_buffers();
6790
6791         /* To save memory, keep the ring buffer size to its minimum */
6792         if (ring_buffer_expanded)
6793                 ring_buf_size = trace_buf_size;
6794         else
6795                 ring_buf_size = 1;
6796
6797         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6798         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6799
6800         raw_spin_lock_init(&global_trace.start_lock);
6801
6802         /* Used for event triggers */
6803         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6804         if (!temp_buffer)
6805                 goto out_free_cpumask;
6806
6807         if (trace_create_savedcmd() < 0)
6808                 goto out_free_temp_buffer;
6809
6810         /* TODO: make the number of buffers hot pluggable with CPUs */
6811         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6812                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6813                 WARN_ON(1);
6814                 goto out_free_savedcmd;
6815         }
6816
6817         if (global_trace.buffer_disabled)
6818                 tracing_off();
6819
6820         if (trace_boot_clock) {
6821                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
6822                 if (ret < 0)
6823                         pr_warning("Trace clock %s not defined, going back to default\n",
6824                                    trace_boot_clock);
6825         }
6826
6827         /*
6828          * register_tracer() might reference current_trace, so it
6829          * needs to be set before we register anything. This is
6830          * just a bootstrap of current_trace anyway.
6831          */
6832         global_trace.current_trace = &nop_trace;
6833
6834         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6835
6836         ftrace_init_global_array_ops(&global_trace);
6837
6838         register_tracer(&nop_trace);
6839
6840         /* All seems OK, enable tracing */
6841         tracing_disabled = 0;
6842
6843         atomic_notifier_chain_register(&panic_notifier_list,
6844                                        &trace_panic_notifier);
6845
6846         register_die_notifier(&trace_die_notifier);
6847
6848         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6849
6850         INIT_LIST_HEAD(&global_trace.systems);
6851         INIT_LIST_HEAD(&global_trace.events);
6852         list_add(&global_trace.list, &ftrace_trace_arrays);
6853
6854         while (trace_boot_options) {
6855                 char *option;
6856
6857                 option = strsep(&trace_boot_options, ",");
6858                 trace_set_options(&global_trace, option);
6859         }
6860
6861         register_snapshot_cmd();
6862
6863         return 0;
6864
6865 out_free_savedcmd:
6866         free_saved_cmdlines_buffer(savedcmd);
6867 out_free_temp_buffer:
6868         ring_buffer_free(temp_buffer);
6869 out_free_cpumask:
6870         free_cpumask_var(global_trace.tracing_cpumask);
6871 out_free_buffer_mask:
6872         free_cpumask_var(tracing_buffer_mask);
6873 out:
6874         return ret;
6875 }
6876
6877 __init static int clear_boot_tracer(void)
6878 {
6879         /*
6880          * The default boot-up tracer name is kept in a buffer that
6881          * lives in an init section. This function runs as a late
6882          * initcall. If the boot tracer was never registered, clear
6883          * the name out, to prevent a later registration from
6884          * accessing the buffer that is about to be freed.
6885          */
6886         if (!default_bootup_tracer)
6887                 return 0;
6888
6889         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6890                default_bootup_tracer);
6891         default_bootup_tracer = NULL;
6892
6893         return 0;
6894 }
6895
6896 early_initcall(tracer_alloc_buffers);
6897 fs_initcall(tracer_init_debugfs);
6898 late_initcall(clear_boot_tracer);