kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will peek into the ring buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring buffer, such as trace_printk(), could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int
77 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
78 {
79         return 0;
80 }
81
82 /*
83  * To prevent the comm cache from being overwritten when no
84  * tracing is active, only save the comm when a trace event
85  * has occurred.
86  */
87 static DEFINE_PER_CPU(bool, trace_cmdline_save);
88
89 /*
90  * Kill all tracing for good (never come back).
91  * It is initialized to 1 but will be set to zero if the initialization
92  * of the tracer is successful. That is the only place that sets
93  * it back to zero.
94  */
95 static int tracing_disabled = 1;
96
97 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
98
99 cpumask_var_t __read_mostly     tracing_buffer_mask;
100
101 /*
102  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
103  *
104  * If there is an oops (or kernel panic) and ftrace_dump_on_oops
105  * is set, then ftrace_dump() is called. This will output the contents
106  * of the ftrace buffers to the console.  This is very useful for
107  * capturing traces that lead to crashes and outputting them to a
108  * serial console.
109  *
110  * It defaults to off, but you can enable it either by specifying
111  * "ftrace_dump_on_oops" on the kernel command line or by setting
112  * /proc/sys/kernel/ftrace_dump_on_oops.
113  * Set it to 1 to dump the buffers of all CPUs.
114  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
115  */
116
117 enum ftrace_dump_mode ftrace_dump_on_oops;
118
119 /* When set, tracing will stop when a WARN*() is hit */
120 int __disable_trace_on_warning;
121
122 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
123
124 #define MAX_TRACER_SIZE         100
125 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
126 static char *default_bootup_tracer;
127
128 static bool allocate_snapshot;
129
130 static int __init set_cmdline_ftrace(char *str)
131 {
132         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
133         default_bootup_tracer = bootup_tracer_buf;
134         /* We are using ftrace early, expand it */
135         ring_buffer_expanded = true;
136         return 1;
137 }
138 __setup("ftrace=", set_cmdline_ftrace);
139
140 static int __init set_ftrace_dump_on_oops(char *str)
141 {
142         if (*str++ != '=' || !*str) {
143                 ftrace_dump_on_oops = DUMP_ALL;
144                 return 1;
145         }
146
147         if (!strcmp("orig_cpu", str)) {
148                 ftrace_dump_on_oops = DUMP_ORIG;
149                 return 1;
150         }
151
152         return 0;
153 }
154 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
155
156 static int __init stop_trace_on_warning(char *str)
157 {
158         __disable_trace_on_warning = 1;
159         return 1;
160 }
161 __setup("traceoff_on_warning=", stop_trace_on_warning);
162
163 static int __init boot_alloc_snapshot(char *str)
164 {
165         allocate_snapshot = true;
166         /* We also need the main ring buffer expanded */
167         ring_buffer_expanded = true;
168         return 1;
169 }
170 __setup("alloc_snapshot", boot_alloc_snapshot);
171
172
173 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
174 static char *trace_boot_options __initdata;
175
176 static int __init set_trace_boot_options(char *str)
177 {
178         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
179         trace_boot_options = trace_boot_options_buf;
180         return 0;
181 }
182 __setup("trace_options=", set_trace_boot_options);
183
184 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
185 static char *trace_boot_clock __initdata;
186
187 static int __init set_trace_boot_clock(char *str)
188 {
189         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
190         trace_boot_clock = trace_boot_clock_buf;
191         return 0;
192 }
193 __setup("trace_clock=", set_trace_boot_clock);
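
/*
 * Taken together, the __setup() handlers above mean that most of the
 * tracing setup can be done from the kernel command line.  An illustrative
 * (not prescriptive) combination:
 *
 *	ftrace=function_graph trace_buf_size=4096k trace_clock=global
 *	trace_options=sym-addr alloc_snapshot ftrace_dump_on_oops
 *
 * boots with the function_graph tracer active, 4 MB per-cpu buffers, the
 * "global" trace clock, symbol addresses in the output, a spare snapshot
 * buffer allocated, and a dump of all CPU buffers on an oops.
 */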
194
195
196 unsigned long long ns2usecs(cycle_t nsec)
197 {
198         nsec += 500;
199         do_div(nsec, 1000);
200         return nsec;
201 }
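
/*
 * The +500 above rounds to the nearest microsecond instead of truncating:
 * e.g. ns2usecs(1499) == 1 while ns2usecs(1500) == 2.
 */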
202
203 /*
204  * The global_trace is the descriptor that holds the tracing
205  * buffers for the live tracing. For each CPU, it contains
206  * a linked list of pages that will store trace entries. The
207  * page descriptor of each of those pages is used to hold the
208  * linked list, by linking the lru item in the page descriptor
209  * to the other pages of that CPU's buffer.
210  *
211  * For each active CPU there is a data field that holds the
212  * pages for the buffer for that CPU. Each CPU has the same number
213  * of pages allocated for its buffer.
214  */
215 static struct trace_array       global_trace;
216
217 LIST_HEAD(ftrace_trace_arrays);
218
219 int trace_array_get(struct trace_array *this_tr)
220 {
221         struct trace_array *tr;
222         int ret = -ENODEV;
223
224         mutex_lock(&trace_types_lock);
225         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
226                 if (tr == this_tr) {
227                         tr->ref++;
228                         ret = 0;
229                         break;
230                 }
231         }
232         mutex_unlock(&trace_types_lock);
233
234         return ret;
235 }
236
237 static void __trace_array_put(struct trace_array *this_tr)
238 {
239         WARN_ON(!this_tr->ref);
240         this_tr->ref--;
241 }
242
243 void trace_array_put(struct trace_array *this_tr)
244 {
245         mutex_lock(&trace_types_lock);
246         __trace_array_put(this_tr);
247         mutex_unlock(&trace_types_lock);
248 }
249
250 int filter_check_discard(struct ftrace_event_file *file, void *rec,
251                          struct ring_buffer *buffer,
252                          struct ring_buffer_event *event)
253 {
254         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
255             !filter_match_preds(file->filter, rec)) {
256                 ring_buffer_discard_commit(buffer, event);
257                 return 1;
258         }
259
260         return 0;
261 }
262 EXPORT_SYMBOL_GPL(filter_check_discard);
263
264 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
265                               struct ring_buffer *buffer,
266                               struct ring_buffer_event *event)
267 {
268         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
269             !filter_match_preds(call->filter, rec)) {
270                 ring_buffer_discard_commit(buffer, event);
271                 return 1;
272         }
273
274         return 0;
275 }
276 EXPORT_SYMBOL_GPL(call_filter_check_discard);
277
278 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
279 {
280         u64 ts;
281
282         /* Early boot up does not have a buffer yet */
283         if (!buf->buffer)
284                 return trace_clock_local();
285
286         ts = ring_buffer_time_stamp(buf->buffer, cpu);
287         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
288
289         return ts;
290 }
291
292 cycle_t ftrace_now(int cpu)
293 {
294         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
295 }
296
297 /**
298  * tracing_is_enabled - Show if global_trace has been enabled
299  *
300  * Shows if the global trace has been enabled or not. It uses the
301  * mirror flag "buffer_disabled" so that it can be used in fast paths
302  * such as the irqsoff tracer, but it may be inaccurate due to races. If
303  * you need to know the accurate state, use tracing_is_on(), which is a little
304  * slower, but accurate.
305  */
306 int tracing_is_enabled(void)
307 {
308         /*
309          * For quick access (irqsoff uses this in fast path), just
310          * return the mirror variable of the state of the ring buffer.
311          * It's a little racy, but we don't really care.
312          */
313         smp_rmb();
314         return !global_trace.buffer_disabled;
315 }
316
317 /*
318  * trace_buf_size is the size in bytes that is allocated
319  * for a buffer. Note, the number of bytes is always rounded
320  * to page size.
321  *
322  * This number is purposely set to the low value of 16384 so that,
323  * if a dump on oops happens, there is not an overwhelming amount of
324  * output to wait for. In any case, it is configurable at both
325  * boot time and run time.
326  */
327 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
328
329 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
330
331 /* trace_types holds a linked list of available tracers. */
332 static struct tracer            *trace_types __read_mostly;
333
334 /*
335  * trace_types_lock is used to protect the trace_types list.
336  */
337 DEFINE_MUTEX(trace_types_lock);
338
339 /*
340  * Serialize access to the ring buffer.
341  *
342  * The ring buffer serializes readers, but that is only low-level protection.
343  * The validity of the events (returned by ring_buffer_peek() etc.)
344  * is not protected by the ring buffer.
345  *
346  * The content of events may become garbage if we allow other processes to
347  * consume these events concurrently:
348  *   A) the page of the consumed events may become a normal page
349  *      (not a reader page) in the ring buffer, and this page will be rewritten
350  *      by the event producer.
351  *   B) The page of the consumed events may become a page for splice_read,
352  *      and this page will be returned to the system.
353  *
354  * These primitives allow multiple processes to access different cpu ring
355  * buffers concurrently.
356  *
357  * These primitives don't distinguish read-only from read-consume access.
358  * Multiple read-only accesses are also serialized.
359  */
360
361 #ifdef CONFIG_SMP
362 static DECLARE_RWSEM(all_cpu_access_lock);
363 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
364
365 static inline void trace_access_lock(int cpu)
366 {
367         if (cpu == RING_BUFFER_ALL_CPUS) {
368                 /* gain it for accessing the whole ring buffer. */
369                 down_write(&all_cpu_access_lock);
370         } else {
371                 /* gain it for accessing a cpu ring buffer. */
372
373                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
374                 down_read(&all_cpu_access_lock);
375
376                 /* Secondly block other access to this @cpu ring buffer. */
377                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
378         }
379 }
380
381 static inline void trace_access_unlock(int cpu)
382 {
383         if (cpu == RING_BUFFER_ALL_CPUS) {
384                 up_write(&all_cpu_access_lock);
385         } else {
386                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
387                 up_read(&all_cpu_access_lock);
388         }
389 }
390
391 static inline void trace_access_lock_init(void)
392 {
393         int cpu;
394
395         for_each_possible_cpu(cpu)
396                 mutex_init(&per_cpu(cpu_access_lock, cpu));
397 }
398
399 #else
400
401 static DEFINE_MUTEX(access_lock);
402
403 static inline void trace_access_lock(int cpu)
404 {
405         (void)cpu;
406         mutex_lock(&access_lock);
407 }
408
409 static inline void trace_access_unlock(int cpu)
410 {
411         (void)cpu;
412         mutex_unlock(&access_lock);
413 }
414
415 static inline void trace_access_lock_init(void)
416 {
417 }
418
419 #endif
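
/*
 * A consumer of the primitives above (a hypothetical reader, sketched here
 * purely for illustration) wraps any peek or consume of a single cpu
 * buffer in the matching lock/unlock pair:
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	if (event)
 *		process_event(event);	/* hypothetical helper */
 *	trace_access_unlock(cpu);
 *
 * while a reader that walks every cpu buffer takes the whole-buffer lock
 * with trace_access_lock(RING_BUFFER_ALL_CPUS) instead.
 */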
420
421 /* trace_flags holds trace_options default values */
422 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
423         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
424         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
425         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
426
427 static void tracer_tracing_on(struct trace_array *tr)
428 {
429         if (tr->trace_buffer.buffer)
430                 ring_buffer_record_on(tr->trace_buffer.buffer);
431         /*
432          * This flag is looked at when buffers haven't been allocated
433          * yet, or by some tracers (like irqsoff) that just want to
434          * know if the ring buffer has been disabled, but can handle
435          * races where it gets disabled while we still do a record.
436          * As the check is in the fast path of the tracers, it is more
437          * important to be fast than accurate.
438          */
439         tr->buffer_disabled = 0;
440         /* Make the flag seen by readers */
441         smp_wmb();
442 }
443
444 /**
445  * tracing_on - enable tracing buffers
446  *
447  * This function enables tracing buffers that may have been
448  * disabled with tracing_off.
449  */
450 void tracing_on(void)
451 {
452         tracer_tracing_on(&global_trace);
453 }
454 EXPORT_SYMBOL_GPL(tracing_on);
455
456 /**
457  * __trace_puts - write a constant string into the trace buffer.
458  * @ip:    The address of the caller
459  * @str:   The constant string to write
460  * @size:  The size of the string.
461  */
462 int __trace_puts(unsigned long ip, const char *str, int size)
463 {
464         struct ring_buffer_event *event;
465         struct ring_buffer *buffer;
466         struct print_entry *entry;
467         unsigned long irq_flags;
468         int alloc;
469         int pc;
470
471         if (!(trace_flags & TRACE_ITER_PRINTK))
472                 return 0;
473
474         pc = preempt_count();
475
476         if (unlikely(tracing_selftest_running || tracing_disabled))
477                 return 0;
478
479         alloc = sizeof(*entry) + size + 2; /* possible \n added */
480
481         local_save_flags(irq_flags);
482         buffer = global_trace.trace_buffer.buffer;
483         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
484                                           irq_flags, pc);
485         if (!event)
486                 return 0;
487
488         entry = ring_buffer_event_data(event);
489         entry->ip = ip;
490
491         memcpy(&entry->buf, str, size);
492
493         /* Add a newline if necessary */
494         if (entry->buf[size - 1] != '\n') {
495                 entry->buf[size] = '\n';
496                 entry->buf[size + 1] = '\0';
497         } else
498                 entry->buf[size] = '\0';
499
500         __buffer_unlock_commit(buffer, event);
501         ftrace_trace_stack(buffer, irq_flags, 4, pc);
502
503         return size;
504 }
505 EXPORT_SYMBOL_GPL(__trace_puts);
506
507 /**
508  * __trace_bputs - write the pointer to a constant string into trace buffer
509  * @ip:    The address of the caller
510  * @str:   The constant string whose pointer is written to the buffer
511  */
512 int __trace_bputs(unsigned long ip, const char *str)
513 {
514         struct ring_buffer_event *event;
515         struct ring_buffer *buffer;
516         struct bputs_entry *entry;
517         unsigned long irq_flags;
518         int size = sizeof(struct bputs_entry);
519         int pc;
520
521         if (!(trace_flags & TRACE_ITER_PRINTK))
522                 return 0;
523
524         pc = preempt_count();
525
526         if (unlikely(tracing_selftest_running || tracing_disabled))
527                 return 0;
528
529         local_save_flags(irq_flags);
530         buffer = global_trace.trace_buffer.buffer;
531         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
532                                           irq_flags, pc);
533         if (!event)
534                 return 0;
535
536         entry = ring_buffer_event_data(event);
537         entry->ip                       = ip;
538         entry->str                      = str;
539
540         __buffer_unlock_commit(buffer, event);
541         ftrace_trace_stack(buffer, irq_flags, 4, pc);
542
543         return 1;
544 }
545 EXPORT_SYMBOL_GPL(__trace_bputs);
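
/*
 * Neither function above is normally called directly; the trace_puts()
 * macro picks between them (using __trace_bputs() for compile-time
 * constant strings) and supplies the caller's ip, e.g. the illustrative
 * debugging aid:
 *
 *	trace_puts("hit the slow path\n");
 *
 * which is a lighter-weight alternative to trace_printk() when there is
 * nothing to format.
 */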
546
547 #ifdef CONFIG_TRACER_SNAPSHOT
548 /**
549  * tracing_snapshot - take a snapshot of the current buffer.
550  *
551  * This causes a swap between the snapshot buffer and the current live
552  * tracing buffer. You can use this to take snapshots of the live
553  * trace when some condition is triggered, but continue to trace.
554  *
555  * Note, make sure to allocate the snapshot beforehand, either with
556  * tracing_snapshot_alloc() or manually with:
557  *   echo 1 > /sys/kernel/debug/tracing/snapshot
558  *
559  * If the snapshot buffer is not allocated, this will stop tracing,
560  * basically making a permanent snapshot.
561  */
562 void tracing_snapshot(void)
563 {
564         struct trace_array *tr = &global_trace;
565         struct tracer *tracer = tr->current_trace;
566         unsigned long flags;
567
568         if (in_nmi()) {
569                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
570                 internal_trace_puts("*** snapshot is being ignored        ***\n");
571                 return;
572         }
573
574         if (!tr->allocated_snapshot) {
575                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
576                 internal_trace_puts("*** stopping trace here!   ***\n");
577                 tracing_off();
578                 return;
579         }
580
581         /* Note, snapshot can not be used when the tracer uses it */
582         if (tracer->use_max_tr) {
583                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
584                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
585                 return;
586         }
587
588         local_irq_save(flags);
589         update_max_tr(tr, current, smp_processor_id());
590         local_irq_restore(flags);
591 }
592 EXPORT_SYMBOL_GPL(tracing_snapshot);
593
594 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
595                                         struct trace_buffer *size_buf, int cpu_id);
596 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
597
598 static int alloc_snapshot(struct trace_array *tr)
599 {
600         int ret;
601
602         if (!tr->allocated_snapshot) {
603
604                 /* allocate spare buffer */
605                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
606                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
607                 if (ret < 0)
608                         return ret;
609
610                 tr->allocated_snapshot = true;
611         }
612
613         return 0;
614 }
615
616 static void free_snapshot(struct trace_array *tr)
617 {
618         /*
619          * We don't free the ring buffer; instead, we resize it, because
620          * the max_tr ring buffer has some state (e.g. ring->clock) that
621          * we want to preserve.
622          */
623         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
624         set_buffer_entries(&tr->max_buffer, 1);
625         tracing_reset_online_cpus(&tr->max_buffer);
626         tr->allocated_snapshot = false;
627 }
628
629 /**
630  * tracing_alloc_snapshot - allocate snapshot buffer.
631  *
632  * This only allocates the snapshot buffer if it isn't already
633  * allocated - it doesn't also take a snapshot.
634  *
635  * This is meant to be used in cases where the snapshot buffer needs
636  * to be set up for events that can't sleep but need to be able to
637  * trigger a snapshot.
638  */
639 int tracing_alloc_snapshot(void)
640 {
641         struct trace_array *tr = &global_trace;
642         int ret;
643
644         ret = alloc_snapshot(tr);
645         WARN_ON(ret < 0);
646
647         return ret;
648 }
649 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
650
651 /**
652  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
653  *
654  * This is similar to tracing_snapshot(), but it will allocate the
655  * snapshot buffer if it isn't already allocated. Use this only
656  * where it is safe to sleep, as the allocation may sleep.
657  *
658  * This causes a swap between the snapshot buffer and the current live
659  * tracing buffer. You can use this to take snapshots of the live
660  * trace when some condition is triggered, but continue to trace.
661  */
662 void tracing_snapshot_alloc(void)
663 {
664         int ret;
665
666         ret = tracing_alloc_snapshot();
667         if (ret < 0)
668                 return;
669
670         tracing_snapshot();
671 }
672 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
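
/*
 * A typical (purely illustrative) use from a driver or debugging patch:
 * allocate the spare buffer once from a context that may sleep, then
 * freeze the interesting trace when a rare condition fires:
 *
 *	if (tracing_alloc_snapshot() == 0)
 *		snapshot_ready = true;
 *	...
 *	if (snapshot_ready && saw_rare_condition)
 *		tracing_snapshot();
 *
 * "snapshot_ready" and "saw_rare_condition" are made-up names used only
 * for this example; the snapshot can then be read from the "snapshot"
 * file without stopping the live trace.
 */
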
673 #else
674 void tracing_snapshot(void)
675 {
676         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
677 }
678 EXPORT_SYMBOL_GPL(tracing_snapshot);
679 int tracing_alloc_snapshot(void)
680 {
681         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
682         return -ENODEV;
683 }
684 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
685 void tracing_snapshot_alloc(void)
686 {
687         /* Give warning */
688         tracing_snapshot();
689 }
690 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
691 #endif /* CONFIG_TRACER_SNAPSHOT */
692
693 static void tracer_tracing_off(struct trace_array *tr)
694 {
695         if (tr->trace_buffer.buffer)
696                 ring_buffer_record_off(tr->trace_buffer.buffer);
697         /*
698          * This flag is looked at when buffers haven't been allocated
699          * yet, or by some tracers (like irqsoff) that just want to
700          * know if the ring buffer has been disabled, but can handle
701          * races where it gets disabled while we still do a record.
702          * As the check is in the fast path of the tracers, it is more
703          * important to be fast than accurate.
704          */
705         tr->buffer_disabled = 1;
706         /* Make the flag seen by readers */
707         smp_wmb();
708 }
709
710 /**
711  * tracing_off - turn off tracing buffers
712  *
713  * This function stops the tracing buffers from recording data.
714  * It does not disable any overhead the tracers themselves may
715  * be causing. This function simply causes all recording to
716  * the ring buffers to fail.
717  */
718 void tracing_off(void)
719 {
720         tracer_tracing_off(&global_trace);
721 }
722 EXPORT_SYMBOL_GPL(tracing_off);
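
/*
 * Being exported, tracing_on()/tracing_off() let code outside the tracer
 * freeze the buffers around an event of interest, e.g. (illustrative
 * only, dump_state_somehow() is a hypothetical helper):
 *
 *	tracing_off();
 *	dump_state_somehow();
 *	tracing_on();
 *
 * Everything recorded up to the tracing_off() call is preserved and can
 * still be read from the "trace" file while recording is off.
 */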
723
724 void disable_trace_on_warning(void)
725 {
726         if (__disable_trace_on_warning)
727                 tracing_off();
728 }
729
730 /**
731  * tracer_tracing_is_on - show real state of ring buffer enabled
732  * @tr: the trace array to check whether its ring buffer is enabled
733  *
734  * Shows real state of the ring buffer if it is enabled or not.
735  */
736 static int tracer_tracing_is_on(struct trace_array *tr)
737 {
738         if (tr->trace_buffer.buffer)
739                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
740         return !tr->buffer_disabled;
741 }
742
743 /**
744  * tracing_is_on - show state of ring buffers enabled
745  */
746 int tracing_is_on(void)
747 {
748         return tracer_tracing_is_on(&global_trace);
749 }
750 EXPORT_SYMBOL_GPL(tracing_is_on);
751
752 static int __init set_buf_size(char *str)
753 {
754         unsigned long buf_size;
755
756         if (!str)
757                 return 0;
758         buf_size = memparse(str, &str);
759         /* nr_entries can not be zero */
760         if (buf_size == 0)
761                 return 0;
762         trace_buf_size = buf_size;
763         return 1;
764 }
765 __setup("trace_buf_size=", set_buf_size);
766
767 static int __init set_tracing_thresh(char *str)
768 {
769         unsigned long threshold;
770         int ret;
771
772         if (!str)
773                 return 0;
774         ret = kstrtoul(str, 0, &threshold);
775         if (ret < 0)
776                 return 0;
777         tracing_thresh = threshold * 1000;
778         return 1;
779 }
780 __setup("tracing_thresh=", set_tracing_thresh);
781
782 unsigned long nsecs_to_usecs(unsigned long nsecs)
783 {
784         return nsecs / 1000;
785 }
786
787 /* These must match the bit positions in trace_iterator_flags */
788 static const char *trace_options[] = {
789         "print-parent",
790         "sym-offset",
791         "sym-addr",
792         "verbose",
793         "raw",
794         "hex",
795         "bin",
796         "block",
797         "stacktrace",
798         "trace_printk",
799         "ftrace_preempt",
800         "branch",
801         "annotate",
802         "userstacktrace",
803         "sym-userobj",
804         "printk-msg-only",
805         "context-info",
806         "latency-format",
807         "sleep-time",
808         "graph-time",
809         "record-cmd",
810         "overwrite",
811         "disable_on_free",
812         "irq-info",
813         "markers",
814         "function-trace",
815         NULL
816 };
817
818 static struct {
819         u64 (*func)(void);
820         const char *name;
821         int in_ns;              /* is this clock in nanoseconds? */
822 } trace_clocks[] = {
823         { trace_clock_local,            "local",        1 },
824         { trace_clock_global,           "global",       1 },
825         { trace_clock_counter,          "counter",      0 },
826         { trace_clock_jiffies,          "uptime",       0 },
827         { trace_clock,                  "perf",         1 },
828         { ktime_get_mono_fast_ns,       "mono",         1 },
829         ARCH_TRACE_CLOCKS
830 };
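
/*
 * The names above are what user space writes to the "trace_clock" file
 * to switch clocks at run time, e.g.:
 *
 *	echo global > /sys/kernel/debug/tracing/trace_clock
 *
 * or passes through the trace_clock= boot parameter handled earlier in
 * this file.
 */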
831
832 /*
833  * trace_parser_get_init - gets the buffer for trace parser
834  */
835 int trace_parser_get_init(struct trace_parser *parser, int size)
836 {
837         memset(parser, 0, sizeof(*parser));
838
839         parser->buffer = kmalloc(size, GFP_KERNEL);
840         if (!parser->buffer)
841                 return 1;
842
843         parser->size = size;
844         return 0;
845 }
846
847 /*
848  * trace_parser_put - frees the buffer for trace parser
849  */
850 void trace_parser_put(struct trace_parser *parser)
851 {
852         kfree(parser->buffer);
853 }
854
855 /*
856  * trace_get_user - reads the user input string separated by space
857  * (matched by isspace(ch))
858  *
859  * For each string found the 'struct trace_parser' is updated,
860  * and the function returns.
861  *
862  * Returns number of bytes read.
863  *
864  * See kernel/trace/trace.h for 'struct trace_parser' details.
865  */
866 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
867         size_t cnt, loff_t *ppos)
868 {
869         char ch;
870         size_t read = 0;
871         ssize_t ret;
872
873         if (!*ppos)
874                 trace_parser_clear(parser);
875
876         ret = get_user(ch, ubuf++);
877         if (ret)
878                 goto out;
879
880         read++;
881         cnt--;
882
883         /*
884          * If the parser did not finish with the last write,
885          * continue reading the user input without skipping spaces.
886          */
887         if (!parser->cont) {
888                 /* skip white space */
889                 while (cnt && isspace(ch)) {
890                         ret = get_user(ch, ubuf++);
891                         if (ret)
892                                 goto out;
893                         read++;
894                         cnt--;
895                 }
896
897                 /* only spaces were written */
898                 if (isspace(ch)) {
899                         *ppos += read;
900                         ret = read;
901                         goto out;
902                 }
903
904                 parser->idx = 0;
905         }
906
907         /* read the non-space input */
908         while (cnt && !isspace(ch)) {
909                 if (parser->idx < parser->size - 1)
910                         parser->buffer[parser->idx++] = ch;
911                 else {
912                         ret = -EINVAL;
913                         goto out;
914                 }
915                 ret = get_user(ch, ubuf++);
916                 if (ret)
917                         goto out;
918                 read++;
919                 cnt--;
920         }
921
922         /* We either got finished input or we have to wait for another call. */
923         if (isspace(ch)) {
924                 parser->buffer[parser->idx] = 0;
925                 parser->cont = false;
926         } else if (parser->idx < parser->size - 1) {
927                 parser->cont = true;
928                 parser->buffer[parser->idx++] = ch;
929         } else {
930                 ret = -EINVAL;
931                 goto out;
932         }
933
934         *ppos += read;
935         ret = read;
936
937 out:
938         return ret;
939 }
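
/*
 * The usual calling pattern for the parser (sketched here for
 * illustration; the real users are the ftrace/event filter write
 * handlers, and do_something_with() is a made-up consumer) is:
 *
 *	struct trace_parser parser;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser) &&
 *	    !trace_parser_cont(&parser))
 *		do_something_with(parser.buffer);
 *	trace_parser_put(&parser);
 *	return read;
 */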
940
941 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
942 {
943         int len;
944
945         if (s->len <= s->readpos)
946                 return -EBUSY;
947
948         len = s->len - s->readpos;
949         if (cnt > len)
950                 cnt = len;
951         memcpy(buf, s->buffer + s->readpos, cnt);
952
953         s->readpos += cnt;
954         return cnt;
955 }
956
957 unsigned long __read_mostly     tracing_thresh;
958
959 #ifdef CONFIG_TRACER_MAX_TRACE
960 /*
961  * Copy the new maximum trace into the separate maximum-trace
962  * structure. (this way the maximum trace is permanently saved,
963  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
964  */
965 static void
966 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
967 {
968         struct trace_buffer *trace_buf = &tr->trace_buffer;
969         struct trace_buffer *max_buf = &tr->max_buffer;
970         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
971         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
972
973         max_buf->cpu = cpu;
974         max_buf->time_start = data->preempt_timestamp;
975
976         max_data->saved_latency = tr->max_latency;
977         max_data->critical_start = data->critical_start;
978         max_data->critical_end = data->critical_end;
979
980         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
981         max_data->pid = tsk->pid;
982         /*
983          * If tsk == current, then use current_uid(), as that does not use
984          * RCU. The irq tracer can be called out of RCU scope.
985          */
986         if (tsk == current)
987                 max_data->uid = current_uid();
988         else
989                 max_data->uid = task_uid(tsk);
990
991         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
992         max_data->policy = tsk->policy;
993         max_data->rt_priority = tsk->rt_priority;
994
995         /* record this task's comm */
996         tracing_record_cmdline(tsk);
997 }
998
999 /**
1000  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1001  * @tr: tracer
1002  * @tsk: the task with the latency
1003  * @cpu: The cpu that initiated the trace.
1004  *
1005  * Flip the buffers between the @tr and the max_tr and record information
1006  * about which task was the cause of this latency.
1007  */
1008 void
1009 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1010 {
1011         struct ring_buffer *buf;
1012
1013         if (tr->stop_count)
1014                 return;
1015
1016         WARN_ON_ONCE(!irqs_disabled());
1017
1018         if (!tr->allocated_snapshot) {
1019                 /* Only the nop tracer should hit this when disabling */
1020                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1021                 return;
1022         }
1023
1024         arch_spin_lock(&tr->max_lock);
1025
1026         buf = tr->trace_buffer.buffer;
1027         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1028         tr->max_buffer.buffer = buf;
1029
1030         __update_max_tr(tr, tsk, cpu);
1031         arch_spin_unlock(&tr->max_lock);
1032 }
1033
1034 /**
1035  * update_max_tr_single - only copy one trace over, and reset the rest
1036  * @tr: tracer
1037  * @tsk: task with the latency
1038  * @cpu: the cpu of the buffer to copy.
1039  *
1040  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1041  */
1042 void
1043 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1044 {
1045         int ret;
1046
1047         if (tr->stop_count)
1048                 return;
1049
1050         WARN_ON_ONCE(!irqs_disabled());
1051         if (!tr->allocated_snapshot) {
1052                 /* Only the nop tracer should hit this when disabling */
1053                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1054                 return;
1055         }
1056
1057         arch_spin_lock(&tr->max_lock);
1058
1059         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1060
1061         if (ret == -EBUSY) {
1062                 /*
1063                  * We failed to swap the buffer due to a commit taking
1064                  * place on this CPU. We fail to record, but we reset
1065                  * the max trace buffer (no one writes directly to it)
1066                  * and flag that it failed.
1067                  */
1068                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1069                         "Failed to swap buffers due to commit in progress\n");
1070         }
1071
1072         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1073
1074         __update_max_tr(tr, tsk, cpu);
1075         arch_spin_unlock(&tr->max_lock);
1076 }
1077 #endif /* CONFIG_TRACER_MAX_TRACE */
1078
1079 static int wait_on_pipe(struct trace_iterator *iter)
1080 {
1081         /* Iterators are static, they should be filled or empty */
1082         if (trace_buffer_iter(iter, iter->cpu_file))
1083                 return 0;
1084
1085         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1086 }
1087
1088 #ifdef CONFIG_FTRACE_STARTUP_TEST
1089 static int run_tracer_selftest(struct tracer *type)
1090 {
1091         struct trace_array *tr = &global_trace;
1092         struct tracer *saved_tracer = tr->current_trace;
1093         int ret;
1094
1095         if (!type->selftest || tracing_selftest_disabled)
1096                 return 0;
1097
1098         /*
1099          * Run a selftest on this tracer.
1100          * Here we reset the trace buffer, and set the current
1101          * tracer to be this tracer. The tracer can then run some
1102          * internal tracing to verify that everything is in order.
1103          * If we fail, we do not register this tracer.
1104          */
1105         tracing_reset_online_cpus(&tr->trace_buffer);
1106
1107         tr->current_trace = type;
1108
1109 #ifdef CONFIG_TRACER_MAX_TRACE
1110         if (type->use_max_tr) {
1111                 /* If we expanded the buffers, make sure the max is expanded too */
1112                 if (ring_buffer_expanded)
1113                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1114                                            RING_BUFFER_ALL_CPUS);
1115                 tr->allocated_snapshot = true;
1116         }
1117 #endif
1118
1119         /* the test is responsible for initializing and enabling */
1120         pr_info("Testing tracer %s: ", type->name);
1121         ret = type->selftest(type, tr);
1122         /* the test is responsible for resetting too */
1123         tr->current_trace = saved_tracer;
1124         if (ret) {
1125                 printk(KERN_CONT "FAILED!\n");
1126                 /* Add the warning after printing 'FAILED' */
1127                 WARN_ON(1);
1128                 return -1;
1129         }
1130         /* Only reset on passing, to avoid touching corrupted buffers */
1131         tracing_reset_online_cpus(&tr->trace_buffer);
1132
1133 #ifdef CONFIG_TRACER_MAX_TRACE
1134         if (type->use_max_tr) {
1135                 tr->allocated_snapshot = false;
1136
1137                 /* Shrink the max buffer again */
1138                 if (ring_buffer_expanded)
1139                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1140                                            RING_BUFFER_ALL_CPUS);
1141         }
1142 #endif
1143
1144         printk(KERN_CONT "PASSED\n");
1145         return 0;
1146 }
1147 #else
1148 static inline int run_tracer_selftest(struct tracer *type)
1149 {
1150         return 0;
1151 }
1152 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1153
1154 /**
1155  * register_tracer - register a tracer with the ftrace system.
1156  * @type - the plugin for the tracer
1157  *
1158  * Register a new plugin tracer.
1159  */
1160 int register_tracer(struct tracer *type)
1161 {
1162         struct tracer *t;
1163         int ret = 0;
1164
1165         if (!type->name) {
1166                 pr_info("Tracer must have a name\n");
1167                 return -1;
1168         }
1169
1170         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1171                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1172                 return -1;
1173         }
1174
1175         mutex_lock(&trace_types_lock);
1176
1177         tracing_selftest_running = true;
1178
1179         for (t = trace_types; t; t = t->next) {
1180                 if (strcmp(type->name, t->name) == 0) {
1181                         /* already found */
1182                         pr_info("Tracer %s already registered\n",
1183                                 type->name);
1184                         ret = -1;
1185                         goto out;
1186                 }
1187         }
1188
1189         if (!type->set_flag)
1190                 type->set_flag = &dummy_set_flag;
1191         if (!type->flags)
1192                 type->flags = &dummy_tracer_flags;
1193         else
1194                 if (!type->flags->opts)
1195                         type->flags->opts = dummy_tracer_opt;
1196
1197         ret = run_tracer_selftest(type);
1198         if (ret < 0)
1199                 goto out;
1200
1201         type->next = trace_types;
1202         trace_types = type;
1203
1204  out:
1205         tracing_selftest_running = false;
1206         mutex_unlock(&trace_types_lock);
1207
1208         if (ret || !default_bootup_tracer)
1209                 goto out_unlock;
1210
1211         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1212                 goto out_unlock;
1213
1214         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1215         /* Do we want this tracer to start on bootup? */
1216         tracing_set_tracer(&global_trace, type->name);
1217         default_bootup_tracer = NULL;
1218         /* disable other selftests, since this will break them. */
1219         tracing_selftest_disabled = true;
1220 #ifdef CONFIG_FTRACE_STARTUP_TEST
1221         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1222                type->name);
1223 #endif
1224
1225  out_unlock:
1226         return ret;
1227 }
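
/*
 * A minimal tracer plugin (an illustrative sketch; "noop_example" and
 * my_tracer_init() are made-up names) registers itself like this,
 * typically from an __init function:
 *
 *	static int my_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static struct tracer noop_example __read_mostly = {
 *		.name	= "noop_example",
 *		.init	= my_tracer_init,
 *	};
 *
 *	register_tracer(&noop_example);
 *
 * Once registered, the tracer is listed in available_tracers and can be
 * selected by writing its name to current_tracer.
 */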
1228
1229 void tracing_reset(struct trace_buffer *buf, int cpu)
1230 {
1231         struct ring_buffer *buffer = buf->buffer;
1232
1233         if (!buffer)
1234                 return;
1235
1236         ring_buffer_record_disable(buffer);
1237
1238         /* Make sure all commits have finished */
1239         synchronize_sched();
1240         ring_buffer_reset_cpu(buffer, cpu);
1241
1242         ring_buffer_record_enable(buffer);
1243 }
1244
1245 void tracing_reset_online_cpus(struct trace_buffer *buf)
1246 {
1247         struct ring_buffer *buffer = buf->buffer;
1248         int cpu;
1249
1250         if (!buffer)
1251                 return;
1252
1253         ring_buffer_record_disable(buffer);
1254
1255         /* Make sure all commits have finished */
1256         synchronize_sched();
1257
1258         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1259
1260         for_each_online_cpu(cpu)
1261                 ring_buffer_reset_cpu(buffer, cpu);
1262
1263         ring_buffer_record_enable(buffer);
1264 }
1265
1266 /* Must have trace_types_lock held */
1267 void tracing_reset_all_online_cpus(void)
1268 {
1269         struct trace_array *tr;
1270
1271         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1272                 tracing_reset_online_cpus(&tr->trace_buffer);
1273 #ifdef CONFIG_TRACER_MAX_TRACE
1274                 tracing_reset_online_cpus(&tr->max_buffer);
1275 #endif
1276         }
1277 }
1278
1279 #define SAVED_CMDLINES_DEFAULT 128
1280 #define NO_CMDLINE_MAP UINT_MAX
1281 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1282 struct saved_cmdlines_buffer {
1283         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1284         unsigned *map_cmdline_to_pid;
1285         unsigned cmdline_num;
1286         int cmdline_idx;
1287         char *saved_cmdlines;
1288 };
1289 static struct saved_cmdlines_buffer *savedcmd;
1290
1291 /* temporarily disable recording */
1292 static atomic_t trace_record_cmdline_disabled __read_mostly;
1293
1294 static inline char *get_saved_cmdlines(int idx)
1295 {
1296         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1297 }
1298
1299 static inline void set_cmdline(int idx, const char *cmdline)
1300 {
1301         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1302 }
1303
1304 static int allocate_cmdlines_buffer(unsigned int val,
1305                                     struct saved_cmdlines_buffer *s)
1306 {
1307         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1308                                         GFP_KERNEL);
1309         if (!s->map_cmdline_to_pid)
1310                 return -ENOMEM;
1311
1312         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1313         if (!s->saved_cmdlines) {
1314                 kfree(s->map_cmdline_to_pid);
1315                 return -ENOMEM;
1316         }
1317
1318         s->cmdline_idx = 0;
1319         s->cmdline_num = val;
1320         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1321                sizeof(s->map_pid_to_cmdline));
1322         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1323                val * sizeof(*s->map_cmdline_to_pid));
1324
1325         return 0;
1326 }
1327
1328 static int trace_create_savedcmd(void)
1329 {
1330         int ret;
1331
1332         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1333         if (!savedcmd)
1334                 return -ENOMEM;
1335
1336         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1337         if (ret < 0) {
1338                 kfree(savedcmd);
1339                 savedcmd = NULL;
1340                 return -ENOMEM;
1341         }
1342
1343         return 0;
1344 }
1345
1346 int is_tracing_stopped(void)
1347 {
1348         return global_trace.stop_count;
1349 }
1350
1351 /**
1352  * tracing_start - quick start of the tracer
1353  *
1354  * If tracing is enabled but was stopped by tracing_stop,
1355  * this will start the tracer back up.
1356  */
1357 void tracing_start(void)
1358 {
1359         struct ring_buffer *buffer;
1360         unsigned long flags;
1361
1362         if (tracing_disabled)
1363                 return;
1364
1365         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1366         if (--global_trace.stop_count) {
1367                 if (global_trace.stop_count < 0) {
1368                         /* Someone screwed up their debugging */
1369                         WARN_ON_ONCE(1);
1370                         global_trace.stop_count = 0;
1371                 }
1372                 goto out;
1373         }
1374
1375         /* Prevent the buffers from switching */
1376         arch_spin_lock(&global_trace.max_lock);
1377
1378         buffer = global_trace.trace_buffer.buffer;
1379         if (buffer)
1380                 ring_buffer_record_enable(buffer);
1381
1382 #ifdef CONFIG_TRACER_MAX_TRACE
1383         buffer = global_trace.max_buffer.buffer;
1384         if (buffer)
1385                 ring_buffer_record_enable(buffer);
1386 #endif
1387
1388         arch_spin_unlock(&global_trace.max_lock);
1389
1390  out:
1391         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1392 }
1393
1394 static void tracing_start_tr(struct trace_array *tr)
1395 {
1396         struct ring_buffer *buffer;
1397         unsigned long flags;
1398
1399         if (tracing_disabled)
1400                 return;
1401
1402         /* If global, we need to also start the max tracer */
1403         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1404                 return tracing_start();
1405
1406         raw_spin_lock_irqsave(&tr->start_lock, flags);
1407
1408         if (--tr->stop_count) {
1409                 if (tr->stop_count < 0) {
1410                         /* Someone screwed up their debugging */
1411                         WARN_ON_ONCE(1);
1412                         tr->stop_count = 0;
1413                 }
1414                 goto out;
1415         }
1416
1417         buffer = tr->trace_buffer.buffer;
1418         if (buffer)
1419                 ring_buffer_record_enable(buffer);
1420
1421  out:
1422         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1423 }
1424
1425 /**
1426  * tracing_stop - quick stop of the tracer
1427  *
1428  * Light weight way to stop tracing. Use in conjunction with
1429  * tracing_start.
1430  */
1431 void tracing_stop(void)
1432 {
1433         struct ring_buffer *buffer;
1434         unsigned long flags;
1435
1436         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1437         if (global_trace.stop_count++)
1438                 goto out;
1439
1440         /* Prevent the buffers from switching */
1441         arch_spin_lock(&global_trace.max_lock);
1442
1443         buffer = global_trace.trace_buffer.buffer;
1444         if (buffer)
1445                 ring_buffer_record_disable(buffer);
1446
1447 #ifdef CONFIG_TRACER_MAX_TRACE
1448         buffer = global_trace.max_buffer.buffer;
1449         if (buffer)
1450                 ring_buffer_record_disable(buffer);
1451 #endif
1452
1453         arch_spin_unlock(&global_trace.max_lock);
1454
1455  out:
1456         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1457 }
1458
1459 static void tracing_stop_tr(struct trace_array *tr)
1460 {
1461         struct ring_buffer *buffer;
1462         unsigned long flags;
1463
1464         /* If global, we need to also stop the max tracer */
1465         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1466                 return tracing_stop();
1467
1468         raw_spin_lock_irqsave(&tr->start_lock, flags);
1469         if (tr->stop_count++)
1470                 goto out;
1471
1472         buffer = tr->trace_buffer.buffer;
1473         if (buffer)
1474                 ring_buffer_record_disable(buffer);
1475
1476  out:
1477         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1478 }
1479
1480 void trace_stop_cmdline_recording(void);
1481
1482 static int trace_save_cmdline(struct task_struct *tsk)
1483 {
1484         unsigned pid, idx;
1485
1486         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1487                 return 0;
1488
1489         /*
1490          * It's not the end of the world if we don't get
1491          * the lock, but we also don't want to spin
1492          * nor do we want to disable interrupts,
1493          * so if we miss here, then better luck next time.
1494          */
1495         if (!arch_spin_trylock(&trace_cmdline_lock))
1496                 return 0;
1497
1498         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1499         if (idx == NO_CMDLINE_MAP) {
1500                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1501
1502                 /*
1503                  * Check whether the cmdline buffer at idx has a pid
1504                  * mapped. We are going to overwrite that entry so we
1505                  * need to clear the map_pid_to_cmdline. Otherwise we
1506                  * would read the new comm for the old pid.
1507                  */
1508                 pid = savedcmd->map_cmdline_to_pid[idx];
1509                 if (pid != NO_CMDLINE_MAP)
1510                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1511
1512                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1513                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1514
1515                 savedcmd->cmdline_idx = idx;
1516         }
1517
1518         set_cmdline(idx, tsk->comm);
1519
1520         arch_spin_unlock(&trace_cmdline_lock);
1521
1522         return 1;
1523 }
1524
1525 static void __trace_find_cmdline(int pid, char comm[])
1526 {
1527         unsigned map;
1528
1529         if (!pid) {
1530                 strcpy(comm, "<idle>");
1531                 return;
1532         }
1533
1534         if (WARN_ON_ONCE(pid < 0)) {
1535                 strcpy(comm, "<XXX>");
1536                 return;
1537         }
1538
1539         if (pid > PID_MAX_DEFAULT) {
1540                 strcpy(comm, "<...>");
1541                 return;
1542         }
1543
1544         map = savedcmd->map_pid_to_cmdline[pid];
1545         if (map != NO_CMDLINE_MAP)
1546                 strcpy(comm, get_saved_cmdlines(map));
1547         else
1548                 strcpy(comm, "<...>");
1549 }
1550
1551 void trace_find_cmdline(int pid, char comm[])
1552 {
1553         preempt_disable();
1554         arch_spin_lock(&trace_cmdline_lock);
1555
1556         __trace_find_cmdline(pid, comm);
1557
1558         arch_spin_unlock(&trace_cmdline_lock);
1559         preempt_enable();
1560 }
1561
1562 void tracing_record_cmdline(struct task_struct *tsk)
1563 {
1564         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1565                 return;
1566
1567         if (!__this_cpu_read(trace_cmdline_save))
1568                 return;
1569
1570         if (trace_save_cmdline(tsk))
1571                 __this_cpu_write(trace_cmdline_save, false);
1572 }
1573
1574 void
1575 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1576                              int pc)
1577 {
1578         struct task_struct *tsk = current;
1579
1580         entry->preempt_count            = pc & 0xff;
1581         entry->pid                      = (tsk) ? tsk->pid : 0;
1582         entry->flags =
1583 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1584                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1585 #else
1586                 TRACE_FLAG_IRQS_NOSUPPORT |
1587 #endif
1588                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1589                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1590                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1591                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1592 }
1593 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1594
1595 struct ring_buffer_event *
1596 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1597                           int type,
1598                           unsigned long len,
1599                           unsigned long flags, int pc)
1600 {
1601         struct ring_buffer_event *event;
1602
1603         event = ring_buffer_lock_reserve(buffer, len);
1604         if (event != NULL) {
1605                 struct trace_entry *ent = ring_buffer_event_data(event);
1606
1607                 tracing_generic_entry_update(ent, flags, pc);
1608                 ent->type = type;
1609         }
1610
1611         return event;
1612 }
1613
1614 void
1615 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1616 {
1617         __this_cpu_write(trace_cmdline_save, true);
1618         ring_buffer_unlock_commit(buffer, event);
1619 }
1620
1621 static inline void
1622 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1623                              struct ring_buffer_event *event,
1624                              unsigned long flags, int pc)
1625 {
1626         __buffer_unlock_commit(buffer, event);
1627
1628         ftrace_trace_stack(buffer, flags, 6, pc);
1629         ftrace_trace_userstack(buffer, flags, pc);
1630 }
1631
1632 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1633                                 struct ring_buffer_event *event,
1634                                 unsigned long flags, int pc)
1635 {
1636         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1637 }
1638 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1639
1640 static struct ring_buffer *temp_buffer;
1641
1642 struct ring_buffer_event *
1643 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1644                           struct ftrace_event_file *ftrace_file,
1645                           int type, unsigned long len,
1646                           unsigned long flags, int pc)
1647 {
1648         struct ring_buffer_event *entry;
1649
1650         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1651         entry = trace_buffer_lock_reserve(*current_rb,
1652                                          type, len, flags, pc);
1653         /*
1654          * If tracing is off, but we have triggers enabled,
1655          * we still need to look at the event data. Use the temp_buffer
1656          * to store the trace event for the trigger to use. It's recursion
1657          * safe and will not be recorded anywhere.
1658          */
1659         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1660                 *current_rb = temp_buffer;
1661                 entry = trace_buffer_lock_reserve(*current_rb,
1662                                                   type, len, flags, pc);
1663         }
1664         return entry;
1665 }
1666 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1667
1668 struct ring_buffer_event *
1669 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1670                                   int type, unsigned long len,
1671                                   unsigned long flags, int pc)
1672 {
1673         *current_rb = global_trace.trace_buffer.buffer;
1674         return trace_buffer_lock_reserve(*current_rb,
1675                                          type, len, flags, pc);
1676 }
1677 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1678
1679 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1680                                         struct ring_buffer_event *event,
1681                                         unsigned long flags, int pc)
1682 {
1683         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1684 }
1685 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1686
1687 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1688                                      struct ring_buffer_event *event,
1689                                      unsigned long flags, int pc,
1690                                      struct pt_regs *regs)
1691 {
1692         __buffer_unlock_commit(buffer, event);
1693
1694         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1695         ftrace_trace_userstack(buffer, flags, pc);
1696 }
1697 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1698
1699 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1700                                          struct ring_buffer_event *event)
1701 {
1702         ring_buffer_discard_commit(buffer, event);
1703 }
1704 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1705
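/*
 * trace_function - record a function entry (ip and parent_ip) into the
 * trace buffer of @tr, unless tracing is disabled on this CPU.
 */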
1706 void
1707 trace_function(struct trace_array *tr,
1708                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1709                int pc)
1710 {
1711         struct ftrace_event_call *call = &event_function;
1712         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1713         struct ring_buffer_event *event;
1714         struct ftrace_entry *entry;
1715
1716         /* If we are reading the ring buffer, don't trace */
1717         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1718                 return;
1719
1720         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1721                                           flags, pc);
1722         if (!event)
1723                 return;
1724         entry   = ring_buffer_event_data(event);
1725         entry->ip                       = ip;
1726         entry->parent_ip                = parent_ip;
1727
1728         if (!call_filter_check_discard(call, entry, buffer, event))
1729                 __buffer_unlock_commit(buffer, event);
1730 }
1731
1732 #ifdef CONFIG_STACKTRACE
1733
1734 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1735 struct ftrace_stack {
1736         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1737 };
1738
1739 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1740 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1741
1742 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1743                                  unsigned long flags,
1744                                  int skip, int pc, struct pt_regs *regs)
1745 {
1746         struct ftrace_event_call *call = &event_kernel_stack;
1747         struct ring_buffer_event *event;
1748         struct stack_entry *entry;
1749         struct stack_trace trace;
1750         int use_stack;
1751         int size = FTRACE_STACK_ENTRIES;
1752
1753         trace.nr_entries        = 0;
1754         trace.skip              = skip;
1755
1756         /*
1757          * Since events can happen in NMIs, there's no safe way to
1758          * use the per cpu ftrace_stacks. We reserve it, and if an interrupt
1759          * or NMI comes in, it will just have to use the default
1760          * FTRACE_STACK_ENTRIES sized stack saved in the event itself.
1761          */
1762         preempt_disable_notrace();
1763
1764         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1765         /*
1766          * We don't need any atomic variables, just a barrier.
1767          * If an interrupt comes in, we don't care, because it would
1768          * have exited and put the counter back to what we want.
1769          * We just need a barrier to keep gcc from moving things
1770          * around.
1771          */
1772         barrier();
1773         if (use_stack == 1) {
1774                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1775                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1776
1777                 if (regs)
1778                         save_stack_trace_regs(regs, &trace);
1779                 else
1780                         save_stack_trace(&trace);
1781
1782                 if (trace.nr_entries > size)
1783                         size = trace.nr_entries;
1784         } else
1785                 /* From now on, use_stack is a boolean */
1786                 use_stack = 0;
1787
1788         size *= sizeof(unsigned long);
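        /* "size" counted stack entries so far; convert it to bytes. */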
1789
1790         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1791                                           sizeof(*entry) + size, flags, pc);
1792         if (!event)
1793                 goto out;
1794         entry = ring_buffer_event_data(event);
1795
1796         memset(&entry->caller, 0, size);
1797
1798         if (use_stack)
1799                 memcpy(&entry->caller, trace.entries,
1800                        trace.nr_entries * sizeof(unsigned long));
1801         else {
1802                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1803                 trace.entries           = entry->caller;
1804                 if (regs)
1805                         save_stack_trace_regs(regs, &trace);
1806                 else
1807                         save_stack_trace(&trace);
1808         }
1809
1810         entry->size = trace.nr_entries;
1811
1812         if (!call_filter_check_discard(call, entry, buffer, event))
1813                 __buffer_unlock_commit(buffer, event);
1814
1815  out:
1816         /* Again, don't let gcc optimize things here */
1817         barrier();
1818         __this_cpu_dec(ftrace_stack_reserve);
1819         preempt_enable_notrace();
1820
1821 }
1822
1823 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1824                              int skip, int pc, struct pt_regs *regs)
1825 {
1826         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1827                 return;
1828
1829         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1830 }
1831
1832 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1833                         int skip, int pc)
1834 {
1835         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1836                 return;
1837
1838         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1839 }
1840
1841 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1842                    int pc)
1843 {
1844         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1845 }
1846
1847 /**
1848  * trace_dump_stack - record a stack back trace in the trace buffer
1849  * @skip: Number of functions to skip (helper handlers)
1850  */
1851 void trace_dump_stack(int skip)
1852 {
1853         unsigned long flags;
1854
1855         if (tracing_disabled || tracing_selftest_running)
1856                 return;
1857
1858         local_save_flags(flags);
1859
1860         /*
1861          * Skip 3 more; that seems to get us to the caller of
1862          * this function.
1863          */
1864         skip += 3;
1865         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1866                              flags, skip, preempt_count(), NULL);
1867 }
1868
1869 static DEFINE_PER_CPU(int, user_stack_count);
1870
1871 void
1872 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1873 {
1874         struct ftrace_event_call *call = &event_user_stack;
1875         struct ring_buffer_event *event;
1876         struct userstack_entry *entry;
1877         struct stack_trace trace;
1878
1879         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1880                 return;
1881
1882         /*
1883          * NMIs can not handle page faults, even with fixups.
1884          * Saving the user stack can (and often does) fault.
1885          */
1886         if (unlikely(in_nmi()))
1887                 return;
1888
1889         /*
1890          * prevent recursion, since the user stack tracing may
1891          * trigger other kernel events.
1892          */
1893         preempt_disable();
1894         if (__this_cpu_read(user_stack_count))
1895                 goto out;
1896
1897         __this_cpu_inc(user_stack_count);
1898
1899         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1900                                           sizeof(*entry), flags, pc);
1901         if (!event)
1902                 goto out_drop_count;
1903         entry   = ring_buffer_event_data(event);
1904
1905         entry->tgid             = current->tgid;
1906         memset(&entry->caller, 0, sizeof(entry->caller));
1907
1908         trace.nr_entries        = 0;
1909         trace.max_entries       = FTRACE_STACK_ENTRIES;
1910         trace.skip              = 0;
1911         trace.entries           = entry->caller;
1912
1913         save_stack_trace_user(&trace);
1914         if (!call_filter_check_discard(call, entry, buffer, event))
1915                 __buffer_unlock_commit(buffer, event);
1916
1917  out_drop_count:
1918         __this_cpu_dec(user_stack_count);
1919  out:
1920         preempt_enable();
1921 }
1922
1923 #ifdef UNUSED
1924 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1925 {
1926         ftrace_trace_userstack(tr, flags, preempt_count());
1927 }
1928 #endif /* UNUSED */
1929
1930 #endif /* CONFIG_STACKTRACE */
1931
1932 /* created for use with alloc_percpu */
1933 struct trace_buffer_struct {
1934         char buffer[TRACE_BUF_SIZE];
1935 };
1936
1937 static struct trace_buffer_struct *trace_percpu_buffer;
1938 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1939 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1940 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1941
1942 /*
1943  * The buffer used is dependent on the context. There is a per cpu
1944  * buffer for normal context, softirq context, hard irq context and
1945  * for NMI context. This allows for lockless recording.
1946  *
1947  * Note, if the buffers failed to be allocated, then this returns NULL.
1948  */
1949 static char *get_trace_buf(void)
1950 {
1951         struct trace_buffer_struct *percpu_buffer;
1952
1953         /*
1954          * If we have allocated per cpu buffers, then we do not
1955          * need to do any locking.
1956          */
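        /* Pick the buffer that matches the current context (NMI, hardirq, softirq or task). */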
1957         if (in_nmi())
1958                 percpu_buffer = trace_percpu_nmi_buffer;
1959         else if (in_irq())
1960                 percpu_buffer = trace_percpu_irq_buffer;
1961         else if (in_softirq())
1962                 percpu_buffer = trace_percpu_sirq_buffer;
1963         else
1964                 percpu_buffer = trace_percpu_buffer;
1965
1966         if (!percpu_buffer)
1967                 return NULL;
1968
1969         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1970 }
1971
1972 static int alloc_percpu_trace_buffer(void)
1973 {
1974         struct trace_buffer_struct *buffers;
1975         struct trace_buffer_struct *sirq_buffers;
1976         struct trace_buffer_struct *irq_buffers;
1977         struct trace_buffer_struct *nmi_buffers;
1978
1979         buffers = alloc_percpu(struct trace_buffer_struct);
1980         if (!buffers)
1981                 goto err_warn;
1982
1983         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1984         if (!sirq_buffers)
1985                 goto err_sirq;
1986
1987         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1988         if (!irq_buffers)
1989                 goto err_irq;
1990
1991         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1992         if (!nmi_buffers)
1993                 goto err_nmi;
1994
1995         trace_percpu_buffer = buffers;
1996         trace_percpu_sirq_buffer = sirq_buffers;
1997         trace_percpu_irq_buffer = irq_buffers;
1998         trace_percpu_nmi_buffer = nmi_buffers;
1999
2000         return 0;
2001
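 /* Unwind the successful allocations in reverse order. */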
2002  err_nmi:
2003         free_percpu(irq_buffers);
2004  err_irq:
2005         free_percpu(sirq_buffers);
2006  err_sirq:
2007         free_percpu(buffers);
2008  err_warn:
2009         WARN(1, "Could not allocate percpu trace_printk buffer");
2010         return -ENOMEM;
2011 }
2012
2013 static int buffers_allocated;
2014
2015 void trace_printk_init_buffers(void)
2016 {
2017         if (buffers_allocated)
2018                 return;
2019
2020         if (alloc_percpu_trace_buffer())
2021                 return;
2022
2023         /* trace_printk() is for debug use only. Don't use it in production. */
2024
2025         pr_warning("\n**********************************************************\n");
2026         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2027         pr_warning("**                                                      **\n");
2028         pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2029         pr_warning("**                                                      **\n");
2030         pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2031         pr_warning("** unsafe for production use.                           **\n");
2032         pr_warning("**                                                      **\n");
2033         pr_warning("** If you see this message and you are not debugging    **\n");
2034         pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2035         pr_warning("**                                                      **\n");
2036         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2037         pr_warning("**********************************************************\n");
2038
2039         /* Expand the buffers to their set size */
2040         tracing_update_buffers();
2041
2042         buffers_allocated = 1;
2043
2044         /*
2045          * trace_printk_init_buffers() can be called by modules.
2046          * If that happens, then we need to start cmdline recording
2047          * directly here. If the global_trace.buffer is already
2048          * allocated here, then this was called by module code.
2049          */
2050         if (global_trace.trace_buffer.buffer)
2051                 tracing_start_cmdline_record();
2052 }
2053
2054 void trace_printk_start_comm(void)
2055 {
2056         /* Start tracing comms if trace printk is set */
2057         if (!buffers_allocated)
2058                 return;
2059         tracing_start_cmdline_record();
2060 }
2061
2062 static void trace_printk_start_stop_comm(int enabled)
2063 {
2064         if (!buffers_allocated)
2065                 return;
2066
2067         if (enabled)
2068                 tracing_start_cmdline_record();
2069         else
2070                 tracing_stop_cmdline_record();
2071 }
2072
2073 /**
2074  * trace_vbprintk - write a binary printk message to the tracing buffer
2075  *
2076  */
2077 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2078 {
2079         struct ftrace_event_call *call = &event_bprint;
2080         struct ring_buffer_event *event;
2081         struct ring_buffer *buffer;
2082         struct trace_array *tr = &global_trace;
2083         struct bprint_entry *entry;
2084         unsigned long flags;
2085         char *tbuffer;
2086         int len = 0, size, pc;
2087
2088         if (unlikely(tracing_selftest_running || tracing_disabled))
2089                 return 0;
2090
2091         /* Don't pollute graph traces with trace_vprintk internals */
2092         pause_graph_tracing();
2093
2094         pc = preempt_count();
2095         preempt_disable_notrace();
2096
2097         tbuffer = get_trace_buf();
2098         if (!tbuffer) {
2099                 len = 0;
2100                 goto out;
2101         }
2102
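        /* Encode the varargs into the scratch buffer; len is in 32-bit words. */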
2103         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2104
2105         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2106                 goto out;
2107
2108         local_save_flags(flags);
2109         size = sizeof(*entry) + sizeof(u32) * len;
2110         buffer = tr->trace_buffer.buffer;
2111         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2112                                           flags, pc);
2113         if (!event)
2114                 goto out;
2115         entry = ring_buffer_event_data(event);
2116         entry->ip                       = ip;
2117         entry->fmt                      = fmt;
2118
2119         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2120         if (!call_filter_check_discard(call, entry, buffer, event)) {
2121                 __buffer_unlock_commit(buffer, event);
2122                 ftrace_trace_stack(buffer, flags, 6, pc);
2123         }
2124
2125 out:
2126         preempt_enable_notrace();
2127         unpause_graph_tracing();
2128
2129         return len;
2130 }
2131 EXPORT_SYMBOL_GPL(trace_vbprintk);
2132
2133 static int
2134 __trace_array_vprintk(struct ring_buffer *buffer,
2135                       unsigned long ip, const char *fmt, va_list args)
2136 {
2137         struct ftrace_event_call *call = &event_print;
2138         struct ring_buffer_event *event;
2139         int len = 0, size, pc;
2140         struct print_entry *entry;
2141         unsigned long flags;
2142         char *tbuffer;
2143
2144         if (tracing_disabled || tracing_selftest_running)
2145                 return 0;
2146
2147         /* Don't pollute graph traces with trace_vprintk internals */
2148         pause_graph_tracing();
2149
2150         pc = preempt_count();
2151         preempt_disable_notrace();
2152
2153
2154         tbuffer = get_trace_buf();
2155         if (!tbuffer) {
2156                 len = 0;
2157                 goto out;
2158         }
2159
2160         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2161         if (len > TRACE_BUF_SIZE)
2162                 goto out;
2163
2164         local_save_flags(flags);
2165         size = sizeof(*entry) + len + 1;
2166         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2167                                           flags, pc);
2168         if (!event)
2169                 goto out;
2170         entry = ring_buffer_event_data(event);
2171         entry->ip = ip;
2172
2173         memcpy(&entry->buf, tbuffer, len);
2174         entry->buf[len] = '\0';
2175         if (!call_filter_check_discard(call, entry, buffer, event)) {
2176                 __buffer_unlock_commit(buffer, event);
2177                 ftrace_trace_stack(buffer, flags, 6, pc);
2178         }
2179  out:
2180         preempt_enable_notrace();
2181         unpause_graph_tracing();
2182
2183         return len;
2184 }
2185
2186 int trace_array_vprintk(struct trace_array *tr,
2187                         unsigned long ip, const char *fmt, va_list args)
2188 {
2189         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2190 }
2191
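/*
 * trace_array_printk - write a formatted message into a trace instance.
 * A minimal usage sketch (assuming "tr" is a valid trace_array, "delta"
 * is a local variable and the "printk" trace option is set):
 *
 *	trace_array_printk(tr, _THIS_IP_, "reset took %lu us\n", delta);
 */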
2192 int trace_array_printk(struct trace_array *tr,
2193                        unsigned long ip, const char *fmt, ...)
2194 {
2195         int ret;
2196         va_list ap;
2197
2198         if (!(trace_flags & TRACE_ITER_PRINTK))
2199                 return 0;
2200
2201         va_start(ap, fmt);
2202         ret = trace_array_vprintk(tr, ip, fmt, ap);
2203         va_end(ap);
2204         return ret;
2205 }
2206
2207 int trace_array_printk_buf(struct ring_buffer *buffer,
2208                            unsigned long ip, const char *fmt, ...)
2209 {
2210         int ret;
2211         va_list ap;
2212
2213         if (!(trace_flags & TRACE_ITER_PRINTK))
2214                 return 0;
2215
2216         va_start(ap, fmt);
2217         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2218         va_end(ap);
2219         return ret;
2220 }
2221
2222 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2223 {
2224         return trace_array_vprintk(&global_trace, ip, fmt, args);
2225 }
2226 EXPORT_SYMBOL_GPL(trace_vprintk);
2227
2228 static void trace_iterator_increment(struct trace_iterator *iter)
2229 {
2230         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2231
2232         iter->idx++;
2233         if (buf_iter)
2234                 ring_buffer_read(buf_iter, NULL);
2235 }
2236
2237 static struct trace_entry *
2238 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2239                 unsigned long *lost_events)
2240 {
2241         struct ring_buffer_event *event;
2242         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2243
2244         if (buf_iter)
2245                 event = ring_buffer_iter_peek(buf_iter, ts);
2246         else
2247                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2248                                          lost_events);
2249
2250         if (event) {
2251                 iter->ent_size = ring_buffer_event_length(event);
2252                 return ring_buffer_event_data(event);
2253         }
2254         iter->ent_size = 0;
2255         return NULL;
2256 }
2257
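/*
 * Peek at the next entry of every per-CPU buffer (or only the requested
 * CPU for a per_cpu trace file) and return the one with the earliest
 * timestamp, along with its CPU, lost-event count and size.
 */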
2258 static struct trace_entry *
2259 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2260                   unsigned long *missing_events, u64 *ent_ts)
2261 {
2262         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2263         struct trace_entry *ent, *next = NULL;
2264         unsigned long lost_events = 0, next_lost = 0;
2265         int cpu_file = iter->cpu_file;
2266         u64 next_ts = 0, ts;
2267         int next_cpu = -1;
2268         int next_size = 0;
2269         int cpu;
2270
2271         /*
2272          * If we are in a per_cpu trace file, don't bother iterating over
2273          * all the cpus; peek at that cpu directly.
2274          */
2275         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2276                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2277                         return NULL;
2278                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2279                 if (ent_cpu)
2280                         *ent_cpu = cpu_file;
2281
2282                 return ent;
2283         }
2284
2285         for_each_tracing_cpu(cpu) {
2286
2287                 if (ring_buffer_empty_cpu(buffer, cpu))
2288                         continue;
2289
2290                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2291
2292                 /*
2293                  * Pick the entry with the smallest timestamp:
2294                  */
2295                 if (ent && (!next || ts < next_ts)) {
2296                         next = ent;
2297                         next_cpu = cpu;
2298                         next_ts = ts;
2299                         next_lost = lost_events;
2300                         next_size = iter->ent_size;
2301                 }
2302         }
2303
2304         iter->ent_size = next_size;
2305
2306         if (ent_cpu)
2307                 *ent_cpu = next_cpu;
2308
2309         if (ent_ts)
2310                 *ent_ts = next_ts;
2311
2312         if (missing_events)
2313                 *missing_events = next_lost;
2314
2315         return next;
2316 }
2317
2318 /* Find the next real entry, without updating the iterator itself */
2319 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2320                                           int *ent_cpu, u64 *ent_ts)
2321 {
2322         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2323 }
2324
2325 /* Find the next real entry, and increment the iterator to the next entry */
2326 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2327 {
2328         iter->ent = __find_next_entry(iter, &iter->cpu,
2329                                       &iter->lost_events, &iter->ts);
2330
2331         if (iter->ent)
2332                 trace_iterator_increment(iter);
2333
2334         return iter->ent ? iter : NULL;
2335 }
2336
2337 static void trace_consume(struct trace_iterator *iter)
2338 {
2339         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2340                             &iter->lost_events);
2341 }
2342
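/*
 * seq_file ->next() callback: advance the iterator to the entry at
 * position *pos, stepping the ring buffer iterators forward as needed.
 */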
2343 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2344 {
2345         struct trace_iterator *iter = m->private;
2346         int i = (int)*pos;
2347         void *ent;
2348
2349         WARN_ON_ONCE(iter->leftover);
2350
2351         (*pos)++;
2352
2353         /* can't go backwards */
2354         if (iter->idx > i)
2355                 return NULL;
2356
2357         if (iter->idx < 0)
2358                 ent = trace_find_next_entry_inc(iter);
2359         else
2360                 ent = iter;
2361
2362         while (ent && iter->idx < i)
2363                 ent = trace_find_next_entry_inc(iter);
2364
2365         iter->pos = *pos;
2366
2367         return ent;
2368 }
2369
2370 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2371 {
2372         struct ring_buffer_event *event;
2373         struct ring_buffer_iter *buf_iter;
2374         unsigned long entries = 0;
2375         u64 ts;
2376
2377         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2378
2379         buf_iter = trace_buffer_iter(iter, cpu);
2380         if (!buf_iter)
2381                 return;
2382
2383         ring_buffer_iter_reset(buf_iter);
2384
2385         /*
2386          * With the max latency tracers, we could have the case
2387          * that a reset never took place on a cpu. This is evident
2388          * by the timestamp being before the start of the buffer.
2389          */
2390         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2391                 if (ts >= iter->trace_buffer->time_start)
2392                         break;
2393                 entries++;
2394                 ring_buffer_read(buf_iter, NULL);
2395         }
2396
2397         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2398 }
2399
2400 /*
2401  * The current tracer is copied to avoid taking a global lock
2402  * all around.
2403  */
2404 static void *s_start(struct seq_file *m, loff_t *pos)
2405 {
2406         struct trace_iterator *iter = m->private;
2407         struct trace_array *tr = iter->tr;
2408         int cpu_file = iter->cpu_file;
2409         void *p = NULL;
2410         loff_t l = 0;
2411         int cpu;
2412
2413         /*
2414          * Copy the tracer to avoid using a global lock all around.
2415          * iter->trace is a copy of current_trace; the pointer to the
2416          * name may be compared instead of using strcmp(), as iter->trace->name
2417          * will point to the same string as current_trace->name.
2418          */
2419         mutex_lock(&trace_types_lock);
2420         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2421                 *iter->trace = *tr->current_trace;
2422         mutex_unlock(&trace_types_lock);
2423
2424 #ifdef CONFIG_TRACER_MAX_TRACE
2425         if (iter->snapshot && iter->trace->use_max_tr)
2426                 return ERR_PTR(-EBUSY);
2427 #endif
2428
2429         if (!iter->snapshot)
2430                 atomic_inc(&trace_record_cmdline_disabled);
2431
2432         if (*pos != iter->pos) {
2433                 iter->ent = NULL;
2434                 iter->cpu = 0;
2435                 iter->idx = -1;
2436
2437                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2438                         for_each_tracing_cpu(cpu)
2439                                 tracing_iter_reset(iter, cpu);
2440                 } else
2441                         tracing_iter_reset(iter, cpu_file);
2442
2443                 iter->leftover = 0;
2444                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2445                         ;
2446
2447         } else {
2448                 /*
2449                  * If we overflowed the seq_file before, then we want
2450                  * to just reuse the trace_seq buffer again.
2451                  */
2452                 if (iter->leftover)
2453                         p = iter;
2454                 else {
2455                         l = *pos - 1;
2456                         p = s_next(m, p, &l);
2457                 }
2458         }
2459
2460         trace_event_read_lock();
2461         trace_access_lock(cpu_file);
2462         return p;
2463 }
2464
2465 static void s_stop(struct seq_file *m, void *p)
2466 {
2467         struct trace_iterator *iter = m->private;
2468
2469 #ifdef CONFIG_TRACER_MAX_TRACE
2470         if (iter->snapshot && iter->trace->use_max_tr)
2471                 return;
2472 #endif
2473
2474         if (!iter->snapshot)
2475                 atomic_dec(&trace_record_cmdline_disabled);
2476
2477         trace_access_unlock(iter->cpu_file);
2478         trace_event_read_unlock();
2479 }
2480
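/*
 * Sum up the events still present in the buffer ("entries") and the
 * number written overall, including those lost to overruns ("total").
 */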
2481 static void
2482 get_total_entries(struct trace_buffer *buf,
2483                   unsigned long *total, unsigned long *entries)
2484 {
2485         unsigned long count;
2486         int cpu;
2487
2488         *total = 0;
2489         *entries = 0;
2490
2491         for_each_tracing_cpu(cpu) {
2492                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2493                 /*
2494                  * If this buffer has skipped entries, then we hold all
2495                  * entries for the trace and we need to ignore the
2496                  * ones before the time stamp.
2497                  */
2498                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2499                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2500                         /* total is the same as the entries */
2501                         *total += count;
2502                 } else
2503                         *total += count +
2504                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2505                 *entries += count;
2506         }
2507 }
2508
2509 static void print_lat_help_header(struct seq_file *m)
2510 {
2511         seq_puts(m, "#                  _------=> CPU#            \n");
2512         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2513         seq_puts(m, "#                | / _----=> need-resched    \n");
2514         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2515         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2516         seq_puts(m, "#                |||| /     delay             \n");
2517         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2518         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2519 }
2520
2521 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2522 {
2523         unsigned long total;
2524         unsigned long entries;
2525
2526         get_total_entries(buf, &total, &entries);
2527         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2528                    entries, total, num_online_cpus());
2529         seq_puts(m, "#\n");
2530 }
2531
2532 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2533 {
2534         print_event_info(buf, m);
2535         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2536         seq_puts(m, "#              | |       |          |         |\n");
2537 }
2538
2539 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2540 {
2541         print_event_info(buf, m);
2542         seq_puts(m, "#                              _-----=> irqs-off\n");
2543         seq_puts(m, "#                             / _----=> need-resched\n");
2544         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2545         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2546         seq_puts(m, "#                            ||| /     delay\n");
2547         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2548         seq_puts(m, "#              | |       |   ||||       |         |\n");
2549 }
2550
2551 void
2552 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2553 {
2554         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2555         struct trace_buffer *buf = iter->trace_buffer;
2556         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2557         struct tracer *type = iter->trace;
2558         unsigned long entries;
2559         unsigned long total;
2560         const char *name = "preemption";
2561
2562         name = type->name;
2563
2564         get_total_entries(buf, &total, &entries);
2565
2566         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2567                    name, UTS_RELEASE);
2568         seq_puts(m, "# -----------------------------------"
2569                  "---------------------------------\n");
2570         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2571                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2572                    nsecs_to_usecs(data->saved_latency),
2573                    entries,
2574                    total,
2575                    buf->cpu,
2576 #if defined(CONFIG_PREEMPT_NONE)
2577                    "server",
2578 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2579                    "desktop",
2580 #elif defined(CONFIG_PREEMPT)
2581                    "preempt",
2582 #else
2583                    "unknown",
2584 #endif
2585                    /* These are reserved for later use */
2586                    0, 0, 0, 0);
2587 #ifdef CONFIG_SMP
2588         seq_printf(m, " #P:%d)\n", num_online_cpus());
2589 #else
2590         seq_puts(m, ")\n");
2591 #endif
2592         seq_puts(m, "#    -----------------\n");
2593         seq_printf(m, "#    | task: %.16s-%d "
2594                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2595                    data->comm, data->pid,
2596                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2597                    data->policy, data->rt_priority);
2598         seq_puts(m, "#    -----------------\n");
2599
2600         if (data->critical_start) {
2601                 seq_puts(m, "#  => started at: ");
2602                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2603                 trace_print_seq(m, &iter->seq);
2604                 seq_puts(m, "\n#  => ended at:   ");
2605                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2606                 trace_print_seq(m, &iter->seq);
2607                 seq_puts(m, "\n#\n");
2608         }
2609
2610         seq_puts(m, "#\n");
2611 }
2612
2613 static void test_cpu_buff_start(struct trace_iterator *iter)
2614 {
2615         struct trace_seq *s = &iter->seq;
2616
2617         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2618                 return;
2619
2620         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2621                 return;
2622
2623         if (cpumask_test_cpu(iter->cpu, iter->started))
2624                 return;
2625
2626         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2627                 return;
2628
2629         cpumask_set_cpu(iter->cpu, iter->started);
2630
2631         /* Don't print started cpu buffer for the first entry of the trace */
2632         if (iter->idx > 1)
2633                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2634                                 iter->cpu);
2635 }
2636
2637 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2638 {
2639         struct trace_seq *s = &iter->seq;
2640         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2641         struct trace_entry *entry;
2642         struct trace_event *event;
2643
2644         entry = iter->ent;
2645
2646         test_cpu_buff_start(iter);
2647
2648         event = ftrace_find_event(entry->type);
2649
2650         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2651                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2652                         if (!trace_print_lat_context(iter))
2653                                 goto partial;
2654                 } else {
2655                         if (!trace_print_context(iter))
2656                                 goto partial;
2657                 }
2658         }
2659
2660         if (event)
2661                 return event->funcs->trace(iter, sym_flags, event);
2662
2663         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2664                 goto partial;
2665
2666         return TRACE_TYPE_HANDLED;
2667 partial:
2668         return TRACE_TYPE_PARTIAL_LINE;
2669 }
2670
2671 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2672 {
2673         struct trace_seq *s = &iter->seq;
2674         struct trace_entry *entry;
2675         struct trace_event *event;
2676
2677         entry = iter->ent;
2678
2679         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2680                 if (!trace_seq_printf(s, "%d %d %llu ",
2681                                       entry->pid, iter->cpu, iter->ts))
2682                         goto partial;
2683         }
2684
2685         event = ftrace_find_event(entry->type);
2686         if (event)
2687                 return event->funcs->raw(iter, 0, event);
2688
2689         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2690                 goto partial;
2691
2692         return TRACE_TYPE_HANDLED;
2693 partial:
2694         return TRACE_TYPE_PARTIAL_LINE;
2695 }
2696
2697 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2698 {
2699         struct trace_seq *s = &iter->seq;
2700         unsigned char newline = '\n';
2701         struct trace_entry *entry;
2702         struct trace_event *event;
2703
2704         entry = iter->ent;
2705
2706         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2707                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2708                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2709                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2710         }
2711
2712         event = ftrace_find_event(entry->type);
2713         if (event) {
2714                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2715                 if (ret != TRACE_TYPE_HANDLED)
2716                         return ret;
2717         }
2718
2719         SEQ_PUT_FIELD_RET(s, newline);
2720
2721         return TRACE_TYPE_HANDLED;
2722 }
2723
2724 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2725 {
2726         struct trace_seq *s = &iter->seq;
2727         struct trace_entry *entry;
2728         struct trace_event *event;
2729
2730         entry = iter->ent;
2731
2732         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2733                 SEQ_PUT_FIELD_RET(s, entry->pid);
2734                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2735                 SEQ_PUT_FIELD_RET(s, iter->ts);
2736         }
2737
2738         event = ftrace_find_event(entry->type);
2739         return event ? event->funcs->binary(iter, 0, event) :
2740                 TRACE_TYPE_HANDLED;
2741 }
2742
2743 int trace_empty(struct trace_iterator *iter)
2744 {
2745         struct ring_buffer_iter *buf_iter;
2746         int cpu;
2747
2748         /* If we are looking at one CPU buffer, only check that one */
2749         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2750                 cpu = iter->cpu_file;
2751                 buf_iter = trace_buffer_iter(iter, cpu);
2752                 if (buf_iter) {
2753                         if (!ring_buffer_iter_empty(buf_iter))
2754                                 return 0;
2755                 } else {
2756                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2757                                 return 0;
2758                 }
2759                 return 1;
2760         }
2761
2762         for_each_tracing_cpu(cpu) {
2763                 buf_iter = trace_buffer_iter(iter, cpu);
2764                 if (buf_iter) {
2765                         if (!ring_buffer_iter_empty(buf_iter))
2766                                 return 0;
2767                 } else {
2768                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2769                                 return 0;
2770                 }
2771         }
2772
2773         return 1;
2774 }
2775
2776 /*  Called with trace_event_read_lock() held. */
2777 enum print_line_t print_trace_line(struct trace_iterator *iter)
2778 {
2779         enum print_line_t ret;
2780
2781         if (iter->lost_events &&
2782             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2783                                  iter->cpu, iter->lost_events))
2784                 return TRACE_TYPE_PARTIAL_LINE;
2785
2786         if (iter->trace && iter->trace->print_line) {
2787                 ret = iter->trace->print_line(iter);
2788                 if (ret != TRACE_TYPE_UNHANDLED)
2789                         return ret;
2790         }
2791
2792         if (iter->ent->type == TRACE_BPUTS &&
2793                         trace_flags & TRACE_ITER_PRINTK &&
2794                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2795                 return trace_print_bputs_msg_only(iter);
2796
2797         if (iter->ent->type == TRACE_BPRINT &&
2798                         trace_flags & TRACE_ITER_PRINTK &&
2799                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2800                 return trace_print_bprintk_msg_only(iter);
2801
2802         if (iter->ent->type == TRACE_PRINT &&
2803                         trace_flags & TRACE_ITER_PRINTK &&
2804                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2805                 return trace_print_printk_msg_only(iter);
2806
2807         if (trace_flags & TRACE_ITER_BIN)
2808                 return print_bin_fmt(iter);
2809
2810         if (trace_flags & TRACE_ITER_HEX)
2811                 return print_hex_fmt(iter);
2812
2813         if (trace_flags & TRACE_ITER_RAW)
2814                 return print_raw_fmt(iter);
2815
2816         return print_trace_fmt(iter);
2817 }
2818
2819 void trace_latency_header(struct seq_file *m)
2820 {
2821         struct trace_iterator *iter = m->private;
2822
2823         /* print nothing if the buffers are empty */
2824         if (trace_empty(iter))
2825                 return;
2826
2827         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2828                 print_trace_header(m, iter);
2829
2830         if (!(trace_flags & TRACE_ITER_VERBOSE))
2831                 print_lat_help_header(m);
2832 }
2833
2834 void trace_default_header(struct seq_file *m)
2835 {
2836         struct trace_iterator *iter = m->private;
2837
2838         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2839                 return;
2840
2841         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2842                 /* print nothing if the buffers are empty */
2843                 if (trace_empty(iter))
2844                         return;
2845                 print_trace_header(m, iter);
2846                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2847                         print_lat_help_header(m);
2848         } else {
2849                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2850                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2851                                 print_func_help_header_irq(iter->trace_buffer, m);
2852                         else
2853                                 print_func_help_header(iter->trace_buffer, m);
2854                 }
2855         }
2856 }
2857
2858 static void test_ftrace_alive(struct seq_file *m)
2859 {
2860         if (!ftrace_is_dead())
2861                 return;
2862         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2863         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2864 }
2865
2866 #ifdef CONFIG_TRACER_MAX_TRACE
2867 static void show_snapshot_main_help(struct seq_file *m)
2868 {
2869         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2870         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2871         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2872         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2873         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2874         seq_printf(m, "#                       is not a '0' or '1')\n");
2875 }
2876
2877 static void show_snapshot_percpu_help(struct seq_file *m)
2878 {
2879         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2880 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2881         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2882         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2883 #else
2884         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2885         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2886 #endif
2887         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2888         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2889         seq_printf(m, "#                       is not a '0' or '1')\n");
2890 }
2891
2892 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2893 {
2894         if (iter->tr->allocated_snapshot)
2895                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2896         else
2897                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2898
2899         seq_printf(m, "# Snapshot commands:\n");
2900         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2901                 show_snapshot_main_help(m);
2902         else
2903                 show_snapshot_percpu_help(m);
2904 }
2905 #else
2906 /* Should never be called */
2907 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2908 #endif
2909
2910 static int s_show(struct seq_file *m, void *v)
2911 {
2912         struct trace_iterator *iter = v;
2913         int ret;
2914
2915         if (iter->ent == NULL) {
2916                 if (iter->tr) {
2917                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2918                         seq_puts(m, "#\n");
2919                         test_ftrace_alive(m);
2920                 }
2921                 if (iter->snapshot && trace_empty(iter))
2922                         print_snapshot_help(m, iter);
2923                 else if (iter->trace && iter->trace->print_header)
2924                         iter->trace->print_header(m);
2925                 else
2926                         trace_default_header(m);
2927
2928         } else if (iter->leftover) {
2929                 /*
2930                  * If we filled the seq_file buffer earlier, we
2931                  * want to just show it now.
2932                  */
2933                 ret = trace_print_seq(m, &iter->seq);
2934
2935                 /* ret should this time be zero, but you never know */
2936                 iter->leftover = ret;
2937
2938         } else {
2939                 print_trace_line(iter);
2940                 ret = trace_print_seq(m, &iter->seq);
2941                 /*
2942                  * If we overflow the seq_file buffer, then it will
2943                  * ask us for this data again at start up.
2944                  * Use that instead.
2945                  *  ret is 0 if seq_file write succeeded.
2946                  *        -1 otherwise.
2947                  */
2948                 iter->leftover = ret;
2949         }
2950
2951         return 0;
2952 }
2953
2954 /*
2955  * Should be used after trace_array_get(); trace_types_lock
2956  * ensures that i_cdev was already initialized.
2957  */
2958 static inline int tracing_get_cpu(struct inode *inode)
2959 {
2960         if (inode->i_cdev) /* See trace_create_cpu_file() */
2961                 return (long)inode->i_cdev - 1;
2962         return RING_BUFFER_ALL_CPUS;
2963 }
2964
2965 static const struct seq_operations tracer_seq_ops = {
2966         .start          = s_start,
2967         .next           = s_next,
2968         .stop           = s_stop,
2969         .show           = s_show,
2970 };
2971
2972 static struct trace_iterator *
2973 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2974 {
2975         struct trace_array *tr = inode->i_private;
2976         struct trace_iterator *iter;
2977         int cpu;
2978
2979         if (tracing_disabled)
2980                 return ERR_PTR(-ENODEV);
2981
2982         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2983         if (!iter)
2984                 return ERR_PTR(-ENOMEM);
2985
2986         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2987                                     GFP_KERNEL);
2988         if (!iter->buffer_iter)
2989                 goto release;
2990
2991         /*
2992          * We make a copy of the current tracer to avoid concurrent
2993          * changes on it while we are reading.
2994          */
2995         mutex_lock(&trace_types_lock);
2996         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2997         if (!iter->trace)
2998                 goto fail;
2999
3000         *iter->trace = *tr->current_trace;
3001
3002         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3003                 goto fail;
3004
3005         iter->tr = tr;
3006
3007 #ifdef CONFIG_TRACER_MAX_TRACE
3008         /* Currently only the top directory has a snapshot */
3009         if (tr->current_trace->print_max || snapshot)
3010                 iter->trace_buffer = &tr->max_buffer;
3011         else
3012 #endif
3013                 iter->trace_buffer = &tr->trace_buffer;
3014         iter->snapshot = snapshot;
3015         iter->pos = -1;
3016         iter->cpu_file = tracing_get_cpu(inode);
3017         mutex_init(&iter->mutex);
3018
3019         /* Notify the tracer early; before we stop tracing. */
3020         if (iter->trace && iter->trace->open)
3021                 iter->trace->open(iter);
3022
3023         /* Annotate start of buffers if we had overruns */
3024         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3025                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3026
3027         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3028         if (trace_clocks[tr->clock_id].in_ns)
3029                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3030
3031         /* stop the trace while dumping if we are not opening "snapshot" */
3032         if (!iter->snapshot)
3033                 tracing_stop_tr(tr);
3034
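        /* Set up a non-consuming ring buffer iterator for each CPU being read. */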
3035         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3036                 for_each_tracing_cpu(cpu) {
3037                         iter->buffer_iter[cpu] =
3038                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3039                 }
3040                 ring_buffer_read_prepare_sync();
3041                 for_each_tracing_cpu(cpu) {
3042                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3043                         tracing_iter_reset(iter, cpu);
3044                 }
3045         } else {
3046                 cpu = iter->cpu_file;
3047                 iter->buffer_iter[cpu] =
3048                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3049                 ring_buffer_read_prepare_sync();
3050                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3051                 tracing_iter_reset(iter, cpu);
3052         }
3053
3054         mutex_unlock(&trace_types_lock);
3055
3056         return iter;
3057
3058  fail:
3059         mutex_unlock(&trace_types_lock);
3060         kfree(iter->trace);
3061         kfree(iter->buffer_iter);
3062 release:
3063         seq_release_private(inode, file);
3064         return ERR_PTR(-ENOMEM);
3065 }
3066
3067 int tracing_open_generic(struct inode *inode, struct file *filp)
3068 {
3069         if (tracing_disabled)
3070                 return -ENODEV;
3071
3072         filp->private_data = inode->i_private;
3073         return 0;
3074 }
3075
3076 bool tracing_is_disabled(void)
3077 {
3078         return tracing_disabled ? true : false;
3079 }
3080
3081 /*
3082  * Open and update trace_array ref count.
3083  * Must have the current trace_array passed to it.
3084  */
3085 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3086 {
3087         struct trace_array *tr = inode->i_private;
3088
3089         if (tracing_disabled)
3090                 return -ENODEV;
3091
3092         if (trace_array_get(tr) < 0)
3093                 return -ENODEV;
3094
3095         filp->private_data = inode->i_private;
3096
3097         return 0;
3098 }
3099
3100 static int tracing_release(struct inode *inode, struct file *file)
3101 {
3102         struct trace_array *tr = inode->i_private;
3103         struct seq_file *m = file->private_data;
3104         struct trace_iterator *iter;
3105         int cpu;
3106
3107         if (!(file->f_mode & FMODE_READ)) {
3108                 trace_array_put(tr);
3109                 return 0;
3110         }
3111
3112         /* Writes do not use seq_file */
3113         iter = m->private;
3114         mutex_lock(&trace_types_lock);
3115
3116         for_each_tracing_cpu(cpu) {
3117                 if (iter->buffer_iter[cpu])
3118                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3119         }
3120
3121         if (iter->trace && iter->trace->close)
3122                 iter->trace->close(iter);
3123
3124         if (!iter->snapshot)
3125                 /* reenable tracing if it was previously enabled */
3126                 tracing_start_tr(tr);
3127
3128         __trace_array_put(tr);
3129
3130         mutex_unlock(&trace_types_lock);
3131
3132         mutex_destroy(&iter->mutex);
3133         free_cpumask_var(iter->started);
3134         kfree(iter->trace);
3135         kfree(iter->buffer_iter);
3136         seq_release_private(inode, file);
3137
3138         return 0;
3139 }
3140
3141 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3142 {
3143         struct trace_array *tr = inode->i_private;
3144
3145         trace_array_put(tr);
3146         return 0;
3147 }
3148
3149 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3150 {
3151         struct trace_array *tr = inode->i_private;
3152
3153         trace_array_put(tr);
3154
3155         return single_release(inode, file);
3156 }
3157
3158 static int tracing_open(struct inode *inode, struct file *file)
3159 {
3160         struct trace_array *tr = inode->i_private;
3161         struct trace_iterator *iter;
3162         int ret = 0;
3163
3164         if (trace_array_get(tr) < 0)
3165                 return -ENODEV;
3166
3167         /* If this file was open for write, then erase contents */
3168         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3169                 int cpu = tracing_get_cpu(inode);
3170
3171                 if (cpu == RING_BUFFER_ALL_CPUS)
3172                         tracing_reset_online_cpus(&tr->trace_buffer);
3173                 else
3174                         tracing_reset(&tr->trace_buffer, cpu);
3175         }
3176
3177         if (file->f_mode & FMODE_READ) {
3178                 iter = __tracing_open(inode, file, false);
3179                 if (IS_ERR(iter))
3180                         ret = PTR_ERR(iter);
3181                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3182                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3183         }
3184
3185         if (ret < 0)
3186                 trace_array_put(tr);
3187
3188         return ret;
3189 }
3190
3191 /*
3192  * Some tracers are not suitable for instance buffers.
3193  * A tracer is always available for the global array (toplevel)
3194  * or if it explicitly states that it is.
3195  */
3196 static bool
3197 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3198 {
3199         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3200 }
3201
3202 /* Find the next tracer that this trace array may use */
3203 static struct tracer *
3204 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3205 {
3206         while (t && !trace_ok_for_array(t, tr))
3207                 t = t->next;
3208
3209         return t;
3210 }
3211
3212 static void *
3213 t_next(struct seq_file *m, void *v, loff_t *pos)
3214 {
3215         struct trace_array *tr = m->private;
3216         struct tracer *t = v;
3217
3218         (*pos)++;
3219
3220         if (t)
3221                 t = get_tracer_for_array(tr, t->next);
3222
3223         return t;
3224 }
3225
3226 static void *t_start(struct seq_file *m, loff_t *pos)
3227 {
3228         struct trace_array *tr = m->private;
3229         struct tracer *t;
3230         loff_t l = 0;
3231
3232         mutex_lock(&trace_types_lock);
3233
3234         t = get_tracer_for_array(tr, trace_types);
3235         for (; t && l < *pos; t = t_next(m, t, &l))
3236                         ;
3237
3238         return t;
3239 }
3240
3241 static void t_stop(struct seq_file *m, void *p)
3242 {
3243         mutex_unlock(&trace_types_lock);
3244 }
3245
3246 static int t_show(struct seq_file *m, void *v)
3247 {
3248         struct tracer *t = v;
3249
3250         if (!t)
3251                 return 0;
3252
3253         seq_printf(m, "%s", t->name);
3254         if (t->next)
3255                 seq_putc(m, ' ');
3256         else
3257                 seq_putc(m, '\n');
3258
3259         return 0;
3260 }
3261
3262 static const struct seq_operations show_traces_seq_ops = {
3263         .start          = t_start,
3264         .next           = t_next,
3265         .stop           = t_stop,
3266         .show           = t_show,
3267 };
3268
3269 static int show_traces_open(struct inode *inode, struct file *file)
3270 {
3271         struct trace_array *tr = inode->i_private;
3272         struct seq_file *m;
3273         int ret;
3274
3275         if (tracing_disabled)
3276                 return -ENODEV;
3277
3278         ret = seq_open(file, &show_traces_seq_ops);
3279         if (ret)
3280                 return ret;
3281
3282         m = file->private_data;
3283         m->private = tr;
3284
3285         return 0;
3286 }
3287
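/*
 * Reading "available_tracers" (opened via show_traces_fops below) runs the
 * seq_file iteration above and prints the tracers usable for this instance
 * on one space-separated line, e.g. (illustrative output; the exact set
 * depends on the kernel configuration):
 *
 *	# cat available_tracers
 *	function_graph function nop
 */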
3288 static ssize_t
3289 tracing_write_stub(struct file *filp, const char __user *ubuf,
3290                    size_t count, loff_t *ppos)
3291 {
3292         return count;
3293 }
3294
3295 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3296 {
3297         int ret;
3298
3299         if (file->f_mode & FMODE_READ)
3300                 ret = seq_lseek(file, offset, whence);
3301         else
3302                 file->f_pos = ret = 0;
3303
3304         return ret;
3305 }
3306
3307 static const struct file_operations tracing_fops = {
3308         .open           = tracing_open,
3309         .read           = seq_read,
3310         .write          = tracing_write_stub,
3311         .llseek         = tracing_lseek,
3312         .release        = tracing_release,
3313 };
3314
3315 static const struct file_operations show_traces_fops = {
3316         .open           = show_traces_open,
3317         .read           = seq_read,
3318         .release        = seq_release,
3319         .llseek         = seq_lseek,
3320 };
3321
3322 /*
3323  * The tracer itself will not take this lock, but we still want
3324  * to provide a consistent cpumask to user-space:
3325  */
3326 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3327
3328 /*
3329  * Temporary storage for the character representation of the
3330  * CPU bitmask (and one more byte for the newline):
3331  */
3332 static char mask_str[NR_CPUS + 1];
3333
3334 static ssize_t
3335 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3336                      size_t count, loff_t *ppos)
3337 {
3338         struct trace_array *tr = file_inode(filp)->i_private;
3339         int len;
3340
3341         mutex_lock(&tracing_cpumask_update_lock);
3342
3343         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3344         if (count - len < 2) {
3345                 count = -EINVAL;
3346                 goto out_err;
3347         }
3348         len += sprintf(mask_str + len, "\n");
3349         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3350
3351 out_err:
3352         mutex_unlock(&tracing_cpumask_update_lock);
3353
3354         return count;
3355 }
3356
3357 static ssize_t
3358 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3359                       size_t count, loff_t *ppos)
3360 {
3361         struct trace_array *tr = file_inode(filp)->i_private;
3362         cpumask_var_t tracing_cpumask_new;
3363         int err, cpu;
3364
3365         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3366                 return -ENOMEM;
3367
3368         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3369         if (err)
3370                 goto err_unlock;
3371
3372         mutex_lock(&tracing_cpumask_update_lock);
3373
3374         local_irq_disable();
3375         arch_spin_lock(&tr->max_lock);
3376         for_each_tracing_cpu(cpu) {
3377                 /*
3378                  * Increase/decrease the disabled counter if we are
3379                  * about to flip a bit in the cpumask:
3380                  */
3381                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3382                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3383                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3384                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3385                 }
3386                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3387                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3388                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3389                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3390                 }
3391         }
3392         arch_spin_unlock(&tr->max_lock);
3393         local_irq_enable();
3394
3395         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3396
3397         mutex_unlock(&tracing_cpumask_update_lock);
3398         free_cpumask_var(tracing_cpumask_new);
3399
3400         return count;
3401
3402 err_unlock:
3403         free_cpumask_var(tracing_cpumask_new);
3404
3405         return err;
3406 }
3407
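/*
 * Illustrative usage of the cpumask file handled by the read/write methods
 * above (path assumes tracefs mounted at /sys/kernel/debug/tracing):
 *
 *	# cat tracing_cpumask
 *	f
 *	# echo 3 > tracing_cpumask		(trace only CPUs 0 and 1)
 *
 * The mask is parsed as hex by cpumask_parse_user(); CPUs whose bit is
 * cleared have their per-cpu recording disabled by the write handler.
 */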
3408 static const struct file_operations tracing_cpumask_fops = {
3409         .open           = tracing_open_generic_tr,
3410         .read           = tracing_cpumask_read,
3411         .write          = tracing_cpumask_write,
3412         .release        = tracing_release_generic_tr,
3413         .llseek         = generic_file_llseek,
3414 };
3415
3416 static int tracing_trace_options_show(struct seq_file *m, void *v)
3417 {
3418         struct tracer_opt *trace_opts;
3419         struct trace_array *tr = m->private;
3420         u32 tracer_flags;
3421         int i;
3422
3423         mutex_lock(&trace_types_lock);
3424         tracer_flags = tr->current_trace->flags->val;
3425         trace_opts = tr->current_trace->flags->opts;
3426
3427         for (i = 0; trace_options[i]; i++) {
3428                 if (trace_flags & (1 << i))
3429                         seq_printf(m, "%s\n", trace_options[i]);
3430                 else
3431                         seq_printf(m, "no%s\n", trace_options[i]);
3432         }
3433
3434         for (i = 0; trace_opts[i].name; i++) {
3435                 if (tracer_flags & trace_opts[i].bit)
3436                         seq_printf(m, "%s\n", trace_opts[i].name);
3437                 else
3438                         seq_printf(m, "no%s\n", trace_opts[i].name);
3439         }
3440         mutex_unlock(&trace_types_lock);
3441
3442         return 0;
3443 }
3444
3445 static int __set_tracer_option(struct trace_array *tr,
3446                                struct tracer_flags *tracer_flags,
3447                                struct tracer_opt *opts, int neg)
3448 {
3449         struct tracer *trace = tr->current_trace;
3450         int ret;
3451
3452         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3453         if (ret)
3454                 return ret;
3455
3456         if (neg)
3457                 tracer_flags->val &= ~opts->bit;
3458         else
3459                 tracer_flags->val |= opts->bit;
3460         return 0;
3461 }
3462
3463 /* Try to assign a tracer specific option */
3464 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3465 {
3466         struct tracer *trace = tr->current_trace;
3467         struct tracer_flags *tracer_flags = trace->flags;
3468         struct tracer_opt *opts = NULL;
3469         int i;
3470
3471         for (i = 0; tracer_flags->opts[i].name; i++) {
3472                 opts = &tracer_flags->opts[i];
3473
3474                 if (strcmp(cmp, opts->name) == 0)
3475                         return __set_tracer_option(tr, trace->flags, opts, neg);
3476         }
3477
3478         return -EINVAL;
3479 }
3480
3481 /* Some tracers require overwrite to stay enabled */
3482 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3483 {
3484         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3485                 return -1;
3486
3487         return 0;
3488 }
3489
3490 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3491 {
3492         /* do nothing if flag is already set */
3493         if (!!(trace_flags & mask) == !!enabled)
3494                 return 0;
3495
3496         /* Give the tracer a chance to approve the change */
3497         if (tr->current_trace->flag_changed)
3498                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3499                         return -EINVAL;
3500
3501         if (enabled)
3502                 trace_flags |= mask;
3503         else
3504                 trace_flags &= ~mask;
3505
3506         if (mask == TRACE_ITER_RECORD_CMD)
3507                 trace_event_enable_cmd_record(enabled);
3508
3509         if (mask == TRACE_ITER_OVERWRITE) {
3510                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3511 #ifdef CONFIG_TRACER_MAX_TRACE
3512                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3513 #endif
3514         }
3515
3516         if (mask == TRACE_ITER_PRINTK)
3517                 trace_printk_start_stop_comm(enabled);
3518
3519         return 0;
3520 }
3521
3522 static int trace_set_options(struct trace_array *tr, char *option)
3523 {
3524         char *cmp;
3525         int neg = 0;
3526         int ret = -ENODEV;
3527         int i;
3528
3529         cmp = strstrip(option);
3530
3531         if (strncmp(cmp, "no", 2) == 0) {
3532                 neg = 1;
3533                 cmp += 2;
3534         }
3535
3536         mutex_lock(&trace_types_lock);
3537
3538         for (i = 0; trace_options[i]; i++) {
3539                 if (strcmp(cmp, trace_options[i]) == 0) {
3540                         ret = set_tracer_flag(tr, 1 << i, !neg);
3541                         break;
3542                 }
3543         }
3544
3545         /* If no option could be set, test the specific tracer options */
3546         if (!trace_options[i])
3547                 ret = set_tracer_option(tr, cmp, neg);
3548
3549         mutex_unlock(&trace_types_lock);
3550
3551         return ret;
3552 }
3553
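/*
 * A sketch of how the option parsing above is exercised from user space
 * (illustrative; the available option names depend on the kernel
 * configuration and on the current tracer):
 *
 *	# cat trace_options		(each option listed as "name" or "noname")
 *	# echo nooverwrite > trace_options
 *	# echo sym-offset > trace_options
 *
 * Core options map to bits in trace_flags; anything not recognized there
 * is handed to the current tracer via set_tracer_option().
 */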
3554 static ssize_t
3555 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3556                         size_t cnt, loff_t *ppos)
3557 {
3558         struct seq_file *m = filp->private_data;
3559         struct trace_array *tr = m->private;
3560         char buf[64];
3561         int ret;
3562
3563         if (cnt >= sizeof(buf))
3564                 return -EINVAL;
3565
3566         if (copy_from_user(&buf, ubuf, cnt))
3567                 return -EFAULT;
3568
3569         buf[cnt] = 0;
3570
3571         ret = trace_set_options(tr, buf);
3572         if (ret < 0)
3573                 return ret;
3574
3575         *ppos += cnt;
3576
3577         return cnt;
3578 }
3579
3580 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3581 {
3582         struct trace_array *tr = inode->i_private;
3583         int ret;
3584
3585         if (tracing_disabled)
3586                 return -ENODEV;
3587
3588         if (trace_array_get(tr) < 0)
3589                 return -ENODEV;
3590
3591         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3592         if (ret < 0)
3593                 trace_array_put(tr);
3594
3595         return ret;
3596 }
3597
3598 static const struct file_operations tracing_iter_fops = {
3599         .open           = tracing_trace_options_open,
3600         .read           = seq_read,
3601         .llseek         = seq_lseek,
3602         .release        = tracing_single_release_tr,
3603         .write          = tracing_trace_options_write,
3604 };
3605
3606 static const char readme_msg[] =
3607         "tracing mini-HOWTO:\n\n"
3608         "# echo 0 > tracing_on : quick way to disable tracing\n"
3609         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3610         " Important files:\n"
3611         "  trace\t\t\t- The static contents of the buffer\n"
3612         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3613         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3614         "  current_tracer\t- function and latency tracers\n"
3615         "  available_tracers\t- list of configured tracers for current_tracer\n"
3616         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3617         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3618         "  trace_clock\t\t- change the clock used to order events\n"
3619         "       local:   Per cpu clock but may not be synced across CPUs\n"
3620         "      global:   Synced across CPUs but slows tracing down.\n"
3621         "     counter:   Not a clock, but just an increment\n"
3622         "      uptime:   Jiffy counter from time of boot\n"
3623         "        perf:   Same clock that perf events use\n"
3624 #ifdef CONFIG_X86_64
3625         "     x86-tsc:   TSC cycle counter\n"
3626 #endif
3627         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
3628         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3629         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3630         "\t\t\t  Remove sub-buffer with rmdir\n"
3631         "  trace_options\t\t- Set format or modify how tracing happens\n"
3632         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
3633         "\t\t\t  option name\n"
3634         "  saved_cmdlines_size\t- echo the number of comm-pid entries to store in here\n"
3635 #ifdef CONFIG_DYNAMIC_FTRACE
3636         "\n  available_filter_functions - list of functions that can be filtered on\n"
3637         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3638         "\t\t\t  functions\n"
3639         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3640         "\t     modules: Can select a group via module\n"
3641         "\t      Format: :mod:<module-name>\n"
3642         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3643         "\t    triggers: a command to perform when function is hit\n"
3644         "\t      Format: <function>:<trigger>[:count]\n"
3645         "\t     trigger: traceon, traceoff\n"
3646         "\t\t      enable_event:<system>:<event>\n"
3647         "\t\t      disable_event:<system>:<event>\n"
3648 #ifdef CONFIG_STACKTRACE
3649         "\t\t      stacktrace\n"
3650 #endif
3651 #ifdef CONFIG_TRACER_SNAPSHOT
3652         "\t\t      snapshot\n"
3653 #endif
3654         "\t\t      dump\n"
3655         "\t\t      cpudump\n"
3656         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3657         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3658         "\t     The first one will disable tracing every time do_fault is hit\n"
3659         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3660         "\t       The first time do_trap is hit and it disables tracing, the\n"
3661         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3662         "\t       the counter will not decrement. It only decrements when the\n"
3663         "\t       trigger did work\n"
3664         "\t     To remove trigger without count:\n"
3665         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3666         "\t     To remove trigger with a count:\n"
3667         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3668         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3669         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3670         "\t    modules: Can select a group via module command :mod:\n"
3671         "\t    Does not accept triggers\n"
3672 #endif /* CONFIG_DYNAMIC_FTRACE */
3673 #ifdef CONFIG_FUNCTION_TRACER
3674         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3675         "\t\t    (function)\n"
3676 #endif
3677 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3678         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3679         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3680         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3681 #endif
3682 #ifdef CONFIG_TRACER_SNAPSHOT
3683         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3684         "\t\t\t  snapshot buffer. Read the contents for more\n"
3685         "\t\t\t  information\n"
3686 #endif
3687 #ifdef CONFIG_STACK_TRACER
3688         "  stack_trace\t\t- Shows the max stack trace when active\n"
3689         "  stack_max_size\t- Shows current max stack size that was traced\n"
3690         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3691         "\t\t\t  new trace)\n"
3692 #ifdef CONFIG_DYNAMIC_FTRACE
3693         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3694         "\t\t\t  traces\n"
3695 #endif
3696 #endif /* CONFIG_STACK_TRACER */
3697         "  events/\t\t- Directory containing all trace event subsystems:\n"
3698         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3699         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3700         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3701         "\t\t\t  events\n"
3702         "      filter\t\t- If set, only events passing filter are traced\n"
3703         "  events/<system>/<event>/\t- Directory containing control files for\n"
3704         "\t\t\t  <event>:\n"
3705         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3706         "      filter\t\t- If set, only events passing filter are traced\n"
3707         "      trigger\t\t- If set, a command to perform when event is hit\n"
3708         "\t    Format: <trigger>[:count][if <filter>]\n"
3709         "\t   trigger: traceon, traceoff\n"
3710         "\t            enable_event:<system>:<event>\n"
3711         "\t            disable_event:<system>:<event>\n"
3712 #ifdef CONFIG_STACKTRACE
3713         "\t\t    stacktrace\n"
3714 #endif
3715 #ifdef CONFIG_TRACER_SNAPSHOT
3716         "\t\t    snapshot\n"
3717 #endif
3718         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3719         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3720         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3721         "\t                  events/block/block_unplug/trigger\n"
3722         "\t   The first disables tracing every time block_unplug is hit.\n"
3723         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3724         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3725         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
3726         "\t   Like function triggers, the counter is only decremented if it\n"
3727         "\t    enabled or disabled tracing.\n"
3728         "\t   To remove a trigger without a count:\n"
3729         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3730         "\t   To remove a trigger with a count:\n"
3731         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3732         "\t   Filters can be ignored when removing a trigger.\n"
3733 ;
3734
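/*
 * A minimal session matching the mini-HOWTO above (illustrative; the
 * tracefs mount point may also be /sys/kernel/tracing):
 *
 *	# cd /sys/kernel/debug/tracing
 *	# echo function > current_tracer
 *	# echo 1 > tracing_on
 *	# cat trace_pipe | head
 *	# echo 0 > tracing_on
 *	# echo nop > current_tracer
 */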
3735 static ssize_t
3736 tracing_readme_read(struct file *filp, char __user *ubuf,
3737                        size_t cnt, loff_t *ppos)
3738 {
3739         return simple_read_from_buffer(ubuf, cnt, ppos,
3740                                         readme_msg, strlen(readme_msg));
3741 }
3742
3743 static const struct file_operations tracing_readme_fops = {
3744         .open           = tracing_open_generic,
3745         .read           = tracing_readme_read,
3746         .llseek         = generic_file_llseek,
3747 };
3748
3749 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3750 {
3751         unsigned int *ptr = v;
3752
3753         if (*pos || m->count)
3754                 ptr++;
3755
3756         (*pos)++;
3757
3758         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3759              ptr++) {
3760                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3761                         continue;
3762
3763                 return ptr;
3764         }
3765
3766         return NULL;
3767 }
3768
3769 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3770 {
3771         void *v;
3772         loff_t l = 0;
3773
3774         preempt_disable();
3775         arch_spin_lock(&trace_cmdline_lock);
3776
3777         v = &savedcmd->map_cmdline_to_pid[0];
3778         while (l <= *pos) {
3779                 v = saved_cmdlines_next(m, v, &l);
3780                 if (!v)
3781                         return NULL;
3782         }
3783
3784         return v;
3785 }
3786
3787 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3788 {
3789         arch_spin_unlock(&trace_cmdline_lock);
3790         preempt_enable();
3791 }
3792
3793 static int saved_cmdlines_show(struct seq_file *m, void *v)
3794 {
3795         char buf[TASK_COMM_LEN];
3796         unsigned int *pid = v;
3797
3798         __trace_find_cmdline(*pid, buf);
3799         seq_printf(m, "%d %s\n", *pid, buf);
3800         return 0;
3801 }
3802
3803 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3804         .start          = saved_cmdlines_start,
3805         .next           = saved_cmdlines_next,
3806         .stop           = saved_cmdlines_stop,
3807         .show           = saved_cmdlines_show,
3808 };
3809
3810 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3811 {
3812         if (tracing_disabled)
3813                 return -ENODEV;
3814
3815         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3816 }
3817
3818 static const struct file_operations tracing_saved_cmdlines_fops = {
3819         .open           = tracing_saved_cmdlines_open,
3820         .read           = seq_read,
3821         .llseek         = seq_lseek,
3822         .release        = seq_release,
3823 };
3824
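/*
 * Reading "saved_cmdlines" walks the pid->comm cache with the seq_file
 * operations above; each line is "<pid> <comm>", e.g. (illustrative):
 *
 *	# cat saved_cmdlines
 *	1 systemd
 *	625 sshd
 */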
3825 static ssize_t
3826 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3827                                  size_t cnt, loff_t *ppos)
3828 {
3829         char buf[64];
3830         int r;
3831
3832         arch_spin_lock(&trace_cmdline_lock);
3833         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3834         arch_spin_unlock(&trace_cmdline_lock);
3835
3836         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3837 }
3838
3839 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3840 {
3841         kfree(s->saved_cmdlines);
3842         kfree(s->map_cmdline_to_pid);
3843         kfree(s);
3844 }
3845
3846 static int tracing_resize_saved_cmdlines(unsigned int val)
3847 {
3848         struct saved_cmdlines_buffer *s, *savedcmd_temp;
3849
3850         s = kmalloc(sizeof(*s), GFP_KERNEL);
3851         if (!s)
3852                 return -ENOMEM;
3853
3854         if (allocate_cmdlines_buffer(val, s) < 0) {
3855                 kfree(s);
3856                 return -ENOMEM;
3857         }
3858
3859         arch_spin_lock(&trace_cmdline_lock);
3860         savedcmd_temp = savedcmd;
3861         savedcmd = s;
3862         arch_spin_unlock(&trace_cmdline_lock);
3863         free_saved_cmdlines_buffer(savedcmd_temp);
3864
3865         return 0;
3866 }
3867
3868 static ssize_t
3869 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3870                                   size_t cnt, loff_t *ppos)
3871 {
3872         unsigned long val;
3873         int ret;
3874
3875         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3876         if (ret)
3877                 return ret;
3878
3879         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
3880         if (!val || val > PID_MAX_DEFAULT)
3881                 return -EINVAL;
3882
3883         ret = tracing_resize_saved_cmdlines((unsigned int)val);
3884         if (ret < 0)
3885                 return ret;
3886
3887         *ppos += cnt;
3888
3889         return cnt;
3890 }
3891
3892 static const struct file_operations tracing_saved_cmdlines_size_fops = {
3893         .open           = tracing_open_generic,
3894         .read           = tracing_saved_cmdlines_size_read,
3895         .write          = tracing_saved_cmdlines_size_write,
3896 };
3897
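/*
 * Illustrative use of the size file defined above; the value written is
 * the number of pid->comm entries to keep (bounded by PID_MAX_DEFAULT):
 *
 *	# cat saved_cmdlines_size
 *	128
 *	# echo 1024 > saved_cmdlines_size
 */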
3898 static ssize_t
3899 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3900                        size_t cnt, loff_t *ppos)
3901 {
3902         struct trace_array *tr = filp->private_data;
3903         char buf[MAX_TRACER_SIZE+2];
3904         int r;
3905
3906         mutex_lock(&trace_types_lock);
3907         r = sprintf(buf, "%s\n", tr->current_trace->name);
3908         mutex_unlock(&trace_types_lock);
3909
3910         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3911 }
3912
3913 int tracer_init(struct tracer *t, struct trace_array *tr)
3914 {
3915         tracing_reset_online_cpus(&tr->trace_buffer);
3916         return t->init(tr);
3917 }
3918
3919 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3920 {
3921         int cpu;
3922
3923         for_each_tracing_cpu(cpu)
3924                 per_cpu_ptr(buf->data, cpu)->entries = val;
3925 }
3926
3927 #ifdef CONFIG_TRACER_MAX_TRACE
3928 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3929 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3930                                         struct trace_buffer *size_buf, int cpu_id)
3931 {
3932         int cpu, ret = 0;
3933
3934         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3935                 for_each_tracing_cpu(cpu) {
3936                         ret = ring_buffer_resize(trace_buf->buffer,
3937                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3938                         if (ret < 0)
3939                                 break;
3940                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3941                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3942                 }
3943         } else {
3944                 ret = ring_buffer_resize(trace_buf->buffer,
3945                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3946                 if (ret == 0)
3947                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3948                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3949         }
3950
3951         return ret;
3952 }
3953 #endif /* CONFIG_TRACER_MAX_TRACE */
3954
3955 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3956                                         unsigned long size, int cpu)
3957 {
3958         int ret;
3959
3960         /*
3961          * If kernel or user changes the size of the ring buffer
3962          * we use the size that was given, and we can forget about
3963          * expanding it later.
3964          */
3965         ring_buffer_expanded = true;
3966
3967         /* May be called before buffers are initialized */
3968         if (!tr->trace_buffer.buffer)
3969                 return 0;
3970
3971         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3972         if (ret < 0)
3973                 return ret;
3974
3975 #ifdef CONFIG_TRACER_MAX_TRACE
3976         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3977             !tr->current_trace->use_max_tr)
3978                 goto out;
3979
3980         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3981         if (ret < 0) {
3982                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3983                                                      &tr->trace_buffer, cpu);
3984                 if (r < 0) {
3985                         /*
3986                          * AARGH! We are left with different
3987                          * size max buffer!!!!
3988                          * The max buffer is our "snapshot" buffer.
3989                          * When a tracer needs a snapshot (one of the
3990                          * latency tracers), it swaps the max buffer
3991                          * with the saved snapshot. We succeeded in updating
3992                          * the size of the main buffer, but failed to
3993                          * update the size of the max buffer. But when we tried
3994                          * to reset the main buffer to the original size, we
3995                          * failed there too. This is very unlikely to
3996                          * happen, but if it does, warn and kill all
3997                          * tracing.
3998                          */
3999                         WARN_ON(1);
4000                         tracing_disabled = 1;
4001                 }
4002                 return ret;
4003         }
4004
4005         if (cpu == RING_BUFFER_ALL_CPUS)
4006                 set_buffer_entries(&tr->max_buffer, size);
4007         else
4008                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4009
4010  out:
4011 #endif /* CONFIG_TRACER_MAX_TRACE */
4012
4013         if (cpu == RING_BUFFER_ALL_CPUS)
4014                 set_buffer_entries(&tr->trace_buffer, size);
4015         else
4016                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4017
4018         return ret;
4019 }
4020
4021 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4022                                           unsigned long size, int cpu_id)
4023 {
4024         int ret = size;
4025
4026         mutex_lock(&trace_types_lock);
4027
4028         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4029                 /* make sure this cpu is enabled in the mask */
4030                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4031                         ret = -EINVAL;
4032                         goto out;
4033                 }
4034         }
4035
4036         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4037         if (ret < 0)
4038                 ret = -ENOMEM;
4039
4040 out:
4041         mutex_unlock(&trace_types_lock);
4042
4043         return ret;
4044 }
4045
4046
4047 /**
4048  * tracing_update_buffers - used by tracing facility to expand ring buffers
4049  *
4050  * To save memory when tracing is never used on a system that has it
4051  * configured in, the ring buffers are set to a minimum size. But once
4052  * a user starts to use the tracing facility, they need to grow
4053  * to their default size.
4054  *
4055  * This function is to be called when a tracer is about to be used.
4056  */
4057 int tracing_update_buffers(void)
4058 {
4059         int ret = 0;
4060
4061         mutex_lock(&trace_types_lock);
4062         if (!ring_buffer_expanded)
4063                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4064                                                 RING_BUFFER_ALL_CPUS);
4065         mutex_unlock(&trace_types_lock);
4066
4067         return ret;
4068 }
4069
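/*
 * A sketch of the expected call pattern for tracing_update_buffers()
 * (illustrative): any path that is about to start producing trace data
 * should expand the buffers first, e.g.
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	... enable the tracer or event ...
 */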
4070 struct trace_option_dentry;
4071
4072 static struct trace_option_dentry *
4073 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4074
4075 static void
4076 destroy_trace_option_files(struct trace_option_dentry *topts);
4077
4078 /*
4079  * Used to clear out the tracer before deletion of an instance.
4080  * Must have trace_types_lock held.
4081  */
4082 static void tracing_set_nop(struct trace_array *tr)
4083 {
4084         if (tr->current_trace == &nop_trace)
4085                 return;
4086
4087         tr->current_trace->enabled--;
4088
4089         if (tr->current_trace->reset)
4090                 tr->current_trace->reset(tr);
4091
4092         tr->current_trace = &nop_trace;
4093 }
4094
4095 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4096 {
4097         static struct trace_option_dentry *topts;
4098         struct tracer *t;
4099 #ifdef CONFIG_TRACER_MAX_TRACE
4100         bool had_max_tr;
4101 #endif
4102         int ret = 0;
4103
4104         mutex_lock(&trace_types_lock);
4105
4106         if (!ring_buffer_expanded) {
4107                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4108                                                 RING_BUFFER_ALL_CPUS);
4109                 if (ret < 0)
4110                         goto out;
4111                 ret = 0;
4112         }
4113
4114         for (t = trace_types; t; t = t->next) {
4115                 if (strcmp(t->name, buf) == 0)
4116                         break;
4117         }
4118         if (!t) {
4119                 ret = -EINVAL;
4120                 goto out;
4121         }
4122         if (t == tr->current_trace)
4123                 goto out;
4124
4125         /* Some tracers are only allowed for the top level buffer */
4126         if (!trace_ok_for_array(t, tr)) {
4127                 ret = -EINVAL;
4128                 goto out;
4129         }
4130
4131         trace_branch_disable();
4132
4133         tr->current_trace->enabled--;
4134
4135         if (tr->current_trace->reset)
4136                 tr->current_trace->reset(tr);
4137
4138         /* Current trace needs to be nop_trace before synchronize_sched */
4139         tr->current_trace = &nop_trace;
4140
4141 #ifdef CONFIG_TRACER_MAX_TRACE
4142         had_max_tr = tr->allocated_snapshot;
4143
4144         if (had_max_tr && !t->use_max_tr) {
4145                 /*
4146                  * We need to make sure that the update_max_tr sees that
4147                  * current_trace changed to nop_trace to keep it from
4148                  * swapping the buffers after we resize it.
4149                  * update_max_tr() is called with interrupts disabled,
4150                  * so a synchronize_sched() is sufficient.
4151                  */
4152                 synchronize_sched();
4153                 free_snapshot(tr);
4154         }
4155 #endif
4156         /* Currently, only the top instance has options */
4157         if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
4158                 destroy_trace_option_files(topts);
4159                 topts = create_trace_option_files(tr, t);
4160         }
4161
4162 #ifdef CONFIG_TRACER_MAX_TRACE
4163         if (t->use_max_tr && !had_max_tr) {
4164                 ret = alloc_snapshot(tr);
4165                 if (ret < 0)
4166                         goto out;
4167         }
4168 #endif
4169
4170         if (t->init) {
4171                 ret = tracer_init(t, tr);
4172                 if (ret)
4173                         goto out;
4174         }
4175
4176         tr->current_trace = t;
4177         tr->current_trace->enabled++;
4178         trace_branch_enable(tr);
4179  out:
4180         mutex_unlock(&trace_types_lock);
4181
4182         return ret;
4183 }
4184
4185 static ssize_t
4186 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4187                         size_t cnt, loff_t *ppos)
4188 {
4189         struct trace_array *tr = filp->private_data;
4190         char buf[MAX_TRACER_SIZE+1];
4191         int i;
4192         size_t ret;
4193         int err;
4194
4195         ret = cnt;
4196
4197         if (cnt > MAX_TRACER_SIZE)
4198                 cnt = MAX_TRACER_SIZE;
4199
4200         if (copy_from_user(&buf, ubuf, cnt))
4201                 return -EFAULT;
4202
4203         buf[cnt] = 0;
4204
4205         /* strip ending whitespace. */
4206         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4207                 buf[i] = 0;
4208
4209         err = tracing_set_tracer(tr, buf);
4210         if (err)
4211                 return err;
4212
4213         *ppos += ret;
4214
4215         return ret;
4216 }
4217
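/*
 * The write handler above feeds tracing_set_tracer(); from user space
 * this is simply (illustrative):
 *
 *	# echo function_graph > current_tracer
 *	# cat current_tracer
 *	function_graph
 *	# echo nop > current_tracer		(switch back to no tracer)
 */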
4218 static ssize_t
4219 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4220                    size_t cnt, loff_t *ppos)
4221 {
4222         char buf[64];
4223         int r;
4224
4225         r = snprintf(buf, sizeof(buf), "%ld\n",
4226                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4227         if (r > sizeof(buf))
4228                 r = sizeof(buf);
4229         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4230 }
4231
4232 static ssize_t
4233 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4234                     size_t cnt, loff_t *ppos)
4235 {
4236         unsigned long val;
4237         int ret;
4238
4239         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4240         if (ret)
4241                 return ret;
4242
4243         *ptr = val * 1000;
4244
4245         return cnt;
4246 }
4247
4248 static ssize_t
4249 tracing_thresh_read(struct file *filp, char __user *ubuf,
4250                     size_t cnt, loff_t *ppos)
4251 {
4252         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4253 }
4254
4255 static ssize_t
4256 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4257                      size_t cnt, loff_t *ppos)
4258 {
4259         struct trace_array *tr = filp->private_data;
4260         int ret;
4261
4262         mutex_lock(&trace_types_lock);
4263         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4264         if (ret < 0)
4265                 goto out;
4266
4267         if (tr->current_trace->update_thresh) {
4268                 ret = tr->current_trace->update_thresh(tr);
4269                 if (ret < 0)
4270                         goto out;
4271         }
4272
4273         ret = cnt;
4274 out:
4275         mutex_unlock(&trace_types_lock);
4276
4277         return ret;
4278 }
4279
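/*
 * tracing_thresh is exposed in microseconds; the helpers above convert
 * to and from nanoseconds. Illustrative use (only honored by tracers
 * that consult it, e.g. the latency tracers):
 *
 *	# echo 100 > tracing_thresh		(report only latencies > 100 usecs)
 *	# echo 0 > tracing_thresh		(disable the threshold)
 */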
4280 static ssize_t
4281 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4282                      size_t cnt, loff_t *ppos)
4283 {
4284         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4285 }
4286
4287 static ssize_t
4288 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4289                       size_t cnt, loff_t *ppos)
4290 {
4291         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4292 }
4293
4294 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4295 {
4296         struct trace_array *tr = inode->i_private;
4297         struct trace_iterator *iter;
4298         int ret = 0;
4299
4300         if (tracing_disabled)
4301                 return -ENODEV;
4302
4303         if (trace_array_get(tr) < 0)
4304                 return -ENODEV;
4305
4306         mutex_lock(&trace_types_lock);
4307
4308         /* create a buffer to store the information to pass to userspace */
4309         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4310         if (!iter) {
4311                 ret = -ENOMEM;
4312                 __trace_array_put(tr);
4313                 goto out;
4314         }
4315
4316         /*
4317          * We make a copy of the current tracer to avoid concurrent
4318          * changes on it while we are reading.
4319          */
4320         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4321         if (!iter->trace) {
4322                 ret = -ENOMEM;
4323                 goto fail;
4324         }
4325         *iter->trace = *tr->current_trace;
4326
4327         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4328                 ret = -ENOMEM;
4329                 goto fail;
4330         }
4331
4332         /* trace pipe does not show start of buffer */
4333         cpumask_setall(iter->started);
4334
4335         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4336                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4337
4338         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4339         if (trace_clocks[tr->clock_id].in_ns)
4340                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4341
4342         iter->tr = tr;
4343         iter->trace_buffer = &tr->trace_buffer;
4344         iter->cpu_file = tracing_get_cpu(inode);
4345         mutex_init(&iter->mutex);
4346         filp->private_data = iter;
4347
4348         if (iter->trace->pipe_open)
4349                 iter->trace->pipe_open(iter);
4350
4351         nonseekable_open(inode, filp);
4352 out:
4353         mutex_unlock(&trace_types_lock);
4354         return ret;
4355
4356 fail:
4357         kfree(iter->trace);
4358         kfree(iter);
4359         __trace_array_put(tr);
4360         mutex_unlock(&trace_types_lock);
4361         return ret;
4362 }
4363
4364 static int tracing_release_pipe(struct inode *inode, struct file *file)
4365 {
4366         struct trace_iterator *iter = file->private_data;
4367         struct trace_array *tr = inode->i_private;
4368
4369         mutex_lock(&trace_types_lock);
4370
4371         if (iter->trace->pipe_close)
4372                 iter->trace->pipe_close(iter);
4373
4374         mutex_unlock(&trace_types_lock);
4375
4376         free_cpumask_var(iter->started);
4377         mutex_destroy(&iter->mutex);
4378         kfree(iter->trace);
4379         kfree(iter);
4380
4381         trace_array_put(tr);
4382
4383         return 0;
4384 }
4385
4386 static unsigned int
4387 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4388 {
4389         /* Iterators are static, they should be filled or empty */
4390         if (trace_buffer_iter(iter, iter->cpu_file))
4391                 return POLLIN | POLLRDNORM;
4392
4393         if (trace_flags & TRACE_ITER_BLOCK)
4394                 /*
4395                  * Always select as readable when in blocking mode
4396                  */
4397                 return POLLIN | POLLRDNORM;
4398         else
4399                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4400                                              filp, poll_table);
4401 }
4402
4403 static unsigned int
4404 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4405 {
4406         struct trace_iterator *iter = filp->private_data;
4407
4408         return trace_poll(iter, filp, poll_table);
4409 }
4410
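/*
 * Because trace_pipe implements ->poll via the helpers above, a reader can
 * sleep until data arrives. A minimal user-space sketch (illustrative;
 * includes and error handling omitted, path assumes the usual tracefs
 * mount point):
 *
 *	int fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *	while (poll(&pfd, 1, -1) > 0) {
 *		char buf[4096];
 *		ssize_t n = read(fd, buf, sizeof(buf));
 *		if (n <= 0)
 *			break;
 *		... consume n bytes of trace text ...
 *	}
 */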
4411 /* Must be called with iter->mutex held. */
4412 static int tracing_wait_pipe(struct file *filp)
4413 {
4414         struct trace_iterator *iter = filp->private_data;
4415         int ret;
4416
4417         while (trace_empty(iter)) {
4418
4419                 if ((filp->f_flags & O_NONBLOCK)) {
4420                         return -EAGAIN;
4421                 }
4422
4423                 /*
4424                  * We only return EOF after we have read something and tracing is disabled.
4425                  * We still block if tracing is disabled, but we have never
4426                  * read anything. This allows a user to cat this file, and
4427                  * then enable tracing. But after we have read something,
4428                  * we give an EOF when tracing is again disabled.
4429                  *
4430                  * iter->pos will be 0 if we haven't read anything.
4431                  */
4432                 if (!tracing_is_on() && iter->pos)
4433                         break;
4434
4435                 mutex_unlock(&iter->mutex);
4436
4437                 ret = wait_on_pipe(iter);
4438
4439                 mutex_lock(&iter->mutex);
4440
4441                 if (ret)
4442                         return ret;
4443
4444                 if (signal_pending(current))
4445                         return -EINTR;
4446         }
4447
4448         return 1;
4449 }
4450
4451 /*
4452  * Consumer reader.
4453  */
4454 static ssize_t
4455 tracing_read_pipe(struct file *filp, char __user *ubuf,
4456                   size_t cnt, loff_t *ppos)
4457 {
4458         struct trace_iterator *iter = filp->private_data;
4459         struct trace_array *tr = iter->tr;
4460         ssize_t sret;
4461
4462         /* return any leftover data */
4463         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4464         if (sret != -EBUSY)
4465                 return sret;
4466
4467         trace_seq_init(&iter->seq);
4468
4469         /* copy the tracer to avoid using a global lock all around */
4470         mutex_lock(&trace_types_lock);
4471         if (unlikely(iter->trace->name != tr->current_trace->name))
4472                 *iter->trace = *tr->current_trace;
4473         mutex_unlock(&trace_types_lock);
4474
4475         /*
4476          * Avoid more than one consumer on a single file descriptor
4477          * This is just a matter of trace coherency; the ring buffer itself
4478          * is protected.
4479          */
4480         mutex_lock(&iter->mutex);
4481         if (iter->trace->read) {
4482                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4483                 if (sret)
4484                         goto out;
4485         }
4486
4487 waitagain:
4488         sret = tracing_wait_pipe(filp);
4489         if (sret <= 0)
4490                 goto out;
4491
4492         /* stop when tracing is finished */
4493         if (trace_empty(iter)) {
4494                 sret = 0;
4495                 goto out;
4496         }
4497
4498         if (cnt >= PAGE_SIZE)
4499                 cnt = PAGE_SIZE - 1;
4500
4501         /* reset all but tr, trace, and overruns */
4502         memset(&iter->seq, 0,
4503                sizeof(struct trace_iterator) -
4504                offsetof(struct trace_iterator, seq));
4505         cpumask_clear(iter->started);
4506         iter->pos = -1;
4507
4508         trace_event_read_lock();
4509         trace_access_lock(iter->cpu_file);
4510         while (trace_find_next_entry_inc(iter) != NULL) {
4511                 enum print_line_t ret;
4512                 int len = iter->seq.len;
4513
4514                 ret = print_trace_line(iter);
4515                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4516                         /* don't print partial lines */
4517                         iter->seq.len = len;
4518                         break;
4519                 }
4520                 if (ret != TRACE_TYPE_NO_CONSUME)
4521                         trace_consume(iter);
4522
4523                 if (iter->seq.len >= cnt)
4524                         break;
4525
4526                 /*
4527                  * Setting the full flag means we reached the trace_seq buffer
4528                  * size and we should have left via the partial-line condition above.
4529                  * One of the trace_seq_* functions is not used properly.
4530                  */
4531                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4532                           iter->ent->type);
4533         }
4534         trace_access_unlock(iter->cpu_file);
4535         trace_event_read_unlock();
4536
4537         /* Now copy what we have to the user */
4538         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4539         if (iter->seq.readpos >= iter->seq.len)
4540                 trace_seq_init(&iter->seq);
4541
4542         /*
4543          * If there was nothing to send to user, in spite of consuming trace
4544          * entries, go back to wait for more entries.
4545          */
4546         if (sret == -EBUSY)
4547                 goto waitagain;
4548
4549 out:
4550         mutex_unlock(&iter->mutex);
4551
4552         return sret;
4553 }
4554
4555 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4556                                      unsigned int idx)
4557 {
4558         __free_page(spd->pages[idx]);
4559 }
4560
4561 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4562         .can_merge              = 0,
4563         .confirm                = generic_pipe_buf_confirm,
4564         .release                = generic_pipe_buf_release,
4565         .steal                  = generic_pipe_buf_steal,
4566         .get                    = generic_pipe_buf_get,
4567 };
4568
4569 static size_t
4570 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4571 {
4572         size_t count;
4573         int ret;
4574
4575         /* Seq buffer is page-sized, exactly what we need. */
4576         for (;;) {
4577                 count = iter->seq.len;
4578                 ret = print_trace_line(iter);
4579                 count = iter->seq.len - count;
4580                 if (rem < count) {
4581                         rem = 0;
4582                         iter->seq.len -= count;
4583                         break;
4584                 }
4585                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4586                         iter->seq.len -= count;
4587                         break;
4588                 }
4589
4590                 if (ret != TRACE_TYPE_NO_CONSUME)
4591                         trace_consume(iter);
4592                 rem -= count;
4593                 if (!trace_find_next_entry_inc(iter))   {
4594                         rem = 0;
4595                         iter->ent = NULL;
4596                         break;
4597                 }
4598         }
4599
4600         return rem;
4601 }
4602
4603 static ssize_t tracing_splice_read_pipe(struct file *filp,
4604                                         loff_t *ppos,
4605                                         struct pipe_inode_info *pipe,
4606                                         size_t len,
4607                                         unsigned int flags)
4608 {
4609         struct page *pages_def[PIPE_DEF_BUFFERS];
4610         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4611         struct trace_iterator *iter = filp->private_data;
4612         struct splice_pipe_desc spd = {
4613                 .pages          = pages_def,
4614                 .partial        = partial_def,
4615                 .nr_pages       = 0, /* This gets updated below. */
4616                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4617                 .flags          = flags,
4618                 .ops            = &tracing_pipe_buf_ops,
4619                 .spd_release    = tracing_spd_release_pipe,
4620         };
4621         struct trace_array *tr = iter->tr;
4622         ssize_t ret;
4623         size_t rem;
4624         unsigned int i;
4625
4626         if (splice_grow_spd(pipe, &spd))
4627                 return -ENOMEM;
4628
4629         /* copy the tracer to avoid using a global lock all around */
4630         mutex_lock(&trace_types_lock);
4631         if (unlikely(iter->trace->name != tr->current_trace->name))
4632                 *iter->trace = *tr->current_trace;
4633         mutex_unlock(&trace_types_lock);
4634
4635         mutex_lock(&iter->mutex);
4636
4637         if (iter->trace->splice_read) {
4638                 ret = iter->trace->splice_read(iter, filp,
4639                                                ppos, pipe, len, flags);
4640                 if (ret)
4641                         goto out_err;
4642         }
4643
4644         ret = tracing_wait_pipe(filp);
4645         if (ret <= 0)
4646                 goto out_err;
4647
4648         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4649                 ret = -EFAULT;
4650                 goto out_err;
4651         }
4652
4653         trace_event_read_lock();
4654         trace_access_lock(iter->cpu_file);
4655
4656         /* Fill as many pages as possible. */
4657         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4658                 spd.pages[i] = alloc_page(GFP_KERNEL);
4659                 if (!spd.pages[i])
4660                         break;
4661
4662                 rem = tracing_fill_pipe_page(rem, iter);
4663
4664                 /* Copy the data into the page, so we can start over. */
4665                 ret = trace_seq_to_buffer(&iter->seq,
4666                                           page_address(spd.pages[i]),
4667                                           iter->seq.len);
4668                 if (ret < 0) {
4669                         __free_page(spd.pages[i]);
4670                         break;
4671                 }
4672                 spd.partial[i].offset = 0;
4673                 spd.partial[i].len = iter->seq.len;
4674
4675                 trace_seq_init(&iter->seq);
4676         }
4677
4678         trace_access_unlock(iter->cpu_file);
4679         trace_event_read_unlock();
4680         mutex_unlock(&iter->mutex);
4681
4682         spd.nr_pages = i;
4683
4684         ret = splice_to_pipe(pipe, &spd);
4685 out:
4686         splice_shrink_spd(&spd);
4687         return ret;
4688
4689 out_err:
4690         mutex_unlock(&iter->mutex);
4691         goto out;
4692 }
4693
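/*
 * splice_read lets a reader move trace data without an extra copy through
 * user space. Since splice(2) needs a pipe on one side, a typical pattern
 * is (illustrative sketch; error handling omitted):
 *
 *	int tfd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
 *	int p[2];
 *
 *	pipe(p);
 *	for (;;) {
 *		ssize_t n = splice(tfd, NULL, p[1], NULL, 4096, 0);
 *		if (n <= 0)
 *			break;
 *		splice(p[0], NULL, out_fd, NULL, n, 0);
 *	}
 *
 * where out_fd is whatever descriptor the data should land in (a file,
 * a socket, ...).
 */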
4694 static ssize_t
4695 tracing_entries_read(struct file *filp, char __user *ubuf,
4696                      size_t cnt, loff_t *ppos)
4697 {
4698         struct inode *inode = file_inode(filp);
4699         struct trace_array *tr = inode->i_private;
4700         int cpu = tracing_get_cpu(inode);
4701         char buf[64];
4702         int r = 0;
4703         ssize_t ret;
4704
4705         mutex_lock(&trace_types_lock);
4706
4707         if (cpu == RING_BUFFER_ALL_CPUS) {
4708                 int cpu, buf_size_same;
4709                 unsigned long size;
4710
4711                 size = 0;
4712                 buf_size_same = 1;
4713                 /* check if all cpu buffer sizes are the same */
4714                 for_each_tracing_cpu(cpu) {
4715                         /* fill in the size from first enabled cpu */
4716                         if (size == 0)
4717                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4718                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4719                                 buf_size_same = 0;
4720                                 break;
4721                         }
4722                 }
4723
4724                 if (buf_size_same) {
4725                         if (!ring_buffer_expanded)
4726                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4727                                             size >> 10,
4728                                             trace_buf_size >> 10);
4729                         else
4730                                 r = sprintf(buf, "%lu\n", size >> 10);
4731                 } else
4732                         r = sprintf(buf, "X\n");
4733         } else
4734                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4735
4736         mutex_unlock(&trace_types_lock);
4737
4738         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4739         return ret;
4740 }
4741
4742 static ssize_t
4743 tracing_entries_write(struct file *filp, const char __user *ubuf,
4744                       size_t cnt, loff_t *ppos)
4745 {
4746         struct inode *inode = file_inode(filp);
4747         struct trace_array *tr = inode->i_private;
4748         unsigned long val;
4749         int ret;
4750
4751         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4752         if (ret)
4753                 return ret;
4754
4755         /* must have at least 1 entry */
4756         if (!val)
4757                 return -EINVAL;
4758
4759         /* value is in KB */
4760         val <<= 10;
4761         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4762         if (ret < 0)
4763                 return ret;
4764
4765         *ppos += cnt;
4766
4767         return cnt;
4768 }
4769
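/*
 * The entries read/write handlers above back the buffer_size_kb files.
 * Illustrative usage (the per_cpu paths assume the per_cpu/cpuN
 * directories created elsewhere in this file):
 *
 *	# cat buffer_size_kb			(size of each per-cpu buffer)
 *	# echo 4096 > buffer_size_kb		(4 MB per cpu)
 *	# echo 1024 > per_cpu/cpu1/buffer_size_kb
 */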
4770 static ssize_t
4771 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4772                                 size_t cnt, loff_t *ppos)
4773 {
4774         struct trace_array *tr = filp->private_data;
4775         char buf[64];
4776         int r, cpu;
4777         unsigned long size = 0, expanded_size = 0;
4778
4779         mutex_lock(&trace_types_lock);
4780         for_each_tracing_cpu(cpu) {
4781                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4782                 if (!ring_buffer_expanded)
4783                         expanded_size += trace_buf_size >> 10;
4784         }
4785         if (ring_buffer_expanded)
4786                 r = sprintf(buf, "%lu\n", size);
4787         else
4788                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4789         mutex_unlock(&trace_types_lock);
4790
4791         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4792 }
4793
4794 static ssize_t
4795 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4796                           size_t cnt, loff_t *ppos)
4797 {
4798         /*
4799          * There is no need to read what the user has written; this function
4800          * only exists so that writes (e.g. from "echo") do not fail.
4801          */
4802
4803         *ppos += cnt;
4804
4805         return cnt;
4806 }
4807
4808 static int
4809 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4810 {
4811         struct trace_array *tr = inode->i_private;
4812
4813         /* disable tracing? */
4814         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4815                 tracer_tracing_off(tr);
4816         /* resize the ring buffer to 0 */
4817         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4818
4819         trace_array_put(tr);
4820
4821         return 0;
4822 }
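/*
 * Illustrative use of the "free_buffer" file: a tracing tool can keep it
 * open while recording; on release (including when the tool is killed),
 * tracing is optionally stopped (TRACE_ITER_STOP_ON_FREE) and the ring
 * buffer is shrunk back to its minimum size so the memory is returned
 * to the system.
 */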
4823
4824 static ssize_t
4825 tracing_mark_write(struct file *filp, const char __user *ubuf,
4826                                         size_t cnt, loff_t *fpos)
4827 {
4828         unsigned long addr = (unsigned long)ubuf;
4829         struct trace_array *tr = filp->private_data;
4830         struct ring_buffer_event *event;
4831         struct ring_buffer *buffer;
4832         struct print_entry *entry;
4833         unsigned long irq_flags;
4834         struct page *pages[2];
4835         void *map_page[2];
4836         int nr_pages = 1;
4837         ssize_t written;
4838         int offset;
4839         int size;
4840         int len;
4841         int ret;
4842         int i;
4843
4844         if (tracing_disabled)
4845                 return -EINVAL;
4846
4847         if (!(trace_flags & TRACE_ITER_MARKERS))
4848                 return -EINVAL;
4849
4850         if (cnt > TRACE_BUF_SIZE)
4851                 cnt = TRACE_BUF_SIZE;
4852
4853         /*
4854          * Userspace is injecting traces into the kernel trace buffer.
4855          * We want to be as non-intrusive as possible.
4856          * To do so, we do not want to allocate any special buffers
4857          * or take any locks, but instead write the userspace data
4858          * straight into the ring buffer.
4859          *
4860          * First we need to pin the userspace buffer into memory, which
4861          * most likely is already resident because userspace just
4862          * referenced it, although there is no guarantee of that. By using
4863          * get_user_pages_fast() and kmap_atomic()/kunmap_atomic() we can
4864          * access the pages directly and write the data straight into the
4865          * ring buffer.
4866          */
4867         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4868
4869         /* check if the write crosses a page boundary */
4870         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4871                 nr_pages = 2;
4872
4873         offset = addr & (PAGE_SIZE - 1);
4874         addr &= PAGE_MASK;
4875
4876         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4877         if (ret < nr_pages) {
4878                 while (--ret >= 0)
4879                         put_page(pages[ret]);
4880                 written = -EFAULT;
4881                 goto out;
4882         }
4883
4884         for (i = 0; i < nr_pages; i++)
4885                 map_page[i] = kmap_atomic(pages[i]);
4886
4887         local_save_flags(irq_flags);
4888         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4889         buffer = tr->trace_buffer.buffer;
4890         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4891                                           irq_flags, preempt_count());
4892         if (!event) {
4893                 /* Ring buffer disabled, return as if not open for write */
4894                 written = -EBADF;
4895                 goto out_unlock;
4896         }
4897
4898         entry = ring_buffer_event_data(event);
4899         entry->ip = _THIS_IP_;
4900
4901         if (nr_pages == 2) {
4902                 len = PAGE_SIZE - offset;
4903                 memcpy(&entry->buf, map_page[0] + offset, len);
4904                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4905         } else
4906                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4907
4908         if (entry->buf[cnt - 1] != '\n') {
4909                 entry->buf[cnt] = '\n';
4910                 entry->buf[cnt + 1] = '\0';
4911         } else
4912                 entry->buf[cnt] = '\0';
4913
4914         __buffer_unlock_commit(buffer, event);
4915
4916         written = cnt;
4917
4918         *fpos += written;
4919
4920  out_unlock:
4921         for (i = 0; i < nr_pages; i++) {
4922                 kunmap_atomic(map_page[i]);
4923                 put_page(pages[i]);
4924         }
4925  out:
4926         return written;
4927 }
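/*
 * Illustrative usage of the trace_marker file (path assumes debugfs is
 * mounted at /sys/kernel/debug):
 *
 *   # echo "hello from user space" > /sys/kernel/debug/tracing/trace_marker
 *
 * The string appears in the trace as a print event, interleaved with
 * kernel events, which helps correlate application activity with the
 * kernel trace.
 */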
4928
4929 static int tracing_clock_show(struct seq_file *m, void *v)
4930 {
4931         struct trace_array *tr = m->private;
4932         int i;
4933
4934         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4935                 seq_printf(m,
4936                         "%s%s%s%s", i ? " " : "",
4937                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4938                         i == tr->clock_id ? "]" : "");
4939         seq_putc(m, '\n');
4940
4941         return 0;
4942 }
4943
4944 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
4945 {
4946         int i;
4947
4948         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4949                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4950                         break;
4951         }
4952         if (i == ARRAY_SIZE(trace_clocks))
4953                 return -EINVAL;
4954
4955         mutex_lock(&trace_types_lock);
4956
4957         tr->clock_id = i;
4958
4959         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4960
4961         /*
4962          * New clock may not be consistent with the previous clock.
4963          * Reset the buffer so that it doesn't have incomparable timestamps.
4964          */
4965         tracing_reset_online_cpus(&tr->trace_buffer);
4966
4967 #ifdef CONFIG_TRACER_MAX_TRACE
4968         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4969                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4970         tracing_reset_online_cpus(&tr->max_buffer);
4971 #endif
4972
4973         mutex_unlock(&trace_types_lock);
4974
4975         return 0;
4976 }
4977
4978 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4979                                    size_t cnt, loff_t *fpos)
4980 {
4981         struct seq_file *m = filp->private_data;
4982         struct trace_array *tr = m->private;
4983         char buf[64];
4984         const char *clockstr;
4985         int ret;
4986
4987         if (cnt >= sizeof(buf))
4988                 return -EINVAL;
4989
4990         if (copy_from_user(&buf, ubuf, cnt))
4991                 return -EFAULT;
4992
4993         buf[cnt] = 0;
4994
4995         clockstr = strstrip(buf);
4996
4997         ret = tracing_set_clock(tr, clockstr);
4998         if (ret)
4999                 return ret;
5000
5001         *fpos += cnt;
5002
5003         return cnt;
5004 }
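/*
 * Illustrative usage of the trace_clock file (clock names are examples;
 * the available set depends on the kernel):
 *
 *   # cat /sys/kernel/debug/tracing/trace_clock
 *   [local] global counter uptime perf
 *   # echo global > /sys/kernel/debug/tracing/trace_clock
 *
 * The clock currently in use is shown in brackets. Note that switching
 * clocks resets the ring buffer (see tracing_set_clock() above), so
 * existing events are discarded.
 */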
5005
5006 static int tracing_clock_open(struct inode *inode, struct file *file)
5007 {
5008         struct trace_array *tr = inode->i_private;
5009         int ret;
5010
5011         if (tracing_disabled)
5012                 return -ENODEV;
5013
5014         if (trace_array_get(tr))
5015                 return -ENODEV;
5016
5017         ret = single_open(file, tracing_clock_show, inode->i_private);
5018         if (ret < 0)
5019                 trace_array_put(tr);
5020
5021         return ret;
5022 }
5023
5024 struct ftrace_buffer_info {
5025         struct trace_iterator   iter;
5026         void                    *spare;
5027         unsigned int            read;
5028 };
5029
5030 #ifdef CONFIG_TRACER_SNAPSHOT
5031 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5032 {
5033         struct trace_array *tr = inode->i_private;
5034         struct trace_iterator *iter;
5035         struct seq_file *m;
5036         int ret = 0;
5037
5038         if (trace_array_get(tr) < 0)
5039                 return -ENODEV;
5040
5041         if (file->f_mode & FMODE_READ) {
5042                 iter = __tracing_open(inode, file, true);
5043                 if (IS_ERR(iter))
5044                         ret = PTR_ERR(iter);
5045         } else {
5046                 /* Writes still need the seq_file to hold the private data */
5047                 ret = -ENOMEM;
5048                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5049                 if (!m)
5050                         goto out;
5051                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5052                 if (!iter) {
5053                         kfree(m);
5054                         goto out;
5055                 }
5056                 ret = 0;
5057
5058                 iter->tr = tr;
5059                 iter->trace_buffer = &tr->max_buffer;
5060                 iter->cpu_file = tracing_get_cpu(inode);
5061                 m->private = iter;
5062                 file->private_data = m;
5063         }
5064 out:
5065         if (ret < 0)
5066                 trace_array_put(tr);
5067
5068         return ret;
5069 }
5070
5071 static ssize_t
5072 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5073                        loff_t *ppos)
5074 {
5075         struct seq_file *m = filp->private_data;
5076         struct trace_iterator *iter = m->private;
5077         struct trace_array *tr = iter->tr;
5078         unsigned long val;
5079         int ret;
5080
5081         ret = tracing_update_buffers();
5082         if (ret < 0)
5083                 return ret;
5084
5085         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5086         if (ret)
5087                 return ret;
5088
5089         mutex_lock(&trace_types_lock);
5090
5091         if (tr->current_trace->use_max_tr) {
5092                 ret = -EBUSY;
5093                 goto out;
5094         }
5095
5096         switch (val) {
5097         case 0:
5098                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5099                         ret = -EINVAL;
5100                         break;
5101                 }
5102                 if (tr->allocated_snapshot)
5103                         free_snapshot(tr);
5104                 break;
5105         case 1:
5106 /* Only allow per-cpu swap if the ring buffer supports it */
5107 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5108                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5109                         ret = -EINVAL;
5110                         break;
5111                 }
5112 #endif
5113                 if (!tr->allocated_snapshot) {
5114                         ret = alloc_snapshot(tr);
5115                         if (ret < 0)
5116                                 break;
5117                 }
5118                 local_irq_disable();
5119                 /* Now, we're going to swap */
5120                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5121                         update_max_tr(tr, current, smp_processor_id());
5122                 else
5123                         update_max_tr_single(tr, current, iter->cpu_file);
5124                 local_irq_enable();
5125                 break;
5126         default:
5127                 if (tr->allocated_snapshot) {
5128                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5129                                 tracing_reset_online_cpus(&tr->max_buffer);
5130                         else
5131                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5132                 }
5133                 break;
5134         }
5135
5136         if (ret >= 0) {
5137                 *ppos += cnt;
5138                 ret = cnt;
5139         }
5140 out:
5141         mutex_unlock(&trace_types_lock);
5142         return ret;
5143 }
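/*
 * Illustrative usage of the snapshot file, matching the switch above:
 *
 *   # echo 1 > /sys/kernel/debug/tracing/snapshot
 *         allocate the snapshot buffer if needed and take a snapshot
 *   # cat /sys/kernel/debug/tracing/snapshot
 *         read the snapshotted trace
 *   # echo 0 > /sys/kernel/debug/tracing/snapshot
 *         free the snapshot buffer
 *   # echo 2 > /sys/kernel/debug/tracing/snapshot
 *         any value other than 0 or 1 clears the snapshot contents
 *         without freeing the buffer
 */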
5144
5145 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5146 {
5147         struct seq_file *m = file->private_data;
5148         int ret;
5149
5150         ret = tracing_release(inode, file);
5151
5152         if (file->f_mode & FMODE_READ)
5153                 return ret;
5154
5155         /* If write only, the seq_file is just a stub */
5156         if (m)
5157                 kfree(m->private);
5158         kfree(m);
5159
5160         return 0;
5161 }
5162
5163 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5164 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5165                                     size_t count, loff_t *ppos);
5166 static int tracing_buffers_release(struct inode *inode, struct file *file);
5167 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5168                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5169
5170 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5171 {
5172         struct ftrace_buffer_info *info;
5173         int ret;
5174
5175         ret = tracing_buffers_open(inode, filp);
5176         if (ret < 0)
5177                 return ret;
5178
5179         info = filp->private_data;
5180
5181         if (info->iter.trace->use_max_tr) {
5182                 tracing_buffers_release(inode, filp);
5183                 return -EBUSY;
5184         }
5185
5186         info->iter.snapshot = true;
5187         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5188
5189         return ret;
5190 }
5191
5192 #endif /* CONFIG_TRACER_SNAPSHOT */
5193
5194
5195 static const struct file_operations tracing_thresh_fops = {
5196         .open           = tracing_open_generic,
5197         .read           = tracing_thresh_read,
5198         .write          = tracing_thresh_write,
5199         .llseek         = generic_file_llseek,
5200 };
5201
5202 static const struct file_operations tracing_max_lat_fops = {
5203         .open           = tracing_open_generic,
5204         .read           = tracing_max_lat_read,
5205         .write          = tracing_max_lat_write,
5206         .llseek         = generic_file_llseek,
5207 };
5208
5209 static const struct file_operations set_tracer_fops = {
5210         .open           = tracing_open_generic,
5211         .read           = tracing_set_trace_read,
5212         .write          = tracing_set_trace_write,
5213         .llseek         = generic_file_llseek,
5214 };
5215
5216 static const struct file_operations tracing_pipe_fops = {
5217         .open           = tracing_open_pipe,
5218         .poll           = tracing_poll_pipe,
5219         .read           = tracing_read_pipe,
5220         .splice_read    = tracing_splice_read_pipe,
5221         .release        = tracing_release_pipe,
5222         .llseek         = no_llseek,
5223 };
5224
5225 static const struct file_operations tracing_entries_fops = {
5226         .open           = tracing_open_generic_tr,
5227         .read           = tracing_entries_read,
5228         .write          = tracing_entries_write,
5229         .llseek         = generic_file_llseek,
5230         .release        = tracing_release_generic_tr,
5231 };
5232
5233 static const struct file_operations tracing_total_entries_fops = {
5234         .open           = tracing_open_generic_tr,
5235         .read           = tracing_total_entries_read,
5236         .llseek         = generic_file_llseek,
5237         .release        = tracing_release_generic_tr,
5238 };
5239
5240 static const struct file_operations tracing_free_buffer_fops = {
5241         .open           = tracing_open_generic_tr,
5242         .write          = tracing_free_buffer_write,
5243         .release        = tracing_free_buffer_release,
5244 };
5245
5246 static const struct file_operations tracing_mark_fops = {
5247         .open           = tracing_open_generic_tr,
5248         .write          = tracing_mark_write,
5249         .llseek         = generic_file_llseek,
5250         .release        = tracing_release_generic_tr,
5251 };
5252
5253 static const struct file_operations trace_clock_fops = {
5254         .open           = tracing_clock_open,
5255         .read           = seq_read,
5256         .llseek         = seq_lseek,
5257         .release        = tracing_single_release_tr,
5258         .write          = tracing_clock_write,
5259 };
5260
5261 #ifdef CONFIG_TRACER_SNAPSHOT
5262 static const struct file_operations snapshot_fops = {
5263         .open           = tracing_snapshot_open,
5264         .read           = seq_read,
5265         .write          = tracing_snapshot_write,
5266         .llseek         = tracing_lseek,
5267         .release        = tracing_snapshot_release,
5268 };
5269
5270 static const struct file_operations snapshot_raw_fops = {
5271         .open           = snapshot_raw_open,
5272         .read           = tracing_buffers_read,
5273         .release        = tracing_buffers_release,
5274         .splice_read    = tracing_buffers_splice_read,
5275         .llseek         = no_llseek,
5276 };
5277
5278 #endif /* CONFIG_TRACER_SNAPSHOT */
5279
5280 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5281 {
5282         struct trace_array *tr = inode->i_private;
5283         struct ftrace_buffer_info *info;
5284         int ret;
5285
5286         if (tracing_disabled)
5287                 return -ENODEV;
5288
5289         if (trace_array_get(tr) < 0)
5290                 return -ENODEV;
5291
5292         info = kzalloc(sizeof(*info), GFP_KERNEL);
5293         if (!info) {
5294                 trace_array_put(tr);
5295                 return -ENOMEM;
5296         }
5297
5298         mutex_lock(&trace_types_lock);
5299
5300         info->iter.tr           = tr;
5301         info->iter.cpu_file     = tracing_get_cpu(inode);
5302         info->iter.trace        = tr->current_trace;
5303         info->iter.trace_buffer = &tr->trace_buffer;
5304         info->spare             = NULL;
5305         /* Force reading ring buffer for first read */
5306         info->read              = (unsigned int)-1;
5307
5308         filp->private_data = info;
5309
5310         mutex_unlock(&trace_types_lock);
5311
5312         ret = nonseekable_open(inode, filp);
5313         if (ret < 0)
5314                 trace_array_put(tr);
5315
5316         return ret;
5317 }
5318
5319 static unsigned int
5320 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5321 {
5322         struct ftrace_buffer_info *info = filp->private_data;
5323         struct trace_iterator *iter = &info->iter;
5324
5325         return trace_poll(iter, filp, poll_table);
5326 }
5327
5328 static ssize_t
5329 tracing_buffers_read(struct file *filp, char __user *ubuf,
5330                      size_t count, loff_t *ppos)
5331 {
5332         struct ftrace_buffer_info *info = filp->private_data;
5333         struct trace_iterator *iter = &info->iter;
5334         ssize_t ret;
5335         ssize_t size;
5336
5337         if (!count)
5338                 return 0;
5339
5340         mutex_lock(&trace_types_lock);
5341
5342 #ifdef CONFIG_TRACER_MAX_TRACE
5343         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5344                 size = -EBUSY;
5345                 goto out_unlock;
5346         }
5347 #endif
5348
5349         if (!info->spare)
5350                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5351                                                           iter->cpu_file);
5352         size = -ENOMEM;
5353         if (!info->spare)
5354                 goto out_unlock;
5355
5356         /* Do we have previous read data to read? */
5357         if (info->read < PAGE_SIZE)
5358                 goto read;
5359
5360  again:
5361         trace_access_lock(iter->cpu_file);
5362         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5363                                     &info->spare,
5364                                     count,
5365                                     iter->cpu_file, 0);
5366         trace_access_unlock(iter->cpu_file);
5367
5368         if (ret < 0) {
5369                 if (trace_empty(iter)) {
5370                         if ((filp->f_flags & O_NONBLOCK)) {
5371                                 size = -EAGAIN;
5372                                 goto out_unlock;
5373                         }
5374                         mutex_unlock(&trace_types_lock);
5375                         ret = wait_on_pipe(iter);
5376                         mutex_lock(&trace_types_lock);
5377                         if (ret) {
5378                                 size = ret;
5379                                 goto out_unlock;
5380                         }
5381                         if (signal_pending(current)) {
5382                                 size = -EINTR;
5383                                 goto out_unlock;
5384                         }
5385                         goto again;
5386                 }
5387                 size = 0;
5388                 goto out_unlock;
5389         }
5390
5391         info->read = 0;
5392  read:
5393         size = PAGE_SIZE - info->read;
5394         if (size > count)
5395                 size = count;
5396
5397         ret = copy_to_user(ubuf, info->spare + info->read, size);
5398         if (ret == size) {
5399                 size = -EFAULT;
5400                 goto out_unlock;
5401         }
5402         size -= ret;
5403
5404         *ppos += size;
5405         info->read += size;
5406
5407  out_unlock:
5408         mutex_unlock(&trace_types_lock);
5409
5410         return size;
5411 }
5412
5413 static int tracing_buffers_release(struct inode *inode, struct file *file)
5414 {
5415         struct ftrace_buffer_info *info = file->private_data;
5416         struct trace_iterator *iter = &info->iter;
5417
5418         mutex_lock(&trace_types_lock);
5419
5420         __trace_array_put(iter->tr);
5421
5422         if (info->spare)
5423                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5424         kfree(info);
5425
5426         mutex_unlock(&trace_types_lock);
5427
5428         return 0;
5429 }
5430
5431 struct buffer_ref {
5432         struct ring_buffer      *buffer;
5433         void                    *page;
5434         int                     ref;
5435 };
5436
5437 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5438                                     struct pipe_buffer *buf)
5439 {
5440         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5441
5442         if (--ref->ref)
5443                 return;
5444
5445         ring_buffer_free_read_page(ref->buffer, ref->page);
5446         kfree(ref);
5447         buf->private = 0;
5448 }
5449
5450 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5451                                 struct pipe_buffer *buf)
5452 {
5453         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5454
5455         ref->ref++;
5456 }
5457
5458 /* Pipe buffer operations for a buffer. */
5459 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5460         .can_merge              = 0,
5461         .confirm                = generic_pipe_buf_confirm,
5462         .release                = buffer_pipe_buf_release,
5463         .steal                  = generic_pipe_buf_steal,
5464         .get                    = buffer_pipe_buf_get,
5465 };
5466
5467 /*
5468  * Callback from splice_to_pipe(): release the pages left in the spd
5469  * in case we errored out while filling the pipe.
5470  */
5471 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5472 {
5473         struct buffer_ref *ref =
5474                 (struct buffer_ref *)spd->partial[i].private;
5475
5476         if (--ref->ref)
5477                 return;
5478
5479         ring_buffer_free_read_page(ref->buffer, ref->page);
5480         kfree(ref);
5481         spd->partial[i].private = 0;
5482 }
5483
5484 static ssize_t
5485 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5486                             struct pipe_inode_info *pipe, size_t len,
5487                             unsigned int flags)
5488 {
5489         struct ftrace_buffer_info *info = file->private_data;
5490         struct trace_iterator *iter = &info->iter;
5491         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5492         struct page *pages_def[PIPE_DEF_BUFFERS];
5493         struct splice_pipe_desc spd = {
5494                 .pages          = pages_def,
5495                 .partial        = partial_def,
5496                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5497                 .flags          = flags,
5498                 .ops            = &buffer_pipe_buf_ops,
5499                 .spd_release    = buffer_spd_release,
5500         };
5501         struct buffer_ref *ref;
5502         int entries, size, i;
5503         ssize_t ret;
5504
5505         mutex_lock(&trace_types_lock);
5506
5507 #ifdef CONFIG_TRACER_MAX_TRACE
5508         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5509                 ret = -EBUSY;
5510                 goto out;
5511         }
5512 #endif
5513
5514         if (splice_grow_spd(pipe, &spd)) {
5515                 ret = -ENOMEM;
5516                 goto out;
5517         }
5518
5519         if (*ppos & (PAGE_SIZE - 1)) {
5520                 ret = -EINVAL;
5521                 goto out;
5522         }
5523
5524         if (len & (PAGE_SIZE - 1)) {
5525                 if (len < PAGE_SIZE) {
5526                         ret = -EINVAL;
5527                         goto out;
5528                 }
5529                 len &= PAGE_MASK;
5530         }
5531
5532  again:
5533         trace_access_lock(iter->cpu_file);
5534         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5535
5536         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5537                 struct page *page;
5538                 int r;
5539
5540                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5541                 if (!ref)
5542                         break;
5543
5544                 ref->ref = 1;
5545                 ref->buffer = iter->trace_buffer->buffer;
5546                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5547                 if (!ref->page) {
5548                         kfree(ref);
5549                         break;
5550                 }
5551
5552                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5553                                           len, iter->cpu_file, 1);
5554                 if (r < 0) {
5555                         ring_buffer_free_read_page(ref->buffer, ref->page);
5556                         kfree(ref);
5557                         break;
5558                 }
5559
5560                 /*
5561                  * Zero out any leftover data; this page is going
5562                  * to user land.
5563                  */
5564                 size = ring_buffer_page_len(ref->page);
5565                 if (size < PAGE_SIZE)
5566                         memset(ref->page + size, 0, PAGE_SIZE - size);
5567
5568                 page = virt_to_page(ref->page);
5569
5570                 spd.pages[i] = page;
5571                 spd.partial[i].len = PAGE_SIZE;
5572                 spd.partial[i].offset = 0;
5573                 spd.partial[i].private = (unsigned long)ref;
5574                 spd.nr_pages++;
5575                 *ppos += PAGE_SIZE;
5576
5577                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5578         }
5579
5580         trace_access_unlock(iter->cpu_file);
5581         spd.nr_pages = i;
5582
5583         /* did we read anything? */
5584         if (!spd.nr_pages) {
5585                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5586                         ret = -EAGAIN;
5587                         goto out;
5588                 }
5589                 mutex_unlock(&trace_types_lock);
5590                 ret = wait_on_pipe(iter);
5591                 mutex_lock(&trace_types_lock);
5592                 if (ret)
5593                         goto out;
5594                 if (signal_pending(current)) {
5595                         ret = -EINTR;
5596                         goto out;
5597                 }
5598                 goto again;
5599         }
5600
5601         ret = splice_to_pipe(pipe, &spd);
5602         splice_shrink_spd(&spd);
5603 out:
5604         mutex_unlock(&trace_types_lock);
5605
5606         return ret;
5607 }
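/*
 * Illustrative consumer of per_cpu/cpuX/trace_pipe_raw: the pages handed
 * out above are raw ring-buffer pages, so a recording tool (trace-cmd,
 * for example) can splice() them to a file or socket without copying or
 * text formatting. Conceptually:
 *
 *   fd = open("per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *   splice(fd, NULL, pipe_fd, NULL, page_size, SPLICE_F_MOVE);
 *
 * Splice offsets must be page aligned and lengths at least a page, as
 * checked at the top of this function.
 */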
5608
5609 static const struct file_operations tracing_buffers_fops = {
5610         .open           = tracing_buffers_open,
5611         .read           = tracing_buffers_read,
5612         .poll           = tracing_buffers_poll,
5613         .release        = tracing_buffers_release,
5614         .splice_read    = tracing_buffers_splice_read,
5615         .llseek         = no_llseek,
5616 };
5617
5618 static ssize_t
5619 tracing_stats_read(struct file *filp, char __user *ubuf,
5620                    size_t count, loff_t *ppos)
5621 {
5622         struct inode *inode = file_inode(filp);
5623         struct trace_array *tr = inode->i_private;
5624         struct trace_buffer *trace_buf = &tr->trace_buffer;
5625         int cpu = tracing_get_cpu(inode);
5626         struct trace_seq *s;
5627         unsigned long cnt;
5628         unsigned long long t;
5629         unsigned long usec_rem;
5630
5631         s = kmalloc(sizeof(*s), GFP_KERNEL);
5632         if (!s)
5633                 return -ENOMEM;
5634
5635         trace_seq_init(s);
5636
5637         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5638         trace_seq_printf(s, "entries: %ld\n", cnt);
5639
5640         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5641         trace_seq_printf(s, "overrun: %ld\n", cnt);
5642
5643         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5644         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5645
5646         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5647         trace_seq_printf(s, "bytes: %ld\n", cnt);
5648
5649         if (trace_clocks[tr->clock_id].in_ns) {
5650                 /* local or global for trace_clock */
5651                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5652                 usec_rem = do_div(t, USEC_PER_SEC);
5653                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5654                                                                 t, usec_rem);
5655
5656                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5657                 usec_rem = do_div(t, USEC_PER_SEC);
5658                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5659         } else {
5660                 /* counter or tsc mode for trace_clock */
5661                 trace_seq_printf(s, "oldest event ts: %llu\n",
5662                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5663
5664                 trace_seq_printf(s, "now ts: %llu\n",
5665                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5666         }
5667
5668         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5669         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5670
5671         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5672         trace_seq_printf(s, "read events: %ld\n", cnt);
5673
5674         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5675
5676         kfree(s);
5677
5678         return count;
5679 }
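/*
 * Example of what a read of per_cpu/cpuX/stats produces (values are
 * illustrative):
 *
 *   entries: 129
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 6724
 *   oldest event ts:  2289.530914
 *   now ts:  2295.340722
 *   dropped events: 0
 *   read events: 42
 */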
5680
5681 static const struct file_operations tracing_stats_fops = {
5682         .open           = tracing_open_generic_tr,
5683         .read           = tracing_stats_read,
5684         .llseek         = generic_file_llseek,
5685         .release        = tracing_release_generic_tr,
5686 };
5687
5688 #ifdef CONFIG_DYNAMIC_FTRACE
5689
5690 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5691 {
5692         return 0;
5693 }
5694
5695 static ssize_t
5696 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5697                   size_t cnt, loff_t *ppos)
5698 {
5699         static char ftrace_dyn_info_buffer[1024];
5700         static DEFINE_MUTEX(dyn_info_mutex);
5701         unsigned long *p = filp->private_data;
5702         char *buf = ftrace_dyn_info_buffer;
5703         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5704         int r;
5705
5706         mutex_lock(&dyn_info_mutex);
5707         r = sprintf(buf, "%ld ", *p);
5708
5709         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5710         buf[r++] = '\n';
5711
5712         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5713
5714         mutex_unlock(&dyn_info_mutex);
5715
5716         return r;
5717 }
5718
5719 static const struct file_operations tracing_dyn_info_fops = {
5720         .open           = tracing_open_generic,
5721         .read           = tracing_read_dyn_info,
5722         .llseek         = generic_file_llseek,
5723 };
5724 #endif /* CONFIG_DYNAMIC_FTRACE */
5725
5726 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5727 static void
5728 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5729 {
5730         tracing_snapshot();
5731 }
5732
5733 static void
5734 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5735 {
5736                 unsigned long *count = (unsigned long *)data;
5737
5738         if (!*count)
5739                 return;
5740
5741         if (*count != -1)
5742                 (*count)--;
5743
5744         tracing_snapshot();
5745 }
5746
5747 static int
5748 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5749                       struct ftrace_probe_ops *ops, void *data)
5750 {
5751         long count = (long)data;
5752
5753         seq_printf(m, "%ps:", (void *)ip);
5754
5755         seq_puts(m, "snapshot");
5756
5757         if (count == -1)
5758                 seq_puts(m, ":unlimited\n");
5759         else
5760                 seq_printf(m, ":count=%ld\n", count);
5761
5762         return 0;
5763 }
5764
5765 static struct ftrace_probe_ops snapshot_probe_ops = {
5766         .func                   = ftrace_snapshot,
5767         .print                  = ftrace_snapshot_print,
5768 };
5769
5770 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5771         .func                   = ftrace_count_snapshot,
5772         .print                  = ftrace_snapshot_print,
5773 };
5774
5775 static int
5776 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5777                                char *glob, char *cmd, char *param, int enable)
5778 {
5779         struct ftrace_probe_ops *ops;
5780         void *count = (void *)-1;
5781         char *number;
5782         int ret;
5783
5784         /* hash funcs only work with set_ftrace_filter */
5785         if (!enable)
5786                 return -EINVAL;
5787
5788         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5789
5790         if (glob[0] == '!') {
5791                 unregister_ftrace_function_probe_func(glob+1, ops);
5792                 return 0;
5793         }
5794
5795         if (!param)
5796                 goto out_reg;
5797
5798         number = strsep(&param, ":");
5799
5800         if (!strlen(number))
5801                 goto out_reg;
5802
5803         /*
5804          * We use the callback data field (which is a pointer)
5805          * as our counter.
5806          */
5807         ret = kstrtoul(number, 0, (unsigned long *)&count);
5808         if (ret)
5809                 return ret;
5810
5811  out_reg:
5812         ret = register_ftrace_function_probe(glob, ops, count);
5813
5814         if (ret >= 0)
5815                 alloc_snapshot(&global_trace);
5816
5817         return ret < 0 ? ret : 0;
5818 }
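/*
 * Illustrative usage of the "snapshot" function command registered below
 * (do_sys_open is just an example function):
 *
 *   # echo 'do_sys_open:snapshot' > set_ftrace_filter
 *         take a snapshot every time do_sys_open() is hit
 *   # echo 'do_sys_open:snapshot:5' > set_ftrace_filter
 *         only for the first 5 hits
 *   # echo '!do_sys_open:snapshot' > set_ftrace_filter
 *         remove the probe again
 */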
5819
5820 static struct ftrace_func_command ftrace_snapshot_cmd = {
5821         .name                   = "snapshot",
5822         .func                   = ftrace_trace_snapshot_callback,
5823 };
5824
5825 static __init int register_snapshot_cmd(void)
5826 {
5827         return register_ftrace_command(&ftrace_snapshot_cmd);
5828 }
5829 #else
5830 static inline __init int register_snapshot_cmd(void) { return 0; }
5831 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5832
5833 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5834 {
5835         if (tr->dir)
5836                 return tr->dir;
5837
5838         if (!debugfs_initialized())
5839                 return NULL;
5840
5841         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5842                 tr->dir = debugfs_create_dir("tracing", NULL);
5843
5844         if (!tr->dir)
5845                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5846
5847         return tr->dir;
5848 }
5849
5850 struct dentry *tracing_init_dentry(void)
5851 {
5852         return tracing_init_dentry_tr(&global_trace);
5853 }
5854
5855 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5856 {
5857         struct dentry *d_tracer;
5858
5859         if (tr->percpu_dir)
5860                 return tr->percpu_dir;
5861
5862         d_tracer = tracing_init_dentry_tr(tr);
5863         if (!d_tracer)
5864                 return NULL;
5865
5866         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5867
5868         WARN_ONCE(!tr->percpu_dir,
5869                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5870
5871         return tr->percpu_dir;
5872 }
5873
5874 static struct dentry *
5875 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5876                       void *data, long cpu, const struct file_operations *fops)
5877 {
5878         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5879
5880         if (ret) /* See tracing_get_cpu() */
5881                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5882         return ret;
5883 }
5884
5885 static void
5886 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5887 {
5888         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5889         struct dentry *d_cpu;
5890         char cpu_dir[30]; /* 30 characters should be more than enough */
5891
5892         if (!d_percpu)
5893                 return;
5894
5895         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5896         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5897         if (!d_cpu) {
5898                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5899                 return;
5900         }
5901
5902         /* per cpu trace_pipe */
5903         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5904                                 tr, cpu, &tracing_pipe_fops);
5905
5906         /* per cpu trace */
5907         trace_create_cpu_file("trace", 0644, d_cpu,
5908                                 tr, cpu, &tracing_fops);
5909
5910         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5911                                 tr, cpu, &tracing_buffers_fops);
5912
5913         trace_create_cpu_file("stats", 0444, d_cpu,
5914                                 tr, cpu, &tracing_stats_fops);
5915
5916         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5917                                 tr, cpu, &tracing_entries_fops);
5918
5919 #ifdef CONFIG_TRACER_SNAPSHOT
5920         trace_create_cpu_file("snapshot", 0644, d_cpu,
5921                                 tr, cpu, &snapshot_fops);
5922
5923         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5924                                 tr, cpu, &snapshot_raw_fops);
5925 #endif
5926 }
5927
5928 #ifdef CONFIG_FTRACE_SELFTEST
5929 /* Let selftest have access to static functions in this file */
5930 #include "trace_selftest.c"
5931 #endif
5932
5933 struct trace_option_dentry {
5934         struct tracer_opt               *opt;
5935         struct tracer_flags             *flags;
5936         struct trace_array              *tr;
5937         struct dentry                   *entry;
5938 };
5939
5940 static ssize_t
5941 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5942                         loff_t *ppos)
5943 {
5944         struct trace_option_dentry *topt = filp->private_data;
5945         char *buf;
5946
5947         if (topt->flags->val & topt->opt->bit)
5948                 buf = "1\n";
5949         else
5950                 buf = "0\n";
5951
5952         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5953 }
5954
5955 static ssize_t
5956 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5957                          loff_t *ppos)
5958 {
5959         struct trace_option_dentry *topt = filp->private_data;
5960         unsigned long val;
5961         int ret;
5962
5963         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5964         if (ret)
5965                 return ret;
5966
5967         if (val != 0 && val != 1)
5968                 return -EINVAL;
5969
5970         if (!!(topt->flags->val & topt->opt->bit) != val) {
5971                 mutex_lock(&trace_types_lock);
5972                 ret = __set_tracer_option(topt->tr, topt->flags,
5973                                           topt->opt, !val);
5974                 mutex_unlock(&trace_types_lock);
5975                 if (ret)
5976                         return ret;
5977         }
5978
5979         *ppos += cnt;
5980
5981         return cnt;
5982 }
5983
5984
5985 static const struct file_operations trace_options_fops = {
5986         .open = tracing_open_generic,
5987         .read = trace_options_read,
5988         .write = trace_options_write,
5989         .llseek = generic_file_llseek,
5990 };
5991
5992 static ssize_t
5993 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5994                         loff_t *ppos)
5995 {
5996         long index = (long)filp->private_data;
5997         char *buf;
5998
5999         if (trace_flags & (1 << index))
6000                 buf = "1\n";
6001         else
6002                 buf = "0\n";
6003
6004         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6005 }
6006
6007 static ssize_t
6008 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6009                          loff_t *ppos)
6010 {
6011         struct trace_array *tr = &global_trace;
6012         long index = (long)filp->private_data;
6013         unsigned long val;
6014         int ret;
6015
6016         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6017         if (ret)
6018                 return ret;
6019
6020         if (val != 0 && val != 1)
6021                 return -EINVAL;
6022
6023         mutex_lock(&trace_types_lock);
6024         ret = set_tracer_flag(tr, 1 << index, val);
6025         mutex_unlock(&trace_types_lock);
6026
6027         if (ret < 0)
6028                 return ret;
6029
6030         *ppos += cnt;
6031
6032         return cnt;
6033 }
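/*
 * Illustrative usage: every core trace flag gets a file under options/,
 * and writing 0 or 1 toggles the corresponding bit, e.g.
 *
 *   # echo 1 > /sys/kernel/debug/tracing/options/stacktrace
 *   # echo 0 > /sys/kernel/debug/tracing/options/sym-offset
 *
 * which is equivalent to echoing "stacktrace" or "nosym-offset" into the
 * trace_options file.
 */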
6034
6035 static const struct file_operations trace_options_core_fops = {
6036         .open = tracing_open_generic,
6037         .read = trace_options_core_read,
6038         .write = trace_options_core_write,
6039         .llseek = generic_file_llseek,
6040 };
6041
6042 struct dentry *trace_create_file(const char *name,
6043                                  umode_t mode,
6044                                  struct dentry *parent,
6045                                  void *data,
6046                                  const struct file_operations *fops)
6047 {
6048         struct dentry *ret;
6049
6050         ret = debugfs_create_file(name, mode, parent, data, fops);
6051         if (!ret)
6052                 pr_warning("Could not create debugfs '%s' entry\n", name);
6053
6054         return ret;
6055 }
6056
6057
6058 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6059 {
6060         struct dentry *d_tracer;
6061
6062         if (tr->options)
6063                 return tr->options;
6064
6065         d_tracer = tracing_init_dentry_tr(tr);
6066         if (!d_tracer)
6067                 return NULL;
6068
6069         tr->options = debugfs_create_dir("options", d_tracer);
6070         if (!tr->options) {
6071                 pr_warning("Could not create debugfs directory 'options'\n");
6072                 return NULL;
6073         }
6074
6075         return tr->options;
6076 }
6077
6078 static void
6079 create_trace_option_file(struct trace_array *tr,
6080                          struct trace_option_dentry *topt,
6081                          struct tracer_flags *flags,
6082                          struct tracer_opt *opt)
6083 {
6084         struct dentry *t_options;
6085
6086         t_options = trace_options_init_dentry(tr);
6087         if (!t_options)
6088                 return;
6089
6090         topt->flags = flags;
6091         topt->opt = opt;
6092         topt->tr = tr;
6093
6094         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6095                                     &trace_options_fops);
6096
6097 }
6098
6099 static struct trace_option_dentry *
6100 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6101 {
6102         struct trace_option_dentry *topts;
6103         struct tracer_flags *flags;
6104         struct tracer_opt *opts;
6105         int cnt;
6106
6107         if (!tracer)
6108                 return NULL;
6109
6110         flags = tracer->flags;
6111
6112         if (!flags || !flags->opts)
6113                 return NULL;
6114
6115         opts = flags->opts;
6116
6117         for (cnt = 0; opts[cnt].name; cnt++)
6118                 ;
6119
6120         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6121         if (!topts)
6122                 return NULL;
6123
6124         for (cnt = 0; opts[cnt].name; cnt++)
6125                 create_trace_option_file(tr, &topts[cnt], flags,
6126                                          &opts[cnt]);
6127
6128         return topts;
6129 }
6130
6131 static void
6132 destroy_trace_option_files(struct trace_option_dentry *topts)
6133 {
6134         int cnt;
6135
6136         if (!topts)
6137                 return;
6138
6139         for (cnt = 0; topts[cnt].opt; cnt++)
6140                 debugfs_remove(topts[cnt].entry);
6141
6142         kfree(topts);
6143 }
6144
6145 static struct dentry *
6146 create_trace_option_core_file(struct trace_array *tr,
6147                               const char *option, long index)
6148 {
6149         struct dentry *t_options;
6150
6151         t_options = trace_options_init_dentry(tr);
6152         if (!t_options)
6153                 return NULL;
6154
6155         return trace_create_file(option, 0644, t_options, (void *)index,
6156                                     &trace_options_core_fops);
6157 }
6158
6159 static __init void create_trace_options_dir(struct trace_array *tr)
6160 {
6161         struct dentry *t_options;
6162         int i;
6163
6164         t_options = trace_options_init_dentry(tr);
6165         if (!t_options)
6166                 return;
6167
6168         for (i = 0; trace_options[i]; i++)
6169                 create_trace_option_core_file(tr, trace_options[i], i);
6170 }
6171
6172 static ssize_t
6173 rb_simple_read(struct file *filp, char __user *ubuf,
6174                size_t cnt, loff_t *ppos)
6175 {
6176         struct trace_array *tr = filp->private_data;
6177         char buf[64];
6178         int r;
6179
6180         r = tracer_tracing_is_on(tr);
6181         r = sprintf(buf, "%d\n", r);
6182
6183         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6184 }
6185
6186 static ssize_t
6187 rb_simple_write(struct file *filp, const char __user *ubuf,
6188                 size_t cnt, loff_t *ppos)
6189 {
6190         struct trace_array *tr = filp->private_data;
6191         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6192         unsigned long val;
6193         int ret;
6194
6195         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6196         if (ret)
6197                 return ret;
6198
6199         if (buffer) {
6200                 mutex_lock(&trace_types_lock);
6201                 if (val) {
6202                         tracer_tracing_on(tr);
6203                         if (tr->current_trace->start)
6204                                 tr->current_trace->start(tr);
6205                 } else {
6206                         tracer_tracing_off(tr);
6207                         if (tr->current_trace->stop)
6208                                 tr->current_trace->stop(tr);
6209                 }
6210                 mutex_unlock(&trace_types_lock);
6211         }
6212
6213         (*ppos)++;
6214
6215         return cnt;
6216 }
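/*
 * Illustrative usage of the tracing_on file backed by rb_simple_fops:
 *
 *   # echo 0 > /sys/kernel/debug/tracing/tracing_on
 *         stop recording, but keep the current tracer set up
 *   # echo 1 > /sys/kernel/debug/tracing/tracing_on
 *         resume recording
 *
 * This only gates writes into the ring buffer, so it is much cheaper
 * than switching current_tracer back and forth.
 */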
6217
6218 static const struct file_operations rb_simple_fops = {
6219         .open           = tracing_open_generic_tr,
6220         .read           = rb_simple_read,
6221         .write          = rb_simple_write,
6222         .release        = tracing_release_generic_tr,
6223         .llseek         = default_llseek,
6224 };
6225
6226 struct dentry *trace_instance_dir;
6227
6228 static void
6229 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
6230
6231 static int
6232 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6233 {
6234         enum ring_buffer_flags rb_flags;
6235
6236         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6237
6238         buf->tr = tr;
6239
6240         buf->buffer = ring_buffer_alloc(size, rb_flags);
6241         if (!buf->buffer)
6242                 return -ENOMEM;
6243
6244         buf->data = alloc_percpu(struct trace_array_cpu);
6245         if (!buf->data) {
6246                 ring_buffer_free(buf->buffer);
6247                 return -ENOMEM;
6248         }
6249
6250         /* Allocate the first page for all buffers */
6251         set_buffer_entries(&tr->trace_buffer,
6252                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6253
6254         return 0;
6255 }
6256
6257 static int allocate_trace_buffers(struct trace_array *tr, int size)
6258 {
6259         int ret;
6260
6261         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6262         if (ret)
6263                 return ret;
6264
6265 #ifdef CONFIG_TRACER_MAX_TRACE
6266         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6267                                     allocate_snapshot ? size : 1);
6268         if (WARN_ON(ret)) {
6269                 ring_buffer_free(tr->trace_buffer.buffer);
6270                 free_percpu(tr->trace_buffer.data);
6271                 return -ENOMEM;
6272         }
6273         tr->allocated_snapshot = allocate_snapshot;
6274
6275         /*
6276          * Only the top level trace array gets its snapshot allocated
6277          * from the kernel command line.
6278          */
6279         allocate_snapshot = false;
6280 #endif
6281         return 0;
6282 }
6283
6284 static void free_trace_buffer(struct trace_buffer *buf)
6285 {
6286         if (buf->buffer) {
6287                 ring_buffer_free(buf->buffer);
6288                 buf->buffer = NULL;
6289                 free_percpu(buf->data);
6290                 buf->data = NULL;
6291         }
6292 }
6293
6294 static void free_trace_buffers(struct trace_array *tr)
6295 {
6296         if (!tr)
6297                 return;
6298
6299         free_trace_buffer(&tr->trace_buffer);
6300
6301 #ifdef CONFIG_TRACER_MAX_TRACE
6302         free_trace_buffer(&tr->max_buffer);
6303 #endif
6304 }
6305
6306 static int new_instance_create(const char *name)
6307 {
6308         struct trace_array *tr;
6309         int ret;
6310
6311         mutex_lock(&trace_types_lock);
6312
6313         ret = -EEXIST;
6314         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6315                 if (tr->name && strcmp(tr->name, name) == 0)
6316                         goto out_unlock;
6317         }
6318
6319         ret = -ENOMEM;
6320         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6321         if (!tr)
6322                 goto out_unlock;
6323
6324         tr->name = kstrdup(name, GFP_KERNEL);
6325         if (!tr->name)
6326                 goto out_free_tr;
6327
6328         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6329                 goto out_free_tr;
6330
6331         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6332
6333         raw_spin_lock_init(&tr->start_lock);
6334
6335         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6336
6337         tr->current_trace = &nop_trace;
6338
6339         INIT_LIST_HEAD(&tr->systems);
6340         INIT_LIST_HEAD(&tr->events);
6341
6342         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6343                 goto out_free_tr;
6344
6345         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6346         if (!tr->dir)
6347                 goto out_free_tr;
6348
6349         ret = event_trace_add_tracer(tr->dir, tr);
6350         if (ret) {
6351                 debugfs_remove_recursive(tr->dir);
6352                 goto out_free_tr;
6353         }
6354
6355         init_tracer_debugfs(tr, tr->dir);
6356
6357         list_add(&tr->list, &ftrace_trace_arrays);
6358
6359         mutex_unlock(&trace_types_lock);
6360
6361         return 0;
6362
6363  out_free_tr:
6364         free_trace_buffers(tr);
6365         free_cpumask_var(tr->tracing_cpumask);
6366         kfree(tr->name);
6367         kfree(tr);
6368
6369  out_unlock:
6370         mutex_unlock(&trace_types_lock);
6371
6372         return ret;
6373
6374 }
6375
6376 static int instance_delete(const char *name)
6377 {
6378         struct trace_array *tr;
6379         int found = 0;
6380         int ret;
6381
6382         mutex_lock(&trace_types_lock);
6383
6384         ret = -ENODEV;
6385         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6386                 if (tr->name && strcmp(tr->name, name) == 0) {
6387                         found = 1;
6388                         break;
6389                 }
6390         }
6391         if (!found)
6392                 goto out_unlock;
6393
6394         ret = -EBUSY;
6395         if (tr->ref)
6396                 goto out_unlock;
6397
6398         list_del(&tr->list);
6399
6400         tracing_set_nop(tr);
6401         event_trace_del_tracer(tr);
6402         ftrace_destroy_function_files(tr);
6403         debugfs_remove_recursive(tr->dir);
6404         free_trace_buffers(tr);
6405
6406         kfree(tr->name);
6407         kfree(tr);
6408
6409         ret = 0;
6410
6411  out_unlock:
6412         mutex_unlock(&trace_types_lock);
6413
6414         return ret;
6415 }
6416
6417 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6418 {
6419         struct dentry *parent;
6420         int ret;
6421
6422         /* Paranoid: Make sure the parent is the "instances" directory */
6423         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6424         if (WARN_ON_ONCE(parent != trace_instance_dir))
6425                 return -ENOENT;
6426
6427         /*
6428          * The inode mutex is locked, but debugfs_create_dir() will also
6429          * take the mutex. As the instances directory cannot be destroyed
6430          * or changed in any other way, it is safe to unlock it and let
6431          * the dentry try. If two users try to make the same dir at the
6432          * same time, then new_instance_create() will determine the
6433          * winner.
6434          */
6435         mutex_unlock(&inode->i_mutex);
6436
6437         ret = new_instance_create(dentry->d_iname);
6438
6439         mutex_lock(&inode->i_mutex);
6440
6441         return ret;
6442 }
6443
6444 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6445 {
6446         struct dentry *parent;
6447         int ret;
6448
6449         /* Paranoid: Make sure the parent is the "instances" directory */
6450         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6451         if (WARN_ON_ONCE(parent != trace_instance_dir))
6452                 return -ENOENT;
6453
6454         /* The caller did a dget() on dentry */
6455         mutex_unlock(&dentry->d_inode->i_mutex);
6456
6457         /*
6458          * The inode mutex is locked, but debugfs_remove_recursive() will
6459          * also take the mutex. As the instances directory cannot be
6460          * destroyed or changed in any other way, it is safe to unlock it
6461          * and let the dentry try. If two users try to remove the same dir
6462          * at the same time, then instance_delete() will determine the
6463          * winner.
6464          */
6465         mutex_unlock(&inode->i_mutex);
6466
6467         ret = instance_delete(dentry->d_iname);
6468
6469         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6470         mutex_lock(&dentry->d_inode->i_mutex);
6471
6472         return ret;
6473 }
6474
6475 static const struct inode_operations instance_dir_inode_operations = {
6476         .lookup         = simple_lookup,
6477         .mkdir          = instance_mkdir,
6478         .rmdir          = instance_rmdir,
6479 };
6480
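/*
 * Create the "instances" directory.  Instances are managed with plain
 * mkdir/rmdir from user space; a hypothetical session (assuming debugfs
 * is mounted at /sys/kernel/debug) looks like:
 *
 *   # mkdir /sys/kernel/debug/tracing/instances/foo
 *   # echo function > /sys/kernel/debug/tracing/instances/foo/current_tracer
 *   # rmdir /sys/kernel/debug/tracing/instances/foo
 *
 * Each instance created this way gets its own ring buffers and its own
 * set of control files from init_tracer_debugfs().
 */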
6481 static __init void create_trace_instances(struct dentry *d_tracer)
6482 {
6483         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6484         if (WARN_ON(!trace_instance_dir))
6485                 return;
6486
6487         /* Hijack the dir inode operations, to allow mkdir */
6488         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6489 }
6490
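/*
 * Create the per-instance control files (current_tracer, trace,
 * trace_pipe, buffer_size_kb, trace_marker, tracing_on, ...) under
 * d_tracer, along with the per-cpu directories.  Used for the global
 * trace array as well as for instances created under "instances".
 */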
6491 static void
6492 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6493 {
6494         int cpu;
6495
6496         trace_create_file("available_tracers", 0444, d_tracer,
6497                         tr, &show_traces_fops);
6498
6499         trace_create_file("current_tracer", 0644, d_tracer,
6500                         tr, &set_tracer_fops);
6501
6502         trace_create_file("tracing_cpumask", 0644, d_tracer,
6503                           tr, &tracing_cpumask_fops);
6504
6505         trace_create_file("trace_options", 0644, d_tracer,
6506                           tr, &tracing_iter_fops);
6507
6508         trace_create_file("trace", 0644, d_tracer,
6509                           tr, &tracing_fops);
6510
6511         trace_create_file("trace_pipe", 0444, d_tracer,
6512                           tr, &tracing_pipe_fops);
6513
6514         trace_create_file("buffer_size_kb", 0644, d_tracer,
6515                           tr, &tracing_entries_fops);
6516
6517         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6518                           tr, &tracing_total_entries_fops);
6519
6520         trace_create_file("free_buffer", 0200, d_tracer,
6521                           tr, &tracing_free_buffer_fops);
6522
6523         trace_create_file("trace_marker", 0220, d_tracer,
6524                           tr, &tracing_mark_fops);
6525
6526         trace_create_file("trace_clock", 0644, d_tracer, tr,
6527                           &trace_clock_fops);
6528
6529         trace_create_file("tracing_on", 0644, d_tracer,
6530                           tr, &rb_simple_fops);
6531
6532 #ifdef CONFIG_TRACER_MAX_TRACE
6533         trace_create_file("tracing_max_latency", 0644, d_tracer,
6534                         &tr->max_latency, &tracing_max_lat_fops);
6535 #endif
6536
6537         if (ftrace_create_function_files(tr, d_tracer))
6538                 WARN(1, "Could not allocate function filter files");
6539
6540 #ifdef CONFIG_TRACER_SNAPSHOT
6541         trace_create_file("snapshot", 0644, d_tracer,
6542                           tr, &snapshot_fops);
6543 #endif
6544
6545         for_each_tracing_cpu(cpu)
6546                 tracing_init_debugfs_percpu(tr, cpu);
6547
6548 }
6549
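/*
 * Populate the top level tracing directory: the global instance files
 * via init_tracer_debugfs(), the files that exist only once (README,
 * saved_cmdlines, tracing_thresh, ...), the "instances" directory and
 * the trace options directory.
 */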
6550 static __init int tracer_init_debugfs(void)
6551 {
6552         struct dentry *d_tracer;
6553
6554         trace_access_lock_init();
6555
6556         d_tracer = tracing_init_dentry();
6557         if (!d_tracer)
6558                 return 0;
6559
6560         init_tracer_debugfs(&global_trace, d_tracer);
6561
6562         trace_create_file("tracing_thresh", 0644, d_tracer,
6563                         &global_trace, &tracing_thresh_fops);
6564
6565         trace_create_file("README", 0444, d_tracer,
6566                         NULL, &tracing_readme_fops);
6567
6568         trace_create_file("saved_cmdlines", 0444, d_tracer,
6569                         NULL, &tracing_saved_cmdlines_fops);
6570
6571         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
6572                           NULL, &tracing_saved_cmdlines_size_fops);
6573
6574 #ifdef CONFIG_DYNAMIC_FTRACE
6575         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6576                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6577 #endif
6578
6579         create_trace_instances(d_tracer);
6580
6581         create_trace_options_dir(&global_trace);
6582
6583         return 0;
6584 }
6585
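/*
 * Panic notifier: if ftrace_dump_on_oops is set, dump the ftrace
 * buffers to the console so the trace leading up to the panic is not
 * lost.
 */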
6586 static int trace_panic_handler(struct notifier_block *this,
6587                                unsigned long event, void *unused)
6588 {
6589         if (ftrace_dump_on_oops)
6590                 ftrace_dump(ftrace_dump_on_oops);
6591         return NOTIFY_OK;
6592 }
6593
6594 static struct notifier_block trace_panic_notifier = {
6595         .notifier_call  = trace_panic_handler,
6596         .next           = NULL,
6597         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6598 };
6599
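/*
 * Die notifier: same as the panic handler, but triggered on an oops
 * (DIE_OOPS) rather than a full panic.
 */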
6600 static int trace_die_handler(struct notifier_block *self,
6601                              unsigned long val,
6602                              void *data)
6603 {
6604         switch (val) {
6605         case DIE_OOPS:
6606                 if (ftrace_dump_on_oops)
6607                         ftrace_dump(ftrace_dump_on_oops);
6608                 break;
6609         default:
6610                 break;
6611         }
6612         return NOTIFY_OK;
6613 }
6614
6615 static struct notifier_block trace_die_notifier = {
6616         .notifier_call = trace_die_handler,
6617         .priority = 200
6618 };
6619
6620 /*
6621  * printk is limited to a max of 1024; we really don't need it that big.
6622  * Nothing should be printing 1000 characters anyway.
6623  */
6624 #define TRACE_MAX_PRINT         1000
6625
6626 /*
6627  * Define here KERN_TRACE so that we have one place to modify
6628  * it if we decide to change what log level the ftrace dump
6629  * should be at.
6630  */
6631 #define KERN_TRACE              KERN_EMERG
6632
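/*
 * Print a trace_seq to the console at KERN_TRACE level, truncating it
 * to TRACE_MAX_PRINT characters, then reinitialize the seq for reuse.
 */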
6633 void
6634 trace_printk_seq(struct trace_seq *s)
6635 {
6636         /* Probably should print a warning here. */
6637         if (s->len >= TRACE_MAX_PRINT)
6638                 s->len = TRACE_MAX_PRINT;
6639
6640         /* should be zero terminated, but we are paranoid. */
6641         s->buffer[s->len] = 0;
6642
6643         printk(KERN_TRACE "%s", s->buffer);
6644
6645         trace_seq_init(s);
6646 }
6647
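/*
 * Set up an iterator over the global trace buffer, covering all CPUs
 * and matching the current tracer and clock settings.
 */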
6648 void trace_init_global_iter(struct trace_iterator *iter)
6649 {
6650         iter->tr = &global_trace;
6651         iter->trace = iter->tr->current_trace;
6652         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6653         iter->trace_buffer = &global_trace.trace_buffer;
6654
6655         if (iter->trace && iter->trace->open)
6656                 iter->trace->open(iter);
6657
6658         /* Annotate start of buffers if we had overruns */
6659         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6660                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6661
6662         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6663         if (trace_clocks[iter->tr->clock_id].in_ns)
6664                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6665 }
6666
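/*
 * Dump the ftrace ring buffers to the console.  oops_dump_mode selects
 * what to print: DUMP_ALL dumps every CPU buffer, DUMP_ORIG only the
 * CPU that triggered the dump, and DUMP_NONE nothing.  Tracing is
 * turned off before dumping (and stays off), and only one dump may run
 * at a time.  Callers include the panic/die notifiers above and the
 * sysrq-z handler.
 */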
6667 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6668 {
6669         /* use static because iter can be a bit big for the stack */
6670         static struct trace_iterator iter;
6671         static atomic_t dump_running;
6672         unsigned int old_userobj;
6673         unsigned long flags;
6674         int cnt = 0, cpu;
6675
6676         /* Only allow one dump user at a time. */
6677         if (atomic_inc_return(&dump_running) != 1) {
6678                 atomic_dec(&dump_running);
6679                 return;
6680         }
6681
6682         /*
6683          * Always turn off tracing when we dump.
6684          * We don't need to show trace output of what happens
6685          * between multiple crashes.
6686          *
6687          * If the user does a sysrq-z, then they can re-enable
6688          * tracing with echo 1 > tracing_on.
6689          */
6690         tracing_off();
6691
6692         local_irq_save(flags);
6693
6694         /* Simulate the iterator */
6695         trace_init_global_iter(&iter);
6696
6697         for_each_tracing_cpu(cpu) {
6698                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6699         }
6700
6701         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6702
6703         /* don't look at user memory in panic mode */
6704         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6705
6706         switch (oops_dump_mode) {
6707         case DUMP_ALL:
6708                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6709                 break;
6710         case DUMP_ORIG:
6711                 iter.cpu_file = raw_smp_processor_id();
6712                 break;
6713         case DUMP_NONE:
6714                 goto out_enable;
6715         default:
6716                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6717                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6718         }
6719
6720         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6721
6722         /* Did function tracer already get disabled? */
6723         if (ftrace_is_dead()) {
6724                 printk(KERN_TRACE "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6725                 printk(KERN_TRACE "#          MAY BE MISSING FUNCTION EVENTS\n");
6726         }
6727
6728         /*
6729          * We need to stop all tracing on all CPUS to read
6730          * the next buffer. This is a bit expensive, but is
6731          * not done often. We print all that we can read,
6732          * and then release the locks again.
6733          */
6734
6735         while (!trace_empty(&iter)) {
6736
6737                 if (!cnt)
6738                         printk(KERN_TRACE "---------------------------------\n");
6739
6740                 cnt++;
6741
6742                 /* reset all but tr, trace, and overruns */
6743                 memset(&iter.seq, 0,
6744                        sizeof(struct trace_iterator) -
6745                        offsetof(struct trace_iterator, seq));
6746                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6747                 iter.pos = -1;
6748
6749                 if (trace_find_next_entry_inc(&iter) != NULL) {
6750                         int ret;
6751
6752                         ret = print_trace_line(&iter);
6753                         if (ret != TRACE_TYPE_NO_CONSUME)
6754                                 trace_consume(&iter);
6755                 }
6756                 touch_nmi_watchdog();
6757
6758                 trace_printk_seq(&iter.seq);
6759         }
6760
6761         if (!cnt)
6762                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6763         else
6764                 printk(KERN_TRACE "---------------------------------\n");
6765
6766  out_enable:
6767         trace_flags |= old_userobj;
6768
6769         for_each_tracing_cpu(cpu) {
6770                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6771         }
6772         atomic_dec(&dump_running);
6773         local_irq_restore(flags);
6774 }
6775 EXPORT_SYMBOL_GPL(ftrace_dump);
6776
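/*
 * Early init: allocate the cpumasks, the global trace buffers (kept at
 * their minimum size unless they have already been expanded), the
 * temp_buffer used by event triggers and the saved cmdlines buffer,
 * then register the nop tracer and the panic/die notifiers, and apply
 * any boot-time clock and option settings.
 */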
6777 __init static int tracer_alloc_buffers(void)
6778 {
6779         int ring_buf_size;
6780         int ret = -ENOMEM;
6781
6783         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6784                 goto out;
6785
6786         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6787                 goto out_free_buffer_mask;
6788
6789         /* Only allocate trace_printk buffers if a trace_printk exists */
6790         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6791                 /* Must be called before global_trace.buffer is allocated */
6792                 trace_printk_init_buffers();
6793
6794         /* To save memory, keep the ring buffer size to its minimum */
6795         if (ring_buffer_expanded)
6796                 ring_buf_size = trace_buf_size;
6797         else
6798                 ring_buf_size = 1;
6799
6800         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6801         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6802
6803         raw_spin_lock_init(&global_trace.start_lock);
6804
6805         /* Used for event triggers */
6806         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6807         if (!temp_buffer)
6808                 goto out_free_cpumask;
6809
6810         if (trace_create_savedcmd() < 0)
6811                 goto out_free_temp_buffer;
6812
6813         /* TODO: make the number of buffers hot pluggable with CPUS */
6814         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6815                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6816                 WARN_ON(1);
6817                 goto out_free_savedcmd;
6818         }
6819
6820         if (global_trace.buffer_disabled)
6821                 tracing_off();
6822
6823         if (trace_boot_clock) {
6824                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
6825                 if (ret < 0)
6826                         pr_warning("Trace clock %s not defined, going back to default\n",
6827                                    trace_boot_clock);
6828         }
6829
6830         /*
6831          * register_tracer() might reference current_trace, so it
6832          * needs to be set before we register anything. This is
6833          * just a bootstrap of current_trace anyway.
6834          */
6835         global_trace.current_trace = &nop_trace;
6836
6837         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6838
6839         ftrace_init_global_array_ops(&global_trace);
6840
6841         register_tracer(&nop_trace);
6842
6843         /* All seems OK, enable tracing */
6844         tracing_disabled = 0;
6845
6846         atomic_notifier_chain_register(&panic_notifier_list,
6847                                        &trace_panic_notifier);
6848
6849         register_die_notifier(&trace_die_notifier);
6850
6851         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6852
6853         INIT_LIST_HEAD(&global_trace.systems);
6854         INIT_LIST_HEAD(&global_trace.events);
6855         list_add(&global_trace.list, &ftrace_trace_arrays);
6856
6857         while (trace_boot_options) {
6858                 char *option;
6859
6860                 option = strsep(&trace_boot_options, ",");
6861                 trace_set_options(&global_trace, option);
6862         }
6863
6864         register_snapshot_cmd();
6865
6866         return 0;
6867
6868 out_free_savedcmd:
6869         free_saved_cmdlines_buffer(savedcmd);
6870 out_free_temp_buffer:
6871         ring_buffer_free(temp_buffer);
6872 out_free_cpumask:
6873         free_cpumask_var(global_trace.tracing_cpumask);
6874 out_free_buffer_mask:
6875         free_cpumask_var(tracing_buffer_mask);
6876 out:
6877         return ret;
6878 }
6879
6880 __init static int clear_boot_tracer(void)
6881 {
6882         /*
6883          * The buffer holding the default bootup tracer name lives in
6884          * an init section. This function is called at late_initcall
6885          * time; if the boot tracer was never found by then, clear the
6886          * pointer to prevent a later registration from accessing a
6887          * buffer that is about to be freed.
6888          */
6889         if (!default_bootup_tracer)
6890                 return 0;
6891
6892         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6893                default_bootup_tracer);
6894         default_bootup_tracer = NULL;
6895
6896         return 0;
6897 }
6898
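/*
 * Init ordering: the ring buffers are allocated at early_initcall
 * time, the debugfs files are created at fs_initcall time once debugfs
 * is available, and the stale boot tracer pointer is cleared at
 * late_initcall time, after all built-in tracers have had a chance to
 * register.
 */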
6899 early_initcall(tracer_alloc_buffers);
6900 fs_initcall(tracer_init_debugfs);
6901 late_initcall(clear_boot_tracer);