1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring buffer, such as trace_printk, could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int
77 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
78 {
79         return 0;
80 }
81
82 /*
83  * To prevent the comm cache from being overwritten when no
84  * tracing is active, only save the comm when a trace event
85  * occurred.
86  */
87 static DEFINE_PER_CPU(bool, trace_cmdline_save);
88
89 /*
90  * Kill all tracing for good (never come back).
91  * It is initialized to 1 but will turn to zero if the initialization
92  * of the tracer is successful. But that is the only place that sets
93  * this back to zero.
94  */
95 static int tracing_disabled = 1;
96
97 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
98
99 cpumask_var_t __read_mostly     tracing_buffer_mask;
100
101 /*
102  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
103  *
104  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
105  * is set, then ftrace_dump is called. This will output the contents
106  * of the ftrace buffers to the console.  This is very useful for
107  * capturing traces that lead to crashes and outputting them to a
108  * serial console.
109  *
110  * It is off by default, but you can enable it either by specifying
111  * "ftrace_dump_on_oops" on the kernel command line, or by setting
112  * /proc/sys/kernel/ftrace_dump_on_oops:
113  * Set it to 1 to dump the buffers of all CPUs
114  * Set it to 2 to dump only the buffer of the CPU that triggered the oops
115  */
116
117 enum ftrace_dump_mode ftrace_dump_on_oops;
118
119 /* When set, tracing will stop when a WARN*() is hit */
120 int __disable_trace_on_warning;
121
122 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
123
124 #define MAX_TRACER_SIZE         100
125 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
126 static char *default_bootup_tracer;
127
128 static bool allocate_snapshot;
129
130 static int __init set_cmdline_ftrace(char *str)
131 {
132         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
133         default_bootup_tracer = bootup_tracer_buf;
134         /* We are using ftrace early, expand it */
135         ring_buffer_expanded = true;
136         return 1;
137 }
138 __setup("ftrace=", set_cmdline_ftrace);
139
140 static int __init set_ftrace_dump_on_oops(char *str)
141 {
142         if (*str++ != '=' || !*str) {
143                 ftrace_dump_on_oops = DUMP_ALL;
144                 return 1;
145         }
146
147         if (!strcmp("orig_cpu", str)) {
148                 ftrace_dump_on_oops = DUMP_ORIG;
149                 return 1;
150         }
151
152         return 0;
153 }
154 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
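/*
 * Illustrative examples only, following set_ftrace_dump_on_oops() above and
 * the ftrace_dump_on_oops comment: the ways the option can be enabled.
 *
 *   On the kernel command line:  ftrace_dump_on_oops
 *                                ftrace_dump_on_oops=orig_cpu
 *   At run time:                 echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *                                echo 2 > /proc/sys/kernel/ftrace_dump_on_oops
 */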
155
156 static int __init stop_trace_on_warning(char *str)
157 {
158         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
159                 __disable_trace_on_warning = 1;
160         return 1;
161 }
162 __setup("traceoff_on_warning", stop_trace_on_warning);
163
164 static int __init boot_alloc_snapshot(char *str)
165 {
166         allocate_snapshot = true;
167         /* We also need the main ring buffer expanded */
168         ring_buffer_expanded = true;
169         return 1;
170 }
171 __setup("alloc_snapshot", boot_alloc_snapshot);
172
173
174 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
175 static char *trace_boot_options __initdata;
176
177 static int __init set_trace_boot_options(char *str)
178 {
179         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
180         trace_boot_options = trace_boot_options_buf;
181         return 0;
182 }
183 __setup("trace_options=", set_trace_boot_options);
184
185 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
186 static char *trace_boot_clock __initdata;
187
188 static int __init set_trace_boot_clock(char *str)
189 {
190         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
191         trace_boot_clock = trace_boot_clock_buf;
192         return 0;
193 }
194 __setup("trace_clock=", set_trace_boot_clock);
195
196
197 unsigned long long ns2usecs(cycle_t nsec)
198 {
199         nsec += 500;
200         do_div(nsec, 1000);
201         return nsec;
202 }
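/*
 * For example (illustration only), ns2usecs() rounds to the nearest
 * microsecond: ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */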
203
204 /*
205  * The global_trace is the descriptor that holds the tracing
206  * buffers for the live tracing. For each CPU, it contains
207  * a linked list of pages that will store trace entries. The
208  * page descriptors of those pages are used to hold the
209  * linked list, by linking the lru item of each page descriptor
210  * to the other pages of that CPU's buffer.
211  *
212  * For each active CPU there is a data field that holds the
213  * pages for the buffer for that CPU. Each CPU has the same number
214  * of pages allocated for its buffer.
215  */
216 static struct trace_array       global_trace;
217
218 LIST_HEAD(ftrace_trace_arrays);
219
220 int trace_array_get(struct trace_array *this_tr)
221 {
222         struct trace_array *tr;
223         int ret = -ENODEV;
224
225         mutex_lock(&trace_types_lock);
226         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
227                 if (tr == this_tr) {
228                         tr->ref++;
229                         ret = 0;
230                         break;
231                 }
232         }
233         mutex_unlock(&trace_types_lock);
234
235         return ret;
236 }
237
238 static void __trace_array_put(struct trace_array *this_tr)
239 {
240         WARN_ON(!this_tr->ref);
241         this_tr->ref--;
242 }
243
244 void trace_array_put(struct trace_array *this_tr)
245 {
246         mutex_lock(&trace_types_lock);
247         __trace_array_put(this_tr);
248         mutex_unlock(&trace_types_lock);
249 }
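/*
 * Illustrative sketch only (not used by this file): the usual pairing of
 * trace_array_get()/trace_array_put() around use of a trace_array. The
 * function name and its caller are hypothetical.
 */
static inline void example_trace_array_user(struct trace_array *tr)
{
	if (trace_array_get(tr) < 0)
		return;			/* -ENODEV: array is no longer listed */

	/* ... tr is now pinned and safe to use ... */

	trace_array_put(tr);
}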
250
251 int filter_check_discard(struct ftrace_event_file *file, void *rec,
252                          struct ring_buffer *buffer,
253                          struct ring_buffer_event *event)
254 {
255         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
256             !filter_match_preds(file->filter, rec)) {
257                 ring_buffer_discard_commit(buffer, event);
258                 return 1;
259         }
260
261         return 0;
262 }
263 EXPORT_SYMBOL_GPL(filter_check_discard);
264
265 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
266                               struct ring_buffer *buffer,
267                               struct ring_buffer_event *event)
268 {
269         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
270             !filter_match_preds(call->filter, rec)) {
271                 ring_buffer_discard_commit(buffer, event);
272                 return 1;
273         }
274
275         return 0;
276 }
277 EXPORT_SYMBOL_GPL(call_filter_check_discard);
278
279 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
280 {
281         u64 ts;
282
283         /* Early boot up does not have a buffer yet */
284         if (!buf->buffer)
285                 return trace_clock_local();
286
287         ts = ring_buffer_time_stamp(buf->buffer, cpu);
288         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
289
290         return ts;
291 }
292
293 cycle_t ftrace_now(int cpu)
294 {
295         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
296 }
297
298 /**
299  * tracing_is_enabled - Show if global_trace has been disabled
300  *
302  * Shows if the global trace has been enabled or not. It uses the
303  * mirror flag "buffer_disabled", which is meant for fast paths such
304  * as the irqsoff tracer, so it may be inaccurate due to races. If you
305  * need to know the accurate state, use tracing_is_on(), which is a
306  * little slower, but accurate.
306  */
307 int tracing_is_enabled(void)
308 {
309         /*
310          * For quick access (irqsoff uses this in fast path), just
311          * return the mirror variable of the state of the ring buffer.
312          * It's a little racy, but we don't really care.
313          */
314         smp_rmb();
315         return !global_trace.buffer_disabled;
316 }
317
318 /*
319  * trace_buf_size is the size in bytes that is allocated
320  * for a buffer. Note, the number of bytes is always rounded
321  * up to the page size.
322  *
323  * This number is purposely set to a low value (16384 entries),
324  * so that if a dump on oops happens, one does not have to wait
325  * for all that output. In any case, it is configurable at both
326  * boot time and run time.
327  */
328 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
329
330 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
331
332 /* trace_types holds a linked list of available tracers. */
333 static struct tracer            *trace_types __read_mostly;
334
335 /*
336  * trace_types_lock is used to protect the trace_types list.
337  */
338 DEFINE_MUTEX(trace_types_lock);
339
340 /*
341  * serialize the access of the ring buffer
342  *
343  * The ring buffer serializes readers, but that is only low level protection.
344  * The validity of the events (returned by ring_buffer_peek() etc.)
345  * is not protected by the ring buffer.
346  *
347  * The content of events may become garbage if we allow other processes to consume
348  * these events concurrently:
349  *   A) the page of the consumed events may become a normal page
350  *      (not a reader page) in the ring buffer, and this page will be rewritten
351  *      by the events producer.
352  *   B) the page of the consumed events may become a page for splice_read,
353  *      and this page will be returned to the system.
354  *
355  * These primitives allow multiple processes to access different cpu ring buffers
356  *      concurrently.
357  *
358  * These primitives don't distinguish read-only and read-consume access.
359  * Multiple read-only accesses are also serialized.
360  */
361
362 #ifdef CONFIG_SMP
363 static DECLARE_RWSEM(all_cpu_access_lock);
364 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
365
366 static inline void trace_access_lock(int cpu)
367 {
368         if (cpu == RING_BUFFER_ALL_CPUS) {
369                 /* gain it for accessing the whole ring buffer. */
370                 down_write(&all_cpu_access_lock);
371         } else {
372                 /* gain it for accessing a cpu ring buffer. */
373
374                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
375                 down_read(&all_cpu_access_lock);
376
377                 /* Secondly block other access to this @cpu ring buffer. */
378                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
379         }
380 }
381
382 static inline void trace_access_unlock(int cpu)
383 {
384         if (cpu == RING_BUFFER_ALL_CPUS) {
385                 up_write(&all_cpu_access_lock);
386         } else {
387                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
388                 up_read(&all_cpu_access_lock);
389         }
390 }
391
392 static inline void trace_access_lock_init(void)
393 {
394         int cpu;
395
396         for_each_possible_cpu(cpu)
397                 mutex_init(&per_cpu(cpu_access_lock, cpu));
398 }
399
400 #else
401
402 static DEFINE_MUTEX(access_lock);
403
404 static inline void trace_access_lock(int cpu)
405 {
406         (void)cpu;
407         mutex_lock(&access_lock);
408 }
409
410 static inline void trace_access_unlock(int cpu)
411 {
412         (void)cpu;
413         mutex_unlock(&access_lock);
414 }
415
416 static inline void trace_access_lock_init(void)
417 {
418 }
419
420 #endif
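/*
 * Illustrative sketch only: how a reader of a single cpu buffer nests under
 * the scheme described above. The cpu number comes from the caller and the
 * consume step is a placeholder.
 */
static inline void example_read_cpu_buffer(int cpu)
{
	trace_access_lock(cpu);		/* excludes RING_BUFFER_ALL_CPUS readers */
	/* ... peek at or consume events from this cpu's ring buffer ... */
	trace_access_unlock(cpu);
}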
421
422 /* trace_flags holds trace_options default values */
423 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
424         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
425         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
426         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
427
428 static void tracer_tracing_on(struct trace_array *tr)
429 {
430         if (tr->trace_buffer.buffer)
431                 ring_buffer_record_on(tr->trace_buffer.buffer);
432         /*
433          * This flag is looked at when buffers haven't been allocated
434          * yet, or by some tracers (like irqsoff), that just want to
435          * know if the ring buffer has been disabled, but it can handle
436          * races where it gets disabled while we still do a record.
437          * As the check is in the fast path of the tracers, it is more
438          * important to be fast than accurate.
439          */
440         tr->buffer_disabled = 0;
441         /* Make the flag seen by readers */
442         smp_wmb();
443 }
444
445 /**
446  * tracing_on - enable tracing buffers
447  *
448  * This function enables tracing buffers that may have been
449  * disabled with tracing_off.
450  */
451 void tracing_on(void)
452 {
453         tracer_tracing_on(&global_trace);
454 }
455 EXPORT_SYMBOL_GPL(tracing_on);
456
457 /**
458  * __trace_puts - write a constant string into the trace buffer.
459  * @ip:    The address of the caller
460  * @str:   The constant string to write
461  * @size:  The size of the string.
462  */
463 int __trace_puts(unsigned long ip, const char *str, int size)
464 {
465         struct ring_buffer_event *event;
466         struct ring_buffer *buffer;
467         struct print_entry *entry;
468         unsigned long irq_flags;
469         int alloc;
470         int pc;
471
472         if (!(trace_flags & TRACE_ITER_PRINTK))
473                 return 0;
474
475         pc = preempt_count();
476
477         if (unlikely(tracing_selftest_running || tracing_disabled))
478                 return 0;
479
480         alloc = sizeof(*entry) + size + 2; /* possible \n added */
481
482         local_save_flags(irq_flags);
483         buffer = global_trace.trace_buffer.buffer;
484         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
485                                           irq_flags, pc);
486         if (!event)
487                 return 0;
488
489         entry = ring_buffer_event_data(event);
490         entry->ip = ip;
491
492         memcpy(&entry->buf, str, size);
493
494         /* Add a newline if necessary */
495         if (entry->buf[size - 1] != '\n') {
496                 entry->buf[size] = '\n';
497                 entry->buf[size + 1] = '\0';
498         } else
499                 entry->buf[size] = '\0';
500
501         __buffer_unlock_commit(buffer, event);
502         ftrace_trace_stack(buffer, irq_flags, 4, pc);
503
504         return size;
505 }
506 EXPORT_SYMBOL_GPL(__trace_puts);
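/*
 * Illustrative sketch only: a direct call to __trace_puts(). In normal use
 * this is reached through the trace_puts() macro; the string here is
 * arbitrary.
 */
static inline void example_trace_puts(void)
{
	const char *str = "hello from __trace_puts\n";

	__trace_puts(_THIS_IP_, str, strlen(str));
}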
507
508 /**
509  * __trace_bputs - write the pointer to a constant string into trace buffer
510  * @ip:    The address of the caller
511  * @str:   The constant string whose address is written into the buffer
512  */
513 int __trace_bputs(unsigned long ip, const char *str)
514 {
515         struct ring_buffer_event *event;
516         struct ring_buffer *buffer;
517         struct bputs_entry *entry;
518         unsigned long irq_flags;
519         int size = sizeof(struct bputs_entry);
520         int pc;
521
522         if (!(trace_flags & TRACE_ITER_PRINTK))
523                 return 0;
524
525         pc = preempt_count();
526
527         if (unlikely(tracing_selftest_running || tracing_disabled))
528                 return 0;
529
530         local_save_flags(irq_flags);
531         buffer = global_trace.trace_buffer.buffer;
532         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
533                                           irq_flags, pc);
534         if (!event)
535                 return 0;
536
537         entry = ring_buffer_event_data(event);
538         entry->ip                       = ip;
539         entry->str                      = str;
540
541         __buffer_unlock_commit(buffer, event);
542         ftrace_trace_stack(buffer, irq_flags, 4, pc);
543
544         return 1;
545 }
546 EXPORT_SYMBOL_GPL(__trace_bputs);
547
548 #ifdef CONFIG_TRACER_SNAPSHOT
549 /**
550  * tracing_snapshot - take a snapshot of the current buffer.
551  *
552  * This causes a swap between the snapshot buffer and the current live
553  * tracing buffer. You can use this to take snapshots of the live
554  * trace when some condition is triggered, but continue to trace.
555  *
556  * Note, make sure to allocate the snapshot with either
557  * tracing_snapshot_alloc(), or by doing it manually
558  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
559  *
560  * If the snapshot buffer is not allocated, this will stop tracing,
561  * basically making a permanent snapshot.
562  */
563 void tracing_snapshot(void)
564 {
565         struct trace_array *tr = &global_trace;
566         struct tracer *tracer = tr->current_trace;
567         unsigned long flags;
568
569         if (in_nmi()) {
570                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
571                 internal_trace_puts("*** snapshot is being ignored        ***\n");
572                 return;
573         }
574
575         if (!tr->allocated_snapshot) {
576                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
577                 internal_trace_puts("*** stopping trace here!   ***\n");
578                 tracing_off();
579                 return;
580         }
581
582         /* Note, snapshot can not be used when the tracer uses it */
583         if (tracer->use_max_tr) {
584                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
585                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
586                 return;
587         }
588
589         local_irq_save(flags);
590         update_max_tr(tr, current, smp_processor_id());
591         local_irq_restore(flags);
592 }
593 EXPORT_SYMBOL_GPL(tracing_snapshot);
594
595 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
596                                         struct trace_buffer *size_buf, int cpu_id);
597 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
598
599 static int alloc_snapshot(struct trace_array *tr)
600 {
601         int ret;
602
603         if (!tr->allocated_snapshot) {
604
605                 /* allocate spare buffer */
606                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
607                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
608                 if (ret < 0)
609                         return ret;
610
611                 tr->allocated_snapshot = true;
612         }
613
614         return 0;
615 }
616
617 static void free_snapshot(struct trace_array *tr)
618 {
619         /*
620          * We don't free the ring buffer. Instead, we resize it because
621          * the max_tr ring buffer has some state (e.g. ring->clock) and
622          * we want to preserve it.
623          */
624         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
625         set_buffer_entries(&tr->max_buffer, 1);
626         tracing_reset_online_cpus(&tr->max_buffer);
627         tr->allocated_snapshot = false;
628 }
629
630 /**
631  * tracing_alloc_snapshot - allocate snapshot buffer.
632  *
633  * This only allocates the snapshot buffer if it isn't already
634  * allocated - it doesn't also take a snapshot.
635  *
636  * This is meant to be used in cases where the snapshot buffer needs
637  * to be set up for events that can't sleep but need to be able to
638  * trigger a snapshot.
639  */
640 int tracing_alloc_snapshot(void)
641 {
642         struct trace_array *tr = &global_trace;
643         int ret;
644
645         ret = alloc_snapshot(tr);
646         WARN_ON(ret < 0);
647
648         return ret;
649 }
650 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
651
652 /**
653  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
654  *
655  * This is similar to tracing_snapshot(), but it will allocate the
656  * snapshot buffer if it isn't already allocated. Use this only
657  * where it is safe to sleep, as the allocation may sleep.
658  *
659  * This causes a swap between the snapshot buffer and the current live
660  * tracing buffer. You can use this to take snapshots of the live
661  * trace when some condition is triggered, but continue to trace.
662  */
663 void tracing_snapshot_alloc(void)
664 {
665         int ret;
666
667         ret = tracing_alloc_snapshot();
668         if (ret < 0)
669                 return;
670
671         tracing_snapshot();
672 }
673 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
674 #else
675 void tracing_snapshot(void)
676 {
677         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
678 }
679 EXPORT_SYMBOL_GPL(tracing_snapshot);
680 int tracing_alloc_snapshot(void)
681 {
682         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
683         return -ENODEV;
684 }
685 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
686 void tracing_snapshot_alloc(void)
687 {
688         /* Give warning */
689         tracing_snapshot();
690 }
691 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
692 #endif /* CONFIG_TRACER_SNAPSHOT */
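/*
 * Illustrative sketch only: allocate the snapshot buffer from a context that
 * may sleep, then take snapshots when some condition of interest fires.
 * "condition_hit" is a placeholder for that condition.
 */
static inline void example_snapshot_use(bool condition_hit)
{
	if (tracing_alloc_snapshot() < 0)
		return;

	if (condition_hit)
		tracing_snapshot();
}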
693
694 static void tracer_tracing_off(struct trace_array *tr)
695 {
696         if (tr->trace_buffer.buffer)
697                 ring_buffer_record_off(tr->trace_buffer.buffer);
698         /*
699          * This flag is looked at when buffers haven't been allocated
700          * yet, or by some tracers (like irqsoff), that just want to
701          * know if the ring buffer has been disabled, but it can handle
702          * races where it gets disabled while we still do a record.
703          * As the check is in the fast path of the tracers, it is more
704          * important to be fast than accurate.
705          */
706         tr->buffer_disabled = 1;
707         /* Make the flag seen by readers */
708         smp_wmb();
709 }
710
711 /**
712  * tracing_off - turn off tracing buffers
713  *
714  * This function stops the tracing buffers from recording data.
715  * It does not disable any overhead the tracers themselves may
716  * be causing. This function simply causes all recording to
717  * the ring buffers to fail.
718  */
719 void tracing_off(void)
720 {
721         tracer_tracing_off(&global_trace);
722 }
723 EXPORT_SYMBOL_GPL(tracing_off);
724
725 void disable_trace_on_warning(void)
726 {
727         if (__disable_trace_on_warning)
728                 tracing_off();
729 }
730
731 /**
732  * tracer_tracing_is_on - show real state of ring buffer enabled
733  * @tr : the trace array to know if ring buffer is enabled
734  *
735  * Shows real state of the ring buffer if it is enabled or not.
736  */
737 static int tracer_tracing_is_on(struct trace_array *tr)
738 {
739         if (tr->trace_buffer.buffer)
740                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
741         return !tr->buffer_disabled;
742 }
743
744 /**
745  * tracing_is_on - show state of ring buffers enabled
746  */
747 int tracing_is_on(void)
748 {
749         return tracer_tracing_is_on(&global_trace);
750 }
751 EXPORT_SYMBOL_GPL(tracing_is_on);
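/*
 * Illustrative sketch only: bracketing a region of interest so that only its
 * events are recorded, as the kernel-doc above describes.
 */
static inline void example_trace_window(void)
{
	tracing_on();
	/* ... the code whose events should land in the ring buffer ... */
	tracing_off();
}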
752
753 static int __init set_buf_size(char *str)
754 {
755         unsigned long buf_size;
756
757         if (!str)
758                 return 0;
759         buf_size = memparse(str, &str);
760         /* nr_entries can not be zero */
761         if (buf_size == 0)
762                 return 0;
763         trace_buf_size = buf_size;
764         return 1;
765 }
766 __setup("trace_buf_size=", set_buf_size);
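/*
 * Illustrative examples only: since set_buf_size() uses memparse(), the
 * per-cpu buffer size may be given on the command line with a suffix, e.g.
 *
 *   trace_buf_size=1441792
 *   trace_buf_size=4M
 */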
767
768 static int __init set_tracing_thresh(char *str)
769 {
770         unsigned long threshold;
771         int ret;
772
773         if (!str)
774                 return 0;
775         ret = kstrtoul(str, 0, &threshold);
776         if (ret < 0)
777                 return 0;
778         tracing_thresh = threshold * 1000;
779         return 1;
780 }
781 __setup("tracing_thresh=", set_tracing_thresh);
782
783 unsigned long nsecs_to_usecs(unsigned long nsecs)
784 {
785         return nsecs / 1000;
786 }
787
788 /* These must match the bit positions in trace_iterator_flags */
789 static const char *trace_options[] = {
790         "print-parent",
791         "sym-offset",
792         "sym-addr",
793         "verbose",
794         "raw",
795         "hex",
796         "bin",
797         "block",
798         "stacktrace",
799         "trace_printk",
800         "ftrace_preempt",
801         "branch",
802         "annotate",
803         "userstacktrace",
804         "sym-userobj",
805         "printk-msg-only",
806         "context-info",
807         "latency-format",
808         "sleep-time",
809         "graph-time",
810         "record-cmd",
811         "overwrite",
812         "disable_on_free",
813         "irq-info",
814         "markers",
815         "function-trace",
816         NULL
817 };
818
819 static struct {
820         u64 (*func)(void);
821         const char *name;
822         int in_ns;              /* is this clock in nanoseconds? */
823 } trace_clocks[] = {
824         { trace_clock_local,            "local",        1 },
825         { trace_clock_global,           "global",       1 },
826         { trace_clock_counter,          "counter",      0 },
827         { trace_clock_jiffies,          "uptime",       0 },
828         { trace_clock,                  "perf",         1 },
829         { ktime_get_mono_fast_ns,       "mono",         1 },
830         ARCH_TRACE_CLOCKS
831 };
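/*
 * Illustrative example only: one of the clock names above can be selected at
 * boot via the "trace_clock=" parameter handled earlier, e.g.
 *
 *   trace_clock=global
 */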
832
833 /*
834  * trace_parser_get_init - gets the buffer for trace parser
835  */
836 int trace_parser_get_init(struct trace_parser *parser, int size)
837 {
838         memset(parser, 0, sizeof(*parser));
839
840         parser->buffer = kmalloc(size, GFP_KERNEL);
841         if (!parser->buffer)
842                 return 1;
843
844         parser->size = size;
845         return 0;
846 }
847
848 /*
849  * trace_parser_put - frees the buffer for trace parser
850  */
851 void trace_parser_put(struct trace_parser *parser)
852 {
853         kfree(parser->buffer);
854 }
855
856 /*
857  * trace_get_user - reads the user input string separated by space
858  * (matched by isspace(ch))
859  *
860  * For each string found the 'struct trace_parser' is updated,
861  * and the function returns.
862  *
863  * Returns number of bytes read.
864  *
865  * See kernel/trace/trace.h for 'struct trace_parser' details.
866  */
867 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
868         size_t cnt, loff_t *ppos)
869 {
870         char ch;
871         size_t read = 0;
872         ssize_t ret;
873
874         if (!*ppos)
875                 trace_parser_clear(parser);
876
877         ret = get_user(ch, ubuf++);
878         if (ret)
879                 goto out;
880
881         read++;
882         cnt--;
883
884         /*
885          * If the parser has not finished with the last write,
886          * continue reading the user input without skipping spaces.
887          */
888         if (!parser->cont) {
889                 /* skip white space */
890                 while (cnt && isspace(ch)) {
891                         ret = get_user(ch, ubuf++);
892                         if (ret)
893                                 goto out;
894                         read++;
895                         cnt--;
896                 }
897
898                 /* only spaces were written */
899                 if (isspace(ch)) {
900                         *ppos += read;
901                         ret = read;
902                         goto out;
903                 }
904
905                 parser->idx = 0;
906         }
907
908         /* read the non-space input */
909         while (cnt && !isspace(ch)) {
910                 if (parser->idx < parser->size - 1)
911                         parser->buffer[parser->idx++] = ch;
912                 else {
913                         ret = -EINVAL;
914                         goto out;
915                 }
916                 ret = get_user(ch, ubuf++);
917                 if (ret)
918                         goto out;
919                 read++;
920                 cnt--;
921         }
922
923         /* We either got finished input or we have to wait for another call. */
924         if (isspace(ch)) {
925                 parser->buffer[parser->idx] = 0;
926                 parser->cont = false;
927         } else if (parser->idx < parser->size - 1) {
928                 parser->cont = true;
929                 parser->buffer[parser->idx++] = ch;
930         } else {
931                 ret = -EINVAL;
932                 goto out;
933         }
934
935         *ppos += read;
936         ret = read;
937
938 out:
939         return ret;
940 }
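/*
 * Illustrative sketch only: the usual life cycle of a trace_parser around
 * trace_get_user(), similar to what the *_write() file handlers do. The
 * caller supplies ubuf, cnt and ppos; the 64-byte size is arbitrary and
 * trace_parser_loaded() comes from trace.h.
 */
static inline ssize_t example_parse_one_word(const char __user *ubuf,
					     size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read;

	if (trace_parser_get_init(&parser, 64))
		return -ENOMEM;

	read = trace_get_user(&parser, ubuf, cnt, ppos);
	if (read > 0 && trace_parser_loaded(&parser)) {
		/* parser.buffer now holds one NUL-terminated word */
	}

	trace_parser_put(&parser);
	return read;
}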
941
942 /* TODO add a seq_buf_to_buffer() */
943 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
944 {
945         int len;
946
947         if (trace_seq_used(s) <= s->seq.readpos)
948                 return -EBUSY;
949
950         len = trace_seq_used(s) - s->seq.readpos;
951         if (cnt > len)
952                 cnt = len;
953         memcpy(buf, s->buffer + s->seq.readpos, cnt);
954
955         s->seq.readpos += cnt;
956         return cnt;
957 }
958
959 unsigned long __read_mostly     tracing_thresh;
960
961 #ifdef CONFIG_TRACER_MAX_TRACE
962 /*
963  * Copy the new maximum trace into the separate maximum-trace
964  * structure. (this way the maximum trace is permanently saved,
965  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
966  */
967 static void
968 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
969 {
970         struct trace_buffer *trace_buf = &tr->trace_buffer;
971         struct trace_buffer *max_buf = &tr->max_buffer;
972         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
973         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
974
975         max_buf->cpu = cpu;
976         max_buf->time_start = data->preempt_timestamp;
977
978         max_data->saved_latency = tr->max_latency;
979         max_data->critical_start = data->critical_start;
980         max_data->critical_end = data->critical_end;
981
982         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
983         max_data->pid = tsk->pid;
984         /*
985          * If tsk == current, then use current_uid(), as that does not use
986          * RCU. The irq tracer can be called out of RCU scope.
987          */
988         if (tsk == current)
989                 max_data->uid = current_uid();
990         else
991                 max_data->uid = task_uid(tsk);
992
993         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
994         max_data->policy = tsk->policy;
995         max_data->rt_priority = tsk->rt_priority;
996
997         /* record this task's comm */
998         tracing_record_cmdline(tsk);
999 }
1000
1001 /**
1002  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1003  * @tr: tracer
1004  * @tsk: the task with the latency
1005  * @cpu: The cpu that initiated the trace.
1006  *
1007  * Flip the buffers between the @tr and the max_tr and record information
1008  * about which task was the cause of this latency.
1009  */
1010 void
1011 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1012 {
1013         struct ring_buffer *buf;
1014
1015         if (tr->stop_count)
1016                 return;
1017
1018         WARN_ON_ONCE(!irqs_disabled());
1019
1020         if (!tr->allocated_snapshot) {
1021                 /* Only the nop tracer should hit this when disabling */
1022                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1023                 return;
1024         }
1025
1026         arch_spin_lock(&tr->max_lock);
1027
1028         buf = tr->trace_buffer.buffer;
1029         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1030         tr->max_buffer.buffer = buf;
1031
1032         __update_max_tr(tr, tsk, cpu);
1033         arch_spin_unlock(&tr->max_lock);
1034 }
1035
1036 /**
1037  * update_max_tr_single - only copy one trace over, and reset the rest
1038  * @tr: tracer
1039  * @tsk: task with the latency
1040  * @cpu: the cpu of the buffer to copy.
1041  *
1042  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1043  */
1044 void
1045 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1046 {
1047         int ret;
1048
1049         if (tr->stop_count)
1050                 return;
1051
1052         WARN_ON_ONCE(!irqs_disabled());
1053         if (!tr->allocated_snapshot) {
1054                 /* Only the nop tracer should hit this when disabling */
1055                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1056                 return;
1057         }
1058
1059         arch_spin_lock(&tr->max_lock);
1060
1061         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1062
1063         if (ret == -EBUSY) {
1064                 /*
1065                  * We failed to swap the buffer due to a commit taking
1066                  * place on this CPU. We fail to record, but we reset
1067                  * the max trace buffer (no one writes directly to it)
1068                  * and flag that it failed.
1069                  */
1070                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1071                         "Failed to swap buffers due to commit in progress\n");
1072         }
1073
1074         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1075
1076         __update_max_tr(tr, tsk, cpu);
1077         arch_spin_unlock(&tr->max_lock);
1078 }
1079 #endif /* CONFIG_TRACER_MAX_TRACE */
1080
1081 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1082 {
1083         /* Iterators are static, they should be filled or empty */
1084         if (trace_buffer_iter(iter, iter->cpu_file))
1085                 return 0;
1086
1087         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1088                                 full);
1089 }
1090
1091 #ifdef CONFIG_FTRACE_STARTUP_TEST
1092 static int run_tracer_selftest(struct tracer *type)
1093 {
1094         struct trace_array *tr = &global_trace;
1095         struct tracer *saved_tracer = tr->current_trace;
1096         int ret;
1097
1098         if (!type->selftest || tracing_selftest_disabled)
1099                 return 0;
1100
1101         /*
1102          * Run a selftest on this tracer.
1103          * Here we reset the trace buffer, and set the current
1104          * tracer to be this tracer. The tracer can then run some
1105          * internal tracing to verify that everything is in order.
1106          * If we fail, we do not register this tracer.
1107          */
1108         tracing_reset_online_cpus(&tr->trace_buffer);
1109
1110         tr->current_trace = type;
1111
1112 #ifdef CONFIG_TRACER_MAX_TRACE
1113         if (type->use_max_tr) {
1114                 /* If we expanded the buffers, make sure the max is expanded too */
1115                 if (ring_buffer_expanded)
1116                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1117                                            RING_BUFFER_ALL_CPUS);
1118                 tr->allocated_snapshot = true;
1119         }
1120 #endif
1121
1122         /* the test is responsible for initializing and enabling */
1123         pr_info("Testing tracer %s: ", type->name);
1124         ret = type->selftest(type, tr);
1125         /* the test is responsible for resetting too */
1126         tr->current_trace = saved_tracer;
1127         if (ret) {
1128                 printk(KERN_CONT "FAILED!\n");
1129                 /* Add the warning after printing 'FAILED' */
1130                 WARN_ON(1);
1131                 return -1;
1132         }
1133         /* Only reset on passing, to avoid touching corrupted buffers */
1134         tracing_reset_online_cpus(&tr->trace_buffer);
1135
1136 #ifdef CONFIG_TRACER_MAX_TRACE
1137         if (type->use_max_tr) {
1138                 tr->allocated_snapshot = false;
1139
1140                 /* Shrink the max buffer again */
1141                 if (ring_buffer_expanded)
1142                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1143                                            RING_BUFFER_ALL_CPUS);
1144         }
1145 #endif
1146
1147         printk(KERN_CONT "PASSED\n");
1148         return 0;
1149 }
1150 #else
1151 static inline int run_tracer_selftest(struct tracer *type)
1152 {
1153         return 0;
1154 }
1155 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1156
1157 /**
1158  * register_tracer - register a tracer with the ftrace system.
1159  * @type: the plugin for the tracer
1160  *
1161  * Register a new plugin tracer.
1162  */
1163 int register_tracer(struct tracer *type)
1164 {
1165         struct tracer *t;
1166         int ret = 0;
1167
1168         if (!type->name) {
1169                 pr_info("Tracer must have a name\n");
1170                 return -1;
1171         }
1172
1173         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1174                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1175                 return -1;
1176         }
1177
1178         mutex_lock(&trace_types_lock);
1179
1180         tracing_selftest_running = true;
1181
1182         for (t = trace_types; t; t = t->next) {
1183                 if (strcmp(type->name, t->name) == 0) {
1184                         /* already found */
1185                         pr_info("Tracer %s already registered\n",
1186                                 type->name);
1187                         ret = -1;
1188                         goto out;
1189                 }
1190         }
1191
1192         if (!type->set_flag)
1193                 type->set_flag = &dummy_set_flag;
1194         if (!type->flags)
1195                 type->flags = &dummy_tracer_flags;
1196         else
1197                 if (!type->flags->opts)
1198                         type->flags->opts = dummy_tracer_opt;
1199
1200         ret = run_tracer_selftest(type);
1201         if (ret < 0)
1202                 goto out;
1203
1204         type->next = trace_types;
1205         trace_types = type;
1206
1207  out:
1208         tracing_selftest_running = false;
1209         mutex_unlock(&trace_types_lock);
1210
1211         if (ret || !default_bootup_tracer)
1212                 goto out_unlock;
1213
1214         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1215                 goto out_unlock;
1216
1217         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1218         /* Do we want this tracer to start on bootup? */
1219         tracing_set_tracer(&global_trace, type->name);
1220         default_bootup_tracer = NULL;
1221         /* Disable other selftests, since this tracer will break them. */
1222         tracing_selftest_disabled = true;
1223 #ifdef CONFIG_FTRACE_STARTUP_TEST
1224         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1225                type->name);
1226 #endif
1227
1228  out_unlock:
1229         return ret;
1230 }
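/*
 * Illustrative sketch only: the minimum a tracer plugin supplies before
 * calling register_tracer(). The "example" tracer and its callbacks are
 * hypothetical; real tracers live in the other trace_*.c files.
 */
static int example_tracer_init(struct trace_array *tr)
{
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static int __init example_tracer_register(void)
{
	return register_tracer(&example_tracer);
}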
1231
1232 void tracing_reset(struct trace_buffer *buf, int cpu)
1233 {
1234         struct ring_buffer *buffer = buf->buffer;
1235
1236         if (!buffer)
1237                 return;
1238
1239         ring_buffer_record_disable(buffer);
1240
1241         /* Make sure all commits have finished */
1242         synchronize_sched();
1243         ring_buffer_reset_cpu(buffer, cpu);
1244
1245         ring_buffer_record_enable(buffer);
1246 }
1247
1248 void tracing_reset_online_cpus(struct trace_buffer *buf)
1249 {
1250         struct ring_buffer *buffer = buf->buffer;
1251         int cpu;
1252
1253         if (!buffer)
1254                 return;
1255
1256         ring_buffer_record_disable(buffer);
1257
1258         /* Make sure all commits have finished */
1259         synchronize_sched();
1260
1261         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1262
1263         for_each_online_cpu(cpu)
1264                 ring_buffer_reset_cpu(buffer, cpu);
1265
1266         ring_buffer_record_enable(buffer);
1267 }
1268
1269 /* Must have trace_types_lock held */
1270 void tracing_reset_all_online_cpus(void)
1271 {
1272         struct trace_array *tr;
1273
1274         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1275                 tracing_reset_online_cpus(&tr->trace_buffer);
1276 #ifdef CONFIG_TRACER_MAX_TRACE
1277                 tracing_reset_online_cpus(&tr->max_buffer);
1278 #endif
1279         }
1280 }
1281
1282 #define SAVED_CMDLINES_DEFAULT 128
1283 #define NO_CMDLINE_MAP UINT_MAX
1284 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1285 struct saved_cmdlines_buffer {
1286         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1287         unsigned *map_cmdline_to_pid;
1288         unsigned cmdline_num;
1289         int cmdline_idx;
1290         char *saved_cmdlines;
1291 };
1292 static struct saved_cmdlines_buffer *savedcmd;
1293
1294 /* temporarily disable recording */
1295 static atomic_t trace_record_cmdline_disabled __read_mostly;
1296
1297 static inline char *get_saved_cmdlines(int idx)
1298 {
1299         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1300 }
1301
1302 static inline void set_cmdline(int idx, const char *cmdline)
1303 {
1304         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1305 }
1306
1307 static int allocate_cmdlines_buffer(unsigned int val,
1308                                     struct saved_cmdlines_buffer *s)
1309 {
1310         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1311                                         GFP_KERNEL);
1312         if (!s->map_cmdline_to_pid)
1313                 return -ENOMEM;
1314
1315         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1316         if (!s->saved_cmdlines) {
1317                 kfree(s->map_cmdline_to_pid);
1318                 return -ENOMEM;
1319         }
1320
1321         s->cmdline_idx = 0;
1322         s->cmdline_num = val;
1323         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1324                sizeof(s->map_pid_to_cmdline));
1325         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1326                val * sizeof(*s->map_cmdline_to_pid));
1327
1328         return 0;
1329 }
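/*
 * Note: the memset()s in allocate_cmdlines_buffer() rely on NO_CMDLINE_MAP
 * being UINT_MAX, so every byte of the fill pattern is 0xff and each
 * "unsigned" slot really ends up holding NO_CMDLINE_MAP.
 */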
1330
1331 static int trace_create_savedcmd(void)
1332 {
1333         int ret;
1334
1335         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1336         if (!savedcmd)
1337                 return -ENOMEM;
1338
1339         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1340         if (ret < 0) {
1341                 kfree(savedcmd);
1342                 savedcmd = NULL;
1343                 return -ENOMEM;
1344         }
1345
1346         return 0;
1347 }
1348
1349 int is_tracing_stopped(void)
1350 {
1351         return global_trace.stop_count;
1352 }
1353
1354 /**
1355  * tracing_start - quick start of the tracer
1356  *
1357  * If tracing is enabled but was stopped by tracing_stop,
1358  * this will start the tracer back up.
1359  */
1360 void tracing_start(void)
1361 {
1362         struct ring_buffer *buffer;
1363         unsigned long flags;
1364
1365         if (tracing_disabled)
1366                 return;
1367
1368         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1369         if (--global_trace.stop_count) {
1370                 if (global_trace.stop_count < 0) {
1371                         /* Someone screwed up their debugging */
1372                         WARN_ON_ONCE(1);
1373                         global_trace.stop_count = 0;
1374                 }
1375                 goto out;
1376         }
1377
1378         /* Prevent the buffers from switching */
1379         arch_spin_lock(&global_trace.max_lock);
1380
1381         buffer = global_trace.trace_buffer.buffer;
1382         if (buffer)
1383                 ring_buffer_record_enable(buffer);
1384
1385 #ifdef CONFIG_TRACER_MAX_TRACE
1386         buffer = global_trace.max_buffer.buffer;
1387         if (buffer)
1388                 ring_buffer_record_enable(buffer);
1389 #endif
1390
1391         arch_spin_unlock(&global_trace.max_lock);
1392
1393  out:
1394         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1395 }
1396
1397 static void tracing_start_tr(struct trace_array *tr)
1398 {
1399         struct ring_buffer *buffer;
1400         unsigned long flags;
1401
1402         if (tracing_disabled)
1403                 return;
1404
1405         /* If global, we need to also start the max tracer */
1406         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1407                 return tracing_start();
1408
1409         raw_spin_lock_irqsave(&tr->start_lock, flags);
1410
1411         if (--tr->stop_count) {
1412                 if (tr->stop_count < 0) {
1413                         /* Someone screwed up their debugging */
1414                         WARN_ON_ONCE(1);
1415                         tr->stop_count = 0;
1416                 }
1417                 goto out;
1418         }
1419
1420         buffer = tr->trace_buffer.buffer;
1421         if (buffer)
1422                 ring_buffer_record_enable(buffer);
1423
1424  out:
1425         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1426 }
1427
1428 /**
1429  * tracing_stop - quick stop of the tracer
1430  *
1431  * Light weight way to stop tracing. Use in conjunction with
1432  * tracing_start.
1433  */
1434 void tracing_stop(void)
1435 {
1436         struct ring_buffer *buffer;
1437         unsigned long flags;
1438
1439         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1440         if (global_trace.stop_count++)
1441                 goto out;
1442
1443         /* Prevent the buffers from switching */
1444         arch_spin_lock(&global_trace.max_lock);
1445
1446         buffer = global_trace.trace_buffer.buffer;
1447         if (buffer)
1448                 ring_buffer_record_disable(buffer);
1449
1450 #ifdef CONFIG_TRACER_MAX_TRACE
1451         buffer = global_trace.max_buffer.buffer;
1452         if (buffer)
1453                 ring_buffer_record_disable(buffer);
1454 #endif
1455
1456         arch_spin_unlock(&global_trace.max_lock);
1457
1458  out:
1459         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1460 }
1461
1462 static void tracing_stop_tr(struct trace_array *tr)
1463 {
1464         struct ring_buffer *buffer;
1465         unsigned long flags;
1466
1467         /* If global, we need to also stop the max tracer */
1468         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1469                 return tracing_stop();
1470
1471         raw_spin_lock_irqsave(&tr->start_lock, flags);
1472         if (tr->stop_count++)
1473                 goto out;
1474
1475         buffer = tr->trace_buffer.buffer;
1476         if (buffer)
1477                 ring_buffer_record_disable(buffer);
1478
1479  out:
1480         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1481 }
1482
1483 void trace_stop_cmdline_recording(void);
1484
1485 static int trace_save_cmdline(struct task_struct *tsk)
1486 {
1487         unsigned pid, idx;
1488
1489         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1490                 return 0;
1491
1492         /*
1493          * It's not the end of the world if we don't get
1494          * the lock, but we also don't want to spin
1495          * nor do we want to disable interrupts,
1496          * so if we miss here, then better luck next time.
1497          */
1498         if (!arch_spin_trylock(&trace_cmdline_lock))
1499                 return 0;
1500
1501         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1502         if (idx == NO_CMDLINE_MAP) {
1503                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1504
1505                 /*
1506                  * Check whether the cmdline buffer at idx has a pid
1507                  * mapped. We are going to overwrite that entry so we
1508                  * need to clear the map_pid_to_cmdline. Otherwise we
1509                  * would read the new comm for the old pid.
1510                  */
1511                 pid = savedcmd->map_cmdline_to_pid[idx];
1512                 if (pid != NO_CMDLINE_MAP)
1513                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1514
1515                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1516                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1517
1518                 savedcmd->cmdline_idx = idx;
1519         }
1520
1521         set_cmdline(idx, tsk->comm);
1522
1523         arch_spin_unlock(&trace_cmdline_lock);
1524
1525         return 1;
1526 }
1527
1528 static void __trace_find_cmdline(int pid, char comm[])
1529 {
1530         unsigned map;
1531
1532         if (!pid) {
1533                 strcpy(comm, "<idle>");
1534                 return;
1535         }
1536
1537         if (WARN_ON_ONCE(pid < 0)) {
1538                 strcpy(comm, "<XXX>");
1539                 return;
1540         }
1541
1542         if (pid > PID_MAX_DEFAULT) {
1543                 strcpy(comm, "<...>");
1544                 return;
1545         }
1546
1547         map = savedcmd->map_pid_to_cmdline[pid];
1548         if (map != NO_CMDLINE_MAP)
1549                 strcpy(comm, get_saved_cmdlines(map));
1550         else
1551                 strcpy(comm, "<...>");
1552 }
1553
1554 void trace_find_cmdline(int pid, char comm[])
1555 {
1556         preempt_disable();
1557         arch_spin_lock(&trace_cmdline_lock);
1558
1559         __trace_find_cmdline(pid, comm);
1560
1561         arch_spin_unlock(&trace_cmdline_lock);
1562         preempt_enable();
1563 }
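/*
 * Illustrative sketch only: resolving a recorded pid back to a comm, e.g.
 * while printing an entry. "ent" stands for a trace entry obtained from the
 * ring buffer.
 */
static inline void example_resolve_comm(struct trace_entry *ent)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(ent->pid, comm);
	/* comm now holds the saved command name, or "<...>" if unknown */
}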
1564
1565 void tracing_record_cmdline(struct task_struct *tsk)
1566 {
1567         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1568                 return;
1569
1570         if (!__this_cpu_read(trace_cmdline_save))
1571                 return;
1572
1573         if (trace_save_cmdline(tsk))
1574                 __this_cpu_write(trace_cmdline_save, false);
1575 }
1576
1577 void
1578 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1579                              int pc)
1580 {
1581         struct task_struct *tsk = current;
1582
1583         entry->preempt_count            = pc & 0xff;
1584         entry->pid                      = (tsk) ? tsk->pid : 0;
1585         entry->flags =
1586 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1587                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1588 #else
1589                 TRACE_FLAG_IRQS_NOSUPPORT |
1590 #endif
1591                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1592                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1593                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1594                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1595 }
1596 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1597
1598 struct ring_buffer_event *
1599 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1600                           int type,
1601                           unsigned long len,
1602                           unsigned long flags, int pc)
1603 {
1604         struct ring_buffer_event *event;
1605
1606         event = ring_buffer_lock_reserve(buffer, len);
1607         if (event != NULL) {
1608                 struct trace_entry *ent = ring_buffer_event_data(event);
1609
1610                 tracing_generic_entry_update(ent, flags, pc);
1611                 ent->type = type;
1612         }
1613
1614         return event;
1615 }
1616
1617 void
1618 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1619 {
1620         __this_cpu_write(trace_cmdline_save, true);
1621         ring_buffer_unlock_commit(buffer, event);
1622 }
1623
1624 static inline void
1625 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1626                              struct ring_buffer_event *event,
1627                              unsigned long flags, int pc)
1628 {
1629         __buffer_unlock_commit(buffer, event);
1630
1631         ftrace_trace_stack(buffer, flags, 6, pc);
1632         ftrace_trace_userstack(buffer, flags, pc);
1633 }
1634
1635 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1636                                 struct ring_buffer_event *event,
1637                                 unsigned long flags, int pc)
1638 {
1639         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1640 }
1641 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1642
1643 static struct ring_buffer *temp_buffer;
1644
1645 struct ring_buffer_event *
1646 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1647                           struct ftrace_event_file *ftrace_file,
1648                           int type, unsigned long len,
1649                           unsigned long flags, int pc)
1650 {
1651         struct ring_buffer_event *entry;
1652
1653         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1654         entry = trace_buffer_lock_reserve(*current_rb,
1655                                          type, len, flags, pc);
1656         /*
1657          * If tracing is off, but we have triggers enabled,
1658          * we still need to look at the event data. Use the temp_buffer
1659          * to store the trace event for the trigger to use. It's recursion
1660          * safe and will not be recorded anywhere.
1661          */
1662         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1663                 *current_rb = temp_buffer;
1664                 entry = trace_buffer_lock_reserve(*current_rb,
1665                                                   type, len, flags, pc);
1666         }
1667         return entry;
1668 }
1669 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1670
1671 struct ring_buffer_event *
1672 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1673                                   int type, unsigned long len,
1674                                   unsigned long flags, int pc)
1675 {
1676         *current_rb = global_trace.trace_buffer.buffer;
1677         return trace_buffer_lock_reserve(*current_rb,
1678                                          type, len, flags, pc);
1679 }
1680 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1681
1682 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1683                                         struct ring_buffer_event *event,
1684                                         unsigned long flags, int pc)
1685 {
1686         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1687 }
1688 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1689
1690 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1691                                      struct ring_buffer_event *event,
1692                                      unsigned long flags, int pc,
1693                                      struct pt_regs *regs)
1694 {
1695         __buffer_unlock_commit(buffer, event);
1696
1697         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1698         ftrace_trace_userstack(buffer, flags, pc);
1699 }
1700 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1701
1702 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1703                                          struct ring_buffer_event *event)
1704 {
1705         ring_buffer_discard_commit(buffer, event);
1706 }
1707 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1708
1709 void
1710 trace_function(struct trace_array *tr,
1711                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1712                int pc)
1713 {
1714         struct ftrace_event_call *call = &event_function;
1715         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1716         struct ring_buffer_event *event;
1717         struct ftrace_entry *entry;
1718
1719         /* If we are reading the ring buffer, don't trace */
1720         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1721                 return;
1722
1723         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1724                                           flags, pc);
1725         if (!event)
1726                 return;
1727         entry   = ring_buffer_event_data(event);
1728         entry->ip                       = ip;
1729         entry->parent_ip                = parent_ip;
1730
1731         if (!call_filter_check_discard(call, entry, buffer, event))
1732                 __buffer_unlock_commit(buffer, event);
1733 }
1734
1735 #ifdef CONFIG_STACKTRACE
1736
1737 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1738 struct ftrace_stack {
1739         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1740 };
1741
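/*
 * Per-cpu scratch stack used by __ftrace_trace_stack(). The reserve
 * counter below tracks nesting, so an interrupt or NMI that comes in
 * while the scratch stack is in use falls back to the smaller stack
 * stored in the event itself.
 */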
1742 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1743 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1744
1745 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1746                                  unsigned long flags,
1747                                  int skip, int pc, struct pt_regs *regs)
1748 {
1749         struct ftrace_event_call *call = &event_kernel_stack;
1750         struct ring_buffer_event *event;
1751         struct stack_entry *entry;
1752         struct stack_trace trace;
1753         int use_stack;
1754         int size = FTRACE_STACK_ENTRIES;
1755
1756         trace.nr_entries        = 0;
1757         trace.skip              = skip;
1758
1759         /*
1760          * Since events can happen in NMIs there's no safe way to
1761          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1762          * or NMI comes in, it will just have to use the default
1763          * FTRACE_STACK_ENTRIES-sized stack stored in the event itself.
1764          */
1765         preempt_disable_notrace();
1766
1767         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1768         /*
1769          * We don't need any atomic variables, just a barrier.
1770          * If an interrupt comes in, we don't care, because it would
1771          * have exited and put the counter back to what we want.
1772          * We just need a barrier to keep gcc from moving things
1773          * around.
1774          */
1775         barrier();
1776         if (use_stack == 1) {
1777                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1778                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1779
1780                 if (regs)
1781                         save_stack_trace_regs(regs, &trace);
1782                 else
1783                         save_stack_trace(&trace);
1784
1785                 if (trace.nr_entries > size)
1786                         size = trace.nr_entries;
1787         } else
1788                 /* From now on, use_stack is a boolean */
1789                 use_stack = 0;
1790
1791         size *= sizeof(unsigned long);
1792
1793         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1794                                           sizeof(*entry) + size, flags, pc);
1795         if (!event)
1796                 goto out;
1797         entry = ring_buffer_event_data(event);
1798
1799         memset(&entry->caller, 0, size);
1800
1801         if (use_stack)
1802                 memcpy(&entry->caller, trace.entries,
1803                        trace.nr_entries * sizeof(unsigned long));
1804         else {
1805                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1806                 trace.entries           = entry->caller;
1807                 if (regs)
1808                         save_stack_trace_regs(regs, &trace);
1809                 else
1810                         save_stack_trace(&trace);
1811         }
1812
1813         entry->size = trace.nr_entries;
1814
1815         if (!call_filter_check_discard(call, entry, buffer, event))
1816                 __buffer_unlock_commit(buffer, event);
1817
1818  out:
1819         /* Again, don't let gcc optimize things here */
1820         barrier();
1821         __this_cpu_dec(ftrace_stack_reserve);
1822         preempt_enable_notrace();
1823
1824 }
1825
1826 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1827                              int skip, int pc, struct pt_regs *regs)
1828 {
1829         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1830                 return;
1831
1832         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1833 }
1834
1835 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1836                         int skip, int pc)
1837 {
1838         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1839                 return;
1840
1841         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1842 }
1843
1844 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1845                    int pc)
1846 {
1847         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1848 }
1849
1850 /**
1851  * trace_dump_stack - record a stack back trace in the trace buffer
1852  * @skip: Number of functions to skip (helper handlers)
1853  */
1854 void trace_dump_stack(int skip)
1855 {
1856         unsigned long flags;
1857
1858         if (tracing_disabled || tracing_selftest_running)
1859                 return;
1860
1861         local_save_flags(flags);
1862
1863         /*
1864          * Skip 3 more, which seems to get us to the caller of
1865          * this function.
1866          */
1867         skip += 3;
1868         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1869                              flags, skip, preempt_count(), NULL);
1870 }
1871
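/* Per-cpu nesting counter to keep user stack tracing from recursing */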
1872 static DEFINE_PER_CPU(int, user_stack_count);
1873
1874 void
1875 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1876 {
1877         struct ftrace_event_call *call = &event_user_stack;
1878         struct ring_buffer_event *event;
1879         struct userstack_entry *entry;
1880         struct stack_trace trace;
1881
1882         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1883                 return;
1884
1885         /*
1886          * NMIs can not handle page faults, even with fixups.
1887          * Saving the user stack can (and often does) fault.
1888          */
1889         if (unlikely(in_nmi()))
1890                 return;
1891
1892         /*
1893          * prevent recursion, since the user stack tracing may
1894          * trigger other kernel events.
1895          */
1896         preempt_disable();
1897         if (__this_cpu_read(user_stack_count))
1898                 goto out;
1899
1900         __this_cpu_inc(user_stack_count);
1901
1902         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1903                                           sizeof(*entry), flags, pc);
1904         if (!event)
1905                 goto out_drop_count;
1906         entry   = ring_buffer_event_data(event);
1907
1908         entry->tgid             = current->tgid;
1909         memset(&entry->caller, 0, sizeof(entry->caller));
1910
1911         trace.nr_entries        = 0;
1912         trace.max_entries       = FTRACE_STACK_ENTRIES;
1913         trace.skip              = 0;
1914         trace.entries           = entry->caller;
1915
1916         save_stack_trace_user(&trace);
1917         if (!call_filter_check_discard(call, entry, buffer, event))
1918                 __buffer_unlock_commit(buffer, event);
1919
1920  out_drop_count:
1921         __this_cpu_dec(user_stack_count);
1922  out:
1923         preempt_enable();
1924 }
1925
1926 #ifdef UNUSED
1927 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1928 {
1929         ftrace_trace_userstack(tr, flags, preempt_count());
1930 }
1931 #endif /* UNUSED */
1932
1933 #endif /* CONFIG_STACKTRACE */
1934
1935 /* created for use with alloc_percpu */
1936 struct trace_buffer_struct {
1937         char buffer[TRACE_BUF_SIZE];
1938 };
1939
1940 static struct trace_buffer_struct *trace_percpu_buffer;
1941 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1942 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1943 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1944
1945 /*
1946  * The buffer used depends on the context. There is a per-cpu
1947  * buffer for normal context, softirq context, hard irq context and
1948  * NMI context. This allows for lockless recording.
1949  *
1950  * Note, if the buffers failed to be allocated, then this returns NULL.
1951  */
1952 static char *get_trace_buf(void)
1953 {
1954         struct trace_buffer_struct *percpu_buffer;
1955
1956         /*
1957          * If we have allocated per cpu buffers, then we do not
1958          * need to do any locking.
1959          */
1960         if (in_nmi())
1961                 percpu_buffer = trace_percpu_nmi_buffer;
1962         else if (in_irq())
1963                 percpu_buffer = trace_percpu_irq_buffer;
1964         else if (in_softirq())
1965                 percpu_buffer = trace_percpu_sirq_buffer;
1966         else
1967                 percpu_buffer = trace_percpu_buffer;
1968
1969         if (!percpu_buffer)
1970                 return NULL;
1971
1972         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1973 }
1974
1975 static int alloc_percpu_trace_buffer(void)
1976 {
1977         struct trace_buffer_struct *buffers;
1978         struct trace_buffer_struct *sirq_buffers;
1979         struct trace_buffer_struct *irq_buffers;
1980         struct trace_buffer_struct *nmi_buffers;
1981
1982         buffers = alloc_percpu(struct trace_buffer_struct);
1983         if (!buffers)
1984                 goto err_warn;
1985
1986         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1987         if (!sirq_buffers)
1988                 goto err_sirq;
1989
1990         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1991         if (!irq_buffers)
1992                 goto err_irq;
1993
1994         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1995         if (!nmi_buffers)
1996                 goto err_nmi;
1997
1998         trace_percpu_buffer = buffers;
1999         trace_percpu_sirq_buffer = sirq_buffers;
2000         trace_percpu_irq_buffer = irq_buffers;
2001         trace_percpu_nmi_buffer = nmi_buffers;
2002
2003         return 0;
2004
2005  err_nmi:
2006         free_percpu(irq_buffers);
2007  err_irq:
2008         free_percpu(sirq_buffers);
2009  err_sirq:
2010         free_percpu(buffers);
2011  err_warn:
2012         WARN(1, "Could not allocate percpu trace_printk buffer");
2013         return -ENOMEM;
2014 }
2015
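/* Non-zero once the trace_printk() per-cpu buffers have been allocated */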
2016 static int buffers_allocated;
2017
2018 void trace_printk_init_buffers(void)
2019 {
2020         if (buffers_allocated)
2021                 return;
2022
2023         if (alloc_percpu_trace_buffer())
2024                 return;
2025
2026         /* trace_printk() is for debug use only. Don't use it in production. */
2027
2028         pr_warning("\n**********************************************************\n");
2029         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2030         pr_warning("**                                                      **\n");
2031         pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2032         pr_warning("**                                                      **\n");
2033         pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2034         pr_warning("** unsafe for production use.                           **\n");
2035         pr_warning("**                                                      **\n");
2036         pr_warning("** If you see this message and you are not debugging    **\n");
2037         pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2038         pr_warning("**                                                      **\n");
2039         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2040         pr_warning("**********************************************************\n");
2041
2042         /* Expand the buffers to the configured size */
2043         tracing_update_buffers();
2044
2045         buffers_allocated = 1;
2046
2047         /*
2048          * trace_printk_init_buffers() can be called by modules.
2049          * If that happens, then we need to start cmdline recording
2050          * directly here. If the global_trace.trace_buffer.buffer is
2051          * already allocated here, then this was called by module code.
2052          */
2053         if (global_trace.trace_buffer.buffer)
2054                 tracing_start_cmdline_record();
2055 }
2056
2057 void trace_printk_start_comm(void)
2058 {
2059         /* Start tracing comms if trace printk is set */
2060         if (!buffers_allocated)
2061                 return;
2062         tracing_start_cmdline_record();
2063 }
2064
2065 static void trace_printk_start_stop_comm(int enabled)
2066 {
2067         if (!buffers_allocated)
2068                 return;
2069
2070         if (enabled)
2071                 tracing_start_cmdline_record();
2072         else
2073                 tracing_stop_cmdline_record();
2074 }
2075
2076 /**
2077  * trace_vbprintk - write binary msg to tracing buffer
2078  * @ip: ip of the caller, @fmt: printf format string, @args: va_list of arguments for @fmt
2079  */
2080 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2081 {
2082         struct ftrace_event_call *call = &event_bprint;
2083         struct ring_buffer_event *event;
2084         struct ring_buffer *buffer;
2085         struct trace_array *tr = &global_trace;
2086         struct bprint_entry *entry;
2087         unsigned long flags;
2088         char *tbuffer;
2089         int len = 0, size, pc;
2090
2091         if (unlikely(tracing_selftest_running || tracing_disabled))
2092                 return 0;
2093
2094         /* Don't pollute graph traces with trace_vprintk internals */
2095         pause_graph_tracing();
2096
2097         pc = preempt_count();
2098         preempt_disable_notrace();
2099
2100         tbuffer = get_trace_buf();
2101         if (!tbuffer) {
2102                 len = 0;
2103                 goto out;
2104         }
2105
2106         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2107
2108         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2109                 goto out;
2110
2111         local_save_flags(flags);
2112         size = sizeof(*entry) + sizeof(u32) * len;
2113         buffer = tr->trace_buffer.buffer;
2114         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2115                                           flags, pc);
2116         if (!event)
2117                 goto out;
2118         entry = ring_buffer_event_data(event);
2119         entry->ip                       = ip;
2120         entry->fmt                      = fmt;
2121
2122         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2123         if (!call_filter_check_discard(call, entry, buffer, event)) {
2124                 __buffer_unlock_commit(buffer, event);
2125                 ftrace_trace_stack(buffer, flags, 6, pc);
2126         }
2127
2128 out:
2129         preempt_enable_notrace();
2130         unpause_graph_tracing();
2131
2132         return len;
2133 }
2134 EXPORT_SYMBOL_GPL(trace_vbprintk);
2135
2136 static int
2137 __trace_array_vprintk(struct ring_buffer *buffer,
2138                       unsigned long ip, const char *fmt, va_list args)
2139 {
2140         struct ftrace_event_call *call = &event_print;
2141         struct ring_buffer_event *event;
2142         int len = 0, size, pc;
2143         struct print_entry *entry;
2144         unsigned long flags;
2145         char *tbuffer;
2146
2147         if (tracing_disabled || tracing_selftest_running)
2148                 return 0;
2149
2150         /* Don't pollute graph traces with trace_vprintk internals */
2151         pause_graph_tracing();
2152
2153         pc = preempt_count();
2154         preempt_disable_notrace();
2155
2156
2157         tbuffer = get_trace_buf();
2158         if (!tbuffer) {
2159                 len = 0;
2160                 goto out;
2161         }
2162
2163         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2164
2165         local_save_flags(flags);
2166         size = sizeof(*entry) + len + 1;
2167         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2168                                           flags, pc);
2169         if (!event)
2170                 goto out;
2171         entry = ring_buffer_event_data(event);
2172         entry->ip = ip;
2173
2174         memcpy(&entry->buf, tbuffer, len + 1);
2175         if (!call_filter_check_discard(call, entry, buffer, event)) {
2176                 __buffer_unlock_commit(buffer, event);
2177                 ftrace_trace_stack(buffer, flags, 6, pc);
2178         }
2179  out:
2180         preempt_enable_notrace();
2181         unpause_graph_tracing();
2182
2183         return len;
2184 }
2185
2186 int trace_array_vprintk(struct trace_array *tr,
2187                         unsigned long ip, const char *fmt, va_list args)
2188 {
2189         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2190 }
2191
2192 int trace_array_printk(struct trace_array *tr,
2193                        unsigned long ip, const char *fmt, ...)
2194 {
2195         int ret;
2196         va_list ap;
2197
2198         if (!(trace_flags & TRACE_ITER_PRINTK))
2199                 return 0;
2200
2201         va_start(ap, fmt);
2202         ret = trace_array_vprintk(tr, ip, fmt, ap);
2203         va_end(ap);
2204         return ret;
2205 }
2206
2207 int trace_array_printk_buf(struct ring_buffer *buffer,
2208                            unsigned long ip, const char *fmt, ...)
2209 {
2210         int ret;
2211         va_list ap;
2212
2213         if (!(trace_flags & TRACE_ITER_PRINTK))
2214                 return 0;
2215
2216         va_start(ap, fmt);
2217         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2218         va_end(ap);
2219         return ret;
2220 }
2221
2222 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2223 {
2224         return trace_array_vprintk(&global_trace, ip, fmt, args);
2225 }
2226 EXPORT_SYMBOL_GPL(trace_vprintk);
2227
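/* Bump the iterator index and advance the per-cpu ring buffer iterator */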
2228 static void trace_iterator_increment(struct trace_iterator *iter)
2229 {
2230         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2231
2232         iter->idx++;
2233         if (buf_iter)
2234                 ring_buffer_read(buf_iter, NULL);
2235 }
2236
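/*
 * Peek at the next entry for @cpu without consuming it. Sets
 * iter->ent_size as a side effect; lost events are only reported when
 * reading the live buffer (no per-cpu iterator).
 */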
2237 static struct trace_entry *
2238 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2239                 unsigned long *lost_events)
2240 {
2241         struct ring_buffer_event *event;
2242         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2243
2244         if (buf_iter)
2245                 event = ring_buffer_iter_peek(buf_iter, ts);
2246         else
2247                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2248                                          lost_events);
2249
2250         if (event) {
2251                 iter->ent_size = ring_buffer_event_length(event);
2252                 return ring_buffer_event_data(event);
2253         }
2254         iter->ent_size = 0;
2255         return NULL;
2256 }
2257
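/*
 * Find the oldest entry by comparing timestamps across all cpus. If a
 * per-cpu trace file is open (cpu_file > RING_BUFFER_ALL_CPUS), only
 * that cpu is peeked.
 */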
2258 static struct trace_entry *
2259 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2260                   unsigned long *missing_events, u64 *ent_ts)
2261 {
2262         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2263         struct trace_entry *ent, *next = NULL;
2264         unsigned long lost_events = 0, next_lost = 0;
2265         int cpu_file = iter->cpu_file;
2266         u64 next_ts = 0, ts;
2267         int next_cpu = -1;
2268         int next_size = 0;
2269         int cpu;
2270
2271         /*
2272          * If we are in a per_cpu trace file, don't bother iterating over
2273          * all cpus; just peek at that cpu directly.
2274          */
2275         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2276                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2277                         return NULL;
2278                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2279                 if (ent_cpu)
2280                         *ent_cpu = cpu_file;
2281
2282                 return ent;
2283         }
2284
2285         for_each_tracing_cpu(cpu) {
2286
2287                 if (ring_buffer_empty_cpu(buffer, cpu))
2288                         continue;
2289
2290                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2291
2292                 /*
2293                  * Pick the entry with the smallest timestamp:
2294                  */
2295                 if (ent && (!next || ts < next_ts)) {
2296                         next = ent;
2297                         next_cpu = cpu;
2298                         next_ts = ts;
2299                         next_lost = lost_events;
2300                         next_size = iter->ent_size;
2301                 }
2302         }
2303
2304         iter->ent_size = next_size;
2305
2306         if (ent_cpu)
2307                 *ent_cpu = next_cpu;
2308
2309         if (ent_ts)
2310                 *ent_ts = next_ts;
2311
2312         if (missing_events)
2313                 *missing_events = next_lost;
2314
2315         return next;
2316 }
2317
2318 /* Find the next real entry, without updating the iterator itself */
2319 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2320                                           int *ent_cpu, u64 *ent_ts)
2321 {
2322         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2323 }
2324
2325 /* Find the next real entry, and increment the iterator to the next entry */
2326 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2327 {
2328         iter->ent = __find_next_entry(iter, &iter->cpu,
2329                                       &iter->lost_events, &iter->ts);
2330
2331         if (iter->ent)
2332                 trace_iterator_increment(iter);
2333
2334         return iter->ent ? iter : NULL;
2335 }
2336
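/* Consume (remove) the next entry for iter->cpu from the live ring buffer */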
2337 static void trace_consume(struct trace_iterator *iter)
2338 {
2339         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2340                             &iter->lost_events);
2341 }
2342
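/* seq_file ->next() operation: advance to the entry at the new *pos
 * (the trace iterator can only move forward). */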
2343 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2344 {
2345         struct trace_iterator *iter = m->private;
2346         int i = (int)*pos;
2347         void *ent;
2348
2349         WARN_ON_ONCE(iter->leftover);
2350
2351         (*pos)++;
2352
2353         /* can't go backwards */
2354         if (iter->idx > i)
2355                 return NULL;
2356
2357         if (iter->idx < 0)
2358                 ent = trace_find_next_entry_inc(iter);
2359         else
2360                 ent = iter;
2361
2362         while (ent && iter->idx < i)
2363                 ent = trace_find_next_entry_inc(iter);
2364
2365         iter->pos = *pos;
2366
2367         return ent;
2368 }
2369
2370 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2371 {
2372         struct ring_buffer_event *event;
2373         struct ring_buffer_iter *buf_iter;
2374         unsigned long entries = 0;
2375         u64 ts;
2376
2377         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2378
2379         buf_iter = trace_buffer_iter(iter, cpu);
2380         if (!buf_iter)
2381                 return;
2382
2383         ring_buffer_iter_reset(buf_iter);
2384
2385         /*
2386          * We could have the case with the max latency tracers
2387          * that a reset never took place on a cpu. This is evident
2388          * by the timestamp being before the start of the buffer.
2389          */
2390         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2391                 if (ts >= iter->trace_buffer->time_start)
2392                         break;
2393                 entries++;
2394                 ring_buffer_read(buf_iter, NULL);
2395         }
2396
2397         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2398 }
2399
2400 /*
2401  * The current tracer is copied to avoid holding a global
2402  * lock all around.
2403  */
2404 static void *s_start(struct seq_file *m, loff_t *pos)
2405 {
2406         struct trace_iterator *iter = m->private;
2407         struct trace_array *tr = iter->tr;
2408         int cpu_file = iter->cpu_file;
2409         void *p = NULL;
2410         loff_t l = 0;
2411         int cpu;
2412
2413         /*
2414          * copy the tracer to avoid using a global lock all around.
2415          * iter->trace is a copy of current_trace, the pointer to the
2416          * name may be used instead of a strcmp(), as iter->trace->name
2417          * will point to the same string as current_trace->name.
2418          */
2419         mutex_lock(&trace_types_lock);
2420         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2421                 *iter->trace = *tr->current_trace;
2422         mutex_unlock(&trace_types_lock);
2423
2424 #ifdef CONFIG_TRACER_MAX_TRACE
2425         if (iter->snapshot && iter->trace->use_max_tr)
2426                 return ERR_PTR(-EBUSY);
2427 #endif
2428
2429         if (!iter->snapshot)
2430                 atomic_inc(&trace_record_cmdline_disabled);
2431
2432         if (*pos != iter->pos) {
2433                 iter->ent = NULL;
2434                 iter->cpu = 0;
2435                 iter->idx = -1;
2436
2437                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2438                         for_each_tracing_cpu(cpu)
2439                                 tracing_iter_reset(iter, cpu);
2440                 } else
2441                         tracing_iter_reset(iter, cpu_file);
2442
2443                 iter->leftover = 0;
2444                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2445                         ;
2446
2447         } else {
2448                 /*
2449                  * If we overflowed the seq_file before, then we want
2450                  * to just reuse the trace_seq buffer again.
2451                  */
2452                 if (iter->leftover)
2453                         p = iter;
2454                 else {
2455                         l = *pos - 1;
2456                         p = s_next(m, p, &l);
2457                 }
2458         }
2459
2460         trace_event_read_lock();
2461         trace_access_lock(cpu_file);
2462         return p;
2463 }
2464
2465 static void s_stop(struct seq_file *m, void *p)
2466 {
2467         struct trace_iterator *iter = m->private;
2468
2469 #ifdef CONFIG_TRACER_MAX_TRACE
2470         if (iter->snapshot && iter->trace->use_max_tr)
2471                 return;
2472 #endif
2473
2474         if (!iter->snapshot)
2475                 atomic_dec(&trace_record_cmdline_disabled);
2476
2477         trace_access_unlock(iter->cpu_file);
2478         trace_event_read_unlock();
2479 }
2480
2481 static void
2482 get_total_entries(struct trace_buffer *buf,
2483                   unsigned long *total, unsigned long *entries)
2484 {
2485         unsigned long count;
2486         int cpu;
2487
2488         *total = 0;
2489         *entries = 0;
2490
2491         for_each_tracing_cpu(cpu) {
2492                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2493                 /*
2494                  * If this buffer has skipped entries, then we hold all
2495                  * entries for the trace and we need to ignore the
2496                  * ones before the time stamp.
2497                  */
2498                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2499                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2500                         /* total is the same as the entries */
2501                         *total += count;
2502                 } else
2503                         *total += count +
2504                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2505                 *entries += count;
2506         }
2507 }
2508
2509 static void print_lat_help_header(struct seq_file *m)
2510 {
2511         seq_puts(m, "#                  _------=> CPU#            \n"
2512                     "#                 / _-----=> irqs-off        \n"
2513                     "#                | / _----=> need-resched    \n"
2514                     "#                || / _---=> hardirq/softirq \n"
2515                     "#                ||| / _--=> preempt-depth   \n"
2516                     "#                |||| /     delay            \n"
2517                     "#  cmd     pid   ||||| time  |   caller      \n"
2518                     "#     \\   /      |||||  \\    |   /         \n");
2519 }
2520
2521 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2522 {
2523         unsigned long total;
2524         unsigned long entries;
2525
2526         get_total_entries(buf, &total, &entries);
2527         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2528                    entries, total, num_online_cpus());
2529         seq_puts(m, "#\n");
2530 }
2531
2532 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2533 {
2534         print_event_info(buf, m);
2535         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2536                     "#              | |       |          |         |\n");
2537 }
2538
2539 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2540 {
2541         print_event_info(buf, m);
2542         seq_puts(m, "#                              _-----=> irqs-off\n"
2543                     "#                             / _----=> need-resched\n"
2544                     "#                            | / _---=> hardirq/softirq\n"
2545                     "#                            || / _--=> preempt-depth\n"
2546                     "#                            ||| /     delay\n"
2547                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2548                     "#              | |       |   ||||       |         |\n");
2549 }
2550
2551 void
2552 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2553 {
2554         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2555         struct trace_buffer *buf = iter->trace_buffer;
2556         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2557         struct tracer *type = iter->trace;
2558         unsigned long entries;
2559         unsigned long total;
2560         const char *name = type->name;
2563
2564         get_total_entries(buf, &total, &entries);
2565
2566         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2567                    name, UTS_RELEASE);
2568         seq_puts(m, "# -----------------------------------"
2569                  "---------------------------------\n");
2570         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2571                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2572                    nsecs_to_usecs(data->saved_latency),
2573                    entries,
2574                    total,
2575                    buf->cpu,
2576 #if defined(CONFIG_PREEMPT_NONE)
2577                    "server",
2578 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2579                    "desktop",
2580 #elif defined(CONFIG_PREEMPT)
2581                    "preempt",
2582 #else
2583                    "unknown",
2584 #endif
2585                    /* These are reserved for later use */
2586                    0, 0, 0, 0);
2587 #ifdef CONFIG_SMP
2588         seq_printf(m, " #P:%d)\n", num_online_cpus());
2589 #else
2590         seq_puts(m, ")\n");
2591 #endif
2592         seq_puts(m, "#    -----------------\n");
2593         seq_printf(m, "#    | task: %.16s-%d "
2594                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2595                    data->comm, data->pid,
2596                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2597                    data->policy, data->rt_priority);
2598         seq_puts(m, "#    -----------------\n");
2599
2600         if (data->critical_start) {
2601                 seq_puts(m, "#  => started at: ");
2602                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2603                 trace_print_seq(m, &iter->seq);
2604                 seq_puts(m, "\n#  => ended at:   ");
2605                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2606                 trace_print_seq(m, &iter->seq);
2607                 seq_puts(m, "\n#\n");
2608         }
2609
2610         seq_puts(m, "#\n");
2611 }
2612
2613 static void test_cpu_buff_start(struct trace_iterator *iter)
2614 {
2615         struct trace_seq *s = &iter->seq;
2616
2617         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2618                 return;
2619
2620         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2621                 return;
2622
2623         if (cpumask_test_cpu(iter->cpu, iter->started))
2624                 return;
2625
2626         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2627                 return;
2628
2629         cpumask_set_cpu(iter->cpu, iter->started);
2630
2631         /* Don't print started cpu buffer for the first entry of the trace */
2632         if (iter->idx > 1)
2633                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2634                                 iter->cpu);
2635 }
2636
2637 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2638 {
2639         struct trace_seq *s = &iter->seq;
2640         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2641         struct trace_entry *entry;
2642         struct trace_event *event;
2643
2644         entry = iter->ent;
2645
2646         test_cpu_buff_start(iter);
2647
2648         event = ftrace_find_event(entry->type);
2649
2650         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2651                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2652                         trace_print_lat_context(iter);
2653                 else
2654                         trace_print_context(iter);
2655         }
2656
2657         if (trace_seq_has_overflowed(s))
2658                 return TRACE_TYPE_PARTIAL_LINE;
2659
2660         if (event)
2661                 return event->funcs->trace(iter, sym_flags, event);
2662
2663         trace_seq_printf(s, "Unknown type %d\n", entry->type);
2664
2665         return trace_handle_return(s);
2666 }
2667
2668 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2669 {
2670         struct trace_seq *s = &iter->seq;
2671         struct trace_entry *entry;
2672         struct trace_event *event;
2673
2674         entry = iter->ent;
2675
2676         if (trace_flags & TRACE_ITER_CONTEXT_INFO)
2677                 trace_seq_printf(s, "%d %d %llu ",
2678                                  entry->pid, iter->cpu, iter->ts);
2679
2680         if (trace_seq_has_overflowed(s))
2681                 return TRACE_TYPE_PARTIAL_LINE;
2682
2683         event = ftrace_find_event(entry->type);
2684         if (event)
2685                 return event->funcs->raw(iter, 0, event);
2686
2687         trace_seq_printf(s, "%d ?\n", entry->type);
2688
2689         return trace_handle_return(s);
2690 }
2691
2692 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2693 {
2694         struct trace_seq *s = &iter->seq;
2695         unsigned char newline = '\n';
2696         struct trace_entry *entry;
2697         struct trace_event *event;
2698
2699         entry = iter->ent;
2700
2701         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2702                 SEQ_PUT_HEX_FIELD(s, entry->pid);
2703                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
2704                 SEQ_PUT_HEX_FIELD(s, iter->ts);
2705                 if (trace_seq_has_overflowed(s))
2706                         return TRACE_TYPE_PARTIAL_LINE;
2707         }
2708
2709         event = ftrace_find_event(entry->type);
2710         if (event) {
2711                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2712                 if (ret != TRACE_TYPE_HANDLED)
2713                         return ret;
2714         }
2715
2716         SEQ_PUT_FIELD(s, newline);
2717
2718         return trace_handle_return(s);
2719 }
2720
2721 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2722 {
2723         struct trace_seq *s = &iter->seq;
2724         struct trace_entry *entry;
2725         struct trace_event *event;
2726
2727         entry = iter->ent;
2728
2729         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2730                 SEQ_PUT_FIELD(s, entry->pid);
2731                 SEQ_PUT_FIELD(s, iter->cpu);
2732                 SEQ_PUT_FIELD(s, iter->ts);
2733                 if (trace_seq_has_overflowed(s))
2734                         return TRACE_TYPE_PARTIAL_LINE;
2735         }
2736
2737         event = ftrace_find_event(entry->type);
2738         return event ? event->funcs->binary(iter, 0, event) :
2739                 TRACE_TYPE_HANDLED;
2740 }
2741
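/* Return 1 if the selected cpu buffer (or all buffers) have nothing left to read */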
2742 int trace_empty(struct trace_iterator *iter)
2743 {
2744         struct ring_buffer_iter *buf_iter;
2745         int cpu;
2746
2747         /* If we are looking at one CPU buffer, only check that one */
2748         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2749                 cpu = iter->cpu_file;
2750                 buf_iter = trace_buffer_iter(iter, cpu);
2751                 if (buf_iter) {
2752                         if (!ring_buffer_iter_empty(buf_iter))
2753                                 return 0;
2754                 } else {
2755                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2756                                 return 0;
2757                 }
2758                 return 1;
2759         }
2760
2761         for_each_tracing_cpu(cpu) {
2762                 buf_iter = trace_buffer_iter(iter, cpu);
2763                 if (buf_iter) {
2764                         if (!ring_buffer_iter_empty(buf_iter))
2765                                 return 0;
2766                 } else {
2767                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2768                                 return 0;
2769                 }
2770         }
2771
2772         return 1;
2773 }
2774
2775 /*  Called with trace_event_read_lock() held. */
2776 enum print_line_t print_trace_line(struct trace_iterator *iter)
2777 {
2778         enum print_line_t ret;
2779
2780         if (iter->lost_events) {
2781                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2782                                  iter->cpu, iter->lost_events);
2783                 if (trace_seq_has_overflowed(&iter->seq))
2784                         return TRACE_TYPE_PARTIAL_LINE;
2785         }
2786
2787         if (iter->trace && iter->trace->print_line) {
2788                 ret = iter->trace->print_line(iter);
2789                 if (ret != TRACE_TYPE_UNHANDLED)
2790                         return ret;
2791         }
2792
2793         if (iter->ent->type == TRACE_BPUTS &&
2794                         trace_flags & TRACE_ITER_PRINTK &&
2795                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2796                 return trace_print_bputs_msg_only(iter);
2797
2798         if (iter->ent->type == TRACE_BPRINT &&
2799                         trace_flags & TRACE_ITER_PRINTK &&
2800                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2801                 return trace_print_bprintk_msg_only(iter);
2802
2803         if (iter->ent->type == TRACE_PRINT &&
2804                         trace_flags & TRACE_ITER_PRINTK &&
2805                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2806                 return trace_print_printk_msg_only(iter);
2807
2808         if (trace_flags & TRACE_ITER_BIN)
2809                 return print_bin_fmt(iter);
2810
2811         if (trace_flags & TRACE_ITER_HEX)
2812                 return print_hex_fmt(iter);
2813
2814         if (trace_flags & TRACE_ITER_RAW)
2815                 return print_raw_fmt(iter);
2816
2817         return print_trace_fmt(iter);
2818 }
2819
2820 void trace_latency_header(struct seq_file *m)
2821 {
2822         struct trace_iterator *iter = m->private;
2823
2824         /* print nothing if the buffers are empty */
2825         if (trace_empty(iter))
2826                 return;
2827
2828         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2829                 print_trace_header(m, iter);
2830
2831         if (!(trace_flags & TRACE_ITER_VERBOSE))
2832                 print_lat_help_header(m);
2833 }
2834
2835 void trace_default_header(struct seq_file *m)
2836 {
2837         struct trace_iterator *iter = m->private;
2838
2839         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2840                 return;
2841
2842         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2843                 /* print nothing if the buffers are empty */
2844                 if (trace_empty(iter))
2845                         return;
2846                 print_trace_header(m, iter);
2847                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2848                         print_lat_help_header(m);
2849         } else {
2850                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2851                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2852                                 print_func_help_header_irq(iter->trace_buffer, m);
2853                         else
2854                                 print_func_help_header(iter->trace_buffer, m);
2855                 }
2856         }
2857 }
2858
2859 static void test_ftrace_alive(struct seq_file *m)
2860 {
2861         if (!ftrace_is_dead())
2862                 return;
2863         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
2864                     "#          MAY BE MISSING FUNCTION EVENTS\n");
2865 }
2866
2867 #ifdef CONFIG_TRACER_MAX_TRACE
2868 static void show_snapshot_main_help(struct seq_file *m)
2869 {
2870         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
2871                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2872                     "#                      Takes a snapshot of the main buffer.\n"
2873                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
2874                     "#                      (Doesn't have to be '2'; works with any number that\n"
2875                     "#                       is not a '0' or '1')\n");
2876 }
2877
2878 static void show_snapshot_percpu_help(struct seq_file *m)
2879 {
2880         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2881 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2882         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2883                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
2884 #else
2885         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
2886                     "#                     Must use main snapshot file to allocate.\n");
2887 #endif
2888         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
2889                     "#                      (Doesn't have to be '2'; works with any number that\n"
2890                     "#                       is not a '0' or '1')\n");
2891 }
2892
2893 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2894 {
2895         if (iter->tr->allocated_snapshot)
2896                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
2897         else
2898                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
2899
2900         seq_puts(m, "# Snapshot commands:\n");
2901         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2902                 show_snapshot_main_help(m);
2903         else
2904                 show_snapshot_percpu_help(m);
2905 }
2906 #else
2907 /* Should never be called */
2908 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2909 #endif
2910
2911 static int s_show(struct seq_file *m, void *v)
2912 {
2913         struct trace_iterator *iter = v;
2914         int ret;
2915
2916         if (iter->ent == NULL) {
2917                 if (iter->tr) {
2918                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2919                         seq_puts(m, "#\n");
2920                         test_ftrace_alive(m);
2921                 }
2922                 if (iter->snapshot && trace_empty(iter))
2923                         print_snapshot_help(m, iter);
2924                 else if (iter->trace && iter->trace->print_header)
2925                         iter->trace->print_header(m);
2926                 else
2927                         trace_default_header(m);
2928
2929         } else if (iter->leftover) {
2930                 /*
2931                  * If we filled the seq_file buffer earlier, we
2932                  * want to just show it now.
2933                  */
2934                 ret = trace_print_seq(m, &iter->seq);
2935
2936                 /* ret should this time be zero, but you never know */
2937                 iter->leftover = ret;
2938
2939         } else {
2940                 print_trace_line(iter);
2941                 ret = trace_print_seq(m, &iter->seq);
2942                 /*
2943                  * If we overflow the seq_file buffer, then it will
2944                  * ask us for this data again at start up.
2945                  * Use that instead.
2946                  *  ret is 0 if seq_file write succeeded.
2947                  *        -1 otherwise.
2948                  */
2949                 iter->leftover = ret;
2950         }
2951
2952         return 0;
2953 }
2954
2955 /*
2956  * Should be used after trace_array_get(); trace_types_lock
2957  * ensures that i_cdev was already initialized.
2958  */
2959 static inline int tracing_get_cpu(struct inode *inode)
2960 {
2961         if (inode->i_cdev) /* See trace_create_cpu_file() */
2962                 return (long)inode->i_cdev - 1;
2963         return RING_BUFFER_ALL_CPUS;
2964 }
2965
2966 static const struct seq_operations tracer_seq_ops = {
2967         .start          = s_start,
2968         .next           = s_next,
2969         .stop           = s_stop,
2970         .show           = s_show,
2971 };
2972
2973 static struct trace_iterator *
2974 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2975 {
2976         struct trace_array *tr = inode->i_private;
2977         struct trace_iterator *iter;
2978         int cpu;
2979
2980         if (tracing_disabled)
2981                 return ERR_PTR(-ENODEV);
2982
2983         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2984         if (!iter)
2985                 return ERR_PTR(-ENOMEM);
2986
2987         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2988                                     GFP_KERNEL);
2989         if (!iter->buffer_iter)
2990                 goto release;
2991
2992         /*
2993          * We make a copy of the current tracer to avoid concurrent
2994          * changes to it while we are reading.
2995          */
2996         mutex_lock(&trace_types_lock);
2997         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2998         if (!iter->trace)
2999                 goto fail;
3000
3001         *iter->trace = *tr->current_trace;
3002
3003         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3004                 goto fail;
3005
3006         iter->tr = tr;
3007
3008 #ifdef CONFIG_TRACER_MAX_TRACE
3009         /* Currently only the top directory has a snapshot */
3010         if (tr->current_trace->print_max || snapshot)
3011                 iter->trace_buffer = &tr->max_buffer;
3012         else
3013 #endif
3014                 iter->trace_buffer = &tr->trace_buffer;
3015         iter->snapshot = snapshot;
3016         iter->pos = -1;
3017         iter->cpu_file = tracing_get_cpu(inode);
3018         mutex_init(&iter->mutex);
3019
3020         /* Notify the tracer early; before we stop tracing. */
3021         if (iter->trace && iter->trace->open)
3022                 iter->trace->open(iter);
3023
3024         /* Annotate start of buffers if we had overruns */
3025         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3026                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3027
3028         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3029         if (trace_clocks[tr->clock_id].in_ns)
3030                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3031
3032         /* stop the trace while dumping if we are not opening "snapshot" */
3033         if (!iter->snapshot)
3034                 tracing_stop_tr(tr);
3035
3036         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3037                 for_each_tracing_cpu(cpu) {
3038                         iter->buffer_iter[cpu] =
3039                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3040                 }
3041                 ring_buffer_read_prepare_sync();
3042                 for_each_tracing_cpu(cpu) {
3043                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3044                         tracing_iter_reset(iter, cpu);
3045                 }
3046         } else {
3047                 cpu = iter->cpu_file;
3048                 iter->buffer_iter[cpu] =
3049                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3050                 ring_buffer_read_prepare_sync();
3051                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3052                 tracing_iter_reset(iter, cpu);
3053         }
3054
3055         mutex_unlock(&trace_types_lock);
3056
3057         return iter;
3058
3059  fail:
3060         mutex_unlock(&trace_types_lock);
3061         kfree(iter->trace);
3062         kfree(iter->buffer_iter);
3063 release:
3064         seq_release_private(inode, file);
3065         return ERR_PTR(-ENOMEM);
3066 }
3067
3068 int tracing_open_generic(struct inode *inode, struct file *filp)
3069 {
3070         if (tracing_disabled)
3071                 return -ENODEV;
3072
3073         filp->private_data = inode->i_private;
3074         return 0;
3075 }
3076
3077 bool tracing_is_disabled(void)
3078 {
3079         return tracing_disabled ? true : false;
3080 }
3081
3082 /*
3083  * Open and update trace_array ref count.
3084  * Must have the current trace_array passed to it.
3085  */
3086 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3087 {
3088         struct trace_array *tr = inode->i_private;
3089
3090         if (tracing_disabled)
3091                 return -ENODEV;
3092
3093         if (trace_array_get(tr) < 0)
3094                 return -ENODEV;
3095
3096         filp->private_data = inode->i_private;
3097
3098         return 0;
3099 }
3100
3101 static int tracing_release(struct inode *inode, struct file *file)
3102 {
3103         struct trace_array *tr = inode->i_private;
3104         struct seq_file *m = file->private_data;
3105         struct trace_iterator *iter;
3106         int cpu;
3107
3108         if (!(file->f_mode & FMODE_READ)) {
3109                 trace_array_put(tr);
3110                 return 0;
3111         }
3112
3113         /* Writes do not use seq_file */
3114         iter = m->private;
3115         mutex_lock(&trace_types_lock);
3116
3117         for_each_tracing_cpu(cpu) {
3118                 if (iter->buffer_iter[cpu])
3119                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3120         }
3121
3122         if (iter->trace && iter->trace->close)
3123                 iter->trace->close(iter);
3124
3125         if (!iter->snapshot)
3126                 /* reenable tracing if it was previously enabled */
3127                 tracing_start_tr(tr);
3128
3129         __trace_array_put(tr);
3130
3131         mutex_unlock(&trace_types_lock);
3132
3133         mutex_destroy(&iter->mutex);
3134         free_cpumask_var(iter->started);
3135         kfree(iter->trace);
3136         kfree(iter->buffer_iter);
3137         seq_release_private(inode, file);
3138
3139         return 0;
3140 }
3141
3142 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3143 {
3144         struct trace_array *tr = inode->i_private;
3145
3146         trace_array_put(tr);
3147         return 0;
3148 }
3149
3150 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3151 {
3152         struct trace_array *tr = inode->i_private;
3153
3154         trace_array_put(tr);
3155
3156         return single_release(inode, file);
3157 }
3158
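/*
 * Open the "trace" file. An open for write with O_TRUNC clears the
 * buffer(s); an open for read sets up the seq_file iterator.
 */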
3159 static int tracing_open(struct inode *inode, struct file *file)
3160 {
3161         struct trace_array *tr = inode->i_private;
3162         struct trace_iterator *iter;
3163         int ret = 0;
3164
3165         if (trace_array_get(tr) < 0)
3166                 return -ENODEV;
3167
3168         /* If this file was open for write, then erase contents */
3169         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3170                 int cpu = tracing_get_cpu(inode);
3171
3172                 if (cpu == RING_BUFFER_ALL_CPUS)
3173                         tracing_reset_online_cpus(&tr->trace_buffer);
3174                 else
3175                         tracing_reset(&tr->trace_buffer, cpu);
3176         }
3177
3178         if (file->f_mode & FMODE_READ) {
3179                 iter = __tracing_open(inode, file, false);
3180                 if (IS_ERR(iter))
3181                         ret = PTR_ERR(iter);
3182                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3183                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3184         }
3185
3186         if (ret < 0)
3187                 trace_array_put(tr);
3188
3189         return ret;
3190 }
3191
3192 /*
3193  * Some tracers are not suitable for instance buffers.
3194  * A tracer is always available for the global array (toplevel)
3195  * or if it explicitly states that it is.
3196  */
3197 static bool
3198 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3199 {
3200         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3201 }
3202
3203 /* Find the next tracer that this trace array may use */
3204 static struct tracer *
3205 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3206 {
3207         while (t && !trace_ok_for_array(t, tr))
3208                 t = t->next;
3209
3210         return t;
3211 }
3212
3213 static void *
3214 t_next(struct seq_file *m, void *v, loff_t *pos)
3215 {
3216         struct trace_array *tr = m->private;
3217         struct tracer *t = v;
3218
3219         (*pos)++;
3220
3221         if (t)
3222                 t = get_tracer_for_array(tr, t->next);
3223
3224         return t;
3225 }
3226
3227 static void *t_start(struct seq_file *m, loff_t *pos)
3228 {
3229         struct trace_array *tr = m->private;
3230         struct tracer *t;
3231         loff_t l = 0;
3232
3233         mutex_lock(&trace_types_lock);
3234
3235         t = get_tracer_for_array(tr, trace_types);
3236         for (; t && l < *pos; t = t_next(m, t, &l))
3237                 ;
3238
3239         return t;
3240 }
3241
3242 static void t_stop(struct seq_file *m, void *p)
3243 {
3244         mutex_unlock(&trace_types_lock);
3245 }
3246
3247 static int t_show(struct seq_file *m, void *v)
3248 {
3249         struct tracer *t = v;
3250
3251         if (!t)
3252                 return 0;
3253
3254         seq_puts(m, t->name);
3255         if (t->next)
3256                 seq_putc(m, ' ');
3257         else
3258                 seq_putc(m, '\n');
3259
3260         return 0;
3261 }
3262
3263 static const struct seq_operations show_traces_seq_ops = {
3264         .start          = t_start,
3265         .next           = t_next,
3266         .stop           = t_stop,
3267         .show           = t_show,
3268 };
3269
3270 static int show_traces_open(struct inode *inode, struct file *file)
3271 {
3272         struct trace_array *tr = inode->i_private;
3273         struct seq_file *m;
3274         int ret;
3275
3276         if (tracing_disabled)
3277                 return -ENODEV;
3278
3279         ret = seq_open(file, &show_traces_seq_ops);
3280         if (ret)
3281                 return ret;
3282
3283         m = file->private_data;
3284         m->private = tr;
3285
3286         return 0;
3287 }
3288
3289 static ssize_t
3290 tracing_write_stub(struct file *filp, const char __user *ubuf,
3291                    size_t count, loff_t *ppos)
3292 {
3293         return count;
3294 }
3295
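     /*
      * Generic lseek for tracing files. Opens with read access go through
      * seq_lseek(); write-only opens have no seq_file, so their position
      * is simply reset to zero.
      */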
3296 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3297 {
3298         int ret;
3299
3300         if (file->f_mode & FMODE_READ)
3301                 ret = seq_lseek(file, offset, whence);
3302         else
3303                 file->f_pos = ret = 0;
3304
3305         return ret;
3306 }
3307
3308 static const struct file_operations tracing_fops = {
3309         .open           = tracing_open,
3310         .read           = seq_read,
3311         .write          = tracing_write_stub,
3312         .llseek         = tracing_lseek,
3313         .release        = tracing_release,
3314 };
3315
3316 static const struct file_operations show_traces_fops = {
3317         .open           = show_traces_open,
3318         .read           = seq_read,
3319         .release        = seq_release,
3320         .llseek         = seq_lseek,
3321 };
3322
3323 /*
3324  * The tracer itself will not take this lock, but still we want
3325  * to provide a consistent cpumask to user-space:
3326  */
3327 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3328
3329 /*
3330  * Temporary storage for the character representation of the
3331  * CPU bitmask (and one more byte for the newline):
3332  */
3333 static char mask_str[NR_CPUS + 1];
3334
3335 static ssize_t
3336 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3337                      size_t count, loff_t *ppos)
3338 {
3339         struct trace_array *tr = file_inode(filp)->i_private;
3340         int len;
3341
3342         mutex_lock(&tracing_cpumask_update_lock);
3343
3344         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3345         if (count - len < 2) {
3346                 count = -EINVAL;
3347                 goto out_err;
3348         }
3349         len += sprintf(mask_str + len, "\n");
3350         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3351
3352 out_err:
3353         mutex_unlock(&tracing_cpumask_update_lock);
3354
3355         return count;
3356 }
3357
3358 static ssize_t
3359 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3360                       size_t count, loff_t *ppos)
3361 {
3362         struct trace_array *tr = file_inode(filp)->i_private;
3363         cpumask_var_t tracing_cpumask_new;
3364         int err, cpu;
3365
3366         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3367                 return -ENOMEM;
3368
3369         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3370         if (err)
3371                 goto err_unlock;
3372
3373         mutex_lock(&tracing_cpumask_update_lock);
3374
3375         local_irq_disable();
3376         arch_spin_lock(&tr->max_lock);
3377         for_each_tracing_cpu(cpu) {
3378                 /*
3379                  * Increase/decrease the disabled counter if we are
3380                  * about to flip a bit in the cpumask:
3381                  */
3382                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3383                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3384                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3385                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3386                 }
3387                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3388                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3389                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3390                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3391                 }
3392         }
3393         arch_spin_unlock(&tr->max_lock);
3394         local_irq_enable();
3395
3396         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3397
3398         mutex_unlock(&tracing_cpumask_update_lock);
3399         free_cpumask_var(tracing_cpumask_new);
3400
3401         return count;
3402
3403 err_unlock:
3404         free_cpumask_var(tracing_cpumask_new);
3405
3406         return err;
3407 }
3408
3409 static const struct file_operations tracing_cpumask_fops = {
3410         .open           = tracing_open_generic_tr,
3411         .read           = tracing_cpumask_read,
3412         .write          = tracing_cpumask_write,
3413         .release        = tracing_release_generic_tr,
3414         .llseek         = generic_file_llseek,
3415 };
3416
3417 static int tracing_trace_options_show(struct seq_file *m, void *v)
3418 {
3419         struct tracer_opt *trace_opts;
3420         struct trace_array *tr = m->private;
3421         u32 tracer_flags;
3422         int i;
3423
3424         mutex_lock(&trace_types_lock);
3425         tracer_flags = tr->current_trace->flags->val;
3426         trace_opts = tr->current_trace->flags->opts;
3427
3428         for (i = 0; trace_options[i]; i++) {
3429                 if (trace_flags & (1 << i))
3430                         seq_printf(m, "%s\n", trace_options[i]);
3431                 else
3432                         seq_printf(m, "no%s\n", trace_options[i]);
3433         }
3434
3435         for (i = 0; trace_opts[i].name; i++) {
3436                 if (tracer_flags & trace_opts[i].bit)
3437                         seq_printf(m, "%s\n", trace_opts[i].name);
3438                 else
3439                         seq_printf(m, "no%s\n", trace_opts[i].name);
3440         }
3441         mutex_unlock(&trace_types_lock);
3442
3443         return 0;
3444 }
3445
3446 static int __set_tracer_option(struct trace_array *tr,
3447                                struct tracer_flags *tracer_flags,
3448                                struct tracer_opt *opts, int neg)
3449 {
3450         struct tracer *trace = tr->current_trace;
3451         int ret;
3452
3453         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3454         if (ret)
3455                 return ret;
3456
3457         if (neg)
3458                 tracer_flags->val &= ~opts->bit;
3459         else
3460                 tracer_flags->val |= opts->bit;
3461         return 0;
3462 }
3463
3464 /* Try to assign a tracer specific option */
3465 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3466 {
3467         struct tracer *trace = tr->current_trace;
3468         struct tracer_flags *tracer_flags = trace->flags;
3469         struct tracer_opt *opts = NULL;
3470         int i;
3471
3472         for (i = 0; tracer_flags->opts[i].name; i++) {
3473                 opts = &tracer_flags->opts[i];
3474
3475                 if (strcmp(cmp, opts->name) == 0)
3476                         return __set_tracer_option(tr, trace->flags, opts, neg);
3477         }
3478
3479         return -EINVAL;
3480 }
3481
3482 /* Some tracers require overwrite to stay enabled */
3483 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3484 {
3485         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3486                 return -1;
3487
3488         return 0;
3489 }
3490
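     /*
      * Set or clear a bit in the global trace_flags. The current tracer may
      * veto the change through its flag_changed() callback. Some flags have
      * side effects handled here: RECORD_CMD toggles comm recording,
      * OVERWRITE switches the ring buffer overwrite mode, and PRINTK
      * toggles trace_printk() comm saving.
      */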
3491 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3492 {
3493         /* do nothing if flag is already set */
3494         if (!!(trace_flags & mask) == !!enabled)
3495                 return 0;
3496
3497         /* Give the tracer a chance to approve the change */
3498         if (tr->current_trace->flag_changed)
3499                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3500                         return -EINVAL;
3501
3502         if (enabled)
3503                 trace_flags |= mask;
3504         else
3505                 trace_flags &= ~mask;
3506
3507         if (mask == TRACE_ITER_RECORD_CMD)
3508                 trace_event_enable_cmd_record(enabled);
3509
3510         if (mask == TRACE_ITER_OVERWRITE) {
3511                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3512 #ifdef CONFIG_TRACER_MAX_TRACE
3513                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3514 #endif
3515         }
3516
3517         if (mask == TRACE_ITER_PRINTK)
3518                 trace_printk_start_stop_comm(enabled);
3519
3520         return 0;
3521 }
3522
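     /*
      * Apply one option string written to trace_options. A "no" prefix
      * clears the option. The core trace_options list is tried first,
      * then the current tracer's private options.
      */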
3523 static int trace_set_options(struct trace_array *tr, char *option)
3524 {
3525         char *cmp;
3526         int neg = 0;
3527         int ret = -ENODEV;
3528         int i;
3529
3530         cmp = strstrip(option);
3531
3532         if (strncmp(cmp, "no", 2) == 0) {
3533                 neg = 1;
3534                 cmp += 2;
3535         }
3536
3537         mutex_lock(&trace_types_lock);
3538
3539         for (i = 0; trace_options[i]; i++) {
3540                 if (strcmp(cmp, trace_options[i]) == 0) {
3541                         ret = set_tracer_flag(tr, 1 << i, !neg);
3542                         break;
3543                 }
3544         }
3545
3546         /* If no option could be set, test the specific tracer options */
3547         if (!trace_options[i])
3548                 ret = set_tracer_option(tr, cmp, neg);
3549
3550         mutex_unlock(&trace_types_lock);
3551
3552         return ret;
3553 }
3554
3555 static ssize_t
3556 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3557                         size_t cnt, loff_t *ppos)
3558 {
3559         struct seq_file *m = filp->private_data;
3560         struct trace_array *tr = m->private;
3561         char buf[64];
3562         int ret;
3563
3564         if (cnt >= sizeof(buf))
3565                 return -EINVAL;
3566
3567         if (copy_from_user(&buf, ubuf, cnt))
3568                 return -EFAULT;
3569
3570         buf[cnt] = 0;
3571
3572         ret = trace_set_options(tr, buf);
3573         if (ret < 0)
3574                 return ret;
3575
3576         *ppos += cnt;
3577
3578         return cnt;
3579 }
3580
3581 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3582 {
3583         struct trace_array *tr = inode->i_private;
3584         int ret;
3585
3586         if (tracing_disabled)
3587                 return -ENODEV;
3588
3589         if (trace_array_get(tr) < 0)
3590                 return -ENODEV;
3591
3592         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3593         if (ret < 0)
3594                 trace_array_put(tr);
3595
3596         return ret;
3597 }
3598
3599 static const struct file_operations tracing_iter_fops = {
3600         .open           = tracing_trace_options_open,
3601         .read           = seq_read,
3602         .llseek         = seq_lseek,
3603         .release        = tracing_single_release_tr,
3604         .write          = tracing_trace_options_write,
3605 };
3606
3607 static const char readme_msg[] =
3608         "tracing mini-HOWTO:\n\n"
3609         "# echo 0 > tracing_on : quick way to disable tracing\n"
3610         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3611         " Important files:\n"
3612         "  trace\t\t\t- The static contents of the buffer\n"
3613         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3614         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3615         "  current_tracer\t- function and latency tracers\n"
3616         "  available_tracers\t- list of configured tracers for current_tracer\n"
3617         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3618         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3619         "  trace_clock\t\t- change the clock used to order events\n"
3620         "       local:   Per cpu clock but may not be synced across CPUs\n"
3621         "      global:   Synced across CPUs but slows tracing down.\n"
3622         "     counter:   Not a clock, but just an increment\n"
3623         "      uptime:   Jiffy counter from time of boot\n"
3624         "        perf:   Same clock that perf events use\n"
3625 #ifdef CONFIG_X86_64
3626         "     x86-tsc:   TSC cycle counter\n"
3627 #endif
3628         "\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
3629         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3630         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3631         "\t\t\t  Remove sub-buffer with rmdir\n"
3632         "  trace_options\t\t- Set format or modify how tracing happens\n"
3633         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
3634         "\t\t\t  option name\n"
3635         "  saved_cmdlines_size\t- echo the number of comm-pid entries to save in here\n"
3636 #ifdef CONFIG_DYNAMIC_FTRACE
3637         "\n  available_filter_functions - list of functions that can be filtered on\n"
3638         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3639         "\t\t\t  functions\n"
3640         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3641         "\t     modules: Can select a group via module\n"
3642         "\t      Format: :mod:<module-name>\n"
3643         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3644         "\t    triggers: a command to perform when function is hit\n"
3645         "\t      Format: <function>:<trigger>[:count]\n"
3646         "\t     trigger: traceon, traceoff\n"
3647         "\t\t      enable_event:<system>:<event>\n"
3648         "\t\t      disable_event:<system>:<event>\n"
3649 #ifdef CONFIG_STACKTRACE
3650         "\t\t      stacktrace\n"
3651 #endif
3652 #ifdef CONFIG_TRACER_SNAPSHOT
3653         "\t\t      snapshot\n"
3654 #endif
3655         "\t\t      dump\n"
3656         "\t\t      cpudump\n"
3657         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3658         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3659         "\t     The first one will disable tracing every time do_fault is hit\n"
3660         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3661         "\t       The first time do_trap is hit and it disables tracing, the\n"
3662         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3663         "\t       the counter will not decrement. It only decrements when the\n"
3664         "\t       trigger did work\n"
3665         "\t     To remove trigger without count:\n"
3666         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3667         "\t     To remove trigger with a count:\n"
3668         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3669         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3670         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3671         "\t    modules: Can select a group via module command :mod:\n"
3672         "\t    Does not accept triggers\n"
3673 #endif /* CONFIG_DYNAMIC_FTRACE */
3674 #ifdef CONFIG_FUNCTION_TRACER
3675         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3676         "\t\t    (function)\n"
3677 #endif
3678 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3679         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3680         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3681         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3682 #endif
3683 #ifdef CONFIG_TRACER_SNAPSHOT
3684         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3685         "\t\t\t  snapshot buffer. Read the contents for more\n"
3686         "\t\t\t  information\n"
3687 #endif
3688 #ifdef CONFIG_STACK_TRACER
3689         "  stack_trace\t\t- Shows the max stack trace when active\n"
3690         "  stack_max_size\t- Shows current max stack size that was traced\n"
3691         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3692         "\t\t\t  new trace)\n"
3693 #ifdef CONFIG_DYNAMIC_FTRACE
3694         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3695         "\t\t\t  traces\n"
3696 #endif
3697 #endif /* CONFIG_STACK_TRACER */
3698         "  events/\t\t- Directory containing all trace event subsystems:\n"
3699         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3700         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3701         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3702         "\t\t\t  events\n"
3703         "      filter\t\t- If set, only events passing filter are traced\n"
3704         "  events/<system>/<event>/\t- Directory containing control files for\n"
3705         "\t\t\t  <event>:\n"
3706         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3707         "      filter\t\t- If set, only events passing filter are traced\n"
3708         "      trigger\t\t- If set, a command to perform when event is hit\n"
3709         "\t    Format: <trigger>[:count][if <filter>]\n"
3710         "\t   trigger: traceon, traceoff\n"
3711         "\t            enable_event:<system>:<event>\n"
3712         "\t            disable_event:<system>:<event>\n"
3713 #ifdef CONFIG_STACKTRACE
3714         "\t\t    stacktrace\n"
3715 #endif
3716 #ifdef CONFIG_TRACER_SNAPSHOT
3717         "\t\t    snapshot\n"
3718 #endif
3719         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3720         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3721         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3722         "\t                  events/block/block_unplug/trigger\n"
3723         "\t   The first disables tracing every time block_unplug is hit.\n"
3724         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3725         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3726         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3727         "\t   Like function triggers, the counter is only decremented if it\n"
3728         "\t    enabled or disabled tracing.\n"
3729         "\t   To remove a trigger without a count:\n"
3730         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3731         "\t   To remove a trigger with a count:\n"
3732         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3733         "\t   Filters can be ignored when removing a trigger.\n"
3734 ;
3735
3736 static ssize_t
3737 tracing_readme_read(struct file *filp, char __user *ubuf,
3738                        size_t cnt, loff_t *ppos)
3739 {
3740         return simple_read_from_buffer(ubuf, cnt, ppos,
3741                                         readme_msg, strlen(readme_msg));
3742 }
3743
3744 static const struct file_operations tracing_readme_fops = {
3745         .open           = tracing_open_generic,
3746         .read           = tracing_readme_read,
3747         .llseek         = generic_file_llseek,
3748 };
3749
3750 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3751 {
3752         unsigned int *ptr = v;
3753
3754         if (*pos || m->count)
3755                 ptr++;
3756
3757         (*pos)++;
3758
3759         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3760              ptr++) {
3761                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3762                         continue;
3763
3764                 return ptr;
3765         }
3766
3767         return NULL;
3768 }
3769
3770 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3771 {
3772         void *v;
3773         loff_t l = 0;
3774
3775         preempt_disable();
3776         arch_spin_lock(&trace_cmdline_lock);
3777
3778         v = &savedcmd->map_cmdline_to_pid[0];
3779         while (l <= *pos) {
3780                 v = saved_cmdlines_next(m, v, &l);
3781                 if (!v)
3782                         return NULL;
3783         }
3784
3785         return v;
3786 }
3787
3788 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3789 {
3790         arch_spin_unlock(&trace_cmdline_lock);
3791         preempt_enable();
3792 }
3793
3794 static int saved_cmdlines_show(struct seq_file *m, void *v)
3795 {
3796         char buf[TASK_COMM_LEN];
3797         unsigned int *pid = v;
3798
3799         __trace_find_cmdline(*pid, buf);
3800         seq_printf(m, "%d %s\n", *pid, buf);
3801         return 0;
3802 }
3803
3804 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3805         .start          = saved_cmdlines_start,
3806         .next           = saved_cmdlines_next,
3807         .stop           = saved_cmdlines_stop,
3808         .show           = saved_cmdlines_show,
3809 };
3810
3811 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3812 {
3813         if (tracing_disabled)
3814                 return -ENODEV;
3815
3816         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3817 }
3818
3819 static const struct file_operations tracing_saved_cmdlines_fops = {
3820         .open           = tracing_saved_cmdlines_open,
3821         .read           = seq_read,
3822         .llseek         = seq_lseek,
3823         .release        = seq_release,
3824 };
3825
3826 static ssize_t
3827 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3828                                  size_t cnt, loff_t *ppos)
3829 {
3830         char buf[64];
3831         int r;
3832
3833         arch_spin_lock(&trace_cmdline_lock);
3834         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3835         arch_spin_unlock(&trace_cmdline_lock);
3836
3837         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3838 }
3839
3840 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3841 {
3842         kfree(s->saved_cmdlines);
3843         kfree(s->map_cmdline_to_pid);
3844         kfree(s);
3845 }
3846
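     /*
      * Allocate a new saved_cmdlines buffer with @val entries and swap it in
      * under trace_cmdline_lock; the old buffer is freed after the swap.
      * Called when a new size is written to saved_cmdlines_size.
      */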
3847 static int tracing_resize_saved_cmdlines(unsigned int val)
3848 {
3849         struct saved_cmdlines_buffer *s, *savedcmd_temp;
3850
3851         s = kmalloc(sizeof(*s), GFP_KERNEL);
3852         if (!s)
3853                 return -ENOMEM;
3854
3855         if (allocate_cmdlines_buffer(val, s) < 0) {
3856                 kfree(s);
3857                 return -ENOMEM;
3858         }
3859
3860         arch_spin_lock(&trace_cmdline_lock);
3861         savedcmd_temp = savedcmd;
3862         savedcmd = s;
3863         arch_spin_unlock(&trace_cmdline_lock);
3864         free_saved_cmdlines_buffer(savedcmd_temp);
3865
3866         return 0;
3867 }
3868
3869 static ssize_t
3870 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3871                                   size_t cnt, loff_t *ppos)
3872 {
3873         unsigned long val;
3874         int ret;
3875
3876         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3877         if (ret)
3878                 return ret;
3879
3880         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
3881         if (!val || val > PID_MAX_DEFAULT)
3882                 return -EINVAL;
3883
3884         ret = tracing_resize_saved_cmdlines((unsigned int)val);
3885         if (ret < 0)
3886                 return ret;
3887
3888         *ppos += cnt;
3889
3890         return cnt;
3891 }
3892
3893 static const struct file_operations tracing_saved_cmdlines_size_fops = {
3894         .open           = tracing_open_generic,
3895         .read           = tracing_saved_cmdlines_size_read,
3896         .write          = tracing_saved_cmdlines_size_write,
3897 };
3898
3899 static ssize_t
3900 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3901                        size_t cnt, loff_t *ppos)
3902 {
3903         struct trace_array *tr = filp->private_data;
3904         char buf[MAX_TRACER_SIZE+2];
3905         int r;
3906
3907         mutex_lock(&trace_types_lock);
3908         r = sprintf(buf, "%s\n", tr->current_trace->name);
3909         mutex_unlock(&trace_types_lock);
3910
3911         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3912 }
3913
3914 int tracer_init(struct tracer *t, struct trace_array *tr)
3915 {
3916         tracing_reset_online_cpus(&tr->trace_buffer);
3917         return t->init(tr);
3918 }
3919
3920 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3921 {
3922         int cpu;
3923
3924         for_each_tracing_cpu(cpu)
3925                 per_cpu_ptr(buf->data, cpu)->entries = val;
3926 }
3927
3928 #ifdef CONFIG_TRACER_MAX_TRACE
3929 /* resize @trace_buf's buffer to the size of @size_buf's entries */
3930 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3931                                         struct trace_buffer *size_buf, int cpu_id)
3932 {
3933         int cpu, ret = 0;
3934
3935         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3936                 for_each_tracing_cpu(cpu) {
3937                         ret = ring_buffer_resize(trace_buf->buffer,
3938                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3939                         if (ret < 0)
3940                                 break;
3941                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3942                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3943                 }
3944         } else {
3945                 ret = ring_buffer_resize(trace_buf->buffer,
3946                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3947                 if (ret == 0)
3948                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3949                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3950         }
3951
3952         return ret;
3953 }
3954 #endif /* CONFIG_TRACER_MAX_TRACE */
3955
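     /*
      * Resize the ring buffer of @tr to @size bytes for @cpu (or all cpus).
      * With CONFIG_TRACER_MAX_TRACE, the max (snapshot) buffer is resized as
      * well when the current tracer uses it; if that fails, the main buffer
      * is put back to its previous size.
      */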
3956 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3957                                         unsigned long size, int cpu)
3958 {
3959         int ret;
3960
3961         /*
3962          * If kernel or user changes the size of the ring buffer
3963          * we use the size that was given, and we can forget about
3964          * expanding it later.
3965          */
3966         ring_buffer_expanded = true;
3967
3968         /* May be called before buffers are initialized */
3969         if (!tr->trace_buffer.buffer)
3970                 return 0;
3971
3972         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3973         if (ret < 0)
3974                 return ret;
3975
3976 #ifdef CONFIG_TRACER_MAX_TRACE
3977         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3978             !tr->current_trace->use_max_tr)
3979                 goto out;
3980
3981         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3982         if (ret < 0) {
3983                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3984                                                      &tr->trace_buffer, cpu);
3985                 if (r < 0) {
3986                         /*
3987                          * AARGH! We are left with different
3988                          * size max buffer!!!!
3989                          * The max buffer is our "snapshot" buffer.
3990                          * When a tracer needs a snapshot (one of the
3991                          * latency tracers), it swaps the max buffer
3992                          * with the saved snapshot. We succeeded in updating
3993                          * the size of the main buffer, but failed to
3994                          * update the size of the max buffer. But when we tried
3995                          * to reset the main buffer to the original size, we
3996                          * failed there too. This is very unlikely to
3997                          * happen, but if it does, warn and kill all
3998                          * tracing.
3999                          */
4000                         WARN_ON(1);
4001                         tracing_disabled = 1;
4002                 }
4003                 return ret;
4004         }
4005
4006         if (cpu == RING_BUFFER_ALL_CPUS)
4007                 set_buffer_entries(&tr->max_buffer, size);
4008         else
4009                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4010
4011  out:
4012 #endif /* CONFIG_TRACER_MAX_TRACE */
4013
4014         if (cpu == RING_BUFFER_ALL_CPUS)
4015                 set_buffer_entries(&tr->trace_buffer, size);
4016         else
4017                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4018
4019         return ret;
4020 }
4021
4022 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4023                                           unsigned long size, int cpu_id)
4024 {
4025         int ret = size;
4026
4027         mutex_lock(&trace_types_lock);
4028
4029         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4030                 /* make sure this cpu is enabled in the mask */
4031                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4032                         ret = -EINVAL;
4033                         goto out;
4034                 }
4035         }
4036
4037         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4038         if (ret < 0)
4039                 ret = -ENOMEM;
4040
4041 out:
4042         mutex_unlock(&trace_types_lock);
4043
4044         return ret;
4045 }
4046
4047
4048 /**
4049  * tracing_update_buffers - used by tracing facility to expand ring buffers
4050  *
4051  * To save memory when tracing is never used on a system that has it
4052  * configured in, the ring buffers are set to a minimum size. Once a
4053  * user starts to use the tracing facility, they need to grow to
4054  * their default size.
4055  *
4056  * This function is to be called when a tracer is about to be used.
4057  */
4058 int tracing_update_buffers(void)
4059 {
4060         int ret = 0;
4061
4062         mutex_lock(&trace_types_lock);
4063         if (!ring_buffer_expanded)
4064                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4065                                                 RING_BUFFER_ALL_CPUS);
4066         mutex_unlock(&trace_types_lock);
4067
4068         return ret;
4069 }
4070
4071 struct trace_option_dentry;
4072
4073 static struct trace_option_dentry *
4074 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4075
4076 static void
4077 destroy_trace_option_files(struct trace_option_dentry *topts);
4078
4079 /*
4080  * Used to clear out the tracer before deletion of an instance.
4081  * Must have trace_types_lock held.
4082  */
4083 static void tracing_set_nop(struct trace_array *tr)
4084 {
4085         if (tr->current_trace == &nop_trace)
4086                 return;
4087
4088         tr->current_trace->enabled--;
4089
4090         if (tr->current_trace->reset)
4091                 tr->current_trace->reset(tr);
4092
4093         tr->current_trace = &nop_trace;
4094 }
4095
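     /*
      * Switch @tr to the tracer named @buf: expand the ring buffer if it has
      * not been expanded yet, shut down the old tracer, allocate or free the
      * snapshot buffer depending on whether the new tracer needs it, and
      * initialize the new tracer.
      */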
4096 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4097 {
4098         static struct trace_option_dentry *topts;
4099         struct tracer *t;
4100 #ifdef CONFIG_TRACER_MAX_TRACE
4101         bool had_max_tr;
4102 #endif
4103         int ret = 0;
4104
4105         mutex_lock(&trace_types_lock);
4106
4107         if (!ring_buffer_expanded) {
4108                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4109                                                 RING_BUFFER_ALL_CPUS);
4110                 if (ret < 0)
4111                         goto out;
4112                 ret = 0;
4113         }
4114
4115         for (t = trace_types; t; t = t->next) {
4116                 if (strcmp(t->name, buf) == 0)
4117                         break;
4118         }
4119         if (!t) {
4120                 ret = -EINVAL;
4121                 goto out;
4122         }
4123         if (t == tr->current_trace)
4124                 goto out;
4125
4126         /* Some tracers are only allowed for the top level buffer */
4127         if (!trace_ok_for_array(t, tr)) {
4128                 ret = -EINVAL;
4129                 goto out;
4130         }
4131
4132         trace_branch_disable();
4133
4134         tr->current_trace->enabled--;
4135
4136         if (tr->current_trace->reset)
4137                 tr->current_trace->reset(tr);
4138
4139         /* Current trace needs to be nop_trace before synchronize_sched */
4140         tr->current_trace = &nop_trace;
4141
4142 #ifdef CONFIG_TRACER_MAX_TRACE
4143         had_max_tr = tr->allocated_snapshot;
4144
4145         if (had_max_tr && !t->use_max_tr) {
4146                 /*
4147                  * We need to make sure that the update_max_tr sees that
4148                  * current_trace changed to nop_trace to keep it from
4149                  * swapping the buffers after we resize it.
4150                  * The update_max_tr is called from interrupts disabled
4151                  * The update_max_tr is called with interrupts disabled,
4152                  * so a synchronize_sched() is sufficient.
4153                 synchronize_sched();
4154                 free_snapshot(tr);
4155         }
4156 #endif
4157         /* Currently, only the top instance has options */
4158         if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
4159                 destroy_trace_option_files(topts);
4160                 topts = create_trace_option_files(tr, t);
4161         }
4162
4163 #ifdef CONFIG_TRACER_MAX_TRACE
4164         if (t->use_max_tr && !had_max_tr) {
4165                 ret = alloc_snapshot(tr);
4166                 if (ret < 0)
4167                         goto out;
4168         }
4169 #endif
4170
4171         if (t->init) {
4172                 ret = tracer_init(t, tr);
4173                 if (ret)
4174                         goto out;
4175         }
4176
4177         tr->current_trace = t;
4178         tr->current_trace->enabled++;
4179         trace_branch_enable(tr);
4180  out:
4181         mutex_unlock(&trace_types_lock);
4182
4183         return ret;
4184 }
4185
4186 static ssize_t
4187 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4188                         size_t cnt, loff_t *ppos)
4189 {
4190         struct trace_array *tr = filp->private_data;
4191         char buf[MAX_TRACER_SIZE+1];
4192         int i;
4193         size_t ret;
4194         int err;
4195
4196         ret = cnt;
4197
4198         if (cnt > MAX_TRACER_SIZE)
4199                 cnt = MAX_TRACER_SIZE;
4200
4201         if (copy_from_user(&buf, ubuf, cnt))
4202                 return -EFAULT;
4203
4204         buf[cnt] = 0;
4205
4206         /* strip ending whitespace. */
4207         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4208                 buf[i] = 0;
4209
4210         err = tracing_set_tracer(tr, buf);
4211         if (err)
4212                 return err;
4213
4214         *ppos += ret;
4215
4216         return ret;
4217 }
4218
4219 static ssize_t
4220 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4221                    size_t cnt, loff_t *ppos)
4222 {
4223         char buf[64];
4224         int r;
4225
4226         r = snprintf(buf, sizeof(buf), "%ld\n",
4227                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4228         if (r > sizeof(buf))
4229                 r = sizeof(buf);
4230         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4231 }
4232
4233 static ssize_t
4234 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4235                     size_t cnt, loff_t *ppos)
4236 {
4237         unsigned long val;
4238         int ret;
4239
4240         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4241         if (ret)
4242                 return ret;
4243
4244         *ptr = val * 1000;
4245
4246         return cnt;
4247 }
4248
4249 static ssize_t
4250 tracing_thresh_read(struct file *filp, char __user *ubuf,
4251                     size_t cnt, loff_t *ppos)
4252 {
4253         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4254 }
4255
4256 static ssize_t
4257 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4258                      size_t cnt, loff_t *ppos)
4259 {
4260         struct trace_array *tr = filp->private_data;
4261         int ret;
4262
4263         mutex_lock(&trace_types_lock);
4264         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4265         if (ret < 0)
4266                 goto out;
4267
4268         if (tr->current_trace->update_thresh) {
4269                 ret = tr->current_trace->update_thresh(tr);
4270                 if (ret < 0)
4271                         goto out;
4272         }
4273
4274         ret = cnt;
4275 out:
4276         mutex_unlock(&trace_types_lock);
4277
4278         return ret;
4279 }
4280
4281 static ssize_t
4282 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4283                      size_t cnt, loff_t *ppos)
4284 {
4285         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4286 }
4287
4288 static ssize_t
4289 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4290                       size_t cnt, loff_t *ppos)
4291 {
4292         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4293 }
4294
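     /* Open handler for trace_pipe: set up a consuming trace iterator. */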
4295 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4296 {
4297         struct trace_array *tr = inode->i_private;
4298         struct trace_iterator *iter;
4299         int ret = 0;
4300
4301         if (tracing_disabled)
4302                 return -ENODEV;
4303
4304         if (trace_array_get(tr) < 0)
4305                 return -ENODEV;
4306
4307         mutex_lock(&trace_types_lock);
4308
4309         /* create a buffer to store the information to pass to userspace */
4310         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4311         if (!iter) {
4312                 ret = -ENOMEM;
4313                 __trace_array_put(tr);
4314                 goto out;
4315         }
4316
4317         trace_seq_init(&iter->seq);
4318
4319         /*
4320          * We make a copy of the current tracer to avoid concurrent
4321          * changes on it while we are reading.
4322          */
4323         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4324         if (!iter->trace) {
4325                 ret = -ENOMEM;
4326                 goto fail;
4327         }
4328         *iter->trace = *tr->current_trace;
4329
4330         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4331                 ret = -ENOMEM;
4332                 goto fail;
4333         }
4334
4335         /* trace pipe does not show start of buffer */
4336         cpumask_setall(iter->started);
4337
4338         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4339                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4340
4341         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4342         if (trace_clocks[tr->clock_id].in_ns)
4343                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4344
4345         iter->tr = tr;
4346         iter->trace_buffer = &tr->trace_buffer;
4347         iter->cpu_file = tracing_get_cpu(inode);
4348         mutex_init(&iter->mutex);
4349         filp->private_data = iter;
4350
4351         if (iter->trace->pipe_open)
4352                 iter->trace->pipe_open(iter);
4353
4354         nonseekable_open(inode, filp);
4355 out:
4356         mutex_unlock(&trace_types_lock);
4357         return ret;
4358
4359 fail:
4360         kfree(iter->trace);
4361         kfree(iter);
4362         __trace_array_put(tr);
4363         mutex_unlock(&trace_types_lock);
4364         return ret;
4365 }
4366
4367 static int tracing_release_pipe(struct inode *inode, struct file *file)
4368 {
4369         struct trace_iterator *iter = file->private_data;
4370         struct trace_array *tr = inode->i_private;
4371
4372         mutex_lock(&trace_types_lock);
4373
4374         if (iter->trace->pipe_close)
4375                 iter->trace->pipe_close(iter);
4376
4377         mutex_unlock(&trace_types_lock);
4378
4379         free_cpumask_var(iter->started);
4380         mutex_destroy(&iter->mutex);
4381         kfree(iter->trace);
4382         kfree(iter);
4383
4384         trace_array_put(tr);
4385
4386         return 0;
4387 }
4388
4389 static unsigned int
4390 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4391 {
4392         /* Iterators are static; they should be either filled or empty */
4393         if (trace_buffer_iter(iter, iter->cpu_file))
4394                 return POLLIN | POLLRDNORM;
4395
4396         if (trace_flags & TRACE_ITER_BLOCK)
4397                 /*
4398                  * Always select as readable when in blocking mode
4399                  */
4400                 return POLLIN | POLLRDNORM;
4401         else
4402                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4403                                              filp, poll_table);
4404 }
4405
4406 static unsigned int
4407 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4408 {
4409         struct trace_iterator *iter = filp->private_data;
4410
4411         return trace_poll(iter, filp, poll_table);
4412 }
4413
4414 /* Must be called with iter->mutex held; it is dropped while waiting. */
4415 static int tracing_wait_pipe(struct file *filp)
4416 {
4417         struct trace_iterator *iter = filp->private_data;
4418         int ret;
4419
4420         while (trace_empty(iter)) {
4421
4422                 if ((filp->f_flags & O_NONBLOCK)) {
4423                         return -EAGAIN;
4424                 }
4425
4426                 /*
4427                  * We block until we read something and tracing is disabled.
4428                  * We still block if tracing is disabled, but we have never
4429                  * read anything. This allows a user to cat this file, and
4430                  * then enable tracing. But after we have read something,
4431                  * we give an EOF when tracing is again disabled.
4432                  *
4433                  * iter->pos will be 0 if we haven't read anything.
4434                  */
4435                 if (!tracing_is_on() && iter->pos)
4436                         break;
4437
4438                 mutex_unlock(&iter->mutex);
4439
4440                 ret = wait_on_pipe(iter, false);
4441
4442                 mutex_lock(&iter->mutex);
4443
4444                 if (ret)
4445                         return ret;
4446         }
4447
4448         return 1;
4449 }
4450
4451 /*
4452  * Consumer reader.
4453  */
4454 static ssize_t
4455 tracing_read_pipe(struct file *filp, char __user *ubuf,
4456                   size_t cnt, loff_t *ppos)
4457 {
4458         struct trace_iterator *iter = filp->private_data;
4459         struct trace_array *tr = iter->tr;
4460         ssize_t sret;
4461
4462         /* return any leftover data */
4463         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4464         if (sret != -EBUSY)
4465                 return sret;
4466
4467         trace_seq_init(&iter->seq);
4468
4469         /* copy the tracer to avoid using a global lock all around */
4470         mutex_lock(&trace_types_lock);
4471         if (unlikely(iter->trace->name != tr->current_trace->name))
4472                 *iter->trace = *tr->current_trace;
4473         mutex_unlock(&trace_types_lock);
4474
4475         /*
4476          * Avoid more than one consumer on a single file descriptor
4477          * This is just a matter of trace coherency; the ring buffer itself
4478          * is protected.
4479          */
4480         mutex_lock(&iter->mutex);
4481         if (iter->trace->read) {
4482                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4483                 if (sret)
4484                         goto out;
4485         }
4486
4487 waitagain:
4488         sret = tracing_wait_pipe(filp);
4489         if (sret <= 0)
4490                 goto out;
4491
4492         /* stop when tracing is finished */
4493         if (trace_empty(iter)) {
4494                 sret = 0;
4495                 goto out;
4496         }
4497
4498         if (cnt >= PAGE_SIZE)
4499                 cnt = PAGE_SIZE - 1;
4500
4501         /* reset all but tr, trace, and overruns */
4502         memset(&iter->seq, 0,
4503                sizeof(struct trace_iterator) -
4504                offsetof(struct trace_iterator, seq));
4505         cpumask_clear(iter->started);
4506         iter->pos = -1;
4507
4508         trace_event_read_lock();
4509         trace_access_lock(iter->cpu_file);
4510         while (trace_find_next_entry_inc(iter) != NULL) {
4511                 enum print_line_t ret;
4512                 int save_len = iter->seq.seq.len;
4513
4514                 ret = print_trace_line(iter);
4515                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4516                         /* don't print partial lines */
4517                         iter->seq.seq.len = save_len;
4518                         break;
4519                 }
4520                 if (ret != TRACE_TYPE_NO_CONSUME)
4521                         trace_consume(iter);
4522
4523                 if (trace_seq_used(&iter->seq) >= cnt)
4524                         break;
4525
4526                 /*
4527                  * Setting the full flag means we reached the trace_seq buffer
4528                  * size and we should have left via the partial output condition above.
4529                  * One of the trace_seq_* functions is not used properly.
4530                  */
4531                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4532                           iter->ent->type);
4533         }
4534         trace_access_unlock(iter->cpu_file);
4535         trace_event_read_unlock();
4536
4537         /* Now copy what we have to the user */
4538         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4539         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4540                 trace_seq_init(&iter->seq);
4541
4542         /*
4543          * If there was nothing to send to user, in spite of consuming trace
4544          * entries, go back to wait for more entries.
4545          */
4546         if (sret == -EBUSY)
4547                 goto waitagain;
4548
4549 out:
4550         mutex_unlock(&iter->mutex);
4551
4552         return sret;
4553 }
4554
4555 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4556                                      unsigned int idx)
4557 {
4558         __free_page(spd->pages[idx]);
4559 }
4560
4561 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4562         .can_merge              = 0,
4563         .confirm                = generic_pipe_buf_confirm,
4564         .release                = generic_pipe_buf_release,
4565         .steal                  = generic_pipe_buf_steal,
4566         .get                    = generic_pipe_buf_get,
4567 };
4568
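     /*
      * Fill iter->seq with formatted trace entries for the splice path,
      * stopping when the page-sized seq buffer fills up, @rem is exhausted,
      * or there are no more entries. Returns how much of @rem is left.
      */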
4569 static size_t
4570 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4571 {
4572         size_t count;
4573         int save_len;
4574         int ret;
4575
4576         /* Seq buffer is page-sized, exactly what we need. */
4577         for (;;) {
4578                 save_len = iter->seq.seq.len;
4579                 ret = print_trace_line(iter);
4580
4581                 if (trace_seq_has_overflowed(&iter->seq)) {
4582                         iter->seq.seq.len = save_len;
4583                         break;
4584                 }
4585
4586                 /*
4587                  * This should not be hit, because it should only
4588                  * be set if the iter->seq overflowed. But check it
4589                  * anyway to be safe.
4590                  */
4591                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4592                         iter->seq.seq.len = save_len;
4593                         break;
4594                 }
4595
4596                 count = trace_seq_used(&iter->seq) - save_len;
4597                 if (rem < count) {
4598                         rem = 0;
4599                         iter->seq.seq.len = save_len;
4600                         break;
4601                 }
4602
4603                 if (ret != TRACE_TYPE_NO_CONSUME)
4604                         trace_consume(iter);
4605                 rem -= count;
4606                 if (!trace_find_next_entry_inc(iter))   {
4607                         rem = 0;
4608                         iter->ent = NULL;
4609                         break;
4610                 }
4611         }
4612
4613         return rem;
4614 }
4615
4616 static ssize_t tracing_splice_read_pipe(struct file *filp,
4617                                         loff_t *ppos,
4618                                         struct pipe_inode_info *pipe,
4619                                         size_t len,
4620                                         unsigned int flags)
4621 {
4622         struct page *pages_def[PIPE_DEF_BUFFERS];
4623         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4624         struct trace_iterator *iter = filp->private_data;
4625         struct splice_pipe_desc spd = {
4626                 .pages          = pages_def,
4627                 .partial        = partial_def,
4628                 .nr_pages       = 0, /* This gets updated below. */
4629                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4630                 .flags          = flags,
4631                 .ops            = &tracing_pipe_buf_ops,
4632                 .spd_release    = tracing_spd_release_pipe,
4633         };
4634         struct trace_array *tr = iter->tr;
4635         ssize_t ret;
4636         size_t rem;
4637         unsigned int i;
4638
4639         if (splice_grow_spd(pipe, &spd))
4640                 return -ENOMEM;
4641
4642         /* copy the tracer to avoid using a global lock all around */
4643         mutex_lock(&trace_types_lock);
4644         if (unlikely(iter->trace->name != tr->current_trace->name))
4645                 *iter->trace = *tr->current_trace;
4646         mutex_unlock(&trace_types_lock);
4647
4648         mutex_lock(&iter->mutex);
4649
4650         if (iter->trace->splice_read) {
4651                 ret = iter->trace->splice_read(iter, filp,
4652                                                ppos, pipe, len, flags);
4653                 if (ret)
4654                         goto out_err;
4655         }
4656
4657         ret = tracing_wait_pipe(filp);
4658         if (ret <= 0)
4659                 goto out_err;
4660
4661         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4662                 ret = -EFAULT;
4663                 goto out_err;
4664         }
4665
4666         trace_event_read_lock();
4667         trace_access_lock(iter->cpu_file);
4668
4669         /* Fill as many pages as possible. */
4670         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4671                 spd.pages[i] = alloc_page(GFP_KERNEL);
4672                 if (!spd.pages[i])
4673                         break;
4674
4675                 rem = tracing_fill_pipe_page(rem, iter);
4676
4677                 /* Copy the data into the page, so we can start over. */
4678                 ret = trace_seq_to_buffer(&iter->seq,
4679                                           page_address(spd.pages[i]),
4680                                           trace_seq_used(&iter->seq));
4681                 if (ret < 0) {
4682                         __free_page(spd.pages[i]);
4683                         break;
4684                 }
4685                 spd.partial[i].offset = 0;
4686                 spd.partial[i].len = trace_seq_used(&iter->seq);
4687
4688                 trace_seq_init(&iter->seq);
4689         }
4690
4691         trace_access_unlock(iter->cpu_file);
4692         trace_event_read_unlock();
4693         mutex_unlock(&iter->mutex);
4694
4695         spd.nr_pages = i;
4696
4697         ret = splice_to_pipe(pipe, &spd);
4698 out:
4699         splice_shrink_spd(&spd);
4700         return ret;
4701
4702 out_err:
4703         mutex_unlock(&iter->mutex);
4704         goto out;
4705 }
4706
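     /*
      * Read handler for buffer_size_kb: report the ring buffer size in KB
      * for the selected cpu, or "X" when "all cpus" is selected and the
      * per-cpu sizes are not uniform.
      */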
4707 static ssize_t
4708 tracing_entries_read(struct file *filp, char __user *ubuf,
4709                      size_t cnt, loff_t *ppos)
4710 {
4711         struct inode *inode = file_inode(filp);
4712         struct trace_array *tr = inode->i_private;
4713         int cpu = tracing_get_cpu(inode);
4714         char buf[64];
4715         int r = 0;
4716         ssize_t ret;
4717
4718         mutex_lock(&trace_types_lock);
4719
4720         if (cpu == RING_BUFFER_ALL_CPUS) {
4721                 int cpu, buf_size_same;
4722                 unsigned long size;
4723
4724                 size = 0;
4725                 buf_size_same = 1;
4726                 /* check if all cpu sizes are same */
4727                 for_each_tracing_cpu(cpu) {
4728                         /* fill in the size from first enabled cpu */
4729                         if (size == 0)
4730                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4731                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4732                                 buf_size_same = 0;
4733                                 break;
4734                         }
4735                 }
4736
4737                 if (buf_size_same) {
4738                         if (!ring_buffer_expanded)
4739                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4740                                             size >> 10,
4741                                             trace_buf_size >> 10);
4742                         else
4743                                 r = sprintf(buf, "%lu\n", size >> 10);
4744                 } else
4745                         r = sprintf(buf, "X\n");
4746         } else
4747                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4748
4749         mutex_unlock(&trace_types_lock);
4750
4751         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4752         return ret;
4753 }
4754
4755 static ssize_t
4756 tracing_entries_write(struct file *filp, const char __user *ubuf,
4757                       size_t cnt, loff_t *ppos)
4758 {
4759         struct inode *inode = file_inode(filp);
4760         struct trace_array *tr = inode->i_private;
4761         unsigned long val;
4762         int ret;
4763
4764         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4765         if (ret)
4766                 return ret;
4767
4768         /* must have at least 1 entry */
4769         if (!val)
4770                 return -EINVAL;
4771
4772         /* value is in KB */
4773         val <<= 10;
4774         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4775         if (ret < 0)
4776                 return ret;
4777
4778         *ppos += cnt;
4779
4780         return cnt;
4781 }
4782
4783 static ssize_t
4784 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4785                                 size_t cnt, loff_t *ppos)
4786 {
4787         struct trace_array *tr = filp->private_data;
4788         char buf[64];
4789         int r, cpu;
4790         unsigned long size = 0, expanded_size = 0;
4791
4792         mutex_lock(&trace_types_lock);
4793         for_each_tracing_cpu(cpu) {
4794                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4795                 if (!ring_buffer_expanded)
4796                         expanded_size += trace_buf_size >> 10;
4797         }
4798         if (ring_buffer_expanded)
4799                 r = sprintf(buf, "%lu\n", size);
4800         else
4801                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4802         mutex_unlock(&trace_types_lock);
4803
4804         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4805 }
4806
4807 static ssize_t
4808 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4809                           size_t cnt, loff_t *ppos)
4810 {
4811         /*
4812          * There is no need to read what the user has written; this function
4813          * only exists so that writing via "echo" does not return an error.
4814          */
4815
4816         *ppos += cnt;
4817
4818         return cnt;
4819 }
4820
4821 static int
4822 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4823 {
4824         struct trace_array *tr = inode->i_private;
4825
4826         /* disable tracing? */
4827         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4828                 tracer_tracing_off(tr);
4829         /* resize the ring buffer to 0 */
4830         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4831
4832         trace_array_put(tr);
4833
4834         return 0;
4835 }
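/*
 * Usage sketch for "free_buffer", assuming debugfs at /sys/kernel/debug:
 * writes are accepted but ignored; the work happens on close, which shrinks
 * the ring buffer to its minimum size and, if the stop-on-free trace option
 * is set, turns tracing off first.
 *
 *   echo > /sys/kernel/debug/tracing/free_buffer
 *
 * The user-visible name of TRACE_ITER_STOP_ON_FREE in the options/
 * directory is assumed here to be "disable_on_free".
 */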
4836
4837 static ssize_t
4838 tracing_mark_write(struct file *filp, const char __user *ubuf,
4839                                         size_t cnt, loff_t *fpos)
4840 {
4841         unsigned long addr = (unsigned long)ubuf;
4842         struct trace_array *tr = filp->private_data;
4843         struct ring_buffer_event *event;
4844         struct ring_buffer *buffer;
4845         struct print_entry *entry;
4846         unsigned long irq_flags;
4847         struct page *pages[2];
4848         void *map_page[2];
4849         int nr_pages = 1;
4850         ssize_t written;
4851         int offset;
4852         int size;
4853         int len;
4854         int ret;
4855         int i;
4856
4857         if (tracing_disabled)
4858                 return -EINVAL;
4859
4860         if (!(trace_flags & TRACE_ITER_MARKERS))
4861                 return -EINVAL;
4862
4863         if (cnt > TRACE_BUF_SIZE)
4864                 cnt = TRACE_BUF_SIZE;
4865
4866         /*
4867          * Userspace is injecting traces into the kernel trace buffer.
4868          * We want to be as non-intrusive as possible.
4869          * To do so, we do not want to allocate any special buffers
4870          * or take any locks, but instead write the userspace data
4871          * straight into the ring buffer.
4872          *
4873          * First we need to pin the userspace buffer into memory. It is most
4874          * likely already resident, because userspace just referenced it, but
4875          * there is no guarantee that it is. By using get_user_pages_fast()
4876          * and kmap_atomic/kunmap_atomic() we can get access to the
4877          * pages directly. We then write the data directly into the
4878          * ring buffer.
4879          */
4880         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4881
4882         /* check if we cross pages */
4883         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4884                 nr_pages = 2;
4885
4886         offset = addr & (PAGE_SIZE - 1);
4887         addr &= PAGE_MASK;
4888
4889         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4890         if (ret < nr_pages) {
4891                 while (--ret >= 0)
4892                         put_page(pages[ret]);
4893                 written = -EFAULT;
4894                 goto out;
4895         }
4896
4897         for (i = 0; i < nr_pages; i++)
4898                 map_page[i] = kmap_atomic(pages[i]);
4899
4900         local_save_flags(irq_flags);
4901         size = sizeof(*entry) + cnt + 2; /* room for a possible '\n' and the '\0' */
4902         buffer = tr->trace_buffer.buffer;
4903         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4904                                           irq_flags, preempt_count());
4905         if (!event) {
4906                 /* Ring buffer disabled, return as if not open for write */
4907                 written = -EBADF;
4908                 goto out_unlock;
4909         }
4910
4911         entry = ring_buffer_event_data(event);
4912         entry->ip = _THIS_IP_;
4913
4914         if (nr_pages == 2) {
4915                 len = PAGE_SIZE - offset;
4916                 memcpy(&entry->buf, map_page[0] + offset, len);
4917                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4918         } else
4919                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4920
4921         if (entry->buf[cnt - 1] != '\n') {
4922                 entry->buf[cnt] = '\n';
4923                 entry->buf[cnt + 1] = '\0';
4924         } else
4925                 entry->buf[cnt] = '\0';
4926
4927         __buffer_unlock_commit(buffer, event);
4928
4929         written = cnt;
4930
4931         *fpos += written;
4932
4933  out_unlock:
4934         for (i = 0; i < nr_pages; i++) {
4935                 kunmap_atomic(map_page[i]);
4936                 put_page(pages[i]);
4937         }
4938  out:
4939         return written;
4940 }
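/*
 * Userspace usage sketch for "trace_marker": a plain write() of up to
 * roughly one page is recorded as a print entry in the trace.  The path
 * below assumes debugfs mounted at /sys/kernel/debug.
 *
 *   #include <fcntl.h>
 *   #include <string.h>
 *   #include <unistd.h>
 *
 *   int main(void)
 *   {
 *           const char *msg = "hello from userspace\n";
 *           int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);
 *
 *           if (fd < 0)
 *                   return 1;
 *           write(fd, msg, strlen(msg));    // shows up in the "trace" output
 *           close(fd);
 *           return 0;
 *   }
 */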
4941
4942 static int tracing_clock_show(struct seq_file *m, void *v)
4943 {
4944         struct trace_array *tr = m->private;
4945         int i;
4946
4947         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4948                 seq_printf(m,
4949                         "%s%s%s%s", i ? " " : "",
4950                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4951                         i == tr->clock_id ? "]" : "");
4952         seq_putc(m, '\n');
4953
4954         return 0;
4955 }
4956
4957 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
4958 {
4959         int i;
4960
4961         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4962                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4963                         break;
4964         }
4965         if (i == ARRAY_SIZE(trace_clocks))
4966                 return -EINVAL;
4967
4968         mutex_lock(&trace_types_lock);
4969
4970         tr->clock_id = i;
4971
4972         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4973
4974         /*
4975          * New clock may not be consistent with the previous clock.
4976          * Reset the buffer so that it doesn't have incomparable timestamps.
4977          */
4978         tracing_reset_online_cpus(&tr->trace_buffer);
4979
4980 #ifdef CONFIG_TRACER_MAX_TRACE
4981         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4982                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4983         tracing_reset_online_cpus(&tr->max_buffer);
4984 #endif
4985
4986         mutex_unlock(&trace_types_lock);
4987
4988         return 0;
4989 }
4990
4991 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4992                                    size_t cnt, loff_t *fpos)
4993 {
4994         struct seq_file *m = filp->private_data;
4995         struct trace_array *tr = m->private;
4996         char buf[64];
4997         const char *clockstr;
4998         int ret;
4999
5000         if (cnt >= sizeof(buf))
5001                 return -EINVAL;
5002
5003         if (copy_from_user(&buf, ubuf, cnt))
5004                 return -EFAULT;
5005
5006         buf[cnt] = 0;
5007
5008         clockstr = strstrip(buf);
5009
5010         ret = tracing_set_clock(tr, clockstr);
5011         if (ret)
5012                 return ret;
5013
5014         *fpos += cnt;
5015
5016         return cnt;
5017 }
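/*
 * Usage sketch for "trace_clock", assuming debugfs at /sys/kernel/debug:
 *
 *   cat /sys/kernel/debug/tracing/trace_clock    # current clock in [brackets]
 *   echo global > /sys/kernel/debug/tracing/trace_clock
 *
 * Note that switching clocks resets the ring buffer (see tracing_set_clock()
 * above), so previously recorded events are discarded.
 */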
5018
5019 static int tracing_clock_open(struct inode *inode, struct file *file)
5020 {
5021         struct trace_array *tr = inode->i_private;
5022         int ret;
5023
5024         if (tracing_disabled)
5025                 return -ENODEV;
5026
5027         if (trace_array_get(tr))
5028                 return -ENODEV;
5029
5030         ret = single_open(file, tracing_clock_show, inode->i_private);
5031         if (ret < 0)
5032                 trace_array_put(tr);
5033
5034         return ret;
5035 }
5036
5037 struct ftrace_buffer_info {
5038         struct trace_iterator   iter;
5039         void                    *spare;
5040         unsigned int            read;
5041 };
5042
5043 #ifdef CONFIG_TRACER_SNAPSHOT
5044 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5045 {
5046         struct trace_array *tr = inode->i_private;
5047         struct trace_iterator *iter;
5048         struct seq_file *m;
5049         int ret = 0;
5050
5051         if (trace_array_get(tr) < 0)
5052                 return -ENODEV;
5053
5054         if (file->f_mode & FMODE_READ) {
5055                 iter = __tracing_open(inode, file, true);
5056                 if (IS_ERR(iter))
5057                         ret = PTR_ERR(iter);
5058         } else {
5059                 /* Writes still need the seq_file to hold the private data */
5060                 ret = -ENOMEM;
5061                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5062                 if (!m)
5063                         goto out;
5064                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5065                 if (!iter) {
5066                         kfree(m);
5067                         goto out;
5068                 }
5069                 ret = 0;
5070
5071                 iter->tr = tr;
5072                 iter->trace_buffer = &tr->max_buffer;
5073                 iter->cpu_file = tracing_get_cpu(inode);
5074                 m->private = iter;
5075                 file->private_data = m;
5076         }
5077 out:
5078         if (ret < 0)
5079                 trace_array_put(tr);
5080
5081         return ret;
5082 }
5083
5084 static ssize_t
5085 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5086                        loff_t *ppos)
5087 {
5088         struct seq_file *m = filp->private_data;
5089         struct trace_iterator *iter = m->private;
5090         struct trace_array *tr = iter->tr;
5091         unsigned long val;
5092         int ret;
5093
5094         ret = tracing_update_buffers();
5095         if (ret < 0)
5096                 return ret;
5097
5098         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5099         if (ret)
5100                 return ret;
5101
5102         mutex_lock(&trace_types_lock);
5103
5104         if (tr->current_trace->use_max_tr) {
5105                 ret = -EBUSY;
5106                 goto out;
5107         }
5108
5109         switch (val) {
5110         case 0:
5111                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5112                         ret = -EINVAL;
5113                         break;
5114                 }
5115                 if (tr->allocated_snapshot)
5116                         free_snapshot(tr);
5117                 break;
5118         case 1:
5119 /* Only allow per-cpu swap if the ring buffer supports it */
5120 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5121                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5122                         ret = -EINVAL;
5123                         break;
5124                 }
5125 #endif
5126                 if (!tr->allocated_snapshot) {
5127                         ret = alloc_snapshot(tr);
5128                         if (ret < 0)
5129                                 break;
5130                 }
5131                 local_irq_disable();
5132                 /* Now, we're going to swap */
5133                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5134                         update_max_tr(tr, current, smp_processor_id());
5135                 else
5136                         update_max_tr_single(tr, current, iter->cpu_file);
5137                 local_irq_enable();
5138                 break;
5139         default:
5140                 if (tr->allocated_snapshot) {
5141                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5142                                 tracing_reset_online_cpus(&tr->max_buffer);
5143                         else
5144                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5145                 }
5146                 break;
5147         }
5148
5149         if (ret >= 0) {
5150                 *ppos += cnt;
5151                 ret = cnt;
5152         }
5153 out:
5154         mutex_unlock(&trace_types_lock);
5155         return ret;
5156 }
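/*
 * Usage sketch for the "snapshot" file, matching the switch statement above
 * and assuming debugfs at /sys/kernel/debug/tracing:
 *
 *   echo 1 > snapshot    # allocate (if needed) and take a snapshot
 *   cat snapshot         # read the snapshotted buffer
 *   echo 2 > snapshot    # any value > 1: clear the snapshot buffer
 *   echo 0 > snapshot    # free the snapshot buffer
 *
 * The per_cpu/cpuN/snapshot files only allow the swap in case 1 when
 * CONFIG_RING_BUFFER_ALLOW_SWAP is enabled.
 */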
5157
5158 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5159 {
5160         struct seq_file *m = file->private_data;
5161         int ret;
5162
5163         ret = tracing_release(inode, file);
5164
5165         if (file->f_mode & FMODE_READ)
5166                 return ret;
5167
5168         /* If write only, the seq_file is just a stub */
5169         if (m)
5170                 kfree(m->private);
5171         kfree(m);
5172
5173         return 0;
5174 }
5175
5176 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5177 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5178                                     size_t count, loff_t *ppos);
5179 static int tracing_buffers_release(struct inode *inode, struct file *file);
5180 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5181                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5182
5183 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5184 {
5185         struct ftrace_buffer_info *info;
5186         int ret;
5187
5188         ret = tracing_buffers_open(inode, filp);
5189         if (ret < 0)
5190                 return ret;
5191
5192         info = filp->private_data;
5193
5194         if (info->iter.trace->use_max_tr) {
5195                 tracing_buffers_release(inode, filp);
5196                 return -EBUSY;
5197         }
5198
5199         info->iter.snapshot = true;
5200         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5201
5202         return ret;
5203 }
5204
5205 #endif /* CONFIG_TRACER_SNAPSHOT */
5206
5207
5208 static const struct file_operations tracing_thresh_fops = {
5209         .open           = tracing_open_generic,
5210         .read           = tracing_thresh_read,
5211         .write          = tracing_thresh_write,
5212         .llseek         = generic_file_llseek,
5213 };
5214
5215 static const struct file_operations tracing_max_lat_fops = {
5216         .open           = tracing_open_generic,
5217         .read           = tracing_max_lat_read,
5218         .write          = tracing_max_lat_write,
5219         .llseek         = generic_file_llseek,
5220 };
5221
5222 static const struct file_operations set_tracer_fops = {
5223         .open           = tracing_open_generic,
5224         .read           = tracing_set_trace_read,
5225         .write          = tracing_set_trace_write,
5226         .llseek         = generic_file_llseek,
5227 };
5228
5229 static const struct file_operations tracing_pipe_fops = {
5230         .open           = tracing_open_pipe,
5231         .poll           = tracing_poll_pipe,
5232         .read           = tracing_read_pipe,
5233         .splice_read    = tracing_splice_read_pipe,
5234         .release        = tracing_release_pipe,
5235         .llseek         = no_llseek,
5236 };
5237
5238 static const struct file_operations tracing_entries_fops = {
5239         .open           = tracing_open_generic_tr,
5240         .read           = tracing_entries_read,
5241         .write          = tracing_entries_write,
5242         .llseek         = generic_file_llseek,
5243         .release        = tracing_release_generic_tr,
5244 };
5245
5246 static const struct file_operations tracing_total_entries_fops = {
5247         .open           = tracing_open_generic_tr,
5248         .read           = tracing_total_entries_read,
5249         .llseek         = generic_file_llseek,
5250         .release        = tracing_release_generic_tr,
5251 };
5252
5253 static const struct file_operations tracing_free_buffer_fops = {
5254         .open           = tracing_open_generic_tr,
5255         .write          = tracing_free_buffer_write,
5256         .release        = tracing_free_buffer_release,
5257 };
5258
5259 static const struct file_operations tracing_mark_fops = {
5260         .open           = tracing_open_generic_tr,
5261         .write          = tracing_mark_write,
5262         .llseek         = generic_file_llseek,
5263         .release        = tracing_release_generic_tr,
5264 };
5265
5266 static const struct file_operations trace_clock_fops = {
5267         .open           = tracing_clock_open,
5268         .read           = seq_read,
5269         .llseek         = seq_lseek,
5270         .release        = tracing_single_release_tr,
5271         .write          = tracing_clock_write,
5272 };
5273
5274 #ifdef CONFIG_TRACER_SNAPSHOT
5275 static const struct file_operations snapshot_fops = {
5276         .open           = tracing_snapshot_open,
5277         .read           = seq_read,
5278         .write          = tracing_snapshot_write,
5279         .llseek         = tracing_lseek,
5280         .release        = tracing_snapshot_release,
5281 };
5282
5283 static const struct file_operations snapshot_raw_fops = {
5284         .open           = snapshot_raw_open,
5285         .read           = tracing_buffers_read,
5286         .release        = tracing_buffers_release,
5287         .splice_read    = tracing_buffers_splice_read,
5288         .llseek         = no_llseek,
5289 };
5290
5291 #endif /* CONFIG_TRACER_SNAPSHOT */
5292
5293 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5294 {
5295         struct trace_array *tr = inode->i_private;
5296         struct ftrace_buffer_info *info;
5297         int ret;
5298
5299         if (tracing_disabled)
5300                 return -ENODEV;
5301
5302         if (trace_array_get(tr) < 0)
5303                 return -ENODEV;
5304
5305         info = kzalloc(sizeof(*info), GFP_KERNEL);
5306         if (!info) {
5307                 trace_array_put(tr);
5308                 return -ENOMEM;
5309         }
5310
5311         mutex_lock(&trace_types_lock);
5312
5313         info->iter.tr           = tr;
5314         info->iter.cpu_file     = tracing_get_cpu(inode);
5315         info->iter.trace        = tr->current_trace;
5316         info->iter.trace_buffer = &tr->trace_buffer;
5317         info->spare             = NULL;
5318         /* Force reading ring buffer for first read */
5319         info->read              = (unsigned int)-1;
5320
5321         filp->private_data = info;
5322
5323         mutex_unlock(&trace_types_lock);
5324
5325         ret = nonseekable_open(inode, filp);
5326         if (ret < 0)
5327                 trace_array_put(tr);
5328
5329         return ret;
5330 }
5331
5332 static unsigned int
5333 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5334 {
5335         struct ftrace_buffer_info *info = filp->private_data;
5336         struct trace_iterator *iter = &info->iter;
5337
5338         return trace_poll(iter, filp, poll_table);
5339 }
5340
5341 static ssize_t
5342 tracing_buffers_read(struct file *filp, char __user *ubuf,
5343                      size_t count, loff_t *ppos)
5344 {
5345         struct ftrace_buffer_info *info = filp->private_data;
5346         struct trace_iterator *iter = &info->iter;
5347         ssize_t ret;
5348         ssize_t size;
5349
5350         if (!count)
5351                 return 0;
5352
5353         mutex_lock(&trace_types_lock);
5354
5355 #ifdef CONFIG_TRACER_MAX_TRACE
5356         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5357                 size = -EBUSY;
5358                 goto out_unlock;
5359         }
5360 #endif
5361
5362         if (!info->spare)
5363                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5364                                                           iter->cpu_file);
5365         size = -ENOMEM;
5366         if (!info->spare)
5367                 goto out_unlock;
5368
5369         /* Is there leftover data from a previous read? */
5370         if (info->read < PAGE_SIZE)
5371                 goto read;
5372
5373  again:
5374         trace_access_lock(iter->cpu_file);
5375         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5376                                     &info->spare,
5377                                     count,
5378                                     iter->cpu_file, 0);
5379         trace_access_unlock(iter->cpu_file);
5380
5381         if (ret < 0) {
5382                 if (trace_empty(iter)) {
5383                         if ((filp->f_flags & O_NONBLOCK)) {
5384                                 size = -EAGAIN;
5385                                 goto out_unlock;
5386                         }
5387                         mutex_unlock(&trace_types_lock);
5388                         ret = wait_on_pipe(iter, false);
5389                         mutex_lock(&trace_types_lock);
5390                         if (ret) {
5391                                 size = ret;
5392                                 goto out_unlock;
5393                         }
5394                         goto again;
5395                 }
5396                 size = 0;
5397                 goto out_unlock;
5398         }
5399
5400         info->read = 0;
5401  read:
5402         size = PAGE_SIZE - info->read;
5403         if (size > count)
5404                 size = count;
5405
5406         ret = copy_to_user(ubuf, info->spare + info->read, size);
5407         if (ret == size) {
5408                 size = -EFAULT;
5409                 goto out_unlock;
5410         }
5411         size -= ret;
5412
5413         *ppos += size;
5414         info->read += size;
5415
5416  out_unlock:
5417         mutex_unlock(&trace_types_lock);
5418
5419         return size;
5420 }
5421
5422 static int tracing_buffers_release(struct inode *inode, struct file *file)
5423 {
5424         struct ftrace_buffer_info *info = file->private_data;
5425         struct trace_iterator *iter = &info->iter;
5426
5427         mutex_lock(&trace_types_lock);
5428
5429         __trace_array_put(iter->tr);
5430
5431         if (info->spare)
5432                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5433         kfree(info);
5434
5435         mutex_unlock(&trace_types_lock);
5436
5437         return 0;
5438 }
5439
5440 struct buffer_ref {
5441         struct ring_buffer      *buffer;
5442         void                    *page;
5443         int                     ref;
5444 };
5445
5446 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5447                                     struct pipe_buffer *buf)
5448 {
5449         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5450
5451         if (--ref->ref)
5452                 return;
5453
5454         ring_buffer_free_read_page(ref->buffer, ref->page);
5455         kfree(ref);
5456         buf->private = 0;
5457 }
5458
5459 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5460                                 struct pipe_buffer *buf)
5461 {
5462         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5463
5464         ref->ref++;
5465 }
5466
5467 /* Pipe buffer operations for a buffer. */
5468 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5469         .can_merge              = 0,
5470         .confirm                = generic_pipe_buf_confirm,
5471         .release                = buffer_pipe_buf_release,
5472         .steal                  = generic_pipe_buf_steal,
5473         .get                    = buffer_pipe_buf_get,
5474 };
5475
5476 /*
5477  * Callback from splice_to_pipe(): release the pages left at the end of
5478  * the spd in case we errored out while filling the pipe.
5479  */
5480 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5481 {
5482         struct buffer_ref *ref =
5483                 (struct buffer_ref *)spd->partial[i].private;
5484
5485         if (--ref->ref)
5486                 return;
5487
5488         ring_buffer_free_read_page(ref->buffer, ref->page);
5489         kfree(ref);
5490         spd->partial[i].private = 0;
5491 }
5492
5493 static ssize_t
5494 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5495                             struct pipe_inode_info *pipe, size_t len,
5496                             unsigned int flags)
5497 {
5498         struct ftrace_buffer_info *info = file->private_data;
5499         struct trace_iterator *iter = &info->iter;
5500         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5501         struct page *pages_def[PIPE_DEF_BUFFERS];
5502         struct splice_pipe_desc spd = {
5503                 .pages          = pages_def,
5504                 .partial        = partial_def,
5505                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5506                 .flags          = flags,
5507                 .ops            = &buffer_pipe_buf_ops,
5508                 .spd_release    = buffer_spd_release,
5509         };
5510         struct buffer_ref *ref;
5511         int entries, size, i;
5512         ssize_t ret = 0;
5513
5514         mutex_lock(&trace_types_lock);
5515
5516 #ifdef CONFIG_TRACER_MAX_TRACE
5517         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5518                 ret = -EBUSY;
5519                 goto out;
5520         }
5521 #endif
5522
5523         if (splice_grow_spd(pipe, &spd)) {
5524                 ret = -ENOMEM;
5525                 goto out;
5526         }
5527
5528         if (*ppos & (PAGE_SIZE - 1)) {
5529                 ret = -EINVAL;
5530                 goto out;
5531         }
5532
5533         if (len & (PAGE_SIZE - 1)) {
5534                 if (len < PAGE_SIZE) {
5535                         ret = -EINVAL;
5536                         goto out;
5537                 }
5538                 len &= PAGE_MASK;
5539         }
5540
5541  again:
5542         trace_access_lock(iter->cpu_file);
5543         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5544
5545         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5546                 struct page *page;
5547                 int r;
5548
5549                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5550                 if (!ref) {
5551                         ret = -ENOMEM;
5552                         break;
5553                 }
5554
5555                 ref->ref = 1;
5556                 ref->buffer = iter->trace_buffer->buffer;
5557                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5558                 if (!ref->page) {
5559                         ret = -ENOMEM;
5560                         kfree(ref);
5561                         break;
5562                 }
5563
5564                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5565                                           len, iter->cpu_file, 1);
5566                 if (r < 0) {
5567                         ring_buffer_free_read_page(ref->buffer, ref->page);
5568                         kfree(ref);
5569                         break;
5570                 }
5571
5572                 /*
5573                  * Zero out any leftover data; this page is going
5574                  * out to user land.
5575                  */
5576                 size = ring_buffer_page_len(ref->page);
5577                 if (size < PAGE_SIZE)
5578                         memset(ref->page + size, 0, PAGE_SIZE - size);
5579
5580                 page = virt_to_page(ref->page);
5581
5582                 spd.pages[i] = page;
5583                 spd.partial[i].len = PAGE_SIZE;
5584                 spd.partial[i].offset = 0;
5585                 spd.partial[i].private = (unsigned long)ref;
5586                 spd.nr_pages++;
5587                 *ppos += PAGE_SIZE;
5588
5589                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5590         }
5591
5592         trace_access_unlock(iter->cpu_file);
5593         spd.nr_pages = i;
5594
5595         /* did we read anything? */
5596         if (!spd.nr_pages) {
5597                 if (ret)
5598                         goto out;
5599
5600                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5601                         ret = -EAGAIN;
5602                         goto out;
5603                 }
5604                 mutex_unlock(&trace_types_lock);
5605                 ret = wait_on_pipe(iter, true);
5606                 mutex_lock(&trace_types_lock);
5607                 if (ret)
5608                         goto out;
5609
5610                 goto again;
5611         }
5612
5613         ret = splice_to_pipe(pipe, &spd);
5614         splice_shrink_spd(&spd);
5615 out:
5616         mutex_unlock(&trace_types_lock);
5617
5618         return ret;
5619 }
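/*
 * Userspace usage sketch: trace_pipe_raw hands out raw ring buffer pages,
 * so splice() is the natural consumer.  Offsets and lengths must be page
 * aligned, as checked above.  The path and the 4096-byte page size are
 * assumptions for the sketch; splice() needs _GNU_SOURCE.
 *
 *   #define _GNU_SOURCE
 *   #include <fcntl.h>
 *   #include <unistd.h>
 *
 *   int main(void)
 *   {
 *           int pfd[2];
 *           int fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *                         O_RDONLY | O_NONBLOCK);
 *           int out = open("cpu0.raw", O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *           ssize_t n;
 *
 *           pipe(pfd);
 *           // move whole pages: trace_pipe_raw -> pipe -> output file;
 *           // with O_NONBLOCK the loop ends (-EAGAIN) once the buffer is empty
 *           while ((n = splice(fd, NULL, pfd[1], NULL, 4096, SPLICE_F_MOVE)) > 0)
 *                   splice(pfd[0], NULL, out, NULL, n, SPLICE_F_MOVE);
 *           return 0;
 *   }
 */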
5620
5621 static const struct file_operations tracing_buffers_fops = {
5622         .open           = tracing_buffers_open,
5623         .read           = tracing_buffers_read,
5624         .poll           = tracing_buffers_poll,
5625         .release        = tracing_buffers_release,
5626         .splice_read    = tracing_buffers_splice_read,
5627         .llseek         = no_llseek,
5628 };
5629
5630 static ssize_t
5631 tracing_stats_read(struct file *filp, char __user *ubuf,
5632                    size_t count, loff_t *ppos)
5633 {
5634         struct inode *inode = file_inode(filp);
5635         struct trace_array *tr = inode->i_private;
5636         struct trace_buffer *trace_buf = &tr->trace_buffer;
5637         int cpu = tracing_get_cpu(inode);
5638         struct trace_seq *s;
5639         unsigned long cnt;
5640         unsigned long long t;
5641         unsigned long usec_rem;
5642
5643         s = kmalloc(sizeof(*s), GFP_KERNEL);
5644         if (!s)
5645                 return -ENOMEM;
5646
5647         trace_seq_init(s);
5648
5649         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5650         trace_seq_printf(s, "entries: %ld\n", cnt);
5651
5652         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5653         trace_seq_printf(s, "overrun: %ld\n", cnt);
5654
5655         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5656         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5657
5658         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5659         trace_seq_printf(s, "bytes: %ld\n", cnt);
5660
5661         if (trace_clocks[tr->clock_id].in_ns) {
5662                 /* local or global for trace_clock */
5663                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5664                 usec_rem = do_div(t, USEC_PER_SEC);
5665                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5666                                                                 t, usec_rem);
5667
5668                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5669                 usec_rem = do_div(t, USEC_PER_SEC);
5670                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5671         } else {
5672                 /* counter or tsc mode for trace_clock */
5673                 trace_seq_printf(s, "oldest event ts: %llu\n",
5674                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5675
5676                 trace_seq_printf(s, "now ts: %llu\n",
5677                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5678         }
5679
5680         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5681         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5682
5683         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5684         trace_seq_printf(s, "read events: %ld\n", cnt);
5685
5686         count = simple_read_from_buffer(ubuf, count, ppos,
5687                                         s->buffer, trace_seq_used(s));
5688
5689         kfree(s);
5690
5691         return count;
5692 }
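/*
 * Usage sketch: each per-cpu stats file is plain text, e.g.
 *
 *   cat /sys/kernel/debug/tracing/per_cpu/cpu0/stats
 *
 * which prints the entry/overrun/byte counts and timestamps assembled
 * above, one "name: value" pair per line.
 */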
5693
5694 static const struct file_operations tracing_stats_fops = {
5695         .open           = tracing_open_generic_tr,
5696         .read           = tracing_stats_read,
5697         .llseek         = generic_file_llseek,
5698         .release        = tracing_release_generic_tr,
5699 };
5700
5701 #ifdef CONFIG_DYNAMIC_FTRACE
5702
5703 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5704 {
5705         return 0;
5706 }
5707
5708 static ssize_t
5709 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5710                   size_t cnt, loff_t *ppos)
5711 {
5712         static char ftrace_dyn_info_buffer[1024];
5713         static DEFINE_MUTEX(dyn_info_mutex);
5714         unsigned long *p = filp->private_data;
5715         char *buf = ftrace_dyn_info_buffer;
5716         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5717         int r;
5718
5719         mutex_lock(&dyn_info_mutex);
5720         r = sprintf(buf, "%ld ", *p);
5721
5722         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5723         buf[r++] = '\n';
5724
5725         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5726
5727         mutex_unlock(&dyn_info_mutex);
5728
5729         return r;
5730 }
5731
5732 static const struct file_operations tracing_dyn_info_fops = {
5733         .open           = tracing_open_generic,
5734         .read           = tracing_read_dyn_info,
5735         .llseek         = generic_file_llseek,
5736 };
5737 #endif /* CONFIG_DYNAMIC_FTRACE */
5738
5739 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5740 static void
5741 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5742 {
5743         tracing_snapshot();
5744 }
5745
5746 static void
5747 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5748 {
5749         unsigned long *count = (unsigned long *)data;
5750
5751         if (!*count)
5752                 return;
5753
5754         if (*count != -1)
5755                 (*count)--;
5756
5757         tracing_snapshot();
5758 }
5759
5760 static int
5761 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5762                       struct ftrace_probe_ops *ops, void *data)
5763 {
5764         long count = (long)data;
5765
5766         seq_printf(m, "%ps:", (void *)ip);
5767
5768         seq_puts(m, "snapshot");
5769
5770         if (count == -1)
5771                 seq_puts(m, ":unlimited\n");
5772         else
5773                 seq_printf(m, ":count=%ld\n", count);
5774
5775         return 0;
5776 }
5777
5778 static struct ftrace_probe_ops snapshot_probe_ops = {
5779         .func                   = ftrace_snapshot,
5780         .print                  = ftrace_snapshot_print,
5781 };
5782
5783 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5784         .func                   = ftrace_count_snapshot,
5785         .print                  = ftrace_snapshot_print,
5786 };
5787
5788 static int
5789 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5790                                char *glob, char *cmd, char *param, int enable)
5791 {
5792         struct ftrace_probe_ops *ops;
5793         void *count = (void *)-1;
5794         char *number;
5795         int ret;
5796
5797         /* hash funcs only work with set_ftrace_filter */
5798         if (!enable)
5799                 return -EINVAL;
5800
5801         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5802
5803         if (glob[0] == '!') {
5804                 unregister_ftrace_function_probe_func(glob+1, ops);
5805                 return 0;
5806         }
5807
5808         if (!param)
5809                 goto out_reg;
5810
5811         number = strsep(&param, ":");
5812
5813         if (!strlen(number))
5814                 goto out_reg;
5815
5816         /*
5817          * We use the callback data field (which is a pointer)
5818          * as our counter.
5819          */
5820         ret = kstrtoul(number, 0, (unsigned long *)&count);
5821         if (ret)
5822                 return ret;
5823
5824  out_reg:
5825         ret = register_ftrace_function_probe(glob, ops, count);
5826
5827         if (ret >= 0)
5828                 alloc_snapshot(&global_trace);
5829
5830         return ret < 0 ? ret : 0;
5831 }
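/*
 * Usage sketch for the "snapshot" function command parsed above, written
 * through set_ftrace_filter (debugfs mount point assumed; "schedule" is
 * only an example, any function in available_filter_functions works):
 *
 *   echo 'schedule:snapshot' > set_ftrace_filter     # snapshot on every call
 *   echo 'schedule:snapshot:5' > set_ftrace_filter   # only the first 5 calls
 *   echo '!schedule:snapshot' > set_ftrace_filter    # remove the probe
 *
 * Note, from the ops selection above: removal picks the same probe ops as
 * registration, so a counted probe is removed with a counted '!' line
 * (e.g. '!schedule:snapshot:0').
 */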
5832
5833 static struct ftrace_func_command ftrace_snapshot_cmd = {
5834         .name                   = "snapshot",
5835         .func                   = ftrace_trace_snapshot_callback,
5836 };
5837
5838 static __init int register_snapshot_cmd(void)
5839 {
5840         return register_ftrace_command(&ftrace_snapshot_cmd);
5841 }
5842 #else
5843 static inline __init int register_snapshot_cmd(void) { return 0; }
5844 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5845
5846 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5847 {
5848         if (tr->dir)
5849                 return tr->dir;
5850
5851         if (!debugfs_initialized())
5852                 return NULL;
5853
5854         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5855                 tr->dir = debugfs_create_dir("tracing", NULL);
5856
5857         if (!tr->dir)
5858                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5859
5860         return tr->dir;
5861 }
5862
5863 struct dentry *tracing_init_dentry(void)
5864 {
5865         return tracing_init_dentry_tr(&global_trace);
5866 }
5867
5868 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5869 {
5870         struct dentry *d_tracer;
5871
5872         if (tr->percpu_dir)
5873                 return tr->percpu_dir;
5874
5875         d_tracer = tracing_init_dentry_tr(tr);
5876         if (!d_tracer)
5877                 return NULL;
5878
5879         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5880
5881         WARN_ONCE(!tr->percpu_dir,
5882                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5883
5884         return tr->percpu_dir;
5885 }
5886
5887 static struct dentry *
5888 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5889                       void *data, long cpu, const struct file_operations *fops)
5890 {
5891         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5892
5893         if (ret) /* See tracing_get_cpu() */
5894                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5895         return ret;
5896 }
5897
5898 static void
5899 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5900 {
5901         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5902         struct dentry *d_cpu;
5903         char cpu_dir[30]; /* 30 characters should be more than enough */
5904
5905         if (!d_percpu)
5906                 return;
5907
5908         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5909         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5910         if (!d_cpu) {
5911                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5912                 return;
5913         }
5914
5915         /* per cpu trace_pipe */
5916         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5917                                 tr, cpu, &tracing_pipe_fops);
5918
5919         /* per cpu trace */
5920         trace_create_cpu_file("trace", 0644, d_cpu,
5921                                 tr, cpu, &tracing_fops);
5922
5923         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5924                                 tr, cpu, &tracing_buffers_fops);
5925
5926         trace_create_cpu_file("stats", 0444, d_cpu,
5927                                 tr, cpu, &tracing_stats_fops);
5928
5929         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5930                                 tr, cpu, &tracing_entries_fops);
5931
5932 #ifdef CONFIG_TRACER_SNAPSHOT
5933         trace_create_cpu_file("snapshot", 0644, d_cpu,
5934                                 tr, cpu, &snapshot_fops);
5935
5936         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5937                                 tr, cpu, &snapshot_raw_fops);
5938 #endif
5939 }
5940
5941 #ifdef CONFIG_FTRACE_SELFTEST
5942 /* Let selftest have access to static functions in this file */
5943 #include "trace_selftest.c"
5944 #endif
5945
5946 struct trace_option_dentry {
5947         struct tracer_opt               *opt;
5948         struct tracer_flags             *flags;
5949         struct trace_array              *tr;
5950         struct dentry                   *entry;
5951 };
5952
5953 static ssize_t
5954 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5955                         loff_t *ppos)
5956 {
5957         struct trace_option_dentry *topt = filp->private_data;
5958         char *buf;
5959
5960         if (topt->flags->val & topt->opt->bit)
5961                 buf = "1\n";
5962         else
5963                 buf = "0\n";
5964
5965         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5966 }
5967
5968 static ssize_t
5969 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5970                          loff_t *ppos)
5971 {
5972         struct trace_option_dentry *topt = filp->private_data;
5973         unsigned long val;
5974         int ret;
5975
5976         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5977         if (ret)
5978                 return ret;
5979
5980         if (val != 0 && val != 1)
5981                 return -EINVAL;
5982
5983         if (!!(topt->flags->val & topt->opt->bit) != val) {
5984                 mutex_lock(&trace_types_lock);
5985                 ret = __set_tracer_option(topt->tr, topt->flags,
5986                                           topt->opt, !val);
5987                 mutex_unlock(&trace_types_lock);
5988                 if (ret)
5989                         return ret;
5990         }
5991
5992         *ppos += cnt;
5993
5994         return cnt;
5995 }
5996
5997
5998 static const struct file_operations trace_options_fops = {
5999         .open = tracing_open_generic,
6000         .read = trace_options_read,
6001         .write = trace_options_write,
6002         .llseek = generic_file_llseek,
6003 };
6004
6005 static ssize_t
6006 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6007                         loff_t *ppos)
6008 {
6009         long index = (long)filp->private_data;
6010         char *buf;
6011
6012         if (trace_flags & (1 << index))
6013                 buf = "1\n";
6014         else
6015                 buf = "0\n";
6016
6017         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6018 }
6019
6020 static ssize_t
6021 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6022                          loff_t *ppos)
6023 {
6024         struct trace_array *tr = &global_trace;
6025         long index = (long)filp->private_data;
6026         unsigned long val;
6027         int ret;
6028
6029         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6030         if (ret)
6031                 return ret;
6032
6033         if (val != 0 && val != 1)
6034                 return -EINVAL;
6035
6036         mutex_lock(&trace_types_lock);
6037         ret = set_tracer_flag(tr, 1 << index, val);
6038         mutex_unlock(&trace_types_lock);
6039
6040         if (ret < 0)
6041                 return ret;
6042
6043         *ppos += cnt;
6044
6045         return cnt;
6046 }
6047
6048 static const struct file_operations trace_options_core_fops = {
6049         .open = tracing_open_generic,
6050         .read = trace_options_core_read,
6051         .write = trace_options_core_write,
6052         .llseek = generic_file_llseek,
6053 };
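/*
 * Usage sketch: each core trace flag gets a file under options/ that
 * accepts only "0" or "1" (enforced above).  Assuming debugfs at
 * /sys/kernel/debug, and using two standard flag names as examples:
 *
 *   echo 0 > /sys/kernel/debug/tracing/options/print-parent
 *   echo 1 > /sys/kernel/debug/tracing/options/sym-offset
 *   cat /sys/kernel/debug/tracing/options/sym-offset   # current value
 */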
6054
6055 struct dentry *trace_create_file(const char *name,
6056                                  umode_t mode,
6057                                  struct dentry *parent,
6058                                  void *data,
6059                                  const struct file_operations *fops)
6060 {
6061         struct dentry *ret;
6062
6063         ret = debugfs_create_file(name, mode, parent, data, fops);
6064         if (!ret)
6065                 pr_warning("Could not create debugfs '%s' entry\n", name);
6066
6067         return ret;
6068 }
6069
6070
6071 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6072 {
6073         struct dentry *d_tracer;
6074
6075         if (tr->options)
6076                 return tr->options;
6077
6078         d_tracer = tracing_init_dentry_tr(tr);
6079         if (!d_tracer)
6080                 return NULL;
6081
6082         tr->options = debugfs_create_dir("options", d_tracer);
6083         if (!tr->options) {
6084                 pr_warning("Could not create debugfs directory 'options'\n");
6085                 return NULL;
6086         }
6087
6088         return tr->options;
6089 }
6090
6091 static void
6092 create_trace_option_file(struct trace_array *tr,
6093                          struct trace_option_dentry *topt,
6094                          struct tracer_flags *flags,
6095                          struct tracer_opt *opt)
6096 {
6097         struct dentry *t_options;
6098
6099         t_options = trace_options_init_dentry(tr);
6100         if (!t_options)
6101                 return;
6102
6103         topt->flags = flags;
6104         topt->opt = opt;
6105         topt->tr = tr;
6106
6107         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6108                                     &trace_options_fops);
6109
6110 }
6111
6112 static struct trace_option_dentry *
6113 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6114 {
6115         struct trace_option_dentry *topts;
6116         struct tracer_flags *flags;
6117         struct tracer_opt *opts;
6118         int cnt;
6119
6120         if (!tracer)
6121                 return NULL;
6122
6123         flags = tracer->flags;
6124
6125         if (!flags || !flags->opts)
6126                 return NULL;
6127
6128         opts = flags->opts;
6129
6130         for (cnt = 0; opts[cnt].name; cnt++)
6131                 ;
6132
6133         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6134         if (!topts)
6135                 return NULL;
6136
6137         for (cnt = 0; opts[cnt].name; cnt++)
6138                 create_trace_option_file(tr, &topts[cnt], flags,
6139                                          &opts[cnt]);
6140
6141         return topts;
6142 }
6143
6144 static void
6145 destroy_trace_option_files(struct trace_option_dentry *topts)
6146 {
6147         int cnt;
6148
6149         if (!topts)
6150                 return;
6151
6152         for (cnt = 0; topts[cnt].opt; cnt++)
6153                 debugfs_remove(topts[cnt].entry);
6154
6155         kfree(topts);
6156 }
6157
6158 static struct dentry *
6159 create_trace_option_core_file(struct trace_array *tr,
6160                               const char *option, long index)
6161 {
6162         struct dentry *t_options;
6163
6164         t_options = trace_options_init_dentry(tr);
6165         if (!t_options)
6166                 return NULL;
6167
6168         return trace_create_file(option, 0644, t_options, (void *)index,
6169                                     &trace_options_core_fops);
6170 }
6171
6172 static __init void create_trace_options_dir(struct trace_array *tr)
6173 {
6174         struct dentry *t_options;
6175         int i;
6176
6177         t_options = trace_options_init_dentry(tr);
6178         if (!t_options)
6179                 return;
6180
6181         for (i = 0; trace_options[i]; i++)
6182                 create_trace_option_core_file(tr, trace_options[i], i);
6183 }
6184
6185 static ssize_t
6186 rb_simple_read(struct file *filp, char __user *ubuf,
6187                size_t cnt, loff_t *ppos)
6188 {
6189         struct trace_array *tr = filp->private_data;
6190         char buf[64];
6191         int r;
6192
6193         r = tracer_tracing_is_on(tr);
6194         r = sprintf(buf, "%d\n", r);
6195
6196         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6197 }
6198
6199 static ssize_t
6200 rb_simple_write(struct file *filp, const char __user *ubuf,
6201                 size_t cnt, loff_t *ppos)
6202 {
6203         struct trace_array *tr = filp->private_data;
6204         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6205         unsigned long val;
6206         int ret;
6207
6208         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6209         if (ret)
6210                 return ret;
6211
6212         if (buffer) {
6213                 mutex_lock(&trace_types_lock);
6214                 if (val) {
6215                         tracer_tracing_on(tr);
6216                         if (tr->current_trace->start)
6217                                 tr->current_trace->start(tr);
6218                 } else {
6219                         tracer_tracing_off(tr);
6220                         if (tr->current_trace->stop)
6221                                 tr->current_trace->stop(tr);
6222                 }
6223                 mutex_unlock(&trace_types_lock);
6224         }
6225
6226         (*ppos)++;
6227
6228         return cnt;
6229 }
6230
6231 static const struct file_operations rb_simple_fops = {
6232         .open           = tracing_open_generic_tr,
6233         .read           = rb_simple_read,
6234         .write          = rb_simple_write,
6235         .release        = tracing_release_generic_tr,
6236         .llseek         = default_llseek,
6237 };
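/*
 * Usage sketch: "tracing_on" toggles recording into the ring buffer without
 * tearing down the current tracer.  Assuming debugfs at /sys/kernel/debug:
 *
 *   echo 0 > /sys/kernel/debug/tracing/tracing_on   # stop recording
 *   echo 1 > /sys/kernel/debug/tracing/tracing_on   # resume recording
 *   cat /sys/kernel/debug/tracing/tracing_on        # current state (0 or 1)
 */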
6238
6239 struct dentry *trace_instance_dir;
6240
6241 static void
6242 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
6243
6244 static int
6245 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6246 {
6247         enum ring_buffer_flags rb_flags;
6248
6249         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6250
6251         buf->tr = tr;
6252
6253         buf->buffer = ring_buffer_alloc(size, rb_flags);
6254         if (!buf->buffer)
6255                 return -ENOMEM;
6256
6257         buf->data = alloc_percpu(struct trace_array_cpu);
6258         if (!buf->data) {
6259                 ring_buffer_free(buf->buffer);
6260                 return -ENOMEM;
6261         }
6262
6263         /* Allocate the first page for all buffers */
6264         set_buffer_entries(&tr->trace_buffer,
6265                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6266
6267         return 0;
6268 }
6269
6270 static int allocate_trace_buffers(struct trace_array *tr, int size)
6271 {
6272         int ret;
6273
6274         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6275         if (ret)
6276                 return ret;
6277
6278 #ifdef CONFIG_TRACER_MAX_TRACE
6279         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6280                                     allocate_snapshot ? size : 1);
6281         if (WARN_ON(ret)) {
6282                 ring_buffer_free(tr->trace_buffer.buffer);
6283                 free_percpu(tr->trace_buffer.data);
6284                 return -ENOMEM;
6285         }
6286         tr->allocated_snapshot = allocate_snapshot;
6287
6288         /*
6289          * Only the top level trace array gets its snapshot allocated
6290          * from the kernel command line.
6291          */
6292         allocate_snapshot = false;
6293 #endif
6294         return 0;
6295 }
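/*
 * Note: "allocate_snapshot" above is normally set from the kernel command
 * line; booting with the "alloc_snapshot" parameter (assumed here to be the
 * one documented in Documentation/kernel-parameters.txt) makes the top-level
 * trace array allocate its snapshot buffer at full size during boot, e.g.
 *
 *   linux ... alloc_snapshot
 *
 * The flag is cleared afterwards so later instances do not inherit it.
 */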
6296
6297 static void free_trace_buffer(struct trace_buffer *buf)
6298 {
6299         if (buf->buffer) {
6300                 ring_buffer_free(buf->buffer);
6301                 buf->buffer = NULL;
6302                 free_percpu(buf->data);
6303                 buf->data = NULL;
6304         }
6305 }
6306
6307 static void free_trace_buffers(struct trace_array *tr)
6308 {
6309         if (!tr)
6310                 return;
6311
6312         free_trace_buffer(&tr->trace_buffer);
6313
6314 #ifdef CONFIG_TRACER_MAX_TRACE
6315         free_trace_buffer(&tr->max_buffer);
6316 #endif
6317 }
6318
6319 static int new_instance_create(const char *name)
6320 {
6321         struct trace_array *tr;
6322         int ret;
6323
6324         mutex_lock(&trace_types_lock);
6325
6326         ret = -EEXIST;
6327         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6328                 if (tr->name && strcmp(tr->name, name) == 0)
6329                         goto out_unlock;
6330         }
6331
6332         ret = -ENOMEM;
6333         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6334         if (!tr)
6335                 goto out_unlock;
6336
6337         tr->name = kstrdup(name, GFP_KERNEL);
6338         if (!tr->name)
6339                 goto out_free_tr;
6340
6341         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6342                 goto out_free_tr;
6343
6344         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6345
6346         raw_spin_lock_init(&tr->start_lock);
6347
6348         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6349
6350         tr->current_trace = &nop_trace;
6351
6352         INIT_LIST_HEAD(&tr->systems);
6353         INIT_LIST_HEAD(&tr->events);
6354
6355         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6356                 goto out_free_tr;
6357
6358         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6359         if (!tr->dir)
6360                 goto out_free_tr;
6361
6362         ret = event_trace_add_tracer(tr->dir, tr);
6363         if (ret) {
6364                 debugfs_remove_recursive(tr->dir);
6365                 goto out_free_tr;
6366         }
6367
6368         init_tracer_debugfs(tr, tr->dir);
6369
6370         list_add(&tr->list, &ftrace_trace_arrays);
6371
6372         mutex_unlock(&trace_types_lock);
6373
6374         return 0;
6375
6376  out_free_tr:
6377         free_trace_buffers(tr);
6378         free_cpumask_var(tr->tracing_cpumask);
6379         kfree(tr->name);
6380         kfree(tr);
6381
6382  out_unlock:
6383         mutex_unlock(&trace_types_lock);
6384
6385         return ret;
6386
6387 }
6388
6389 static int instance_delete(const char *name)
6390 {
6391         struct trace_array *tr;
6392         int found = 0;
6393         int ret;
6394
6395         mutex_lock(&trace_types_lock);
6396
6397         ret = -ENODEV;
6398         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6399                 if (tr->name && strcmp(tr->name, name) == 0) {
6400                         found = 1;
6401                         break;
6402                 }
6403         }
6404         if (!found)
6405                 goto out_unlock;
6406
6407         ret = -EBUSY;
6408         if (tr->ref)
6409                 goto out_unlock;
6410
6411         list_del(&tr->list);
6412
6413         tracing_set_nop(tr);
6414         event_trace_del_tracer(tr);
6415         ftrace_destroy_function_files(tr);
6416         debugfs_remove_recursive(tr->dir);
6417         free_trace_buffers(tr);
6418
6419         kfree(tr->name);
6420         kfree(tr);
6421
6422         ret = 0;
6423
6424  out_unlock:
6425         mutex_unlock(&trace_types_lock);
6426
6427         return ret;
6428 }
6429
6430 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6431 {
6432         struct dentry *parent;
6433         int ret;
6434
6435         /* Paranoid: Make sure the parent is the "instances" directory */
6436         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
6437         if (WARN_ON_ONCE(parent != trace_instance_dir))
6438                 return -ENOENT;
6439
6440         /*
6441          * The inode mutex is locked, but debugfs_create_dir() will also
6442          * take the mutex. As the instances directory cannot be destroyed
6443          * or changed in any other way, it is safe to unlock it and
6444          * let the dentry try. If two users try to create the same dir at
6445          * the same time, new_instance_create() will determine the
6446          * winner.
6447          */
6448         mutex_unlock(&inode->i_mutex);
6449
6450         ret = new_instance_create(dentry->d_iname);
6451
6452         mutex_lock(&inode->i_mutex);
6453
6454         return ret;
6455 }
6456
6457 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6458 {
6459         struct dentry *parent;
6460         int ret;
6461
6462         /* Paranoid: Make sure the parent is the "instances" directory */
6463         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
6464         if (WARN_ON_ONCE(parent != trace_instance_dir))
6465                 return -ENOENT;
6466
6467         /* The caller did a dget() on dentry */
6468         mutex_unlock(&dentry->d_inode->i_mutex);
6469
6470         /*
6471          * The inode mutex is locked, but debugfs_remove_recursive(), called
6472          * from instance_delete(), will also take it. As the instances
6473          * directory cannot be destroyed or changed in any other way, it is
6474          * safe to unlock it and let the removal proceed. If two users try
6475          * to remove the same dir at the same time, instance_delete() will
6476          * determine the winner.
6477          */
6478         mutex_unlock(&inode->i_mutex);
6479
6480         ret = instance_delete(dentry->d_iname);
6481
6482         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6483         mutex_lock(&dentry->d_inode->i_mutex);
6484
6485         return ret;
6486 }
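
/*
 * Note the re-lock order above: the parent ("instances") inode mutex is
 * taken first, annotated with I_MUTEX_PARENT for lockdep, and only then
 * the victim dentry's own inode mutex.  That matches the parent-before-
 * child order the VFS used before calling ->rmdir(), so both locks are
 * handed back to the caller in the state it expects.
 */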
6487
6488 static const struct inode_operations instance_dir_inode_operations = {
6489         .lookup         = simple_lookup,
6490         .mkdir          = instance_mkdir,
6491         .rmdir          = instance_rmdir,
6492 };
6493
6494 static __init void create_trace_instances(struct dentry *d_tracer)
6495 {
6496         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6497         if (WARN_ON(!trace_instance_dir))
6498                 return;
6499
6500         /* Hijack the dir inode operations to allow mkdir and rmdir */
6501         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6502 }
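
/*
 * Replacing i_op on the "instances" directory is what routes plain
 * mkdir(2) and rmdir(2) from userspace to instance_mkdir() and
 * instance_rmdir() above: debugfs itself does not offer user-created
 * directories, so the tracer supplies these operations by hand.
 */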
6503
6504 static void
6505 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6506 {
6507         int cpu;
6508
6509         trace_create_file("available_tracers", 0444, d_tracer,
6510                         tr, &show_traces_fops);
6511
6512         trace_create_file("current_tracer", 0644, d_tracer,
6513                         tr, &set_tracer_fops);
6514
6515         trace_create_file("tracing_cpumask", 0644, d_tracer,
6516                           tr, &tracing_cpumask_fops);
6517
6518         trace_create_file("trace_options", 0644, d_tracer,
6519                           tr, &tracing_iter_fops);
6520
6521         trace_create_file("trace", 0644, d_tracer,
6522                           tr, &tracing_fops);
6523
6524         trace_create_file("trace_pipe", 0444, d_tracer,
6525                           tr, &tracing_pipe_fops);
6526
6527         trace_create_file("buffer_size_kb", 0644, d_tracer,
6528                           tr, &tracing_entries_fops);
6529
6530         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6531                           tr, &tracing_total_entries_fops);
6532
6533         trace_create_file("free_buffer", 0200, d_tracer,
6534                           tr, &tracing_free_buffer_fops);
6535
6536         trace_create_file("trace_marker", 0220, d_tracer,
6537                           tr, &tracing_mark_fops);
6538
6539         trace_create_file("trace_clock", 0644, d_tracer, tr,
6540                           &trace_clock_fops);
6541
6542         trace_create_file("tracing_on", 0644, d_tracer,
6543                           tr, &rb_simple_fops);
6544
6545 #ifdef CONFIG_TRACER_MAX_TRACE
6546         trace_create_file("tracing_max_latency", 0644, d_tracer,
6547                         &tr->max_latency, &tracing_max_lat_fops);
6548 #endif
6549
6550         if (ftrace_create_function_files(tr, d_tracer))
6551                 WARN(1, "Could not allocate function filter files");
6552
6553 #ifdef CONFIG_TRACER_SNAPSHOT
6554         trace_create_file("snapshot", 0644, d_tracer,
6555                           tr, &snapshot_fops);
6556 #endif
6557
6558         for_each_tracing_cpu(cpu)
6559                 tracing_init_debugfs_percpu(tr, cpu);
6560
6561 }
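
/*
 * A sketch (not exhaustive) of what an instance directory contains once
 * this has run, e.g. for a hypothetical instance named "foo":
 *
 *      instances/foo/current_tracer
 *      instances/foo/trace
 *      instances/foo/trace_pipe
 *      instances/foo/trace_options
 *      instances/foo/buffer_size_kb
 *      instances/foo/tracing_on
 *      instances/foo/per_cpu/cpu0/...
 *
 * The same function also populates the top level tracing directory,
 * since the global trace_array is passed in from tracer_init_debugfs().
 */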
6562
6563 static __init int tracer_init_debugfs(void)
6564 {
6565         struct dentry *d_tracer;
6566
6567         trace_access_lock_init();
6568
6569         d_tracer = tracing_init_dentry();
6570         if (!d_tracer)
6571                 return 0;
6572
6573         init_tracer_debugfs(&global_trace, d_tracer);
6574
6575         trace_create_file("tracing_thresh", 0644, d_tracer,
6576                         &global_trace, &tracing_thresh_fops);
6577
6578         trace_create_file("README", 0444, d_tracer,
6579                         NULL, &tracing_readme_fops);
6580
6581         trace_create_file("saved_cmdlines", 0444, d_tracer,
6582                         NULL, &tracing_saved_cmdlines_fops);
6583
6584         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
6585                           NULL, &tracing_saved_cmdlines_size_fops);
6586
6587 #ifdef CONFIG_DYNAMIC_FTRACE
6588         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6589                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6590 #endif
6591
6592         create_trace_instances(d_tracer);
6593
6594         create_trace_options_dir(&global_trace);
6595
6596         return 0;
6597 }
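
/*
 * The files created here (README, saved_cmdlines, tracing_thresh, ...)
 * exist only at the top level of the tracing directory and are not
 * duplicated per instance, while create_trace_instances() adds the empty
 * "instances" directory that the mkdir/rmdir hooks above operate on.
 */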
6598
6599 static int trace_panic_handler(struct notifier_block *this,
6600                                unsigned long event, void *unused)
6601 {
6602         if (ftrace_dump_on_oops)
6603                 ftrace_dump(ftrace_dump_on_oops);
6604         return NOTIFY_OK;
6605 }
6606
6607 static struct notifier_block trace_panic_notifier = {
6608         .notifier_call  = trace_panic_handler,
6609         .next           = NULL,
6610         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6611 };
6612
6613 static int trace_die_handler(struct notifier_block *self,
6614                              unsigned long val,
6615                              void *data)
6616 {
6617         switch (val) {
6618         case DIE_OOPS:
6619                 if (ftrace_dump_on_oops)
6620                         ftrace_dump(ftrace_dump_on_oops);
6621                 break;
6622         default:
6623                 break;
6624         }
6625         return NOTIFY_OK;
6626 }
6627
6628 static struct notifier_block trace_die_notifier = {
6629         .notifier_call = trace_die_handler,
6630         .priority = 200
6631 };
6632
6633 /*
6634  * printk is limited to a maximum of 1024 characters; we really don't
6635  * need it that big. Nothing should be printing 1000 characters anyway.
6636  */
6637 #define TRACE_MAX_PRINT         1000
6638
6639 /*
6640  * Define here KERN_TRACE so that we have one place to modify
6641  * it if we decide to change what log level the ftrace dump
6642  * should be at.
6643  */
6644 #define KERN_TRACE              KERN_EMERG
6645
6646 void
6647 trace_printk_seq(struct trace_seq *s)
6648 {
6649         /* Probably should print a warning here. */
6650         if (s->seq.len >= TRACE_MAX_PRINT)
6651                 s->seq.len = TRACE_MAX_PRINT;
6652
6653         /*
6654          * More paranoid code. Although the buffer size is set to
6655          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
6656          * an extra layer of protection.
6657          */
6658         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
6659                 s->seq.len = s->seq.size - 1;
6660
6661         /* Should already be nul-terminated, but we are paranoid. */
6662         s->buffer[s->seq.len] = 0;
6663
6664         printk(KERN_TRACE "%s", s->buffer);
6665
6666         trace_seq_init(s);
6667 }
6668
6669 void trace_init_global_iter(struct trace_iterator *iter)
6670 {
6671         iter->tr = &global_trace;
6672         iter->trace = iter->tr->current_trace;
6673         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6674         iter->trace_buffer = &global_trace.trace_buffer;
6675
6676         if (iter->trace && iter->trace->open)
6677                 iter->trace->open(iter);
6678
6679         /* Annotate start of buffers if we had overruns */
6680         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6681                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6682
6683         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6684         if (trace_clocks[iter->tr->clock_id].in_ns)
6685                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6686 }
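
/*
 * trace_init_global_iter() fills in just enough of the iterator for the
 * crash-time dump paths; it is shared by ftrace_dump() below (and by the
 * kdb ftrace dump code) so the flag and clock setup stays in one place.
 */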
6687
6688 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6689 {
6690         /* use static because iter can be a bit big for the stack */
6691         static struct trace_iterator iter;
6692         static atomic_t dump_running;
6693         unsigned int old_userobj;
6694         unsigned long flags;
6695         int cnt = 0, cpu;
6696
6697         /* Only allow one dump user at a time. */
6698         if (atomic_inc_return(&dump_running) != 1) {
6699                 atomic_dec(&dump_running);
6700                 return;
6701         }
6702
6703         /*
6704          * Always turn off tracing when we dump.
6705          * We don't need to show trace output of what happens
6706          * between multiple crashes.
6707          *
6708          * If the user does a sysrq-z, then they can re-enable
6709          * tracing with echo 1 > tracing_on.
6710          */
6711         tracing_off();
6712
6713         local_irq_save(flags);
6714
6715         /* Simulate the iterator */
6716         trace_init_global_iter(&iter);
6717
6718         for_each_tracing_cpu(cpu) {
6719                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6720         }
6721
6722         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6723
6724         /* don't look at user memory in panic mode */
6725         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6726
6727         switch (oops_dump_mode) {
6728         case DUMP_ALL:
6729                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6730                 break;
6731         case DUMP_ORIG:
6732                 iter.cpu_file = raw_smp_processor_id();
6733                 break;
6734         case DUMP_NONE:
6735                 goto out_enable;
6736         default:
6737                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6738                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6739         }
6740
6741         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6742
6743         /* Did function tracer already get disabled? */
6744         if (ftrace_is_dead()) {
6745                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6746                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6747         }
6748
6749         /*
6750          * We need to stop all tracing on all CPUs to read
6751          * the next buffer. This is a bit expensive, but it is
6752          * not done often. We fill in all that we can read,
6753          * and then release the locks again.
6754          */
6755
6756         while (!trace_empty(&iter)) {
6757
6758                 if (!cnt)
6759                         printk(KERN_TRACE "---------------------------------\n");
6760
6761                 cnt++;
6762
6763                 /* reset all but tr, trace, and overruns */
6764                 memset(&iter.seq, 0,
6765                        sizeof(struct trace_iterator) -
6766                        offsetof(struct trace_iterator, seq));
6767                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6768                 iter.pos = -1;
6769
6770                 if (trace_find_next_entry_inc(&iter) != NULL) {
6771                         int ret;
6772
6773                         ret = print_trace_line(&iter);
6774                         if (ret != TRACE_TYPE_NO_CONSUME)
6775                                 trace_consume(&iter);
6776                 }
6777                 touch_nmi_watchdog();
6778
6779                 trace_printk_seq(&iter.seq);
6780         }
6781
6782         if (!cnt)
6783                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6784         else
6785                 printk(KERN_TRACE "---------------------------------\n");
6786
6787  out_enable:
6788         trace_flags |= old_userobj;
6789
6790         for_each_tracing_cpu(cpu) {
6791                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6792         }
6793         atomic_dec(&dump_running);
6794         local_irq_restore(flags);
6795 }
6796 EXPORT_SYMBOL_GPL(ftrace_dump);
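
/*
 * ftrace_dump() is exported so other kernel code can get the trace
 * buffer onto the console when it hits a fatal condition.  A minimal
 * sketch ("bad_state" is a stand-in for whatever the caller checks):
 *
 *      if (WARN_ON(bad_state))
 *              ftrace_dump(DUMP_ALL);
 *
 * DUMP_ORIG instead limits the dump to the CPU that made the call.
 */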
6797
6798 __init static int tracer_alloc_buffers(void)
6799 {
6800         int ring_buf_size;
6801         int ret = -ENOMEM;
6802
6803
6804         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6805                 goto out;
6806
6807         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6808                 goto out_free_buffer_mask;
6809
6810         /* Only allocate trace_printk buffers if a trace_printk exists */
6811         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6812                 /* Must be called before global_trace.buffer is allocated */
6813                 trace_printk_init_buffers();
6814
6815         /* To save memory, keep the ring buffer size to its minimum */
6816         if (ring_buffer_expanded)
6817                 ring_buf_size = trace_buf_size;
6818         else
6819                 ring_buf_size = 1;
6820
6821         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6822         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6823
6824         raw_spin_lock_init(&global_trace.start_lock);
6825
6826         /* Used for event triggers */
6827         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6828         if (!temp_buffer)
6829                 goto out_free_cpumask;
6830
6831         if (trace_create_savedcmd() < 0)
6832                 goto out_free_temp_buffer;
6833
6834         /* TODO: make the number of buffers hot pluggable with CPUs */
6835         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6836                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6837                 WARN_ON(1);
6838                 goto out_free_savedcmd;
6839         }
6840
6841         if (global_trace.buffer_disabled)
6842                 tracing_off();
6843
6844         if (trace_boot_clock) {
6845                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
6846                 if (ret < 0)
6847                         pr_warning("Trace clock %s not defined, going back to default\n",
6848                                    trace_boot_clock);
6849         }
6850
6851         /*
6852          * register_tracer() might reference current_trace, so it
6853          * needs to be set before we register anything. This is
6854          * just a bootstrap of current_trace anyway.
6855          */
6856         global_trace.current_trace = &nop_trace;
6857
6858         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6859
6860         ftrace_init_global_array_ops(&global_trace);
6861
6862         register_tracer(&nop_trace);
6863
6864         /* All seems OK, enable tracing */
6865         tracing_disabled = 0;
6866
6867         atomic_notifier_chain_register(&panic_notifier_list,
6868                                        &trace_panic_notifier);
6869
6870         register_die_notifier(&trace_die_notifier);
6871
6872         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6873
6874         INIT_LIST_HEAD(&global_trace.systems);
6875         INIT_LIST_HEAD(&global_trace.events);
6876         list_add(&global_trace.list, &ftrace_trace_arrays);
6877
6878         while (trace_boot_options) {
6879                 char *option;
6880
6881                 option = strsep(&trace_boot_options, ",");
6882                 trace_set_options(&global_trace, option);
6883         }
6884
6885         register_snapshot_cmd();
6886
6887         return 0;
6888
6889 out_free_savedcmd:
6890         free_saved_cmdlines_buffer(savedcmd);
6891 out_free_temp_buffer:
6892         ring_buffer_free(temp_buffer);
6893 out_free_cpumask:
6894         free_cpumask_var(global_trace.tracing_cpumask);
6895 out_free_buffer_mask:
6896         free_cpumask_var(tracing_buffer_mask);
6897 out:
6898         return ret;
6899 }
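
/*
 * Several boot parameters feed into the setup above; a sketch of a
 * command line, with the values picked arbitrarily:
 *
 *      ftrace=function trace_options=sym-addr trace_clock=global trace_buf_size=1048576
 *
 * ftrace= selects default_bootup_tracer (cleared again by
 * clear_boot_tracer() below if it never registers), trace_options=
 * fills trace_boot_options, trace_clock= sets trace_boot_clock and
 * trace_buf_size= adjusts trace_buf_size.
 */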
6900
6901 __init static int clear_boot_tracer(void)
6902 {
6903         /*
6904          * The buffer holding the default bootup tracer name lives in an
6905          * init section. This function is called as a late initcall. If we
6906          * did not find the boot tracer, then clear the pointer out, to
6907          * prevent later registration from accessing the buffer that is
6908          * about to be freed.
6909          */
6910         if (!default_bootup_tracer)
6911                 return 0;
6912
6913         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6914                default_bootup_tracer);
6915         default_bootup_tracer = NULL;
6916
6917         return 0;
6918 }
6919
6920 early_initcall(tracer_alloc_buffers);
6921 fs_initcall(tracer_init_debugfs);
6922 late_initcall(clear_boot_tracer);
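
/*
 * Initcall ordering: tracer_alloc_buffers() runs as an early initcall so
 * the ring buffer exists before tracers and events register,
 * tracer_init_debugfs() waits for the filesystem code via fs_initcall,
 * and clear_boot_tracer() runs late so a bootup tracer name that never
 * registered is dropped before its __initdata storage is freed.
 */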