1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring-buffer to count the
57  * entries inserted during the selftest, although some concurrent
58  * insertions into the ring-buffer, such as trace_printk, could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80         return 0;
81 }
82
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 but will turn to zero if the initialization
93  * of the tracer is successful. But that is the only place that sets
94  * this back to zero.
95  */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124         struct module                   *mod;
125         unsigned long                   length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131         /*
132          * "end" is first and points to NULL as it must be different
133          * than "mod" or "enum_string"
134          */
135         union trace_enum_map_item       *next;
136         const char                      *end;   /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149         struct trace_enum_map           map;
150         struct trace_enum_map_head      head;
151         struct trace_enum_map_tail      tail;
152 };
153
154 static union trace_enum_map_item *trace_enum_maps;
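/*
 * Illustrative sketch only (not part of the kernel code): with the
 * layout described above, one saved array looks like
 *
 *	[ head | map 0 | map 1 | ... | map N-1 | tail ]
 *
 * so a hypothetical walker over a single saved array could do:
 *
 *	union trace_enum_map_item *ptr = trace_enum_maps;
 *	unsigned long i, len = ptr->head.length;
 *
 *	for (i = 0; i < len; i++)
 *		use_map(&ptr[i + 1].map);	// skip the head item
 *	ptr = ptr[len + 1].tail.next;		// next saved array, or NULL
 *
 * use_map() is a placeholder, not a real kernel function.
 */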
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE         100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168         default_bootup_tracer = bootup_tracer_buf;
169         /* We are using ftrace early, expand it */
170         ring_buffer_expanded = true;
171         return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177         if (*str++ != '=' || !*str) {
178                 ftrace_dump_on_oops = DUMP_ALL;
179                 return 1;
180         }
181
182         if (!strcmp("orig_cpu", str)) {
183                 ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194                 __disable_trace_on_warning = 1;
195         return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201         allocate_snapshot = true;
202         /* We also need the main ring buffer expanded */
203         ring_buffer_expanded = true;
204         return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214         return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224         trace_boot_clock = trace_boot_clock_buf;
225         return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232                 tracepoint_printk = 1;
233         return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239         nsec += 500;
240         do_div(nsec, 1000);
241         return nsec;
242 }
243
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS                                             \
246         (FUNCTION_DEFAULT_FLAGS |                                       \
247          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
248          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
249          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
250          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
254                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258         TRACE_ITER_EVENT_FORK
259
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a linked list of pages that will store trace entries. The
264  * page descriptor of the pages in memory is used to hold
265  * the linked list by linking the lru item in the page descriptor
266  * to each of the pages in the buffer per CPU.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273         .trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275
276 LIST_HEAD(ftrace_trace_arrays);
277
278 int trace_array_get(struct trace_array *this_tr)
279 {
280         struct trace_array *tr;
281         int ret = -ENODEV;
282
283         mutex_lock(&trace_types_lock);
284         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285                 if (tr == this_tr) {
286                         tr->ref++;
287                         ret = 0;
288                         break;
289                 }
290         }
291         mutex_unlock(&trace_types_lock);
292
293         return ret;
294 }
295
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298         WARN_ON(!this_tr->ref);
299         this_tr->ref--;
300 }
301
302 void trace_array_put(struct trace_array *this_tr)
303 {
304         mutex_lock(&trace_types_lock);
305         __trace_array_put(this_tr);
306         mutex_unlock(&trace_types_lock);
307 }
308
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310                               struct ring_buffer *buffer,
311                               struct ring_buffer_event *event)
312 {
313         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314             !filter_match_preds(call->filter, rec)) {
315                 __trace_event_discard_commit(buffer, event);
316                 return 1;
317         }
318
319         return 0;
320 }
321
322 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
323 {
324         u64 ts;
325
326         /* Early boot up does not have a buffer yet */
327         if (!buf->buffer)
328                 return trace_clock_local();
329
330         ts = ring_buffer_time_stamp(buf->buffer, cpu);
331         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
332
333         return ts;
334 }
335
336 cycle_t ftrace_now(int cpu)
337 {
338         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
339 }
340
341 /**
342  * tracing_is_enabled - Show if global_trace has been enabled
343  *
344  * Shows if the global trace has been enabled or not. It uses the
345  * mirror flag "buffer_disabled" so that it can be used in fast paths
346  * such as the irqsoff tracer. But it may be inaccurate due to races. If
347  * you need to know the accurate state, use tracing_is_on(), which is a
348  * little slower, but accurate.
349  */
350 int tracing_is_enabled(void)
351 {
352         /*
353          * For quick access (irqsoff uses this in fast path), just
354          * return the mirror variable of the state of the ring buffer.
355          * It's a little racy, but we don't really care.
356          */
357         smp_rmb();
358         return !global_trace.buffer_disabled;
359 }
360
361 /*
362  * trace_buf_size is the size in bytes that is allocated
363  * for a buffer. Note, the number of bytes is always rounded
364  * to page size.
365  *
366  * This number is purposely set to a low 16384 entries so that,
367  * if a dump on oops happens, there is not too much output to
368  * wait for. In any case, the size is configurable at both boot
369  * time and run time.
370  */
371 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
372
373 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
374
375 /* trace_types holds a link list of available tracers. */
376 static struct tracer            *trace_types __read_mostly;
377
378 /*
379  * trace_types_lock is used to protect the trace_types list.
380  */
381 DEFINE_MUTEX(trace_types_lock);
382
383 /*
384  * Serialize access to the ring buffer.
385  *
386  * The ring buffer serializes readers, but that is only low level protection.
387  * The validity of the events (returned by ring_buffer_peek() etc.)
388  * is not protected by the ring buffer.
389  *
390  * The content of events may become garbage if we allow other processes to
391  * consume these events concurrently:
392  *   A) the page of the consumed events may become a normal page
393  *      (not a reader page) in the ring buffer, and this page will be
394  *      rewritten by the events producer.
395  *   B) the page of the consumed events may become a page for splice_read,
396  *      and this page will be returned to the system.
397  *
398  * These primitives allow multiple processes to access different per-CPU
399  * ring buffers concurrently.
400  *
401  * These primitives do not distinguish read-only from read-consume access.
402  * Multiple read-only accesses are also serialized.
403  */
404
405 #ifdef CONFIG_SMP
406 static DECLARE_RWSEM(all_cpu_access_lock);
407 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
408
409 static inline void trace_access_lock(int cpu)
410 {
411         if (cpu == RING_BUFFER_ALL_CPUS) {
412                 /* gain it for accessing the whole ring buffer. */
413                 down_write(&all_cpu_access_lock);
414         } else {
415                 /* gain it for accessing a cpu ring buffer. */
416
417                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
418                 down_read(&all_cpu_access_lock);
419
420                 /* Secondly block other access to this @cpu ring buffer. */
421                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
422         }
423 }
424
425 static inline void trace_access_unlock(int cpu)
426 {
427         if (cpu == RING_BUFFER_ALL_CPUS) {
428                 up_write(&all_cpu_access_lock);
429         } else {
430                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
431                 up_read(&all_cpu_access_lock);
432         }
433 }
434
435 static inline void trace_access_lock_init(void)
436 {
437         int cpu;
438
439         for_each_possible_cpu(cpu)
440                 mutex_init(&per_cpu(cpu_access_lock, cpu));
441 }
442
443 #else
444
445 static DEFINE_MUTEX(access_lock);
446
447 static inline void trace_access_lock(int cpu)
448 {
449         (void)cpu;
450         mutex_lock(&access_lock);
451 }
452
453 static inline void trace_access_unlock(int cpu)
454 {
455         (void)cpu;
456         mutex_unlock(&access_lock);
457 }
458
459 static inline void trace_access_lock_init(void)
460 {
461 }
462
463 #endif
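
/*
 * Illustrative sketch only: a consuming reader of a single CPU buffer
 * is expected to bracket its reads with these helpers, along the
 * lines of
 *
 *	trace_access_lock(cpu);
 *	while ((event = ring_buffer_consume(buffer, cpu, &ts, NULL)))
 *		process(event);			// placeholder
 *	trace_access_unlock(cpu);
 *
 * while a reader that touches every CPU would pass RING_BUFFER_ALL_CPUS
 * to take the lock exclusively.
 */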
464
465 #ifdef CONFIG_STACKTRACE
466 static void __ftrace_trace_stack(struct ring_buffer *buffer,
467                                  unsigned long flags,
468                                  int skip, int pc, struct pt_regs *regs);
469 static inline void ftrace_trace_stack(struct trace_array *tr,
470                                       struct ring_buffer *buffer,
471                                       unsigned long flags,
472                                       int skip, int pc, struct pt_regs *regs);
473
474 #else
475 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
476                                         unsigned long flags,
477                                         int skip, int pc, struct pt_regs *regs)
478 {
479 }
480 static inline void ftrace_trace_stack(struct trace_array *tr,
481                                       struct ring_buffer *buffer,
482                                       unsigned long flags,
483                                       int skip, int pc, struct pt_regs *regs)
484 {
485 }
486
487 #endif
488
489 static void tracer_tracing_on(struct trace_array *tr)
490 {
491         if (tr->trace_buffer.buffer)
492                 ring_buffer_record_on(tr->trace_buffer.buffer);
493         /*
494          * This flag is looked at when buffers haven't been allocated
495          * yet, or by some tracers (like irqsoff) that just want to
496          * know if the ring buffer has been disabled, but can handle
497          * races where it gets disabled while we still do a record.
498          * As the check is in the fast path of the tracers, it is more
499          * important to be fast than accurate.
500          */
501         tr->buffer_disabled = 0;
502         /* Make the flag seen by readers */
503         smp_wmb();
504 }
505
506 /**
507  * tracing_on - enable tracing buffers
508  *
509  * This function enables tracing buffers that may have been
510  * disabled with tracing_off.
511  */
512 void tracing_on(void)
513 {
514         tracer_tracing_on(&global_trace);
515 }
516 EXPORT_SYMBOL_GPL(tracing_on);
517
518 /**
519  * __trace_puts - write a constant string into the trace buffer.
520  * @ip:    The address of the caller
521  * @str:   The constant string to write
522  * @size:  The size of the string.
523  */
524 int __trace_puts(unsigned long ip, const char *str, int size)
525 {
526         struct ring_buffer_event *event;
527         struct ring_buffer *buffer;
528         struct print_entry *entry;
529         unsigned long irq_flags;
530         int alloc;
531         int pc;
532
533         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
534                 return 0;
535
536         pc = preempt_count();
537
538         if (unlikely(tracing_selftest_running || tracing_disabled))
539                 return 0;
540
541         alloc = sizeof(*entry) + size + 2; /* possible \n added */
542
543         local_save_flags(irq_flags);
544         buffer = global_trace.trace_buffer.buffer;
545         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
546                                           irq_flags, pc);
547         if (!event)
548                 return 0;
549
550         entry = ring_buffer_event_data(event);
551         entry->ip = ip;
552
553         memcpy(&entry->buf, str, size);
554
555         /* Add a newline if necessary */
556         if (entry->buf[size - 1] != '\n') {
557                 entry->buf[size] = '\n';
558                 entry->buf[size + 1] = '\0';
559         } else
560                 entry->buf[size] = '\0';
561
562         __buffer_unlock_commit(buffer, event);
563         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
564
565         return size;
566 }
567 EXPORT_SYMBOL_GPL(__trace_puts);
568
569 /**
570  * __trace_bputs - write the pointer to a constant string into trace buffer
571  * @ip:    The address of the caller
572  * @str:   The constant string to write to the buffer
573  */
574 int __trace_bputs(unsigned long ip, const char *str)
575 {
576         struct ring_buffer_event *event;
577         struct ring_buffer *buffer;
578         struct bputs_entry *entry;
579         unsigned long irq_flags;
580         int size = sizeof(struct bputs_entry);
581         int pc;
582
583         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
584                 return 0;
585
586         pc = preempt_count();
587
588         if (unlikely(tracing_selftest_running || tracing_disabled))
589                 return 0;
590
591         local_save_flags(irq_flags);
592         buffer = global_trace.trace_buffer.buffer;
593         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
594                                           irq_flags, pc);
595         if (!event)
596                 return 0;
597
598         entry = ring_buffer_event_data(event);
599         entry->ip                       = ip;
600         entry->str                      = str;
601
602         __buffer_unlock_commit(buffer, event);
603         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
604
605         return 1;
606 }
607 EXPORT_SYMBOL_GPL(__trace_bputs);
608
609 #ifdef CONFIG_TRACER_SNAPSHOT
610 /**
611  * tracing_snapshot - take a snapshot of the current buffer.
612  *
613  * This causes a swap between the snapshot buffer and the current live
614  * tracing buffer. You can use this to take snapshots of the live
615  * trace when some condition is triggered, but continue to trace.
616  *
617  * Note, make sure to allocate the snapshot with either
618  * a tracing_snapshot_alloc(), or by doing it manually
619  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
620  *
621  * If the snapshot buffer is not allocated, it will stop tracing,
622  * basically making a permanent snapshot.
623  */
624 void tracing_snapshot(void)
625 {
626         struct trace_array *tr = &global_trace;
627         struct tracer *tracer = tr->current_trace;
628         unsigned long flags;
629
630         if (in_nmi()) {
631                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
632                 internal_trace_puts("*** snapshot is being ignored        ***\n");
633                 return;
634         }
635
636         if (!tr->allocated_snapshot) {
637                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
638                 internal_trace_puts("*** stopping trace here!   ***\n");
639                 tracing_off();
640                 return;
641         }
642
643         /* Note, snapshot can not be used when the tracer uses it */
644         if (tracer->use_max_tr) {
645                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
646                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
647                 return;
648         }
649
650         local_irq_save(flags);
651         update_max_tr(tr, current, smp_processor_id());
652         local_irq_restore(flags);
653 }
654 EXPORT_SYMBOL_GPL(tracing_snapshot);
655
656 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
657                                         struct trace_buffer *size_buf, int cpu_id);
658 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
659
660 static int alloc_snapshot(struct trace_array *tr)
661 {
662         int ret;
663
664         if (!tr->allocated_snapshot) {
665
666                 /* allocate spare buffer */
667                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
668                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
669                 if (ret < 0)
670                         return ret;
671
672                 tr->allocated_snapshot = true;
673         }
674
675         return 0;
676 }
677
678 static void free_snapshot(struct trace_array *tr)
679 {
680         /*
681          * We don't free the ring buffer. Instead, resize it, because
682          * the max_tr ring buffer has some state (e.g. ring->clock) and
683          * we want to preserve it.
684          */
685         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
686         set_buffer_entries(&tr->max_buffer, 1);
687         tracing_reset_online_cpus(&tr->max_buffer);
688         tr->allocated_snapshot = false;
689 }
690
691 /**
692  * tracing_alloc_snapshot - allocate snapshot buffer.
693  *
694  * This only allocates the snapshot buffer if it isn't already
695  * allocated - it doesn't also take a snapshot.
696  *
697  * This is meant to be used in cases where the snapshot buffer needs
698  * to be set up for events that can't sleep but need to be able to
699  * trigger a snapshot.
700  */
701 int tracing_alloc_snapshot(void)
702 {
703         struct trace_array *tr = &global_trace;
704         int ret;
705
706         ret = alloc_snapshot(tr);
707         WARN_ON(ret < 0);
708
709         return ret;
710 }
711 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
712
713 /**
714  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
715  *
716  * This is similar to tracing_snapshot(), but it will allocate the
717  * snapshot buffer if it isn't already allocated. Use this only
718  * where it is safe to sleep, as the allocation may sleep.
719  *
720  * This causes a swap between the snapshot buffer and the current live
721  * tracing buffer. You can use this to take snapshots of the live
722  * trace when some condition is triggered, but continue to trace.
723  */
724 void tracing_snapshot_alloc(void)
725 {
726         int ret;
727
728         ret = tracing_alloc_snapshot();
729         if (ret < 0)
730                 return;
731
732         tracing_snapshot();
733 }
734 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
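
/*
 * Illustrative sketch only (hypothetical caller): from sleepable context
 * a single call does both the allocation and the snapshot:
 *
 *	if (suspicious_condition())		// placeholder condition
 *		tracing_snapshot_alloc();
 *
 * From atomic context, call tracing_alloc_snapshot() ahead of time and
 * use tracing_snapshot() at the trigger point instead.
 */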
735 #else
736 void tracing_snapshot(void)
737 {
738         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
739 }
740 EXPORT_SYMBOL_GPL(tracing_snapshot);
741 int tracing_alloc_snapshot(void)
742 {
743         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
744         return -ENODEV;
745 }
746 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
747 void tracing_snapshot_alloc(void)
748 {
749         /* Give warning */
750         tracing_snapshot();
751 }
752 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
753 #endif /* CONFIG_TRACER_SNAPSHOT */
754
755 static void tracer_tracing_off(struct trace_array *tr)
756 {
757         if (tr->trace_buffer.buffer)
758                 ring_buffer_record_off(tr->trace_buffer.buffer);
759         /*
760          * This flag is looked at when buffers haven't been allocated
761          * yet, or by some tracers (like irqsoff) that just want to
762          * know if the ring buffer has been disabled, but can handle
763          * races where it gets disabled while we still do a record.
764          * As the check is in the fast path of the tracers, it is more
765          * important to be fast than accurate.
766          */
767         tr->buffer_disabled = 1;
768         /* Make the flag seen by readers */
769         smp_wmb();
770 }
771
772 /**
773  * tracing_off - turn off tracing buffers
774  *
775  * This function stops the tracing buffers from recording data.
776  * It does not disable any overhead the tracers themselves may
777  * be causing. This function simply causes all recording to
778  * the ring buffers to fail.
779  */
780 void tracing_off(void)
781 {
782         tracer_tracing_off(&global_trace);
783 }
784 EXPORT_SYMBOL_GPL(tracing_off);
785
786 void disable_trace_on_warning(void)
787 {
788         if (__disable_trace_on_warning)
789                 tracing_off();
790 }
791
792 /**
793  * tracer_tracing_is_on - show the real state of the ring buffer
794  * @tr: the trace array whose ring buffer state is queried
795  *
796  * Shows the real state of the ring buffer: whether it is enabled or not.
797  */
798 static int tracer_tracing_is_on(struct trace_array *tr)
799 {
800         if (tr->trace_buffer.buffer)
801                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
802         return !tr->buffer_disabled;
803 }
804
805 /**
806  * tracing_is_on - show whether the global ring buffers are enabled
807  */
808 int tracing_is_on(void)
809 {
810         return tracer_tracing_is_on(&global_trace);
811 }
812 EXPORT_SYMBOL_GPL(tracing_is_on);
813
814 static int __init set_buf_size(char *str)
815 {
816         unsigned long buf_size;
817
818         if (!str)
819                 return 0;
820         buf_size = memparse(str, &str);
821         /* nr_entries can not be zero */
822         if (buf_size == 0)
823                 return 0;
824         trace_buf_size = buf_size;
825         return 1;
826 }
827 __setup("trace_buf_size=", set_buf_size);
828
829 static int __init set_tracing_thresh(char *str)
830 {
831         unsigned long threshold;
832         int ret;
833
834         if (!str)
835                 return 0;
836         ret = kstrtoul(str, 0, &threshold);
837         if (ret < 0)
838                 return 0;
839         tracing_thresh = threshold * 1000;
840         return 1;
841 }
842 __setup("tracing_thresh=", set_tracing_thresh);
843
844 unsigned long nsecs_to_usecs(unsigned long nsecs)
845 {
846         return nsecs / 1000;
847 }
848
849 /*
850  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
851  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
852  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
853  * of strings in the order that the enums were defined.
854  */
855 #undef C
856 #define C(a, b) b
857
858 /* These must match the bit positions in trace_iterator_flags */
859 static const char *trace_options[] = {
860         TRACE_FLAGS
861         NULL
862 };
863
864 static struct {
865         u64 (*func)(void);
866         const char *name;
867         int in_ns;              /* is this clock in nanoseconds? */
868 } trace_clocks[] = {
869         { trace_clock_local,            "local",        1 },
870         { trace_clock_global,           "global",       1 },
871         { trace_clock_counter,          "counter",      0 },
872         { trace_clock_jiffies,          "uptime",       0 },
873         { trace_clock,                  "perf",         1 },
874         { ktime_get_mono_fast_ns,       "mono",         1 },
875         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
876         ARCH_TRACE_CLOCKS
877 };
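
/*
 * For illustration: these clocks are what the tracefs "trace_clock" file
 * exposes, so one can be selected at run time with, for example,
 *
 *	echo global > /sys/kernel/debug/tracing/trace_clock
 *
 * or at boot time via the trace_clock= parameter handled earlier.
 */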
878
879 /*
880  * trace_parser_get_init - gets the buffer for trace parser
881  */
882 int trace_parser_get_init(struct trace_parser *parser, int size)
883 {
884         memset(parser, 0, sizeof(*parser));
885
886         parser->buffer = kmalloc(size, GFP_KERNEL);
887         if (!parser->buffer)
888                 return 1;
889
890         parser->size = size;
891         return 0;
892 }
893
894 /*
895  * trace_parser_put - frees the buffer for trace parser
896  */
897 void trace_parser_put(struct trace_parser *parser)
898 {
899         kfree(parser->buffer);
900 }
901
902 /*
903  * trace_get_user - reads the user input string separated by space
904  * (matched by isspace(ch))
905  *
906  * For each string found the 'struct trace_parser' is updated,
907  * and the function returns.
908  *
909  * Returns number of bytes read.
910  *
911  * See kernel/trace/trace.h for 'struct trace_parser' details.
912  */
913 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
914         size_t cnt, loff_t *ppos)
915 {
916         char ch;
917         size_t read = 0;
918         ssize_t ret;
919
920         if (!*ppos)
921                 trace_parser_clear(parser);
922
923         ret = get_user(ch, ubuf++);
924         if (ret)
925                 goto out;
926
927         read++;
928         cnt--;
929
930         /*
931          * If the parser is not finished with the last write,
932          * continue reading the user input without skipping spaces.
933          */
934         if (!parser->cont) {
935                 /* skip white space */
936                 while (cnt && isspace(ch)) {
937                         ret = get_user(ch, ubuf++);
938                         if (ret)
939                                 goto out;
940                         read++;
941                         cnt--;
942                 }
943
944                 /* only spaces were written */
945                 if (isspace(ch)) {
946                         *ppos += read;
947                         ret = read;
948                         goto out;
949                 }
950
951                 parser->idx = 0;
952         }
953
954         /* read the non-space input */
955         while (cnt && !isspace(ch)) {
956                 if (parser->idx < parser->size - 1)
957                         parser->buffer[parser->idx++] = ch;
958                 else {
959                         ret = -EINVAL;
960                         goto out;
961                 }
962                 ret = get_user(ch, ubuf++);
963                 if (ret)
964                         goto out;
965                 read++;
966                 cnt--;
967         }
968
969         /* We either got finished input or we have to wait for another call. */
970         if (isspace(ch)) {
971                 parser->buffer[parser->idx] = 0;
972                 parser->cont = false;
973         } else if (parser->idx < parser->size - 1) {
974                 parser->cont = true;
975                 parser->buffer[parser->idx++] = ch;
976         } else {
977                 ret = -EINVAL;
978                 goto out;
979         }
980
981         *ppos += read;
982         ret = read;
983
984 out:
985         return ret;
986 }
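
/*
 * For illustration: write paths such as the one behind set_ftrace_filter
 * use this helper to split input like
 *
 *	echo "func_a func_b" > set_ftrace_filter
 *
 * into space-separated words, one word per call; parser->cont marks a
 * word that was cut short so the next call can continue it.
 */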
987
988 /* TODO add a seq_buf_to_buffer() */
989 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
990 {
991         int len;
992
993         if (trace_seq_used(s) <= s->seq.readpos)
994                 return -EBUSY;
995
996         len = trace_seq_used(s) - s->seq.readpos;
997         if (cnt > len)
998                 cnt = len;
999         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1000
1001         s->seq.readpos += cnt;
1002         return cnt;
1003 }
1004
1005 unsigned long __read_mostly     tracing_thresh;
1006
1007 #ifdef CONFIG_TRACER_MAX_TRACE
1008 /*
1009  * Copy the new maximum trace into the separate maximum-trace
1010  * structure. (this way the maximum trace is permanently saved,
1011  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1012  */
1013 static void
1014 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1015 {
1016         struct trace_buffer *trace_buf = &tr->trace_buffer;
1017         struct trace_buffer *max_buf = &tr->max_buffer;
1018         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1019         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1020
1021         max_buf->cpu = cpu;
1022         max_buf->time_start = data->preempt_timestamp;
1023
1024         max_data->saved_latency = tr->max_latency;
1025         max_data->critical_start = data->critical_start;
1026         max_data->critical_end = data->critical_end;
1027
1028         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1029         max_data->pid = tsk->pid;
1030         /*
1031          * If tsk == current, then use current_uid(), as that does not use
1032          * RCU. The irq tracer can be called out of RCU scope.
1033          */
1034         if (tsk == current)
1035                 max_data->uid = current_uid();
1036         else
1037                 max_data->uid = task_uid(tsk);
1038
1039         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1040         max_data->policy = tsk->policy;
1041         max_data->rt_priority = tsk->rt_priority;
1042
1043         /* record this task's comm */
1044         tracing_record_cmdline(tsk);
1045 }
1046
1047 /**
1048  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1049  * @tr: tracer
1050  * @tsk: the task with the latency
1051  * @cpu: The cpu that initiated the trace.
1052  *
1053  * Flip the buffers between the @tr and the max_tr and record information
1054  * about which task was the cause of this latency.
1055  */
1056 void
1057 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1058 {
1059         struct ring_buffer *buf;
1060
1061         if (tr->stop_count)
1062                 return;
1063
1064         WARN_ON_ONCE(!irqs_disabled());
1065
1066         if (!tr->allocated_snapshot) {
1067                 /* Only the nop tracer should hit this when disabling */
1068                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1069                 return;
1070         }
1071
1072         arch_spin_lock(&tr->max_lock);
1073
1074         buf = tr->trace_buffer.buffer;
1075         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1076         tr->max_buffer.buffer = buf;
1077
1078         __update_max_tr(tr, tsk, cpu);
1079         arch_spin_unlock(&tr->max_lock);
1080 }
1081
1082 /**
1083  * update_max_tr_single - only copy one trace over, and reset the rest
1084  * @tr: tracer
1085  * @tsk: the task with the latency
1086  * @cpu: the cpu of the buffer to copy.
1087  *
1088  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1089  */
1090 void
1091 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1092 {
1093         int ret;
1094
1095         if (tr->stop_count)
1096                 return;
1097
1098         WARN_ON_ONCE(!irqs_disabled());
1099         if (!tr->allocated_snapshot) {
1100                 /* Only the nop tracer should hit this when disabling */
1101                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1102                 return;
1103         }
1104
1105         arch_spin_lock(&tr->max_lock);
1106
1107         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1108
1109         if (ret == -EBUSY) {
1110                 /*
1111                  * We failed to swap the buffer due to a commit taking
1112                  * place on this CPU. We fail to record, but we reset
1113                  * the max trace buffer (no one writes directly to it)
1114                  * and flag that it failed.
1115                  */
1116                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1117                         "Failed to swap buffers due to commit in progress\n");
1118         }
1119
1120         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1121
1122         __update_max_tr(tr, tsk, cpu);
1123         arch_spin_unlock(&tr->max_lock);
1124 }
1125 #endif /* CONFIG_TRACER_MAX_TRACE */
1126
1127 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1128 {
1129         /* Iterators are static, they should be filled or empty */
1130         if (trace_buffer_iter(iter, iter->cpu_file))
1131                 return 0;
1132
1133         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1134                                 full);
1135 }
1136
1137 #ifdef CONFIG_FTRACE_STARTUP_TEST
1138 static int run_tracer_selftest(struct tracer *type)
1139 {
1140         struct trace_array *tr = &global_trace;
1141         struct tracer *saved_tracer = tr->current_trace;
1142         int ret;
1143
1144         if (!type->selftest || tracing_selftest_disabled)
1145                 return 0;
1146
1147         /*
1148          * Run a selftest on this tracer.
1149          * Here we reset the trace buffer, and set the current
1150          * tracer to be this tracer. The tracer can then run some
1151          * internal tracing to verify that everything is in order.
1152          * If we fail, we do not register this tracer.
1153          */
1154         tracing_reset_online_cpus(&tr->trace_buffer);
1155
1156         tr->current_trace = type;
1157
1158 #ifdef CONFIG_TRACER_MAX_TRACE
1159         if (type->use_max_tr) {
1160                 /* If we expanded the buffers, make sure the max is expanded too */
1161                 if (ring_buffer_expanded)
1162                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1163                                            RING_BUFFER_ALL_CPUS);
1164                 tr->allocated_snapshot = true;
1165         }
1166 #endif
1167
1168         /* the test is responsible for initializing and enabling */
1169         pr_info("Testing tracer %s: ", type->name);
1170         ret = type->selftest(type, tr);
1171         /* the test is responsible for resetting too */
1172         tr->current_trace = saved_tracer;
1173         if (ret) {
1174                 printk(KERN_CONT "FAILED!\n");
1175                 /* Add the warning after printing 'FAILED' */
1176                 WARN_ON(1);
1177                 return -1;
1178         }
1179         /* Only reset on passing, to avoid touching corrupted buffers */
1180         tracing_reset_online_cpus(&tr->trace_buffer);
1181
1182 #ifdef CONFIG_TRACER_MAX_TRACE
1183         if (type->use_max_tr) {
1184                 tr->allocated_snapshot = false;
1185
1186                 /* Shrink the max buffer again */
1187                 if (ring_buffer_expanded)
1188                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1189                                            RING_BUFFER_ALL_CPUS);
1190         }
1191 #endif
1192
1193         printk(KERN_CONT "PASSED\n");
1194         return 0;
1195 }
1196 #else
1197 static inline int run_tracer_selftest(struct tracer *type)
1198 {
1199         return 0;
1200 }
1201 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1202
1203 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1204
1205 static void __init apply_trace_boot_options(void);
1206
1207 /**
1208  * register_tracer - register a tracer with the ftrace system.
1209  * @type: the plugin for the tracer
1210  *
1211  * Register a new plugin tracer.
1212  */
1213 int __init register_tracer(struct tracer *type)
1214 {
1215         struct tracer *t;
1216         int ret = 0;
1217
1218         if (!type->name) {
1219                 pr_info("Tracer must have a name\n");
1220                 return -1;
1221         }
1222
1223         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1224                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1225                 return -1;
1226         }
1227
1228         mutex_lock(&trace_types_lock);
1229
1230         tracing_selftest_running = true;
1231
1232         for (t = trace_types; t; t = t->next) {
1233                 if (strcmp(type->name, t->name) == 0) {
1234                         /* already found */
1235                         pr_info("Tracer %s already registered\n",
1236                                 type->name);
1237                         ret = -1;
1238                         goto out;
1239                 }
1240         }
1241
1242         if (!type->set_flag)
1243                 type->set_flag = &dummy_set_flag;
1244         if (!type->flags) {
1245                 /* allocate a dummy tracer_flags */
1246                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1247                 if (!type->flags) {
1248                         ret = -ENOMEM;
1249                         goto out;
1250                 }
1251                 type->flags->val = 0;
1252                 type->flags->opts = dummy_tracer_opt;
1253         } else if (!type->flags->opts)
1254                 type->flags->opts = dummy_tracer_opt;
1256
1257         /* store the tracer for __set_tracer_option */
1258         type->flags->trace = type;
1259
1260         ret = run_tracer_selftest(type);
1261         if (ret < 0)
1262                 goto out;
1263
1264         type->next = trace_types;
1265         trace_types = type;
1266         add_tracer_options(&global_trace, type);
1267
1268  out:
1269         tracing_selftest_running = false;
1270         mutex_unlock(&trace_types_lock);
1271
1272         if (ret || !default_bootup_tracer)
1273                 goto out_unlock;
1274
1275         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1276                 goto out_unlock;
1277
1278         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1279         /* Do we want this tracer to start on bootup? */
1280         tracing_set_tracer(&global_trace, type->name);
1281         default_bootup_tracer = NULL;
1282
1283         apply_trace_boot_options();
1284
1285         /* disable other selftests, since this will break them */
1286         tracing_selftest_disabled = true;
1287 #ifdef CONFIG_FTRACE_STARTUP_TEST
1288         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1289                type->name);
1290 #endif
1291
1292  out_unlock:
1293         return ret;
1294 }
1295
1296 void tracing_reset(struct trace_buffer *buf, int cpu)
1297 {
1298         struct ring_buffer *buffer = buf->buffer;
1299
1300         if (!buffer)
1301                 return;
1302
1303         ring_buffer_record_disable(buffer);
1304
1305         /* Make sure all commits have finished */
1306         synchronize_sched();
1307         ring_buffer_reset_cpu(buffer, cpu);
1308
1309         ring_buffer_record_enable(buffer);
1310 }
1311
1312 void tracing_reset_online_cpus(struct trace_buffer *buf)
1313 {
1314         struct ring_buffer *buffer = buf->buffer;
1315         int cpu;
1316
1317         if (!buffer)
1318                 return;
1319
1320         ring_buffer_record_disable(buffer);
1321
1322         /* Make sure all commits have finished */
1323         synchronize_sched();
1324
1325         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1326
1327         for_each_online_cpu(cpu)
1328                 ring_buffer_reset_cpu(buffer, cpu);
1329
1330         ring_buffer_record_enable(buffer);
1331 }
1332
1333 /* Must have trace_types_lock held */
1334 void tracing_reset_all_online_cpus(void)
1335 {
1336         struct trace_array *tr;
1337
1338         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1339                 tracing_reset_online_cpus(&tr->trace_buffer);
1340 #ifdef CONFIG_TRACER_MAX_TRACE
1341                 tracing_reset_online_cpus(&tr->max_buffer);
1342 #endif
1343         }
1344 }
1345
1346 #define SAVED_CMDLINES_DEFAULT 128
1347 #define NO_CMDLINE_MAP UINT_MAX
1348 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1349 struct saved_cmdlines_buffer {
1350         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1351         unsigned *map_cmdline_to_pid;
1352         unsigned cmdline_num;
1353         int cmdline_idx;
1354         char *saved_cmdlines;
1355 };
1356 static struct saved_cmdlines_buffer *savedcmd;
1357
1358 /* temporarily disable recording */
1359 static atomic_t trace_record_cmdline_disabled __read_mostly;
1360
1361 static inline char *get_saved_cmdlines(int idx)
1362 {
1363         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1364 }
1365
1366 static inline void set_cmdline(int idx, const char *cmdline)
1367 {
1368         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1369 }
1370
1371 static int allocate_cmdlines_buffer(unsigned int val,
1372                                     struct saved_cmdlines_buffer *s)
1373 {
1374         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1375                                         GFP_KERNEL);
1376         if (!s->map_cmdline_to_pid)
1377                 return -ENOMEM;
1378
1379         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1380         if (!s->saved_cmdlines) {
1381                 kfree(s->map_cmdline_to_pid);
1382                 return -ENOMEM;
1383         }
1384
1385         s->cmdline_idx = 0;
1386         s->cmdline_num = val;
1387         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1388                sizeof(s->map_pid_to_cmdline));
1389         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1390                val * sizeof(*s->map_cmdline_to_pid));
1391
1392         return 0;
1393 }
1394
1395 static int trace_create_savedcmd(void)
1396 {
1397         int ret;
1398
1399         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1400         if (!savedcmd)
1401                 return -ENOMEM;
1402
1403         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1404         if (ret < 0) {
1405                 kfree(savedcmd);
1406                 savedcmd = NULL;
1407                 return -ENOMEM;
1408         }
1409
1410         return 0;
1411 }
1412
1413 int is_tracing_stopped(void)
1414 {
1415         return global_trace.stop_count;
1416 }
1417
1418 /**
1419  * tracing_start - quick start of the tracer
1420  *
1421  * If tracing is enabled but was stopped by tracing_stop,
1422  * this will start the tracer back up.
1423  */
1424 void tracing_start(void)
1425 {
1426         struct ring_buffer *buffer;
1427         unsigned long flags;
1428
1429         if (tracing_disabled)
1430                 return;
1431
1432         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1433         if (--global_trace.stop_count) {
1434                 if (global_trace.stop_count < 0) {
1435                         /* Someone screwed up their debugging */
1436                         WARN_ON_ONCE(1);
1437                         global_trace.stop_count = 0;
1438                 }
1439                 goto out;
1440         }
1441
1442         /* Prevent the buffers from switching */
1443         arch_spin_lock(&global_trace.max_lock);
1444
1445         buffer = global_trace.trace_buffer.buffer;
1446         if (buffer)
1447                 ring_buffer_record_enable(buffer);
1448
1449 #ifdef CONFIG_TRACER_MAX_TRACE
1450         buffer = global_trace.max_buffer.buffer;
1451         if (buffer)
1452                 ring_buffer_record_enable(buffer);
1453 #endif
1454
1455         arch_spin_unlock(&global_trace.max_lock);
1456
1457  out:
1458         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1459 }
1460
1461 static void tracing_start_tr(struct trace_array *tr)
1462 {
1463         struct ring_buffer *buffer;
1464         unsigned long flags;
1465
1466         if (tracing_disabled)
1467                 return;
1468
1469         /* If global, we need to also start the max tracer */
1470         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1471                 return tracing_start();
1472
1473         raw_spin_lock_irqsave(&tr->start_lock, flags);
1474
1475         if (--tr->stop_count) {
1476                 if (tr->stop_count < 0) {
1477                         /* Someone screwed up their debugging */
1478                         WARN_ON_ONCE(1);
1479                         tr->stop_count = 0;
1480                 }
1481                 goto out;
1482         }
1483
1484         buffer = tr->trace_buffer.buffer;
1485         if (buffer)
1486                 ring_buffer_record_enable(buffer);
1487
1488  out:
1489         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1490 }
1491
1492 /**
1493  * tracing_stop - quick stop of the tracer
1494  *
1495  * Lightweight way to stop tracing. Use in conjunction with
1496  * tracing_start.
1497  */
1498 void tracing_stop(void)
1499 {
1500         struct ring_buffer *buffer;
1501         unsigned long flags;
1502
1503         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1504         if (global_trace.stop_count++)
1505                 goto out;
1506
1507         /* Prevent the buffers from switching */
1508         arch_spin_lock(&global_trace.max_lock);
1509
1510         buffer = global_trace.trace_buffer.buffer;
1511         if (buffer)
1512                 ring_buffer_record_disable(buffer);
1513
1514 #ifdef CONFIG_TRACER_MAX_TRACE
1515         buffer = global_trace.max_buffer.buffer;
1516         if (buffer)
1517                 ring_buffer_record_disable(buffer);
1518 #endif
1519
1520         arch_spin_unlock(&global_trace.max_lock);
1521
1522  out:
1523         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1524 }
1525
1526 static void tracing_stop_tr(struct trace_array *tr)
1527 {
1528         struct ring_buffer *buffer;
1529         unsigned long flags;
1530
1531         /* If global, we need to also stop the max tracer */
1532         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1533                 return tracing_stop();
1534
1535         raw_spin_lock_irqsave(&tr->start_lock, flags);
1536         if (tr->stop_count++)
1537                 goto out;
1538
1539         buffer = tr->trace_buffer.buffer;
1540         if (buffer)
1541                 ring_buffer_record_disable(buffer);
1542
1543  out:
1544         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1545 }
1546
1547 void trace_stop_cmdline_recording(void);
1548
1549 static int trace_save_cmdline(struct task_struct *tsk)
1550 {
1551         unsigned pid, idx;
1552
1553         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1554                 return 0;
1555
1556         /*
1557          * It's not the end of the world if we don't get
1558          * the lock, but we also don't want to spin
1559          * nor do we want to disable interrupts,
1560          * so if we miss here, then better luck next time.
1561          */
1562         if (!arch_spin_trylock(&trace_cmdline_lock))
1563                 return 0;
1564
1565         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1566         if (idx == NO_CMDLINE_MAP) {
1567                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1568
1569                 /*
1570                  * Check whether the cmdline buffer at idx has a pid
1571                  * mapped. We are going to overwrite that entry so we
1572                  * need to clear the map_pid_to_cmdline. Otherwise we
1573                  * would read the new comm for the old pid.
1574                  */
1575                 pid = savedcmd->map_cmdline_to_pid[idx];
1576                 if (pid != NO_CMDLINE_MAP)
1577                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1578
1579                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1580                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1581
1582                 savedcmd->cmdline_idx = idx;
1583         }
1584
1585         set_cmdline(idx, tsk->comm);
1586
1587         arch_spin_unlock(&trace_cmdline_lock);
1588
1589         return 1;
1590 }
1591
1592 static void __trace_find_cmdline(int pid, char comm[])
1593 {
1594         unsigned map;
1595
1596         if (!pid) {
1597                 strcpy(comm, "<idle>");
1598                 return;
1599         }
1600
1601         if (WARN_ON_ONCE(pid < 0)) {
1602                 strcpy(comm, "<XXX>");
1603                 return;
1604         }
1605
1606         if (pid > PID_MAX_DEFAULT) {
1607                 strcpy(comm, "<...>");
1608                 return;
1609         }
1610
1611         map = savedcmd->map_pid_to_cmdline[pid];
1612         if (map != NO_CMDLINE_MAP)
1613                 strcpy(comm, get_saved_cmdlines(map));
1614         else
1615                 strcpy(comm, "<...>");
1616 }
1617
1618 void trace_find_cmdline(int pid, char comm[])
1619 {
1620         preempt_disable();
1621         arch_spin_lock(&trace_cmdline_lock);
1622
1623         __trace_find_cmdline(pid, comm);
1624
1625         arch_spin_unlock(&trace_cmdline_lock);
1626         preempt_enable();
1627 }
1628
1629 void tracing_record_cmdline(struct task_struct *tsk)
1630 {
1631         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1632                 return;
1633
1634         if (!__this_cpu_read(trace_cmdline_save))
1635                 return;
1636
1637         if (trace_save_cmdline(tsk))
1638                 __this_cpu_write(trace_cmdline_save, false);
1639 }
1640
1641 void
1642 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1643                              int pc)
1644 {
1645         struct task_struct *tsk = current;
1646
1647         entry->preempt_count            = pc & 0xff;
1648         entry->pid                      = (tsk) ? tsk->pid : 0;
1649         entry->flags =
1650 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1651                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1652 #else
1653                 TRACE_FLAG_IRQS_NOSUPPORT |
1654 #endif
1655                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1656                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1657                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1658                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1659                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1660 }
1661 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1662
1663 static __always_inline void
1664 trace_event_setup(struct ring_buffer_event *event,
1665                   int type, unsigned long flags, int pc)
1666 {
1667         struct trace_entry *ent = ring_buffer_event_data(event);
1668
1669         tracing_generic_entry_update(ent, flags, pc);
1670         ent->type = type;
1671 }
1672
1673 struct ring_buffer_event *
1674 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1675                           int type,
1676                           unsigned long len,
1677                           unsigned long flags, int pc)
1678 {
1679         struct ring_buffer_event *event;
1680
1681         event = ring_buffer_lock_reserve(buffer, len);
1682         if (event != NULL)
1683                 trace_event_setup(event, type, flags, pc);
1684
1685         return event;
1686 }
1687
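/*
 * Per-CPU scratch event used while event filters are active: one page per
 * CPU for the event data, a nesting counter to detect recursive use, and a
 * reference count of how many callers have enabled buffering.
 */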
1688 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1689 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1690 static int trace_buffered_event_ref;
1691
1692 /**
1693  * trace_buffered_event_enable - enable buffering events
1694  *
1695  * When events are being filtered, it is quicker to use a temporary
1696  * buffer to write the event data into if there's a likely chance
1697  * that it will not be committed. Discarding a reserved event from
1698  * the ring buffer is not as fast as committing, and is much slower
1699  * than copying the data and committing it in one shot.
1700  *
1701  * When an event is to be filtered, allocate per cpu buffers to
1702  * write the event data into, and if the event is filtered and discarded
1703  * it is simply dropped; otherwise, the entire data is committed
1704  * in one shot.
1705  */
1706 void trace_buffered_event_enable(void)
1707 {
1708         struct ring_buffer_event *event;
1709         struct page *page;
1710         int cpu;
1711
1712         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1713
1714         if (trace_buffered_event_ref++)
1715                 return;
1716
1717         for_each_tracing_cpu(cpu) {
1718                 page = alloc_pages_node(cpu_to_node(cpu),
1719                                         GFP_KERNEL | __GFP_NORETRY, 0);
1720                 if (!page)
1721                         goto failed;
1722
1723                 event = page_address(page);
1724                 memset(event, 0, sizeof(*event));
1725
1726                 per_cpu(trace_buffered_event, cpu) = event;
1727
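                /*
                 * Sanity check: the pointer just published for this CPU must
                 * also be visible through this_cpu_read() when running on it.
                 */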
1728                 preempt_disable();
1729                 if (cpu == smp_processor_id() &&
1730                     this_cpu_read(trace_buffered_event) !=
1731                     per_cpu(trace_buffered_event, cpu))
1732                         WARN_ON_ONCE(1);
1733                 preempt_enable();
1734         }
1735
1736         return;
1737  failed:
1738         trace_buffered_event_disable();
1739 }
1740
1741 static void enable_trace_buffered_event(void *data)
1742 {
1743         /* Probably not needed, but do it anyway */
1744         smp_rmb();
1745         this_cpu_dec(trace_buffered_event_cnt);
1746 }
1747
1748 static void disable_trace_buffered_event(void *data)
1749 {
1750         this_cpu_inc(trace_buffered_event_cnt);
1751 }
1752
1753 /**
1754  * trace_buffered_event_disable - disable buffering events
1755  *
1756  * When a filter is removed, it is faster to not use the buffered
1757  * events, and to commit directly into the ring buffer. Free up
1758  * the temp buffers when there are no more users. This requires
1759  * special synchronization with current events.
1760  */
1761 void trace_buffered_event_disable(void)
1762 {
1763         int cpu;
1764
1765         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1766
1767         if (WARN_ON_ONCE(!trace_buffered_event_ref))
1768                 return;
1769
1770         if (--trace_buffered_event_ref)
1771                 return;
1772
1773         preempt_disable();
1774         /* Mark each CPU's buffered event as busy so writers fall back to the ring buffer. */
1775         smp_call_function_many(tracing_buffer_mask,
1776                                disable_trace_buffered_event, NULL, 1);
1777         preempt_enable();
1778
1779         /* Wait for all current users to finish */
1780         synchronize_sched();
1781
1782         for_each_tracing_cpu(cpu) {
1783                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
1784                 per_cpu(trace_buffered_event, cpu) = NULL;
1785         }
1786         /*
1787          * Make sure trace_buffered_event is NULL before clearing
1788          * trace_buffered_event_cnt.
1789          */
1790         smp_wmb();
1791
1792         preempt_disable();
1793         /* Let each CPU use its buffered event again */
1794         smp_call_function_many(tracing_buffer_mask,
1795                                enable_trace_buffered_event, NULL, 1);
1796         preempt_enable();
1797 }
1798
1799 void
1800 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1801 {
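        /*
         * A trace event is being committed on this CPU; allow
         * tracing_record_cmdline() to save the comm again.
         */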
1802         __this_cpu_write(trace_cmdline_save, true);
1803
1804         /* If this is the temp buffer, we need to commit fully */
1805         if (this_cpu_read(trace_buffered_event) == event) {
1806                 /* Length is in event->array[0] */
1807                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1808                 /* Release the temp buffer */
1809                 this_cpu_dec(trace_buffered_event_cnt);
1810         } else
1811                 ring_buffer_unlock_commit(buffer, event);
1812 }
1813
1814 static struct ring_buffer *temp_buffer;
1815
1816 struct ring_buffer_event *
1817 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1818                           struct trace_event_file *trace_file,
1819                           int type, unsigned long len,
1820                           unsigned long flags, int pc)
1821 {
1822         struct ring_buffer_event *entry;
1823         int val;
1824
1825         *current_rb = trace_file->tr->trace_buffer.buffer;
1826
1827         if ((trace_file->flags &
1828              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
1829             (entry = this_cpu_read(trace_buffered_event))) {
1830                 /* Try to use the per cpu buffer first */
1831                 val = this_cpu_inc_return(trace_buffered_event_cnt);
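                /*
                 * val == 1 means we are the sole user of the per-CPU event on
                 * this CPU and buffering is not being torn down; anything else
                 * indicates nesting or a disable in progress, so fall back to
                 * a normal ring buffer reservation below.
                 */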
1832                 if (val == 1) {
1833                         trace_event_setup(entry, type, flags, pc);
1834                         entry->array[0] = len;
1835                         return entry;
1836                 }
1837                 this_cpu_dec(trace_buffered_event_cnt);
1838         }
1839
1840         entry = trace_buffer_lock_reserve(*current_rb,
1841                                          type, len, flags, pc);
1842         /*
1843          * If tracing is off, but we have triggers enabled
1844          * we still need to look at the event data. Use the temp_buffer
1845          * to store the trace event for the trigger to use. It's recursion
1846          * safe and will not be recorded anywhere.
1847          */
1848         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
1849                 *current_rb = temp_buffer;
1850                 entry = trace_buffer_lock_reserve(*current_rb,
1851                                                   type, len, flags, pc);
1852         }
1853         return entry;
1854 }
1855 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1856
1857 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
1858                                      struct ring_buffer *buffer,
1859                                      struct ring_buffer_event *event,
1860                                      unsigned long flags, int pc,
1861                                      struct pt_regs *regs)
1862 {
1863         __buffer_unlock_commit(buffer, event);
1864
1865         ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
1866         ftrace_trace_userstack(buffer, flags, pc);
1867 }
1868
1869 void
1870 trace_function(struct trace_array *tr,
1871                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1872                int pc)
1873 {
1874         struct trace_event_call *call = &event_function;
1875         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1876         struct ring_buffer_event *event;
1877         struct ftrace_entry *entry;
1878
1879         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1880                                           flags, pc);
1881         if (!event)
1882                 return;
1883         entry   = ring_buffer_event_data(event);
1884         entry->ip                       = ip;
1885         entry->parent_ip                = parent_ip;
1886
1887         if (!call_filter_check_discard(call, entry, buffer, event))
1888                 __buffer_unlock_commit(buffer, event);
1889 }
1890
1891 #ifdef CONFIG_STACKTRACE
1892
1893 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1894 struct ftrace_stack {
1895         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1896 };
1897
1898 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1899 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1900
1901 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1902                                  unsigned long flags,
1903                                  int skip, int pc, struct pt_regs *regs)
1904 {
1905         struct trace_event_call *call = &event_kernel_stack;
1906         struct ring_buffer_event *event;
1907         struct stack_entry *entry;
1908         struct stack_trace trace;
1909         int use_stack;
1910         int size = FTRACE_STACK_ENTRIES;
1911
1912         trace.nr_entries        = 0;
1913         trace.skip              = skip;
1914
1915         /*
1916          * Since events can happen in NMIs there's no safe way to
1917          * use the per-cpu ftrace_stack. We reserve it, and if an interrupt
1918          * or NMI comes in, it will just have to use the default
1919          * FTRACE_STACK_ENTRIES and write directly into the event.
1920          */
1921         preempt_disable_notrace();
1922
1923         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1924         /*
1925          * We don't need any atomic variables, just a barrier.
1926          * If an interrupt comes in, we don't care, because it would
1927          * have exited and put the counter back to what we want.
1928          * We just need a barrier to keep gcc from moving things
1929          * around.
1930          */
1931         barrier();
1932         if (use_stack == 1) {
1933                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1934                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1935
1936                 if (regs)
1937                         save_stack_trace_regs(regs, &trace);
1938                 else
1939                         save_stack_trace(&trace);
1940
1941                 if (trace.nr_entries > size)
1942                         size = trace.nr_entries;
1943         } else
1944                 /* From now on, use_stack is a boolean */
1945                 use_stack = 0;
1946
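        /* Convert the entry count into bytes for the reservation below */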
1947         size *= sizeof(unsigned long);
1948
1949         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1950                                           sizeof(*entry) + size, flags, pc);
1951         if (!event)
1952                 goto out;
1953         entry = ring_buffer_event_data(event);
1954
1955         memset(&entry->caller, 0, size);
1956
1957         if (use_stack)
1958                 memcpy(&entry->caller, trace.entries,
1959                        trace.nr_entries * sizeof(unsigned long));
1960         else {
1961                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1962                 trace.entries           = entry->caller;
1963                 if (regs)
1964                         save_stack_trace_regs(regs, &trace);
1965                 else
1966                         save_stack_trace(&trace);
1967         }
1968
1969         entry->size = trace.nr_entries;
1970
1971         if (!call_filter_check_discard(call, entry, buffer, event))
1972                 __buffer_unlock_commit(buffer, event);
1973
1974  out:
1975         /* Again, don't let gcc optimize things here */
1976         barrier();
1977         __this_cpu_dec(ftrace_stack_reserve);
1978         preempt_enable_notrace();
1979
1980 }
1981
1982 static inline void ftrace_trace_stack(struct trace_array *tr,
1983                                       struct ring_buffer *buffer,
1984                                       unsigned long flags,
1985                                       int skip, int pc, struct pt_regs *regs)
1986 {
1987         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
1988                 return;
1989
1990         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1991 }
1992
1993 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1994                    int pc)
1995 {
1996         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1997 }
1998
1999 /**
2000  * trace_dump_stack - record a stack back trace in the trace buffer
2001  * @skip: Number of functions to skip (helper handlers)
2002  */
2003 void trace_dump_stack(int skip)
2004 {
2005         unsigned long flags;
2006
2007         if (tracing_disabled || tracing_selftest_running)
2008                 return;
2009
2010         local_save_flags(flags);
2011
2012         /*
2013          * Skip 3 more; that seems to get us to the caller of
2014          * this function.
2015          */
2016         skip += 3;
2017         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2018                              flags, skip, preempt_count(), NULL);
2019 }
2020
2021 static DEFINE_PER_CPU(int, user_stack_count);
2022
2023 void
2024 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2025 {
2026         struct trace_event_call *call = &event_user_stack;
2027         struct ring_buffer_event *event;
2028         struct userstack_entry *entry;
2029         struct stack_trace trace;
2030
2031         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2032                 return;
2033
2034         /*
2035          * NMIs cannot handle page faults, even with fixups.
2036          * Saving the user stack can (and often does) fault.
2037          */
2038         if (unlikely(in_nmi()))
2039                 return;
2040
2041         /*
2042          * prevent recursion, since the user stack tracing may
2043          * trigger other kernel events.
2044          */
2045         preempt_disable();
2046         if (__this_cpu_read(user_stack_count))
2047                 goto out;
2048
2049         __this_cpu_inc(user_stack_count);
2050
2051         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2052                                           sizeof(*entry), flags, pc);
2053         if (!event)
2054                 goto out_drop_count;
2055         entry   = ring_buffer_event_data(event);
2056
2057         entry->tgid             = current->tgid;
2058         memset(&entry->caller, 0, sizeof(entry->caller));
2059
2060         trace.nr_entries        = 0;
2061         trace.max_entries       = FTRACE_STACK_ENTRIES;
2062         trace.skip              = 0;
2063         trace.entries           = entry->caller;
2064
2065         save_stack_trace_user(&trace);
2066         if (!call_filter_check_discard(call, entry, buffer, event))
2067                 __buffer_unlock_commit(buffer, event);
2068
2069  out_drop_count:
2070         __this_cpu_dec(user_stack_count);
2071  out:
2072         preempt_enable();
2073 }
2074
2075 #ifdef UNUSED
2076 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2077 {
2078         ftrace_trace_userstack(tr, flags, preempt_count());
2079 }
2080 #endif /* UNUSED */
2081
2082 #endif /* CONFIG_STACKTRACE */
2083
2084 /* created for use with alloc_percpu */
2085 struct trace_buffer_struct {
2086         char buffer[TRACE_BUF_SIZE];
2087 };
2088
2089 static struct trace_buffer_struct *trace_percpu_buffer;
2090 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
2091 static struct trace_buffer_struct *trace_percpu_irq_buffer;
2092 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
2093
2094 /*
2095  * The buffer used is dependent on the context. There is a per cpu
2096  * buffer for normal context, softirq context, hard irq context and
2097  * for NMI context. This allows for lockless recording.
2098  *
2099  * Note, if the buffers failed to be allocated, then this returns NULL
2100  */
2101 static char *get_trace_buf(void)
2102 {
2103         struct trace_buffer_struct *percpu_buffer;
2104
2105         /*
2106          * If we have allocated per cpu buffers, then we do not
2107          * need to do any locking.
2108          */
2109         if (in_nmi())
2110                 percpu_buffer = trace_percpu_nmi_buffer;
2111         else if (in_irq())
2112                 percpu_buffer = trace_percpu_irq_buffer;
2113         else if (in_softirq())
2114                 percpu_buffer = trace_percpu_sirq_buffer;
2115         else
2116                 percpu_buffer = trace_percpu_buffer;
2117
2118         if (!percpu_buffer)
2119                 return NULL;
2120
2121         return this_cpu_ptr(&percpu_buffer->buffer[0]);
2122 }
2123
2124 static int alloc_percpu_trace_buffer(void)
2125 {
2126         struct trace_buffer_struct *buffers;
2127         struct trace_buffer_struct *sirq_buffers;
2128         struct trace_buffer_struct *irq_buffers;
2129         struct trace_buffer_struct *nmi_buffers;
2130
2131         buffers = alloc_percpu(struct trace_buffer_struct);
2132         if (!buffers)
2133                 goto err_warn;
2134
2135         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2136         if (!sirq_buffers)
2137                 goto err_sirq;
2138
2139         irq_buffers = alloc_percpu(struct trace_buffer_struct);
2140         if (!irq_buffers)
2141                 goto err_irq;
2142
2143         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2144         if (!nmi_buffers)
2145                 goto err_nmi;
2146
2147         trace_percpu_buffer = buffers;
2148         trace_percpu_sirq_buffer = sirq_buffers;
2149         trace_percpu_irq_buffer = irq_buffers;
2150         trace_percpu_nmi_buffer = nmi_buffers;
2151
2152         return 0;
2153
2154  err_nmi:
2155         free_percpu(irq_buffers);
2156  err_irq:
2157         free_percpu(sirq_buffers);
2158  err_sirq:
2159         free_percpu(buffers);
2160  err_warn:
2161         WARN(1, "Could not allocate percpu trace_printk buffer");
2162         return -ENOMEM;
2163 }
2164
2165 static int buffers_allocated;
2166
2167 void trace_printk_init_buffers(void)
2168 {
2169         if (buffers_allocated)
2170                 return;
2171
2172         if (alloc_percpu_trace_buffer())
2173                 return;
2174
2175         /* trace_printk() is for debug use only. Don't use it in production. */
2176
2177         pr_warn("\n");
2178         pr_warn("**********************************************************\n");
2179         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2180         pr_warn("**                                                      **\n");
2181         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2182         pr_warn("**                                                      **\n");
2183         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2184         pr_warn("** unsafe for production use.                           **\n");
2185         pr_warn("**                                                      **\n");
2186         pr_warn("** If you see this message and you are not debugging    **\n");
2187         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2188         pr_warn("**                                                      **\n");
2189         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2190         pr_warn("**********************************************************\n");
2191
2192         /* Expand the buffers to set size */
2193         tracing_update_buffers();
2194
2195         buffers_allocated = 1;
2196
2197         /*
2198          * trace_printk_init_buffers() can be called by modules.
2199          * If that happens, then we need to start cmdline recording
2200          * directly here. If the global_trace.buffer is already
2201          * allocated here, then this was called by module code.
2202          */
2203         if (global_trace.trace_buffer.buffer)
2204                 tracing_start_cmdline_record();
2205 }
2206
2207 void trace_printk_start_comm(void)
2208 {
2209         /* Start tracing comms if trace printk is set */
2210         if (!buffers_allocated)
2211                 return;
2212         tracing_start_cmdline_record();
2213 }
2214
2215 static void trace_printk_start_stop_comm(int enabled)
2216 {
2217         if (!buffers_allocated)
2218                 return;
2219
2220         if (enabled)
2221                 tracing_start_cmdline_record();
2222         else
2223                 tracing_stop_cmdline_record();
2224 }
2225
2226 /**
2227  * trace_vbprintk - write a binary message into the tracing buffer
2228  * @ip: address of the caller, recorded in the trace entry
 * @fmt: printf-style format string
 * @args: arguments for @fmt
2229  */
2230 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2231 {
2232         struct trace_event_call *call = &event_bprint;
2233         struct ring_buffer_event *event;
2234         struct ring_buffer *buffer;
2235         struct trace_array *tr = &global_trace;
2236         struct bprint_entry *entry;
2237         unsigned long flags;
2238         char *tbuffer;
2239         int len = 0, size, pc;
2240
2241         if (unlikely(tracing_selftest_running || tracing_disabled))
2242                 return 0;
2243
2244         /* Don't pollute graph traces with trace_vprintk internals */
2245         pause_graph_tracing();
2246
2247         pc = preempt_count();
2248         preempt_disable_notrace();
2249
2250         tbuffer = get_trace_buf();
2251         if (!tbuffer) {
2252                 len = 0;
2253                 goto out;
2254         }
2255
2256         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2257
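        /* len counts the 32-bit words written into tbuffer, not bytes */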
2258         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2259                 goto out;
2260
2261         local_save_flags(flags);
2262         size = sizeof(*entry) + sizeof(u32) * len;
2263         buffer = tr->trace_buffer.buffer;
2264         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2265                                           flags, pc);
2266         if (!event)
2267                 goto out;
2268         entry = ring_buffer_event_data(event);
2269         entry->ip                       = ip;
2270         entry->fmt                      = fmt;
2271
2272         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2273         if (!call_filter_check_discard(call, entry, buffer, event)) {
2274                 __buffer_unlock_commit(buffer, event);
2275                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2276         }
2277
2278 out:
2279         preempt_enable_notrace();
2280         unpause_graph_tracing();
2281
2282         return len;
2283 }
2284 EXPORT_SYMBOL_GPL(trace_vbprintk);
2285
2286 static int
2287 __trace_array_vprintk(struct ring_buffer *buffer,
2288                       unsigned long ip, const char *fmt, va_list args)
2289 {
2290         struct trace_event_call *call = &event_print;
2291         struct ring_buffer_event *event;
2292         int len = 0, size, pc;
2293         struct print_entry *entry;
2294         unsigned long flags;
2295         char *tbuffer;
2296
2297         if (tracing_disabled || tracing_selftest_running)
2298                 return 0;
2299
2300         /* Don't pollute graph traces with trace_vprintk internals */
2301         pause_graph_tracing();
2302
2303         pc = preempt_count();
2304         preempt_disable_notrace();
2305
2307         tbuffer = get_trace_buf();
2308         if (!tbuffer) {
2309                 len = 0;
2310                 goto out;
2311         }
2312
2313         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2314
2315         local_save_flags(flags);
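        /* One extra byte for the terminating NUL copied along with the string */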
2316         size = sizeof(*entry) + len + 1;
2317         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2318                                           flags, pc);
2319         if (!event)
2320                 goto out;
2321         entry = ring_buffer_event_data(event);
2322         entry->ip = ip;
2323
2324         memcpy(&entry->buf, tbuffer, len + 1);
2325         if (!call_filter_check_discard(call, entry, buffer, event)) {
2326                 __buffer_unlock_commit(buffer, event);
2327                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2328         }
2329  out:
2330         preempt_enable_notrace();
2331         unpause_graph_tracing();
2332
2333         return len;
2334 }
2335
2336 int trace_array_vprintk(struct trace_array *tr,
2337                         unsigned long ip, const char *fmt, va_list args)
2338 {
2339         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2340 }
2341
2342 int trace_array_printk(struct trace_array *tr,
2343                        unsigned long ip, const char *fmt, ...)
2344 {
2345         int ret;
2346         va_list ap;
2347
2348         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2349                 return 0;
2350
2351         va_start(ap, fmt);
2352         ret = trace_array_vprintk(tr, ip, fmt, ap);
2353         va_end(ap);
2354         return ret;
2355 }
2356
2357 int trace_array_printk_buf(struct ring_buffer *buffer,
2358                            unsigned long ip, const char *fmt, ...)
2359 {
2360         int ret;
2361         va_list ap;
2362
2363         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2364                 return 0;
2365
2366         va_start(ap, fmt);
2367         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2368         va_end(ap);
2369         return ret;
2370 }
2371
2372 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2373 {
2374         return trace_array_vprintk(&global_trace, ip, fmt, args);
2375 }
2376 EXPORT_SYMBOL_GPL(trace_vprintk);
2377
2378 static void trace_iterator_increment(struct trace_iterator *iter)
2379 {
2380         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2381
2382         iter->idx++;
2383         if (buf_iter)
2384                 ring_buffer_read(buf_iter, NULL);
2385 }
2386
2387 static struct trace_entry *
2388 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2389                 unsigned long *lost_events)
2390 {
2391         struct ring_buffer_event *event;
2392         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2393
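        /*
         * Use the ring buffer iterator when one was set up (non-consuming
         * read); otherwise peek the live buffer, which can also report
         * lost events.
         */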
2394         if (buf_iter)
2395                 event = ring_buffer_iter_peek(buf_iter, ts);
2396         else
2397                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2398                                          lost_events);
2399
2400         if (event) {
2401                 iter->ent_size = ring_buffer_event_length(event);
2402                 return ring_buffer_event_data(event);
2403         }
2404         iter->ent_size = 0;
2405         return NULL;
2406 }
2407
2408 static struct trace_entry *
2409 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2410                   unsigned long *missing_events, u64 *ent_ts)
2411 {
2412         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2413         struct trace_entry *ent, *next = NULL;
2414         unsigned long lost_events = 0, next_lost = 0;
2415         int cpu_file = iter->cpu_file;
2416         u64 next_ts = 0, ts;
2417         int next_cpu = -1;
2418         int next_size = 0;
2419         int cpu;
2420
2421         /*
2422          * If we are in a per_cpu trace file, don't bother iterating over
2423          * all CPUs; just peek at that one directly.
2424          */
2425         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2426                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2427                         return NULL;
2428                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2429                 if (ent_cpu)
2430                         *ent_cpu = cpu_file;
2431
2432                 return ent;
2433         }
2434
2435         for_each_tracing_cpu(cpu) {
2436
2437                 if (ring_buffer_empty_cpu(buffer, cpu))
2438                         continue;
2439
2440                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2441
2442                 /*
2443                  * Pick the entry with the smallest timestamp:
2444                  */
2445                 if (ent && (!next || ts < next_ts)) {
2446                         next = ent;
2447                         next_cpu = cpu;
2448                         next_ts = ts;
2449                         next_lost = lost_events;
2450                         next_size = iter->ent_size;
2451                 }
2452         }
2453
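        /*
         * iter->ent_size was overwritten while peeking at each CPU;
         * restore the size of the entry that was actually chosen.
         */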
2454         iter->ent_size = next_size;
2455
2456         if (ent_cpu)
2457                 *ent_cpu = next_cpu;
2458
2459         if (ent_ts)
2460                 *ent_ts = next_ts;
2461
2462         if (missing_events)
2463                 *missing_events = next_lost;
2464
2465         return next;
2466 }
2467
2468 /* Find the next real entry, without updating the iterator itself */
2469 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2470                                           int *ent_cpu, u64 *ent_ts)
2471 {
2472         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2473 }
2474
2475 /* Find the next real entry, and increment the iterator to the next entry */
2476 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2477 {
2478         iter->ent = __find_next_entry(iter, &iter->cpu,
2479                                       &iter->lost_events, &iter->ts);
2480
2481         if (iter->ent)
2482                 trace_iterator_increment(iter);
2483
2484         return iter->ent ? iter : NULL;
2485 }
2486
2487 static void trace_consume(struct trace_iterator *iter)
2488 {
2489         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2490                             &iter->lost_events);
2491 }
2492
2493 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2494 {
2495         struct trace_iterator *iter = m->private;
2496         int i = (int)*pos;
2497         void *ent;
2498
2499         WARN_ON_ONCE(iter->leftover);
2500
2501         (*pos)++;
2502
2503         /* can't go backwards */
2504         if (iter->idx > i)
2505                 return NULL;
2506
2507         if (iter->idx < 0)
2508                 ent = trace_find_next_entry_inc(iter);
2509         else
2510                 ent = iter;
2511
2512         while (ent && iter->idx < i)
2513                 ent = trace_find_next_entry_inc(iter);
2514
2515         iter->pos = *pos;
2516
2517         return ent;
2518 }
2519
2520 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2521 {
2522         struct ring_buffer_event *event;
2523         struct ring_buffer_iter *buf_iter;
2524         unsigned long entries = 0;
2525         u64 ts;
2526
2527         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2528
2529         buf_iter = trace_buffer_iter(iter, cpu);
2530         if (!buf_iter)
2531                 return;
2532
2533         ring_buffer_iter_reset(buf_iter);
2534
2535         /*
2536          * With the max latency tracers, a reset may never have taken
2537          * place on a cpu. This is evident from the timestamps being
2538          * before the start of the buffer.
2539          */
2540         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2541                 if (ts >= iter->trace_buffer->time_start)
2542                         break;
2543                 entries++;
2544                 ring_buffer_read(buf_iter, NULL);
2545         }
2546
2547         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2548 }
2549
2550 /*
2551  * The current tracer is copied to avoid taking a global lock
2552  * all around.
2553  */
2554 static void *s_start(struct seq_file *m, loff_t *pos)
2555 {
2556         struct trace_iterator *iter = m->private;
2557         struct trace_array *tr = iter->tr;
2558         int cpu_file = iter->cpu_file;
2559         void *p = NULL;
2560         loff_t l = 0;
2561         int cpu;
2562
2563         /*
2564          * Copy the tracer to avoid using a global lock all around.
2565          * iter->trace is a copy of current_trace; the name pointer can
2566          * be compared instead of using strcmp(), as iter->trace->name
2567          * points to the same string as current_trace->name.
2568          */
2569         mutex_lock(&trace_types_lock);
2570         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2571                 *iter->trace = *tr->current_trace;
2572         mutex_unlock(&trace_types_lock);
2573
2574 #ifdef CONFIG_TRACER_MAX_TRACE
2575         if (iter->snapshot && iter->trace->use_max_tr)
2576                 return ERR_PTR(-EBUSY);
2577 #endif
2578
2579         if (!iter->snapshot)
2580                 atomic_inc(&trace_record_cmdline_disabled);
2581
2582         if (*pos != iter->pos) {
2583                 iter->ent = NULL;
2584                 iter->cpu = 0;
2585                 iter->idx = -1;
2586
2587                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2588                         for_each_tracing_cpu(cpu)
2589                                 tracing_iter_reset(iter, cpu);
2590                 } else
2591                         tracing_iter_reset(iter, cpu_file);
2592
2593                 iter->leftover = 0;
2594                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2595                         ;
2596
2597         } else {
2598                 /*
2599                  * If we overflowed the seq_file before, then we want
2600                  * to just reuse the trace_seq buffer again.
2601                  */
2602                 if (iter->leftover)
2603                         p = iter;
2604                 else {
2605                         l = *pos - 1;
2606                         p = s_next(m, p, &l);
2607                 }
2608         }
2609
2610         trace_event_read_lock();
2611         trace_access_lock(cpu_file);
2612         return p;
2613 }
2614
2615 static void s_stop(struct seq_file *m, void *p)
2616 {
2617         struct trace_iterator *iter = m->private;
2618
2619 #ifdef CONFIG_TRACER_MAX_TRACE
2620         if (iter->snapshot && iter->trace->use_max_tr)
2621                 return;
2622 #endif
2623
2624         if (!iter->snapshot)
2625                 atomic_dec(&trace_record_cmdline_disabled);
2626
2627         trace_access_unlock(iter->cpu_file);
2628         trace_event_read_unlock();
2629 }
2630
2631 static void
2632 get_total_entries(struct trace_buffer *buf,
2633                   unsigned long *total, unsigned long *entries)
2634 {
2635         unsigned long count;
2636         int cpu;
2637
2638         *total = 0;
2639         *entries = 0;
2640
2641         for_each_tracing_cpu(cpu) {
2642                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2643                 /*
2644                  * If this buffer has skipped entries, then we hold all
2645                  * entries for the trace and we need to ignore the
2646                  * ones before the time stamp.
2647                  */
2648                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2649                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2650                         /* total is the same as the entries */
2651                         *total += count;
2652                 } else
2653                         *total += count +
2654                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2655                 *entries += count;
2656         }
2657 }
2658
2659 static void print_lat_help_header(struct seq_file *m)
2660 {
2661         seq_puts(m, "#                  _------=> CPU#            \n"
2662                     "#                 / _-----=> irqs-off        \n"
2663                     "#                | / _----=> need-resched    \n"
2664                     "#                || / _---=> hardirq/softirq \n"
2665                     "#                ||| / _--=> preempt-depth   \n"
2666                     "#                |||| /     delay            \n"
2667                     "#  cmd     pid   ||||| time  |   caller      \n"
2668                     "#     \\   /      |||||  \\    |   /         \n");
2669 }
2670
2671 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2672 {
2673         unsigned long total;
2674         unsigned long entries;
2675
2676         get_total_entries(buf, &total, &entries);
2677         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2678                    entries, total, num_online_cpus());
2679         seq_puts(m, "#\n");
2680 }
2681
2682 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2683 {
2684         print_event_info(buf, m);
2685         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2686                     "#              | |       |          |         |\n");
2687 }
2688
2689 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2690 {
2691         print_event_info(buf, m);
2692         seq_puts(m, "#                              _-----=> irqs-off\n"
2693                     "#                             / _----=> need-resched\n"
2694                     "#                            | / _---=> hardirq/softirq\n"
2695                     "#                            || / _--=> preempt-depth\n"
2696                     "#                            ||| /     delay\n"
2697                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2698                     "#              | |       |   ||||       |         |\n");
2699 }
2700
2701 void
2702 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2703 {
2704         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2705         struct trace_buffer *buf = iter->trace_buffer;
2706         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2707         struct tracer *type = iter->trace;
2708         unsigned long entries;
2709         unsigned long total;
2710         const char *name = "preemption";
2711
2712         name = type->name;
2713
2714         get_total_entries(buf, &total, &entries);
2715
2716         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2717                    name, UTS_RELEASE);
2718         seq_puts(m, "# -----------------------------------"
2719                  "---------------------------------\n");
2720         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2721                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2722                    nsecs_to_usecs(data->saved_latency),
2723                    entries,
2724                    total,
2725                    buf->cpu,
2726 #if defined(CONFIG_PREEMPT_NONE)
2727                    "server",
2728 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2729                    "desktop",
2730 #elif defined(CONFIG_PREEMPT)
2731                    "preempt",
2732 #else
2733                    "unknown",
2734 #endif
2735                    /* These are reserved for later use */
2736                    0, 0, 0, 0);
2737 #ifdef CONFIG_SMP
2738         seq_printf(m, " #P:%d)\n", num_online_cpus());
2739 #else
2740         seq_puts(m, ")\n");
2741 #endif
2742         seq_puts(m, "#    -----------------\n");
2743         seq_printf(m, "#    | task: %.16s-%d "
2744                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2745                    data->comm, data->pid,
2746                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2747                    data->policy, data->rt_priority);
2748         seq_puts(m, "#    -----------------\n");
2749
2750         if (data->critical_start) {
2751                 seq_puts(m, "#  => started at: ");
2752                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2753                 trace_print_seq(m, &iter->seq);
2754                 seq_puts(m, "\n#  => ended at:   ");
2755                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2756                 trace_print_seq(m, &iter->seq);
2757                 seq_puts(m, "\n#\n");
2758         }
2759
2760         seq_puts(m, "#\n");
2761 }
2762
2763 static void test_cpu_buff_start(struct trace_iterator *iter)
2764 {
2765         struct trace_seq *s = &iter->seq;
2766         struct trace_array *tr = iter->tr;
2767
2768         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
2769                 return;
2770
2771         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2772                 return;
2773
2774         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
2775                 return;
2776
2777         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2778                 return;
2779
2780         if (iter->started)
2781                 cpumask_set_cpu(iter->cpu, iter->started);
2782
2783         /* Don't print started cpu buffer for the first entry of the trace */
2784         if (iter->idx > 1)
2785                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2786                                 iter->cpu);
2787 }
2788
2789 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2790 {
2791         struct trace_array *tr = iter->tr;
2792         struct trace_seq *s = &iter->seq;
2793         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
2794         struct trace_entry *entry;
2795         struct trace_event *event;
2796
2797         entry = iter->ent;
2798
2799         test_cpu_buff_start(iter);
2800
2801         event = ftrace_find_event(entry->type);
2802
2803         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2804                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2805                         trace_print_lat_context(iter);
2806                 else
2807                         trace_print_context(iter);
2808         }
2809
2810         if (trace_seq_has_overflowed(s))
2811                 return TRACE_TYPE_PARTIAL_LINE;
2812
2813         if (event)
2814                 return event->funcs->trace(iter, sym_flags, event);
2815
2816         trace_seq_printf(s, "Unknown type %d\n", entry->type);
2817
2818         return trace_handle_return(s);
2819 }
2820
2821 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2822 {
2823         struct trace_array *tr = iter->tr;
2824         struct trace_seq *s = &iter->seq;
2825         struct trace_entry *entry;
2826         struct trace_event *event;
2827
2828         entry = iter->ent;
2829
2830         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
2831                 trace_seq_printf(s, "%d %d %llu ",
2832                                  entry->pid, iter->cpu, iter->ts);
2833
2834         if (trace_seq_has_overflowed(s))
2835                 return TRACE_TYPE_PARTIAL_LINE;
2836
2837         event = ftrace_find_event(entry->type);
2838         if (event)
2839                 return event->funcs->raw(iter, 0, event);
2840
2841         trace_seq_printf(s, "%d ?\n", entry->type);
2842
2843         return trace_handle_return(s);
2844 }
2845
2846 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2847 {
2848         struct trace_array *tr = iter->tr;
2849         struct trace_seq *s = &iter->seq;
2850         unsigned char newline = '\n';
2851         struct trace_entry *entry;
2852         struct trace_event *event;
2853
2854         entry = iter->ent;
2855
2856         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2857                 SEQ_PUT_HEX_FIELD(s, entry->pid);
2858                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
2859                 SEQ_PUT_HEX_FIELD(s, iter->ts);
2860                 if (trace_seq_has_overflowed(s))
2861                         return TRACE_TYPE_PARTIAL_LINE;
2862         }
2863
2864         event = ftrace_find_event(entry->type);
2865         if (event) {
2866                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2867                 if (ret != TRACE_TYPE_HANDLED)
2868                         return ret;
2869         }
2870
2871         SEQ_PUT_FIELD(s, newline);
2872
2873         return trace_handle_return(s);
2874 }
2875
2876 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2877 {
2878         struct trace_array *tr = iter->tr;
2879         struct trace_seq *s = &iter->seq;
2880         struct trace_entry *entry;
2881         struct trace_event *event;
2882
2883         entry = iter->ent;
2884
2885         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2886                 SEQ_PUT_FIELD(s, entry->pid);
2887                 SEQ_PUT_FIELD(s, iter->cpu);
2888                 SEQ_PUT_FIELD(s, iter->ts);
2889                 if (trace_seq_has_overflowed(s))
2890                         return TRACE_TYPE_PARTIAL_LINE;
2891         }
2892
2893         event = ftrace_find_event(entry->type);
2894         return event ? event->funcs->binary(iter, 0, event) :
2895                 TRACE_TYPE_HANDLED;
2896 }
2897
2898 int trace_empty(struct trace_iterator *iter)
2899 {
2900         struct ring_buffer_iter *buf_iter;
2901         int cpu;
2902
2903         /* If we are looking at one CPU buffer, only check that one */
2904         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2905                 cpu = iter->cpu_file;
2906                 buf_iter = trace_buffer_iter(iter, cpu);
2907                 if (buf_iter) {
2908                         if (!ring_buffer_iter_empty(buf_iter))
2909                                 return 0;
2910                 } else {
2911                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2912                                 return 0;
2913                 }
2914                 return 1;
2915         }
2916
2917         for_each_tracing_cpu(cpu) {
2918                 buf_iter = trace_buffer_iter(iter, cpu);
2919                 if (buf_iter) {
2920                         if (!ring_buffer_iter_empty(buf_iter))
2921                                 return 0;
2922                 } else {
2923                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2924                                 return 0;
2925                 }
2926         }
2927
2928         return 1;
2929 }
2930
2931 /*  Called with trace_event_read_lock() held. */
2932 enum print_line_t print_trace_line(struct trace_iterator *iter)
2933 {
2934         struct trace_array *tr = iter->tr;
2935         unsigned long trace_flags = tr->trace_flags;
2936         enum print_line_t ret;
2937
2938         if (iter->lost_events) {
2939                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2940                                  iter->cpu, iter->lost_events);
2941                 if (trace_seq_has_overflowed(&iter->seq))
2942                         return TRACE_TYPE_PARTIAL_LINE;
2943         }
2944
2945         if (iter->trace && iter->trace->print_line) {
2946                 ret = iter->trace->print_line(iter);
2947                 if (ret != TRACE_TYPE_UNHANDLED)
2948                         return ret;
2949         }
2950
2951         if (iter->ent->type == TRACE_BPUTS &&
2952                         trace_flags & TRACE_ITER_PRINTK &&
2953                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2954                 return trace_print_bputs_msg_only(iter);
2955
2956         if (iter->ent->type == TRACE_BPRINT &&
2957                         trace_flags & TRACE_ITER_PRINTK &&
2958                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2959                 return trace_print_bprintk_msg_only(iter);
2960
2961         if (iter->ent->type == TRACE_PRINT &&
2962                         trace_flags & TRACE_ITER_PRINTK &&
2963                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2964                 return trace_print_printk_msg_only(iter);
2965
2966         if (trace_flags & TRACE_ITER_BIN)
2967                 return print_bin_fmt(iter);
2968
2969         if (trace_flags & TRACE_ITER_HEX)
2970                 return print_hex_fmt(iter);
2971
2972         if (trace_flags & TRACE_ITER_RAW)
2973                 return print_raw_fmt(iter);
2974
2975         return print_trace_fmt(iter);
2976 }
2977
2978 void trace_latency_header(struct seq_file *m)
2979 {
2980         struct trace_iterator *iter = m->private;
2981         struct trace_array *tr = iter->tr;
2982
2983         /* print nothing if the buffers are empty */
2984         if (trace_empty(iter))
2985                 return;
2986
2987         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2988                 print_trace_header(m, iter);
2989
2990         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
2991                 print_lat_help_header(m);
2992 }
2993
2994 void trace_default_header(struct seq_file *m)
2995 {
2996         struct trace_iterator *iter = m->private;
2997         struct trace_array *tr = iter->tr;
2998         unsigned long trace_flags = tr->trace_flags;
2999
3000         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3001                 return;
3002
3003         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3004                 /* print nothing if the buffers are empty */
3005                 if (trace_empty(iter))
3006                         return;
3007                 print_trace_header(m, iter);
3008                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3009                         print_lat_help_header(m);
3010         } else {
3011                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3012                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3013                                 print_func_help_header_irq(iter->trace_buffer, m);
3014                         else
3015                                 print_func_help_header(iter->trace_buffer, m);
3016                 }
3017         }
3018 }
3019
3020 static void test_ftrace_alive(struct seq_file *m)
3021 {
3022         if (!ftrace_is_dead())
3023                 return;
3024         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3025                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3026 }
3027
3028 #ifdef CONFIG_TRACER_MAX_TRACE
3029 static void show_snapshot_main_help(struct seq_file *m)
3030 {
3031         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3032                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3033                     "#                      Takes a snapshot of the main buffer.\n"
3034                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3035                     "#                      (Doesn't have to be '2' works with any number that\n"
3036                     "#                       is not a '0' or '1')\n");
3037 }
3038
3039 static void show_snapshot_percpu_help(struct seq_file *m)
3040 {
3041         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3042 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3043         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3044                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3045 #else
3046         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3047                     "#                     Must use main snapshot file to allocate.\n");
3048 #endif
3049         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3050                     "#                      (Doesn't have to be '2' works with any number that\n"
3051                     "#                       is not a '0' or '1')\n");
3052 }
3053
3054 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3055 {
3056         if (iter->tr->allocated_snapshot)
3057                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3058         else
3059                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3060
3061         seq_puts(m, "# Snapshot commands:\n");
3062         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3063                 show_snapshot_main_help(m);
3064         else
3065                 show_snapshot_percpu_help(m);
3066 }
3067 #else
3068 /* Should never be called */
3069 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3070 #endif
3071
3072 static int s_show(struct seq_file *m, void *v)
3073 {
3074         struct trace_iterator *iter = v;
3075         int ret;
3076
3077         if (iter->ent == NULL) {
3078                 if (iter->tr) {
3079                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3080                         seq_puts(m, "#\n");
3081                         test_ftrace_alive(m);
3082                 }
3083                 if (iter->snapshot && trace_empty(iter))
3084                         print_snapshot_help(m, iter);
3085                 else if (iter->trace && iter->trace->print_header)
3086                         iter->trace->print_header(m);
3087                 else
3088                         trace_default_header(m);
3089
3090         } else if (iter->leftover) {
3091                 /*
3092                  * If we filled the seq_file buffer earlier, we
3093                  * want to just show it now.
3094                  */
3095                 ret = trace_print_seq(m, &iter->seq);
3096
3097                 /* ret should this time be zero, but you never know */
3098                 iter->leftover = ret;
3099
3100         } else {
3101                 print_trace_line(iter);
3102                 ret = trace_print_seq(m, &iter->seq);
3103                 /*
3104                  * If we overflow the seq_file buffer, then it will
3105                  * ask us for this data again at start up.
3106                  * Use that instead.
3107                  *  ret is 0 if seq_file write succeeded.
3108                  *        -1 otherwise.
3109                  */
3110                 iter->leftover = ret;
3111         }
3112
3113         return 0;
3114 }
3115
3116 /*
3117  * Should be used after trace_array_get(); trace_types_lock
3118  * ensures that i_cdev was already initialized.
3119  */
3120 static inline int tracing_get_cpu(struct inode *inode)
3121 {
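        /* i_cdev stores cpu + 1, so a NULL i_cdev means "all CPUs" */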
3122         if (inode->i_cdev) /* See trace_create_cpu_file() */
3123                 return (long)inode->i_cdev - 1;
3124         return RING_BUFFER_ALL_CPUS;
3125 }
3126
3127 static const struct seq_operations tracer_seq_ops = {
3128         .start          = s_start,
3129         .next           = s_next,
3130         .stop           = s_stop,
3131         .show           = s_show,
3132 };
3133
3134 static struct trace_iterator *
3135 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3136 {
3137         struct trace_array *tr = inode->i_private;
3138         struct trace_iterator *iter;
3139         int cpu;
3140
3141         if (tracing_disabled)
3142                 return ERR_PTR(-ENODEV);
3143
3144         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3145         if (!iter)
3146                 return ERR_PTR(-ENOMEM);
3147
3148         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3149                                     GFP_KERNEL);
3150         if (!iter->buffer_iter)
3151                 goto release;
3152
3153         /*
3154          * We make a copy of the current tracer to avoid concurrent
3155          * changes to it while we are reading.
3156          */
3157         mutex_lock(&trace_types_lock);
3158         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3159         if (!iter->trace)
3160                 goto fail;
3161
3162         *iter->trace = *tr->current_trace;
3163
3164         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3165                 goto fail;
3166
3167         iter->tr = tr;
3168
3169 #ifdef CONFIG_TRACER_MAX_TRACE
3170         /* Currently only the top directory has a snapshot */
3171         if (tr->current_trace->print_max || snapshot)
3172                 iter->trace_buffer = &tr->max_buffer;
3173         else
3174 #endif
3175                 iter->trace_buffer = &tr->trace_buffer;
3176         iter->snapshot = snapshot;
3177         iter->pos = -1;
3178         iter->cpu_file = tracing_get_cpu(inode);
3179         mutex_init(&iter->mutex);
3180
3181         /* Notify the tracer early; before we stop tracing. */
3182         if (iter->trace && iter->trace->open)
3183                 iter->trace->open(iter);
3184
3185         /* Annotate start of buffers if we had overruns */
3186         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3187                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3188
3189         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3190         if (trace_clocks[tr->clock_id].in_ns)
3191                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3192
3193         /* stop the trace while dumping if we are not opening "snapshot" */
3194         if (!iter->snapshot)
3195                 tracing_stop_tr(tr);
3196
3197         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3198                 for_each_tracing_cpu(cpu) {
3199                         iter->buffer_iter[cpu] =
3200                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3201                 }
3202                 ring_buffer_read_prepare_sync();
3203                 for_each_tracing_cpu(cpu) {
3204                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3205                         tracing_iter_reset(iter, cpu);
3206                 }
3207         } else {
3208                 cpu = iter->cpu_file;
3209                 iter->buffer_iter[cpu] =
3210                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3211                 ring_buffer_read_prepare_sync();
3212                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3213                 tracing_iter_reset(iter, cpu);
3214         }
3215
3216         mutex_unlock(&trace_types_lock);
3217
3218         return iter;
3219
3220  fail:
3221         mutex_unlock(&trace_types_lock);
3222         kfree(iter->trace);
3223         kfree(iter->buffer_iter);
3224 release:
3225         seq_release_private(inode, file);
3226         return ERR_PTR(-ENOMEM);
3227 }
3228
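/*
 * Generic open: stash the inode's private data in the file so the
 * read/write handlers can find their trace_array or other object.
 */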
3229 int tracing_open_generic(struct inode *inode, struct file *filp)
3230 {
3231         if (tracing_disabled)
3232                 return -ENODEV;
3233
3234         filp->private_data = inode->i_private;
3235         return 0;
3236 }
3237
3238 bool tracing_is_disabled(void)
3239 {
3240         return tracing_disabled ? true : false;
3241 }
3242
3243 /*
3244  * Open and update trace_array ref count.
3245  * Must have the current trace_array passed to it.
3246  */
3247 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3248 {
3249         struct trace_array *tr = inode->i_private;
3250
3251         if (tracing_disabled)
3252                 return -ENODEV;
3253
3254         if (trace_array_get(tr) < 0)
3255                 return -ENODEV;
3256
3257         filp->private_data = inode->i_private;
3258
3259         return 0;
3260 }
3261
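/*
 * Release the "trace" file.  For readers, tear down the per-cpu ring
 * buffer iterators, let the tracer clean up, restart tracing if it was
 * stopped on open, and free what __tracing_open() allocated; write-only
 * opens just drop the trace_array reference.
 */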
3262 static int tracing_release(struct inode *inode, struct file *file)
3263 {
3264         struct trace_array *tr = inode->i_private;
3265         struct seq_file *m = file->private_data;
3266         struct trace_iterator *iter;
3267         int cpu;
3268
3269         if (!(file->f_mode & FMODE_READ)) {
3270                 trace_array_put(tr);
3271                 return 0;
3272         }
3273
3274         /* Writes do not use seq_file */
3275         iter = m->private;
3276         mutex_lock(&trace_types_lock);
3277
3278         for_each_tracing_cpu(cpu) {
3279                 if (iter->buffer_iter[cpu])
3280                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3281         }
3282
3283         if (iter->trace && iter->trace->close)
3284                 iter->trace->close(iter);
3285
3286         if (!iter->snapshot)
3287                 /* reenable tracing if it was previously enabled */
3288                 tracing_start_tr(tr);
3289
3290         __trace_array_put(tr);
3291
3292         mutex_unlock(&trace_types_lock);
3293
3294         mutex_destroy(&iter->mutex);
3295         free_cpumask_var(iter->started);
3296         kfree(iter->trace);
3297         kfree(iter->buffer_iter);
3298         seq_release_private(inode, file);
3299
3300         return 0;
3301 }
3302
3303 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3304 {
3305         struct trace_array *tr = inode->i_private;
3306
3307         trace_array_put(tr);
3308         return 0;
3309 }
3310
3311 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3312 {
3313         struct trace_array *tr = inode->i_private;
3314
3315         trace_array_put(tr);
3316
3317         return single_release(inode, file);
3318 }
3319
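/*
 * Open the "trace" file.  Opening for write with O_TRUNC clears the
 * buffer for the selected CPU (or all CPUs); opening for read builds a
 * full iterator via __tracing_open().
 */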
3320 static int tracing_open(struct inode *inode, struct file *file)
3321 {
3322         struct trace_array *tr = inode->i_private;
3323         struct trace_iterator *iter;
3324         int ret = 0;
3325
3326         if (trace_array_get(tr) < 0)
3327                 return -ENODEV;
3328
3329         /* If this file was open for write, then erase contents */
3330         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3331                 int cpu = tracing_get_cpu(inode);
3332
3333                 if (cpu == RING_BUFFER_ALL_CPUS)
3334                         tracing_reset_online_cpus(&tr->trace_buffer);
3335                 else
3336                         tracing_reset(&tr->trace_buffer, cpu);
3337         }
3338
3339         if (file->f_mode & FMODE_READ) {
3340                 iter = __tracing_open(inode, file, false);
3341                 if (IS_ERR(iter))
3342                         ret = PTR_ERR(iter);
3343                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3344                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3345         }
3346
3347         if (ret < 0)
3348                 trace_array_put(tr);
3349
3350         return ret;
3351 }
3352
3353 /*
3354  * Some tracers are not suitable for instance buffers.
3355  * A tracer is always available for the global (top level) array,
3356  * or for an instance if it explicitly states that it allows instances.
3357  */
3358 static bool
3359 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3360 {
3361         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3362 }
3363
3364 /* Find the next tracer that this trace array may use */
3365 static struct tracer *
3366 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3367 {
3368         while (t && !trace_ok_for_array(t, tr))
3369                 t = t->next;
3370
3371         return t;
3372 }
3373
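/*
 * seq_file iterator for "available_tracers": walks the global list of
 * registered tracers, skipping those that are not usable by this
 * trace array (see trace_ok_for_array()).
 */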
3374 static void *
3375 t_next(struct seq_file *m, void *v, loff_t *pos)
3376 {
3377         struct trace_array *tr = m->private;
3378         struct tracer *t = v;
3379
3380         (*pos)++;
3381
3382         if (t)
3383                 t = get_tracer_for_array(tr, t->next);
3384
3385         return t;
3386 }
3387
3388 static void *t_start(struct seq_file *m, loff_t *pos)
3389 {
3390         struct trace_array *tr = m->private;
3391         struct tracer *t;
3392         loff_t l = 0;
3393
3394         mutex_lock(&trace_types_lock);
3395
3396         t = get_tracer_for_array(tr, trace_types);
3397         for (; t && l < *pos; t = t_next(m, t, &l))
3398                 ;
3399
3400         return t;
3401 }
3402
3403 static void t_stop(struct seq_file *m, void *p)
3404 {
3405         mutex_unlock(&trace_types_lock);
3406 }
3407
3408 static int t_show(struct seq_file *m, void *v)
3409 {
3410         struct tracer *t = v;
3411
3412         if (!t)
3413                 return 0;
3414
3415         seq_puts(m, t->name);
3416         if (t->next)
3417                 seq_putc(m, ' ');
3418         else
3419                 seq_putc(m, '\n');
3420
3421         return 0;
3422 }
3423
3424 static const struct seq_operations show_traces_seq_ops = {
3425         .start          = t_start,
3426         .next           = t_next,
3427         .stop           = t_stop,
3428         .show           = t_show,
3429 };
3430
3431 static int show_traces_open(struct inode *inode, struct file *file)
3432 {
3433         struct trace_array *tr = inode->i_private;
3434         struct seq_file *m;
3435         int ret;
3436
3437         if (tracing_disabled)
3438                 return -ENODEV;
3439
3440         ret = seq_open(file, &show_traces_seq_ops);
3441         if (ret)
3442                 return ret;
3443
3444         m = file->private_data;
3445         m->private = tr;
3446
3447         return 0;
3448 }
3449
3450 static ssize_t
3451 tracing_write_stub(struct file *filp, const char __user *ubuf,
3452                    size_t count, loff_t *ppos)
3453 {
3454         return count;
3455 }
3456
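/*
 * Seek on tracing files: only files opened for read go through the
 * seq_file machinery; write-only opens simply reset the position to zero.
 */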
3457 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3458 {
3459         int ret;
3460
3461         if (file->f_mode & FMODE_READ)
3462                 ret = seq_lseek(file, offset, whence);
3463         else
3464                 file->f_pos = ret = 0;
3465
3466         return ret;
3467 }
3468
3469 static const struct file_operations tracing_fops = {
3470         .open           = tracing_open,
3471         .read           = seq_read,
3472         .write          = tracing_write_stub,
3473         .llseek         = tracing_lseek,
3474         .release        = tracing_release,
3475 };
3476
3477 static const struct file_operations show_traces_fops = {
3478         .open           = show_traces_open,
3479         .read           = seq_read,
3480         .release        = seq_release,
3481         .llseek         = seq_lseek,
3482 };
3483
3484 /*
3485  * The tracer itself will not take this lock, but still we want
3486  * to provide a consistent cpumask to user-space:
3487  */
3488 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3489
3490 /*
3491  * Temporary storage for the character representation of the
3492  * CPU bitmask (and one more byte for the newline):
3493  */
3494 static char mask_str[NR_CPUS + 1];
3495
3496 static ssize_t
3497 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3498                      size_t count, loff_t *ppos)
3499 {
3500         struct trace_array *tr = file_inode(filp)->i_private;
3501         int len;
3502
3503         mutex_lock(&tracing_cpumask_update_lock);
3504
3505         len = snprintf(mask_str, count, "%*pb\n",
3506                        cpumask_pr_args(tr->tracing_cpumask));
3507         if (len >= count) {
3508                 count = -EINVAL;
3509                 goto out_err;
3510         }
3511         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3512
3513 out_err:
3514         mutex_unlock(&tracing_cpumask_update_lock);
3515
3516         return count;
3517 }
3518
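/*
 * Update which CPUs are traced.  User space writes a hex CPU mask
 * (e.g. "echo 3 > tracing_cpumask" to trace only CPUs 0 and 1); CPUs
 * being removed from the mask have their per-cpu recording disabled,
 * and CPUs being added have it re-enabled.
 */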
3519 static ssize_t
3520 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3521                       size_t count, loff_t *ppos)
3522 {
3523         struct trace_array *tr = file_inode(filp)->i_private;
3524         cpumask_var_t tracing_cpumask_new;
3525         int err, cpu;
3526
3527         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3528                 return -ENOMEM;
3529
3530         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3531         if (err)
3532                 goto err_unlock;
3533
3534         mutex_lock(&tracing_cpumask_update_lock);
3535
3536         local_irq_disable();
3537         arch_spin_lock(&tr->max_lock);
3538         for_each_tracing_cpu(cpu) {
3539                 /*
3540                  * Increase/decrease the disabled counter if we are
3541                  * about to flip a bit in the cpumask:
3542                  */
3543                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3544                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3545                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3546                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3547                 }
3548                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3549                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3550                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3551                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3552                 }
3553         }
3554         arch_spin_unlock(&tr->max_lock);
3555         local_irq_enable();
3556
3557         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3558
3559         mutex_unlock(&tracing_cpumask_update_lock);
3560         free_cpumask_var(tracing_cpumask_new);
3561
3562         return count;
3563
3564 err_unlock:
3565         free_cpumask_var(tracing_cpumask_new);
3566
3567         return err;
3568 }
3569
3570 static const struct file_operations tracing_cpumask_fops = {
3571         .open           = tracing_open_generic_tr,
3572         .read           = tracing_cpumask_read,
3573         .write          = tracing_cpumask_write,
3574         .release        = tracing_release_generic_tr,
3575         .llseek         = generic_file_llseek,
3576 };
3577
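/*
 * Show the state of every trace option, both core and tracer-specific.
 * Enabled options are listed by name, disabled ones with a "no" prefix.
 */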
3578 static int tracing_trace_options_show(struct seq_file *m, void *v)
3579 {
3580         struct tracer_opt *trace_opts;
3581         struct trace_array *tr = m->private;
3582         u32 tracer_flags;
3583         int i;
3584
3585         mutex_lock(&trace_types_lock);
3586         tracer_flags = tr->current_trace->flags->val;
3587         trace_opts = tr->current_trace->flags->opts;
3588
3589         for (i = 0; trace_options[i]; i++) {
3590                 if (tr->trace_flags & (1 << i))
3591                         seq_printf(m, "%s\n", trace_options[i]);
3592                 else
3593                         seq_printf(m, "no%s\n", trace_options[i]);
3594         }
3595
3596         for (i = 0; trace_opts[i].name; i++) {
3597                 if (tracer_flags & trace_opts[i].bit)
3598                         seq_printf(m, "%s\n", trace_opts[i].name);
3599                 else
3600                         seq_printf(m, "no%s\n", trace_opts[i].name);
3601         }
3602         mutex_unlock(&trace_types_lock);
3603
3604         return 0;
3605 }
3606
3607 static int __set_tracer_option(struct trace_array *tr,
3608                                struct tracer_flags *tracer_flags,
3609                                struct tracer_opt *opts, int neg)
3610 {
3611         struct tracer *trace = tracer_flags->trace;
3612         int ret;
3613
3614         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3615         if (ret)
3616                 return ret;
3617
3618         if (neg)
3619                 tracer_flags->val &= ~opts->bit;
3620         else
3621                 tracer_flags->val |= opts->bit;
3622         return 0;
3623 }
3624
3625 /* Try to assign a tracer specific option */
3626 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3627 {
3628         struct tracer *trace = tr->current_trace;
3629         struct tracer_flags *tracer_flags = trace->flags;
3630         struct tracer_opt *opts = NULL;
3631         int i;
3632
3633         for (i = 0; tracer_flags->opts[i].name; i++) {
3634                 opts = &tracer_flags->opts[i];
3635
3636                 if (strcmp(cmp, opts->name) == 0)
3637                         return __set_tracer_option(tr, trace->flags, opts, neg);
3638         }
3639
3640         return -EINVAL;
3641 }
3642
3643 /* Some tracers require overwrite to stay enabled */
3644 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3645 {
3646         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3647                 return -1;
3648
3649         return 0;
3650 }
3651
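/*
 * Set or clear one of the core TRACE_ITER_* flags for a trace array.
 * The current tracer may veto the change, and a few flags need extra
 * work (command recording, fork following, overwrite mode, printk).
 */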
3652 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3653 {
3654         /* do nothing if flag is already set */
3655         if (!!(tr->trace_flags & mask) == !!enabled)
3656                 return 0;
3657
3658         /* Give the tracer a chance to approve the change */
3659         if (tr->current_trace->flag_changed)
3660                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3661                         return -EINVAL;
3662
3663         if (enabled)
3664                 tr->trace_flags |= mask;
3665         else
3666                 tr->trace_flags &= ~mask;
3667
3668         if (mask == TRACE_ITER_RECORD_CMD)
3669                 trace_event_enable_cmd_record(enabled);
3670
3671         if (mask == TRACE_ITER_EVENT_FORK)
3672                 trace_event_follow_fork(tr, enabled);
3673
3674         if (mask == TRACE_ITER_OVERWRITE) {
3675                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3676 #ifdef CONFIG_TRACER_MAX_TRACE
3677                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3678 #endif
3679         }
3680
3681         if (mask == TRACE_ITER_PRINTK) {
3682                 trace_printk_start_stop_comm(enabled);
3683                 trace_printk_control(enabled);
3684         }
3685
3686         return 0;
3687 }
3688
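/*
 * Parse a single option string from "trace_options" or the boot command
 * line.  "<option>" sets a flag, "no<option>" clears it; anything not in
 * the core list is tried as a tracer-specific option.
 */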
3689 static int trace_set_options(struct trace_array *tr, char *option)
3690 {
3691         char *cmp;
3692         int neg = 0;
3693         int ret = -ENODEV;
3694         int i;
3695         size_t orig_len = strlen(option);
3696
3697         cmp = strstrip(option);
3698
3699         if (strncmp(cmp, "no", 2) == 0) {
3700                 neg = 1;
3701                 cmp += 2;
3702         }
3703
3704         mutex_lock(&trace_types_lock);
3705
3706         for (i = 0; trace_options[i]; i++) {
3707                 if (strcmp(cmp, trace_options[i]) == 0) {
3708                         ret = set_tracer_flag(tr, 1 << i, !neg);
3709                         break;
3710                 }
3711         }
3712
3713         /* If no option could be set, test the specific tracer options */
3714         if (!trace_options[i])
3715                 ret = set_tracer_option(tr, cmp, neg);
3716
3717         mutex_unlock(&trace_types_lock);
3718
3719         /*
3720          * If the first trailing whitespace is replaced with '\0' by strstrip,
3721          * turn it back into a space.
3722          */
3723         if (orig_len > strlen(option))
3724                 option[strlen(option)] = ' ';
3725
3726         return ret;
3727 }
3728
3729 static void __init apply_trace_boot_options(void)
3730 {
3731         char *buf = trace_boot_options_buf;
3732         char *option;
3733
3734         while (true) {
3735                 option = strsep(&buf, ",");
3736
3737                 if (!option)
3738                         break;
3739
3740                 if (*option)
3741                         trace_set_options(&global_trace, option);
3742
3743                 /* Put back the comma to allow this to be called again */
3744                 if (buf)
3745                         *(buf - 1) = ',';
3746         }
3747 }
3748
3749 static ssize_t
3750 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3751                         size_t cnt, loff_t *ppos)
3752 {
3753         struct seq_file *m = filp->private_data;
3754         struct trace_array *tr = m->private;
3755         char buf[64];
3756         int ret;
3757
3758         if (cnt >= sizeof(buf))
3759                 return -EINVAL;
3760
3761         if (copy_from_user(buf, ubuf, cnt))
3762                 return -EFAULT;
3763
3764         buf[cnt] = 0;
3765
3766         ret = trace_set_options(tr, buf);
3767         if (ret < 0)
3768                 return ret;
3769
3770         *ppos += cnt;
3771
3772         return cnt;
3773 }
3774
3775 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3776 {
3777         struct trace_array *tr = inode->i_private;
3778         int ret;
3779
3780         if (tracing_disabled)
3781                 return -ENODEV;
3782
3783         if (trace_array_get(tr) < 0)
3784                 return -ENODEV;
3785
3786         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3787         if (ret < 0)
3788                 trace_array_put(tr);
3789
3790         return ret;
3791 }
3792
3793 static const struct file_operations tracing_iter_fops = {
3794         .open           = tracing_trace_options_open,
3795         .read           = seq_read,
3796         .llseek         = seq_lseek,
3797         .release        = tracing_single_release_tr,
3798         .write          = tracing_trace_options_write,
3799 };
3800
3801 static const char readme_msg[] =
3802         "tracing mini-HOWTO:\n\n"
3803         "# echo 0 > tracing_on : quick way to disable tracing\n"
3804         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3805         " Important files:\n"
3806         "  trace\t\t\t- The static contents of the buffer\n"
3807         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3808         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3809         "  current_tracer\t- function and latency tracers\n"
3810         "  available_tracers\t- list of configured tracers for current_tracer\n"
3811         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3812         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3813         "  trace_clock\t\t- change the clock used to order events\n"
3814         "       local:   Per cpu clock but may not be synced across CPUs\n"
3815         "      global:   Synced across CPUs but slows tracing down.\n"
3816         "     counter:   Not a clock, but just an increment\n"
3817         "      uptime:   Jiffy counter from time of boot\n"
3818         "        perf:   Same clock that perf events use\n"
3819 #ifdef CONFIG_X86_64
3820         "     x86-tsc:   TSC cycle counter\n"
3821 #endif
3822         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
3823         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3824         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3825         "\t\t\t  Remove sub-buffer with rmdir\n"
3826         "  trace_options\t\t- Set format or modify how tracing happens\n"
3827         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
3828         "\t\t\t  option name\n"
3829         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
3830 #ifdef CONFIG_DYNAMIC_FTRACE
3831         "\n  available_filter_functions - list of functions that can be filtered on\n"
3832         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3833         "\t\t\t  functions\n"
3834         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3835         "\t     modules: Can select a group via module\n"
3836         "\t      Format: :mod:<module-name>\n"
3837         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3838         "\t    triggers: a command to perform when function is hit\n"
3839         "\t      Format: <function>:<trigger>[:count]\n"
3840         "\t     trigger: traceon, traceoff\n"
3841         "\t\t      enable_event:<system>:<event>\n"
3842         "\t\t      disable_event:<system>:<event>\n"
3843 #ifdef CONFIG_STACKTRACE
3844         "\t\t      stacktrace\n"
3845 #endif
3846 #ifdef CONFIG_TRACER_SNAPSHOT
3847         "\t\t      snapshot\n"
3848 #endif
3849         "\t\t      dump\n"
3850         "\t\t      cpudump\n"
3851         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3852         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3853         "\t     The first one will disable tracing every time do_fault is hit\n"
3854         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3855         "\t       The first time do_trap is hit and it disables tracing, the\n"
3856         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3857         "\t       the counter will not decrement. It only decrements when the\n"
3858         "\t       trigger did work\n"
3859         "\t     To remove trigger without count:\n"
3860         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
3861         "\t     To remove trigger with a count:\n"
3862         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
3863         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3864         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3865         "\t    modules: Can select a group via module command :mod:\n"
3866         "\t    Does not accept triggers\n"
3867 #endif /* CONFIG_DYNAMIC_FTRACE */
3868 #ifdef CONFIG_FUNCTION_TRACER
3869         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3870         "\t\t    (function)\n"
3871 #endif
3872 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3873         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3874         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3875         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3876 #endif
3877 #ifdef CONFIG_TRACER_SNAPSHOT
3878         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3879         "\t\t\t  snapshot buffer. Read the contents for more\n"
3880         "\t\t\t  information\n"
3881 #endif
3882 #ifdef CONFIG_STACK_TRACER
3883         "  stack_trace\t\t- Shows the max stack trace when active\n"
3884         "  stack_max_size\t- Shows current max stack size that was traced\n"
3885         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3886         "\t\t\t  new trace)\n"
3887 #ifdef CONFIG_DYNAMIC_FTRACE
3888         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3889         "\t\t\t  traces\n"
3890 #endif
3891 #endif /* CONFIG_STACK_TRACER */
3892         "  events/\t\t- Directory containing all trace event subsystems:\n"
3893         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3894         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3895         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3896         "\t\t\t  events\n"
3897         "      filter\t\t- If set, only events passing filter are traced\n"
3898         "  events/<system>/<event>/\t- Directory containing control files for\n"
3899         "\t\t\t  <event>:\n"
3900         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3901         "      filter\t\t- If set, only events passing filter are traced\n"
3902         "      trigger\t\t- If set, a command to perform when event is hit\n"
3903         "\t    Format: <trigger>[:count][if <filter>]\n"
3904         "\t   trigger: traceon, traceoff\n"
3905         "\t            enable_event:<system>:<event>\n"
3906         "\t            disable_event:<system>:<event>\n"
3907 #ifdef CONFIG_HIST_TRIGGERS
3908         "\t            enable_hist:<system>:<event>\n"
3909         "\t            disable_hist:<system>:<event>\n"
3910 #endif
3911 #ifdef CONFIG_STACKTRACE
3912         "\t\t    stacktrace\n"
3913 #endif
3914 #ifdef CONFIG_TRACER_SNAPSHOT
3915         "\t\t    snapshot\n"
3916 #endif
3917 #ifdef CONFIG_HIST_TRIGGERS
3918         "\t\t    hist (see below)\n"
3919 #endif
3920         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3921         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3922         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3923         "\t                  events/block/block_unplug/trigger\n"
3924         "\t   The first disables tracing every time block_unplug is hit.\n"
3925         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3926         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3927         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3928         "\t   Like function triggers, the counter is only decremented if it\n"
3929         "\t    enabled or disabled tracing.\n"
3930         "\t   To remove a trigger without a count:\n"
3931         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
3932         "\t   To remove a trigger with a count:\n"
3933         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
3934         "\t   Filters can be ignored when removing a trigger.\n"
3935 #ifdef CONFIG_HIST_TRIGGERS
3936         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
3937         "\t    Format: hist:keys=<field1[,field2,...]>\n"
3938         "\t            [:values=<field1[,field2,...]>]\n"
3939         "\t            [:sort=<field1[,field2,...]>]\n"
3940         "\t            [:size=#entries]\n"
3941         "\t            [:pause][:continue][:clear]\n"
3942         "\t            [:name=histname1]\n"
3943         "\t            [if <filter>]\n\n"
3944         "\t    When a matching event is hit, an entry is added to a hash\n"
3945         "\t    table using the key(s) and value(s) named, and the value of a\n"
3946         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
3947         "\t    correspond to fields in the event's format description.  Keys\n"
3948         "\t    can be any field, or the special string 'stacktrace'.\n"
3949         "\t    Compound keys consisting of up to two fields can be specified\n"
3950         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
3951         "\t    fields.  Sort keys consisting of up to two fields can be\n"
3952         "\t    specified using the 'sort' keyword.  The sort direction can\n"
3953         "\t    be modified by appending '.descending' or '.ascending' to a\n"
3954         "\t    sort field.  The 'size' parameter can be used to specify more\n"
3955         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
3956         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
3957         "\t    its histogram data will be shared with other triggers of the\n"
3958         "\t    same name, and trigger hits will update this common data.\n\n"
3959         "\t    Reading the 'hist' file for the event will dump the hash\n"
3960         "\t    table in its entirety to stdout.  If there are multiple hist\n"
3961         "\t    triggers attached to an event, there will be a table for each\n"
3962         "\t    trigger in the output.  The table displayed for a named\n"
3963         "\t    trigger will be the same as any other instance having the\n"
3964         "\t    same name.  The default format used to display a given field\n"
3965         "\t    can be modified by appending any of the following modifiers\n"
3966         "\t    to the field name, as applicable:\n\n"
3967         "\t            .hex        display a number as a hex value\n"
3968         "\t            .sym        display an address as a symbol\n"
3969         "\t            .sym-offset display an address as a symbol and offset\n"
3970         "\t            .execname   display a common_pid as a program name\n"
3971         "\t            .syscall    display a syscall id as a syscall name\n"
3972         "\t            .log2       display log2 value rather than raw number\n\n"
3973         "\t    The 'pause' parameter can be used to pause an existing hist\n"
3974         "\t    trigger or to start a hist trigger but not log any events\n"
3975         "\t    until told to do so.  'continue' can be used to start or\n"
3976         "\t    restart a paused hist trigger.\n\n"
3977         "\t    The 'clear' parameter will clear the contents of a running\n"
3978         "\t    hist trigger and leave its current paused/active state\n"
3979         "\t    unchanged.\n\n"
3980         "\t    The enable_hist and disable_hist triggers can be used to\n"
3981         "\t    have one event conditionally start and stop another event's\n"
3982         "\t    already-attached hist trigger.  The syntax is analogous to\n"
3983         "\t    the enable_event and disable_event triggers.\n"
3984 #endif
3985 ;
3986
3987 static ssize_t
3988 tracing_readme_read(struct file *filp, char __user *ubuf,
3989                        size_t cnt, loff_t *ppos)
3990 {
3991         return simple_read_from_buffer(ubuf, cnt, ppos,
3992                                         readme_msg, strlen(readme_msg));
3993 }
3994
3995 static const struct file_operations tracing_readme_fops = {
3996         .open           = tracing_open_generic,
3997         .read           = tracing_readme_read,
3998         .llseek         = generic_file_llseek,
3999 };
4000
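/*
 * seq_file iterator for "saved_cmdlines": walks the pid map under
 * trace_cmdline_lock and prints one "pid comm" pair per line.
 */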
4001 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4002 {
4003         unsigned int *ptr = v;
4004
4005         if (*pos || m->count)
4006                 ptr++;
4007
4008         (*pos)++;
4009
4010         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4011              ptr++) {
4012                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4013                         continue;
4014
4015                 return ptr;
4016         }
4017
4018         return NULL;
4019 }
4020
4021 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4022 {
4023         void *v;
4024         loff_t l = 0;
4025
4026         preempt_disable();
4027         arch_spin_lock(&trace_cmdline_lock);
4028
4029         v = &savedcmd->map_cmdline_to_pid[0];
4030         while (l <= *pos) {
4031                 v = saved_cmdlines_next(m, v, &l);
4032                 if (!v)
4033                         return NULL;
4034         }
4035
4036         return v;
4037 }
4038
4039 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4040 {
4041         arch_spin_unlock(&trace_cmdline_lock);
4042         preempt_enable();
4043 }
4044
4045 static int saved_cmdlines_show(struct seq_file *m, void *v)
4046 {
4047         char buf[TASK_COMM_LEN];
4048         unsigned int *pid = v;
4049
4050         __trace_find_cmdline(*pid, buf);
4051         seq_printf(m, "%d %s\n", *pid, buf);
4052         return 0;
4053 }
4054
4055 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4056         .start          = saved_cmdlines_start,
4057         .next           = saved_cmdlines_next,
4058         .stop           = saved_cmdlines_stop,
4059         .show           = saved_cmdlines_show,
4060 };
4061
4062 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4063 {
4064         if (tracing_disabled)
4065                 return -ENODEV;
4066
4067         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4068 }
4069
4070 static const struct file_operations tracing_saved_cmdlines_fops = {
4071         .open           = tracing_saved_cmdlines_open,
4072         .read           = seq_read,
4073         .llseek         = seq_lseek,
4074         .release        = seq_release,
4075 };
4076
4077 static ssize_t
4078 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4079                                  size_t cnt, loff_t *ppos)
4080 {
4081         char buf[64];
4082         int r;
4083
4084         arch_spin_lock(&trace_cmdline_lock);
4085         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4086         arch_spin_unlock(&trace_cmdline_lock);
4087
4088         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4089 }
4090
4091 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4092 {
4093         kfree(s->saved_cmdlines);
4094         kfree(s->map_cmdline_to_pid);
4095         kfree(s);
4096 }
4097
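/*
 * Replace the saved_cmdlines buffer with one holding @val entries.  The
 * new buffer is allocated first and then swapped in under
 * trace_cmdline_lock so readers never see a half-initialized map.
 */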
4098 static int tracing_resize_saved_cmdlines(unsigned int val)
4099 {
4100         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4101
4102         s = kmalloc(sizeof(*s), GFP_KERNEL);
4103         if (!s)
4104                 return -ENOMEM;
4105
4106         if (allocate_cmdlines_buffer(val, s) < 0) {
4107                 kfree(s);
4108                 return -ENOMEM;
4109         }
4110
4111         arch_spin_lock(&trace_cmdline_lock);
4112         savedcmd_temp = savedcmd;
4113         savedcmd = s;
4114         arch_spin_unlock(&trace_cmdline_lock);
4115         free_saved_cmdlines_buffer(savedcmd_temp);
4116
4117         return 0;
4118 }
4119
4120 static ssize_t
4121 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4122                                   size_t cnt, loff_t *ppos)
4123 {
4124         unsigned long val;
4125         int ret;
4126
4127         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4128         if (ret)
4129                 return ret;
4130
4131         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4132         if (!val || val > PID_MAX_DEFAULT)
4133                 return -EINVAL;
4134
4135         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4136         if (ret < 0)
4137                 return ret;
4138
4139         *ppos += cnt;
4140
4141         return cnt;
4142 }
4143
4144 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4145         .open           = tracing_open_generic,
4146         .read           = tracing_saved_cmdlines_size_read,
4147         .write          = tracing_saved_cmdlines_size_write,
4148 };
4149
4150 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4151 static union trace_enum_map_item *
4152 update_enum_map(union trace_enum_map_item *ptr)
4153 {
4154         if (!ptr->map.enum_string) {
4155                 if (ptr->tail.next) {
4156                         ptr = ptr->tail.next;
4157                         /* Set ptr to the next real item (skip head) */
4158                         ptr++;
4159                 } else
4160                         return NULL;
4161         }
4162         return ptr;
4163 }
4164
4165 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4166 {
4167         union trace_enum_map_item *ptr = v;
4168
4169         /*
4170          * Paranoid! If ptr points to end, we don't want to increment past it.
4171          * This really should never happen.
4172          */
4173         ptr = update_enum_map(ptr);
4174         if (WARN_ON_ONCE(!ptr))
4175                 return NULL;
4176
4177         ptr++;
4178
4179         (*pos)++;
4180
4181         ptr = update_enum_map(ptr);
4182
4183         return ptr;
4184 }
4185
4186 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4187 {
4188         union trace_enum_map_item *v;
4189         loff_t l = 0;
4190
4191         mutex_lock(&trace_enum_mutex);
4192
4193         v = trace_enum_maps;
4194         if (v)
4195                 v++;
4196
4197         while (v && l < *pos) {
4198                 v = enum_map_next(m, v, &l);
4199         }
4200
4201         return v;
4202 }
4203
4204 static void enum_map_stop(struct seq_file *m, void *v)
4205 {
4206         mutex_unlock(&trace_enum_mutex);
4207 }
4208
4209 static int enum_map_show(struct seq_file *m, void *v)
4210 {
4211         union trace_enum_map_item *ptr = v;
4212
4213         seq_printf(m, "%s %ld (%s)\n",
4214                    ptr->map.enum_string, ptr->map.enum_value,
4215                    ptr->map.system);
4216
4217         return 0;
4218 }
4219
4220 static const struct seq_operations tracing_enum_map_seq_ops = {
4221         .start          = enum_map_start,
4222         .next           = enum_map_next,
4223         .stop           = enum_map_stop,
4224         .show           = enum_map_show,
4225 };
4226
4227 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4228 {
4229         if (tracing_disabled)
4230                 return -ENODEV;
4231
4232         return seq_open(filp, &tracing_enum_map_seq_ops);
4233 }
4234
4235 static const struct file_operations tracing_enum_map_fops = {
4236         .open           = tracing_enum_map_open,
4237         .read           = seq_read,
4238         .llseek         = seq_lseek,
4239         .release        = seq_release,
4240 };
4241
4242 static inline union trace_enum_map_item *
4243 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4244 {
4245         /* Return tail of array given the head */
4246         return ptr + ptr->head.length + 1;
4247 }
4248
4249 static void
4250 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4251                            int len)
4252 {
4253         struct trace_enum_map **stop;
4254         struct trace_enum_map **map;
4255         union trace_enum_map_item *map_array;
4256         union trace_enum_map_item *ptr;
4257
4258         stop = start + len;
4259
4260         /*
4261          * The trace_enum_maps contains the map plus a head and tail item,
4262          * where the head holds the module and length of array, and the
4263          * tail holds a pointer to the next list.
4264          */
4265         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4266         if (!map_array) {
4267                 pr_warn("Unable to allocate trace enum mapping\n");
4268                 return;
4269         }
4270
4271         mutex_lock(&trace_enum_mutex);
4272
4273         if (!trace_enum_maps)
4274                 trace_enum_maps = map_array;
4275         else {
4276                 ptr = trace_enum_maps;
4277                 for (;;) {
4278                         ptr = trace_enum_jmp_to_tail(ptr);
4279                         if (!ptr->tail.next)
4280                                 break;
4281                         ptr = ptr->tail.next;
4282
4283                 }
4284                 ptr->tail.next = map_array;
4285         }
4286         map_array->head.mod = mod;
4287         map_array->head.length = len;
4288         map_array++;
4289
4290         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4291                 map_array->map = **map;
4292                 map_array++;
4293         }
4294         memset(map_array, 0, sizeof(*map_array));
4295
4296         mutex_unlock(&trace_enum_mutex);
4297 }
4298
4299 static void trace_create_enum_file(struct dentry *d_tracer)
4300 {
4301         trace_create_file("enum_map", 0444, d_tracer,
4302                           NULL, &tracing_enum_map_fops);
4303 }
4304
4305 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4306 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4307 static inline void trace_insert_enum_map_file(struct module *mod,
4308                               struct trace_enum_map **start, int len) { }
4309 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4310
4311 static void trace_insert_enum_map(struct module *mod,
4312                                   struct trace_enum_map **start, int len)
4313 {
4314         struct trace_enum_map **map;
4315
4316         if (len <= 0)
4317                 return;
4318
4319         map = start;
4320
4321         trace_event_enum_update(map, len);
4322
4323         trace_insert_enum_map_file(mod, start, len);
4324 }
4325
4326 static ssize_t
4327 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4328                        size_t cnt, loff_t *ppos)
4329 {
4330         struct trace_array *tr = filp->private_data;
4331         char buf[MAX_TRACER_SIZE+2];
4332         int r;
4333
4334         mutex_lock(&trace_types_lock);
4335         r = sprintf(buf, "%s\n", tr->current_trace->name);
4336         mutex_unlock(&trace_types_lock);
4337
4338         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4339 }
4340
4341 int tracer_init(struct tracer *t, struct trace_array *tr)
4342 {
4343         tracing_reset_online_cpus(&tr->trace_buffer);
4344         return t->init(tr);
4345 }
4346
4347 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4348 {
4349         int cpu;
4350
4351         for_each_tracing_cpu(cpu)
4352                 per_cpu_ptr(buf->data, cpu)->entries = val;
4353 }
4354
4355 #ifdef CONFIG_TRACER_MAX_TRACE
4356 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4357 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4358                                         struct trace_buffer *size_buf, int cpu_id)
4359 {
4360         int cpu, ret = 0;
4361
4362         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4363                 for_each_tracing_cpu(cpu) {
4364                         ret = ring_buffer_resize(trace_buf->buffer,
4365                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4366                         if (ret < 0)
4367                                 break;
4368                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4369                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4370                 }
4371         } else {
4372                 ret = ring_buffer_resize(trace_buf->buffer,
4373                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4374                 if (ret == 0)
4375                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4376                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4377         }
4378
4379         return ret;
4380 }
4381 #endif /* CONFIG_TRACER_MAX_TRACE */
4382
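/*
 * Resize the trace array's ring buffer, either for one CPU or for all
 * of them.  On the top level (global) array, if the current tracer uses
 * the max (snapshot) buffer, that buffer is resized as well and kept the
 * same size as the main buffer.
 */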
4383 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4384                                         unsigned long size, int cpu)
4385 {
4386         int ret;
4387
4388         /*
4389          * If kernel or user changes the size of the ring buffer
4390          * we use the size that was given, and we can forget about
4391          * expanding it later.
4392          */
4393         ring_buffer_expanded = true;
4394
4395         /* May be called before buffers are initialized */
4396         if (!tr->trace_buffer.buffer)
4397                 return 0;
4398
4399         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4400         if (ret < 0)
4401                 return ret;
4402
4403 #ifdef CONFIG_TRACER_MAX_TRACE
4404         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4405             !tr->current_trace->use_max_tr)
4406                 goto out;
4407
4408         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4409         if (ret < 0) {
4410                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4411                                                      &tr->trace_buffer, cpu);
4412                 if (r < 0) {
4413                         /*
4414                          * AARGH! We are left with different
4415                          * size max buffer!!!!
4416                          * The max buffer is our "snapshot" buffer.
4417                          * When a tracer needs a snapshot (one of the
4418                          * latency tracers), it swaps the max buffer
4419                          * with the saved snapshot. We succeeded in updating
4420                          * the size of the main buffer, but failed to
4421                          * update the size of the max buffer. But when we tried
4422                          * to reset the main buffer to the original size, we
4423                          * failed there too. This is very unlikely to
4424                          * happen, but if it does, warn and kill all
4425                          * tracing.
4426                          */
4427                         WARN_ON(1);
4428                         tracing_disabled = 1;
4429                 }
4430                 return ret;
4431         }
4432
4433         if (cpu == RING_BUFFER_ALL_CPUS)
4434                 set_buffer_entries(&tr->max_buffer, size);
4435         else
4436                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4437
4438  out:
4439 #endif /* CONFIG_TRACER_MAX_TRACE */
4440
4441         if (cpu == RING_BUFFER_ALL_CPUS)
4442                 set_buffer_entries(&tr->trace_buffer, size);
4443         else
4444                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4445
4446         return ret;
4447 }
4448
4449 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4450                                           unsigned long size, int cpu_id)
4451 {
4452         int ret = size;
4453
4454         mutex_lock(&trace_types_lock);
4455
4456         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4457                 /* make sure this CPU is enabled in the mask */
4458                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4459                         ret = -EINVAL;
4460                         goto out;
4461                 }
4462         }
4463
4464         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4465         if (ret < 0)
4466                 ret = -ENOMEM;
4467
4468 out:
4469         mutex_unlock(&trace_types_lock);
4470
4471         return ret;
4472 }
4473
4474
4475 /**
4476  * tracing_update_buffers - used by tracing facility to expand ring buffers
4477  *
4478  * To save memory when tracing is never used on a system that has it
4479  * configured in, the ring buffers are set to a minimum size.  But once
4480  * a user starts to use the tracing facility, they need to grow
4481  * to their default size.
4482  *
4483  * This function is to be called when a tracer is about to be used.
4484  */
4485 int tracing_update_buffers(void)
4486 {
4487         int ret = 0;
4488
4489         mutex_lock(&trace_types_lock);
4490         if (!ring_buffer_expanded)
4491                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4492                                                 RING_BUFFER_ALL_CPUS);
4493         mutex_unlock(&trace_types_lock);
4494
4495         return ret;
4496 }
4497
4498 struct trace_option_dentry;
4499
4500 static void
4501 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4502
4503 /*
4504  * Used to clear out the tracer before deletion of an instance.
4505  * Must have trace_types_lock held.
4506  */
4507 static void tracing_set_nop(struct trace_array *tr)
4508 {
4509         if (tr->current_trace == &nop_trace)
4510                 return;
4511
4512         tr->current_trace->enabled--;
4513
4514         if (tr->current_trace->reset)
4515                 tr->current_trace->reset(tr);
4516
4517         tr->current_trace = &nop_trace;
4518 }
4519
4520 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4521 {
4522         /* Only enable if the directory has been created already. */
4523         if (!tr->dir)
4524                 return;
4525
4526         create_trace_option_files(tr, t);
4527 }
4528
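/*
 * Switch the trace array to the tracer named @buf (what user space
 * writes to "current_tracer", e.g. "echo function > current_tracer").
 * The old tracer is reset, the snapshot buffer is allocated or freed as
 * the new tracer requires, and the new tracer's init() is called.
 */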
4529 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4530 {
4531         struct tracer *t;
4532 #ifdef CONFIG_TRACER_MAX_TRACE
4533         bool had_max_tr;
4534 #endif
4535         int ret = 0;
4536
4537         mutex_lock(&trace_types_lock);
4538
4539         if (!ring_buffer_expanded) {
4540                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4541                                                 RING_BUFFER_ALL_CPUS);
4542                 if (ret < 0)
4543                         goto out;
4544                 ret = 0;
4545         }
4546
4547         for (t = trace_types; t; t = t->next) {
4548                 if (strcmp(t->name, buf) == 0)
4549                         break;
4550         }
4551         if (!t) {
4552                 ret = -EINVAL;
4553                 goto out;
4554         }
4555         if (t == tr->current_trace)
4556                 goto out;
4557
4558         /* Some tracers are only allowed for the top level buffer */
4559         if (!trace_ok_for_array(t, tr)) {
4560                 ret = -EINVAL;
4561                 goto out;
4562         }
4563
4564         /* If trace pipe files are being read, we can't change the tracer */
4565         if (tr->current_trace->ref) {
4566                 ret = -EBUSY;
4567                 goto out;
4568         }
4569
4570         trace_branch_disable();
4571
4572         tr->current_trace->enabled--;
4573
4574         if (tr->current_trace->reset)
4575                 tr->current_trace->reset(tr);
4576
4577         /* Current trace needs to be nop_trace before synchronize_sched */
4578         tr->current_trace = &nop_trace;
4579
4580 #ifdef CONFIG_TRACER_MAX_TRACE
4581         had_max_tr = tr->allocated_snapshot;
4582
4583         if (had_max_tr && !t->use_max_tr) {
4584                 /*
4585                  * We need to make sure that the update_max_tr sees that
4586                  * current_trace changed to nop_trace to keep it from
4587                  * swapping the buffers after we resize it.
4588                  * The update_max_tr is called with interrupts disabled
4589                  * so a synchronize_sched() is sufficient.
4590                  */
4591                 synchronize_sched();
4592                 free_snapshot(tr);
4593         }
4594 #endif
4595
4596 #ifdef CONFIG_TRACER_MAX_TRACE
4597         if (t->use_max_tr && !had_max_tr) {
4598                 ret = alloc_snapshot(tr);
4599                 if (ret < 0)
4600                         goto out;
4601         }
4602 #endif
4603
4604         if (t->init) {
4605                 ret = tracer_init(t, tr);
4606                 if (ret)
4607                         goto out;
4608         }
4609
4610         tr->current_trace = t;
4611         tr->current_trace->enabled++;
4612         trace_branch_enable(tr);
4613  out:
4614         mutex_unlock(&trace_types_lock);
4615
4616         return ret;
4617 }
4618
4619 static ssize_t
4620 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4621                         size_t cnt, loff_t *ppos)
4622 {
4623         struct trace_array *tr = filp->private_data;
4624         char buf[MAX_TRACER_SIZE+1];
4625         int i;
4626         size_t ret;
4627         int err;
4628
4629         ret = cnt;
4630
4631         if (cnt > MAX_TRACER_SIZE)
4632                 cnt = MAX_TRACER_SIZE;
4633
4634         if (copy_from_user(buf, ubuf, cnt))
4635                 return -EFAULT;
4636
4637         buf[cnt] = 0;
4638
4639         /* strip ending whitespace. */
4640         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4641                 buf[i] = 0;
4642
4643         err = tracing_set_tracer(tr, buf);
4644         if (err)
4645                 return err;
4646
4647         *ppos += ret;
4648
4649         return ret;
4650 }
4651
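/*
 * Helpers for files that expose a latency value: the value is stored in
 * nanoseconds but read and written in microseconds (-1 means "not set").
 */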
4652 static ssize_t
4653 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4654                    size_t cnt, loff_t *ppos)
4655 {
4656         char buf[64];
4657         int r;
4658
4659         r = snprintf(buf, sizeof(buf), "%ld\n",
4660                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4661         if (r > sizeof(buf))
4662                 r = sizeof(buf);
4663         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4664 }
4665
4666 static ssize_t
4667 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4668                     size_t cnt, loff_t *ppos)
4669 {
4670         unsigned long val;
4671         int ret;
4672
4673         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4674         if (ret)
4675                 return ret;
4676
4677         *ptr = val * 1000;
4678
4679         return cnt;
4680 }
4681
4682 static ssize_t
4683 tracing_thresh_read(struct file *filp, char __user *ubuf,
4684                     size_t cnt, loff_t *ppos)
4685 {
4686         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4687 }
4688
4689 static ssize_t
4690 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4691                      size_t cnt, loff_t *ppos)
4692 {
4693         struct trace_array *tr = filp->private_data;
4694         int ret;
4695
4696         mutex_lock(&trace_types_lock);
4697         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4698         if (ret < 0)
4699                 goto out;
4700
4701         if (tr->current_trace->update_thresh) {
4702                 ret = tr->current_trace->update_thresh(tr);
4703                 if (ret < 0)
4704                         goto out;
4705         }
4706
4707         ret = cnt;
4708 out:
4709         mutex_unlock(&trace_types_lock);
4710
4711         return ret;
4712 }
4713
4714 #ifdef CONFIG_TRACER_MAX_TRACE
4715
4716 static ssize_t
4717 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4718                      size_t cnt, loff_t *ppos)
4719 {
4720         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4721 }
4722
4723 static ssize_t
4724 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4725                       size_t cnt, loff_t *ppos)
4726 {
4727         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4728 }
4729
4730 #endif
4731
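/*
 * Open "trace_pipe": a consuming read of the ring buffer.  A private
 * trace_iterator is allocated for the reader, and the current tracer's
 * reference count is bumped so the tracer cannot be switched while the
 * pipe is open.
 */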
4732 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4733 {
4734         struct trace_array *tr = inode->i_private;
4735         struct trace_iterator *iter;
4736         int ret = 0;
4737
4738         if (tracing_disabled)
4739                 return -ENODEV;
4740
4741         if (trace_array_get(tr) < 0)
4742                 return -ENODEV;
4743
4744         mutex_lock(&trace_types_lock);
4745
4746         /* create a buffer to store the information to pass to userspace */
4747         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4748         if (!iter) {
4749                 ret = -ENOMEM;
4750                 __trace_array_put(tr);
4751                 goto out;
4752         }
4753
4754         trace_seq_init(&iter->seq);
4755         iter->trace = tr->current_trace;
4756
4757         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4758                 ret = -ENOMEM;
4759                 goto fail;
4760         }
4761
4762         /* trace pipe does not show start of buffer */
4763         cpumask_setall(iter->started);
4764
4765         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4766                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4767
4768         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4769         if (trace_clocks[tr->clock_id].in_ns)
4770                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4771
4772         iter->tr = tr;
4773         iter->trace_buffer = &tr->trace_buffer;
4774         iter->cpu_file = tracing_get_cpu(inode);
4775         mutex_init(&iter->mutex);
4776         filp->private_data = iter;
4777
4778         if (iter->trace->pipe_open)
4779                 iter->trace->pipe_open(iter);
4780
4781         nonseekable_open(inode, filp);
4782
4783         tr->current_trace->ref++;
4784 out:
4785         mutex_unlock(&trace_types_lock);
4786         return ret;
4787
4788 fail:
4789         kfree(iter->trace);
4790         kfree(iter);
4791         __trace_array_put(tr);
4792         mutex_unlock(&trace_types_lock);
4793         return ret;
4794 }
4795
4796 static int tracing_release_pipe(struct inode *inode, struct file *file)
4797 {
4798         struct trace_iterator *iter = file->private_data;
4799         struct trace_array *tr = inode->i_private;
4800
4801         mutex_lock(&trace_types_lock);
4802
4803         tr->current_trace->ref--;
4804
4805         if (iter->trace->pipe_close)
4806                 iter->trace->pipe_close(iter);
4807
4808         mutex_unlock(&trace_types_lock);
4809
4810         free_cpumask_var(iter->started);
4811         mutex_destroy(&iter->mutex);
4812         kfree(iter);
4813
4814         trace_array_put(tr);
4815
4816         return 0;
4817 }
4818
4819 static unsigned int
4820 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4821 {
4822         struct trace_array *tr = iter->tr;
4823
4824         /* Iterators are static; they should be either filled or empty */
4825         if (trace_buffer_iter(iter, iter->cpu_file))
4826                 return POLLIN | POLLRDNORM;
4827
4828         if (tr->trace_flags & TRACE_ITER_BLOCK)
4829                 /*
4830                  * Always select as readable when in blocking mode
4831                  */
4832                 return POLLIN | POLLRDNORM;
4833         else
4834                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4835                                              filp, poll_table);
4836 }
4837
4838 static unsigned int
4839 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4840 {
4841         struct trace_iterator *iter = filp->private_data;
4842
4843         return trace_poll(iter, filp, poll_table);
4844 }
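/*
 * Sketch of how a reader would typically poll trace_pipe before reading,
 * matching the semantics above: with TRACE_ITER_BLOCK set the file always
 * reports readable, otherwise readability follows the per-cpu ring buffer.
 * The helper name and timeout handling are illustrative assumptions.
 *
 *	#include <poll.h>
 *
 *	int wait_for_trace_data(int fd, int timeout_ms)
 *	{
 *		struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *		return poll(&pfd, 1, timeout_ms);	// > 0 means readable
 *	}
 */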
4845
4846 /* Must be called with iter->mutex held. */
4847 static int tracing_wait_pipe(struct file *filp)
4848 {
4849         struct trace_iterator *iter = filp->private_data;
4850         int ret;
4851
4852         while (trace_empty(iter)) {
4853
4854                 if ((filp->f_flags & O_NONBLOCK)) {
4855                         return -EAGAIN;
4856                 }
4857
4858                 /*
4859                  * We block until we have read something and tracing has been
4860                  * disabled. That is, we keep blocking while tracing is off as
4861                  * long as nothing has been read yet; this lets a user cat this
4862                  * file first and enable tracing afterwards. Once something has
4863                  * been read, we return EOF when tracing is disabled again.
4864                  *
4865                  * iter->pos will be 0 if we haven't read anything.
4866                  */
4867                 if (!tracing_is_on() && iter->pos)
4868                         break;
4869
4870                 mutex_unlock(&iter->mutex);
4871
4872                 ret = wait_on_pipe(iter, false);
4873
4874                 mutex_lock(&iter->mutex);
4875
4876                 if (ret)
4877                         return ret;
4878         }
4879
4880         return 1;
4881 }
4882
4883 /*
4884  * Consumer reader.
4885  */
4886 static ssize_t
4887 tracing_read_pipe(struct file *filp, char __user *ubuf,
4888                   size_t cnt, loff_t *ppos)
4889 {
4890         struct trace_iterator *iter = filp->private_data;
4891         ssize_t sret;
4892
4893         /* return any leftover data */
4894         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4895         if (sret != -EBUSY)
4896                 return sret;
4897
4898         trace_seq_init(&iter->seq);
4899
4900         /*
4901          * Avoid more than one consumer on a single file descriptor.
4902          * This is only a matter of trace coherency; the ring buffer
4903          * itself is protected.
4904          */
4905         mutex_lock(&iter->mutex);
4906         if (iter->trace->read) {
4907                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4908                 if (sret)
4909                         goto out;
4910         }
4911
4912 waitagain:
4913         sret = tracing_wait_pipe(filp);
4914         if (sret <= 0)
4915                 goto out;
4916
4917         /* stop when tracing is finished */
4918         if (trace_empty(iter)) {
4919                 sret = 0;
4920                 goto out;
4921         }
4922
4923         if (cnt >= PAGE_SIZE)
4924                 cnt = PAGE_SIZE - 1;
4925
4926         /* reset all but tr, trace, and overruns */
4927         memset(&iter->seq, 0,
4928                sizeof(struct trace_iterator) -
4929                offsetof(struct trace_iterator, seq));
4930         cpumask_clear(iter->started);
4931         iter->pos = -1;
4932
4933         trace_event_read_lock();
4934         trace_access_lock(iter->cpu_file);
4935         while (trace_find_next_entry_inc(iter) != NULL) {
4936                 enum print_line_t ret;
4937                 int save_len = iter->seq.seq.len;
4938
4939                 ret = print_trace_line(iter);
4940                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4941                         /* don't print partial lines */
4942                         iter->seq.seq.len = save_len;
4943                         break;
4944                 }
4945                 if (ret != TRACE_TYPE_NO_CONSUME)
4946                         trace_consume(iter);
4947
4948                 if (trace_seq_used(&iter->seq) >= cnt)
4949                         break;
4950
4951                 /*
4952                  * The full flag being set means we reached the trace_seq buffer
4953                  * size and should have left via the partial-line condition above;
4954                  * one of the trace_seq_* functions is not being used properly.
4955                  */
4956                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4957                           iter->ent->type);
4958         }
4959         trace_access_unlock(iter->cpu_file);
4960         trace_event_read_unlock();
4961
4962         /* Now copy what we have to the user */
4963         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4964         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4965                 trace_seq_init(&iter->seq);
4966
4967         /*
4968          * If there was nothing to send to user, in spite of consuming trace
4969          * entries, go back to wait for more entries.
4970          */
4971         if (sret == -EBUSY)
4972                 goto waitagain;
4973
4974 out:
4975         mutex_unlock(&iter->mutex);
4976
4977         return sret;
4978 }
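/*
 * Because this is a consuming reader, every successful read removes the
 * returned events from the ring buffer.  A user-space reader is normally
 * just a loop like the sketch below; the path and buffer size are
 * illustrative assumptions, and reads block until data arrives unless the
 * file was opened with O_NONBLOCK.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	void drain_trace_pipe(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
 *
 *		if (fd < 0)
 *			return;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)
 *			write(STDOUT_FILENO, buf, n);
 *		close(fd);
 *	}
 */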
4979
4980 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4981                                      unsigned int idx)
4982 {
4983         __free_page(spd->pages[idx]);
4984 }
4985
4986 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4987         .can_merge              = 0,
4988         .confirm                = generic_pipe_buf_confirm,
4989         .release                = generic_pipe_buf_release,
4990         .steal                  = generic_pipe_buf_steal,
4991         .get                    = generic_pipe_buf_get,
4992 };
4993
4994 static size_t
4995 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4996 {
4997         size_t count;
4998         int save_len;
4999         int ret;
5000
5001         /* Seq buffer is page-sized, exactly what we need. */
5002         for (;;) {
5003                 save_len = iter->seq.seq.len;
5004                 ret = print_trace_line(iter);
5005
5006                 if (trace_seq_has_overflowed(&iter->seq)) {
5007                         iter->seq.seq.len = save_len;
5008                         break;
5009                 }
5010
5011                 /*
5012                  * This should not be hit, because it should only
5013                  * be set if the iter->seq overflowed. But check it
5014                  * anyway to be safe.
5015                  */
5016                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5017                         iter->seq.seq.len = save_len;
5018                         break;
5019                 }
5020
5021                 count = trace_seq_used(&iter->seq) - save_len;
5022                 if (rem < count) {
5023                         rem = 0;
5024                         iter->seq.seq.len = save_len;
5025                         break;
5026                 }
5027
5028                 if (ret != TRACE_TYPE_NO_CONSUME)
5029                         trace_consume(iter);
5030                 rem -= count;
5031                 if (!trace_find_next_entry_inc(iter)) {
5032                         rem = 0;
5033                         iter->ent = NULL;
5034                         break;
5035                 }
5036         }
5037
5038         return rem;
5039 }
5040
5041 static ssize_t tracing_splice_read_pipe(struct file *filp,
5042                                         loff_t *ppos,
5043                                         struct pipe_inode_info *pipe,
5044                                         size_t len,
5045                                         unsigned int flags)
5046 {
5047         struct page *pages_def[PIPE_DEF_BUFFERS];
5048         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5049         struct trace_iterator *iter = filp->private_data;
5050         struct splice_pipe_desc spd = {
5051                 .pages          = pages_def,
5052                 .partial        = partial_def,
5053                 .nr_pages       = 0, /* This gets updated below. */
5054                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5055                 .flags          = flags,
5056                 .ops            = &tracing_pipe_buf_ops,
5057                 .spd_release    = tracing_spd_release_pipe,
5058         };
5059         ssize_t ret;
5060         size_t rem;
5061         unsigned int i;
5062
5063         if (splice_grow_spd(pipe, &spd))
5064                 return -ENOMEM;
5065
5066         mutex_lock(&iter->mutex);
5067
5068         if (iter->trace->splice_read) {
5069                 ret = iter->trace->splice_read(iter, filp,
5070                                                ppos, pipe, len, flags);
5071                 if (ret)
5072                         goto out_err;
5073         }
5074
5075         ret = tracing_wait_pipe(filp);
5076         if (ret <= 0)
5077                 goto out_err;
5078
5079         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5080                 ret = -EFAULT;
5081                 goto out_err;
5082         }
5083
5084         trace_event_read_lock();
5085         trace_access_lock(iter->cpu_file);
5086
5087         /* Fill as many pages as possible. */
5088         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5089                 spd.pages[i] = alloc_page(GFP_KERNEL);
5090                 if (!spd.pages[i])
5091                         break;
5092
5093                 rem = tracing_fill_pipe_page(rem, iter);
5094
5095                 /* Copy the data into the page, so we can start over. */
5096                 ret = trace_seq_to_buffer(&iter->seq,
5097                                           page_address(spd.pages[i]),
5098                                           trace_seq_used(&iter->seq));
5099                 if (ret < 0) {
5100                         __free_page(spd.pages[i]);
5101                         break;
5102                 }
5103                 spd.partial[i].offset = 0;
5104                 spd.partial[i].len = trace_seq_used(&iter->seq);
5105
5106                 trace_seq_init(&iter->seq);
5107         }
5108
5109         trace_access_unlock(iter->cpu_file);
5110         trace_event_read_unlock();
5111         mutex_unlock(&iter->mutex);
5112
5113         spd.nr_pages = i;
5114
5115         if (i)
5116                 ret = splice_to_pipe(pipe, &spd);
5117         else
5118                 ret = 0;
5119 out:
5120         splice_shrink_spd(&spd);
5121         return ret;
5122
5123 out_err:
5124         mutex_unlock(&iter->mutex);
5125         goto out;
5126 }
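/*
 * The splice path above hands formatted trace text to a pipe one page at a
 * time, sparing the copy to a user buffer.  A rough user-space sketch; the
 * path and chunk size are assumptions, and splice(2) requires that one of
 * the two descriptors be a pipe.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	ssize_t splice_trace_once(int pipe_fd)	// pipe_fd: write end of a pipe
 *	{
 *		int fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
 *		ssize_t n = -1;
 *
 *		if (fd >= 0) {
 *			n = splice(fd, NULL, pipe_fd, NULL, 65536, 0);
 *			close(fd);
 *		}
 *		return n;
 *	}
 */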
5127
5128 static ssize_t
5129 tracing_entries_read(struct file *filp, char __user *ubuf,
5130                      size_t cnt, loff_t *ppos)
5131 {
5132         struct inode *inode = file_inode(filp);
5133         struct trace_array *tr = inode->i_private;
5134         int cpu = tracing_get_cpu(inode);
5135         char buf[64];
5136         int r = 0;
5137         ssize_t ret;
5138
5139         mutex_lock(&trace_types_lock);
5140
5141         if (cpu == RING_BUFFER_ALL_CPUS) {
5142                 int cpu, buf_size_same;
5143                 unsigned long size;
5144
5145                 size = 0;
5146                 buf_size_same = 1;
5147                 /* check if all cpu sizes are the same */
5148                 for_each_tracing_cpu(cpu) {
5149                         /* fill in the size from the first enabled cpu */
5150                         if (size == 0)
5151                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5152                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5153                                 buf_size_same = 0;
5154                                 break;
5155                         }
5156                 }
5157
5158                 if (buf_size_same) {
5159                         if (!ring_buffer_expanded)
5160                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5161                                             size >> 10,
5162                                             trace_buf_size >> 10);
5163                         else
5164                                 r = sprintf(buf, "%lu\n", size >> 10);
5165                 } else
5166                         r = sprintf(buf, "X\n");
5167         } else
5168                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5169
5170         mutex_unlock(&trace_types_lock);
5171
5172         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5173         return ret;
5174 }
5175
5176 static ssize_t
5177 tracing_entries_write(struct file *filp, const char __user *ubuf,
5178                       size_t cnt, loff_t *ppos)
5179 {
5180         struct inode *inode = file_inode(filp);
5181         struct trace_array *tr = inode->i_private;
5182         unsigned long val;
5183         int ret;
5184
5185         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5186         if (ret)
5187                 return ret;
5188
5189         /* must have at least 1 entry */
5190         if (!val)
5191                 return -EINVAL;
5192
5193         /* value is in KB */
5194         val <<= 10;
5195         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5196         if (ret < 0)
5197                 return ret;
5198
5199         *ppos += cnt;
5200
5201         return cnt;
5202 }
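/*
 * buffer_size_kb takes a per-CPU size in kilobytes, which is why the value
 * is shifted by 10 before the resize above.  Reading it back may show "X"
 * when the per-CPU sizes differ, and an "(expanded: ...)" hint while the
 * ring buffer still has its boot-time minimum size.  Sketch under the same
 * path assumption as the other examples:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	void set_buffer_size_kb(const char *kb)	// e.g. "1408"
 *	{
 *		int fd = open("/sys/kernel/debug/tracing/buffer_size_kb", O_WRONLY);
 *
 *		if (fd >= 0) {
 *			write(fd, kb, strlen(kb));
 *			close(fd);
 *		}
 *	}
 */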
5203
5204 static ssize_t
5205 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5206                                 size_t cnt, loff_t *ppos)
5207 {
5208         struct trace_array *tr = filp->private_data;
5209         char buf[64];
5210         int r, cpu;
5211         unsigned long size = 0, expanded_size = 0;
5212
5213         mutex_lock(&trace_types_lock);
5214         for_each_tracing_cpu(cpu) {
5215                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5216                 if (!ring_buffer_expanded)
5217                         expanded_size += trace_buf_size >> 10;
5218         }
5219         if (ring_buffer_expanded)
5220                 r = sprintf(buf, "%lu\n", size);
5221         else
5222                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5223         mutex_unlock(&trace_types_lock);
5224
5225         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5226 }
5227
5228 static ssize_t
5229 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5230                           size_t cnt, loff_t *ppos)
5231 {
5232         /*
5233          * There is no need to read what the user has written; this function
5234          * only exists so that writing to the file (e.g. via "echo") succeeds.
5235          */
5236
5237         *ppos += cnt;
5238
5239         return cnt;
5240 }
5241
5242 static int
5243 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5244 {
5245         struct trace_array *tr = inode->i_private;
5246
5247         /* Disable tracing? */
5248         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5249                 tracer_tracing_off(tr);
5250         /* resize the ring buffer to 0 */
5251         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5252
5253         trace_array_put(tr);
5254
5255         return 0;
5256 }
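/*
 * The free_buffer file does its real work on release: writes are accepted
 * and ignored, and closing the file resizes the ring buffer to 0 (stopping
 * tracing first when the stop-on-free option is set).  So plain
 * "echo > free_buffer" usage works purely through close().  Sketch, with
 * the usual path assumption:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	void free_trace_buffers(void)
 *	{
 *		int fd = open("/sys/kernel/debug/tracing/free_buffer", O_WRONLY);
 *
 *		if (fd >= 0)
 *			close(fd);	// the release hook does the freeing
 *	}
 */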
5257
5258 static ssize_t
5259 tracing_mark_write(struct file *filp, const char __user *ubuf,
5260                                         size_t cnt, loff_t *fpos)
5261 {
5262         unsigned long addr = (unsigned long)ubuf;
5263         struct trace_array *tr = filp->private_data;
5264         struct ring_buffer_event *event;
5265         struct ring_buffer *buffer;
5266         struct print_entry *entry;
5267         unsigned long irq_flags;
5268         struct page *pages[2];
5269         void *map_page[2];
5270         int nr_pages = 1;
5271         ssize_t written;
5272         int offset;
5273         int size;
5274         int len;
5275         int ret;
5276         int i;
5277
5278         if (tracing_disabled)
5279                 return -EINVAL;
5280
5281         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5282                 return -EINVAL;
5283
5284         if (cnt > TRACE_BUF_SIZE)
5285                 cnt = TRACE_BUF_SIZE;
5286
5287         /*
5288          * Userspace is injecting traces into the kernel trace buffer.
5289          * We want to be as non-intrusive as possible.
5290          * To do so, we do not want to allocate any special buffers
5291          * or take any locks, but instead write the userspace data
5292          * straight into the ring buffer.
5293          *
5294          * First we need to pin the userspace buffer into memory,
5295          * which it most likely already is, because userspace just
5296          * referenced it, but there is no guarantee of that. By using
5297          * get_user_pages_fast() and kmap_atomic()/kunmap_atomic() we get at the
5298          * pages directly. We then write the data directly into the
5299          * ring buffer.
5300          */
5301         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5302
5303         /* check if we cross pages */
5304         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5305                 nr_pages = 2;
5306
5307         offset = addr & (PAGE_SIZE - 1);
5308         addr &= PAGE_MASK;
5309
5310         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5311         if (ret < nr_pages) {
5312                 while (--ret >= 0)
5313                         put_page(pages[ret]);
5314                 written = -EFAULT;
5315                 goto out;
5316         }
5317
5318         for (i = 0; i < nr_pages; i++)
5319                 map_page[i] = kmap_atomic(pages[i]);
5320
5321         local_save_flags(irq_flags);
5322         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5323         buffer = tr->trace_buffer.buffer;
5324         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5325                                           irq_flags, preempt_count());
5326         if (!event) {
5327                 /* Ring buffer disabled, return as if not open for write */
5328                 written = -EBADF;
5329                 goto out_unlock;
5330         }
5331
5332         entry = ring_buffer_event_data(event);
5333         entry->ip = _THIS_IP_;
5334
5335         if (nr_pages == 2) {
5336                 len = PAGE_SIZE - offset;
5337                 memcpy(&entry->buf, map_page[0] + offset, len);
5338                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5339         } else
5340                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5341
5342         if (entry->buf[cnt - 1] != '\n') {
5343                 entry->buf[cnt] = '\n';
5344                 entry->buf[cnt + 1] = '\0';
5345         } else
5346                 entry->buf[cnt] = '\0';
5347
5348         __buffer_unlock_commit(buffer, event);
5349
5350         written = cnt;
5351
5352         *fpos += written;
5353
5354  out_unlock:
5355         for (i = nr_pages - 1; i >= 0; i--) {
5356                 kunmap_atomic(map_page[i]);
5357                 put_page(pages[i]);
5358         }
5359  out:
5360         return written;
5361 }
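/*
 * Sketch of the trace_marker interface served by the function above: any
 * string written shows up in the trace as a print event, a newline is
 * appended if one is missing, and writes longer than TRACE_BUF_SIZE are
 * truncated.  The path is an assumption, as elsewhere.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	void trace_marker_write(const char *msg)
 *	{
 *		int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd >= 0) {
 *			write(fd, msg, strlen(msg));
 *			close(fd);
 *		}
 *	}
 */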
5362
5363 static int tracing_clock_show(struct seq_file *m, void *v)
5364 {
5365         struct trace_array *tr = m->private;
5366         int i;
5367
5368         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5369                 seq_printf(m,
5370                         "%s%s%s%s", i ? " " : "",
5371                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5372                         i == tr->clock_id ? "]" : "");
5373         seq_putc(m, '\n');
5374
5375         return 0;
5376 }
5377
5378 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5379 {
5380         int i;
5381
5382         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5383                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5384                         break;
5385         }
5386         if (i == ARRAY_SIZE(trace_clocks))
5387                 return -EINVAL;
5388
5389         mutex_lock(&trace_types_lock);
5390
5391         tr->clock_id = i;
5392
5393         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5394
5395         /*
5396          * New clock may not be consistent with the previous clock.
5397          * Reset the buffer so that it doesn't have incomparable timestamps.
5398          */
5399         tracing_reset_online_cpus(&tr->trace_buffer);
5400
5401 #ifdef CONFIG_TRACER_MAX_TRACE
5402         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5403                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5404         tracing_reset_online_cpus(&tr->max_buffer);
5405 #endif
5406
5407         mutex_unlock(&trace_types_lock);
5408
5409         return 0;
5410 }
5411
5412 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5413                                    size_t cnt, loff_t *fpos)
5414 {
5415         struct seq_file *m = filp->private_data;
5416         struct trace_array *tr = m->private;
5417         char buf[64];
5418         const char *clockstr;
5419         int ret;
5420
5421         if (cnt >= sizeof(buf))
5422                 return -EINVAL;
5423
5424         if (copy_from_user(buf, ubuf, cnt))
5425                 return -EFAULT;
5426
5427         buf[cnt] = 0;
5428
5429         clockstr = strstrip(buf);
5430
5431         ret = tracing_set_clock(tr, clockstr);
5432         if (ret)
5433                 return ret;
5434
5435         *fpos += cnt;
5436
5437         return cnt;
5438 }
5439
5440 static int tracing_clock_open(struct inode *inode, struct file *file)
5441 {
5442         struct trace_array *tr = inode->i_private;
5443         int ret;
5444
5445         if (tracing_disabled)
5446                 return -ENODEV;
5447
5448         if (trace_array_get(tr))
5449                 return -ENODEV;
5450
5451         ret = single_open(file, tracing_clock_show, inode->i_private);
5452         if (ret < 0)
5453                 trace_array_put(tr);
5454
5455         return ret;
5456 }
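/*
 * Reading trace_clock lists the available clocks with the current one in
 * brackets, and writing one of those names switches clocks and resets the
 * buffers so timestamps stay comparable.  Sketch; the clock name and path
 * are illustrative assumptions.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	void use_global_trace_clock(void)
 *	{
 *		int fd = open("/sys/kernel/debug/tracing/trace_clock", O_WRONLY);
 *
 *		if (fd >= 0) {
 *			write(fd, "global", strlen("global"));
 *			close(fd);
 *		}
 *	}
 */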
5457
5458 struct ftrace_buffer_info {
5459         struct trace_iterator   iter;
5460         void                    *spare;
5461         unsigned int            read;
5462 };
5463
5464 #ifdef CONFIG_TRACER_SNAPSHOT
5465 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5466 {
5467         struct trace_array *tr = inode->i_private;
5468         struct trace_iterator *iter;
5469         struct seq_file *m;
5470         int ret = 0;
5471
5472         if (trace_array_get(tr) < 0)
5473                 return -ENODEV;
5474
5475         if (file->f_mode & FMODE_READ) {
5476                 iter = __tracing_open(inode, file, true);
5477                 if (IS_ERR(iter))
5478                         ret = PTR_ERR(iter);
5479         } else {
5480                 /* Writes still need the seq_file to hold the private data */
5481                 ret = -ENOMEM;
5482                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5483                 if (!m)
5484                         goto out;
5485                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5486                 if (!iter) {
5487                         kfree(m);
5488                         goto out;
5489                 }
5490                 ret = 0;
5491
5492                 iter->tr = tr;
5493                 iter->trace_buffer = &tr->max_buffer;
5494                 iter->cpu_file = tracing_get_cpu(inode);
5495                 m->private = iter;
5496                 file->private_data = m;
5497         }
5498 out:
5499         if (ret < 0)
5500                 trace_array_put(tr);
5501
5502         return ret;
5503 }
5504
5505 static ssize_t
5506 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5507                        loff_t *ppos)
5508 {
5509         struct seq_file *m = filp->private_data;
5510         struct trace_iterator *iter = m->private;
5511         struct trace_array *tr = iter->tr;
5512         unsigned long val;
5513         int ret;
5514
5515         ret = tracing_update_buffers();
5516         if (ret < 0)
5517                 return ret;
5518
5519         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5520         if (ret)
5521                 return ret;
5522
5523         mutex_lock(&trace_types_lock);
5524
5525         if (tr->current_trace->use_max_tr) {
5526                 ret = -EBUSY;
5527                 goto out;
5528         }
5529
5530         switch (val) {
5531         case 0:
5532                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5533                         ret = -EINVAL;
5534                         break;
5535                 }
5536                 if (tr->allocated_snapshot)
5537                         free_snapshot(tr);
5538                 break;
5539         case 1:
5540 /* Only allow per-cpu swap if the ring buffer supports it */
5541 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5542                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5543                         ret = -EINVAL;
5544                         break;
5545                 }
5546 #endif
5547                 if (!tr->allocated_snapshot) {
5548                         ret = alloc_snapshot(tr);
5549                         if (ret < 0)
5550                                 break;
5551                 }
5552                 local_irq_disable();
5553                 /* Now, we're going to swap */
5554                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5555                         update_max_tr(tr, current, smp_processor_id());
5556                 else
5557                         update_max_tr_single(tr, current, iter->cpu_file);
5558                 local_irq_enable();
5559                 break;
5560         default:
5561                 if (tr->allocated_snapshot) {
5562                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5563                                 tracing_reset_online_cpus(&tr->max_buffer);
5564                         else
5565                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5566                 }
5567                 break;
5568         }
5569
5570         if (ret >= 0) {
5571                 *ppos += cnt;
5572                 ret = cnt;
5573         }
5574 out:
5575         mutex_unlock(&trace_types_lock);
5576         return ret;
5577 }
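/*
 * The value written to the snapshot file selects the action above: "0"
 * frees the snapshot buffer, "1" allocates it if needed and swaps it with
 * the live buffer, and any other number clears the snapshot buffer.
 * Sketch of taking a snapshot from user space, with the usual path
 * assumption:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	void take_snapshot(void)
 *	{
 *		int fd = open("/sys/kernel/debug/tracing/snapshot", O_WRONLY);
 *
 *		if (fd >= 0) {
 *			write(fd, "1", 1);	// swap live buffer into the snapshot
 *			close(fd);
 *		}
 *	}
 */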
5578
5579 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5580 {
5581         struct seq_file *m = file->private_data;
5582         int ret;
5583
5584         ret = tracing_release(inode, file);
5585
5586         if (file->f_mode & FMODE_READ)
5587                 return ret;
5588
5589         /* If write only, the seq_file is just a stub */
5590         if (m)
5591                 kfree(m->private);
5592         kfree(m);
5593
5594         return 0;
5595 }
5596
5597 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5598 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5599                                     size_t count, loff_t *ppos);
5600 static int tracing_buffers_release(struct inode *inode, struct file *file);
5601 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5602                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5603
5604 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5605 {
5606         struct ftrace_buffer_info *info;
5607         int ret;
5608
5609         ret = tracing_buffers_open(inode, filp);
5610         if (ret < 0)
5611                 return ret;
5612
5613         info = filp->private_data;
5614
5615         if (info->iter.trace->use_max_tr) {
5616                 tracing_buffers_release(inode, filp);
5617                 return -EBUSY;
5618         }
5619
5620         info->iter.snapshot = true;
5621         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5622
5623         return ret;
5624 }
5625
5626 #endif /* CONFIG_TRACER_SNAPSHOT */
5627
5628
5629 static const struct file_operations tracing_thresh_fops = {
5630         .open           = tracing_open_generic,
5631         .read           = tracing_thresh_read,
5632         .write          = tracing_thresh_write,
5633         .llseek         = generic_file_llseek,
5634 };
5635
5636 #ifdef CONFIG_TRACER_MAX_TRACE
5637 static const struct file_operations tracing_max_lat_fops = {
5638         .open           = tracing_open_generic,
5639         .read           = tracing_max_lat_read,
5640         .write          = tracing_max_lat_write,
5641         .llseek         = generic_file_llseek,
5642 };
5643 #endif
5644
5645 static const struct file_operations set_tracer_fops = {
5646         .open           = tracing_open_generic,
5647         .read           = tracing_set_trace_read,
5648         .write          = tracing_set_trace_write,
5649         .llseek         = generic_file_llseek,
5650 };
5651
5652 static const struct file_operations tracing_pipe_fops = {
5653         .open           = tracing_open_pipe,
5654         .poll           = tracing_poll_pipe,
5655         .read           = tracing_read_pipe,
5656         .splice_read    = tracing_splice_read_pipe,
5657         .release        = tracing_release_pipe,
5658         .llseek         = no_llseek,
5659 };
5660
5661 static const struct file_operations tracing_entries_fops = {
5662         .open           = tracing_open_generic_tr,
5663         .read           = tracing_entries_read,
5664         .write          = tracing_entries_write,
5665         .llseek         = generic_file_llseek,
5666         .release        = tracing_release_generic_tr,
5667 };
5668
5669 static const struct file_operations tracing_total_entries_fops = {
5670         .open           = tracing_open_generic_tr,
5671         .read           = tracing_total_entries_read,
5672         .llseek         = generic_file_llseek,
5673         .release        = tracing_release_generic_tr,
5674 };
5675
5676 static const struct file_operations tracing_free_buffer_fops = {
5677         .open           = tracing_open_generic_tr,
5678         .write          = tracing_free_buffer_write,
5679         .release        = tracing_free_buffer_release,
5680 };
5681
5682 static const struct file_operations tracing_mark_fops = {
5683         .open           = tracing_open_generic_tr,
5684         .write          = tracing_mark_write,
5685         .llseek         = generic_file_llseek,
5686         .release        = tracing_release_generic_tr,
5687 };
5688
5689 static const struct file_operations trace_clock_fops = {
5690         .open           = tracing_clock_open,
5691         .read           = seq_read,
5692         .llseek         = seq_lseek,
5693         .release        = tracing_single_release_tr,
5694         .write          = tracing_clock_write,
5695 };
5696
5697 #ifdef CONFIG_TRACER_SNAPSHOT
5698 static const struct file_operations snapshot_fops = {
5699         .open           = tracing_snapshot_open,
5700         .read           = seq_read,
5701         .write          = tracing_snapshot_write,
5702         .llseek         = tracing_lseek,
5703         .release        = tracing_snapshot_release,
5704 };
5705
5706 static const struct file_operations snapshot_raw_fops = {
5707         .open           = snapshot_raw_open,
5708         .read           = tracing_buffers_read,
5709         .release        = tracing_buffers_release,
5710         .splice_read    = tracing_buffers_splice_read,
5711         .llseek         = no_llseek,
5712 };
5713
5714 #endif /* CONFIG_TRACER_SNAPSHOT */
5715
5716 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5717 {
5718         struct trace_array *tr = inode->i_private;
5719         struct ftrace_buffer_info *info;
5720         int ret;
5721
5722         if (tracing_disabled)
5723                 return -ENODEV;
5724
5725         if (trace_array_get(tr) < 0)
5726                 return -ENODEV;
5727
5728         info = kzalloc(sizeof(*info), GFP_KERNEL);
5729         if (!info) {
5730                 trace_array_put(tr);
5731                 return -ENOMEM;
5732         }
5733
5734         mutex_lock(&trace_types_lock);
5735
5736         info->iter.tr           = tr;
5737         info->iter.cpu_file     = tracing_get_cpu(inode);
5738         info->iter.trace        = tr->current_trace;
5739         info->iter.trace_buffer = &tr->trace_buffer;
5740         info->spare             = NULL;
5741         /* Force reading ring buffer for first read */
5742         info->read              = (unsigned int)-1;
5743
5744         filp->private_data = info;
5745
5746         tr->current_trace->ref++;
5747
5748         mutex_unlock(&trace_types_lock);
5749
5750         ret = nonseekable_open(inode, filp);
5751         if (ret < 0)
5752                 trace_array_put(tr);
5753
5754         return ret;
5755 }
5756
5757 static unsigned int
5758 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5759 {
5760         struct ftrace_buffer_info *info = filp->private_data;
5761         struct trace_iterator *iter = &info->iter;
5762
5763         return trace_poll(iter, filp, poll_table);
5764 }
5765
5766 static ssize_t
5767 tracing_buffers_read(struct file *filp, char __user *ubuf,
5768                      size_t count, loff_t *ppos)
5769 {
5770         struct ftrace_buffer_info *info = filp->private_data;
5771         struct trace_iterator *iter = &info->iter;
5772         ssize_t ret;
5773         ssize_t size;
5774
5775         if (!count)
5776                 return 0;
5777
5778 #ifdef CONFIG_TRACER_MAX_TRACE
5779         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5780                 return -EBUSY;
5781 #endif
5782
5783         if (!info->spare)
5784                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5785                                                           iter->cpu_file);
5786         if (!info->spare)
5787                 return -ENOMEM;
5788
5789         /* Do we have previous read data to read? */
5790         if (info->read < PAGE_SIZE)
5791                 goto read;
5792
5793  again:
5794         trace_access_lock(iter->cpu_file);
5795         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5796                                     &info->spare,
5797                                     count,
5798                                     iter->cpu_file, 0);
5799         trace_access_unlock(iter->cpu_file);
5800
5801         if (ret < 0) {
5802                 if (trace_empty(iter)) {
5803                         if ((filp->f_flags & O_NONBLOCK))
5804                                 return -EAGAIN;
5805
5806                         ret = wait_on_pipe(iter, false);
5807                         if (ret)
5808                                 return ret;
5809
5810                         goto again;
5811                 }
5812                 return 0;
5813         }
5814
5815         info->read = 0;
5816  read:
5817         size = PAGE_SIZE - info->read;
5818         if (size > count)
5819                 size = count;
5820
5821         ret = copy_to_user(ubuf, info->spare + info->read, size);
5822         if (ret == size)
5823                 return -EFAULT;
5824
5825         size -= ret;
5826
5827         *ppos += size;
5828         info->read += size;
5829
5830         return size;
5831 }
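/*
 * trace_pipe_raw hands out raw ring-buffer pages rather than formatted
 * text, served through the page-sized spare buffer above, so consumers
 * usually read in PAGE_SIZE chunks and parse (or record) the binary page
 * format themselves.  Rough sketch; the 4096 byte page size and the path
 * are assumptions.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	void record_raw_pages(int out_fd)
 *	{
 *		char page[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			      O_RDONLY);
 *
 *		if (fd < 0)
 *			return;
 *		while ((n = read(fd, page, sizeof(page))) > 0)
 *			write(out_fd, page, n);
 *		close(fd);
 *	}
 */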
5832
5833 static int tracing_buffers_release(struct inode *inode, struct file *file)
5834 {
5835         struct ftrace_buffer_info *info = file->private_data;
5836         struct trace_iterator *iter = &info->iter;
5837
5838         mutex_lock(&trace_types_lock);
5839
5840         iter->tr->current_trace->ref--;
5841
5842         __trace_array_put(iter->tr);
5843
5844         if (info->spare)
5845                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5846         kfree(info);
5847
5848         mutex_unlock(&trace_types_lock);
5849
5850         return 0;
5851 }
5852
5853 struct buffer_ref {
5854         struct ring_buffer      *buffer;
5855         void                    *page;
5856         int                     ref;
5857 };
5858
5859 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5860                                     struct pipe_buffer *buf)
5861 {
5862         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5863
5864         if (--ref->ref)
5865                 return;
5866
5867         ring_buffer_free_read_page(ref->buffer, ref->page);
5868         kfree(ref);
5869         buf->private = 0;
5870 }
5871
5872 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5873                                 struct pipe_buffer *buf)
5874 {
5875         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5876
5877         ref->ref++;
5878 }
5879
5880 /* Pipe buffer operations for a buffer. */
5881 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5882         .can_merge              = 0,
5883         .confirm                = generic_pipe_buf_confirm,
5884         .release                = buffer_pipe_buf_release,
5885         .steal                  = generic_pipe_buf_steal,
5886         .get                    = buffer_pipe_buf_get,
5887 };
5888
5889 /*
5890  * Callback from splice_to_pipe(); releases any pages still held in
5891  * the spd if we errored out while filling the pipe.
5892  */
5893 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5894 {
5895         struct buffer_ref *ref =
5896                 (struct buffer_ref *)spd->partial[i].private;
5897
5898         if (--ref->ref)
5899                 return;
5900
5901         ring_buffer_free_read_page(ref->buffer, ref->page);
5902         kfree(ref);
5903         spd->partial[i].private = 0;
5904 }
5905
5906 static ssize_t
5907 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5908                             struct pipe_inode_info *pipe, size_t len,
5909                             unsigned int flags)
5910 {
5911         struct ftrace_buffer_info *info = file->private_data;
5912         struct trace_iterator *iter = &info->iter;
5913         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5914         struct page *pages_def[PIPE_DEF_BUFFERS];
5915         struct splice_pipe_desc spd = {
5916                 .pages          = pages_def,
5917                 .partial        = partial_def,
5918                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5919                 .flags          = flags,
5920                 .ops            = &buffer_pipe_buf_ops,
5921                 .spd_release    = buffer_spd_release,
5922         };
5923         struct buffer_ref *ref;
5924         int entries, size, i;
5925         ssize_t ret = 0;
5926
5927 #ifdef CONFIG_TRACER_MAX_TRACE
5928         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5929                 return -EBUSY;
5930 #endif
5931
5932         if (*ppos & (PAGE_SIZE - 1))
5933                 return -EINVAL;
5934
5935         if (len & (PAGE_SIZE - 1)) {
5936                 if (len < PAGE_SIZE)
5937                         return -EINVAL;
5938                 len &= PAGE_MASK;
5939         }
5940
5941         if (splice_grow_spd(pipe, &spd))
5942                 return -ENOMEM;
5943
5944  again:
5945         trace_access_lock(iter->cpu_file);
5946         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5947
5948         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5949                 struct page *page;
5950                 int r;
5951
5952                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5953                 if (!ref) {
5954                         ret = -ENOMEM;
5955                         break;
5956                 }
5957
5958                 ref->ref = 1;
5959                 ref->buffer = iter->trace_buffer->buffer;
5960                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5961                 if (!ref->page) {
5962                         ret = -ENOMEM;
5963                         kfree(ref);
5964                         break;
5965                 }
5966
5967                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5968                                           len, iter->cpu_file, 1);
5969                 if (r < 0) {
5970                         ring_buffer_free_read_page(ref->buffer, ref->page);
5971                         kfree(ref);
5972                         break;
5973                 }
5974
5975                 /*
5976                  * Zero out any leftover data, since this page is
5977                  * going to user land.
5978                  */
5979                 size = ring_buffer_page_len(ref->page);
5980                 if (size < PAGE_SIZE)
5981                         memset(ref->page + size, 0, PAGE_SIZE - size);
5982
5983                 page = virt_to_page(ref->page);
5984
5985                 spd.pages[i] = page;
5986                 spd.partial[i].len = PAGE_SIZE;
5987                 spd.partial[i].offset = 0;
5988                 spd.partial[i].private = (unsigned long)ref;
5989                 spd.nr_pages++;
5990                 *ppos += PAGE_SIZE;
5991
5992                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5993         }
5994
5995         trace_access_unlock(iter->cpu_file);
5996         spd.nr_pages = i;
5997
5998         /* did we read anything? */
5999         if (!spd.nr_pages) {
6000                 if (ret)
6001                         goto out;
6002
6003                 ret = -EAGAIN;
6004                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6005                         goto out;
6006                 ret = wait_on_pipe(iter, true);
6007                 if (ret)
6008                         goto out;
6009
6010                 goto again;
6011         }
6012
6013         ret = splice_to_pipe(pipe, &spd);
6014 out:
6015         splice_shrink_spd(&spd);
6016         return ret;
6017 }
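/*
 * The splice path above moves whole ring-buffer pages into a pipe without
 * copying them, which is how recording tools such as trace-cmd pull data
 * out.  The file offset must be page aligned and the length is rounded
 * down to whole pages.  Minimal sketch; the page size is an assumption and
 * pipe_fd must be the write end of a pipe.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	ssize_t splice_raw_pages(int raw_fd, int pipe_fd, size_t pages)
 *	{
 *		return splice(raw_fd, NULL, pipe_fd, NULL, pages * 4096,
 *			      SPLICE_F_NONBLOCK);
 *	}
 */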
6018
6019 static const struct file_operations tracing_buffers_fops = {
6020         .open           = tracing_buffers_open,
6021         .read           = tracing_buffers_read,
6022         .poll           = tracing_buffers_poll,
6023         .release        = tracing_buffers_release,
6024         .splice_read    = tracing_buffers_splice_read,
6025         .llseek         = no_llseek,
6026 };
6027
6028 static ssize_t
6029 tracing_stats_read(struct file *filp, char __user *ubuf,
6030                    size_t count, loff_t *ppos)
6031 {
6032         struct inode *inode = file_inode(filp);
6033         struct trace_array *tr = inode->i_private;
6034         struct trace_buffer *trace_buf = &tr->trace_buffer;
6035         int cpu = tracing_get_cpu(inode);
6036         struct trace_seq *s;
6037         unsigned long cnt;
6038         unsigned long long t;
6039         unsigned long usec_rem;
6040
6041         s = kmalloc(sizeof(*s), GFP_KERNEL);
6042         if (!s)
6043                 return -ENOMEM;
6044
6045         trace_seq_init(s);
6046
6047         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6048         trace_seq_printf(s, "entries: %ld\n", cnt);
6049
6050         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6051         trace_seq_printf(s, "overrun: %ld\n", cnt);
6052
6053         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6054         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6055
6056         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6057         trace_seq_printf(s, "bytes: %ld\n", cnt);
6058
6059         if (trace_clocks[tr->clock_id].in_ns) {
6060                 /* local or global for trace_clock */
6061                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6062                 usec_rem = do_div(t, USEC_PER_SEC);
6063                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6064                                                                 t, usec_rem);
6065
6066                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6067                 usec_rem = do_div(t, USEC_PER_SEC);
6068                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6069         } else {
6070                 /* counter or tsc mode for trace_clock */
6071                 trace_seq_printf(s, "oldest event ts: %llu\n",
6072                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6073
6074                 trace_seq_printf(s, "now ts: %llu\n",
6075                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6076         }
6077
6078         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6079         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6080
6081         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6082         trace_seq_printf(s, "read events: %ld\n", cnt);
6083
6084         count = simple_read_from_buffer(ubuf, count, ppos,
6085                                         s->buffer, trace_seq_used(s));
6086
6087         kfree(s);
6088
6089         return count;
6090 }
6091
6092 static const struct file_operations tracing_stats_fops = {
6093         .open           = tracing_open_generic_tr,
6094         .read           = tracing_stats_read,
6095         .llseek         = generic_file_llseek,
6096         .release        = tracing_release_generic_tr,
6097 };
6098
6099 #ifdef CONFIG_DYNAMIC_FTRACE
6100
6101 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6102 {
6103         return 0;
6104 }
6105
6106 static ssize_t
6107 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6108                   size_t cnt, loff_t *ppos)
6109 {
6110         static char ftrace_dyn_info_buffer[1024];
6111         static DEFINE_MUTEX(dyn_info_mutex);
6112         unsigned long *p = filp->private_data;
6113         char *buf = ftrace_dyn_info_buffer;
6114         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6115         int r;
6116
6117         mutex_lock(&dyn_info_mutex);
6118         r = sprintf(buf, "%ld ", *p);
6119
6120         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6121         buf[r++] = '\n';
6122
6123         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6124
6125         mutex_unlock(&dyn_info_mutex);
6126
6127         return r;
6128 }
6129
6130 static const struct file_operations tracing_dyn_info_fops = {
6131         .open           = tracing_open_generic,
6132         .read           = tracing_read_dyn_info,
6133         .llseek         = generic_file_llseek,
6134 };
6135 #endif /* CONFIG_DYNAMIC_FTRACE */
6136
6137 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6138 static void
6139 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6140 {
6141         tracing_snapshot();
6142 }
6143
6144 static void
6145 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6146 {
6147         unsigned long *count = (long *)data;
6148
6149         if (!*count)
6150                 return;
6151
6152         if (*count != -1)
6153                 (*count)--;
6154
6155         tracing_snapshot();
6156 }
6157
6158 static int
6159 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6160                       struct ftrace_probe_ops *ops, void *data)
6161 {
6162         long count = (long)data;
6163
6164         seq_printf(m, "%ps:", (void *)ip);
6165
6166         seq_puts(m, "snapshot");
6167
6168         if (count == -1)
6169                 seq_puts(m, ":unlimited\n");
6170         else
6171                 seq_printf(m, ":count=%ld\n", count);
6172
6173         return 0;
6174 }
6175
6176 static struct ftrace_probe_ops snapshot_probe_ops = {
6177         .func                   = ftrace_snapshot,
6178         .print                  = ftrace_snapshot_print,
6179 };
6180
6181 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6182         .func                   = ftrace_count_snapshot,
6183         .print                  = ftrace_snapshot_print,
6184 };
6185
6186 static int
6187 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6188                                char *glob, char *cmd, char *param, int enable)
6189 {
6190         struct ftrace_probe_ops *ops;
6191         void *count = (void *)-1;
6192         char *number;
6193         int ret;
6194
6195         /* hash funcs only work with set_ftrace_filter */
6196         if (!enable)
6197                 return -EINVAL;
6198
6199         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6200
6201         if (glob[0] == '!') {
6202                 unregister_ftrace_function_probe_func(glob+1, ops);
6203                 return 0;
6204         }
6205
6206         if (!param)
6207                 goto out_reg;
6208
6209         number = strsep(&param, ":");
6210
6211         if (!strlen(number))
6212                 goto out_reg;
6213
6214         /*
6215          * We use the callback data field (which is a pointer)
6216          * as our counter.
6217          */
6218         ret = kstrtoul(number, 0, (unsigned long *)&count);
6219         if (ret)
6220                 return ret;
6221
6222  out_reg:
6223         ret = register_ftrace_function_probe(glob, ops, count);
6224
6225         if (ret >= 0)
6226                 alloc_snapshot(&global_trace);
6227
6228         return ret < 0 ? ret : 0;
6229 }
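/*
 * This callback backs the "snapshot" command of set_ftrace_filter: writing
 * "<func>:snapshot" arms an unlimited probe, "<func>:snapshot:<count>"
 * limits how many snapshots are taken, and prefixing the line with '!'
 * removes the probe again.  Sketch of arming a probe; the function name
 * and path are illustrative assumptions.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	void arm_snapshot_probe(const char *cmd)	// e.g. "schedule:snapshot:1"
 *	{
 *		int fd = open("/sys/kernel/debug/tracing/set_ftrace_filter", O_WRONLY);
 *
 *		if (fd >= 0) {
 *			write(fd, cmd, strlen(cmd));
 *			close(fd);
 *		}
 *	}
 */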
6230
6231 static struct ftrace_func_command ftrace_snapshot_cmd = {
6232         .name                   = "snapshot",
6233         .func                   = ftrace_trace_snapshot_callback,
6234 };
6235
6236 static __init int register_snapshot_cmd(void)
6237 {
6238         return register_ftrace_command(&ftrace_snapshot_cmd);
6239 }
6240 #else
6241 static inline __init int register_snapshot_cmd(void) { return 0; }
6242 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6243
6244 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6245 {
6246         if (WARN_ON(!tr->dir))
6247                 return ERR_PTR(-ENODEV);
6248
6249         /* Top directory uses NULL as the parent */
6250         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6251                 return NULL;
6252
6253         /* All sub buffers have a descriptor */
6254         return tr->dir;
6255 }
6256
6257 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6258 {
6259         struct dentry *d_tracer;
6260
6261         if (tr->percpu_dir)
6262                 return tr->percpu_dir;
6263
6264         d_tracer = tracing_get_dentry(tr);
6265         if (IS_ERR(d_tracer))
6266                 return NULL;
6267
6268         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6269
6270         WARN_ONCE(!tr->percpu_dir,
6271                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6272
6273         return tr->percpu_dir;
6274 }
6275
6276 static struct dentry *
6277 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6278                       void *data, long cpu, const struct file_operations *fops)
6279 {
6280         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6281
6282         if (ret) /* See tracing_get_cpu() */
6283                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6284         return ret;
6285 }
6286
6287 static void
6288 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6289 {
6290         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6291         struct dentry *d_cpu;
6292         char cpu_dir[30]; /* 30 characters should be more than enough */
6293
6294         if (!d_percpu)
6295                 return;
6296
6297         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6298         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6299         if (!d_cpu) {
6300                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6301                 return;
6302         }
6303
6304         /* per cpu trace_pipe */
6305         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6306                                 tr, cpu, &tracing_pipe_fops);
6307
6308         /* per cpu trace */
6309         trace_create_cpu_file("trace", 0644, d_cpu,
6310                                 tr, cpu, &tracing_fops);
6311
6312         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6313                                 tr, cpu, &tracing_buffers_fops);
6314
6315         trace_create_cpu_file("stats", 0444, d_cpu,
6316                                 tr, cpu, &tracing_stats_fops);
6317
6318         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6319                                 tr, cpu, &tracing_entries_fops);
6320
6321 #ifdef CONFIG_TRACER_SNAPSHOT
6322         trace_create_cpu_file("snapshot", 0644, d_cpu,
6323                                 tr, cpu, &snapshot_fops);
6324
6325         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6326                                 tr, cpu, &snapshot_raw_fops);
6327 #endif
6328 }
6329
6330 #ifdef CONFIG_FTRACE_SELFTEST
6331 /* Let selftest have access to static functions in this file */
6332 #include "trace_selftest.c"
6333 #endif
6334
6335 static ssize_t
6336 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6337                         loff_t *ppos)
6338 {
6339         struct trace_option_dentry *topt = filp->private_data;
6340         char *buf;
6341
6342         if (topt->flags->val & topt->opt->bit)
6343                 buf = "1\n";
6344         else
6345                 buf = "0\n";
6346
6347         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6348 }
6349
6350 static ssize_t
6351 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6352                          loff_t *ppos)
6353 {
6354         struct trace_option_dentry *topt = filp->private_data;
6355         unsigned long val;
6356         int ret;
6357
6358         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6359         if (ret)
6360                 return ret;
6361
6362         if (val != 0 && val != 1)
6363                 return -EINVAL;
6364
6365         if (!!(topt->flags->val & topt->opt->bit) != val) {
6366                 mutex_lock(&trace_types_lock);
6367                 ret = __set_tracer_option(topt->tr, topt->flags,
6368                                           topt->opt, !val);
6369                 mutex_unlock(&trace_types_lock);
6370                 if (ret)
6371                         return ret;
6372         }
6373
6374         *ppos += cnt;
6375
6376         return cnt;
6377 }
6378
6379
6380 static const struct file_operations trace_options_fops = {
6381         .open = tracing_open_generic,
6382         .read = trace_options_read,
6383         .write = trace_options_write,
6384         .llseek = generic_file_llseek,
6385 };
6386
6387 /*
6388  * In order to pass in both the trace_array descriptor as well as the index
6389  * of the flag that a trace option file represents, the trace_array has a
6390  * character array trace_flags_index[], where each element holds the index
6391  * of the bit for the flag it represents: index[0] == 0, index[1] == 1, etc.
6392  * The address of the element for a given flag is what gets passed to that
6393  * flag option file's read/write callbacks.
6394  *
6395  * To extract both the index and the trace_array descriptor,
6396  * get_tr_index() uses the following algorithm:
6397  *
6398  *   idx = *ptr;
6399  *
6400  * The pointer holds the address of an element whose value equals its own
6401  * position in the array (remember, index[1] == 1), so dereferencing it
6402  * yields the flag's bit index.
6403  *
6404  * Subtracting that index from the pointer lands on the start of the array:
6405  *
6406  *   ptr - idx == &index[0]
6407  *
6408  * A simple container_of() from that pointer then gets us back to the
6409  * trace_array descriptor.
6410  */
6411 static void get_tr_index(void *data, struct trace_array **ptr,
6412                          unsigned int *pindex)
6413 {
6414         *pindex = *(unsigned char *)data;
6415
6416         *ptr = container_of(data - *pindex, struct trace_array,
6417                             trace_flags_index);
6418 }
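
/*
 * Worked example (illustrative only): if a core option file represents the
 * flag at bit 3, create_trace_option_core_file() below hands it
 * &tr->trace_flags_index[3] as private data, so:
 *
 *	unsigned int index;
 *	struct trace_array *tr;
 *
 *	get_tr_index(filp->private_data, &tr, &index);
 *	// index == 3 (the value stored in trace_flags_index[3])
 *	// tr    == the trace_array that embeds trace_flags_index[]
 */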
6419
6420 static ssize_t
6421 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6422                         loff_t *ppos)
6423 {
6424         void *tr_index = filp->private_data;
6425         struct trace_array *tr;
6426         unsigned int index;
6427         char *buf;
6428
6429         get_tr_index(tr_index, &tr, &index);
6430
6431         if (tr->trace_flags & (1 << index))
6432                 buf = "1\n";
6433         else
6434                 buf = "0\n";
6435
6436         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6437 }
6438
6439 static ssize_t
6440 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6441                          loff_t *ppos)
6442 {
6443         void *tr_index = filp->private_data;
6444         struct trace_array *tr;
6445         unsigned int index;
6446         unsigned long val;
6447         int ret;
6448
6449         get_tr_index(tr_index, &tr, &index);
6450
6451         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6452         if (ret)
6453                 return ret;
6454
6455         if (val != 0 && val != 1)
6456                 return -EINVAL;
6457
6458         mutex_lock(&trace_types_lock);
6459         ret = set_tracer_flag(tr, 1 << index, val);
6460         mutex_unlock(&trace_types_lock);
6461
6462         if (ret < 0)
6463                 return ret;
6464
6465         *ppos += cnt;
6466
6467         return cnt;
6468 }
6469
6470 static const struct file_operations trace_options_core_fops = {
6471         .open = tracing_open_generic,
6472         .read = trace_options_core_read,
6473         .write = trace_options_core_write,
6474         .llseek = generic_file_llseek,
6475 };
6476
6477 struct dentry *trace_create_file(const char *name,
6478                                  umode_t mode,
6479                                  struct dentry *parent,
6480                                  void *data,
6481                                  const struct file_operations *fops)
6482 {
6483         struct dentry *ret;
6484
6485         ret = tracefs_create_file(name, mode, parent, data, fops);
6486         if (!ret)
6487                 pr_warn("Could not create tracefs '%s' entry\n", name);
6488
6489         return ret;
6490 }
6491
6492
6493 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6494 {
6495         struct dentry *d_tracer;
6496
6497         if (tr->options)
6498                 return tr->options;
6499
6500         d_tracer = tracing_get_dentry(tr);
6501         if (IS_ERR(d_tracer))
6502                 return NULL;
6503
6504         tr->options = tracefs_create_dir("options", d_tracer);
6505         if (!tr->options) {
6506                 pr_warn("Could not create tracefs directory 'options'\n");
6507                 return NULL;
6508         }
6509
6510         return tr->options;
6511 }
6512
6513 static void
6514 create_trace_option_file(struct trace_array *tr,
6515                          struct trace_option_dentry *topt,
6516                          struct tracer_flags *flags,
6517                          struct tracer_opt *opt)
6518 {
6519         struct dentry *t_options;
6520
6521         t_options = trace_options_init_dentry(tr);
6522         if (!t_options)
6523                 return;
6524
6525         topt->flags = flags;
6526         topt->opt = opt;
6527         topt->tr = tr;
6528
6529         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6530                                     &trace_options_fops);
6531
6532 }
6533
6534 static void
6535 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6536 {
6537         struct trace_option_dentry *topts;
6538         struct trace_options *tr_topts;
6539         struct tracer_flags *flags;
6540         struct tracer_opt *opts;
6541         int cnt;
6542         int i;
6543
6544         if (!tracer)
6545                 return;
6546
6547         flags = tracer->flags;
6548
6549         if (!flags || !flags->opts)
6550                 return;
6551
6552         /*
6553          * If this is an instance, only create option files for
6554          * tracers that the instance actually supports.
6555          */
6556         if (!trace_ok_for_array(tracer, tr))
6557                 return;
6558
6559         for (i = 0; i < tr->nr_topts; i++) {
6560                 /* Make sure there are no duplicate flags. */
6561                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6562                         return;
6563         }
6564
6565         opts = flags->opts;
6566
6567         for (cnt = 0; opts[cnt].name; cnt++)
6568                 ;
6569
6570         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6571         if (!topts)
6572                 return;
6573
6574         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6575                             GFP_KERNEL);
6576         if (!tr_topts) {
6577                 kfree(topts);
6578                 return;
6579         }
6580
6581         tr->topts = tr_topts;
6582         tr->topts[tr->nr_topts].tracer = tracer;
6583         tr->topts[tr->nr_topts].topts = topts;
6584         tr->nr_topts++;
6585
6586         for (cnt = 0; opts[cnt].name; cnt++) {
6587                 create_trace_option_file(tr, &topts[cnt], flags,
6588                                          &opts[cnt]);
6589                 WARN_ONCE(topts[cnt].entry == NULL,
6590                           "Failed to create trace option: %s",
6591                           opts[cnt].name);
6592         }
6593 }
6594
6595 static struct dentry *
6596 create_trace_option_core_file(struct trace_array *tr,
6597                               const char *option, long index)
6598 {
6599         struct dentry *t_options;
6600
6601         t_options = trace_options_init_dentry(tr);
6602         if (!t_options)
6603                 return NULL;
6604
6605         return trace_create_file(option, 0644, t_options,
6606                                  (void *)&tr->trace_flags_index[index],
6607                                  &trace_options_core_fops);
6608 }
6609
6610 static void create_trace_options_dir(struct trace_array *tr)
6611 {
6612         struct dentry *t_options;
6613         bool top_level = tr == &global_trace;
6614         int i;
6615
6616         t_options = trace_options_init_dentry(tr);
6617         if (!t_options)
6618                 return;
6619
6620         for (i = 0; trace_options[i]; i++) {
6621                 if (top_level ||
6622                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6623                         create_trace_option_core_file(tr, trace_options[i], i);
6624         }
6625 }
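
/*
 * Usage sketch (illustrative; the mount point may differ): each core trace
 * flag ends up as a 0/1 file under the instance's options directory, e.g.
 *
 *	# cat /sys/kernel/debug/tracing/options/overwrite
 *	1
 *	# echo 0 > /sys/kernel/debug/tracing/options/overwrite
 *
 * Tracer-specific flags get their own files through
 * create_trace_option_files() when a tracer's options are added.
 */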
6626
6627 static ssize_t
6628 rb_simple_read(struct file *filp, char __user *ubuf,
6629                size_t cnt, loff_t *ppos)
6630 {
6631         struct trace_array *tr = filp->private_data;
6632         char buf[64];
6633         int r;
6634
6635         r = tracer_tracing_is_on(tr);
6636         r = sprintf(buf, "%d\n", r);
6637
6638         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6639 }
6640
6641 static ssize_t
6642 rb_simple_write(struct file *filp, const char __user *ubuf,
6643                 size_t cnt, loff_t *ppos)
6644 {
6645         struct trace_array *tr = filp->private_data;
6646         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6647         unsigned long val;
6648         int ret;
6649
6650         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6651         if (ret)
6652                 return ret;
6653
6654         if (buffer) {
6655                 mutex_lock(&trace_types_lock);
6656                 if (val) {
6657                         tracer_tracing_on(tr);
6658                         if (tr->current_trace->start)
6659                                 tr->current_trace->start(tr);
6660                 } else {
6661                         tracer_tracing_off(tr);
6662                         if (tr->current_trace->stop)
6663                                 tr->current_trace->stop(tr);
6664                 }
6665                 mutex_unlock(&trace_types_lock);
6666         }
6667
6668         (*ppos)++;
6669
6670         return cnt;
6671 }
6672
6673 static const struct file_operations rb_simple_fops = {
6674         .open           = tracing_open_generic_tr,
6675         .read           = rb_simple_read,
6676         .write          = rb_simple_write,
6677         .release        = tracing_release_generic_tr,
6678         .llseek         = default_llseek,
6679 };
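
/*
 * Usage sketch (illustrative; the mount point may differ): rb_simple_fops
 * backs the per-instance "tracing_on" file created in init_tracer_tracefs(),
 * so recording can be toggled from user space:
 *
 *	# echo 0 > /sys/kernel/debug/tracing/tracing_on	(pause recording)
 *	# echo 1 > /sys/kernel/debug/tracing/tracing_on	(resume recording)
 *
 * Reading the file reports the current state from tracer_tracing_is_on().
 */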
6680
6681 struct dentry *trace_instance_dir;
6682
6683 static void
6684 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6685
6686 static int
6687 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6688 {
6689         enum ring_buffer_flags rb_flags;
6690
6691         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6692
6693         buf->tr = tr;
6694
6695         buf->buffer = ring_buffer_alloc(size, rb_flags);
6696         if (!buf->buffer)
6697                 return -ENOMEM;
6698
6699         buf->data = alloc_percpu(struct trace_array_cpu);
6700         if (!buf->data) {
6701                 ring_buffer_free(buf->buffer);
6702                 return -ENOMEM;
6703         }
6704
6705         /* Allocate the first page for all buffers */
6706         set_buffer_entries(&tr->trace_buffer,
6707                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6708
6709         return 0;
6710 }
6711
6712 static int allocate_trace_buffers(struct trace_array *tr, int size)
6713 {
6714         int ret;
6715
6716         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6717         if (ret)
6718                 return ret;
6719
6720 #ifdef CONFIG_TRACER_MAX_TRACE
6721         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6722                                     allocate_snapshot ? size : 1);
6723         if (WARN_ON(ret)) {
6724                 ring_buffer_free(tr->trace_buffer.buffer);
6725                 free_percpu(tr->trace_buffer.data);
6726                 return -ENOMEM;
6727         }
6728         tr->allocated_snapshot = allocate_snapshot;
6729
6730         /*
6731          * Only the top level trace array gets its snapshot allocated
6732          * from the kernel command line.
6733          */
6734         allocate_snapshot = false;
6735 #endif
6736         return 0;
6737 }
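
/*
 * Note (illustrative): unless a snapshot was requested on the kernel
 * command line, the max/snapshot buffer above is allocated with a token
 * size of one entry and is assumed to be resized to match the main buffer
 * only when a snapshot user actually arms it.
 */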
6738
6739 static void free_trace_buffer(struct trace_buffer *buf)
6740 {
6741         if (buf->buffer) {
6742                 ring_buffer_free(buf->buffer);
6743                 buf->buffer = NULL;
6744                 free_percpu(buf->data);
6745                 buf->data = NULL;
6746         }
6747 }
6748
6749 static void free_trace_buffers(struct trace_array *tr)
6750 {
6751         if (!tr)
6752                 return;
6753
6754         free_trace_buffer(&tr->trace_buffer);
6755
6756 #ifdef CONFIG_TRACER_MAX_TRACE
6757         free_trace_buffer(&tr->max_buffer);
6758 #endif
6759 }
6760
6761 static void init_trace_flags_index(struct trace_array *tr)
6762 {
6763         int i;
6764
6765         /* Used by the trace options files */
6766         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
6767                 tr->trace_flags_index[i] = i;
6768 }
6769
6770 static void __update_tracer_options(struct trace_array *tr)
6771 {
6772         struct tracer *t;
6773
6774         for (t = trace_types; t; t = t->next)
6775                 add_tracer_options(tr, t);
6776 }
6777
6778 static void update_tracer_options(struct trace_array *tr)
6779 {
6780         mutex_lock(&trace_types_lock);
6781         __update_tracer_options(tr);
6782         mutex_unlock(&trace_types_lock);
6783 }
6784
6785 static int instance_mkdir(const char *name)
6786 {
6787         struct trace_array *tr;
6788         int ret;
6789
6790         mutex_lock(&trace_types_lock);
6791
6792         ret = -EEXIST;
6793         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6794                 if (tr->name && strcmp(tr->name, name) == 0)
6795                         goto out_unlock;
6796         }
6797
6798         ret = -ENOMEM;
6799         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6800         if (!tr)
6801                 goto out_unlock;
6802
6803         tr->name = kstrdup(name, GFP_KERNEL);
6804         if (!tr->name)
6805                 goto out_free_tr;
6806
6807         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6808                 goto out_free_tr;
6809
6810         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
6811
6812         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6813
6814         raw_spin_lock_init(&tr->start_lock);
6815
6816         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6817
6818         tr->current_trace = &nop_trace;
6819
6820         INIT_LIST_HEAD(&tr->systems);
6821         INIT_LIST_HEAD(&tr->events);
6822
6823         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6824                 goto out_free_tr;
6825
6826         tr->dir = tracefs_create_dir(name, trace_instance_dir);
6827         if (!tr->dir)
6828                 goto out_free_tr;
6829
6830         ret = event_trace_add_tracer(tr->dir, tr);
6831         if (ret) {
6832                 tracefs_remove_recursive(tr->dir);
6833                 goto out_free_tr;
6834         }
6835
6836         init_tracer_tracefs(tr, tr->dir);
6837         init_trace_flags_index(tr);
6838         __update_tracer_options(tr);
6839
6840         list_add(&tr->list, &ftrace_trace_arrays);
6841
6842         mutex_unlock(&trace_types_lock);
6843
6844         return 0;
6845
6846  out_free_tr:
6847         free_trace_buffers(tr);
6848         free_cpumask_var(tr->tracing_cpumask);
6849         kfree(tr->name);
6850         kfree(tr);
6851
6852  out_unlock:
6853         mutex_unlock(&trace_types_lock);
6854
6855         return ret;
6856
6857 }
6858
6859 static int instance_rmdir(const char *name)
6860 {
6861         struct trace_array *tr;
6862         int found = 0;
6863         int ret;
6864         int i;
6865
6866         mutex_lock(&trace_types_lock);
6867
6868         ret = -ENODEV;
6869         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6870                 if (tr->name && strcmp(tr->name, name) == 0) {
6871                         found = 1;
6872                         break;
6873                 }
6874         }
6875         if (!found)
6876                 goto out_unlock;
6877
6878         ret = -EBUSY;
6879         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6880                 goto out_unlock;
6881
6882         list_del(&tr->list);
6883
6884         /* Disable all the flags that were enabled coming in */
6885         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
6886                 if ((1 << i) & ZEROED_TRACE_FLAGS)
6887                         set_tracer_flag(tr, 1 << i, 0);
6888         }
6889
6890         tracing_set_nop(tr);
6891         event_trace_del_tracer(tr);
6892         ftrace_destroy_function_files(tr);
6893         tracefs_remove_recursive(tr->dir);
6894         free_trace_buffers(tr);
6895
6896         for (i = 0; i < tr->nr_topts; i++) {
6897                 kfree(tr->topts[i].topts);
6898         }
6899         kfree(tr->topts);
6900
6901         kfree(tr->name);
6902         kfree(tr);
6903
6904         ret = 0;
6905
6906  out_unlock:
6907         mutex_unlock(&trace_types_lock);
6908
6909         return ret;
6910 }
6911
6912 static __init void create_trace_instances(struct dentry *d_tracer)
6913 {
6914         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
6915                                                          instance_mkdir,
6916                                                          instance_rmdir);
6917         if (WARN_ON(!trace_instance_dir))
6918                 return;
6919 }
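
/*
 * Usage sketch (illustrative; the mount point may differ): with the
 * "instances" directory registered above, trace instances are created and
 * removed from user space, which lands in instance_mkdir()/instance_rmdir():
 *
 *	# mkdir /sys/kernel/debug/tracing/instances/foo
 *	# rmdir /sys/kernel/debug/tracing/instances/foo
 *
 * Each instance is a separate trace_array with its own ring buffers,
 * events and option files.
 */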
6920
6921 static void
6922 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
6923 {
6924         int cpu;
6925
6926         trace_create_file("available_tracers", 0444, d_tracer,
6927                         tr, &show_traces_fops);
6928
6929         trace_create_file("current_tracer", 0644, d_tracer,
6930                         tr, &set_tracer_fops);
6931
6932         trace_create_file("tracing_cpumask", 0644, d_tracer,
6933                           tr, &tracing_cpumask_fops);
6934
6935         trace_create_file("trace_options", 0644, d_tracer,
6936                           tr, &tracing_iter_fops);
6937
6938         trace_create_file("trace", 0644, d_tracer,
6939                           tr, &tracing_fops);
6940
6941         trace_create_file("trace_pipe", 0444, d_tracer,
6942                           tr, &tracing_pipe_fops);
6943
6944         trace_create_file("buffer_size_kb", 0644, d_tracer,
6945                           tr, &tracing_entries_fops);
6946
6947         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6948                           tr, &tracing_total_entries_fops);
6949
6950         trace_create_file("free_buffer", 0200, d_tracer,
6951                           tr, &tracing_free_buffer_fops);
6952
6953         trace_create_file("trace_marker", 0220, d_tracer,
6954                           tr, &tracing_mark_fops);
6955
6956         trace_create_file("trace_clock", 0644, d_tracer, tr,
6957                           &trace_clock_fops);
6958
6959         trace_create_file("tracing_on", 0644, d_tracer,
6960                           tr, &rb_simple_fops);
6961
6962         create_trace_options_dir(tr);
6963
6964 #ifdef CONFIG_TRACER_MAX_TRACE
6965         trace_create_file("tracing_max_latency", 0644, d_tracer,
6966                         &tr->max_latency, &tracing_max_lat_fops);
6967 #endif
6968
6969         if (ftrace_create_function_files(tr, d_tracer))
6970                 WARN(1, "Could not allocate function filter files");
6971
6972 #ifdef CONFIG_TRACER_SNAPSHOT
6973         trace_create_file("snapshot", 0644, d_tracer,
6974                           tr, &snapshot_fops);
6975 #endif
6976
6977         for_each_tracing_cpu(cpu)
6978                 tracing_init_tracefs_percpu(tr, cpu);
6979
6980 }
6981
6982 static struct vfsmount *trace_automount(void *ignore)
6983 {
6984         struct vfsmount *mnt;
6985         struct file_system_type *type;
6986
6987         /*
6988          * To maintain backward compatibility for tools that mount
6989          * debugfs to get to the tracing facility, tracefs is automatically
6990          * mounted to the debugfs/tracing directory.
6991          */
6992         type = get_fs_type("tracefs");
6993         if (!type)
6994                 return NULL;
6995         mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
6996         put_filesystem(type);
6997         if (IS_ERR(mnt))
6998                 return NULL;
6999         mntget(mnt);
7000
7001         return mnt;
7002 }
7003
7004 /**
7005  * tracing_init_dentry - initialize top level trace array
7006  *
7007  * This is called when creating files or directories in the tracing
7008  * directory. It is called via fs_initcall() by any of the boot up code
7009  * and expects to return the dentry of the top level tracing directory.
7010  */
7011 struct dentry *tracing_init_dentry(void)
7012 {
7013         struct trace_array *tr = &global_trace;
7014
7015         /* The top level trace array uses NULL as parent */
7016         if (tr->dir)
7017                 return NULL;
7018
7019         if (WARN_ON(!tracefs_initialized()) ||
7020                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7021                  WARN_ON(!debugfs_initialized())))
7022                 return ERR_PTR(-ENODEV);
7023
7024         /*
7025          * As there may still be users that expect the tracing
7026          * files to exist in debugfs/tracing, we must automount
7027          * the tracefs file system there, so that older tools still
7028          * work with the newer kernel.
7029          */
7030         tr->dir = debugfs_create_automount("tracing", NULL,
7031                                            trace_automount, NULL);
7032         if (!tr->dir) {
7033                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7034                 return ERR_PTR(-ENOMEM);
7035         }
7036
7037         return NULL;
7038 }
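
/*
 * Note (illustrative): a NULL return from tracing_init_dentry() is the
 * success case. Callers pass it on to tracefs, where a NULL parent means
 * "use the tracefs root directory", while failures come back as ERR_PTR()
 * values that callers check with IS_ERR().
 */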
7039
7040 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7041 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7042
7043 static void __init trace_enum_init(void)
7044 {
7045         int len;
7046
7047         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7048         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7049 }
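
/*
 * Background (illustrative): __start_ftrace_enum_maps and
 * __stop_ftrace_enum_maps are linker-provided boundaries of the section
 * that collects all built-in trace_enum_map pointers, so the pointer
 * difference above is simply the number of entries to insert.
 */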
7050
7051 #ifdef CONFIG_MODULES
7052 static void trace_module_add_enums(struct module *mod)
7053 {
7054         if (!mod->num_trace_enums)
7055                 return;
7056
7057         /*
7058          * Modules with bad taint do not have events created, do
7059          * not bother with enums either.
7060          */
7061         if (trace_module_has_bad_taint(mod))
7062                 return;
7063
7064         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7065 }
7066
7067 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7068 static void trace_module_remove_enums(struct module *mod)
7069 {
7070         union trace_enum_map_item *map;
7071         union trace_enum_map_item **last = &trace_enum_maps;
7072
7073         if (!mod->num_trace_enums)
7074                 return;
7075
7076         mutex_lock(&trace_enum_mutex);
7077
7078         map = trace_enum_maps;
7079
7080         while (map) {
7081                 if (map->head.mod == mod)
7082                         break;
7083                 map = trace_enum_jmp_to_tail(map);
7084                 last = &map->tail.next;
7085                 map = map->tail.next;
7086         }
7087         if (!map)
7088                 goto out;
7089
7090         *last = trace_enum_jmp_to_tail(map)->tail.next;
7091         kfree(map);
7092  out:
7093         mutex_unlock(&trace_enum_mutex);
7094 }
7095 #else
7096 static inline void trace_module_remove_enums(struct module *mod) { }
7097 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7098
7099 static int trace_module_notify(struct notifier_block *self,
7100                                unsigned long val, void *data)
7101 {
7102         struct module *mod = data;
7103
7104         switch (val) {
7105         case MODULE_STATE_COMING:
7106                 trace_module_add_enums(mod);
7107                 break;
7108         case MODULE_STATE_GOING:
7109                 trace_module_remove_enums(mod);
7110                 break;
7111         }
7112
7113         return 0;
7114 }
7115
7116 static struct notifier_block trace_module_nb = {
7117         .notifier_call = trace_module_notify,
7118         .priority = 0,
7119 };
7120 #endif /* CONFIG_MODULES */
7121
7122 static __init int tracer_init_tracefs(void)
7123 {
7124         struct dentry *d_tracer;
7125
7126         trace_access_lock_init();
7127
7128         d_tracer = tracing_init_dentry();
7129         if (IS_ERR(d_tracer))
7130                 return 0;
7131
7132         init_tracer_tracefs(&global_trace, d_tracer);
7133
7134         trace_create_file("tracing_thresh", 0644, d_tracer,
7135                         &global_trace, &tracing_thresh_fops);
7136
7137         trace_create_file("README", 0444, d_tracer,
7138                         NULL, &tracing_readme_fops);
7139
7140         trace_create_file("saved_cmdlines", 0444, d_tracer,
7141                         NULL, &tracing_saved_cmdlines_fops);
7142
7143         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7144                           NULL, &tracing_saved_cmdlines_size_fops);
7145
7146         trace_enum_init();
7147
7148         trace_create_enum_file(d_tracer);
7149
7150 #ifdef CONFIG_MODULES
7151         register_module_notifier(&trace_module_nb);
7152 #endif
7153
7154 #ifdef CONFIG_DYNAMIC_FTRACE
7155         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7156                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7157 #endif
7158
7159         create_trace_instances(d_tracer);
7160
7161         update_tracer_options(&global_trace);
7162
7163         return 0;
7164 }
7165
7166 static int trace_panic_handler(struct notifier_block *this,
7167                                unsigned long event, void *unused)
7168 {
7169         if (ftrace_dump_on_oops)
7170                 ftrace_dump(ftrace_dump_on_oops);
7171         return NOTIFY_OK;
7172 }
7173
7174 static struct notifier_block trace_panic_notifier = {
7175         .notifier_call  = trace_panic_handler,
7176         .next           = NULL,
7177         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7178 };
7179
7180 static int trace_die_handler(struct notifier_block *self,
7181                              unsigned long val,
7182                              void *data)
7183 {
7184         switch (val) {
7185         case DIE_OOPS:
7186                 if (ftrace_dump_on_oops)
7187                         ftrace_dump(ftrace_dump_on_oops);
7188                 break;
7189         default:
7190                 break;
7191         }
7192         return NOTIFY_OK;
7193 }
7194
7195 static struct notifier_block trace_die_notifier = {
7196         .notifier_call = trace_die_handler,
7197         .priority = 200
7198 };
7199
7200 /*
7201  * printk is limited to a max of 1024 bytes; we really don't need it that big.
7202  * Nothing should be printing 1000 characters anyway.
7203  */
7204 #define TRACE_MAX_PRINT         1000
7205
7206 /*
7207  * Define here KERN_TRACE so that we have one place to modify
7208  * it if we decide to change what log level the ftrace dump
7209  * should be at.
7210  */
7211 #define KERN_TRACE              KERN_EMERG
7212
7213 void
7214 trace_printk_seq(struct trace_seq *s)
7215 {
7216         /* Probably should print a warning here. */
7217         if (s->seq.len >= TRACE_MAX_PRINT)
7218                 s->seq.len = TRACE_MAX_PRINT;
7219
7220         /*
7221          * More paranoid code. Although the buffer size is set to
7222          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7223          * an extra layer of protection.
7224          */
7225         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7226                 s->seq.len = s->seq.size - 1;
7227
7228         /* Should already be NUL terminated, but we are paranoid. */
7229         s->buffer[s->seq.len] = 0;
7230
7231         printk(KERN_TRACE "%s", s->buffer);
7232
7233         trace_seq_init(s);
7234 }
7235
7236 void trace_init_global_iter(struct trace_iterator *iter)
7237 {
7238         iter->tr = &global_trace;
7239         iter->trace = iter->tr->current_trace;
7240         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7241         iter->trace_buffer = &global_trace.trace_buffer;
7242
7243         if (iter->trace && iter->trace->open)
7244                 iter->trace->open(iter);
7245
7246         /* Annotate start of buffers if we had overruns */
7247         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7248                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7249
7250         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7251         if (trace_clocks[iter->tr->clock_id].in_ns)
7252                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7253 }
7254
7255 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7256 {
7257         /* use static because iter can be a bit big for the stack */
7258         static struct trace_iterator iter;
7259         static atomic_t dump_running;
7260         struct trace_array *tr = &global_trace;
7261         unsigned int old_userobj;
7262         unsigned long flags;
7263         int cnt = 0, cpu;
7264
7265         /* Only allow one dump user at a time. */
7266         if (atomic_inc_return(&dump_running) != 1) {
7267                 atomic_dec(&dump_running);
7268                 return;
7269         }
7270
7271         /*
7272          * Always turn off tracing when we dump.
7273          * We don't need to show trace output of what happens
7274          * between multiple crashes.
7275          *
7276          * If the user does a sysrq-z, then they can re-enable
7277          * tracing with echo 1 > tracing_on.
7278          */
7279         tracing_off();
7280
7281         local_irq_save(flags);
7282
7283         /* Simulate the iterator */
7284         trace_init_global_iter(&iter);
7285
7286         for_each_tracing_cpu(cpu) {
7287                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7288         }
7289
7290         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7291
7292         /* don't look at user memory in panic mode */
7293         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7294
7295         switch (oops_dump_mode) {
7296         case DUMP_ALL:
7297                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7298                 break;
7299         case DUMP_ORIG:
7300                 iter.cpu_file = raw_smp_processor_id();
7301                 break;
7302         case DUMP_NONE:
7303                 goto out_enable;
7304         default:
7305                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7306                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7307         }
7308
7309         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7310
7311         /* Did function tracer already get disabled? */
7312         if (ftrace_is_dead()) {
7313                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7314                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7315         }
7316
7317         /*
7318          * We need to stop all tracing on all CPUs to read
7319          * the next buffer. This is a bit expensive, but is
7320          * not done often. We print all that we can read,
7321          * and then release the locks again.
7322          */
7323
7324         while (!trace_empty(&iter)) {
7325
7326                 if (!cnt)
7327                         printk(KERN_TRACE "---------------------------------\n");
7328
7329                 cnt++;
7330
7331                 /* reset all but tr, trace, and overruns */
7332                 memset(&iter.seq, 0,
7333                        sizeof(struct trace_iterator) -
7334                        offsetof(struct trace_iterator, seq));
7335                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7336                 iter.pos = -1;
7337
7338                 if (trace_find_next_entry_inc(&iter) != NULL) {
7339                         int ret;
7340
7341                         ret = print_trace_line(&iter);
7342                         if (ret != TRACE_TYPE_NO_CONSUME)
7343                                 trace_consume(&iter);
7344                 }
7345                 touch_nmi_watchdog();
7346
7347                 trace_printk_seq(&iter.seq);
7348         }
7349
7350         if (!cnt)
7351                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7352         else
7353                 printk(KERN_TRACE "---------------------------------\n");
7354
7355  out_enable:
7356         tr->trace_flags |= old_userobj;
7357
7358         for_each_tracing_cpu(cpu) {
7359                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7360         }
7361         atomic_dec(&dump_running);
7362         local_irq_restore(flags);
7363 }
7364 EXPORT_SYMBOL_GPL(ftrace_dump);
7365
7366 __init static int tracer_alloc_buffers(void)
7367 {
7368         int ring_buf_size;
7369         int ret = -ENOMEM;
7370
7371         /*
7372          * Make sure we don't accidentally add more trace options
7373          * than we have bits for.
7374          */
7375         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7376
7377         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7378                 goto out;
7379
7380         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7381                 goto out_free_buffer_mask;
7382
7383         /* Only allocate trace_printk buffers if a trace_printk exists */
7384         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7385                 /* Must be called before global_trace.buffer is allocated */
7386                 trace_printk_init_buffers();
7387
7388         /* To save memory, keep the ring buffer size to its minimum */
7389         if (ring_buffer_expanded)
7390                 ring_buf_size = trace_buf_size;
7391         else
7392                 ring_buf_size = 1;
7393
7394         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7395         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7396
7397         raw_spin_lock_init(&global_trace.start_lock);
7398
7399         /* Used for event triggers */
7400         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7401         if (!temp_buffer)
7402                 goto out_free_cpumask;
7403
7404         if (trace_create_savedcmd() < 0)
7405                 goto out_free_temp_buffer;
7406
7407         /* TODO: make the number of buffers hot-pluggable with CPUs */
7408         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7409                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7410                 WARN_ON(1);
7411                 goto out_free_savedcmd;
7412         }
7413
7414         if (global_trace.buffer_disabled)
7415                 tracing_off();
7416
7417         if (trace_boot_clock) {
7418                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7419                 if (ret < 0)
7420                         pr_warn("Trace clock %s not defined, going back to default\n",
7421                                 trace_boot_clock);
7422         }
7423
7424         /*
7425          * register_tracer() might reference current_trace, so it
7426          * needs to be set before we register anything. This is
7427          * just a bootstrap of current_trace anyway.
7428          */
7429         global_trace.current_trace = &nop_trace;
7430
7431         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7432
7433         ftrace_init_global_array_ops(&global_trace);
7434
7435         init_trace_flags_index(&global_trace);
7436
7437         register_tracer(&nop_trace);
7438
7439         /* All seems OK, enable tracing */
7440         tracing_disabled = 0;
7441
7442         atomic_notifier_chain_register(&panic_notifier_list,
7443                                        &trace_panic_notifier);
7444
7445         register_die_notifier(&trace_die_notifier);
7446
7447         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7448
7449         INIT_LIST_HEAD(&global_trace.systems);
7450         INIT_LIST_HEAD(&global_trace.events);
7451         list_add(&global_trace.list, &ftrace_trace_arrays);
7452
7453         apply_trace_boot_options();
7454
7455         register_snapshot_cmd();
7456
7457         return 0;
7458
7459 out_free_savedcmd:
7460         free_saved_cmdlines_buffer(savedcmd);
7461 out_free_temp_buffer:
7462         ring_buffer_free(temp_buffer);
7463 out_free_cpumask:
7464         free_cpumask_var(global_trace.tracing_cpumask);
7465 out_free_buffer_mask:
7466         free_cpumask_var(tracing_buffer_mask);
7467 out:
7468         return ret;
7469 }
7470
7471 void __init trace_init(void)
7472 {
7473         if (tracepoint_printk) {
7474                 tracepoint_print_iter =
7475                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7476                 if (WARN_ON(!tracepoint_print_iter))
7477                         tracepoint_printk = 0;
7478         }
7479         tracer_alloc_buffers();
7480         trace_event_init();
7481 }
7482
7483 __init static int clear_boot_tracer(void)
7484 {
7485         /*
7486          * The default bootup tracer name is stored in an init section
7487          * buffer. This function is called at late_initcall time; if the
7488          * boot tracer was never found and registered, clear the pointer
7489          * so that a later registration does not access the buffer that
7490          * is about to be freed.
7491          */
7492         if (!default_bootup_tracer)
7493                 return 0;
7494
7495         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7496                default_bootup_tracer);
7497         default_bootup_tracer = NULL;
7498
7499         return 0;
7500 }
7501
7502 fs_initcall(tracer_init_tracefs);
7503 late_initcall(clear_boot_tracer);