fix memory leaks in tracing_buffers_splice_read()
[cascardo/linux.git] / kernel / trace / trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring-buffer to count the
57  * entries inserted during the selftest, although some concurrent
58  * insertions into the ring-buffer, such as trace_printk, could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80         return 0;
81 }
82
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 but will turn to zero if the initialization
93  * of the tracer is successful. But that is the only place that sets
94  * this back to zero.
95  */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124         struct module                   *mod;
125         unsigned long                   length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131         /*
132          * "end" is first and points to NULL as it must be different
133          * from "mod" or "enum_string"
134          */
135         union trace_enum_map_item       *next;
136         const char                      *end;   /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149         struct trace_enum_map           map;
150         struct trace_enum_map_head      head;
151         struct trace_enum_map_tail      tail;
152 };
153
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE         100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168         default_bootup_tracer = bootup_tracer_buf;
169         /* We are using ftrace early, expand it */
170         ring_buffer_expanded = true;
171         return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177         if (*str++ != '=' || !*str) {
178                 ftrace_dump_on_oops = DUMP_ALL;
179                 return 1;
180         }
181
182         if (!strcmp("orig_cpu", str)) {
183                 ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194                 __disable_trace_on_warning = 1;
195         return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201         allocate_snapshot = true;
202         /* We also need the main ring buffer expanded */
203         ring_buffer_expanded = true;
204         return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214         return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224         trace_boot_clock = trace_boot_clock_buf;
225         return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232                 tracepoint_printk = 1;
233         return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239         nsec += 500;
240         do_div(nsec, 1000);
241         return nsec;
242 }
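/*
 * Worked example of the rounding above: ns2usecs(1499) == 1 but
 * ns2usecs(1500) == 2, i.e. the "+ 500" makes do_div() round to the
 * nearest microsecond instead of truncating.
 */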
243
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS                                             \
246         (FUNCTION_DEFAULT_FLAGS |                                       \
247          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
248          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
249          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
250          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
254                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258         TRACE_ITER_EVENT_FORK
259
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a linked list of pages that will store trace entries. The
264  * page descriptor of the pages in the memory is used to hold
265  * the linked list by linking the lru item in the page descriptor
266  * to each of the pages in the buffer per CPU.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273         .trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275
276 LIST_HEAD(ftrace_trace_arrays);
277
278 int trace_array_get(struct trace_array *this_tr)
279 {
280         struct trace_array *tr;
281         int ret = -ENODEV;
282
283         mutex_lock(&trace_types_lock);
284         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285                 if (tr == this_tr) {
286                         tr->ref++;
287                         ret = 0;
288                         break;
289                 }
290         }
291         mutex_unlock(&trace_types_lock);
292
293         return ret;
294 }
295
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298         WARN_ON(!this_tr->ref);
299         this_tr->ref--;
300 }
301
302 void trace_array_put(struct trace_array *this_tr)
303 {
304         mutex_lock(&trace_types_lock);
305         __trace_array_put(this_tr);
306         mutex_unlock(&trace_types_lock);
307 }
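/*
 * A typical caller pairs the two helpers above around any use of a
 * trace_array it does not own. Sketch only; real call sites differ in
 * their error handling:
 *
 *	if (trace_array_get(tr) < 0)
 *		return -ENODEV;
 *	...use tr...
 *	trace_array_put(tr);
 */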
308
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310                               struct ring_buffer *buffer,
311                               struct ring_buffer_event *event)
312 {
313         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314             !filter_match_preds(call->filter, rec)) {
315                 __trace_event_discard_commit(buffer, event);
316                 return 1;
317         }
318
319         return 0;
320 }
321
322 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
323 {
324         u64 ts;
325
326         /* Early boot up does not have a buffer yet */
327         if (!buf->buffer)
328                 return trace_clock_local();
329
330         ts = ring_buffer_time_stamp(buf->buffer, cpu);
331         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
332
333         return ts;
334 }
335
336 cycle_t ftrace_now(int cpu)
337 {
338         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
339 }
340
341 /**
342  * tracing_is_enabled - Show if global_trace has been disabled
343  *
344  * Shows if the global trace has been enabled or not. It uses the
345  * mirror flag "buffer_disabled" to be used in fast paths such as for
346  * the irqsoff tracer. But it may be inaccurate due to races. If you
347  * need to know the accurate state, use tracing_is_on() which is a little
348  * slower, but accurate.
349  */
350 int tracing_is_enabled(void)
351 {
352         /*
353          * For quick access (irqsoff uses this in fast path), just
354          * return the mirror variable of the state of the ring buffer.
355          * It's a little racy, but we don't really care.
356          */
357         smp_rmb();
358         return !global_trace.buffer_disabled;
359 }
360
361 /*
362  * trace_buf_size is the size in bytes that is allocated
363  * for a buffer. Note, the number of bytes is always rounded
364  * to page size.
365  *
366  * This number is purposely set to a low value of 16384:
367  * if a dump on oops happens, it is much nicer not to have to
368  * wait for all that output. In any case it is configurable at
369  * both boot time and run time.
370  */
371 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
372
373 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
374
375 /* trace_types holds a link list of available tracers. */
376 static struct tracer            *trace_types __read_mostly;
377
378 /*
379  * trace_types_lock is used to protect the trace_types list.
380  */
381 DEFINE_MUTEX(trace_types_lock);
382
383 /*
384  * Serialize access to the ring buffer.
385  *
386  * The ring buffer serializes its readers, but that is only low level
387  * protection. The validity of the events (as returned by
388  * ring_buffer_peek() etc.) is not protected by the ring buffer.
389  *
390  * The content of events may become garbage if we allow another process
391  * to consume these events concurrently:
392  *   A) the page of the consumed events may become a normal page
393  *      (not a reader page) in the ring buffer, and this page will be
394  *      rewritten by the event producer.
395  *   B) The page of the consumed events may become a page for splice_read,
396  *      and this page will be returned to the system.
397  *
398  * These primitives allow multiple processes to access different per-cpu
399  * ring buffers concurrently.
400  *
401  * These primitives don't distinguish read-only and read-consume access.
402  * Multiple read-only accesses are also serialized.
403  */
404
405 #ifdef CONFIG_SMP
406 static DECLARE_RWSEM(all_cpu_access_lock);
407 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
408
409 static inline void trace_access_lock(int cpu)
410 {
411         if (cpu == RING_BUFFER_ALL_CPUS) {
412                 /* gain it for accessing the whole ring buffer. */
413                 down_write(&all_cpu_access_lock);
414         } else {
415                 /* gain it for accessing a cpu ring buffer. */
416
417                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
418                 down_read(&all_cpu_access_lock);
419
420                 /* Secondly block other access to this @cpu ring buffer. */
421                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
422         }
423 }
424
425 static inline void trace_access_unlock(int cpu)
426 {
427         if (cpu == RING_BUFFER_ALL_CPUS) {
428                 up_write(&all_cpu_access_lock);
429         } else {
430                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
431                 up_read(&all_cpu_access_lock);
432         }
433 }
434
435 static inline void trace_access_lock_init(void)
436 {
437         int cpu;
438
439         for_each_possible_cpu(cpu)
440                 mutex_init(&per_cpu(cpu_access_lock, cpu));
441 }
442
443 #else
444
445 static DEFINE_MUTEX(access_lock);
446
447 static inline void trace_access_lock(int cpu)
448 {
449         (void)cpu;
450         mutex_lock(&access_lock);
451 }
452
453 static inline void trace_access_unlock(int cpu)
454 {
455         (void)cpu;
456         mutex_unlock(&access_lock);
457 }
458
459 static inline void trace_access_lock_init(void)
460 {
461 }
462
463 #endif
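/*
 * Usage sketch for the access locks above (illustrative; real readers
 * such as the trace_pipe and splice paths add further checks):
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *	...decode the event...
 *	trace_access_unlock(cpu);
 *
 * On SMP, passing RING_BUFFER_ALL_CPUS instead of a cpu number takes the
 * write side of all_cpu_access_lock and excludes all per-cpu readers.
 */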
464
465 #ifdef CONFIG_STACKTRACE
466 static void __ftrace_trace_stack(struct ring_buffer *buffer,
467                                  unsigned long flags,
468                                  int skip, int pc, struct pt_regs *regs);
469 static inline void ftrace_trace_stack(struct trace_array *tr,
470                                       struct ring_buffer *buffer,
471                                       unsigned long flags,
472                                       int skip, int pc, struct pt_regs *regs);
473
474 #else
475 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
476                                         unsigned long flags,
477                                         int skip, int pc, struct pt_regs *regs)
478 {
479 }
480 static inline void ftrace_trace_stack(struct trace_array *tr,
481                                       struct ring_buffer *buffer,
482                                       unsigned long flags,
483                                       int skip, int pc, struct pt_regs *regs)
484 {
485 }
486
487 #endif
488
489 static void tracer_tracing_on(struct trace_array *tr)
490 {
491         if (tr->trace_buffer.buffer)
492                 ring_buffer_record_on(tr->trace_buffer.buffer);
493         /*
494          * This flag is looked at when buffers haven't been allocated
495          * yet, or by some tracers (like irqsoff) that just want to
496          * know if the ring buffer has been disabled, but can handle
497          * races where it gets disabled while we still do a record.
498          * As the check is in the fast path of the tracers, it is more
499          * important to be fast than accurate.
500          */
501         tr->buffer_disabled = 0;
502         /* Make the flag seen by readers */
503         smp_wmb();
504 }
505
506 /**
507  * tracing_on - enable tracing buffers
508  *
509  * This function enables tracing buffers that may have been
510  * disabled with tracing_off.
511  */
512 void tracing_on(void)
513 {
514         tracer_tracing_on(&global_trace);
515 }
516 EXPORT_SYMBOL_GPL(tracing_on);
517
518 /**
519  * __trace_puts - write a constant string into the trace buffer.
520  * @ip:    The address of the caller
521  * @str:   The constant string to write
522  * @size:  The size of the string.
523  */
524 int __trace_puts(unsigned long ip, const char *str, int size)
525 {
526         struct ring_buffer_event *event;
527         struct ring_buffer *buffer;
528         struct print_entry *entry;
529         unsigned long irq_flags;
530         int alloc;
531         int pc;
532
533         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
534                 return 0;
535
536         pc = preempt_count();
537
538         if (unlikely(tracing_selftest_running || tracing_disabled))
539                 return 0;
540
541         alloc = sizeof(*entry) + size + 2; /* possible \n added */
542
543         local_save_flags(irq_flags);
544         buffer = global_trace.trace_buffer.buffer;
545         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
546                                           irq_flags, pc);
547         if (!event)
548                 return 0;
549
550         entry = ring_buffer_event_data(event);
551         entry->ip = ip;
552
553         memcpy(&entry->buf, str, size);
554
555         /* Add a newline if necessary */
556         if (entry->buf[size - 1] != '\n') {
557                 entry->buf[size] = '\n';
558                 entry->buf[size + 1] = '\0';
559         } else
560                 entry->buf[size] = '\0';
561
562         __buffer_unlock_commit(buffer, event);
563         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
564
565         return size;
566 }
567 EXPORT_SYMBOL_GPL(__trace_puts);
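/*
 * Callers normally use the trace_puts() macro rather than calling
 * __trace_puts() directly; the macro supplies the caller's ip and the
 * string length, e.g. (illustrative):
 *
 *	trace_puts("reached the slow path\n");
 */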
568
569 /**
570  * __trace_bputs - write the pointer to a constant string into trace buffer
571  * @ip:    The address of the caller
572  * @str:   The constant string to write to the buffer to
573  */
574 int __trace_bputs(unsigned long ip, const char *str)
575 {
576         struct ring_buffer_event *event;
577         struct ring_buffer *buffer;
578         struct bputs_entry *entry;
579         unsigned long irq_flags;
580         int size = sizeof(struct bputs_entry);
581         int pc;
582
583         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
584                 return 0;
585
586         pc = preempt_count();
587
588         if (unlikely(tracing_selftest_running || tracing_disabled))
589                 return 0;
590
591         local_save_flags(irq_flags);
592         buffer = global_trace.trace_buffer.buffer;
593         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
594                                           irq_flags, pc);
595         if (!event)
596                 return 0;
597
598         entry = ring_buffer_event_data(event);
599         entry->ip                       = ip;
600         entry->str                      = str;
601
602         __buffer_unlock_commit(buffer, event);
603         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
604
605         return 1;
606 }
607 EXPORT_SYMBOL_GPL(__trace_bputs);
608
609 #ifdef CONFIG_TRACER_SNAPSHOT
610 /**
611  * trace_snapshot - take a snapshot of the current buffer.
612  *
613  * This causes a swap between the snapshot buffer and the current live
614  * tracing buffer. You can use this to take snapshots of the live
615  * trace when some condition is triggered, but continue to trace.
616  *
617  * Note, make sure to allocate the snapshot with either
618  * a tracing_snapshot_alloc(), or by doing it manually
619  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
620  *
621  * If the snapshot buffer is not allocated, it will stop tracing.
622  * Basically making a permanent snapshot.
623  */
624 void tracing_snapshot(void)
625 {
626         struct trace_array *tr = &global_trace;
627         struct tracer *tracer = tr->current_trace;
628         unsigned long flags;
629
630         if (in_nmi()) {
631                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
632                 internal_trace_puts("*** snapshot is being ignored        ***\n");
633                 return;
634         }
635
636         if (!tr->allocated_snapshot) {
637                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
638                 internal_trace_puts("*** stopping trace here!   ***\n");
639                 tracing_off();
640                 return;
641         }
642
643         /* Note, snapshot can not be used when the tracer uses it */
644         if (tracer->use_max_tr) {
645                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
646                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
647                 return;
648         }
649
650         local_irq_save(flags);
651         update_max_tr(tr, current, smp_processor_id());
652         local_irq_restore(flags);
653 }
654 EXPORT_SYMBOL_GPL(tracing_snapshot);
655
656 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
657                                         struct trace_buffer *size_buf, int cpu_id);
658 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
659
660 static int alloc_snapshot(struct trace_array *tr)
661 {
662         int ret;
663
664         if (!tr->allocated_snapshot) {
665
666                 /* allocate spare buffer */
667                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
668                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
669                 if (ret < 0)
670                         return ret;
671
672                 tr->allocated_snapshot = true;
673         }
674
675         return 0;
676 }
677
678 static void free_snapshot(struct trace_array *tr)
679 {
680         /*
681          * We don't free the ring buffer; instead, we resize it because
682          * the max_tr ring buffer has some state (e.g. ring->clock) and
683          * we want to preserve it.
684          */
685         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
686         set_buffer_entries(&tr->max_buffer, 1);
687         tracing_reset_online_cpus(&tr->max_buffer);
688         tr->allocated_snapshot = false;
689 }
690
691 /**
692  * tracing_alloc_snapshot - allocate snapshot buffer.
693  *
694  * This only allocates the snapshot buffer if it isn't already
695  * allocated - it doesn't also take a snapshot.
696  *
697  * This is meant to be used in cases where the snapshot buffer needs
698  * to be set up for events that can't sleep but need to be able to
699  * trigger a snapshot.
700  */
701 int tracing_alloc_snapshot(void)
702 {
703         struct trace_array *tr = &global_trace;
704         int ret;
705
706         ret = alloc_snapshot(tr);
707         WARN_ON(ret < 0);
708
709         return ret;
710 }
711 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
712
713 /**
714  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
715  *
716  * This is similar to trace_snapshot(), but it will allocate the
717  * snapshot buffer if it isn't already allocated. Use this only
718  * where it is safe to sleep, as the allocation may sleep.
719  *
720  * This causes a swap between the snapshot buffer and the current live
721  * tracing buffer. You can use this to take snapshots of the live
722  * trace when some condition is triggered, but continue to trace.
723  */
724 void tracing_snapshot_alloc(void)
725 {
726         int ret;
727
728         ret = tracing_alloc_snapshot();
729         if (ret < 0)
730                 return;
731
732         tracing_snapshot();
733 }
734 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
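/*
 * A (hypothetical) in-kernel user of the snapshot API above could look
 * like this: allocate early where sleeping is allowed, then trigger
 * from the hot path when the interesting condition hits:
 *
 *	tracing_alloc_snapshot();	allocate only, may sleep
 *	...
 *	if (rare_condition_hit)
 *		tracing_snapshot();	swap live and snapshot buffers
 *
 * The snapshot can also be allocated from user space with
 *	echo 1 > /sys/kernel/debug/tracing/snapshot
 */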
735 #else
736 void tracing_snapshot(void)
737 {
738         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
739 }
740 EXPORT_SYMBOL_GPL(tracing_snapshot);
741 int tracing_alloc_snapshot(void)
742 {
743         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
744         return -ENODEV;
745 }
746 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
747 void tracing_snapshot_alloc(void)
748 {
749         /* Give warning */
750         tracing_snapshot();
751 }
752 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
753 #endif /* CONFIG_TRACER_SNAPSHOT */
754
755 static void tracer_tracing_off(struct trace_array *tr)
756 {
757         if (tr->trace_buffer.buffer)
758                 ring_buffer_record_off(tr->trace_buffer.buffer);
759         /*
760          * This flag is looked at when buffers haven't been allocated
761          * yet, or by some tracers (like irqsoff) that just want to
762          * know if the ring buffer has been disabled, but can handle
763          * races where it gets disabled while we still do a record.
764          * As the check is in the fast path of the tracers, it is more
765          * important to be fast than accurate.
766          */
767         tr->buffer_disabled = 1;
768         /* Make the flag seen by readers */
769         smp_wmb();
770 }
771
772 /**
773  * tracing_off - turn off tracing buffers
774  *
775  * This function stops the tracing buffers from recording data.
776  * It does not disable any overhead the tracers themselves may
777  * be causing. This function simply causes all recording to
778  * the ring buffers to fail.
779  */
780 void tracing_off(void)
781 {
782         tracer_tracing_off(&global_trace);
783 }
784 EXPORT_SYMBOL_GPL(tracing_off);
785
786 void disable_trace_on_warning(void)
787 {
788         if (__disable_trace_on_warning)
789                 tracing_off();
790 }
791
792 /**
793  * tracer_tracing_is_on - show real state of ring buffer enabled
794  * @tr : the trace array to know if ring buffer is enabled
795  *
796  * Shows real state of the ring buffer if it is enabled or not.
797  */
798 static int tracer_tracing_is_on(struct trace_array *tr)
799 {
800         if (tr->trace_buffer.buffer)
801                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
802         return !tr->buffer_disabled;
803 }
804
805 /**
806  * tracing_is_on - show state of ring buffers enabled
807  */
808 int tracing_is_on(void)
809 {
810         return tracer_tracing_is_on(&global_trace);
811 }
812 EXPORT_SYMBOL_GPL(tracing_is_on);
813
814 static int __init set_buf_size(char *str)
815 {
816         unsigned long buf_size;
817
818         if (!str)
819                 return 0;
820         buf_size = memparse(str, &str);
821         /* nr_entries can not be zero */
822         if (buf_size == 0)
823                 return 0;
824         trace_buf_size = buf_size;
825         return 1;
826 }
827 __setup("trace_buf_size=", set_buf_size);
828
829 static int __init set_tracing_thresh(char *str)
830 {
831         unsigned long threshold;
832         int ret;
833
834         if (!str)
835                 return 0;
836         ret = kstrtoul(str, 0, &threshold);
837         if (ret < 0)
838                 return 0;
839         tracing_thresh = threshold * 1000;
840         return 1;
841 }
842 __setup("tracing_thresh=", set_tracing_thresh);
843
844 unsigned long nsecs_to_usecs(unsigned long nsecs)
845 {
846         return nsecs / 1000;
847 }
848
849 /*
850  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
851  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
852  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
853  * of strings in the order that the enums were defined.
854  */
855 #undef C
856 #define C(a, b) b
857
858 /* These must match the bit positions in trace_iterator_flags */
859 static const char *trace_options[] = {
860         TRACE_FLAGS
861         NULL
862 };
863
864 static struct {
865         u64 (*func)(void);
866         const char *name;
867         int in_ns;              /* is this clock in nanoseconds? */
868 } trace_clocks[] = {
869         { trace_clock_local,            "local",        1 },
870         { trace_clock_global,           "global",       1 },
871         { trace_clock_counter,          "counter",      0 },
872         { trace_clock_jiffies,          "uptime",       0 },
873         { trace_clock,                  "perf",         1 },
874         { ktime_get_mono_fast_ns,       "mono",         1 },
875         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
876         ARCH_TRACE_CLOCKS
877 };
878
879 /*
880  * trace_parser_get_init - gets the buffer for trace parser
881  */
882 int trace_parser_get_init(struct trace_parser *parser, int size)
883 {
884         memset(parser, 0, sizeof(*parser));
885
886         parser->buffer = kmalloc(size, GFP_KERNEL);
887         if (!parser->buffer)
888                 return 1;
889
890         parser->size = size;
891         return 0;
892 }
893
894 /*
895  * trace_parser_put - frees the buffer for trace parser
896  */
897 void trace_parser_put(struct trace_parser *parser)
898 {
899         kfree(parser->buffer);
900 }
901
902 /*
903  * trace_get_user - reads the user input string separated by space
904  * (matched by isspace(ch))
905  *
906  * For each string found the 'struct trace_parser' is updated,
907  * and the function returns.
908  *
909  * Returns number of bytes read.
910  *
911  * See kernel/trace/trace.h for 'struct trace_parser' details.
912  */
913 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
914         size_t cnt, loff_t *ppos)
915 {
916         char ch;
917         size_t read = 0;
918         ssize_t ret;
919
920         if (!*ppos)
921                 trace_parser_clear(parser);
922
923         ret = get_user(ch, ubuf++);
924         if (ret)
925                 goto out;
926
927         read++;
928         cnt--;
929
930         /*
931          * The parser is not finished with the last write,
932          * continue reading the user input without skipping spaces.
933          */
934         if (!parser->cont) {
935                 /* skip white space */
936                 while (cnt && isspace(ch)) {
937                         ret = get_user(ch, ubuf++);
938                         if (ret)
939                                 goto out;
940                         read++;
941                         cnt--;
942                 }
943
944                 /* only spaces were written */
945                 if (isspace(ch)) {
946                         *ppos += read;
947                         ret = read;
948                         goto out;
949                 }
950
951                 parser->idx = 0;
952         }
953
954         /* read the non-space input */
955         while (cnt && !isspace(ch)) {
956                 if (parser->idx < parser->size - 1)
957                         parser->buffer[parser->idx++] = ch;
958                 else {
959                         ret = -EINVAL;
960                         goto out;
961                 }
962                 ret = get_user(ch, ubuf++);
963                 if (ret)
964                         goto out;
965                 read++;
966                 cnt--;
967         }
968
969         /* We either got finished input or we have to wait for another call. */
970         if (isspace(ch)) {
971                 parser->buffer[parser->idx] = 0;
972                 parser->cont = false;
973         } else if (parser->idx < parser->size - 1) {
974                 parser->cont = true;
975                 parser->buffer[parser->idx++] = ch;
976         } else {
977                 ret = -EINVAL;
978                 goto out;
979         }
980
981         *ppos += read;
982         ret = read;
983
984 out:
985         return ret;
986 }
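/*
 * Sketch of how a write() handler typically drives the parser helpers
 * above (simplified; see the ftrace filter files for real users):
 *
 *	struct trace_parser parser;
 *	ssize_t read;
 *
 *	if (trace_parser_get_init(&parser, size))
 *		return -ENOMEM;
 *
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser) &&
 *	    !trace_parser_cont(&parser))
 *		...act on the NUL-terminated word in parser.buffer...
 *
 *	trace_parser_put(&parser);
 *	return read;
 */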
987
988 /* TODO add a seq_buf_to_buffer() */
989 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
990 {
991         int len;
992
993         if (trace_seq_used(s) <= s->seq.readpos)
994                 return -EBUSY;
995
996         len = trace_seq_used(s) - s->seq.readpos;
997         if (cnt > len)
998                 cnt = len;
999         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1000
1001         s->seq.readpos += cnt;
1002         return cnt;
1003 }
1004
1005 unsigned long __read_mostly     tracing_thresh;
1006
1007 #ifdef CONFIG_TRACER_MAX_TRACE
1008 /*
1009  * Copy the new maximum trace into the separate maximum-trace
1010  * structure. (this way the maximum trace is permanently saved,
1011  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1012  */
1013 static void
1014 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1015 {
1016         struct trace_buffer *trace_buf = &tr->trace_buffer;
1017         struct trace_buffer *max_buf = &tr->max_buffer;
1018         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1019         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1020
1021         max_buf->cpu = cpu;
1022         max_buf->time_start = data->preempt_timestamp;
1023
1024         max_data->saved_latency = tr->max_latency;
1025         max_data->critical_start = data->critical_start;
1026         max_data->critical_end = data->critical_end;
1027
1028         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1029         max_data->pid = tsk->pid;
1030         /*
1031          * If tsk == current, then use current_uid(), as that does not use
1032          * RCU. The irq tracer can be called out of RCU scope.
1033          */
1034         if (tsk == current)
1035                 max_data->uid = current_uid();
1036         else
1037                 max_data->uid = task_uid(tsk);
1038
1039         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1040         max_data->policy = tsk->policy;
1041         max_data->rt_priority = tsk->rt_priority;
1042
1043         /* record this task's comm */
1044         tracing_record_cmdline(tsk);
1045 }
1046
1047 /**
1048  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1049  * @tr: tracer
1050  * @tsk: the task with the latency
1051  * @cpu: The cpu that initiated the trace.
1052  *
1053  * Flip the buffers between the @tr and the max_tr and record information
1054  * about which task was the cause of this latency.
1055  */
1056 void
1057 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1058 {
1059         struct ring_buffer *buf;
1060
1061         if (tr->stop_count)
1062                 return;
1063
1064         WARN_ON_ONCE(!irqs_disabled());
1065
1066         if (!tr->allocated_snapshot) {
1067                 /* Only the nop tracer should hit this when disabling */
1068                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1069                 return;
1070         }
1071
1072         arch_spin_lock(&tr->max_lock);
1073
1074         buf = tr->trace_buffer.buffer;
1075         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1076         tr->max_buffer.buffer = buf;
1077
1078         __update_max_tr(tr, tsk, cpu);
1079         arch_spin_unlock(&tr->max_lock);
1080 }
1081
1082 /**
1083  * update_max_tr_single - only copy one trace over, and reset the rest
1084  * @tr: tracer
1085  * @tsk: task with the latency
1086  * @cpu: the cpu of the buffer to copy.
1087  *
1088  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1089  */
1090 void
1091 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1092 {
1093         int ret;
1094
1095         if (tr->stop_count)
1096                 return;
1097
1098         WARN_ON_ONCE(!irqs_disabled());
1099         if (!tr->allocated_snapshot) {
1100                 /* Only the nop tracer should hit this when disabling */
1101                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1102                 return;
1103         }
1104
1105         arch_spin_lock(&tr->max_lock);
1106
1107         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1108
1109         if (ret == -EBUSY) {
1110                 /*
1111                  * We failed to swap the buffer due to a commit taking
1112                  * place on this CPU. We fail to record, but we reset
1113                  * the max trace buffer (no one writes directly to it)
1114                  * and flag that it failed.
1115                  */
1116                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1117                         "Failed to swap buffers due to commit in progress\n");
1118         }
1119
1120         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1121
1122         __update_max_tr(tr, tsk, cpu);
1123         arch_spin_unlock(&tr->max_lock);
1124 }
1125 #endif /* CONFIG_TRACER_MAX_TRACE */
1126
1127 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1128 {
1129         /* Iterators are static, they should be filled or empty */
1130         if (trace_buffer_iter(iter, iter->cpu_file))
1131                 return 0;
1132
1133         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1134                                 full);
1135 }
1136
1137 #ifdef CONFIG_FTRACE_STARTUP_TEST
1138 static int run_tracer_selftest(struct tracer *type)
1139 {
1140         struct trace_array *tr = &global_trace;
1141         struct tracer *saved_tracer = tr->current_trace;
1142         int ret;
1143
1144         if (!type->selftest || tracing_selftest_disabled)
1145                 return 0;
1146
1147         /*
1148          * Run a selftest on this tracer.
1149          * Here we reset the trace buffer, and set the current
1150          * tracer to be this tracer. The tracer can then run some
1151          * internal tracing to verify that everything is in order.
1152          * If we fail, we do not register this tracer.
1153          */
1154         tracing_reset_online_cpus(&tr->trace_buffer);
1155
1156         tr->current_trace = type;
1157
1158 #ifdef CONFIG_TRACER_MAX_TRACE
1159         if (type->use_max_tr) {
1160                 /* If we expanded the buffers, make sure the max is expanded too */
1161                 if (ring_buffer_expanded)
1162                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1163                                            RING_BUFFER_ALL_CPUS);
1164                 tr->allocated_snapshot = true;
1165         }
1166 #endif
1167
1168         /* the test is responsible for initializing and enabling */
1169         pr_info("Testing tracer %s: ", type->name);
1170         ret = type->selftest(type, tr);
1171         /* the test is responsible for resetting too */
1172         tr->current_trace = saved_tracer;
1173         if (ret) {
1174                 printk(KERN_CONT "FAILED!\n");
1175                 /* Add the warning after printing 'FAILED' */
1176                 WARN_ON(1);
1177                 return -1;
1178         }
1179         /* Only reset on passing, to avoid touching corrupted buffers */
1180         tracing_reset_online_cpus(&tr->trace_buffer);
1181
1182 #ifdef CONFIG_TRACER_MAX_TRACE
1183         if (type->use_max_tr) {
1184                 tr->allocated_snapshot = false;
1185
1186                 /* Shrink the max buffer again */
1187                 if (ring_buffer_expanded)
1188                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1189                                            RING_BUFFER_ALL_CPUS);
1190         }
1191 #endif
1192
1193         printk(KERN_CONT "PASSED\n");
1194         return 0;
1195 }
1196 #else
1197 static inline int run_tracer_selftest(struct tracer *type)
1198 {
1199         return 0;
1200 }
1201 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1202
1203 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1204
1205 static void __init apply_trace_boot_options(void);
1206
1207 /**
1208  * register_tracer - register a tracer with the ftrace system.
1209  * @type - the plugin for the tracer
1210  *
1211  * Register a new plugin tracer.
1212  */
1213 int __init register_tracer(struct tracer *type)
1214 {
1215         struct tracer *t;
1216         int ret = 0;
1217
1218         if (!type->name) {
1219                 pr_info("Tracer must have a name\n");
1220                 return -1;
1221         }
1222
1223         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1224                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1225                 return -1;
1226         }
1227
1228         mutex_lock(&trace_types_lock);
1229
1230         tracing_selftest_running = true;
1231
1232         for (t = trace_types; t; t = t->next) {
1233                 if (strcmp(type->name, t->name) == 0) {
1234                         /* already found */
1235                         pr_info("Tracer %s already registered\n",
1236                                 type->name);
1237                         ret = -1;
1238                         goto out;
1239                 }
1240         }
1241
1242         if (!type->set_flag)
1243                 type->set_flag = &dummy_set_flag;
1244         if (!type->flags) {
1245                 /* allocate a dummy tracer_flags */
1246                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1247                 if (!type->flags) {
1248                         ret = -ENOMEM;
1249                         goto out;
1250                 }
1251                 type->flags->val = 0;
1252                 type->flags->opts = dummy_tracer_opt;
1253         } else
1254                 if (!type->flags->opts)
1255                         type->flags->opts = dummy_tracer_opt;
1256
1257         /* store the tracer for __set_tracer_option */
1258         type->flags->trace = type;
1259
1260         ret = run_tracer_selftest(type);
1261         if (ret < 0)
1262                 goto out;
1263
1264         type->next = trace_types;
1265         trace_types = type;
1266         add_tracer_options(&global_trace, type);
1267
1268  out:
1269         tracing_selftest_running = false;
1270         mutex_unlock(&trace_types_lock);
1271
1272         if (ret || !default_bootup_tracer)
1273                 goto out_unlock;
1274
1275         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1276                 goto out_unlock;
1277
1278         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1279         /* Do we want this tracer to start on bootup? */
1280         tracing_set_tracer(&global_trace, type->name);
1281         default_bootup_tracer = NULL;
1282
1283         apply_trace_boot_options();
1284
1285         /* disable other selftests, since this will break it. */
1286         tracing_selftest_disabled = true;
1287 #ifdef CONFIG_FTRACE_STARTUP_TEST
1288         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1289                type->name);
1290 #endif
1291
1292  out_unlock:
1293         return ret;
1294 }
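/*
 * A minimal (hypothetical) tracer registration, to illustrate what
 * register_tracer() above expects. Real tracers live in the other
 * kernel/trace/trace_*.c files and fill in many more callbacks:
 *
 *	static int my_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *	};
 *
 *	static int __init init_my_tracer(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(init_my_tracer);
 */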
1295
1296 void tracing_reset(struct trace_buffer *buf, int cpu)
1297 {
1298         struct ring_buffer *buffer = buf->buffer;
1299
1300         if (!buffer)
1301                 return;
1302
1303         ring_buffer_record_disable(buffer);
1304
1305         /* Make sure all commits have finished */
1306         synchronize_sched();
1307         ring_buffer_reset_cpu(buffer, cpu);
1308
1309         ring_buffer_record_enable(buffer);
1310 }
1311
1312 void tracing_reset_online_cpus(struct trace_buffer *buf)
1313 {
1314         struct ring_buffer *buffer = buf->buffer;
1315         int cpu;
1316
1317         if (!buffer)
1318                 return;
1319
1320         ring_buffer_record_disable(buffer);
1321
1322         /* Make sure all commits have finished */
1323         synchronize_sched();
1324
1325         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1326
1327         for_each_online_cpu(cpu)
1328                 ring_buffer_reset_cpu(buffer, cpu);
1329
1330         ring_buffer_record_enable(buffer);
1331 }
1332
1333 /* Must have trace_types_lock held */
1334 void tracing_reset_all_online_cpus(void)
1335 {
1336         struct trace_array *tr;
1337
1338         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1339                 tracing_reset_online_cpus(&tr->trace_buffer);
1340 #ifdef CONFIG_TRACER_MAX_TRACE
1341                 tracing_reset_online_cpus(&tr->max_buffer);
1342 #endif
1343         }
1344 }
1345
1346 #define SAVED_CMDLINES_DEFAULT 128
1347 #define NO_CMDLINE_MAP UINT_MAX
1348 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1349 struct saved_cmdlines_buffer {
1350         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1351         unsigned *map_cmdline_to_pid;
1352         unsigned cmdline_num;
1353         int cmdline_idx;
1354         char *saved_cmdlines;
1355 };
1356 static struct saved_cmdlines_buffer *savedcmd;
1357
1358 /* temporary disable recording */
1359 static atomic_t trace_record_cmdline_disabled __read_mostly;
1360
1361 static inline char *get_saved_cmdlines(int idx)
1362 {
1363         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1364 }
1365
1366 static inline void set_cmdline(int idx, const char *cmdline)
1367 {
1368         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1369 }
1370
1371 static int allocate_cmdlines_buffer(unsigned int val,
1372                                     struct saved_cmdlines_buffer *s)
1373 {
1374         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1375                                         GFP_KERNEL);
1376         if (!s->map_cmdline_to_pid)
1377                 return -ENOMEM;
1378
1379         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1380         if (!s->saved_cmdlines) {
1381                 kfree(s->map_cmdline_to_pid);
1382                 return -ENOMEM;
1383         }
1384
1385         s->cmdline_idx = 0;
1386         s->cmdline_num = val;
1387         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1388                sizeof(s->map_pid_to_cmdline));
1389         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1390                val * sizeof(*s->map_cmdline_to_pid));
1391
1392         return 0;
1393 }
1394
1395 static int trace_create_savedcmd(void)
1396 {
1397         int ret;
1398
1399         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1400         if (!savedcmd)
1401                 return -ENOMEM;
1402
1403         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1404         if (ret < 0) {
1405                 kfree(savedcmd);
1406                 savedcmd = NULL;
1407                 return -ENOMEM;
1408         }
1409
1410         return 0;
1411 }
1412
1413 int is_tracing_stopped(void)
1414 {
1415         return global_trace.stop_count;
1416 }
1417
1418 /**
1419  * tracing_start - quick start of the tracer
1420  *
1421  * If tracing is enabled but was stopped by tracing_stop,
1422  * this will start the tracer back up.
1423  */
1424 void tracing_start(void)
1425 {
1426         struct ring_buffer *buffer;
1427         unsigned long flags;
1428
1429         if (tracing_disabled)
1430                 return;
1431
1432         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1433         if (--global_trace.stop_count) {
1434                 if (global_trace.stop_count < 0) {
1435                         /* Someone screwed up their debugging */
1436                         WARN_ON_ONCE(1);
1437                         global_trace.stop_count = 0;
1438                 }
1439                 goto out;
1440         }
1441
1442         /* Prevent the buffers from switching */
1443         arch_spin_lock(&global_trace.max_lock);
1444
1445         buffer = global_trace.trace_buffer.buffer;
1446         if (buffer)
1447                 ring_buffer_record_enable(buffer);
1448
1449 #ifdef CONFIG_TRACER_MAX_TRACE
1450         buffer = global_trace.max_buffer.buffer;
1451         if (buffer)
1452                 ring_buffer_record_enable(buffer);
1453 #endif
1454
1455         arch_spin_unlock(&global_trace.max_lock);
1456
1457  out:
1458         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1459 }
1460
1461 static void tracing_start_tr(struct trace_array *tr)
1462 {
1463         struct ring_buffer *buffer;
1464         unsigned long flags;
1465
1466         if (tracing_disabled)
1467                 return;
1468
1469         /* If global, we need to also start the max tracer */
1470         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1471                 return tracing_start();
1472
1473         raw_spin_lock_irqsave(&tr->start_lock, flags);
1474
1475         if (--tr->stop_count) {
1476                 if (tr->stop_count < 0) {
1477                         /* Someone screwed up their debugging */
1478                         WARN_ON_ONCE(1);
1479                         tr->stop_count = 0;
1480                 }
1481                 goto out;
1482         }
1483
1484         buffer = tr->trace_buffer.buffer;
1485         if (buffer)
1486                 ring_buffer_record_enable(buffer);
1487
1488  out:
1489         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1490 }
1491
1492 /**
1493  * tracing_stop - quick stop of the tracer
1494  *
1495  * Light weight way to stop tracing. Use in conjunction with
1496  * tracing_start.
1497  */
1498 void tracing_stop(void)
1499 {
1500         struct ring_buffer *buffer;
1501         unsigned long flags;
1502
1503         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1504         if (global_trace.stop_count++)
1505                 goto out;
1506
1507         /* Prevent the buffers from switching */
1508         arch_spin_lock(&global_trace.max_lock);
1509
1510         buffer = global_trace.trace_buffer.buffer;
1511         if (buffer)
1512                 ring_buffer_record_disable(buffer);
1513
1514 #ifdef CONFIG_TRACER_MAX_TRACE
1515         buffer = global_trace.max_buffer.buffer;
1516         if (buffer)
1517                 ring_buffer_record_disable(buffer);
1518 #endif
1519
1520         arch_spin_unlock(&global_trace.max_lock);
1521
1522  out:
1523         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1524 }
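/*
 * tracing_stop() and tracing_start() nest via stop_count, so debugging
 * code can bracket a region it wants to inspect, e.g. (sketch):
 *
 *	tracing_stop();
 *	...examine or dump the now-frozen buffers...
 *	tracing_start();
 */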
1525
1526 static void tracing_stop_tr(struct trace_array *tr)
1527 {
1528         struct ring_buffer *buffer;
1529         unsigned long flags;
1530
1531         /* If global, we need to also stop the max tracer */
1532         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1533                 return tracing_stop();
1534
1535         raw_spin_lock_irqsave(&tr->start_lock, flags);
1536         if (tr->stop_count++)
1537                 goto out;
1538
1539         buffer = tr->trace_buffer.buffer;
1540         if (buffer)
1541                 ring_buffer_record_disable(buffer);
1542
1543  out:
1544         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1545 }
1546
1547 void trace_stop_cmdline_recording(void);
1548
1549 static int trace_save_cmdline(struct task_struct *tsk)
1550 {
1551         unsigned pid, idx;
1552
1553         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1554                 return 0;
1555
1556         /*
1557          * It's not the end of the world if we don't get
1558          * the lock, but we also don't want to spin
1559          * nor do we want to disable interrupts,
1560          * so if we miss here, then better luck next time.
1561          */
1562         if (!arch_spin_trylock(&trace_cmdline_lock))
1563                 return 0;
1564
1565         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1566         if (idx == NO_CMDLINE_MAP) {
1567                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1568
1569                 /*
1570                  * Check whether the cmdline buffer at idx has a pid
1571                  * mapped. We are going to overwrite that entry so we
1572                  * need to clear the map_pid_to_cmdline. Otherwise we
1573                  * would read the new comm for the old pid.
1574                  */
1575                 pid = savedcmd->map_cmdline_to_pid[idx];
1576                 if (pid != NO_CMDLINE_MAP)
1577                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1578
1579                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1580                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1581
1582                 savedcmd->cmdline_idx = idx;
1583         }
1584
1585         set_cmdline(idx, tsk->comm);
1586
1587         arch_spin_unlock(&trace_cmdline_lock);
1588
1589         return 1;
1590 }
1591
1592 static void __trace_find_cmdline(int pid, char comm[])
1593 {
1594         unsigned map;
1595
1596         if (!pid) {
1597                 strcpy(comm, "<idle>");
1598                 return;
1599         }
1600
1601         if (WARN_ON_ONCE(pid < 0)) {
1602                 strcpy(comm, "<XXX>");
1603                 return;
1604         }
1605
1606         if (pid > PID_MAX_DEFAULT) {
1607                 strcpy(comm, "<...>");
1608                 return;
1609         }
1610
1611         map = savedcmd->map_pid_to_cmdline[pid];
1612         if (map != NO_CMDLINE_MAP)
1613                 strcpy(comm, get_saved_cmdlines(map));
1614         else
1615                 strcpy(comm, "<...>");
1616 }
1617
1618 void trace_find_cmdline(int pid, char comm[])
1619 {
1620         preempt_disable();
1621         arch_spin_lock(&trace_cmdline_lock);
1622
1623         __trace_find_cmdline(pid, comm);
1624
1625         arch_spin_unlock(&trace_cmdline_lock);
1626         preempt_enable();
1627 }
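/*
 * Output code resolves a recorded pid back to a comm with the helper
 * above, roughly as the trace output paths do:
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
 *
 * Pids that were never recorded come back as "<...>".
 */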
1628
1629 void tracing_record_cmdline(struct task_struct *tsk)
1630 {
1631         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1632                 return;
1633
1634         if (!__this_cpu_read(trace_cmdline_save))
1635                 return;
1636
1637         if (trace_save_cmdline(tsk))
1638                 __this_cpu_write(trace_cmdline_save, false);
1639 }
1640
1641 void
1642 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1643                              int pc)
1644 {
1645         struct task_struct *tsk = current;
1646
1647         entry->preempt_count            = pc & 0xff;
1648         entry->pid                      = (tsk) ? tsk->pid : 0;
1649         entry->flags =
1650 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1651                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1652 #else
1653                 TRACE_FLAG_IRQS_NOSUPPORT |
1654 #endif
1655                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1656                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1657                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1658                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1659                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1660 }
1661 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1662
1663 static __always_inline void
1664 trace_event_setup(struct ring_buffer_event *event,
1665                   int type, unsigned long flags, int pc)
1666 {
1667         struct trace_entry *ent = ring_buffer_event_data(event);
1668
1669         tracing_generic_entry_update(ent, flags, pc);
1670         ent->type = type;
1671 }
1672
1673 struct ring_buffer_event *
1674 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1675                           int type,
1676                           unsigned long len,
1677                           unsigned long flags, int pc)
1678 {
1679         struct ring_buffer_event *event;
1680
1681         event = ring_buffer_lock_reserve(buffer, len);
1682         if (event != NULL)
1683                 trace_event_setup(event, type, flags, pc);
1684
1685         return event;
1686 }
1687
1688 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1689 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1690 static int trace_buffered_event_ref;
1691
1692 /**
1693  * trace_buffered_event_enable - enable buffering events
1694  *
1695  * When events are being filtered, it is quicker to use a temporary
1696  * buffer to write the event data into if there's a likely chance
1697  * that it will not be committed. Discarding an event reserved in
1698  * the ring buffer is not as fast as committing it, and is much
1699  * slower than copying the data in from a temporary buffer.
1700  *
1701  * When an event is to be filtered, allocate per cpu buffers to
1702  * write the event data into; if the event is filtered and discarded,
1703  * it is simply dropped. Otherwise, the entire data is committed
1704  * in one shot.
1705  */
1706 void trace_buffered_event_enable(void)
1707 {
1708         struct ring_buffer_event *event;
1709         struct page *page;
1710         int cpu;
1711
1712         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1713
1714         if (trace_buffered_event_ref++)
1715                 return;
1716
1717         for_each_tracing_cpu(cpu) {
1718                 page = alloc_pages_node(cpu_to_node(cpu),
1719                                         GFP_KERNEL | __GFP_NORETRY, 0);
1720                 if (!page)
1721                         goto failed;
1722
1723                 event = page_address(page);
1724                 memset(event, 0, sizeof(*event));
1725
1726                 per_cpu(trace_buffered_event, cpu) = event;
1727
1728                 preempt_disable();
1729                 if (cpu == smp_processor_id() &&
1730                     this_cpu_read(trace_buffered_event) !=
1731                     per_cpu(trace_buffered_event, cpu))
1732                         WARN_ON_ONCE(1);
1733                 preempt_enable();
1734         }
1735
1736         return;
1737  failed:
1738         trace_buffered_event_disable();
1739 }
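
/*
 * Illustrative sketch (hypothetical, not from this file): the enable and
 * disable calls are refcounted and must run under event_mutex, typically
 * bracketing the time a filter is attached to an event file:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	mutex_unlock(&event_mutex);
 *
 *	... events now go through the per-cpu temp page first ...
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */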
1740
1741 static void enable_trace_buffered_event(void *data)
1742 {
1743         /* Probably not needed, but do it anyway */
1744         smp_rmb();
1745         this_cpu_dec(trace_buffered_event_cnt);
1746 }
1747
1748 static void disable_trace_buffered_event(void *data)
1749 {
1750         this_cpu_inc(trace_buffered_event_cnt);
1751 }
1752
1753 /**
1754  * trace_buffered_event_disable - disable buffering events
1755  *
1756  * When a filter is removed, it is faster to not use the buffered
1757  * events, and to commit directly into the ring buffer. Free up
1758  * the temp buffers when there are no more users. This requires
1759  * special synchronization with current events.
1760  */
1761 void trace_buffered_event_disable(void)
1762 {
1763         int cpu;
1764
1765         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1766
1767         if (WARN_ON_ONCE(!trace_buffered_event_ref))
1768                 return;
1769
1770         if (--trace_buffered_event_ref)
1771                 return;
1772
1773         preempt_disable();
1774         /* For each CPU, set the buffer as used. */
1775         smp_call_function_many(tracing_buffer_mask,
1776                                disable_trace_buffered_event, NULL, 1);
1777         preempt_enable();
1778
1779         /* Wait for all current users to finish */
1780         synchronize_sched();
1781
1782         for_each_tracing_cpu(cpu) {
1783                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
1784                 per_cpu(trace_buffered_event, cpu) = NULL;
1785         }
1786         /*
1787          * Make sure trace_buffered_event is NULL before clearing
1788          * trace_buffered_event_cnt.
1789          */
1790         smp_wmb();
1791
1792         preempt_disable();
1793         /* Do the work on each cpu */
1794         smp_call_function_many(tracing_buffer_mask,
1795                                enable_trace_buffered_event, NULL, 1);
1796         preempt_enable();
1797 }
1798
1799 void
1800 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1801 {
1802         __this_cpu_write(trace_cmdline_save, true);
1803
1804         /* If this is the temp buffer, we need to commit fully */
1805         if (this_cpu_read(trace_buffered_event) == event) {
1806                 /* Length is in event->array[0] */
1807                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1808                 /* Release the temp buffer */
1809                 this_cpu_dec(trace_buffered_event_cnt);
1810         } else
1811                 ring_buffer_unlock_commit(buffer, event);
1812 }
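
/*
 * Worked example (hypothetical sizes): when an event handed to
 * __buffer_unlock_commit() was built in the per-cpu temp page,
 * trace_event_buffer_lock_reserve() below stashed the payload length in
 * event->array[0]. A 24-byte payload is therefore copied into the real
 * ring buffer as
 *
 *	ring_buffer_write(buffer, 24, &event->array[1]);
 *
 * and the temp page is released for the next event on this cpu.
 */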
1813
1814 static struct ring_buffer *temp_buffer;
1815
1816 struct ring_buffer_event *
1817 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1818                           struct trace_event_file *trace_file,
1819                           int type, unsigned long len,
1820                           unsigned long flags, int pc)
1821 {
1822         struct ring_buffer_event *entry;
1823         int val;
1824
1825         *current_rb = trace_file->tr->trace_buffer.buffer;
1826
1827         if ((trace_file->flags &
1828              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
1829             (entry = this_cpu_read(trace_buffered_event))) {
1830                 /* Try to use the per cpu buffer first */
1831                 val = this_cpu_inc_return(trace_buffered_event_cnt);
1832                 if (val == 1) {
1833                         trace_event_setup(entry, type, flags, pc);
1834                         entry->array[0] = len;
1835                         return entry;
1836                 }
1837                 this_cpu_dec(trace_buffered_event_cnt);
1838         }
1839
1840         entry = trace_buffer_lock_reserve(*current_rb,
1841                                          type, len, flags, pc);
1842         /*
1843          * If tracing is off, but we have triggers enabled
1844          * we still need to look at the event data. Use the temp_buffer
1845          * to store the trace event for the trigger to use. It's recursion
1846          * safe and will not be recorded anywhere.
1847          */
1848         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
1849                 *current_rb = temp_buffer;
1850                 entry = trace_buffer_lock_reserve(*current_rb,
1851                                                   type, len, flags, pc);
1852         }
1853         return entry;
1854 }
1855 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
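
/*
 * Illustrative sketch (hypothetical probe, not from this file): a typical
 * event probe reserves space, fills its payload after the common header,
 * and then commits, e.g. via trace_buffer_unlock_commit_regs() below.
 * trace_file, tr, irqflags, pc, ip and parent_ip are assumed to be
 * provided by the caller:
 *
 *	struct ring_buffer *buffer;
 *	struct ring_buffer_event *event;
 *	struct ftrace_entry *entry;
 *
 *	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
 *						TRACE_FN, sizeof(*entry),
 *						irqflags, pc);
 *	if (!event)
 *		return;
 *	entry = ring_buffer_event_data(event);
 *	entry->ip = ip;
 *	entry->parent_ip = parent_ip;
 *	trace_buffer_unlock_commit_regs(tr, buffer, event, irqflags, pc, NULL);
 */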
1856
1857 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
1858                                      struct ring_buffer *buffer,
1859                                      struct ring_buffer_event *event,
1860                                      unsigned long flags, int pc,
1861                                      struct pt_regs *regs)
1862 {
1863         __buffer_unlock_commit(buffer, event);
1864
1865         ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
1866         ftrace_trace_userstack(buffer, flags, pc);
1867 }
1868
1869 void
1870 trace_function(struct trace_array *tr,
1871                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1872                int pc)
1873 {
1874         struct trace_event_call *call = &event_function;
1875         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1876         struct ring_buffer_event *event;
1877         struct ftrace_entry *entry;
1878
1879         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1880                                           flags, pc);
1881         if (!event)
1882                 return;
1883         entry   = ring_buffer_event_data(event);
1884         entry->ip                       = ip;
1885         entry->parent_ip                = parent_ip;
1886
1887         if (!call_filter_check_discard(call, entry, buffer, event))
1888                 __buffer_unlock_commit(buffer, event);
1889 }
1890
1891 #ifdef CONFIG_STACKTRACE
1892
1893 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1894 struct ftrace_stack {
1895         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1896 };
1897
1898 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1899 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1900
1901 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1902                                  unsigned long flags,
1903                                  int skip, int pc, struct pt_regs *regs)
1904 {
1905         struct trace_event_call *call = &event_kernel_stack;
1906         struct ring_buffer_event *event;
1907         struct stack_entry *entry;
1908         struct stack_trace trace;
1909         int use_stack;
1910         int size = FTRACE_STACK_ENTRIES;
1911
1912         trace.nr_entries        = 0;
1913         trace.skip              = skip;
1914
1915         /*
1916          * Since events can happen in NMIs, there's no safe way to
1917          * use the per-cpu ftrace_stacks. We reserve it and if an interrupt
1918          * or NMI comes in, it will just have to use the default
1919          * FTRACE_STACK_ENTRIES sized stack stored in the event itself.
1920          */
1921         preempt_disable_notrace();
1922
1923         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1924         /*
1925          * We don't need any atomic variables, just a barrier.
1926          * If an interrupt comes in, we don't care, because it would
1927          * have exited and put the counter back to what we want.
1928          * We just need a barrier to keep gcc from moving things
1929          * around.
1930          */
1931         barrier();
1932         if (use_stack == 1) {
1933                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1934                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1935
1936                 if (regs)
1937                         save_stack_trace_regs(regs, &trace);
1938                 else
1939                         save_stack_trace(&trace);
1940
1941                 if (trace.nr_entries > size)
1942                         size = trace.nr_entries;
1943         } else
1944                 /* From now on, use_stack is a boolean */
1945                 use_stack = 0;
1946
1947         size *= sizeof(unsigned long);
1948
1949         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1950                                           sizeof(*entry) + size, flags, pc);
1951         if (!event)
1952                 goto out;
1953         entry = ring_buffer_event_data(event);
1954
1955         memset(&entry->caller, 0, size);
1956
1957         if (use_stack)
1958                 memcpy(&entry->caller, trace.entries,
1959                        trace.nr_entries * sizeof(unsigned long));
1960         else {
1961                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1962                 trace.entries           = entry->caller;
1963                 if (regs)
1964                         save_stack_trace_regs(regs, &trace);
1965                 else
1966                         save_stack_trace(&trace);
1967         }
1968
1969         entry->size = trace.nr_entries;
1970
1971         if (!call_filter_check_discard(call, entry, buffer, event))
1972                 __buffer_unlock_commit(buffer, event);
1973
1974  out:
1975         /* Again, don't let gcc optimize things here */
1976         barrier();
1977         __this_cpu_dec(ftrace_stack_reserve);
1978         preempt_enable_notrace();
1979
1980 }
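
/*
 * Worked example (hypothetical nesting): a stack trace recorded from task
 * context bumps ftrace_stack_reserve from 0 to 1 and gets the large
 * per-cpu ftrace_stack (FTRACE_STACK_MAX_ENTRIES). If an NMI fires and
 * also records a stack before the counter drops, it sees the counter at 2
 * and falls back to saving at most FTRACE_STACK_ENTRIES entries directly
 * into its own event.
 */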
1981
1982 static inline void ftrace_trace_stack(struct trace_array *tr,
1983                                       struct ring_buffer *buffer,
1984                                       unsigned long flags,
1985                                       int skip, int pc, struct pt_regs *regs)
1986 {
1987         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
1988                 return;
1989
1990         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1991 }
1992
1993 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1994                    int pc)
1995 {
1996         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1997 }
1998
1999 /**
2000  * trace_dump_stack - record a stack back trace in the trace buffer
2001  * @skip: Number of functions to skip (helper handlers)
2002  */
2003 void trace_dump_stack(int skip)
2004 {
2005         unsigned long flags;
2006
2007         if (tracing_disabled || tracing_selftest_running)
2008                 return;
2009
2010         local_save_flags(flags);
2011
2012         /*
2013          * Skip 3 more; that seems to get us to the caller of
2014          * this function.
2015          */
2016         skip += 3;
2017         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2018                              flags, skip, preempt_count(), NULL);
2019 }
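
/*
 * Illustrative sketch (hypothetical call site, not from this file): a
 * handy debugging aid is to drop a kernel stack trace into the ring
 * buffer from a suspect path and read it back via the "trace" file:
 *
 *	if (unlikely(bad_condition))
 *		trace_dump_stack(0);
 */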
2020
2021 static DEFINE_PER_CPU(int, user_stack_count);
2022
2023 void
2024 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2025 {
2026         struct trace_event_call *call = &event_user_stack;
2027         struct ring_buffer_event *event;
2028         struct userstack_entry *entry;
2029         struct stack_trace trace;
2030
2031         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2032                 return;
2033
2034         /*
2035          * NMIs cannot handle page faults, even with fixups.
2036          * Saving the user stack can (and often does) fault.
2037          */
2038         if (unlikely(in_nmi()))
2039                 return;
2040
2041         /*
2042          * prevent recursion, since the user stack tracing may
2043          * trigger other kernel events.
2044          */
2045         preempt_disable();
2046         if (__this_cpu_read(user_stack_count))
2047                 goto out;
2048
2049         __this_cpu_inc(user_stack_count);
2050
2051         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2052                                           sizeof(*entry), flags, pc);
2053         if (!event)
2054                 goto out_drop_count;
2055         entry   = ring_buffer_event_data(event);
2056
2057         entry->tgid             = current->tgid;
2058         memset(&entry->caller, 0, sizeof(entry->caller));
2059
2060         trace.nr_entries        = 0;
2061         trace.max_entries       = FTRACE_STACK_ENTRIES;
2062         trace.skip              = 0;
2063         trace.entries           = entry->caller;
2064
2065         save_stack_trace_user(&trace);
2066         if (!call_filter_check_discard(call, entry, buffer, event))
2067                 __buffer_unlock_commit(buffer, event);
2068
2069  out_drop_count:
2070         __this_cpu_dec(user_stack_count);
2071  out:
2072         preempt_enable();
2073 }
2074
2075 #ifdef UNUSED
2076 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2077 {
2078         ftrace_trace_userstack(tr, flags, preempt_count());
2079 }
2080 #endif /* UNUSED */
2081
2082 #endif /* CONFIG_STACKTRACE */
2083
2084 /* created for use with alloc_percpu */
2085 struct trace_buffer_struct {
2086         char buffer[TRACE_BUF_SIZE];
2087 };
2088
2089 static struct trace_buffer_struct *trace_percpu_buffer;
2090 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
2091 static struct trace_buffer_struct *trace_percpu_irq_buffer;
2092 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
2093
2094 /*
2095  * The buffer used is dependent on the context. There is a per cpu
2096  * buffer for normal context, softirq context, hard irq context and
2097  * for NMI context. This allows for lockless recording.
2098  *
2099  * Note, if the buffers failed to be allocated, then this returns NULL
2100  */
2101 static char *get_trace_buf(void)
2102 {
2103         struct trace_buffer_struct *percpu_buffer;
2104
2105         /*
2106          * If we have allocated per cpu buffers, then we do not
2107          * need to do any locking.
2108          */
2109         if (in_nmi())
2110                 percpu_buffer = trace_percpu_nmi_buffer;
2111         else if (in_irq())
2112                 percpu_buffer = trace_percpu_irq_buffer;
2113         else if (in_softirq())
2114                 percpu_buffer = trace_percpu_sirq_buffer;
2115         else
2116                 percpu_buffer = trace_percpu_buffer;
2117
2118         if (!percpu_buffer)
2119                 return NULL;
2120
2121         return this_cpu_ptr(&percpu_buffer->buffer[0]);
2122 }
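
/*
 * Illustrative sketch (mirrors the trace_vprintk()/trace_vbprintk()
 * callers below; buf, len, fmt and args are hypothetical): the returned
 * buffer is per-cpu, so it is only valid while preemption stays disabled:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf)
 *		len = vscnprintf(buf, TRACE_BUF_SIZE, fmt, args);
 *	preempt_enable_notrace();
 */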
2123
2124 static int alloc_percpu_trace_buffer(void)
2125 {
2126         struct trace_buffer_struct *buffers;
2127         struct trace_buffer_struct *sirq_buffers;
2128         struct trace_buffer_struct *irq_buffers;
2129         struct trace_buffer_struct *nmi_buffers;
2130
2131         buffers = alloc_percpu(struct trace_buffer_struct);
2132         if (!buffers)
2133                 goto err_warn;
2134
2135         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2136         if (!sirq_buffers)
2137                 goto err_sirq;
2138
2139         irq_buffers = alloc_percpu(struct trace_buffer_struct);
2140         if (!irq_buffers)
2141                 goto err_irq;
2142
2143         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2144         if (!nmi_buffers)
2145                 goto err_nmi;
2146
2147         trace_percpu_buffer = buffers;
2148         trace_percpu_sirq_buffer = sirq_buffers;
2149         trace_percpu_irq_buffer = irq_buffers;
2150         trace_percpu_nmi_buffer = nmi_buffers;
2151
2152         return 0;
2153
2154  err_nmi:
2155         free_percpu(irq_buffers);
2156  err_irq:
2157         free_percpu(sirq_buffers);
2158  err_sirq:
2159         free_percpu(buffers);
2160  err_warn:
2161         WARN(1, "Could not allocate percpu trace_printk buffer");
2162         return -ENOMEM;
2163 }
2164
2165 static int buffers_allocated;
2166
2167 void trace_printk_init_buffers(void)
2168 {
2169         if (buffers_allocated)
2170                 return;
2171
2172         if (alloc_percpu_trace_buffer())
2173                 return;
2174
2175         /* trace_printk() is for debug use only. Don't use it in production. */
2176
2177         pr_warn("\n");
2178         pr_warn("**********************************************************\n");
2179         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2180         pr_warn("**                                                      **\n");
2181         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2182         pr_warn("**                                                      **\n");
2183         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2184         pr_warn("** unsafe for production use.                           **\n");
2185         pr_warn("**                                                      **\n");
2186         pr_warn("** If you see this message and you are not debugging    **\n");
2187         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2188         pr_warn("**                                                      **\n");
2189         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2190         pr_warn("**********************************************************\n");
2191
2192         /* Expand the buffers to the set size */
2193         tracing_update_buffers();
2194
2195         buffers_allocated = 1;
2196
2197         /*
2198          * trace_printk_init_buffers() can be called by modules.
2199          * If that happens, then we need to start cmdline recording
2200          * directly here. If the global_trace.buffer is already
2201          * allocated here, then this was called by module code.
2202          */
2203         if (global_trace.trace_buffer.buffer)
2204                 tracing_start_cmdline_record();
2205 }
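
/*
 * Illustrative sketch (hypothetical debug code, not from this file):
 * trace_printk() is meant only for temporary debugging, e.g.
 *
 *	trace_printk("ctx %p: state=%d\n", ctx, state);
 *
 * The output lands in the ftrace ring buffer (read via the "trace" file
 * under the tracing directory), not in the printk log.
 */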
2206
2207 void trace_printk_start_comm(void)
2208 {
2209         /* Start tracing comms if trace printk is set */
2210         if (!buffers_allocated)
2211                 return;
2212         tracing_start_cmdline_record();
2213 }
2214
2215 static void trace_printk_start_stop_comm(int enabled)
2216 {
2217         if (!buffers_allocated)
2218                 return;
2219
2220         if (enabled)
2221                 tracing_start_cmdline_record();
2222         else
2223                 tracing_stop_cmdline_record();
2224 }
2225
2226 /**
2227  * trace_vbprintk - write binary msg to tracing buffer
2228  *
2229  */
2230 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2231 {
2232         struct trace_event_call *call = &event_bprint;
2233         struct ring_buffer_event *event;
2234         struct ring_buffer *buffer;
2235         struct trace_array *tr = &global_trace;
2236         struct bprint_entry *entry;
2237         unsigned long flags;
2238         char *tbuffer;
2239         int len = 0, size, pc;
2240
2241         if (unlikely(tracing_selftest_running || tracing_disabled))
2242                 return 0;
2243
2244         /* Don't pollute graph traces with trace_vprintk internals */
2245         pause_graph_tracing();
2246
2247         pc = preempt_count();
2248         preempt_disable_notrace();
2249
2250         tbuffer = get_trace_buf();
2251         if (!tbuffer) {
2252                 len = 0;
2253                 goto out;
2254         }
2255
2256         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2257
2258         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2259                 goto out;
2260
2261         local_save_flags(flags);
2262         size = sizeof(*entry) + sizeof(u32) * len;
2263         buffer = tr->trace_buffer.buffer;
2264         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2265                                           flags, pc);
2266         if (!event)
2267                 goto out;
2268         entry = ring_buffer_event_data(event);
2269         entry->ip                       = ip;
2270         entry->fmt                      = fmt;
2271
2272         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2273         if (!call_filter_check_discard(call, entry, buffer, event)) {
2274                 __buffer_unlock_commit(buffer, event);
2275                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2276         }
2277
2278 out:
2279         preempt_enable_notrace();
2280         unpause_graph_tracing();
2281
2282         return len;
2283 }
2284 EXPORT_SYMBOL_GPL(trace_vbprintk);
2285
2286 static int
2287 __trace_array_vprintk(struct ring_buffer *buffer,
2288                       unsigned long ip, const char *fmt, va_list args)
2289 {
2290         struct trace_event_call *call = &event_print;
2291         struct ring_buffer_event *event;
2292         int len = 0, size, pc;
2293         struct print_entry *entry;
2294         unsigned long flags;
2295         char *tbuffer;
2296
2297         if (tracing_disabled || tracing_selftest_running)
2298                 return 0;
2299
2300         /* Don't pollute graph traces with trace_vprintk internals */
2301         pause_graph_tracing();
2302
2303         pc = preempt_count();
2304         preempt_disable_notrace();
2305
2306
2307         tbuffer = get_trace_buf();
2308         if (!tbuffer) {
2309                 len = 0;
2310                 goto out;
2311         }
2312
2313         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2314
2315         local_save_flags(flags);
2316         size = sizeof(*entry) + len + 1;
2317         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2318                                           flags, pc);
2319         if (!event)
2320                 goto out;
2321         entry = ring_buffer_event_data(event);
2322         entry->ip = ip;
2323
2324         memcpy(&entry->buf, tbuffer, len + 1);
2325         if (!call_filter_check_discard(call, entry, buffer, event)) {
2326                 __buffer_unlock_commit(buffer, event);
2327                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2328         }
2329  out:
2330         preempt_enable_notrace();
2331         unpause_graph_tracing();
2332
2333         return len;
2334 }
2335
2336 int trace_array_vprintk(struct trace_array *tr,
2337                         unsigned long ip, const char *fmt, va_list args)
2338 {
2339         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2340 }
2341
2342 int trace_array_printk(struct trace_array *tr,
2343                        unsigned long ip, const char *fmt, ...)
2344 {
2345         int ret;
2346         va_list ap;
2347
2348         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2349                 return 0;
2350
2351         va_start(ap, fmt);
2352         ret = trace_array_vprintk(tr, ip, fmt, ap);
2353         va_end(ap);
2354         return ret;
2355 }
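
/*
 * Illustrative sketch (hypothetical, not from this file): given a
 * trace_array handle tr (for example one backing a tracefs instance),
 * messages can be directed at that instance's buffer rather than the
 * global one:
 *
 *	trace_array_printk(tr, _THIS_IP_, "widget %d reset\n", id);
 */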
2356
2357 int trace_array_printk_buf(struct ring_buffer *buffer,
2358                            unsigned long ip, const char *fmt, ...)
2359 {
2360         int ret;
2361         va_list ap;
2362
2363         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2364                 return 0;
2365
2366         va_start(ap, fmt);
2367         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2368         va_end(ap);
2369         return ret;
2370 }
2371
2372 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2373 {
2374         return trace_array_vprintk(&global_trace, ip, fmt, args);
2375 }
2376 EXPORT_SYMBOL_GPL(trace_vprintk);
2377
2378 static void trace_iterator_increment(struct trace_iterator *iter)
2379 {
2380         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2381
2382         iter->idx++;
2383         if (buf_iter)
2384                 ring_buffer_read(buf_iter, NULL);
2385 }
2386
2387 static struct trace_entry *
2388 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2389                 unsigned long *lost_events)
2390 {
2391         struct ring_buffer_event *event;
2392         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2393
2394         if (buf_iter)
2395                 event = ring_buffer_iter_peek(buf_iter, ts);
2396         else
2397                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2398                                          lost_events);
2399
2400         if (event) {
2401                 iter->ent_size = ring_buffer_event_length(event);
2402                 return ring_buffer_event_data(event);
2403         }
2404         iter->ent_size = 0;
2405         return NULL;
2406 }
2407
2408 static struct trace_entry *
2409 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2410                   unsigned long *missing_events, u64 *ent_ts)
2411 {
2412         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2413         struct trace_entry *ent, *next = NULL;
2414         unsigned long lost_events = 0, next_lost = 0;
2415         int cpu_file = iter->cpu_file;
2416         u64 next_ts = 0, ts;
2417         int next_cpu = -1;
2418         int next_size = 0;
2419         int cpu;
2420
2421         /*
2422          * If we are in a per_cpu trace file, don't bother iterating over
2423          * all cpus; just peek at that cpu directly.
2424          */
2425         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2426                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2427                         return NULL;
2428                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2429                 if (ent_cpu)
2430                         *ent_cpu = cpu_file;
2431
2432                 return ent;
2433         }
2434
2435         for_each_tracing_cpu(cpu) {
2436
2437                 if (ring_buffer_empty_cpu(buffer, cpu))
2438                         continue;
2439
2440                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2441
2442                 /*
2443                  * Pick the entry with the smallest timestamp:
2444                  */
2445                 if (ent && (!next || ts < next_ts)) {
2446                         next = ent;
2447                         next_cpu = cpu;
2448                         next_ts = ts;
2449                         next_lost = lost_events;
2450                         next_size = iter->ent_size;
2451                 }
2452         }
2453
2454         iter->ent_size = next_size;
2455
2456         if (ent_cpu)
2457                 *ent_cpu = next_cpu;
2458
2459         if (ent_ts)
2460                 *ent_ts = next_ts;
2461
2462         if (missing_events)
2463                 *missing_events = next_lost;
2464
2465         return next;
2466 }
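
/*
 * Worked example (hypothetical timestamps): if cpu0 holds events at
 * ts 100 and 300 and cpu1 holds one at ts 200, repeated calls through
 * trace_find_next_entry_inc() return them in the order 100 (cpu0),
 * 200 (cpu1), 300 (cpu0), since each pass picks the smallest timestamp
 * among the non-empty per-cpu buffers.
 */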
2467
2468 /* Find the next real entry, without updating the iterator itself */
2469 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2470                                           int *ent_cpu, u64 *ent_ts)
2471 {
2472         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2473 }
2474
2475 /* Find the next real entry, and increment the iterator to the next entry */
2476 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2477 {
2478         iter->ent = __find_next_entry(iter, &iter->cpu,
2479                                       &iter->lost_events, &iter->ts);
2480
2481         if (iter->ent)
2482                 trace_iterator_increment(iter);
2483
2484         return iter->ent ? iter : NULL;
2485 }
2486
2487 static void trace_consume(struct trace_iterator *iter)
2488 {
2489         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2490                             &iter->lost_events);
2491 }
2492
2493 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2494 {
2495         struct trace_iterator *iter = m->private;
2496         int i = (int)*pos;
2497         void *ent;
2498
2499         WARN_ON_ONCE(iter->leftover);
2500
2501         (*pos)++;
2502
2503         /* can't go backwards */
2504         if (iter->idx > i)
2505                 return NULL;
2506
2507         if (iter->idx < 0)
2508                 ent = trace_find_next_entry_inc(iter);
2509         else
2510                 ent = iter;
2511
2512         while (ent && iter->idx < i)
2513                 ent = trace_find_next_entry_inc(iter);
2514
2515         iter->pos = *pos;
2516
2517         return ent;
2518 }
2519
2520 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2521 {
2522         struct ring_buffer_event *event;
2523         struct ring_buffer_iter *buf_iter;
2524         unsigned long entries = 0;
2525         u64 ts;
2526
2527         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2528
2529         buf_iter = trace_buffer_iter(iter, cpu);
2530         if (!buf_iter)
2531                 return;
2532
2533         ring_buffer_iter_reset(buf_iter);
2534
2535         /*
2536          * We could have the case with the max latency tracers
2537          * that a reset never took place on a cpu. This is evidenced
2538          * by the timestamp being before the start of the buffer.
2539          */
2540         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2541                 if (ts >= iter->trace_buffer->time_start)
2542                         break;
2543                 entries++;
2544                 ring_buffer_read(buf_iter, NULL);
2545         }
2546
2547         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2548 }
2549
2550 /*
2551  * The current tracer is copied to avoid taking a global lock
2552  * all around.
2553  */
2554 static void *s_start(struct seq_file *m, loff_t *pos)
2555 {
2556         struct trace_iterator *iter = m->private;
2557         struct trace_array *tr = iter->tr;
2558         int cpu_file = iter->cpu_file;
2559         void *p = NULL;
2560         loff_t l = 0;
2561         int cpu;
2562
2563         /*
2564          * copy the tracer to avoid using a global lock all around.
2565          * iter->trace is a copy of current_trace; the pointer to the
2566          * name may be used instead of a strcmp(), as iter->trace->name
2567          * will point to the same string as current_trace->name.
2568          */
2569         mutex_lock(&trace_types_lock);
2570         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2571                 *iter->trace = *tr->current_trace;
2572         mutex_unlock(&trace_types_lock);
2573
2574 #ifdef CONFIG_TRACER_MAX_TRACE
2575         if (iter->snapshot && iter->trace->use_max_tr)
2576                 return ERR_PTR(-EBUSY);
2577 #endif
2578
2579         if (!iter->snapshot)
2580                 atomic_inc(&trace_record_cmdline_disabled);
2581
2582         if (*pos != iter->pos) {
2583                 iter->ent = NULL;
2584                 iter->cpu = 0;
2585                 iter->idx = -1;
2586
2587                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2588                         for_each_tracing_cpu(cpu)
2589                                 tracing_iter_reset(iter, cpu);
2590                 } else
2591                         tracing_iter_reset(iter, cpu_file);
2592
2593                 iter->leftover = 0;
2594                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2595                         ;
2596
2597         } else {
2598                 /*
2599                  * If we overflowed the seq_file before, then we want
2600                  * to just reuse the trace_seq buffer again.
2601                  */
2602                 if (iter->leftover)
2603                         p = iter;
2604                 else {
2605                         l = *pos - 1;
2606                         p = s_next(m, p, &l);
2607                 }
2608         }
2609
2610         trace_event_read_lock();
2611         trace_access_lock(cpu_file);
2612         return p;
2613 }
2614
2615 static void s_stop(struct seq_file *m, void *p)
2616 {
2617         struct trace_iterator *iter = m->private;
2618
2619 #ifdef CONFIG_TRACER_MAX_TRACE
2620         if (iter->snapshot && iter->trace->use_max_tr)
2621                 return;
2622 #endif
2623
2624         if (!iter->snapshot)
2625                 atomic_dec(&trace_record_cmdline_disabled);
2626
2627         trace_access_unlock(iter->cpu_file);
2628         trace_event_read_unlock();
2629 }
2630
2631 static void
2632 get_total_entries(struct trace_buffer *buf,
2633                   unsigned long *total, unsigned long *entries)
2634 {
2635         unsigned long count;
2636         int cpu;
2637
2638         *total = 0;
2639         *entries = 0;
2640
2641         for_each_tracing_cpu(cpu) {
2642                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2643                 /*
2644                  * If this buffer has skipped entries, then we hold all
2645                  * entries for the trace and we need to ignore the
2646                  * ones before the time stamp.
2647                  */
2648                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2649                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2650                         /* total is the same as the entries */
2651                         *total += count;
2652                 } else
2653                         *total += count +
2654                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2655                 *entries += count;
2656         }
2657 }
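
/*
 * Worked example (hypothetical counts): a cpu with 950 readable entries
 * and 50 overrun (overwritten) entries contributes 950 to *entries and
 * 1000 to *total. A cpu whose buffer has skipped pre-timestamp entries
 * contributes the same reduced count to both, since nothing was lost
 * from that buffer.
 */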
2658
2659 static void print_lat_help_header(struct seq_file *m)
2660 {
2661         seq_puts(m, "#                  _------=> CPU#            \n"
2662                     "#                 / _-----=> irqs-off        \n"
2663                     "#                | / _----=> need-resched    \n"
2664                     "#                || / _---=> hardirq/softirq \n"
2665                     "#                ||| / _--=> preempt-depth   \n"
2666                     "#                |||| /     delay            \n"
2667                     "#  cmd     pid   ||||| time  |   caller      \n"
2668                     "#     \\   /      |||||  \\    |   /         \n");
2669 }
2670
2671 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2672 {
2673         unsigned long total;
2674         unsigned long entries;
2675
2676         get_total_entries(buf, &total, &entries);
2677         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2678                    entries, total, num_online_cpus());
2679         seq_puts(m, "#\n");
2680 }
2681
2682 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2683 {
2684         print_event_info(buf, m);
2685         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2686                     "#              | |       |          |         |\n");
2687 }
2688
2689 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2690 {
2691         print_event_info(buf, m);
2692         seq_puts(m, "#                              _-----=> irqs-off\n"
2693                     "#                             / _----=> need-resched\n"
2694                     "#                            | / _---=> hardirq/softirq\n"
2695                     "#                            || / _--=> preempt-depth\n"
2696                     "#                            ||| /     delay\n"
2697                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2698                     "#              | |       |   ||||       |         |\n");
2699 }
2700
2701 void
2702 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2703 {
2704         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2705         struct trace_buffer *buf = iter->trace_buffer;
2706         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2707         struct tracer *type = iter->trace;
2708         unsigned long entries;
2709         unsigned long total;
2710         const char *name = "preemption";
2711
2712         name = type->name;
2713
2714         get_total_entries(buf, &total, &entries);
2715
2716         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2717                    name, UTS_RELEASE);
2718         seq_puts(m, "# -----------------------------------"
2719                  "---------------------------------\n");
2720         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2721                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2722                    nsecs_to_usecs(data->saved_latency),
2723                    entries,
2724                    total,
2725                    buf->cpu,
2726 #if defined(CONFIG_PREEMPT_NONE)
2727                    "server",
2728 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2729                    "desktop",
2730 #elif defined(CONFIG_PREEMPT)
2731                    "preempt",
2732 #else
2733                    "unknown",
2734 #endif
2735                    /* These are reserved for later use */
2736                    0, 0, 0, 0);
2737 #ifdef CONFIG_SMP
2738         seq_printf(m, " #P:%d)\n", num_online_cpus());
2739 #else
2740         seq_puts(m, ")\n");
2741 #endif
2742         seq_puts(m, "#    -----------------\n");
2743         seq_printf(m, "#    | task: %.16s-%d "
2744                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2745                    data->comm, data->pid,
2746                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2747                    data->policy, data->rt_priority);
2748         seq_puts(m, "#    -----------------\n");
2749
2750         if (data->critical_start) {
2751                 seq_puts(m, "#  => started at: ");
2752                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2753                 trace_print_seq(m, &iter->seq);
2754                 seq_puts(m, "\n#  => ended at:   ");
2755                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2756                 trace_print_seq(m, &iter->seq);
2757                 seq_puts(m, "\n#\n");
2758         }
2759
2760         seq_puts(m, "#\n");
2761 }
2762
2763 static void test_cpu_buff_start(struct trace_iterator *iter)
2764 {
2765         struct trace_seq *s = &iter->seq;
2766         struct trace_array *tr = iter->tr;
2767
2768         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
2769                 return;
2770
2771         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2772                 return;
2773
2774         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
2775                 return;
2776
2777         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2778                 return;
2779
2780         if (iter->started)
2781                 cpumask_set_cpu(iter->cpu, iter->started);
2782
2783         /* Don't print started cpu buffer for the first entry of the trace */
2784         if (iter->idx > 1)
2785                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2786                                 iter->cpu);
2787 }
2788
2789 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2790 {
2791         struct trace_array *tr = iter->tr;
2792         struct trace_seq *s = &iter->seq;
2793         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
2794         struct trace_entry *entry;
2795         struct trace_event *event;
2796
2797         entry = iter->ent;
2798
2799         test_cpu_buff_start(iter);
2800
2801         event = ftrace_find_event(entry->type);
2802
2803         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2804                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2805                         trace_print_lat_context(iter);
2806                 else
2807                         trace_print_context(iter);
2808         }
2809
2810         if (trace_seq_has_overflowed(s))
2811                 return TRACE_TYPE_PARTIAL_LINE;
2812
2813         if (event)
2814                 return event->funcs->trace(iter, sym_flags, event);
2815
2816         trace_seq_printf(s, "Unknown type %d\n", entry->type);
2817
2818         return trace_handle_return(s);
2819 }
2820
2821 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2822 {
2823         struct trace_array *tr = iter->tr;
2824         struct trace_seq *s = &iter->seq;
2825         struct trace_entry *entry;
2826         struct trace_event *event;
2827
2828         entry = iter->ent;
2829
2830         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
2831                 trace_seq_printf(s, "%d %d %llu ",
2832                                  entry->pid, iter->cpu, iter->ts);
2833
2834         if (trace_seq_has_overflowed(s))
2835                 return TRACE_TYPE_PARTIAL_LINE;
2836
2837         event = ftrace_find_event(entry->type);
2838         if (event)
2839                 return event->funcs->raw(iter, 0, event);
2840
2841         trace_seq_printf(s, "%d ?\n", entry->type);
2842
2843         return trace_handle_return(s);
2844 }
2845
2846 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2847 {
2848         struct trace_array *tr = iter->tr;
2849         struct trace_seq *s = &iter->seq;
2850         unsigned char newline = '\n';
2851         struct trace_entry *entry;
2852         struct trace_event *event;
2853
2854         entry = iter->ent;
2855
2856         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2857                 SEQ_PUT_HEX_FIELD(s, entry->pid);
2858                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
2859                 SEQ_PUT_HEX_FIELD(s, iter->ts);
2860                 if (trace_seq_has_overflowed(s))
2861                         return TRACE_TYPE_PARTIAL_LINE;
2862         }
2863
2864         event = ftrace_find_event(entry->type);
2865         if (event) {
2866                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2867                 if (ret != TRACE_TYPE_HANDLED)
2868                         return ret;
2869         }
2870
2871         SEQ_PUT_FIELD(s, newline);
2872
2873         return trace_handle_return(s);
2874 }
2875
2876 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2877 {
2878         struct trace_array *tr = iter->tr;
2879         struct trace_seq *s = &iter->seq;
2880         struct trace_entry *entry;
2881         struct trace_event *event;
2882
2883         entry = iter->ent;
2884
2885         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2886                 SEQ_PUT_FIELD(s, entry->pid);
2887                 SEQ_PUT_FIELD(s, iter->cpu);
2888                 SEQ_PUT_FIELD(s, iter->ts);
2889                 if (trace_seq_has_overflowed(s))
2890                         return TRACE_TYPE_PARTIAL_LINE;
2891         }
2892
2893         event = ftrace_find_event(entry->type);
2894         return event ? event->funcs->binary(iter, 0, event) :
2895                 TRACE_TYPE_HANDLED;
2896 }
2897
2898 int trace_empty(struct trace_iterator *iter)
2899 {
2900         struct ring_buffer_iter *buf_iter;
2901         int cpu;
2902
2903         /* If we are looking at one CPU buffer, only check that one */
2904         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2905                 cpu = iter->cpu_file;
2906                 buf_iter = trace_buffer_iter(iter, cpu);
2907                 if (buf_iter) {
2908                         if (!ring_buffer_iter_empty(buf_iter))
2909                                 return 0;
2910                 } else {
2911                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2912                                 return 0;
2913                 }
2914                 return 1;
2915         }
2916
2917         for_each_tracing_cpu(cpu) {
2918                 buf_iter = trace_buffer_iter(iter, cpu);
2919                 if (buf_iter) {
2920                         if (!ring_buffer_iter_empty(buf_iter))
2921                                 return 0;
2922                 } else {
2923                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2924                                 return 0;
2925                 }
2926         }
2927
2928         return 1;
2929 }
2930
2931 /*  Called with trace_event_read_lock() held. */
2932 enum print_line_t print_trace_line(struct trace_iterator *iter)
2933 {
2934         struct trace_array *tr = iter->tr;
2935         unsigned long trace_flags = tr->trace_flags;
2936         enum print_line_t ret;
2937
2938         if (iter->lost_events) {
2939                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2940                                  iter->cpu, iter->lost_events);
2941                 if (trace_seq_has_overflowed(&iter->seq))
2942                         return TRACE_TYPE_PARTIAL_LINE;
2943         }
2944
2945         if (iter->trace && iter->trace->print_line) {
2946                 ret = iter->trace->print_line(iter);
2947                 if (ret != TRACE_TYPE_UNHANDLED)
2948                         return ret;
2949         }
2950
2951         if (iter->ent->type == TRACE_BPUTS &&
2952                         trace_flags & TRACE_ITER_PRINTK &&
2953                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2954                 return trace_print_bputs_msg_only(iter);
2955
2956         if (iter->ent->type == TRACE_BPRINT &&
2957                         trace_flags & TRACE_ITER_PRINTK &&
2958                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2959                 return trace_print_bprintk_msg_only(iter);
2960
2961         if (iter->ent->type == TRACE_PRINT &&
2962                         trace_flags & TRACE_ITER_PRINTK &&
2963                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2964                 return trace_print_printk_msg_only(iter);
2965
2966         if (trace_flags & TRACE_ITER_BIN)
2967                 return print_bin_fmt(iter);
2968
2969         if (trace_flags & TRACE_ITER_HEX)
2970                 return print_hex_fmt(iter);
2971
2972         if (trace_flags & TRACE_ITER_RAW)
2973                 return print_raw_fmt(iter);
2974
2975         return print_trace_fmt(iter);
2976 }
2977
2978 void trace_latency_header(struct seq_file *m)
2979 {
2980         struct trace_iterator *iter = m->private;
2981         struct trace_array *tr = iter->tr;
2982
2983         /* print nothing if the buffers are empty */
2984         if (trace_empty(iter))
2985                 return;
2986
2987         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2988                 print_trace_header(m, iter);
2989
2990         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
2991                 print_lat_help_header(m);
2992 }
2993
2994 void trace_default_header(struct seq_file *m)
2995 {
2996         struct trace_iterator *iter = m->private;
2997         struct trace_array *tr = iter->tr;
2998         unsigned long trace_flags = tr->trace_flags;
2999
3000         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3001                 return;
3002
3003         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3004                 /* print nothing if the buffers are empty */
3005                 if (trace_empty(iter))
3006                         return;
3007                 print_trace_header(m, iter);
3008                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3009                         print_lat_help_header(m);
3010         } else {
3011                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3012                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3013                                 print_func_help_header_irq(iter->trace_buffer, m);
3014                         else
3015                                 print_func_help_header(iter->trace_buffer, m);
3016                 }
3017         }
3018 }
3019
3020 static void test_ftrace_alive(struct seq_file *m)
3021 {
3022         if (!ftrace_is_dead())
3023                 return;
3024         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3025                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3026 }
3027
3028 #ifdef CONFIG_TRACER_MAX_TRACE
3029 static void show_snapshot_main_help(struct seq_file *m)
3030 {
3031         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3032                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3033                     "#                      Takes a snapshot of the main buffer.\n"
3034                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3035                     "#                      (Doesn't have to be '2' works with any number that\n"
3036                     "#                       is not a '0' or '1')\n");
3037 }
3038
3039 static void show_snapshot_percpu_help(struct seq_file *m)
3040 {
3041         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3042 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3043         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3044                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3045 #else
3046         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3047                     "#                     Must use main snapshot file to allocate.\n");
3048 #endif
3049         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3050                     "#                      (Doesn't have to be '2' works with any number that\n"
3051                     "#                       is not a '0' or '1')\n");
3052 }
3053
3054 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3055 {
3056         if (iter->tr->allocated_snapshot)
3057                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3058         else
3059                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3060
3061         seq_puts(m, "# Snapshot commands:\n");
3062         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3063                 show_snapshot_main_help(m);
3064         else
3065                 show_snapshot_percpu_help(m);
3066 }
3067 #else
3068 /* Should never be called */
3069 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3070 #endif
3071
3072 static int s_show(struct seq_file *m, void *v)
3073 {
3074         struct trace_iterator *iter = v;
3075         int ret;
3076
3077         if (iter->ent == NULL) {
3078                 if (iter->tr) {
3079                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3080                         seq_puts(m, "#\n");
3081                         test_ftrace_alive(m);
3082                 }
3083                 if (iter->snapshot && trace_empty(iter))
3084                         print_snapshot_help(m, iter);
3085                 else if (iter->trace && iter->trace->print_header)
3086                         iter->trace->print_header(m);
3087                 else
3088                         trace_default_header(m);
3089
3090         } else if (iter->leftover) {
3091                 /*
3092                  * If we filled the seq_file buffer earlier, we
3093                  * want to just show it now.
3094                  */
3095                 ret = trace_print_seq(m, &iter->seq);
3096
3097                 /* ret should this time be zero, but you never know */
3098                 iter->leftover = ret;
3099
3100         } else {
3101                 print_trace_line(iter);
3102                 ret = trace_print_seq(m, &iter->seq);
3103                 /*
3104                  * If we overflow the seq_file buffer, then it will
3105                  * ask us for this data again at start up.
3106                  * Use that instead.
3107                  *  ret is 0 if seq_file write succeeded.
3108                  *        -1 otherwise.
3109                  */
3110                 iter->leftover = ret;
3111         }
3112
3113         return 0;
3114 }
3115
3116 /*
3117  * Should be used after trace_array_get(); trace_types_lock
3118  * ensures that i_cdev was already initialized.
3119  */
3120 static inline int tracing_get_cpu(struct inode *inode)
3121 {
3122         if (inode->i_cdev) /* See trace_create_cpu_file() */
3123                 return (long)inode->i_cdev - 1;
3124         return RING_BUFFER_ALL_CPUS;
3125 }
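
/*
 * Illustrative sketch (assumption inferred from the comment above, since
 * trace_create_cpu_file() is outside this excerpt): per-cpu files are
 * expected to store "cpu + 1" in i_cdev so that a zero value still means
 * "no per-cpu binding":
 *
 *	inode->i_cdev = (void *)(long)(cpu + 1);
 *
 * and tracing_get_cpu() then recovers the original cpu number.
 */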
3126
3127 static const struct seq_operations tracer_seq_ops = {
3128         .start          = s_start,
3129         .next           = s_next,
3130         .stop           = s_stop,
3131         .show           = s_show,
3132 };
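
/*
 * Illustrative sketch (simplified, not the actual seq_file internals):
 * a read of the "trace" file drives these callbacks roughly as
 *
 *	p = s_start(m, &pos);
 *	while (p && !seq_has_overflowed(m)) {
 *		s_show(m, p);
 *		p = s_next(m, p, &pos);
 *	}
 *	s_stop(m, p);
 *
 * so s_start()/s_stop() bracket the locking and s_show() emits one trace
 * line (or the headers) per iteration.
 */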
3133
3134 static struct trace_iterator *
3135 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3136 {
3137         struct trace_array *tr = inode->i_private;
3138         struct trace_iterator *iter;
3139         int cpu;
3140
3141         if (tracing_disabled)
3142                 return ERR_PTR(-ENODEV);
3143
3144         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3145         if (!iter)
3146                 return ERR_PTR(-ENOMEM);
3147
3148         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3149                                     GFP_KERNEL);
3150         if (!iter->buffer_iter)
3151                 goto release;
3152
3153         /*
3154          * We make a copy of the current tracer to avoid concurrent
3155          * changes on it while we are reading.
3156          * changes to it while we are reading.
3157         mutex_lock(&trace_types_lock);
3158         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3159         if (!iter->trace)
3160                 goto fail;
3161
3162         *iter->trace = *tr->current_trace;
3163
3164         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3165                 goto fail;
3166
3167         iter->tr = tr;
3168
3169 #ifdef CONFIG_TRACER_MAX_TRACE
3170         /* Currently only the top directory has a snapshot */
3171         if (tr->current_trace->print_max || snapshot)
3172                 iter->trace_buffer = &tr->max_buffer;
3173         else
3174 #endif
3175                 iter->trace_buffer = &tr->trace_buffer;
3176         iter->snapshot = snapshot;
3177         iter->pos = -1;
3178         iter->cpu_file = tracing_get_cpu(inode);
3179         mutex_init(&iter->mutex);
3180
3181         /* Notify the tracer early; before we stop tracing. */
3182         if (iter->trace && iter->trace->open)
3183                 iter->trace->open(iter);
3184
3185         /* Annotate start of buffers if we had overruns */
3186         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3187                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3188
3189         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3190         if (trace_clocks[tr->clock_id].in_ns)
3191                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3192
3193         /* stop the trace while dumping if we are not opening "snapshot" */
3194         if (!iter->snapshot)
3195                 tracing_stop_tr(tr);
3196
3197         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3198                 for_each_tracing_cpu(cpu) {
3199                         iter->buffer_iter[cpu] =
3200                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3201                 }
3202                 ring_buffer_read_prepare_sync();
3203                 for_each_tracing_cpu(cpu) {
3204                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3205                         tracing_iter_reset(iter, cpu);
3206                 }
3207         } else {
3208                 cpu = iter->cpu_file;
3209                 iter->buffer_iter[cpu] =
3210                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3211                 ring_buffer_read_prepare_sync();
3212                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3213                 tracing_iter_reset(iter, cpu);
3214         }
3215
3216         mutex_unlock(&trace_types_lock);
3217
3218         return iter;
3219
3220  fail:
3221         mutex_unlock(&trace_types_lock);
3222         kfree(iter->trace);
3223         kfree(iter->buffer_iter);
3224 release:
3225         seq_release_private(inode, file);
3226         return ERR_PTR(-ENOMEM);
3227 }
3228
3229 int tracing_open_generic(struct inode *inode, struct file *filp)
3230 {
3231         if (tracing_disabled)
3232                 return -ENODEV;
3233
3234         filp->private_data = inode->i_private;
3235         return 0;
3236 }
3237
3238 bool tracing_is_disabled(void)
3239 {
3240         return (tracing_disabled) ? true : false;
3241 }
3242
3243 /*
3244  * Open and update trace_array ref count.
3245  * Must have the current trace_array passed to it.
3246  */
3247 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3248 {
3249         struct trace_array *tr = inode->i_private;
3250
3251         if (tracing_disabled)
3252                 return -ENODEV;
3253
3254         if (trace_array_get(tr) < 0)
3255                 return -ENODEV;
3256
3257         filp->private_data = inode->i_private;
3258
3259         return 0;
3260 }
3261
3262 static int tracing_release(struct inode *inode, struct file *file)
3263 {
3264         struct trace_array *tr = inode->i_private;
3265         struct seq_file *m = file->private_data;
3266         struct trace_iterator *iter;
3267         int cpu;
3268
3269         if (!(file->f_mode & FMODE_READ)) {
3270                 trace_array_put(tr);
3271                 return 0;
3272         }
3273
3274         /* Writes do not use seq_file */
3275         iter = m->private;
3276         mutex_lock(&trace_types_lock);
3277
3278         for_each_tracing_cpu(cpu) {
3279                 if (iter->buffer_iter[cpu])
3280                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3281         }
3282
3283         if (iter->trace && iter->trace->close)
3284                 iter->trace->close(iter);
3285
3286         if (!iter->snapshot)
3287                 /* reenable tracing if it was previously enabled */
3288                 tracing_start_tr(tr);
3289
3290         __trace_array_put(tr);
3291
3292         mutex_unlock(&trace_types_lock);
3293
3294         mutex_destroy(&iter->mutex);
3295         free_cpumask_var(iter->started);
3296         kfree(iter->trace);
3297         kfree(iter->buffer_iter);
3298         seq_release_private(inode, file);
3299
3300         return 0;
3301 }
3302
3303 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3304 {
3305         struct trace_array *tr = inode->i_private;
3306
3307         trace_array_put(tr);
3308         return 0;
3309 }
3310
3311 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3312 {
3313         struct trace_array *tr = inode->i_private;
3314
3315         trace_array_put(tr);
3316
3317         return single_release(inode, file);
3318 }
3319
3320 static int tracing_open(struct inode *inode, struct file *file)
3321 {
3322         struct trace_array *tr = inode->i_private;
3323         struct trace_iterator *iter;
3324         int ret = 0;
3325
3326         if (trace_array_get(tr) < 0)
3327                 return -ENODEV;
3328
3329         /* If this file was open for write, then erase contents */
3330         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3331                 int cpu = tracing_get_cpu(inode);
3332
3333                 if (cpu == RING_BUFFER_ALL_CPUS)
3334                         tracing_reset_online_cpus(&tr->trace_buffer);
3335                 else
3336                         tracing_reset(&tr->trace_buffer, cpu);
3337         }
3338
3339         if (file->f_mode & FMODE_READ) {
3340                 iter = __tracing_open(inode, file, false);
3341                 if (IS_ERR(iter))
3342                         ret = PTR_ERR(iter);
3343                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3344                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3345         }
3346
3347         if (ret < 0)
3348                 trace_array_put(tr);
3349
3350         return ret;
3351 }
3352
3353 /*
3354  * Some tracers are not suitable for instance buffers.
3355  * A tracer is always available for the global array (toplevel)
3356  * or if it explicitly states that it is.
3357  */
3358 static bool
3359 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3360 {
3361         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3362 }
3363
3364 /* Find the next tracer that this trace array may use */
3365 static struct tracer *
3366 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3367 {
3368         while (t && !trace_ok_for_array(t, tr))
3369                 t = t->next;
3370
3371         return t;
3372 }
3373
3374 static void *
3375 t_next(struct seq_file *m, void *v, loff_t *pos)
3376 {
3377         struct trace_array *tr = m->private;
3378         struct tracer *t = v;
3379
3380         (*pos)++;
3381
3382         if (t)
3383                 t = get_tracer_for_array(tr, t->next);
3384
3385         return t;
3386 }
3387
3388 static void *t_start(struct seq_file *m, loff_t *pos)
3389 {
3390         struct trace_array *tr = m->private;
3391         struct tracer *t;
3392         loff_t l = 0;
3393
3394         mutex_lock(&trace_types_lock);
3395
3396         t = get_tracer_for_array(tr, trace_types);
3397         for (; t && l < *pos; t = t_next(m, t, &l))
3398                         ;
3399
3400         return t;
3401 }
3402
3403 static void t_stop(struct seq_file *m, void *p)
3404 {
3405         mutex_unlock(&trace_types_lock);
3406 }
3407
3408 static int t_show(struct seq_file *m, void *v)
3409 {
3410         struct tracer *t = v;
3411
3412         if (!t)
3413                 return 0;
3414
3415         seq_puts(m, t->name);
3416         if (t->next)
3417                 seq_putc(m, ' ');
3418         else
3419                 seq_putc(m, '\n');
3420
3421         return 0;
3422 }
3423
3424 static const struct seq_operations show_traces_seq_ops = {
3425         .start          = t_start,
3426         .next           = t_next,
3427         .stop           = t_stop,
3428         .show           = t_show,
3429 };
3430
3431 static int show_traces_open(struct inode *inode, struct file *file)
3432 {
3433         struct trace_array *tr = inode->i_private;
3434         struct seq_file *m;
3435         int ret;
3436
3437         if (tracing_disabled)
3438                 return -ENODEV;
3439
3440         ret = seq_open(file, &show_traces_seq_ops);
3441         if (ret)
3442                 return ret;
3443
3444         m = file->private_data;
3445         m->private = tr;
3446
3447         return 0;
3448 }
3449
3450 static ssize_t
3451 tracing_write_stub(struct file *filp, const char __user *ubuf,
3452                    size_t count, loff_t *ppos)
3453 {
3454         return count;
3455 }
3456
3457 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3458 {
3459         int ret;
3460
3461         if (file->f_mode & FMODE_READ)
3462                 ret = seq_lseek(file, offset, whence);
3463         else
3464                 file->f_pos = ret = 0;
3465
3466         return ret;
3467 }
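/*
 * Note: only readers go through seq_lseek() here. Writes to this file are
 * handled by tracing_write_stub() above and never touch a seq_file, so
 * there is no position worth preserving and f_pos is simply reset to 0.
 */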
3468
3469 static const struct file_operations tracing_fops = {
3470         .open           = tracing_open,
3471         .read           = seq_read,
3472         .write          = tracing_write_stub,
3473         .llseek         = tracing_lseek,
3474         .release        = tracing_release,
3475 };
3476
3477 static const struct file_operations show_traces_fops = {
3478         .open           = show_traces_open,
3479         .read           = seq_read,
3480         .release        = seq_release,
3481         .llseek         = seq_lseek,
3482 };
3483
3484 /*
3485  * The tracer itself will not take this lock, but still we want
3486  * to provide a consistent cpumask to user-space:
3487  */
3488 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3489
3490 /*
3491  * Temporary storage for the character representation of the
3492  * CPU bitmask (and one more byte for the newline):
3493  */
3494 static char mask_str[NR_CPUS + 1];
3495
3496 static ssize_t
3497 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3498                      size_t count, loff_t *ppos)
3499 {
3500         struct trace_array *tr = file_inode(filp)->i_private;
3501         int len;
3502
3503         mutex_lock(&tracing_cpumask_update_lock);
3504
3505         len = snprintf(mask_str, sizeof(mask_str), "%*pb\n",
3506                        cpumask_pr_args(tr->tracing_cpumask));
3507         if (len >= count) {
3508                 count = -EINVAL;
3509                 goto out_err;
3510         }
3511         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3512
3513 out_err:
3514         mutex_unlock(&tracing_cpumask_update_lock);
3515
3516         return count;
3517 }
3518
3519 static ssize_t
3520 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3521                       size_t count, loff_t *ppos)
3522 {
3523         struct trace_array *tr = file_inode(filp)->i_private;
3524         cpumask_var_t tracing_cpumask_new;
3525         int err, cpu;
3526
3527         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3528                 return -ENOMEM;
3529
3530         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3531         if (err)
3532                 goto err_unlock;
3533
3534         mutex_lock(&tracing_cpumask_update_lock);
3535
3536         local_irq_disable();
3537         arch_spin_lock(&tr->max_lock);
3538         for_each_tracing_cpu(cpu) {
3539                 /*
3540                  * Increase/decrease the disabled counter if we are
3541                  * about to flip a bit in the cpumask:
3542                  */
3543                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3544                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3545                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3546                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3547                 }
3548                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3549                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3550                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3551                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3552                 }
3553         }
3554         arch_spin_unlock(&tr->max_lock);
3555         local_irq_enable();
3556
3557         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3558
3559         mutex_unlock(&tracing_cpumask_update_lock);
3560         free_cpumask_var(tracing_cpumask_new);
3561
3562         return count;
3563
3564 err_unlock:
3565         free_cpumask_var(tracing_cpumask_new);
3566
3567         return err;
3568 }
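/*
 * Example usage (cpumask_parse_user() takes a hex mask, so "3" selects
 * CPUs 0 and 1):
 *
 *	# echo 3 > tracing_cpumask	# record only on CPUs 0 and 1
 *	# cat tracing_cpumask		# read back the current mask
 *
 * Clearing a bit disables recording on that CPU via
 * ring_buffer_record_disable_cpu(); setting it again re-enables recording.
 */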
3569
3570 static const struct file_operations tracing_cpumask_fops = {
3571         .open           = tracing_open_generic_tr,
3572         .read           = tracing_cpumask_read,
3573         .write          = tracing_cpumask_write,
3574         .release        = tracing_release_generic_tr,
3575         .llseek         = generic_file_llseek,
3576 };
3577
3578 static int tracing_trace_options_show(struct seq_file *m, void *v)
3579 {
3580         struct tracer_opt *trace_opts;
3581         struct trace_array *tr = m->private;
3582         u32 tracer_flags;
3583         int i;
3584
3585         mutex_lock(&trace_types_lock);
3586         tracer_flags = tr->current_trace->flags->val;
3587         trace_opts = tr->current_trace->flags->opts;
3588
3589         for (i = 0; trace_options[i]; i++) {
3590                 if (tr->trace_flags & (1 << i))
3591                         seq_printf(m, "%s\n", trace_options[i]);
3592                 else
3593                         seq_printf(m, "no%s\n", trace_options[i]);
3594         }
3595
3596         for (i = 0; trace_opts[i].name; i++) {
3597                 if (tracer_flags & trace_opts[i].bit)
3598                         seq_printf(m, "%s\n", trace_opts[i].name);
3599                 else
3600                         seq_printf(m, "no%s\n", trace_opts[i].name);
3601         }
3602         mutex_unlock(&trace_types_lock);
3603
3604         return 0;
3605 }
3606
3607 static int __set_tracer_option(struct trace_array *tr,
3608                                struct tracer_flags *tracer_flags,
3609                                struct tracer_opt *opts, int neg)
3610 {
3611         struct tracer *trace = tracer_flags->trace;
3612         int ret;
3613
3614         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3615         if (ret)
3616                 return ret;
3617
3618         if (neg)
3619                 tracer_flags->val &= ~opts->bit;
3620         else
3621                 tracer_flags->val |= opts->bit;
3622         return 0;
3623 }
3624
3625 /* Try to assign a tracer specific option */
3626 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3627 {
3628         struct tracer *trace = tr->current_trace;
3629         struct tracer_flags *tracer_flags = trace->flags;
3630         struct tracer_opt *opts = NULL;
3631         int i;
3632
3633         for (i = 0; tracer_flags->opts[i].name; i++) {
3634                 opts = &tracer_flags->opts[i];
3635
3636                 if (strcmp(cmp, opts->name) == 0)
3637                         return __set_tracer_option(tr, trace->flags, opts, neg);
3638         }
3639
3640         return -EINVAL;
3641 }
3642
3643 /* Some tracers require overwrite to stay enabled */
3644 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3645 {
3646         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3647                 return -1;
3648
3649         return 0;
3650 }
3651
3652 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3653 {
3654         /* do nothing if flag is already set */
3655         if (!!(tr->trace_flags & mask) == !!enabled)
3656                 return 0;
3657
3658         /* Give the tracer a chance to approve the change */
3659         if (tr->current_trace->flag_changed)
3660                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3661                         return -EINVAL;
3662
3663         if (enabled)
3664                 tr->trace_flags |= mask;
3665         else
3666                 tr->trace_flags &= ~mask;
3667
3668         if (mask == TRACE_ITER_RECORD_CMD)
3669                 trace_event_enable_cmd_record(enabled);
3670
3671         if (mask == TRACE_ITER_EVENT_FORK)
3672                 trace_event_follow_fork(tr, enabled);
3673
3674         if (mask == TRACE_ITER_OVERWRITE) {
3675                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3676 #ifdef CONFIG_TRACER_MAX_TRACE
3677                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3678 #endif
3679         }
3680
3681         if (mask == TRACE_ITER_PRINTK) {
3682                 trace_printk_start_stop_comm(enabled);
3683                 trace_printk_control(enabled);
3684         }
3685
3686         return 0;
3687 }
3688
3689 static int trace_set_options(struct trace_array *tr, char *option)
3690 {
3691         char *cmp;
3692         int neg = 0;
3693         int ret = -ENODEV;
3694         int i;
3695         size_t orig_len = strlen(option);
3696
3697         cmp = strstrip(option);
3698
3699         if (strncmp(cmp, "no", 2) == 0) {
3700                 neg = 1;
3701                 cmp += 2;
3702         }
3703
3704         mutex_lock(&trace_types_lock);
3705
3706         for (i = 0; trace_options[i]; i++) {
3707                 if (strcmp(cmp, trace_options[i]) == 0) {
3708                         ret = set_tracer_flag(tr, 1 << i, !neg);
3709                         break;
3710                 }
3711         }
3712
3713         /* If no option could be set, test the specific tracer options */
3714         if (!trace_options[i])
3715                 ret = set_tracer_option(tr, cmp, neg);
3716
3717         mutex_unlock(&trace_types_lock);
3718
3719         /*
3720          * If the first trailing whitespace is replaced with '\0' by strstrip,
3721          * turn it back into a space.
3722          */
3723         if (orig_len > strlen(option))
3724                 option[strlen(option)] = ' ';
3725
3726         return ret;
3727 }
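/*
 * Example, assuming "overwrite" is the option string backing
 * TRACE_ITER_OVERWRITE (handled by set_tracer_flag() above):
 *
 *	# echo overwrite > trace_options	# overwrite oldest events when full
 *	# echo nooverwrite > trace_options	# stop recording when full instead
 *
 * Names that are not core options fall through to set_tracer_option()
 * and may match a tracer-specific flag.
 */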
3728
3729 static void __init apply_trace_boot_options(void)
3730 {
3731         char *buf = trace_boot_options_buf;
3732         char *option;
3733
3734         while (true) {
3735                 option = strsep(&buf, ",");
3736
3737                 if (!option)
3738                         break;
3739
3740                 if (*option)
3741                         trace_set_options(&global_trace, option);
3742
3743                 /* Put back the comma to allow this to be called again */
3744                 if (buf)
3745                         *(buf - 1) = ',';
3746         }
3747 }
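/*
 * A sketch of the intended boot-time use, assuming trace_boot_options_buf
 * is filled from the "trace_options=" kernel command line parameter:
 *
 *	trace_options=nooverwrite,sym-offset
 *
 * Each comma-separated token is passed to trace_set_options() exactly as
 * if it had been written to the trace_options file.
 */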
3748
3749 static ssize_t
3750 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3751                         size_t cnt, loff_t *ppos)
3752 {
3753         struct seq_file *m = filp->private_data;
3754         struct trace_array *tr = m->private;
3755         char buf[64];
3756         int ret;
3757
3758         if (cnt >= sizeof(buf))
3759                 return -EINVAL;
3760
3761         if (copy_from_user(buf, ubuf, cnt))
3762                 return -EFAULT;
3763
3764         buf[cnt] = 0;
3765
3766         ret = trace_set_options(tr, buf);
3767         if (ret < 0)
3768                 return ret;
3769
3770         *ppos += cnt;
3771
3772         return cnt;
3773 }
3774
3775 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3776 {
3777         struct trace_array *tr = inode->i_private;
3778         int ret;
3779
3780         if (tracing_disabled)
3781                 return -ENODEV;
3782
3783         if (trace_array_get(tr) < 0)
3784                 return -ENODEV;
3785
3786         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3787         if (ret < 0)
3788                 trace_array_put(tr);
3789
3790         return ret;
3791 }
3792
3793 static const struct file_operations tracing_iter_fops = {
3794         .open           = tracing_trace_options_open,
3795         .read           = seq_read,
3796         .llseek         = seq_lseek,
3797         .release        = tracing_single_release_tr,
3798         .write          = tracing_trace_options_write,
3799 };
3800
3801 static const char readme_msg[] =
3802         "tracing mini-HOWTO:\n\n"
3803         "# echo 0 > tracing_on : quick way to disable tracing\n"
3804         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3805         " Important files:\n"
3806         "  trace\t\t\t- The static contents of the buffer\n"
3807         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3808         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3809         "  current_tracer\t- function and latency tracers\n"
3810         "  available_tracers\t- list of configured tracers for current_tracer\n"
3811         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3812         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3813         "  trace_clock\t\t-change the clock used to order events\n"
3814         "       local:   Per cpu clock but may not be synced across CPUs\n"
3815         "      global:   Synced across CPUs but slows tracing down.\n"
3816         "     counter:   Not a clock, but just an increment\n"
3817         "      uptime:   Jiffy counter from time of boot\n"
3818         "        perf:   Same clock that perf events use\n"
3819 #ifdef CONFIG_X86_64
3820         "     x86-tsc:   TSC cycle counter\n"
3821 #endif
3822         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
3823         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3824         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3825         "\t\t\t  Remove sub-buffer with rmdir\n"
3826         "  trace_options\t\t- Set format or modify how tracing happens\n"
3827         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
3828         "\t\t\t  option name\n"
3829         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
3830 #ifdef CONFIG_DYNAMIC_FTRACE
3831         "\n  available_filter_functions - list of functions that can be filtered on\n"
3832         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3833         "\t\t\t  functions\n"
3834         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3835         "\t     modules: Can select a group via module\n"
3836         "\t      Format: :mod:<module-name>\n"
3837         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3838         "\t    triggers: a command to perform when function is hit\n"
3839         "\t      Format: <function>:<trigger>[:count]\n"
3840         "\t     trigger: traceon, traceoff\n"
3841         "\t\t      enable_event:<system>:<event>\n"
3842         "\t\t      disable_event:<system>:<event>\n"
3843 #ifdef CONFIG_STACKTRACE
3844         "\t\t      stacktrace\n"
3845 #endif
3846 #ifdef CONFIG_TRACER_SNAPSHOT
3847         "\t\t      snapshot\n"
3848 #endif
3849         "\t\t      dump\n"
3850         "\t\t      cpudump\n"
3851         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3852         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3853         "\t     The first one will disable tracing every time do_fault is hit\n"
3854         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3855         "\t       The first time do trap is hit and it disables tracing, the\n"
3856         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3857         "\t       the counter will not decrement. It only decrements when the\n"
3858         "\t       trigger did work\n"
3859         "\t     To remove trigger without count:\n"
3860         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
3861         "\t     To remove trigger with a count:\n"
3862         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
3863         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3864         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3865         "\t    modules: Can select a group via module command :mod:\n"
3866         "\t    Does not accept triggers\n"
3867 #endif /* CONFIG_DYNAMIC_FTRACE */
3868 #ifdef CONFIG_FUNCTION_TRACER
3869         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3870         "\t\t    (function)\n"
3871 #endif
3872 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3873         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3874         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3875         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3876 #endif
3877 #ifdef CONFIG_TRACER_SNAPSHOT
3878         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3879         "\t\t\t  snapshot buffer. Read the contents for more\n"
3880         "\t\t\t  information\n"
3881 #endif
3882 #ifdef CONFIG_STACK_TRACER
3883         "  stack_trace\t\t- Shows the max stack trace when active\n"
3884         "  stack_max_size\t- Shows current max stack size that was traced\n"
3885         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3886         "\t\t\t  new trace)\n"
3887 #ifdef CONFIG_DYNAMIC_FTRACE
3888         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3889         "\t\t\t  traces\n"
3890 #endif
3891 #endif /* CONFIG_STACK_TRACER */
3892         "  events/\t\t- Directory containing all trace event subsystems:\n"
3893         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3894         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3895         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3896         "\t\t\t  events\n"
3897         "      filter\t\t- If set, only events passing filter are traced\n"
3898         "  events/<system>/<event>/\t- Directory containing control files for\n"
3899         "\t\t\t  <event>:\n"
3900         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3901         "      filter\t\t- If set, only events passing filter are traced\n"
3902         "      trigger\t\t- If set, a command to perform when event is hit\n"
3903         "\t    Format: <trigger>[:count][if <filter>]\n"
3904         "\t   trigger: traceon, traceoff\n"
3905         "\t            enable_event:<system>:<event>\n"
3906         "\t            disable_event:<system>:<event>\n"
3907 #ifdef CONFIG_HIST_TRIGGERS
3908         "\t            enable_hist:<system>:<event>\n"
3909         "\t            disable_hist:<system>:<event>\n"
3910 #endif
3911 #ifdef CONFIG_STACKTRACE
3912         "\t\t    stacktrace\n"
3913 #endif
3914 #ifdef CONFIG_TRACER_SNAPSHOT
3915         "\t\t    snapshot\n"
3916 #endif
3917 #ifdef CONFIG_HIST_TRIGGERS
3918         "\t\t    hist (see below)\n"
3919 #endif
3920         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3921         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3922         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3923         "\t                  events/block/block_unplug/trigger\n"
3924         "\t   The first disables tracing every time block_unplug is hit.\n"
3925         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3926         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3927         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3928         "\t   Like function triggers, the counter is only decremented if it\n"
3929         "\t    enabled or disabled tracing.\n"
3930         "\t   To remove a trigger without a count:\n"
3931         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
3932         "\t   To remove a trigger with a count:\n"
3933         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
3934         "\t   Filters can be ignored when removing a trigger.\n"
3935 #ifdef CONFIG_HIST_TRIGGERS
3936         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
3937         "\t    Format: hist:keys=<field1[,field2,...]>\n"
3938         "\t            [:values=<field1[,field2,...]>]\n"
3939         "\t            [:sort=<field1[,field2,...]>]\n"
3940         "\t            [:size=#entries]\n"
3941         "\t            [:pause][:continue][:clear]\n"
3942         "\t            [:name=histname1]\n"
3943         "\t            [if <filter>]\n\n"
3944         "\t    When a matching event is hit, an entry is added to a hash\n"
3945         "\t    table using the key(s) and value(s) named, and the value of a\n"
3946         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
3947         "\t    correspond to fields in the event's format description.  Keys\n"
3948         "\t    can be any field, or the special string 'stacktrace'.\n"
3949         "\t    Compound keys consisting of up to two fields can be specified\n"
3950         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
3951         "\t    fields.  Sort keys consisting of up to two fields can be\n"
3952         "\t    specified using the 'sort' keyword.  The sort direction can\n"
3953         "\t    be modified by appending '.descending' or '.ascending' to a\n"
3954         "\t    sort field.  The 'size' parameter can be used to specify more\n"
3955         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
3956         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
3957         "\t    its histogram data will be shared with other triggers of the\n"
3958         "\t    same name, and trigger hits will update this common data.\n\n"
3959         "\t    Reading the 'hist' file for the event will dump the hash\n"
3960         "\t    table in its entirety to stdout.  If there are multiple hist\n"
3961         "\t    triggers attached to an event, there will be a table for each\n"
3962         "\t    trigger in the output.  The table displayed for a named\n"
3963         "\t    trigger will be the same as any other instance having the\n"
3964         "\t    same name.  The default format used to display a given field\n"
3965         "\t    can be modified by appending any of the following modifiers\n"
3966         "\t    to the field name, as applicable:\n\n"
3967         "\t            .hex        display a number as a hex value\n"
3968         "\t            .sym        display an address as a symbol\n"
3969         "\t            .sym-offset display an address as a symbol and offset\n"
3970         "\t            .execname   display a common_pid as a program name\n"
3971         "\t            .syscall    display a syscall id as a syscall name\n\n"
3972         "\t            .log2       display log2 value rather than raw number\n\n"
3973         "\t    The 'pause' parameter can be used to pause an existing hist\n"
3974         "\t    trigger or to start a hist trigger but not log any events\n"
3975         "\t    until told to do so.  'continue' can be used to start or\n"
3976         "\t    restart a paused hist trigger.\n\n"
3977         "\t    The 'clear' parameter will clear the contents of a running\n"
3978         "\t    hist trigger and leave its current paused/active state\n"
3979         "\t    unchanged.\n\n"
3980         "\t    The enable_hist and disable_hist triggers can be used to\n"
3981         "\t    have one event conditionally start and stop another event's\n"
3982         "\t    already-attached hist trigger.  The syntax is analagous to\n"
3983         "\t    the enable_event and disable_event triggers.\n"
3984 #endif
3985 ;
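/*
 * Worked example of the hist trigger syntax documented above, reusing the
 * block_unplug event and its nr_rq field from the earlier examples
 * (CONFIG_HIST_TRIGGERS only; 'hitcount' is the implicit per-bucket sum):
 *
 *	# echo 'hist:keys=nr_rq:sort=hitcount.descending:size=4096' > \
 *			events/block/block_unplug/trigger
 *	# cat events/block/block_unplug/hist
 */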
3986
3987 static ssize_t
3988 tracing_readme_read(struct file *filp, char __user *ubuf,
3989                        size_t cnt, loff_t *ppos)
3990 {
3991         return simple_read_from_buffer(ubuf, cnt, ppos,
3992                                         readme_msg, strlen(readme_msg));
3993 }
3994
3995 static const struct file_operations tracing_readme_fops = {
3996         .open           = tracing_open_generic,
3997         .read           = tracing_readme_read,
3998         .llseek         = generic_file_llseek,
3999 };
4000
4001 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4002 {
4003         unsigned int *ptr = v;
4004
4005         if (*pos || m->count)
4006                 ptr++;
4007
4008         (*pos)++;
4009
4010         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4011              ptr++) {
4012                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4013                         continue;
4014
4015                 return ptr;
4016         }
4017
4018         return NULL;
4019 }
4020
4021 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4022 {
4023         void *v;
4024         loff_t l = 0;
4025
4026         preempt_disable();
4027         arch_spin_lock(&trace_cmdline_lock);
4028
4029         v = &savedcmd->map_cmdline_to_pid[0];
4030         while (l <= *pos) {
4031                 v = saved_cmdlines_next(m, v, &l);
4032                 if (!v)
4033                         return NULL;
4034         }
4035
4036         return v;
4037 }
4038
4039 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4040 {
4041         arch_spin_unlock(&trace_cmdline_lock);
4042         preempt_enable();
4043 }
4044
4045 static int saved_cmdlines_show(struct seq_file *m, void *v)
4046 {
4047         char buf[TASK_COMM_LEN];
4048         unsigned int *pid = v;
4049
4050         __trace_find_cmdline(*pid, buf);
4051         seq_printf(m, "%d %s\n", *pid, buf);
4052         return 0;
4053 }
4054
4055 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4056         .start          = saved_cmdlines_start,
4057         .next           = saved_cmdlines_next,
4058         .stop           = saved_cmdlines_stop,
4059         .show           = saved_cmdlines_show,
4060 };
4061
4062 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4063 {
4064         if (tracing_disabled)
4065                 return -ENODEV;
4066
4067         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4068 }
4069
4070 static const struct file_operations tracing_saved_cmdlines_fops = {
4071         .open           = tracing_saved_cmdlines_open,
4072         .read           = seq_read,
4073         .llseek         = seq_lseek,
4074         .release        = seq_release,
4075 };
4076
4077 static ssize_t
4078 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4079                                  size_t cnt, loff_t *ppos)
4080 {
4081         char buf[64];
4082         int r;
4083
4084         arch_spin_lock(&trace_cmdline_lock);
4085         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4086         arch_spin_unlock(&trace_cmdline_lock);
4087
4088         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4089 }
4090
4091 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4092 {
4093         kfree(s->saved_cmdlines);
4094         kfree(s->map_cmdline_to_pid);
4095         kfree(s);
4096 }
4097
4098 static int tracing_resize_saved_cmdlines(unsigned int val)
4099 {
4100         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4101
4102         s = kmalloc(sizeof(*s), GFP_KERNEL);
4103         if (!s)
4104                 return -ENOMEM;
4105
4106         if (allocate_cmdlines_buffer(val, s) < 0) {
4107                 kfree(s);
4108                 return -ENOMEM;
4109         }
4110
4111         arch_spin_lock(&trace_cmdline_lock);
4112         savedcmd_temp = savedcmd;
4113         savedcmd = s;
4114         arch_spin_unlock(&trace_cmdline_lock);
4115         free_saved_cmdlines_buffer(savedcmd_temp);
4116
4117         return 0;
4118 }
4119
4120 static ssize_t
4121 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4122                                   size_t cnt, loff_t *ppos)
4123 {
4124         unsigned long val;
4125         int ret;
4126
4127         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4128         if (ret)
4129                 return ret;
4130
4131         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4132         if (!val || val > PID_MAX_DEFAULT)
4133                 return -EINVAL;
4134
4135         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4136         if (ret < 0)
4137                 return ret;
4138
4139         *ppos += cnt;
4140
4141         return cnt;
4142 }
4143
4144 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4145         .open           = tracing_open_generic,
4146         .read           = tracing_saved_cmdlines_size_read,
4147         .write          = tracing_saved_cmdlines_size_write,
4148 };
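/*
 * Example: resizing the saved command line cache (the write handler above
 * limits the value to 1..PID_MAX_DEFAULT):
 *
 *	# echo 1024 > saved_cmdlines_size
 *	# cat saved_cmdlines		# pid -> comm mappings recorded so far
 *
 * The resize swaps in a freshly allocated buffer under trace_cmdline_lock
 * and frees the old one; previously cached entries are not carried over.
 */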
4149
4150 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4151 static union trace_enum_map_item *
4152 update_enum_map(union trace_enum_map_item *ptr)
4153 {
4154         if (!ptr->map.enum_string) {
4155                 if (ptr->tail.next) {
4156                         ptr = ptr->tail.next;
4157                         /* Set ptr to the next real item (skip head) */
4158                         ptr++;
4159                 } else
4160                         return NULL;
4161         }
4162         return ptr;
4163 }
4164
4165 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4166 {
4167         union trace_enum_map_item *ptr = v;
4168
4169         /*
4170          * Paranoid! If ptr points to end, we don't want to increment past it.
4171          * This really should never happen.
4172          */
4173         ptr = update_enum_map(ptr);
4174         if (WARN_ON_ONCE(!ptr))
4175                 return NULL;
4176
4177         ptr++;
4178
4179         (*pos)++;
4180
4181         ptr = update_enum_map(ptr);
4182
4183         return ptr;
4184 }
4185
4186 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4187 {
4188         union trace_enum_map_item *v;
4189         loff_t l = 0;
4190
4191         mutex_lock(&trace_enum_mutex);
4192
4193         v = trace_enum_maps;
4194         if (v)
4195                 v++;
4196
4197         while (v && l < *pos) {
4198                 v = enum_map_next(m, v, &l);
4199         }
4200
4201         return v;
4202 }
4203
4204 static void enum_map_stop(struct seq_file *m, void *v)
4205 {
4206         mutex_unlock(&trace_enum_mutex);
4207 }
4208
4209 static int enum_map_show(struct seq_file *m, void *v)
4210 {
4211         union trace_enum_map_item *ptr = v;
4212
4213         seq_printf(m, "%s %ld (%s)\n",
4214                    ptr->map.enum_string, ptr->map.enum_value,
4215                    ptr->map.system);
4216
4217         return 0;
4218 }
4219
4220 static const struct seq_operations tracing_enum_map_seq_ops = {
4221         .start          = enum_map_start,
4222         .next           = enum_map_next,
4223         .stop           = enum_map_stop,
4224         .show           = enum_map_show,
4225 };
4226
4227 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4228 {
4229         if (tracing_disabled)
4230                 return -ENODEV;
4231
4232         return seq_open(filp, &tracing_enum_map_seq_ops);
4233 }
4234
4235 static const struct file_operations tracing_enum_map_fops = {
4236         .open           = tracing_enum_map_open,
4237         .read           = seq_read,
4238         .llseek         = seq_lseek,
4239         .release        = seq_release,
4240 };
4241
4242 static inline union trace_enum_map_item *
4243 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4244 {
4245         /* Return tail of array given the head */
4246         return ptr + ptr->head.length + 1;
4247 }
4248
4249 static void
4250 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4251                            int len)
4252 {
4253         struct trace_enum_map **stop;
4254         struct trace_enum_map **map;
4255         union trace_enum_map_item *map_array;
4256         union trace_enum_map_item *ptr;
4257
4258         stop = start + len;
4259
4260         /*
4261          * The trace_enum_maps contains the map plus a head and tail item,
4262          * where the head holds the module and length of array, and the
4263          * tail holds a pointer to the next list.
4264          */
4265         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4266         if (!map_array) {
4267                 pr_warn("Unable to allocate trace enum mapping\n");
4268                 return;
4269         }
4270
4271         mutex_lock(&trace_enum_mutex);
4272
4273         if (!trace_enum_maps)
4274                 trace_enum_maps = map_array;
4275         else {
4276                 ptr = trace_enum_maps;
4277                 for (;;) {
4278                         ptr = trace_enum_jmp_to_tail(ptr);
4279                         if (!ptr->tail.next)
4280                                 break;
4281                         ptr = ptr->tail.next;
4282
4283                 }
4284                 ptr->tail.next = map_array;
4285         }
4286         map_array->head.mod = mod;
4287         map_array->head.length = len;
4288         map_array++;
4289
4290         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4291                 map_array->map = **map;
4292                 map_array++;
4293         }
4294         memset(map_array, 0, sizeof(*map_array));
4295
4296         mutex_unlock(&trace_enum_mutex);
4297 }
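/*
 * Layout of one map_array allocation as built above (len + 2 union
 * entries; a later module chains its own array through tail.next):
 *
 *	[ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next, zeroed ]
 *
 * update_enum_map() recognizes the tail by its NULL enum_string and, when
 * tail.next is set, skips over the next array's head entry.
 */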
4298
4299 static void trace_create_enum_file(struct dentry *d_tracer)
4300 {
4301         trace_create_file("enum_map", 0444, d_tracer,
4302                           NULL, &tracing_enum_map_fops);
4303 }
4304
4305 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4306 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4307 static inline void trace_insert_enum_map_file(struct module *mod,
4308                               struct trace_enum_map **start, int len) { }
4309 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4310
4311 static void trace_insert_enum_map(struct module *mod,
4312                                   struct trace_enum_map **start, int len)
4313 {
4314         struct trace_enum_map **map;
4315
4316         if (len <= 0)
4317                 return;
4318
4319         map = start;
4320
4321         trace_event_enum_update(map, len);
4322
4323         trace_insert_enum_map_file(mod, start, len);
4324 }
4325
4326 static ssize_t
4327 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4328                        size_t cnt, loff_t *ppos)
4329 {
4330         struct trace_array *tr = filp->private_data;
4331         char buf[MAX_TRACER_SIZE+2];
4332         int r;
4333
4334         mutex_lock(&trace_types_lock);
4335         r = sprintf(buf, "%s\n", tr->current_trace->name);
4336         mutex_unlock(&trace_types_lock);
4337
4338         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4339 }
4340
4341 int tracer_init(struct tracer *t, struct trace_array *tr)
4342 {
4343         tracing_reset_online_cpus(&tr->trace_buffer);
4344         return t->init(tr);
4345 }
4346
4347 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4348 {
4349         int cpu;
4350
4351         for_each_tracing_cpu(cpu)
4352                 per_cpu_ptr(buf->data, cpu)->entries = val;
4353 }
4354
4355 #ifdef CONFIG_TRACER_MAX_TRACE
4356 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4357 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4358                                         struct trace_buffer *size_buf, int cpu_id)
4359 {
4360         int cpu, ret = 0;
4361
4362         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4363                 for_each_tracing_cpu(cpu) {
4364                         ret = ring_buffer_resize(trace_buf->buffer,
4365                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4366                         if (ret < 0)
4367                                 break;
4368                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4369                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4370                 }
4371         } else {
4372                 ret = ring_buffer_resize(trace_buf->buffer,
4373                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4374                 if (ret == 0)
4375                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4376                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4377         }
4378
4379         return ret;
4380 }
4381 #endif /* CONFIG_TRACER_MAX_TRACE */
4382
4383 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4384                                         unsigned long size, int cpu)
4385 {
4386         int ret;
4387
4388         /*
4389          * If kernel or user changes the size of the ring buffer
4390          * we use the size that was given, and we can forget about
4391          * expanding it later.
4392          */
4393         ring_buffer_expanded = true;
4394
4395         /* May be called before buffers are initialized */
4396         if (!tr->trace_buffer.buffer)
4397                 return 0;
4398
4399         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4400         if (ret < 0)
4401                 return ret;
4402
4403 #ifdef CONFIG_TRACER_MAX_TRACE
4404         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4405             !tr->current_trace->use_max_tr)
4406                 goto out;
4407
4408         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4409         if (ret < 0) {
4410                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4411                                                      &tr->trace_buffer, cpu);
4412                 if (r < 0) {
4413                         /*
4414                          * AARGH! We are left with different
4415                          * size max buffer!!!!
4416                          * The max buffer is our "snapshot" buffer.
4417                          * When a tracer needs a snapshot (one of the
4418                          * latency tracers), it swaps the max buffer
4419                          * with the saved snapshot. We succeeded in
4420                          * updating the size of the main buffer, but failed to
4421                          * update the size of the max buffer. But when we tried
4422                          * to reset the main buffer to the original size, we
4423                          * failed there too. This is very unlikely to
4424                          * happen, but if it does, warn and kill all
4425                          * tracing.
4426                          */
4427                         WARN_ON(1);
4428                         tracing_disabled = 1;
4429                 }
4430                 return ret;
4431         }
4432
4433         if (cpu == RING_BUFFER_ALL_CPUS)
4434                 set_buffer_entries(&tr->max_buffer, size);
4435         else
4436                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4437
4438  out:
4439 #endif /* CONFIG_TRACER_MAX_TRACE */
4440
4441         if (cpu == RING_BUFFER_ALL_CPUS)
4442                 set_buffer_entries(&tr->trace_buffer, size);
4443         else
4444                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4445
4446         return ret;
4447 }
4448
4449 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4450                                           unsigned long size, int cpu_id)
4451 {
4452         int ret = size;
4453
4454         mutex_lock(&trace_types_lock);
4455
4456         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4457                 /* make sure this cpu is enabled in the mask */
4458                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4459                         ret = -EINVAL;
4460                         goto out;
4461                 }
4462         }
4463
4464         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4465         if (ret < 0)
4466                 ret = -ENOMEM;
4467
4468 out:
4469         mutex_unlock(&trace_types_lock);
4470
4471         return ret;
4472 }
4473
4474
4475 /**
4476  * tracing_update_buffers - used by tracing facility to expand ring buffers
4477  *
4478  * To save memory when tracing is never used on a system that has it
4479  * configured in, the ring buffers are set to a minimum size. Once a
4480  * user starts to use the tracing facility, they need to grow to
4481  * their default size.
4482  *
4483  * This function is to be called when a tracer is about to be used.
4484  */
4485 int tracing_update_buffers(void)
4486 {
4487         int ret = 0;
4488
4489         mutex_lock(&trace_types_lock);
4490         if (!ring_buffer_expanded)
4491                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4492                                                 RING_BUFFER_ALL_CPUS);
4493         mutex_unlock(&trace_types_lock);
4494
4495         return ret;
4496 }
4497
4498 struct trace_option_dentry;
4499
4500 static void
4501 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4502
4503 /*
4504  * Used to clear out the tracer before deletion of an instance.
4505  * Must have trace_types_lock held.
4506  */
4507 static void tracing_set_nop(struct trace_array *tr)
4508 {
4509         if (tr->current_trace == &nop_trace)
4510                 return;
4511
4512         tr->current_trace->enabled--;
4513
4514         if (tr->current_trace->reset)
4515                 tr->current_trace->reset(tr);
4516
4517         tr->current_trace = &nop_trace;
4518 }
4519
4520 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4521 {
4522         /* Only enable if the directory has been created already. */
4523         if (!tr->dir)
4524                 return;
4525
4526         create_trace_option_files(tr, t);
4527 }
4528
4529 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4530 {
4531         struct tracer *t;
4532 #ifdef CONFIG_TRACER_MAX_TRACE
4533         bool had_max_tr;
4534 #endif
4535         int ret = 0;
4536
4537         mutex_lock(&trace_types_lock);
4538
4539         if (!ring_buffer_expanded) {
4540                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4541                                                 RING_BUFFER_ALL_CPUS);
4542                 if (ret < 0)
4543                         goto out;
4544                 ret = 0;
4545         }
4546
4547         for (t = trace_types; t; t = t->next) {
4548                 if (strcmp(t->name, buf) == 0)
4549                         break;
4550         }
4551         if (!t) {
4552                 ret = -EINVAL;
4553                 goto out;
4554         }
4555         if (t == tr->current_trace)
4556                 goto out;
4557
4558         /* Some tracers are only allowed for the top level buffer */
4559         if (!trace_ok_for_array(t, tr)) {
4560                 ret = -EINVAL;
4561                 goto out;
4562         }
4563
4564         /* If trace pipe files are being read, we can't change the tracer */
4565         if (tr->current_trace->ref) {
4566                 ret = -EBUSY;
4567                 goto out;
4568         }
4569
4570         trace_branch_disable();
4571
4572         tr->current_trace->enabled--;
4573
4574         if (tr->current_trace->reset)
4575                 tr->current_trace->reset(tr);
4576
4577         /* Current trace needs to be nop_trace before synchronize_sched */
4578         tr->current_trace = &nop_trace;
4579
4580 #ifdef CONFIG_TRACER_MAX_TRACE
4581         had_max_tr = tr->allocated_snapshot;
4582
4583         if (had_max_tr && !t->use_max_tr) {
4584                 /*
4585                  * We need to make sure that the update_max_tr sees that
4586                  * current_trace changed to nop_trace to keep it from
4587                  * swapping the buffers after we resize it.
4588                  * The update_max_tr is called with interrupts disabled,
4589                  * so a synchronize_sched() is sufficient.
4590                  */
4591                 synchronize_sched();
4592                 free_snapshot(tr);
4593         }
4594 #endif
4595
4596 #ifdef CONFIG_TRACER_MAX_TRACE
4597         if (t->use_max_tr && !had_max_tr) {
4598                 ret = alloc_snapshot(tr);
4599                 if (ret < 0)
4600                         goto out;
4601         }
4602 #endif
4603
4604         if (t->init) {
4605                 ret = tracer_init(t, tr);
4606                 if (ret)
4607                         goto out;
4608         }
4609
4610         tr->current_trace = t;
4611         tr->current_trace->enabled++;
4612         trace_branch_enable(tr);
4613  out:
4614         mutex_unlock(&trace_types_lock);
4615
4616         return ret;
4617 }
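/*
 * Example, assuming the "function" tracer is configured in (it appears in
 * available_tracers when CONFIG_FUNCTION_TRACER is enabled):
 *
 *	# cat available_tracers
 *	# echo function > current_tracer
 *	# echo nop > current_tracer	# drop back to no tracer
 *
 * The switch fails with -EBUSY while trace_pipe readers hold a reference
 * (tr->current_trace->ref) and with -EINVAL for tracers not allowed in
 * this trace instance.
 */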
4618
4619 static ssize_t
4620 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4621                         size_t cnt, loff_t *ppos)
4622 {
4623         struct trace_array *tr = filp->private_data;
4624         char buf[MAX_TRACER_SIZE+1];
4625         int i;
4626         size_t ret;
4627         int err;
4628
4629         ret = cnt;
4630
4631         if (cnt > MAX_TRACER_SIZE)
4632                 cnt = MAX_TRACER_SIZE;
4633
4634         if (copy_from_user(buf, ubuf, cnt))
4635                 return -EFAULT;
4636
4637         buf[cnt] = 0;
4638
4639         /* strip ending whitespace. */
4640         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4641                 buf[i] = 0;
4642
4643         err = tracing_set_tracer(tr, buf);
4644         if (err)
4645                 return err;
4646
4647         *ppos += ret;
4648
4649         return ret;
4650 }
4651
4652 static ssize_t
4653 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4654                    size_t cnt, loff_t *ppos)
4655 {
4656         char buf[64];
4657         int r;
4658
4659         r = snprintf(buf, sizeof(buf), "%ld\n",
4660                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4661         if (r > sizeof(buf))
4662                 r = sizeof(buf);
4663         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4664 }
4665
4666 static ssize_t
4667 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4668                     size_t cnt, loff_t *ppos)
4669 {
4670         unsigned long val;
4671         int ret;
4672
4673         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4674         if (ret)
4675                 return ret;
4676
4677         *ptr = val * 1000;
4678
4679         return cnt;
4680 }
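/*
 * These two helpers expose nanosecond variables in microseconds: reads
 * convert with nsecs_to_usecs() and writes multiply by 1000. For example,
 * with tracing_thresh (used by the tracing_thresh file below):
 *
 *	# echo 100 > tracing_thresh	# 100 us, stored as 100000 ns
 *	# cat tracing_thresh		# prints 100
 */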
4681
4682 static ssize_t
4683 tracing_thresh_read(struct file *filp, char __user *ubuf,
4684                     size_t cnt, loff_t *ppos)
4685 {
4686         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4687 }
4688
4689 static ssize_t
4690 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4691                      size_t cnt, loff_t *ppos)
4692 {
4693         struct trace_array *tr = filp->private_data;
4694         int ret;
4695
4696         mutex_lock(&trace_types_lock);
4697         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4698         if (ret < 0)
4699                 goto out;
4700
4701         if (tr->current_trace->update_thresh) {
4702                 ret = tr->current_trace->update_thresh(tr);
4703                 if (ret < 0)
4704                         goto out;
4705         }
4706
4707         ret = cnt;
4708 out:
4709         mutex_unlock(&trace_types_lock);
4710
4711         return ret;
4712 }
4713
4714 #ifdef CONFIG_TRACER_MAX_TRACE
4715
4716 static ssize_t
4717 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4718                      size_t cnt, loff_t *ppos)
4719 {
4720         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4721 }
4722
4723 static ssize_t
4724 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4725                       size_t cnt, loff_t *ppos)
4726 {
4727         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4728 }
4729
4730 #endif
4731
4732 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4733 {
4734         struct trace_array *tr = inode->i_private;
4735         struct trace_iterator *iter;
4736         int ret = 0;
4737
4738         if (tracing_disabled)
4739                 return -ENODEV;
4740
4741         if (trace_array_get(tr) < 0)
4742                 return -ENODEV;
4743
4744         mutex_lock(&trace_types_lock);
4745
4746         /* create a buffer to store the information to pass to userspace */
4747         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4748         if (!iter) {
4749                 ret = -ENOMEM;
4750                 __trace_array_put(tr);
4751                 goto out;
4752         }
4753
4754         trace_seq_init(&iter->seq);
4755         iter->trace = tr->current_trace;
4756
4757         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4758                 ret = -ENOMEM;
4759                 goto fail;
4760         }
4761
4762         /* trace pipe does not show start of buffer */
4763         cpumask_setall(iter->started);
4764
4765         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4766                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4767
4768         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4769         if (trace_clocks[tr->clock_id].in_ns)
4770                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4771
4772         iter->tr = tr;
4773         iter->trace_buffer = &tr->trace_buffer;
4774         iter->cpu_file = tracing_get_cpu(inode);
4775         mutex_init(&iter->mutex);
4776         filp->private_data = iter;
4777
4778         if (iter->trace->pipe_open)
4779                 iter->trace->pipe_open(iter);
4780
4781         nonseekable_open(inode, filp);
4782
4783         tr->current_trace->ref++;
4784 out:
4785         mutex_unlock(&trace_types_lock);
4786         return ret;
4787
4788 fail:
4789         kfree(iter->trace);
4790         kfree(iter);
4791         __trace_array_put(tr);
4792         mutex_unlock(&trace_types_lock);
4793         return ret;
4794 }
4795
4796 static int tracing_release_pipe(struct inode *inode, struct file *file)
4797 {
4798         struct trace_iterator *iter = file->private_data;
4799         struct trace_array *tr = inode->i_private;
4800
4801         mutex_lock(&trace_types_lock);
4802
4803         tr->current_trace->ref--;
4804
4805         if (iter->trace->pipe_close)
4806                 iter->trace->pipe_close(iter);
4807
4808         mutex_unlock(&trace_types_lock);
4809
4810         free_cpumask_var(iter->started);
4811         mutex_destroy(&iter->mutex);
4812         kfree(iter);
4813
4814         trace_array_put(tr);
4815
4816         return 0;
4817 }
4818
4819 static unsigned int
4820 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4821 {
4822         struct trace_array *tr = iter->tr;
4823
4824         /* Iterators are static; they are either filled or empty */
4825         if (trace_buffer_iter(iter, iter->cpu_file))
4826                 return POLLIN | POLLRDNORM;
4827
4828         if (tr->trace_flags & TRACE_ITER_BLOCK)
4829                 /*
4830                  * Always select as readable when in blocking mode
4831                  */
4832                 return POLLIN | POLLRDNORM;
4833         else
4834                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4835                                              filp, poll_table);
4836 }
4837
4838 static unsigned int
4839 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4840 {
4841         struct trace_iterator *iter = filp->private_data;
4842
4843         return trace_poll(iter, filp, poll_table);
4844 }
4845
4846 /* Must be called with iter->mutex held. */
4847 static int tracing_wait_pipe(struct file *filp)
4848 {
4849         struct trace_iterator *iter = filp->private_data;
4850         int ret;
4851
4852         while (trace_empty(iter)) {
4853
4854                 if ((filp->f_flags & O_NONBLOCK)) {
4855                         return -EAGAIN;
4856                 }
4857
4858                 /*
4859                  * We block until we have read something and tracing is
4860                  * disabled. We keep blocking if tracing is disabled but
4861                  * nothing has been read yet. This lets a user cat this
4862                  * file and then enable tracing. Once we have read
4863                  * something, we give an EOF when tracing is disabled again.
4864                  *
4865                  * iter->pos will be 0 if we haven't read anything.
4866                  */
4867                 if (!tracing_is_on() && iter->pos)
4868                         break;
4869
4870                 mutex_unlock(&iter->mutex);
4871
4872                 ret = wait_on_pipe(iter, false);
4873
4874                 mutex_lock(&iter->mutex);
4875
4876                 if (ret)
4877                         return ret;
4878         }
4879
4880         return 1;
4881 }
4882
4883 /*
4884  * Consumer reader.
4885  */
4886 static ssize_t
4887 tracing_read_pipe(struct file *filp, char __user *ubuf,
4888                   size_t cnt, loff_t *ppos)
4889 {
4890         struct trace_iterator *iter = filp->private_data;
4891         ssize_t sret;
4892
4893         /*
4894          * Avoid more than one consumer on a single file descriptor.
4895          * This is just a matter of trace coherency; the ring buffer
4896          * itself is protected.
4897          */
4898         mutex_lock(&iter->mutex);
4899
4900         /* return any leftover data */
4901         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4902         if (sret != -EBUSY)
4903                 goto out;
4904
4905         trace_seq_init(&iter->seq);
4906
4907         if (iter->trace->read) {
4908                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4909                 if (sret)
4910                         goto out;
4911         }
4912
4913 waitagain:
4914         sret = tracing_wait_pipe(filp);
4915         if (sret <= 0)
4916                 goto out;
4917
4918         /* stop when tracing is finished */
4919         if (trace_empty(iter)) {
4920                 sret = 0;
4921                 goto out;
4922         }
4923
4924         if (cnt >= PAGE_SIZE)
4925                 cnt = PAGE_SIZE - 1;
4926
4927         /* reset all but tr, trace, and overruns */
4928         memset(&iter->seq, 0,
4929                sizeof(struct trace_iterator) -
4930                offsetof(struct trace_iterator, seq));
4931         cpumask_clear(iter->started);
4932         iter->pos = -1;
4933
4934         trace_event_read_lock();
4935         trace_access_lock(iter->cpu_file);
4936         while (trace_find_next_entry_inc(iter) != NULL) {
4937                 enum print_line_t ret;
4938                 int save_len = iter->seq.seq.len;
4939
4940                 ret = print_trace_line(iter);
4941                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4942                         /* don't print partial lines */
4943                         iter->seq.seq.len = save_len;
4944                         break;
4945                 }
4946                 if (ret != TRACE_TYPE_NO_CONSUME)
4947                         trace_consume(iter);
4948
4949                 if (trace_seq_used(&iter->seq) >= cnt)
4950                         break;
4951
4952                 /*
4953                  * If the full flag is set, we reached the trace_seq buffer
4954                  * size and should have left via the partial-line check above.
4955                  * One of the trace_seq_* functions is not being used properly.
4956                  */
4957                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4958                           iter->ent->type);
4959         }
4960         trace_access_unlock(iter->cpu_file);
4961         trace_event_read_unlock();
4962
4963         /* Now copy what we have to the user */
4964         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4965         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4966                 trace_seq_init(&iter->seq);
4967
4968         /*
4969          * If there was nothing to send to user, in spite of consuming trace
4970          * entries, go back to wait for more entries.
4971          */
4972         if (sret == -EBUSY)
4973                 goto waitagain;
4974
4975 out:
4976         mutex_unlock(&iter->mutex);
4977
4978         return sret;
4979 }
4980
4981 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4982                                      unsigned int idx)
4983 {
4984         __free_page(spd->pages[idx]);
4985 }
4986
4987 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4988         .can_merge              = 0,
4989         .confirm                = generic_pipe_buf_confirm,
4990         .release                = generic_pipe_buf_release,
4991         .steal                  = generic_pipe_buf_steal,
4992         .get                    = generic_pipe_buf_get,
4993 };
4994
4995 static size_t
4996 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4997 {
4998         size_t count;
4999         int save_len;
5000         int ret;
5001
5002         /* Seq buffer is page-sized, exactly what we need. */
5003         for (;;) {
5004                 save_len = iter->seq.seq.len;
5005                 ret = print_trace_line(iter);
5006
5007                 if (trace_seq_has_overflowed(&iter->seq)) {
5008                         iter->seq.seq.len = save_len;
5009                         break;
5010                 }
5011
5012                 /*
5013                  * This should not be hit, because it should only
5014                  * be set if the iter->seq overflowed. But check it
5015                  * anyway to be safe.
5016                  */
5017                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5018                         iter->seq.seq.len = save_len;
5019                         break;
5020                 }
5021
5022                 count = trace_seq_used(&iter->seq) - save_len;
5023                 if (rem < count) {
5024                         rem = 0;
5025                         iter->seq.seq.len = save_len;
5026                         break;
5027                 }
5028
5029                 if (ret != TRACE_TYPE_NO_CONSUME)
5030                         trace_consume(iter);
5031                 rem -= count;
5032                 if (!trace_find_next_entry_inc(iter))   {
5033                         rem = 0;
5034                         iter->ent = NULL;
5035                         break;
5036                 }
5037         }
5038
5039         return rem;
5040 }
5041
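/*
 * splice() support for trace_pipe. Unlike the trace_pipe_raw path
 * (tracing_buffers_splice_read() further below), this is not zero
 * copy: pages are allocated here, filled from the iterator's trace_seq
 * via tracing_fill_pipe_page() and trace_seq_to_buffer(), and then
 * handed to splice_to_pipe(). Pages the pipe does not take are freed
 * through the tracing_spd_release_pipe() callback above.
 */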
5042 static ssize_t tracing_splice_read_pipe(struct file *filp,
5043                                         loff_t *ppos,
5044                                         struct pipe_inode_info *pipe,
5045                                         size_t len,
5046                                         unsigned int flags)
5047 {
5048         struct page *pages_def[PIPE_DEF_BUFFERS];
5049         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5050         struct trace_iterator *iter = filp->private_data;
5051         struct splice_pipe_desc spd = {
5052                 .pages          = pages_def,
5053                 .partial        = partial_def,
5054                 .nr_pages       = 0, /* This gets updated below. */
5055                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5056                 .flags          = flags,
5057                 .ops            = &tracing_pipe_buf_ops,
5058                 .spd_release    = tracing_spd_release_pipe,
5059         };
5060         ssize_t ret;
5061         size_t rem;
5062         unsigned int i;
5063
5064         if (splice_grow_spd(pipe, &spd))
5065                 return -ENOMEM;
5066
5067         mutex_lock(&iter->mutex);
5068
5069         if (iter->trace->splice_read) {
5070                 ret = iter->trace->splice_read(iter, filp,
5071                                                ppos, pipe, len, flags);
5072                 if (ret)
5073                         goto out_err;
5074         }
5075
5076         ret = tracing_wait_pipe(filp);
5077         if (ret <= 0)
5078                 goto out_err;
5079
5080         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5081                 ret = -EFAULT;
5082                 goto out_err;
5083         }
5084
5085         trace_event_read_lock();
5086         trace_access_lock(iter->cpu_file);
5087
5088         /* Fill as many pages as possible. */
5089         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5090                 spd.pages[i] = alloc_page(GFP_KERNEL);
5091                 if (!spd.pages[i])
5092                         break;
5093
5094                 rem = tracing_fill_pipe_page(rem, iter);
5095
5096                 /* Copy the data into the page, so we can start over. */
5097                 ret = trace_seq_to_buffer(&iter->seq,
5098                                           page_address(spd.pages[i]),
5099                                           trace_seq_used(&iter->seq));
5100                 if (ret < 0) {
5101                         __free_page(spd.pages[i]);
5102                         break;
5103                 }
5104                 spd.partial[i].offset = 0;
5105                 spd.partial[i].len = trace_seq_used(&iter->seq);
5106
5107                 trace_seq_init(&iter->seq);
5108         }
5109
5110         trace_access_unlock(iter->cpu_file);
5111         trace_event_read_unlock();
5112         mutex_unlock(&iter->mutex);
5113
5114         spd.nr_pages = i;
5115
5116         if (i)
5117                 ret = splice_to_pipe(pipe, &spd);
5118         else
5119                 ret = 0;
5120 out:
5121         splice_shrink_spd(&spd);
5122         return ret;
5123
5124 out_err:
5125         mutex_unlock(&iter->mutex);
5126         goto out;
5127 }
5128
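/*
 * buffer_size_kb: the per-cpu ring buffer size in kilobytes. Reading
 * the top-level file prints one number when all CPUs have the same
 * size, "X" when they differ, and "(expanded: N)" while
 * ring_buffer_expanded is not yet set. Writing resizes the buffer,
 * e.g. (assuming the usual tracefs mount point):
 *
 *   # echo 4096 > /sys/kernel/debug/tracing/buffer_size_kb
 *
 * which gives each CPU a 4 MB buffer.
 */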
5129 static ssize_t
5130 tracing_entries_read(struct file *filp, char __user *ubuf,
5131                      size_t cnt, loff_t *ppos)
5132 {
5133         struct inode *inode = file_inode(filp);
5134         struct trace_array *tr = inode->i_private;
5135         int cpu = tracing_get_cpu(inode);
5136         char buf[64];
5137         int r = 0;
5138         ssize_t ret;
5139
5140         mutex_lock(&trace_types_lock);
5141
5142         if (cpu == RING_BUFFER_ALL_CPUS) {
5143                 int cpu, buf_size_same;
5144                 unsigned long size;
5145
5146                 size = 0;
5147                 buf_size_same = 1;
5148                 /* check if all per-cpu buffer sizes are the same */
5149                 for_each_tracing_cpu(cpu) {
5150                         /* fill in the size from first enabled cpu */
5151                         if (size == 0)
5152                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5153                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5154                                 buf_size_same = 0;
5155                                 break;
5156                         }
5157                 }
5158
5159                 if (buf_size_same) {
5160                         if (!ring_buffer_expanded)
5161                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5162                                             size >> 10,
5163                                             trace_buf_size >> 10);
5164                         else
5165                                 r = sprintf(buf, "%lu\n", size >> 10);
5166                 } else
5167                         r = sprintf(buf, "X\n");
5168         } else
5169                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5170
5171         mutex_unlock(&trace_types_lock);
5172
5173         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5174         return ret;
5175 }
5176
5177 static ssize_t
5178 tracing_entries_write(struct file *filp, const char __user *ubuf,
5179                       size_t cnt, loff_t *ppos)
5180 {
5181         struct inode *inode = file_inode(filp);
5182         struct trace_array *tr = inode->i_private;
5183         unsigned long val;
5184         int ret;
5185
5186         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5187         if (ret)
5188                 return ret;
5189
5190         /* must have at least 1 entry */
5191         if (!val)
5192                 return -EINVAL;
5193
5194         /* value is in KB */
5195         val <<= 10;
5196         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5197         if (ret < 0)
5198                 return ret;
5199
5200         *ppos += cnt;
5201
5202         return cnt;
5203 }
5204
5205 static ssize_t
5206 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5207                                 size_t cnt, loff_t *ppos)
5208 {
5209         struct trace_array *tr = filp->private_data;
5210         char buf[64];
5211         int r, cpu;
5212         unsigned long size = 0, expanded_size = 0;
5213
5214         mutex_lock(&trace_types_lock);
5215         for_each_tracing_cpu(cpu) {
5216                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5217                 if (!ring_buffer_expanded)
5218                         expanded_size += trace_buf_size >> 10;
5219         }
5220         if (ring_buffer_expanded)
5221                 r = sprintf(buf, "%lu\n", size);
5222         else
5223                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5224         mutex_unlock(&trace_types_lock);
5225
5226         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5227 }
5228
5229 static ssize_t
5230 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5231                           size_t cnt, loff_t *ppos)
5232 {
5233         /*
5234          * There is no need to read what the user has written; this
5235          * function only exists so that "echo" does not report an error.
5236          */
5237
5238         *ppos += cnt;
5239
5240         return cnt;
5241 }
5242
5243 static int
5244 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5245 {
5246         struct trace_array *tr = inode->i_private;
5247
5248         /* disable tracing ? */
5249         /* disable tracing? */
5250                 tracer_tracing_off(tr);
5251         /* resize the ring buffer to 0 */
5252         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5253
5254         trace_array_put(tr);
5255
5256         return 0;
5257 }
5258
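/*
 * trace_marker: lets userspace inject a line of text into the ring
 * buffer as a TRACE_PRINT event, e.g. (assuming the usual tracefs
 * mount point):
 *
 *   # echo "hello from userspace" > /sys/kernel/debug/tracing/trace_marker
 *
 * The write below pins the user pages with get_user_pages_fast() and
 * copies from them directly, using two pages when the string crosses
 * a page boundary.
 */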
5259 static ssize_t
5260 tracing_mark_write(struct file *filp, const char __user *ubuf,
5261                                         size_t cnt, loff_t *fpos)
5262 {
5263         unsigned long addr = (unsigned long)ubuf;
5264         struct trace_array *tr = filp->private_data;
5265         struct ring_buffer_event *event;
5266         struct ring_buffer *buffer;
5267         struct print_entry *entry;
5268         unsigned long irq_flags;
5269         struct page *pages[2];
5270         void *map_page[2];
5271         int nr_pages = 1;
5272         ssize_t written;
5273         int offset;
5274         int size;
5275         int len;
5276         int ret;
5277         int i;
5278
5279         if (tracing_disabled)
5280                 return -EINVAL;
5281
5282         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5283                 return -EINVAL;
5284
5285         if (cnt > TRACE_BUF_SIZE)
5286                 cnt = TRACE_BUF_SIZE;
5287
5288         /*
5289          * Userspace is injecting traces into the kernel trace buffer.
5290          * We want to be as non-intrusive as possible.
5291          * To do so, we do not want to allocate any special buffers
5292          * or take any locks, but instead write the userspace data
5293          * straight into the ring buffer.
5294          *
5295          * First we need to pin the userspace buffer into memory,
5296          * which it most likely already is, because userspace just
5297          * referenced it. But there is no guarantee of that. By using
5298          * get_user_pages_fast() and kmap_atomic()/kunmap_atomic() we
5299          * can access the pages directly and write the data straight
5300          * into the ring buffer.
5301          */
5302         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5303
5304         /* check if we cross pages */
5305         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5306                 nr_pages = 2;
5307
5308         offset = addr & (PAGE_SIZE - 1);
5309         addr &= PAGE_MASK;
5310
5311         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5312         if (ret < nr_pages) {
5313                 while (--ret >= 0)
5314                         put_page(pages[ret]);
5315                 written = -EFAULT;
5316                 goto out;
5317         }
5318
5319         for (i = 0; i < nr_pages; i++)
5320                 map_page[i] = kmap_atomic(pages[i]);
5321
5322         local_save_flags(irq_flags);
5323         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5324         buffer = tr->trace_buffer.buffer;
5325         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5326                                           irq_flags, preempt_count());
5327         if (!event) {
5328                 /* Ring buffer disabled, return as if not open for write */
5329                 written = -EBADF;
5330                 goto out_unlock;
5331         }
5332
5333         entry = ring_buffer_event_data(event);
5334         entry->ip = _THIS_IP_;
5335
5336         if (nr_pages == 2) {
5337                 len = PAGE_SIZE - offset;
5338                 memcpy(&entry->buf, map_page[0] + offset, len);
5339                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5340         } else
5341                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5342
5343         if (entry->buf[cnt - 1] != '\n') {
5344                 entry->buf[cnt] = '\n';
5345                 entry->buf[cnt + 1] = '\0';
5346         } else
5347                 entry->buf[cnt] = '\0';
5348
5349         __buffer_unlock_commit(buffer, event);
5350
5351         written = cnt;
5352
5353         *fpos += written;
5354
5355  out_unlock:
5356         for (i = nr_pages - 1; i >= 0; i--) {
5357                 kunmap_atomic(map_page[i]);
5358                 put_page(pages[i]);
5359         }
5360  out:
5361         return written;
5362 }
5363
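/*
 * trace_clock: reading lists the available clocks with the current one
 * in brackets; writing a clock name switches to it and resets the
 * buffers, since timestamps from different clocks are not comparable.
 * For example (assuming the usual tracefs mount point):
 *
 *   # cat /sys/kernel/debug/tracing/trace_clock
 *   [local] global counter ...
 *   # echo global > /sys/kernel/debug/tracing/trace_clock
 */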
5364 static int tracing_clock_show(struct seq_file *m, void *v)
5365 {
5366         struct trace_array *tr = m->private;
5367         int i;
5368
5369         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5370                 seq_printf(m,
5371                         "%s%s%s%s", i ? " " : "",
5372                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5373                         i == tr->clock_id ? "]" : "");
5374         seq_putc(m, '\n');
5375
5376         return 0;
5377 }
5378
5379 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5380 {
5381         int i;
5382
5383         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5384                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5385                         break;
5386         }
5387         if (i == ARRAY_SIZE(trace_clocks))
5388                 return -EINVAL;
5389
5390         mutex_lock(&trace_types_lock);
5391
5392         tr->clock_id = i;
5393
5394         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5395
5396         /*
5397          * New clock may not be consistent with the previous clock.
5398          * Reset the buffer so that it doesn't have incomparable timestamps.
5399          */
5400         tracing_reset_online_cpus(&tr->trace_buffer);
5401
5402 #ifdef CONFIG_TRACER_MAX_TRACE
5403         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5404                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5405         tracing_reset_online_cpus(&tr->max_buffer);
5406 #endif
5407
5408         mutex_unlock(&trace_types_lock);
5409
5410         return 0;
5411 }
5412
5413 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5414                                    size_t cnt, loff_t *fpos)
5415 {
5416         struct seq_file *m = filp->private_data;
5417         struct trace_array *tr = m->private;
5418         char buf[64];
5419         const char *clockstr;
5420         int ret;
5421
5422         if (cnt >= sizeof(buf))
5423                 return -EINVAL;
5424
5425         if (copy_from_user(buf, ubuf, cnt))
5426                 return -EFAULT;
5427
5428         buf[cnt] = 0;
5429
5430         clockstr = strstrip(buf);
5431
5432         ret = tracing_set_clock(tr, clockstr);
5433         if (ret)
5434                 return ret;
5435
5436         *fpos += cnt;
5437
5438         return cnt;
5439 }
5440
5441 static int tracing_clock_open(struct inode *inode, struct file *file)
5442 {
5443         struct trace_array *tr = inode->i_private;
5444         int ret;
5445
5446         if (tracing_disabled)
5447                 return -ENODEV;
5448
5449         if (trace_array_get(tr))
5450                 return -ENODEV;
5451
5452         ret = single_open(file, tracing_clock_show, inode->i_private);
5453         if (ret < 0)
5454                 trace_array_put(tr);
5455
5456         return ret;
5457 }
5458
5459 struct ftrace_buffer_info {
5460         struct trace_iterator   iter;
5461         void                    *spare;
5462         unsigned int            read;
5463 };
5464
5465 #ifdef CONFIG_TRACER_SNAPSHOT
5466 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5467 {
5468         struct trace_array *tr = inode->i_private;
5469         struct trace_iterator *iter;
5470         struct seq_file *m;
5471         int ret = 0;
5472
5473         if (trace_array_get(tr) < 0)
5474                 return -ENODEV;
5475
5476         if (file->f_mode & FMODE_READ) {
5477                 iter = __tracing_open(inode, file, true);
5478                 if (IS_ERR(iter))
5479                         ret = PTR_ERR(iter);
5480         } else {
5481                 /* Writes still need the seq_file to hold the private data */
5482                 ret = -ENOMEM;
5483                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5484                 if (!m)
5485                         goto out;
5486                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5487                 if (!iter) {
5488                         kfree(m);
5489                         goto out;
5490                 }
5491                 ret = 0;
5492
5493                 iter->tr = tr;
5494                 iter->trace_buffer = &tr->max_buffer;
5495                 iter->cpu_file = tracing_get_cpu(inode);
5496                 m->private = iter;
5497                 file->private_data = m;
5498         }
5499 out:
5500         if (ret < 0)
5501                 trace_array_put(tr);
5502
5503         return ret;
5504 }
5505
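/*
 * Writing to the snapshot file controls the spare (max) buffer:
 *
 *   echo 0 > snapshot  - free the snapshot buffer (all-CPU file only)
 *   echo 1 > snapshot  - allocate it if needed and swap it with the
 *                        live buffer (per-cpu swap needs
 *                        CONFIG_RING_BUFFER_ALLOW_SWAP)
 *   echo 2 > snapshot  - clear the snapshot buffer without freeing it
 *
 * Any value above 1 behaves like 2, as the default case below shows.
 */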
5506 static ssize_t
5507 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5508                        loff_t *ppos)
5509 {
5510         struct seq_file *m = filp->private_data;
5511         struct trace_iterator *iter = m->private;
5512         struct trace_array *tr = iter->tr;
5513         unsigned long val;
5514         int ret;
5515
5516         ret = tracing_update_buffers();
5517         if (ret < 0)
5518                 return ret;
5519
5520         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5521         if (ret)
5522                 return ret;
5523
5524         mutex_lock(&trace_types_lock);
5525
5526         if (tr->current_trace->use_max_tr) {
5527                 ret = -EBUSY;
5528                 goto out;
5529         }
5530
5531         switch (val) {
5532         case 0:
5533                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5534                         ret = -EINVAL;
5535                         break;
5536                 }
5537                 if (tr->allocated_snapshot)
5538                         free_snapshot(tr);
5539                 break;
5540         case 1:
5541 /* Only allow per-cpu swap if the ring buffer supports it */
5542 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5543                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5544                         ret = -EINVAL;
5545                         break;
5546                 }
5547 #endif
5548                 if (!tr->allocated_snapshot) {
5549                         ret = alloc_snapshot(tr);
5550                         if (ret < 0)
5551                                 break;
5552                 }
5553                 local_irq_disable();
5554                 /* Now, we're going to swap */
5555                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5556                         update_max_tr(tr, current, smp_processor_id());
5557                 else
5558                         update_max_tr_single(tr, current, iter->cpu_file);
5559                 local_irq_enable();
5560                 break;
5561         default:
5562                 if (tr->allocated_snapshot) {
5563                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5564                                 tracing_reset_online_cpus(&tr->max_buffer);
5565                         else
5566                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5567                 }
5568                 break;
5569         }
5570
5571         if (ret >= 0) {
5572                 *ppos += cnt;
5573                 ret = cnt;
5574         }
5575 out:
5576         mutex_unlock(&trace_types_lock);
5577         return ret;
5578 }
5579
5580 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5581 {
5582         struct seq_file *m = file->private_data;
5583         int ret;
5584
5585         ret = tracing_release(inode, file);
5586
5587         if (file->f_mode & FMODE_READ)
5588                 return ret;
5589
5590         /* If write only, the seq_file is just a stub */
5591         if (m)
5592                 kfree(m->private);
5593         kfree(m);
5594
5595         return 0;
5596 }
5597
5598 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5599 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5600                                     size_t count, loff_t *ppos);
5601 static int tracing_buffers_release(struct inode *inode, struct file *file);
5602 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5603                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5604
5605 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5606 {
5607         struct ftrace_buffer_info *info;
5608         int ret;
5609
5610         ret = tracing_buffers_open(inode, filp);
5611         if (ret < 0)
5612                 return ret;
5613
5614         info = filp->private_data;
5615
5616         if (info->iter.trace->use_max_tr) {
5617                 tracing_buffers_release(inode, filp);
5618                 return -EBUSY;
5619         }
5620
5621         info->iter.snapshot = true;
5622         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5623
5624         return ret;
5625 }
5626
5627 #endif /* CONFIG_TRACER_SNAPSHOT */
5628
5629
5630 static const struct file_operations tracing_thresh_fops = {
5631         .open           = tracing_open_generic,
5632         .read           = tracing_thresh_read,
5633         .write          = tracing_thresh_write,
5634         .llseek         = generic_file_llseek,
5635 };
5636
5637 #ifdef CONFIG_TRACER_MAX_TRACE
5638 static const struct file_operations tracing_max_lat_fops = {
5639         .open           = tracing_open_generic,
5640         .read           = tracing_max_lat_read,
5641         .write          = tracing_max_lat_write,
5642         .llseek         = generic_file_llseek,
5643 };
5644 #endif
5645
5646 static const struct file_operations set_tracer_fops = {
5647         .open           = tracing_open_generic,
5648         .read           = tracing_set_trace_read,
5649         .write          = tracing_set_trace_write,
5650         .llseek         = generic_file_llseek,
5651 };
5652
5653 static const struct file_operations tracing_pipe_fops = {
5654         .open           = tracing_open_pipe,
5655         .poll           = tracing_poll_pipe,
5656         .read           = tracing_read_pipe,
5657         .splice_read    = tracing_splice_read_pipe,
5658         .release        = tracing_release_pipe,
5659         .llseek         = no_llseek,
5660 };
5661
5662 static const struct file_operations tracing_entries_fops = {
5663         .open           = tracing_open_generic_tr,
5664         .read           = tracing_entries_read,
5665         .write          = tracing_entries_write,
5666         .llseek         = generic_file_llseek,
5667         .release        = tracing_release_generic_tr,
5668 };
5669
5670 static const struct file_operations tracing_total_entries_fops = {
5671         .open           = tracing_open_generic_tr,
5672         .read           = tracing_total_entries_read,
5673         .llseek         = generic_file_llseek,
5674         .release        = tracing_release_generic_tr,
5675 };
5676
5677 static const struct file_operations tracing_free_buffer_fops = {
5678         .open           = tracing_open_generic_tr,
5679         .write          = tracing_free_buffer_write,
5680         .release        = tracing_free_buffer_release,
5681 };
5682
5683 static const struct file_operations tracing_mark_fops = {
5684         .open           = tracing_open_generic_tr,
5685         .write          = tracing_mark_write,
5686         .llseek         = generic_file_llseek,
5687         .release        = tracing_release_generic_tr,
5688 };
5689
5690 static const struct file_operations trace_clock_fops = {
5691         .open           = tracing_clock_open,
5692         .read           = seq_read,
5693         .llseek         = seq_lseek,
5694         .release        = tracing_single_release_tr,
5695         .write          = tracing_clock_write,
5696 };
5697
5698 #ifdef CONFIG_TRACER_SNAPSHOT
5699 static const struct file_operations snapshot_fops = {
5700         .open           = tracing_snapshot_open,
5701         .read           = seq_read,
5702         .write          = tracing_snapshot_write,
5703         .llseek         = tracing_lseek,
5704         .release        = tracing_snapshot_release,
5705 };
5706
5707 static const struct file_operations snapshot_raw_fops = {
5708         .open           = snapshot_raw_open,
5709         .read           = tracing_buffers_read,
5710         .release        = tracing_buffers_release,
5711         .splice_read    = tracing_buffers_splice_read,
5712         .llseek         = no_llseek,
5713 };
5714
5715 #endif /* CONFIG_TRACER_SNAPSHOT */
5716
5717 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5718 {
5719         struct trace_array *tr = inode->i_private;
5720         struct ftrace_buffer_info *info;
5721         int ret;
5722
5723         if (tracing_disabled)
5724                 return -ENODEV;
5725
5726         if (trace_array_get(tr) < 0)
5727                 return -ENODEV;
5728
5729         info = kzalloc(sizeof(*info), GFP_KERNEL);
5730         if (!info) {
5731                 trace_array_put(tr);
5732                 return -ENOMEM;
5733         }
5734
5735         mutex_lock(&trace_types_lock);
5736
5737         info->iter.tr           = tr;
5738         info->iter.cpu_file     = tracing_get_cpu(inode);
5739         info->iter.trace        = tr->current_trace;
5740         info->iter.trace_buffer = &tr->trace_buffer;
5741         info->spare             = NULL;
5742         /* Force reading ring buffer for first read */
5743         info->read              = (unsigned int)-1;
5744
5745         filp->private_data = info;
5746
5747         tr->current_trace->ref++;
5748
5749         mutex_unlock(&trace_types_lock);
5750
5751         ret = nonseekable_open(inode, filp);
5752         if (ret < 0)
5753                 trace_array_put(tr);
5754
5755         return ret;
5756 }
5757
5758 static unsigned int
5759 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5760 {
5761         struct ftrace_buffer_info *info = filp->private_data;
5762         struct trace_iterator *iter = &info->iter;
5763
5764         return trace_poll(iter, filp, poll_table);
5765 }
5766
5767 static ssize_t
5768 tracing_buffers_read(struct file *filp, char __user *ubuf,
5769                      size_t count, loff_t *ppos)
5770 {
5771         struct ftrace_buffer_info *info = filp->private_data;
5772         struct trace_iterator *iter = &info->iter;
5773         ssize_t ret;
5774         ssize_t size;
5775
5776         if (!count)
5777                 return 0;
5778
5779 #ifdef CONFIG_TRACER_MAX_TRACE
5780         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5781                 return -EBUSY;
5782 #endif
5783
5784         if (!info->spare)
5785                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5786                                                           iter->cpu_file);
5787         if (!info->spare)
5788                 return -ENOMEM;
5789
5790         /* Is there data left over from a previous read? */
5791         if (info->read < PAGE_SIZE)
5792                 goto read;
5793
5794  again:
5795         trace_access_lock(iter->cpu_file);
5796         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5797                                     &info->spare,
5798                                     count,
5799                                     iter->cpu_file, 0);
5800         trace_access_unlock(iter->cpu_file);
5801
5802         if (ret < 0) {
5803                 if (trace_empty(iter)) {
5804                         if ((filp->f_flags & O_NONBLOCK))
5805                                 return -EAGAIN;
5806
5807                         ret = wait_on_pipe(iter, false);
5808                         if (ret)
5809                                 return ret;
5810
5811                         goto again;
5812                 }
5813                 return 0;
5814         }
5815
5816         info->read = 0;
5817  read:
5818         size = PAGE_SIZE - info->read;
5819         if (size > count)
5820                 size = count;
5821
5822         ret = copy_to_user(ubuf, info->spare + info->read, size);
5823         if (ret == size)
5824                 return -EFAULT;
5825
5826         size -= ret;
5827
5828         *ppos += size;
5829         info->read += size;
5830
5831         return size;
5832 }
5833
5834 static int tracing_buffers_release(struct inode *inode, struct file *file)
5835 {
5836         struct ftrace_buffer_info *info = file->private_data;
5837         struct trace_iterator *iter = &info->iter;
5838
5839         mutex_lock(&trace_types_lock);
5840
5841         iter->tr->current_trace->ref--;
5842
5843         __trace_array_put(iter->tr);
5844
5845         if (info->spare)
5846                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5847         kfree(info);
5848
5849         mutex_unlock(&trace_types_lock);
5850
5851         return 0;
5852 }
5853
5854 struct buffer_ref {
5855         struct ring_buffer      *buffer;
5856         void                    *page;
5857         int                     ref;
5858 };
5859
5860 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5861                                     struct pipe_buffer *buf)
5862 {
5863         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5864
5865         if (--ref->ref)
5866                 return;
5867
5868         ring_buffer_free_read_page(ref->buffer, ref->page);
5869         kfree(ref);
5870         buf->private = 0;
5871 }
5872
5873 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5874                                 struct pipe_buffer *buf)
5875 {
5876         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5877
5878         ref->ref++;
5879 }
5880
5881 /* Pipe buffer operations for a buffer. */
5882 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5883         .can_merge              = 0,
5884         .confirm                = generic_pipe_buf_confirm,
5885         .release                = buffer_pipe_buf_release,
5886         .steal                  = generic_pipe_buf_steal,
5887         .get                    = buffer_pipe_buf_get,
5888 };
5889
5890 /*
5891  * Callback from splice_to_pipe(), used to release any pages left in
5892  * the spd if we errored out while filling the pipe.
5893  */
5894 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5895 {
5896         struct buffer_ref *ref =
5897                 (struct buffer_ref *)spd->partial[i].private;
5898
5899         if (--ref->ref)
5900                 return;
5901
5902         ring_buffer_free_read_page(ref->buffer, ref->page);
5903         kfree(ref);
5904         spd->partial[i].private = 0;
5905 }
5906
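/*
 * splice() support for trace_pipe_raw: ring-buffer pages are handed
 * to the pipe without copying. Each page carries a buffer_ref whose
 * refcount (see buffer_pipe_buf_get()/buffer_pipe_buf_release() above)
 * decides when the page goes back to the ring buffer. An illustrative
 * userspace sequence, assuming the usual tracefs mount point and 4k
 * pages (the length must be at least one page):
 *
 *   fd = open("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw",
 *             O_RDONLY);
 *   splice(fd, NULL, pipefd[1], NULL, 4096, SPLICE_F_MOVE);
 *   splice(pipefd[0], NULL, outfd, NULL, 4096, 0);
 */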
5907 static ssize_t
5908 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5909                             struct pipe_inode_info *pipe, size_t len,
5910                             unsigned int flags)
5911 {
5912         struct ftrace_buffer_info *info = file->private_data;
5913         struct trace_iterator *iter = &info->iter;
5914         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5915         struct page *pages_def[PIPE_DEF_BUFFERS];
5916         struct splice_pipe_desc spd = {
5917                 .pages          = pages_def,
5918                 .partial        = partial_def,
5919                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5920                 .flags          = flags,
5921                 .ops            = &buffer_pipe_buf_ops,
5922                 .spd_release    = buffer_spd_release,
5923         };
5924         struct buffer_ref *ref;
5925         int entries, size, i;
5926         ssize_t ret = 0;
5927
5928 #ifdef CONFIG_TRACER_MAX_TRACE
5929         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5930                 return -EBUSY;
5931 #endif
5932
5933         if (*ppos & (PAGE_SIZE - 1))
5934                 return -EINVAL;
5935
5936         if (len & (PAGE_SIZE - 1)) {
5937                 if (len < PAGE_SIZE)
5938                         return -EINVAL;
5939                 len &= PAGE_MASK;
5940         }
5941
5942         if (splice_grow_spd(pipe, &spd))
5943                 return -ENOMEM;
5944
5945  again:
5946         trace_access_lock(iter->cpu_file);
5947         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5948
5949         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5950                 struct page *page;
5951                 int r;
5952
5953                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5954                 if (!ref) {
5955                         ret = -ENOMEM;
5956                         break;
5957                 }
5958
5959                 ref->ref = 1;
5960                 ref->buffer = iter->trace_buffer->buffer;
5961                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5962                 if (!ref->page) {
5963                         ret = -ENOMEM;
5964                         kfree(ref);
5965                         break;
5966                 }
5967
5968                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5969                                           len, iter->cpu_file, 1);
5970                 if (r < 0) {
5971                         ring_buffer_free_read_page(ref->buffer, ref->page);
5972                         kfree(ref);
5973                         break;
5974                 }
5975
5976                 /*
5977                  * Zero out any leftover data; this page is going to
5978                  * userspace.
5979                  */
5980                 size = ring_buffer_page_len(ref->page);
5981                 if (size < PAGE_SIZE)
5982                         memset(ref->page + size, 0, PAGE_SIZE - size);
5983
5984                 page = virt_to_page(ref->page);
5985
5986                 spd.pages[i] = page;
5987                 spd.partial[i].len = PAGE_SIZE;
5988                 spd.partial[i].offset = 0;
5989                 spd.partial[i].private = (unsigned long)ref;
5990                 spd.nr_pages++;
5991                 *ppos += PAGE_SIZE;
5992
5993                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5994         }
5995
5996         trace_access_unlock(iter->cpu_file);
5997         spd.nr_pages = i;
5998
5999         /* did we read anything? */
6000         if (!spd.nr_pages) {
6001                 if (ret)
6002                         goto out;
6003
6004                 ret = -EAGAIN;
6005                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6006                         goto out;
6007
6008                 ret = wait_on_pipe(iter, true);
6009                 if (ret)
6010                         goto out;
6011
6012                 goto again;
6013         }
6014
6015         ret = splice_to_pipe(pipe, &spd);
6016 out:
6017         splice_shrink_spd(&spd);
6018
6019         return ret;
6020 }
6021
6022 static const struct file_operations tracing_buffers_fops = {
6023         .open           = tracing_buffers_open,
6024         .read           = tracing_buffers_read,
6025         .poll           = tracing_buffers_poll,
6026         .release        = tracing_buffers_release,
6027         .splice_read    = tracing_buffers_splice_read,
6028         .llseek         = no_llseek,
6029 };
6030
6031 static ssize_t
6032 tracing_stats_read(struct file *filp, char __user *ubuf,
6033                    size_t count, loff_t *ppos)
6034 {
6035         struct inode *inode = file_inode(filp);
6036         struct trace_array *tr = inode->i_private;
6037         struct trace_buffer *trace_buf = &tr->trace_buffer;
6038         int cpu = tracing_get_cpu(inode);
6039         struct trace_seq *s;
6040         unsigned long cnt;
6041         unsigned long long t;
6042         unsigned long usec_rem;
6043
6044         s = kmalloc(sizeof(*s), GFP_KERNEL);
6045         if (!s)
6046                 return -ENOMEM;
6047
6048         trace_seq_init(s);
6049
6050         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6051         trace_seq_printf(s, "entries: %ld\n", cnt);
6052
6053         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6054         trace_seq_printf(s, "overrun: %ld\n", cnt);
6055
6056         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6057         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6058
6059         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6060         trace_seq_printf(s, "bytes: %ld\n", cnt);
6061
6062         if (trace_clocks[tr->clock_id].in_ns) {
6063                 /* local or global for trace_clock */
6064                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6065                 usec_rem = do_div(t, USEC_PER_SEC);
6066                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6067                                                                 t, usec_rem);
6068
6069                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6070                 usec_rem = do_div(t, USEC_PER_SEC);
6071                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6072         } else {
6073                 /* counter or tsc mode for trace_clock */
6074                 trace_seq_printf(s, "oldest event ts: %llu\n",
6075                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6076
6077                 trace_seq_printf(s, "now ts: %llu\n",
6078                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6079         }
6080
6081         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6082         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6083
6084         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6085         trace_seq_printf(s, "read events: %ld\n", cnt);
6086
6087         count = simple_read_from_buffer(ubuf, count, ppos,
6088                                         s->buffer, trace_seq_used(s));
6089
6090         kfree(s);
6091
6092         return count;
6093 }
6094
6095 static const struct file_operations tracing_stats_fops = {
6096         .open           = tracing_open_generic_tr,
6097         .read           = tracing_stats_read,
6098         .llseek         = generic_file_llseek,
6099         .release        = tracing_release_generic_tr,
6100 };
6101
6102 #ifdef CONFIG_DYNAMIC_FTRACE
6103
6104 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6105 {
6106         return 0;
6107 }
6108
6109 static ssize_t
6110 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6111                   size_t cnt, loff_t *ppos)
6112 {
6113         static char ftrace_dyn_info_buffer[1024];
6114         static DEFINE_MUTEX(dyn_info_mutex);
6115         unsigned long *p = filp->private_data;
6116         char *buf = ftrace_dyn_info_buffer;
6117         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6118         int r;
6119
6120         mutex_lock(&dyn_info_mutex);
6121         r = sprintf(buf, "%ld ", *p);
6122
6123         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6124         buf[r++] = '\n';
6125
6126         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6127
6128         mutex_unlock(&dyn_info_mutex);
6129
6130         return r;
6131 }
6132
6133 static const struct file_operations tracing_dyn_info_fops = {
6134         .open           = tracing_open_generic,
6135         .read           = tracing_read_dyn_info,
6136         .llseek         = generic_file_llseek,
6137 };
6138 #endif /* CONFIG_DYNAMIC_FTRACE */
6139
6140 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6141 static void
6142 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6143 {
6144         tracing_snapshot();
6145 }
6146
6147 static void
6148 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6149 {
6150         unsigned long *count = (unsigned long *)data;
6151
6152         if (!*count)
6153                 return;
6154
6155         if (*count != -1)
6156                 (*count)--;
6157
6158         tracing_snapshot();
6159 }
6160
6161 static int
6162 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6163                       struct ftrace_probe_ops *ops, void *data)
6164 {
6165         long count = (long)data;
6166
6167         seq_printf(m, "%ps:", (void *)ip);
6168
6169         seq_puts(m, "snapshot");
6170
6171         if (count == -1)
6172                 seq_puts(m, ":unlimited\n");
6173         else
6174                 seq_printf(m, ":count=%ld\n", count);
6175
6176         return 0;
6177 }
6178
6179 static struct ftrace_probe_ops snapshot_probe_ops = {
6180         .func                   = ftrace_snapshot,
6181         .print                  = ftrace_snapshot_print,
6182 };
6183
6184 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6185         .func                   = ftrace_count_snapshot,
6186         .print                  = ftrace_snapshot_print,
6187 };
6188
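/*
 * Handler for the "snapshot" command of set_ftrace_filter, e.g.
 * (assuming the usual tracefs mount point):
 *
 *   # echo 'schedule:snapshot' > /sys/kernel/debug/tracing/set_ftrace_filter
 *   # echo 'schedule:snapshot:3' > /sys/kernel/debug/tracing/set_ftrace_filter
 *   # echo '!schedule:snapshot' > /sys/kernel/debug/tracing/set_ftrace_filter
 *
 * The first takes a snapshot every time schedule() is hit, the second
 * only for the first three hits, and the '!' form removes the probe,
 * matching the glob/param handling below.
 */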
6189 static int
6190 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6191                                char *glob, char *cmd, char *param, int enable)
6192 {
6193         struct ftrace_probe_ops *ops;
6194         void *count = (void *)-1;
6195         char *number;
6196         int ret;
6197
6198         /* hash funcs only work with set_ftrace_filter */
6199         if (!enable)
6200                 return -EINVAL;
6201
6202         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6203
6204         if (glob[0] == '!') {
6205                 unregister_ftrace_function_probe_func(glob+1, ops);
6206                 return 0;
6207         }
6208
6209         if (!param)
6210                 goto out_reg;
6211
6212         number = strsep(&param, ":");
6213
6214         if (!strlen(number))
6215                 goto out_reg;
6216
6217         /*
6218          * We use the callback data field (which is a pointer)
6219          * as our counter.
6220          */
6221         ret = kstrtoul(number, 0, (unsigned long *)&count);
6222         if (ret)
6223                 return ret;
6224
6225  out_reg:
6226         ret = register_ftrace_function_probe(glob, ops, count);
6227
6228         if (ret >= 0)
6229                 alloc_snapshot(&global_trace);
6230
6231         return ret < 0 ? ret : 0;
6232 }
6233
6234 static struct ftrace_func_command ftrace_snapshot_cmd = {
6235         .name                   = "snapshot",
6236         .func                   = ftrace_trace_snapshot_callback,
6237 };
6238
6239 static __init int register_snapshot_cmd(void)
6240 {
6241         return register_ftrace_command(&ftrace_snapshot_cmd);
6242 }
6243 #else
6244 static inline __init int register_snapshot_cmd(void) { return 0; }
6245 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6246
6247 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6248 {
6249         if (WARN_ON(!tr->dir))
6250                 return ERR_PTR(-ENODEV);
6251
6252         /* Top directory uses NULL as the parent */
6253         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6254                 return NULL;
6255
6256         /* All sub buffers have a descriptor */
6257         return tr->dir;
6258 }
6259
6260 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6261 {
6262         struct dentry *d_tracer;
6263
6264         if (tr->percpu_dir)
6265                 return tr->percpu_dir;
6266
6267         d_tracer = tracing_get_dentry(tr);
6268         if (IS_ERR(d_tracer))
6269                 return NULL;
6270
6271         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6272
6273         WARN_ONCE(!tr->percpu_dir,
6274                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6275
6276         return tr->percpu_dir;
6277 }
6278
6279 static struct dentry *
6280 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6281                       void *data, long cpu, const struct file_operations *fops)
6282 {
6283         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6284
6285         if (ret) /* See tracing_get_cpu() */
6286                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6287         return ret;
6288 }
6289
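/*
 * Create the per_cpu/cpuN/ directory for one CPU, mirroring a subset
 * of the top-level files (trace, trace_pipe, trace_pipe_raw, stats,
 * buffer_size_kb, plus the snapshot files under CONFIG_TRACER_SNAPSHOT),
 * each limited to that CPU, e.g. (assuming the usual tracefs mount
 * point):
 *
 *   # cat /sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe
 */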
6290 static void
6291 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6292 {
6293         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6294         struct dentry *d_cpu;
6295         char cpu_dir[30]; /* 30 characters should be more than enough */
6296
6297         if (!d_percpu)
6298                 return;
6299
6300         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6301         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6302         if (!d_cpu) {
6303                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6304                 return;
6305         }
6306
6307         /* per cpu trace_pipe */
6308         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6309                                 tr, cpu, &tracing_pipe_fops);
6310
6311         /* per cpu trace */
6312         trace_create_cpu_file("trace", 0644, d_cpu,
6313                                 tr, cpu, &tracing_fops);
6314
6315         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6316                                 tr, cpu, &tracing_buffers_fops);
6317
6318         trace_create_cpu_file("stats", 0444, d_cpu,
6319                                 tr, cpu, &tracing_stats_fops);
6320
6321         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6322                                 tr, cpu, &tracing_entries_fops);
6323
6324 #ifdef CONFIG_TRACER_SNAPSHOT
6325         trace_create_cpu_file("snapshot", 0644, d_cpu,
6326                                 tr, cpu, &snapshot_fops);
6327
6328         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6329                                 tr, cpu, &snapshot_raw_fops);
6330 #endif
6331 }
6332
6333 #ifdef CONFIG_FTRACE_SELFTEST
6334 /* Let selftest have access to static functions in this file */
6335 #include "trace_selftest.c"
6336 #endif
6337
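/*
 * Each file under options/ exposes one tracer flag. Reading returns
 * "0" or "1"; writing accepts only 0 or 1, as enforced by the write
 * handlers below, e.g. (assuming the usual tracefs mount point):
 *
 *   # echo 1 > /sys/kernel/debug/tracing/options/sym-offset
 */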
6338 static ssize_t
6339 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6340                         loff_t *ppos)
6341 {
6342         struct trace_option_dentry *topt = filp->private_data;
6343         char *buf;
6344
6345         if (topt->flags->val & topt->opt->bit)
6346                 buf = "1\n";
6347         else
6348                 buf = "0\n";
6349
6350         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6351 }
6352
6353 static ssize_t
6354 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6355                          loff_t *ppos)
6356 {
6357         struct trace_option_dentry *topt = filp->private_data;
6358         unsigned long val;
6359         int ret;
6360
6361         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6362         if (ret)
6363                 return ret;
6364
6365         if (val != 0 && val != 1)
6366                 return -EINVAL;
6367
6368         if (!!(topt->flags->val & topt->opt->bit) != val) {
6369                 mutex_lock(&trace_types_lock);
6370                 ret = __set_tracer_option(topt->tr, topt->flags,
6371                                           topt->opt, !val);
6372                 mutex_unlock(&trace_types_lock);
6373                 if (ret)
6374                         return ret;
6375         }
6376
6377         *ppos += cnt;
6378
6379         return cnt;
6380 }
6381
6382
6383 static const struct file_operations trace_options_fops = {
6384         .open = tracing_open_generic,
6385         .read = trace_options_read,
6386         .write = trace_options_write,
6387         .llseek = generic_file_llseek,
6388 };
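
/*
 * Illustrative usage (not taken from this file): every tracer-specific
 * option registered through these fops shows up as a boolean file under
 * the instance's "options" directory, e.g.:
 *
 *   # cat /sys/kernel/tracing/options/<option-name>
 *   0
 *   # echo 1 > /sys/kernel/tracing/options/<option-name>
 *
 * Writing anything other than 0 or 1 returns -EINVAL, as enforced by
 * trace_options_write() above.
 */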
6389
6390 /*
6391  * In order to pass in both the trace_array descriptor as well as the index
6392  * to the flag that the trace option file represents, the trace_array
6393  * has a character array of trace_flags_index[], which holds the index
6394  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6395  * The address of the array element for a given flag is what gets passed
6396  * to that flag's option file read/write callbacks.
6397  *
6398  * In order to extract both the index and the trace_array descriptor,
6399  * get_tr_index() uses the following algorithm.
6400  *
6401  *   idx = *ptr;
6402  *
6403  * This works because the pointer points at one entry of the index array,
6404  * and each entry holds its own position (remember, index[1] == 1).
6405  *
6406  * Then, to get the trace_array descriptor, we subtract that index from
6407  * the pointer, which takes us back to the start of the index array:
6408  *
6409  *   ptr - idx == &index[0]
6410  *
6411  * Then a simple container_of() from that pointer gets us to the
6412  * trace_array descriptor.
6413  */
6414 static void get_tr_index(void *data, struct trace_array **ptr,
6415                          unsigned int *pindex)
6416 {
6417         *pindex = *(unsigned char *)data;
6418
6419         *ptr = container_of(data - *pindex, struct trace_array,
6420                             trace_flags_index);
6421 }
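
/*
 * Worked example of the scheme described above (addresses are made up):
 * assume trace_flags_index[] starts at address A and the option file for
 * flag bit 3 was created with data == &trace_flags_index[3].  Then:
 *
 *   idx  = *(unsigned char *)data            == 3  (since index[3] == 3)
 *   data - idx                               == &trace_flags_index[0] == A
 *   container_of(A, struct trace_array, trace_flags_index)
 *                                            == the owning trace_array
 */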
6422
6423 static ssize_t
6424 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6425                         loff_t *ppos)
6426 {
6427         void *tr_index = filp->private_data;
6428         struct trace_array *tr;
6429         unsigned int index;
6430         char *buf;
6431
6432         get_tr_index(tr_index, &tr, &index);
6433
6434         if (tr->trace_flags & (1 << index))
6435                 buf = "1\n";
6436         else
6437                 buf = "0\n";
6438
6439         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6440 }
6441
6442 static ssize_t
6443 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6444                          loff_t *ppos)
6445 {
6446         void *tr_index = filp->private_data;
6447         struct trace_array *tr;
6448         unsigned int index;
6449         unsigned long val;
6450         int ret;
6451
6452         get_tr_index(tr_index, &tr, &index);
6453
6454         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6455         if (ret)
6456                 return ret;
6457
6458         if (val != 0 && val != 1)
6459                 return -EINVAL;
6460
6461         mutex_lock(&trace_types_lock);
6462         ret = set_tracer_flag(tr, 1 << index, val);
6463         mutex_unlock(&trace_types_lock);
6464
6465         if (ret < 0)
6466                 return ret;
6467
6468         *ppos += cnt;
6469
6470         return cnt;
6471 }
6472
6473 static const struct file_operations trace_options_core_fops = {
6474         .open = tracing_open_generic,
6475         .read = trace_options_core_read,
6476         .write = trace_options_core_write,
6477         .llseek = generic_file_llseek,
6478 };
6479
6480 struct dentry *trace_create_file(const char *name,
6481                                  umode_t mode,
6482                                  struct dentry *parent,
6483                                  void *data,
6484                                  const struct file_operations *fops)
6485 {
6486         struct dentry *ret;
6487
6488         ret = tracefs_create_file(name, mode, parent, data, fops);
6489         if (!ret)
6490                 pr_warn("Could not create tracefs '%s' entry\n", name);
6491
6492         return ret;
6493 }
6494
6495
6496 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6497 {
6498         struct dentry *d_tracer;
6499
6500         if (tr->options)
6501                 return tr->options;
6502
6503         d_tracer = tracing_get_dentry(tr);
6504         if (IS_ERR(d_tracer))
6505                 return NULL;
6506
6507         tr->options = tracefs_create_dir("options", d_tracer);
6508         if (!tr->options) {
6509                 pr_warn("Could not create tracefs directory 'options'\n");
6510                 return NULL;
6511         }
6512
6513         return tr->options;
6514 }
6515
6516 static void
6517 create_trace_option_file(struct trace_array *tr,
6518                          struct trace_option_dentry *topt,
6519                          struct tracer_flags *flags,
6520                          struct tracer_opt *opt)
6521 {
6522         struct dentry *t_options;
6523
6524         t_options = trace_options_init_dentry(tr);
6525         if (!t_options)
6526                 return;
6527
6528         topt->flags = flags;
6529         topt->opt = opt;
6530         topt->tr = tr;
6531
6532         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6533                                     &trace_options_fops);
6534
6535 }
6536
6537 static void
6538 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6539 {
6540         struct trace_option_dentry *topts;
6541         struct trace_options *tr_topts;
6542         struct tracer_flags *flags;
6543         struct tracer_opt *opts;
6544         int cnt;
6545         int i;
6546
6547         if (!tracer)
6548                 return;
6549
6550         flags = tracer->flags;
6551
6552         if (!flags || !flags->opts)
6553                 return;
6554
6555         /*
6556          * If this is an instance, only create flags for tracers
6557          * the instance may have.
6558          */
6559         if (!trace_ok_for_array(tracer, tr))
6560                 return;
6561
6562         for (i = 0; i < tr->nr_topts; i++) {
6563                 /* Make sure there are no duplicate flags. */
6564                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6565                         return;
6566         }
6567
6568         opts = flags->opts;
6569
6570         for (cnt = 0; opts[cnt].name; cnt++)
6571                 ;
6572
6573         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6574         if (!topts)
6575                 return;
6576
6577         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6578                             GFP_KERNEL);
6579         if (!tr_topts) {
6580                 kfree(topts);
6581                 return;
6582         }
6583
6584         tr->topts = tr_topts;
6585         tr->topts[tr->nr_topts].tracer = tracer;
6586         tr->topts[tr->nr_topts].topts = topts;
6587         tr->nr_topts++;
6588
6589         for (cnt = 0; opts[cnt].name; cnt++) {
6590                 create_trace_option_file(tr, &topts[cnt], flags,
6591                                          &opts[cnt]);
6592                 WARN_ONCE(topts[cnt].entry == NULL,
6593                           "Failed to create trace option: %s",
6594                           opts[cnt].name);
6595         }
6596 }
6597
6598 static struct dentry *
6599 create_trace_option_core_file(struct trace_array *tr,
6600                               const char *option, long index)
6601 {
6602         struct dentry *t_options;
6603
6604         t_options = trace_options_init_dentry(tr);
6605         if (!t_options)
6606                 return NULL;
6607
6608         return trace_create_file(option, 0644, t_options,
6609                                  (void *)&tr->trace_flags_index[index],
6610                                  &trace_options_core_fops);
6611 }
6612
6613 static void create_trace_options_dir(struct trace_array *tr)
6614 {
6615         struct dentry *t_options;
6616         bool top_level = tr == &global_trace;
6617         int i;
6618
6619         t_options = trace_options_init_dentry(tr);
6620         if (!t_options)
6621                 return;
6622
6623         for (i = 0; trace_options[i]; i++) {
6624                 if (top_level ||
6625                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6626                         create_trace_option_core_file(tr, trace_options[i], i);
6627         }
6628 }
6629
6630 static ssize_t
6631 rb_simple_read(struct file *filp, char __user *ubuf,
6632                size_t cnt, loff_t *ppos)
6633 {
6634         struct trace_array *tr = filp->private_data;
6635         char buf[64];
6636         int r;
6637
6638         r = tracer_tracing_is_on(tr);
6639         r = sprintf(buf, "%d\n", r);
6640
6641         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6642 }
6643
6644 static ssize_t
6645 rb_simple_write(struct file *filp, const char __user *ubuf,
6646                 size_t cnt, loff_t *ppos)
6647 {
6648         struct trace_array *tr = filp->private_data;
6649         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6650         unsigned long val;
6651         int ret;
6652
6653         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6654         if (ret)
6655                 return ret;
6656
6657         if (buffer) {
6658                 mutex_lock(&trace_types_lock);
6659                 if (val) {
6660                         tracer_tracing_on(tr);
6661                         if (tr->current_trace->start)
6662                                 tr->current_trace->start(tr);
6663                 } else {
6664                         tracer_tracing_off(tr);
6665                         if (tr->current_trace->stop)
6666                                 tr->current_trace->stop(tr);
6667                 }
6668                 mutex_unlock(&trace_types_lock);
6669         }
6670
6671         (*ppos)++;
6672
6673         return cnt;
6674 }
6675
6676 static const struct file_operations rb_simple_fops = {
6677         .open           = tracing_open_generic_tr,
6678         .read           = rb_simple_read,
6679         .write          = rb_simple_write,
6680         .release        = tracing_release_generic_tr,
6681         .llseek         = default_llseek,
6682 };
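
/*
 * These fops back the "tracing_on" file.  A typical session (path shown
 * is illustrative; it depends on where tracefs is mounted) looks like:
 *
 *   # echo 0 > /sys/kernel/tracing/tracing_on       stop recording
 *   # echo 1 > /sys/kernel/tracing/tracing_on       resume recording
 *   # cat /sys/kernel/tracing/tracing_on            read back 0 or 1
 *
 * As rb_simple_write() shows, toggling the file also invokes the current
 * tracer's start()/stop() callbacks when they exist.
 */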
6683
6684 struct dentry *trace_instance_dir;
6685
6686 static void
6687 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6688
6689 static int
6690 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6691 {
6692         enum ring_buffer_flags rb_flags;
6693
6694         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6695
6696         buf->tr = tr;
6697
6698         buf->buffer = ring_buffer_alloc(size, rb_flags);
6699         if (!buf->buffer)
6700                 return -ENOMEM;
6701
6702         buf->data = alloc_percpu(struct trace_array_cpu);
6703         if (!buf->data) {
6704                 ring_buffer_free(buf->buffer);
6705                 return -ENOMEM;
6706         }
6707
6708         /* Allocate the first page for all buffers */
6709         set_buffer_entries(&tr->trace_buffer,
6710                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6711
6712         return 0;
6713 }
6714
6715 static int allocate_trace_buffers(struct trace_array *tr, int size)
6716 {
6717         int ret;
6718
6719         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6720         if (ret)
6721                 return ret;
6722
6723 #ifdef CONFIG_TRACER_MAX_TRACE
6724         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6725                                     allocate_snapshot ? size : 1);
6726         if (WARN_ON(ret)) {
6727                 ring_buffer_free(tr->trace_buffer.buffer);
6728                 free_percpu(tr->trace_buffer.data);
6729                 return -ENOMEM;
6730         }
6731         tr->allocated_snapshot = allocate_snapshot;
6732
6733         /*
6734          * Only the top level trace array gets its snapshot allocated
6735          * from the kernel command line.
6736          */
6737         allocate_snapshot = false;
6738 #endif
6739         return 0;
6740 }
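
/*
 * Sizing sketch for the snapshot (max) buffer allocated above, assuming
 * the "alloc_snapshot" kernel command line parameter is what sets
 * allocate_snapshot at boot:
 *
 *   without alloc_snapshot:  max_buffer is created with a token size of 1
 *                            and only grows when a snapshot is requested.
 *   with alloc_snapshot:     max_buffer is allocated at the full "size"
 *                            here and tr->allocated_snapshot is set.
 *
 * Only the top level trace array honors the boot parameter; the flag is
 * cleared above so later instances always start with the minimal buffer.
 */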
6741
6742 static void free_trace_buffer(struct trace_buffer *buf)
6743 {
6744         if (buf->buffer) {
6745                 ring_buffer_free(buf->buffer);
6746                 buf->buffer = NULL;
6747                 free_percpu(buf->data);
6748                 buf->data = NULL;
6749         }
6750 }
6751
6752 static void free_trace_buffers(struct trace_array *tr)
6753 {
6754         if (!tr)
6755                 return;
6756
6757         free_trace_buffer(&tr->trace_buffer);
6758
6759 #ifdef CONFIG_TRACER_MAX_TRACE
6760         free_trace_buffer(&tr->max_buffer);
6761 #endif
6762 }
6763
6764 static void init_trace_flags_index(struct trace_array *tr)
6765 {
6766         int i;
6767
6768         /* Used by the trace options files */
6769         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
6770                 tr->trace_flags_index[i] = i;
6771 }
6772
6773 static void __update_tracer_options(struct trace_array *tr)
6774 {
6775         struct tracer *t;
6776
6777         for (t = trace_types; t; t = t->next)
6778                 add_tracer_options(tr, t);
6779 }
6780
6781 static void update_tracer_options(struct trace_array *tr)
6782 {
6783         mutex_lock(&trace_types_lock);
6784         __update_tracer_options(tr);
6785         mutex_unlock(&trace_types_lock);
6786 }
6787
6788 static int instance_mkdir(const char *name)
6789 {
6790         struct trace_array *tr;
6791         int ret;
6792
6793         mutex_lock(&trace_types_lock);
6794
6795         ret = -EEXIST;
6796         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6797                 if (tr->name && strcmp(tr->name, name) == 0)
6798                         goto out_unlock;
6799         }
6800
6801         ret = -ENOMEM;
6802         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6803         if (!tr)
6804                 goto out_unlock;
6805
6806         tr->name = kstrdup(name, GFP_KERNEL);
6807         if (!tr->name)
6808                 goto out_free_tr;
6809
6810         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6811                 goto out_free_tr;
6812
6813         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
6814
6815         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6816
6817         raw_spin_lock_init(&tr->start_lock);
6818
6819         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6820
6821         tr->current_trace = &nop_trace;
6822
6823         INIT_LIST_HEAD(&tr->systems);
6824         INIT_LIST_HEAD(&tr->events);
6825
6826         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6827                 goto out_free_tr;
6828
6829         tr->dir = tracefs_create_dir(name, trace_instance_dir);
6830         if (!tr->dir)
6831                 goto out_free_tr;
6832
6833         ret = event_trace_add_tracer(tr->dir, tr);
6834         if (ret) {
6835                 tracefs_remove_recursive(tr->dir);
6836                 goto out_free_tr;
6837         }
6838
6839         init_tracer_tracefs(tr, tr->dir);
6840         init_trace_flags_index(tr);
6841         __update_tracer_options(tr);
6842
6843         list_add(&tr->list, &ftrace_trace_arrays);
6844
6845         mutex_unlock(&trace_types_lock);
6846
6847         return 0;
6848
6849  out_free_tr:
6850         free_trace_buffers(tr);
6851         free_cpumask_var(tr->tracing_cpumask);
6852         kfree(tr->name);
6853         kfree(tr);
6854
6855  out_unlock:
6856         mutex_unlock(&trace_types_lock);
6857
6858         return ret;
6859
6860 }
6861
6862 static int instance_rmdir(const char *name)
6863 {
6864         struct trace_array *tr;
6865         int found = 0;
6866         int ret;
6867         int i;
6868
6869         mutex_lock(&trace_types_lock);
6870
6871         ret = -ENODEV;
6872         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6873                 if (tr->name && strcmp(tr->name, name) == 0) {
6874                         found = 1;
6875                         break;
6876                 }
6877         }
6878         if (!found)
6879                 goto out_unlock;
6880
6881         ret = -EBUSY;
6882         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6883                 goto out_unlock;
6884
6885         list_del(&tr->list);
6886
6887         /* Disable all the flags that were enabled coming in */
6888         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
6889                 if ((1 << i) & ZEROED_TRACE_FLAGS)
6890                         set_tracer_flag(tr, 1 << i, 0);
6891         }
6892
6893         tracing_set_nop(tr);
6894         event_trace_del_tracer(tr);
6895         ftrace_destroy_function_files(tr);
6896         tracefs_remove_recursive(tr->dir);
6897         free_trace_buffers(tr);
6898
6899         for (i = 0; i < tr->nr_topts; i++) {
6900                 kfree(tr->topts[i].topts);
6901         }
6902         kfree(tr->topts);
6903
6904         kfree(tr->name);
6905         kfree(tr);
6906
6907         ret = 0;
6908
6909  out_unlock:
6910         mutex_unlock(&trace_types_lock);
6911
6912         return ret;
6913 }
6914
6915 static __init void create_trace_instances(struct dentry *d_tracer)
6916 {
6917         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
6918                                                          instance_mkdir,
6919                                                          instance_rmdir);
6920         if (WARN_ON(!trace_instance_dir))
6921                 return;
6922 }
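
/*
 * Usage sketch for the instances directory wired up above (the mount
 * point is illustrative):
 *
 *   # mkdir /sys/kernel/tracing/instances/foo      -> instance_mkdir("foo")
 *   # rmdir /sys/kernel/tracing/instances/foo      -> instance_rmdir("foo")
 *
 * Each instance gets its own ring buffers, event directory and option
 * files via init_tracer_tracefs() and event_trace_add_tracer().  The
 * rmdir fails with -EBUSY while the instance or its current tracer is
 * still referenced.
 */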
6923
6924 static void
6925 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
6926 {
6927         int cpu;
6928
6929         trace_create_file("available_tracers", 0444, d_tracer,
6930                         tr, &show_traces_fops);
6931
6932         trace_create_file("current_tracer", 0644, d_tracer,
6933                         tr, &set_tracer_fops);
6934
6935         trace_create_file("tracing_cpumask", 0644, d_tracer,
6936                           tr, &tracing_cpumask_fops);
6937
6938         trace_create_file("trace_options", 0644, d_tracer,
6939                           tr, &tracing_iter_fops);
6940
6941         trace_create_file("trace", 0644, d_tracer,
6942                           tr, &tracing_fops);
6943
6944         trace_create_file("trace_pipe", 0444, d_tracer,
6945                           tr, &tracing_pipe_fops);
6946
6947         trace_create_file("buffer_size_kb", 0644, d_tracer,
6948                           tr, &tracing_entries_fops);
6949
6950         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6951                           tr, &tracing_total_entries_fops);
6952
6953         trace_create_file("free_buffer", 0200, d_tracer,
6954                           tr, &tracing_free_buffer_fops);
6955
6956         trace_create_file("trace_marker", 0220, d_tracer,
6957                           tr, &tracing_mark_fops);
6958
6959         trace_create_file("trace_clock", 0644, d_tracer, tr,
6960                           &trace_clock_fops);
6961
6962         trace_create_file("tracing_on", 0644, d_tracer,
6963                           tr, &rb_simple_fops);
6964
6965         create_trace_options_dir(tr);
6966
6967 #ifdef CONFIG_TRACER_MAX_TRACE
6968         trace_create_file("tracing_max_latency", 0644, d_tracer,
6969                         &tr->max_latency, &tracing_max_lat_fops);
6970 #endif
6971
6972         if (ftrace_create_function_files(tr, d_tracer))
6973                 WARN(1, "Could not allocate function filter files");
6974
6975 #ifdef CONFIG_TRACER_SNAPSHOT
6976         trace_create_file("snapshot", 0644, d_tracer,
6977                           tr, &snapshot_fops);
6978 #endif
6979
6980         for_each_tracing_cpu(cpu)
6981                 tracing_init_tracefs_percpu(tr, cpu);
6982
6983 }
6984
6985 static struct vfsmount *trace_automount(void *ignore)
6986 {
6987         struct vfsmount *mnt;
6988         struct file_system_type *type;
6989
6990         /*
6991          * To maintain backward compatibility for tools that mount
6992          * debugfs to get to the tracing facility, tracefs is automatically
6993          * mounted to the debugfs/tracing directory.
6994          */
6995         type = get_fs_type("tracefs");
6996         if (!type)
6997                 return NULL;
6998         mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
6999         put_filesystem(type);
7000         if (IS_ERR(mnt))
7001                 return NULL;
7002         mntget(mnt);
7003
7004         return mnt;
7005 }
7006
7007 /**
7008  * tracing_init_dentry - initialize top level trace array
7009  *
7010  * This is called when creating files or directories in the tracing
7011  * directory. It is called via fs_initcall() by any of the boot up code
7012  * and expects to return the dentry of the top level tracing directory.
7013  */
7014 struct dentry *tracing_init_dentry(void)
7015 {
7016         struct trace_array *tr = &global_trace;
7017
7018         /* The top level trace array uses NULL as parent */
7019         if (tr->dir)
7020                 return NULL;
7021
7022         if (WARN_ON(!tracefs_initialized()) ||
7023                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7024                  WARN_ON(!debugfs_initialized())))
7025                 return ERR_PTR(-ENODEV);
7026
7027         /*
7028          * As there may still be users that expect the tracing
7029          * files to exist in debugfs/tracing, we must automount
7030          * the tracefs file system there, so older tools still
7031          * work with the newer kernel.
7032          */
7033         tr->dir = debugfs_create_automount("tracing", NULL,
7034                                            trace_automount, NULL);
7035         if (!tr->dir) {
7036                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7037                 return ERR_PTR(-ENOMEM);
7038         }
7039
7040         return NULL;
7041 }
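
/*
 * Two illustrative ways to reach the directory set up here (mount points
 * vary by distribution):
 *
 *   # mount -t tracefs nodev /sys/kernel/tracing
 *   # ls /sys/kernel/tracing
 *
 * or, relying on the automount created above for older tools:
 *
 *   # mount -t debugfs nodev /sys/kernel/debug
 *   # ls /sys/kernel/debug/tracing
 */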
7042
7043 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7044 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7045
7046 static void __init trace_enum_init(void)
7047 {
7048         int len;
7049
7050         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7051         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7052 }
7053
7054 #ifdef CONFIG_MODULES
7055 static void trace_module_add_enums(struct module *mod)
7056 {
7057         if (!mod->num_trace_enums)
7058                 return;
7059
7060         /*
7061          * Modules with bad taint do not have events created; do
7062          * not bother with their enums either.
7063          */
7064         if (trace_module_has_bad_taint(mod))
7065                 return;
7066
7067         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7068 }
7069
7070 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7071 static void trace_module_remove_enums(struct module *mod)
7072 {
7073         union trace_enum_map_item *map;
7074         union trace_enum_map_item **last = &trace_enum_maps;
7075
7076         if (!mod->num_trace_enums)
7077                 return;
7078
7079         mutex_lock(&trace_enum_mutex);
7080
7081         map = trace_enum_maps;
7082
7083         while (map) {
7084                 if (map->head.mod == mod)
7085                         break;
7086                 map = trace_enum_jmp_to_tail(map);
7087                 last = &map->tail.next;
7088                 map = map->tail.next;
7089         }
7090         if (!map)
7091                 goto out;
7092
7093         *last = trace_enum_jmp_to_tail(map)->tail.next;
7094         kfree(map);
7095  out:
7096         mutex_unlock(&trace_enum_mutex);
7097 }
7098 #else
7099 static inline void trace_module_remove_enums(struct module *mod) { }
7100 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7101
7102 static int trace_module_notify(struct notifier_block *self,
7103                                unsigned long val, void *data)
7104 {
7105         struct module *mod = data;
7106
7107         switch (val) {
7108         case MODULE_STATE_COMING:
7109                 trace_module_add_enums(mod);
7110                 break;
7111         case MODULE_STATE_GOING:
7112                 trace_module_remove_enums(mod);
7113                 break;
7114         }
7115
7116         return 0;
7117 }
7118
7119 static struct notifier_block trace_module_nb = {
7120         .notifier_call = trace_module_notify,
7121         .priority = 0,
7122 };
7123 #endif /* CONFIG_MODULES */
7124
7125 static __init int tracer_init_tracefs(void)
7126 {
7127         struct dentry *d_tracer;
7128
7129         trace_access_lock_init();
7130
7131         d_tracer = tracing_init_dentry();
7132         if (IS_ERR(d_tracer))
7133                 return 0;
7134
7135         init_tracer_tracefs(&global_trace, d_tracer);
7136
7137         trace_create_file("tracing_thresh", 0644, d_tracer,
7138                         &global_trace, &tracing_thresh_fops);
7139
7140         trace_create_file("README", 0444, d_tracer,
7141                         NULL, &tracing_readme_fops);
7142
7143         trace_create_file("saved_cmdlines", 0444, d_tracer,
7144                         NULL, &tracing_saved_cmdlines_fops);
7145
7146         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7147                           NULL, &tracing_saved_cmdlines_size_fops);
7148
7149         trace_enum_init();
7150
7151         trace_create_enum_file(d_tracer);
7152
7153 #ifdef CONFIG_MODULES
7154         register_module_notifier(&trace_module_nb);
7155 #endif
7156
7157 #ifdef CONFIG_DYNAMIC_FTRACE
7158         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7159                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7160 #endif
7161
7162         create_trace_instances(d_tracer);
7163
7164         update_tracer_options(&global_trace);
7165
7166         return 0;
7167 }
7168
7169 static int trace_panic_handler(struct notifier_block *this,
7170                                unsigned long event, void *unused)
7171 {
7172         if (ftrace_dump_on_oops)
7173                 ftrace_dump(ftrace_dump_on_oops);
7174         return NOTIFY_OK;
7175 }
7176
7177 static struct notifier_block trace_panic_notifier = {
7178         .notifier_call  = trace_panic_handler,
7179         .next           = NULL,
7180         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7181 };
7182
7183 static int trace_die_handler(struct notifier_block *self,
7184                              unsigned long val,
7185                              void *data)
7186 {
7187         switch (val) {
7188         case DIE_OOPS:
7189                 if (ftrace_dump_on_oops)
7190                         ftrace_dump(ftrace_dump_on_oops);
7191                 break;
7192         default:
7193                 break;
7194         }
7195         return NOTIFY_OK;
7196 }
7197
7198 static struct notifier_block trace_die_notifier = {
7199         .notifier_call = trace_die_handler,
7200         .priority = 200
7201 };
7202
7203 /*
7204  * printk is set to a max of 1024; we really don't need it that big.
7205  * Nothing should be printing 1000 characters anyway.
7206  */
7207 #define TRACE_MAX_PRINT         1000
7208
7209 /*
7210  * Define here KERN_TRACE so that we have one place to modify
7211  * it if we decide to change what log level the ftrace dump
7212  * should be at.
7213  */
7214 #define KERN_TRACE              KERN_EMERG
7215
7216 void
7217 trace_printk_seq(struct trace_seq *s)
7218 {
7219         /* Probably should print a warning here. */
7220         if (s->seq.len >= TRACE_MAX_PRINT)
7221                 s->seq.len = TRACE_MAX_PRINT;
7222
7223         /*
7224          * More paranoid code. Although the buffer size is set to
7225          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7226          * an extra layer of protection.
7227          */
7228         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7229                 s->seq.len = s->seq.size - 1;
7230
7231         /* should already be NUL-terminated, but we are paranoid. */
7232         s->buffer[s->seq.len] = 0;
7233
7234         printk(KERN_TRACE "%s", s->buffer);
7235
7236         trace_seq_init(s);
7237 }
7238
7239 void trace_init_global_iter(struct trace_iterator *iter)
7240 {
7241         iter->tr = &global_trace;
7242         iter->trace = iter->tr->current_trace;
7243         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7244         iter->trace_buffer = &global_trace.trace_buffer;
7245
7246         if (iter->trace && iter->trace->open)
7247                 iter->trace->open(iter);
7248
7249         /* Annotate start of buffers if we had overruns */
7250         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7251                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7252
7253         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7254         if (trace_clocks[iter->tr->clock_id].in_ns)
7255                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7256 }
7257
7258 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7259 {
7260         /* use static because iter can be a bit big for the stack */
7261         static struct trace_iterator iter;
7262         static atomic_t dump_running;
7263         struct trace_array *tr = &global_trace;
7264         unsigned int old_userobj;
7265         unsigned long flags;
7266         int cnt = 0, cpu;
7267
7268         /* Only allow one dump user at a time. */
7269         if (atomic_inc_return(&dump_running) != 1) {
7270                 atomic_dec(&dump_running);
7271                 return;
7272         }
7273
7274         /*
7275          * Always turn off tracing when we dump.
7276          * We don't need to show trace output of what happens
7277          * between multiple crashes.
7278          *
7279          * If the user does a sysrq-z, then they can re-enable
7280          * tracing with echo 1 > tracing_on.
7281          */
7282         tracing_off();
7283
7284         local_irq_save(flags);
7285
7286         /* Simulate the iterator */
7287         trace_init_global_iter(&iter);
7288
7289         for_each_tracing_cpu(cpu) {
7290                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7291         }
7292
7293         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7294
7295         /* don't look at user memory in panic mode */
7296         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7297
7298         switch (oops_dump_mode) {
7299         case DUMP_ALL:
7300                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7301                 break;
7302         case DUMP_ORIG:
7303                 iter.cpu_file = raw_smp_processor_id();
7304                 break;
7305         case DUMP_NONE:
7306                 goto out_enable;
7307         default:
7308                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7309                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7310         }
7311
7312         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7313
7314         /* Did function tracer already get disabled? */
7315         if (ftrace_is_dead()) {
7316                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7317                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7318         }
7319
7320         /*
7321          * We need to stop all tracing on all CPUs to read
7322          * the next buffer. This is a bit expensive, but is
7323          * not done often. We fill in all that we can read,
7324          * and then release the locks again.
7325          */
7326
7327         while (!trace_empty(&iter)) {
7328
7329                 if (!cnt)
7330                         printk(KERN_TRACE "---------------------------------\n");
7331
7332                 cnt++;
7333
7334                 /* reset all but tr, trace, and overruns */
7335                 memset(&iter.seq, 0,
7336                        sizeof(struct trace_iterator) -
7337                        offsetof(struct trace_iterator, seq));
7338                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7339                 iter.pos = -1;
7340
7341                 if (trace_find_next_entry_inc(&iter) != NULL) {
7342                         int ret;
7343
7344                         ret = print_trace_line(&iter);
7345                         if (ret != TRACE_TYPE_NO_CONSUME)
7346                                 trace_consume(&iter);
7347                 }
7348                 touch_nmi_watchdog();
7349
7350                 trace_printk_seq(&iter.seq);
7351         }
7352
7353         if (!cnt)
7354                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7355         else
7356                 printk(KERN_TRACE "---------------------------------\n");
7357
7358  out_enable:
7359         tr->trace_flags |= old_userobj;
7360
7361         for_each_tracing_cpu(cpu) {
7362                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7363         }
7364         atomic_dec(&dump_running);
7365         local_irq_restore(flags);
7366 }
7367 EXPORT_SYMBOL_GPL(ftrace_dump);
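
/*
 * ftrace_dump() is exported, so a debugging patch could (sketch only,
 * "broken_condition" is a placeholder) dump the buffers in place:
 *
 *   if (WARN_ON(broken_condition))
 *           ftrace_dump(DUMP_ALL);   (or DUMP_ORIG for the current CPU)
 *
 * More commonly it runs from the panic/die notifiers above when
 * ftrace_dump_on_oops is set, e.g. with the ftrace_dump_on_oops boot
 * parameter.  Remember that dumping turns tracing off; re-enable it
 * afterwards with "echo 1 > tracing_on".
 */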
7368
7369 __init static int tracer_alloc_buffers(void)
7370 {
7371         int ring_buf_size;
7372         int ret = -ENOMEM;
7373
7374         /*
7375          * Make sure we don't accidentally add more trace options
7376          * than we have bits for.
7377          */
7378         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7379
7380         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7381                 goto out;
7382
7383         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7384                 goto out_free_buffer_mask;
7385
7386         /* Only allocate trace_printk buffers if a trace_printk exists */
7387         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7388                 /* Must be called before global_trace.buffer is allocated */
7389                 trace_printk_init_buffers();
7390
7391         /* To save memory, keep the ring buffer size to its minimum */
7392         if (ring_buffer_expanded)
7393                 ring_buf_size = trace_buf_size;
7394         else
7395                 ring_buf_size = 1;
7396
7397         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7398         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7399
7400         raw_spin_lock_init(&global_trace.start_lock);
7401
7402         /* Used for event triggers */
7403         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7404         if (!temp_buffer)
7405                 goto out_free_cpumask;
7406
7407         if (trace_create_savedcmd() < 0)
7408                 goto out_free_temp_buffer;
7409
7410         /* TODO: make the number of buffers hot pluggable with CPUS */
7411         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7412                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7413                 WARN_ON(1);
7414                 goto out_free_savedcmd;
7415         }
7416
7417         if (global_trace.buffer_disabled)
7418                 tracing_off();
7419
7420         if (trace_boot_clock) {
7421                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7422                 if (ret < 0)
7423                         pr_warn("Trace clock %s not defined, going back to default\n",
7424                                 trace_boot_clock);
7425         }
7426
7427         /*
7428          * register_tracer() might reference current_trace, so it
7429          * needs to be set before we register anything. This is
7430          * just a bootstrap of current_trace anyway.
7431          */
7432         global_trace.current_trace = &nop_trace;
7433
7434         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7435
7436         ftrace_init_global_array_ops(&global_trace);
7437
7438         init_trace_flags_index(&global_trace);
7439
7440         register_tracer(&nop_trace);
7441
7442         /* All seems OK, enable tracing */
7443         tracing_disabled = 0;
7444
7445         atomic_notifier_chain_register(&panic_notifier_list,
7446                                        &trace_panic_notifier);
7447
7448         register_die_notifier(&trace_die_notifier);
7449
7450         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7451
7452         INIT_LIST_HEAD(&global_trace.systems);
7453         INIT_LIST_HEAD(&global_trace.events);
7454         list_add(&global_trace.list, &ftrace_trace_arrays);
7455
7456         apply_trace_boot_options();
7457
7458         register_snapshot_cmd();
7459
7460         return 0;
7461
7462 out_free_savedcmd:
7463         free_saved_cmdlines_buffer(savedcmd);
7464 out_free_temp_buffer:
7465         ring_buffer_free(temp_buffer);
7466 out_free_cpumask:
7467         free_cpumask_var(global_trace.tracing_cpumask);
7468 out_free_buffer_mask:
7469         free_cpumask_var(tracing_buffer_mask);
7470 out:
7471         return ret;
7472 }
7473
7474 void __init trace_init(void)
7475 {
7476         if (tracepoint_printk) {
7477                 tracepoint_print_iter =
7478                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7479                 if (WARN_ON(!tracepoint_print_iter))
7480                         tracepoint_printk = 0;
7481         }
7482         tracer_alloc_buffers();
7483         trace_event_init();
7484 }
7485
7486 __init static int clear_boot_tracer(void)
7487 {
7488         /*
7489          * The default bootup tracer name lives in an init section that
7490          * is freed after boot. This function is called as a late_initcall.
7491          * If the boot tracer was never registered, clear the pointer out
7492          * to prevent a later registration from accessing memory that is
7493          * about to be freed.
7494          */
7495         if (!default_bootup_tracer)
7496                 return 0;
7497
7498         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7499                default_bootup_tracer);
7500         default_bootup_tracer = NULL;
7501
7502         return 0;
7503 }
7504
7505 fs_initcall(tracer_init_tracefs);
7506 late_initcall(clear_boot_tracer);