1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/kprobes.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/sched/rt.h>
44
45 #include "trace.h"
46 #include "trace_output.h"
47
48 /*
49  * On boot up, the ring buffer is set to the minimum size, so that
50  * we do not waste memory on systems that are not using tracing.
51  */
52 bool ring_buffer_expanded;
53
54 /*
55  * We need to change this state when a selftest is running.
56  * A selftest will look into the ring-buffer to count the
57  * entries inserted during the selftest, although concurrent
58  * insertions into the ring-buffer, such as trace_printk, could occur
59  * at the same time, giving false positive or negative results.
60  */
61 static bool __read_mostly tracing_selftest_running;
62
63 /*
64  * If a tracer is running, we do not want to run SELFTEST.
65  */
66 bool __read_mostly tracing_selftest_disabled;
67
68 /* Pipe tracepoints to printk */
69 struct trace_iterator *tracepoint_print_iter;
70 int tracepoint_printk;
71
72 /* For tracers that don't implement custom flags */
73 static struct tracer_opt dummy_tracer_opt[] = {
74         { }
75 };
76
77 static int
78 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
79 {
80         return 0;
81 }
82
83 /*
84  * To prevent the comm cache from being overwritten when no
85  * tracing is active, only save the comm when a trace event
86  * occurred.
87  */
88 static DEFINE_PER_CPU(bool, trace_cmdline_save);
89
90 /*
91  * Kill all tracing for good (never come back).
92  * It is initialized to 1 but will turn to zero if the initialization
93  * of the tracer is successful. But that is the only place that sets
94  * this back to zero.
95  */
96 static int tracing_disabled = 1;
97
98 cpumask_var_t __read_mostly     tracing_buffer_mask;
99
100 /*
101  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
102  *
103  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
104  * is set, then ftrace_dump is called. This will output the contents
105  * of the ftrace buffers to the console.  This is very useful for
106  * capturing traces that lead to crashes and outputting them to a
107  * serial console.
108  *
109  * It is off by default, but you can enable it either by specifying
110  * "ftrace_dump_on_oops" on the kernel command line, or by setting
111  * /proc/sys/kernel/ftrace_dump_on_oops.
112  * Set it to 1 to dump the buffers of all CPUs.
113  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
114  */
115
116 enum ftrace_dump_mode ftrace_dump_on_oops;
117
118 /* When set, tracing will stop when a WARN*() is hit */
119 int __disable_trace_on_warning;
120
121 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
122 /* Map of enums to their values, for "enum_map" file */
123 struct trace_enum_map_head {
124         struct module                   *mod;
125         unsigned long                   length;
126 };
127
128 union trace_enum_map_item;
129
130 struct trace_enum_map_tail {
131         /*
132          * "end" is first and points to NULL as it must be different
133          * than "mod" or "enum_string"
134          */
135         union trace_enum_map_item       *next;
136         const char                      *end;   /* points to NULL */
137 };
138
139 static DEFINE_MUTEX(trace_enum_mutex);
140
141 /*
142  * The trace_enum_maps are saved in an array with two extra elements,
143  * one at the beginning, and one at the end. The beginning item contains
144  * the count of the saved maps (head.length), and the module they
145  * belong to if not built in (head.mod). The ending item contains a
146  * pointer to the next array of saved enum_map items.
147  */
148 union trace_enum_map_item {
149         struct trace_enum_map           map;
150         struct trace_enum_map_head      head;
151         struct trace_enum_map_tail      tail;
152 };
153
154 static union trace_enum_map_item *trace_enum_maps;
155 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
156
157 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
158
159 #define MAX_TRACER_SIZE         100
160 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
161 static char *default_bootup_tracer;
162
163 static bool allocate_snapshot;
164
165 static int __init set_cmdline_ftrace(char *str)
166 {
167         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
168         default_bootup_tracer = bootup_tracer_buf;
169         /* We are using ftrace early, expand it */
170         ring_buffer_expanded = true;
171         return 1;
172 }
173 __setup("ftrace=", set_cmdline_ftrace);
174
175 static int __init set_ftrace_dump_on_oops(char *str)
176 {
177         if (*str++ != '=' || !*str) {
178                 ftrace_dump_on_oops = DUMP_ALL;
179                 return 1;
180         }
181
182         if (!strcmp("orig_cpu", str)) {
183                 ftrace_dump_on_oops = DUMP_ORIG;
184                 return 1;
185         }
186
187         return 0;
188 }
189 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
190
191 static int __init stop_trace_on_warning(char *str)
192 {
193         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
194                 __disable_trace_on_warning = 1;
195         return 1;
196 }
197 __setup("traceoff_on_warning", stop_trace_on_warning);
198
199 static int __init boot_alloc_snapshot(char *str)
200 {
201         allocate_snapshot = true;
202         /* We also need the main ring buffer expanded */
203         ring_buffer_expanded = true;
204         return 1;
205 }
206 __setup("alloc_snapshot", boot_alloc_snapshot);
207
208
209 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
210
211 static int __init set_trace_boot_options(char *str)
212 {
213         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
214         return 0;
215 }
216 __setup("trace_options=", set_trace_boot_options);
217
218 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
219 static char *trace_boot_clock __initdata;
220
221 static int __init set_trace_boot_clock(char *str)
222 {
223         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
224         trace_boot_clock = trace_boot_clock_buf;
225         return 0;
226 }
227 __setup("trace_clock=", set_trace_boot_clock);
228
229 static int __init set_tracepoint_printk(char *str)
230 {
231         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
232                 tracepoint_printk = 1;
233         return 1;
234 }
235 __setup("tp_printk", set_tracepoint_printk);
236
237 unsigned long long ns2usecs(cycle_t nsec)
238 {
239         nsec += 500;
240         do_div(nsec, 1000);
241         return nsec;
242 }
243
244 /* trace_flags holds trace_options default values */
245 #define TRACE_DEFAULT_FLAGS                                             \
246         (FUNCTION_DEFAULT_FLAGS |                                       \
247          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
248          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
249          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
250          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
251
252 /* trace_options that are only supported by global_trace */
253 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
254                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
255
256 /* trace_flags that are default zero for instances */
257 #define ZEROED_TRACE_FLAGS \
258         TRACE_ITER_EVENT_FORK
259
260 /*
261  * The global_trace is the descriptor that holds the tracing
262  * buffers for the live tracing. For each CPU, it contains
263  * a linked list of pages that will store trace entries. The
264  * page descriptor of the pages in memory is used to hold
265  * the linked list by linking the lru item in the page descriptor
266  * to each of the pages in the buffer per CPU.
267  *
268  * For each active CPU there is a data field that holds the
269  * pages for the buffer for that CPU. Each CPU has the same number
270  * of pages allocated for its buffer.
271  */
272 static struct trace_array global_trace = {
273         .trace_flags = TRACE_DEFAULT_FLAGS,
274 };
275
276 LIST_HEAD(ftrace_trace_arrays);
277
278 int trace_array_get(struct trace_array *this_tr)
279 {
280         struct trace_array *tr;
281         int ret = -ENODEV;
282
283         mutex_lock(&trace_types_lock);
284         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285                 if (tr == this_tr) {
286                         tr->ref++;
287                         ret = 0;
288                         break;
289                 }
290         }
291         mutex_unlock(&trace_types_lock);
292
293         return ret;
294 }
295
296 static void __trace_array_put(struct trace_array *this_tr)
297 {
298         WARN_ON(!this_tr->ref);
299         this_tr->ref--;
300 }
301
302 void trace_array_put(struct trace_array *this_tr)
303 {
304         mutex_lock(&trace_types_lock);
305         __trace_array_put(this_tr);
306         mutex_unlock(&trace_types_lock);
307 }
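
/*
 * Illustrative pairing for the reference helpers above (hypothetical
 * caller, not part of this file): a successful trace_array_get() must be
 * balanced by trace_array_put() once the trace array is no longer used:
 *
 *     if (trace_array_get(tr) == 0) {
 *             ... use tr ...
 *             trace_array_put(tr);
 *     }
 */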
308
309 int call_filter_check_discard(struct trace_event_call *call, void *rec,
310                               struct ring_buffer *buffer,
311                               struct ring_buffer_event *event)
312 {
313         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
314             !filter_match_preds(call->filter, rec)) {
315                 __trace_event_discard_commit(buffer, event);
316                 return 1;
317         }
318
319         return 0;
320 }
321
322 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
323 {
324         u64 ts;
325
326         /* Early boot up does not have a buffer yet */
327         if (!buf->buffer)
328                 return trace_clock_local();
329
330         ts = ring_buffer_time_stamp(buf->buffer, cpu);
331         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
332
333         return ts;
334 }
335
336 cycle_t ftrace_now(int cpu)
337 {
338         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
339 }
340
341 /**
342  * tracing_is_enabled - Show if global_trace has been enabled
343  *
344  * Shows if the global trace has been enabled or not. It uses the
345  * mirror flag "buffer_disabled", which can be used in fast paths such as by
346  * the irqsoff tracer. But it may be inaccurate due to races. If you
347  * need to know the accurate state, use tracing_is_on() which is a little
348  * slower, but accurate.
349  */
350 int tracing_is_enabled(void)
351 {
352         /*
353          * For quick access (irqsoff uses this in fast path), just
354          * return the mirror variable of the state of the ring buffer.
355          * It's a little racy, but we don't really care.
356          */
357         smp_rmb();
358         return !global_trace.buffer_disabled;
359 }
360
361 /*
362  * trace_buf_size is the size in bytes that is allocated
363  * for a buffer. Note, the number of bytes is always rounded
364  * to page size.
365  *
366  * This number is purposely set to a low number of 16384.
367  * If the dump on oops happens, it will be much appreciated
368  * to not have to wait for all that output. Anyway this can be
369  * configured at both boot time and run time.
370  */
371 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
372
373 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
374
375 /* trace_types holds a link list of available tracers. */
376 static struct tracer            *trace_types __read_mostly;
377
378 /*
379  * trace_types_lock is used to protect the trace_types list.
380  */
381 DEFINE_MUTEX(trace_types_lock);
382
383 /*
384  * Serialize access to the ring buffer.
385  *
386  * The ring buffer serializes readers, but that is only low-level protection.
387  * The validity of the events (returned by ring_buffer_peek(), etc.)
388  * is not protected by the ring buffer.
389  *
390  * The content of events may become garbage if we allow another process to
391  * consume these events concurrently:
392  *   A) the page of the consumed events may become a normal page
393  *      (not a reader page) in the ring buffer, and this page will be rewritten
394  *      by the event producer.
395  *   B) The page of the consumed events may become a page for splice_read,
396  *      and this page will be returned to the system.
397  *
398  * These primitives allow multiple processes to access different per-cpu ring
399  * buffers concurrently.
400  *
401  * These primitives don't distinguish read-only and read-consume access.
402  * Multiple read-only accesses are also serialized.
403  */
404
405 #ifdef CONFIG_SMP
406 static DECLARE_RWSEM(all_cpu_access_lock);
407 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
408
409 static inline void trace_access_lock(int cpu)
410 {
411         if (cpu == RING_BUFFER_ALL_CPUS) {
412                 /* gain it for accessing the whole ring buffer. */
413                 down_write(&all_cpu_access_lock);
414         } else {
415                 /* gain it for accessing a cpu ring buffer. */
416
417                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
418                 down_read(&all_cpu_access_lock);
419
420                 /* Secondly block other access to this @cpu ring buffer. */
421                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
422         }
423 }
424
425 static inline void trace_access_unlock(int cpu)
426 {
427         if (cpu == RING_BUFFER_ALL_CPUS) {
428                 up_write(&all_cpu_access_lock);
429         } else {
430                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
431                 up_read(&all_cpu_access_lock);
432         }
433 }
434
435 static inline void trace_access_lock_init(void)
436 {
437         int cpu;
438
439         for_each_possible_cpu(cpu)
440                 mutex_init(&per_cpu(cpu_access_lock, cpu));
441 }
442
443 #else
444
445 static DEFINE_MUTEX(access_lock);
446
447 static inline void trace_access_lock(int cpu)
448 {
449         (void)cpu;
450         mutex_lock(&access_lock);
451 }
452
453 static inline void trace_access_unlock(int cpu)
454 {
455         (void)cpu;
456         mutex_unlock(&access_lock);
457 }
458
459 static inline void trace_access_lock_init(void)
460 {
461 }
462
463 #endif
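
/*
 * Illustrative pairing for the helpers above (hypothetical reader path):
 *
 *     trace_access_lock(cpu);
 *     ... consume events from that CPU's buffer ...
 *     trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS instead takes the write side of
 * all_cpu_access_lock (on SMP) and so excludes every per-cpu reader.
 */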
464
465 #ifdef CONFIG_STACKTRACE
466 static void __ftrace_trace_stack(struct ring_buffer *buffer,
467                                  unsigned long flags,
468                                  int skip, int pc, struct pt_regs *regs);
469 static inline void ftrace_trace_stack(struct trace_array *tr,
470                                       struct ring_buffer *buffer,
471                                       unsigned long flags,
472                                       int skip, int pc, struct pt_regs *regs);
473
474 #else
475 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
476                                         unsigned long flags,
477                                         int skip, int pc, struct pt_regs *regs)
478 {
479 }
480 static inline void ftrace_trace_stack(struct trace_array *tr,
481                                       struct ring_buffer *buffer,
482                                       unsigned long flags,
483                                       int skip, int pc, struct pt_regs *regs)
484 {
485 }
486
487 #endif
488
489 static void tracer_tracing_on(struct trace_array *tr)
490 {
491         if (tr->trace_buffer.buffer)
492                 ring_buffer_record_on(tr->trace_buffer.buffer);
493         /*
494          * This flag is looked at when buffers haven't been allocated
495          * yet, or by some tracers (like irqsoff), that just want to
496          * know if the ring buffer has been disabled, but it can handle
497          * races of where it gets disabled but we still do a record.
498          * As the check is in the fast path of the tracers, it is more
499          * important to be fast than accurate.
500          */
501         tr->buffer_disabled = 0;
502         /* Make the flag seen by readers */
503         smp_wmb();
504 }
505
506 /**
507  * tracing_on - enable tracing buffers
508  *
509  * This function enables tracing buffers that may have been
510  * disabled with tracing_off.
511  */
512 void tracing_on(void)
513 {
514         tracer_tracing_on(&global_trace);
515 }
516 EXPORT_SYMBOL_GPL(tracing_on);
517
518 /**
519  * __trace_puts - write a constant string into the trace buffer.
520  * @ip:    The address of the caller
521  * @str:   The constant string to write
522  * @size:  The size of the string.
523  */
524 int __trace_puts(unsigned long ip, const char *str, int size)
525 {
526         struct ring_buffer_event *event;
527         struct ring_buffer *buffer;
528         struct print_entry *entry;
529         unsigned long irq_flags;
530         int alloc;
531         int pc;
532
533         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
534                 return 0;
535
536         pc = preempt_count();
537
538         if (unlikely(tracing_selftest_running || tracing_disabled))
539                 return 0;
540
541         alloc = sizeof(*entry) + size + 2; /* possible \n added */
542
543         local_save_flags(irq_flags);
544         buffer = global_trace.trace_buffer.buffer;
545         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
546                                           irq_flags, pc);
547         if (!event)
548                 return 0;
549
550         entry = ring_buffer_event_data(event);
551         entry->ip = ip;
552
553         memcpy(&entry->buf, str, size);
554
555         /* Add a newline if necessary */
556         if (entry->buf[size - 1] != '\n') {
557                 entry->buf[size] = '\n';
558                 entry->buf[size + 1] = '\0';
559         } else
560                 entry->buf[size] = '\0';
561
562         __buffer_unlock_commit(buffer, event);
563         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
564
565         return size;
566 }
567 EXPORT_SYMBOL_GPL(__trace_puts);
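
/*
 * Minimal usage sketch (hypothetical call site; callers normally reach this
 * through the trace_puts() macro rather than calling it directly):
 *
 *     __trace_puts(_THIS_IP_, "hello\n", 6);
 *
 * The return value is the number of bytes recorded, or 0 if printk tracing
 * is disabled or the ring buffer event could not be reserved.
 */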
568
569 /**
570  * __trace_bputs - write the pointer to a constant string into trace buffer
571  * @ip:    The address of the caller
572  * @str:   The constant string whose pointer is written to the buffer
573  */
574 int __trace_bputs(unsigned long ip, const char *str)
575 {
576         struct ring_buffer_event *event;
577         struct ring_buffer *buffer;
578         struct bputs_entry *entry;
579         unsigned long irq_flags;
580         int size = sizeof(struct bputs_entry);
581         int pc;
582
583         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
584                 return 0;
585
586         pc = preempt_count();
587
588         if (unlikely(tracing_selftest_running || tracing_disabled))
589                 return 0;
590
591         local_save_flags(irq_flags);
592         buffer = global_trace.trace_buffer.buffer;
593         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
594                                           irq_flags, pc);
595         if (!event)
596                 return 0;
597
598         entry = ring_buffer_event_data(event);
599         entry->ip                       = ip;
600         entry->str                      = str;
601
602         __buffer_unlock_commit(buffer, event);
603         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
604
605         return 1;
606 }
607 EXPORT_SYMBOL_GPL(__trace_bputs);
608
609 #ifdef CONFIG_TRACER_SNAPSHOT
610 /**
611  * tracing_snapshot - take a snapshot of the current buffer.
612  *
613  * This causes a swap between the snapshot buffer and the current live
614  * tracing buffer. You can use this to take snapshots of the live
615  * trace when some condition is triggered, but continue to trace.
616  *
617  * Note, make sure to allocate the snapshot either with a call to
618  * tracing_snapshot_alloc(), or by doing it manually
619  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
620  *
621  * If the snapshot buffer is not allocated, this will stop tracing,
622  * basically making a permanent snapshot.
623  */
624 void tracing_snapshot(void)
625 {
626         struct trace_array *tr = &global_trace;
627         struct tracer *tracer = tr->current_trace;
628         unsigned long flags;
629
630         if (in_nmi()) {
631                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
632                 internal_trace_puts("*** snapshot is being ignored        ***\n");
633                 return;
634         }
635
636         if (!tr->allocated_snapshot) {
637                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
638                 internal_trace_puts("*** stopping trace here!   ***\n");
639                 tracing_off();
640                 return;
641         }
642
643         /* Note, the snapshot can not be used while the tracer itself is using it */
644         if (tracer->use_max_tr) {
645                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
646                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
647                 return;
648         }
649
650         local_irq_save(flags);
651         update_max_tr(tr, current, smp_processor_id());
652         local_irq_restore(flags);
653 }
654 EXPORT_SYMBOL_GPL(tracing_snapshot);
655
656 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
657                                         struct trace_buffer *size_buf, int cpu_id);
658 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
659
660 static int alloc_snapshot(struct trace_array *tr)
661 {
662         int ret;
663
664         if (!tr->allocated_snapshot) {
665
666                 /* allocate spare buffer */
667                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
668                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
669                 if (ret < 0)
670                         return ret;
671
672                 tr->allocated_snapshot = true;
673         }
674
675         return 0;
676 }
677
678 static void free_snapshot(struct trace_array *tr)
679 {
680         /*
681          * We don't free the ring buffer; instead, we resize it because
682          * the max_tr ring buffer has some state (e.g. ring->clock) and
683          * we want to preserve it.
684          */
685         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
686         set_buffer_entries(&tr->max_buffer, 1);
687         tracing_reset_online_cpus(&tr->max_buffer);
688         tr->allocated_snapshot = false;
689 }
690
691 /**
692  * tracing_alloc_snapshot - allocate snapshot buffer.
693  *
694  * This only allocates the snapshot buffer if it isn't already
695  * allocated - it doesn't also take a snapshot.
696  *
697  * This is meant to be used in cases where the snapshot buffer needs
698  * to be set up for events that can't sleep but need to be able to
699  * trigger a snapshot.
700  */
701 int tracing_alloc_snapshot(void)
702 {
703         struct trace_array *tr = &global_trace;
704         int ret;
705
706         ret = alloc_snapshot(tr);
707         WARN_ON(ret < 0);
708
709         return ret;
710 }
711 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
712
713 /**
714  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
715  *
716  * This is similar to tracing_snapshot(), but it will allocate the
717  * snapshot buffer if it isn't already allocated. Use this only
718  * where it is safe to sleep, as the allocation may sleep.
719  *
720  * This causes a swap between the snapshot buffer and the current live
721  * tracing buffer. You can use this to take snapshots of the live
722  * trace when some condition is triggered, but continue to trace.
723  */
724 void tracing_snapshot_alloc(void)
725 {
726         int ret;
727
728         ret = tracing_alloc_snapshot();
729         if (ret < 0)
730                 return;
731
732         tracing_snapshot();
733 }
734 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
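
/*
 * Minimal usage sketch for the snapshot API above (hypothetical caller):
 * allocate the snapshot buffer from a context that may sleep, then take
 * snapshots later from contexts that cannot:
 *
 *     if (tracing_alloc_snapshot() == 0)
 *             tracing_snapshot();
 */
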
735 #else
736 void tracing_snapshot(void)
737 {
738         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
739 }
740 EXPORT_SYMBOL_GPL(tracing_snapshot);
741 int tracing_alloc_snapshot(void)
742 {
743         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
744         return -ENODEV;
745 }
746 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
747 void tracing_snapshot_alloc(void)
748 {
749         /* Give warning */
750         tracing_snapshot();
751 }
752 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
753 #endif /* CONFIG_TRACER_SNAPSHOT */
754
755 static void tracer_tracing_off(struct trace_array *tr)
756 {
757         if (tr->trace_buffer.buffer)
758                 ring_buffer_record_off(tr->trace_buffer.buffer);
759         /*
760          * This flag is looked at when buffers haven't been allocated
761          * yet, or by some tracers (like irqsoff), that just want to
762          * know if the ring buffer has been disabled, but it can handle
763          * races of where it gets disabled but we still do a record.
764          * As the check is in the fast path of the tracers, it is more
765          * important to be fast than accurate.
766          */
767         tr->buffer_disabled = 1;
768         /* Make the flag seen by readers */
769         smp_wmb();
770 }
771
772 /**
773  * tracing_off - turn off tracing buffers
774  *
775  * This function stops the tracing buffers from recording data.
776  * It does not disable any overhead the tracers themselves may
777  * be causing. This function simply causes all recording to
778  * the ring buffers to fail.
779  */
780 void tracing_off(void)
781 {
782         tracer_tracing_off(&global_trace);
783 }
784 EXPORT_SYMBOL_GPL(tracing_off);
785
786 void disable_trace_on_warning(void)
787 {
788         if (__disable_trace_on_warning)
789                 tracing_off();
790 }
791
792 /**
793  * tracer_tracing_is_on - show the real state of the ring buffer
794  * @tr : the trace array whose ring buffer state is checked
795  *
796  * Shows the real state of the ring buffer: whether it is enabled or not.
797  */
798 static int tracer_tracing_is_on(struct trace_array *tr)
799 {
800         if (tr->trace_buffer.buffer)
801                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
802         return !tr->buffer_disabled;
803 }
804
805 /**
806  * tracing_is_on - show state of ring buffers enabled
807  */
808 int tracing_is_on(void)
809 {
810         return tracer_tracing_is_on(&global_trace);
811 }
812 EXPORT_SYMBOL_GPL(tracing_is_on);
813
814 static int __init set_buf_size(char *str)
815 {
816         unsigned long buf_size;
817
818         if (!str)
819                 return 0;
820         buf_size = memparse(str, &str);
821         /* nr_entries can not be zero */
822         if (buf_size == 0)
823                 return 0;
824         trace_buf_size = buf_size;
825         return 1;
826 }
827 __setup("trace_buf_size=", set_buf_size);
828
829 static int __init set_tracing_thresh(char *str)
830 {
831         unsigned long threshold;
832         int ret;
833
834         if (!str)
835                 return 0;
836         ret = kstrtoul(str, 0, &threshold);
837         if (ret < 0)
838                 return 0;
839         tracing_thresh = threshold * 1000;
840         return 1;
841 }
842 __setup("tracing_thresh=", set_tracing_thresh);
843
844 unsigned long nsecs_to_usecs(unsigned long nsecs)
845 {
846         return nsecs / 1000;
847 }
848
849 /*
850  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
851  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
852  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
853  * of strings in the order that the enums were defined.
854  */
855 #undef C
856 #define C(a, b) b
857
858 /* These must match the bit positions in trace_iterator_flags */
859 static const char *trace_options[] = {
860         TRACE_FLAGS
861         NULL
862 };
863
864 static struct {
865         u64 (*func)(void);
866         const char *name;
867         int in_ns;              /* is this clock in nanoseconds? */
868 } trace_clocks[] = {
869         { trace_clock_local,            "local",        1 },
870         { trace_clock_global,           "global",       1 },
871         { trace_clock_counter,          "counter",      0 },
872         { trace_clock_jiffies,          "uptime",       0 },
873         { trace_clock,                  "perf",         1 },
874         { ktime_get_mono_fast_ns,       "mono",         1 },
875         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
876         ARCH_TRACE_CLOCKS
877 };
878
879 /*
880  * trace_parser_get_init - gets the buffer for trace parser
881  */
882 int trace_parser_get_init(struct trace_parser *parser, int size)
883 {
884         memset(parser, 0, sizeof(*parser));
885
886         parser->buffer = kmalloc(size, GFP_KERNEL);
887         if (!parser->buffer)
888                 return 1;
889
890         parser->size = size;
891         return 0;
892 }
893
894 /*
895  * trace_parser_put - frees the buffer for trace parser
896  */
897 void trace_parser_put(struct trace_parser *parser)
898 {
899         kfree(parser->buffer);
900 }
901
902 /*
903  * trace_get_user - reads the user input string separated by space
904  * (matched by isspace(ch))
905  *
906  * For each string found the 'struct trace_parser' is updated,
907  * and the function returns.
908  *
909  * Returns number of bytes read.
910  *
911  * See kernel/trace/trace.h for 'struct trace_parser' details.
912  */
913 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
914         size_t cnt, loff_t *ppos)
915 {
916         char ch;
917         size_t read = 0;
918         ssize_t ret;
919
920         if (!*ppos)
921                 trace_parser_clear(parser);
922
923         ret = get_user(ch, ubuf++);
924         if (ret)
925                 goto out;
926
927         read++;
928         cnt--;
929
930         /*
931          * The parser is not finished with the last write,
932          * continue reading the user input without skipping spaces.
933          */
934         if (!parser->cont) {
935                 /* skip white space */
936                 while (cnt && isspace(ch)) {
937                         ret = get_user(ch, ubuf++);
938                         if (ret)
939                                 goto out;
940                         read++;
941                         cnt--;
942                 }
943
944                 /* only spaces were written */
945                 if (isspace(ch)) {
946                         *ppos += read;
947                         ret = read;
948                         goto out;
949                 }
950
951                 parser->idx = 0;
952         }
953
954         /* read the non-space input */
955         while (cnt && !isspace(ch)) {
956                 if (parser->idx < parser->size - 1)
957                         parser->buffer[parser->idx++] = ch;
958                 else {
959                         ret = -EINVAL;
960                         goto out;
961                 }
962                 ret = get_user(ch, ubuf++);
963                 if (ret)
964                         goto out;
965                 read++;
966                 cnt--;
967         }
968
969         /* We either got finished input or we have to wait for another call. */
970         if (isspace(ch)) {
971                 parser->buffer[parser->idx] = 0;
972                 parser->cont = false;
973         } else if (parser->idx < parser->size - 1) {
974                 parser->cont = true;
975                 parser->buffer[parser->idx++] = ch;
976         } else {
977                 ret = -EINVAL;
978                 goto out;
979         }
980
981         *ppos += read;
982         ret = read;
983
984 out:
985         return ret;
986 }
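
/*
 * Illustrative behaviour of trace_get_user() (hypothetical input): a write
 * of "foo bar\n" yields two complete words across successive calls, "foo"
 * terminated by the space and "bar" terminated by the newline. If a write
 * ends in the middle of a word, parser->cont is set so that the next write
 * continues that word instead of starting a new one.
 */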
987
988 /* TODO add a seq_buf_to_buffer() */
989 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
990 {
991         int len;
992
993         if (trace_seq_used(s) <= s->seq.readpos)
994                 return -EBUSY;
995
996         len = trace_seq_used(s) - s->seq.readpos;
997         if (cnt > len)
998                 cnt = len;
999         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1000
1001         s->seq.readpos += cnt;
1002         return cnt;
1003 }
1004
1005 unsigned long __read_mostly     tracing_thresh;
1006
1007 #ifdef CONFIG_TRACER_MAX_TRACE
1008 /*
1009  * Copy the new maximum trace into the separate maximum-trace
1010  * structure. (this way the maximum trace is permanently saved,
1011  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1012  */
1013 static void
1014 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1015 {
1016         struct trace_buffer *trace_buf = &tr->trace_buffer;
1017         struct trace_buffer *max_buf = &tr->max_buffer;
1018         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1019         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1020
1021         max_buf->cpu = cpu;
1022         max_buf->time_start = data->preempt_timestamp;
1023
1024         max_data->saved_latency = tr->max_latency;
1025         max_data->critical_start = data->critical_start;
1026         max_data->critical_end = data->critical_end;
1027
1028         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1029         max_data->pid = tsk->pid;
1030         /*
1031          * If tsk == current, then use current_uid(), as that does not use
1032          * RCU. The irq tracer can be called out of RCU scope.
1033          */
1034         if (tsk == current)
1035                 max_data->uid = current_uid();
1036         else
1037                 max_data->uid = task_uid(tsk);
1038
1039         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1040         max_data->policy = tsk->policy;
1041         max_data->rt_priority = tsk->rt_priority;
1042
1043         /* record this task's comm */
1044         tracing_record_cmdline(tsk);
1045 }
1046
1047 /**
1048  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1049  * @tr: tracer
1050  * @tsk: the task with the latency
1051  * @cpu: The cpu that initiated the trace.
1052  *
1053  * Flip the buffers between the @tr and the max_tr and record information
1054  * about which task was the cause of this latency.
1055  */
1056 void
1057 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1058 {
1059         struct ring_buffer *buf;
1060
1061         if (tr->stop_count)
1062                 return;
1063
1064         WARN_ON_ONCE(!irqs_disabled());
1065
1066         if (!tr->allocated_snapshot) {
1067                 /* Only the nop tracer should hit this when disabling */
1068                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1069                 return;
1070         }
1071
1072         arch_spin_lock(&tr->max_lock);
1073
1074         buf = tr->trace_buffer.buffer;
1075         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1076         tr->max_buffer.buffer = buf;
1077
1078         __update_max_tr(tr, tsk, cpu);
1079         arch_spin_unlock(&tr->max_lock);
1080 }
1081
1082 /**
1083  * update_max_tr_single - only copy one trace over, and reset the rest
1084  * @tr - tracer
1085  * @tsk - task with the latency
1086  * @cpu - the cpu of the buffer to copy.
1087  *
1088  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1089  */
1090 void
1091 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1092 {
1093         int ret;
1094
1095         if (tr->stop_count)
1096                 return;
1097
1098         WARN_ON_ONCE(!irqs_disabled());
1099         if (!tr->allocated_snapshot) {
1100                 /* Only the nop tracer should hit this when disabling */
1101                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1102                 return;
1103         }
1104
1105         arch_spin_lock(&tr->max_lock);
1106
1107         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1108
1109         if (ret == -EBUSY) {
1110                 /*
1111                  * We failed to swap the buffer due to a commit taking
1112                  * place on this CPU. We fail to record, but we reset
1113                  * the max trace buffer (no one writes directly to it)
1114                  * and flag that it failed.
1115                  */
1116                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1117                         "Failed to swap buffers due to commit in progress\n");
1118         }
1119
1120         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1121
1122         __update_max_tr(tr, tsk, cpu);
1123         arch_spin_unlock(&tr->max_lock);
1124 }
1125 #endif /* CONFIG_TRACER_MAX_TRACE */
1126
1127 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1128 {
1129         /* Iterators are static, they should be filled or empty */
1130         if (trace_buffer_iter(iter, iter->cpu_file))
1131                 return 0;
1132
1133         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1134                                 full);
1135 }
1136
1137 #ifdef CONFIG_FTRACE_STARTUP_TEST
1138 static int run_tracer_selftest(struct tracer *type)
1139 {
1140         struct trace_array *tr = &global_trace;
1141         struct tracer *saved_tracer = tr->current_trace;
1142         int ret;
1143
1144         if (!type->selftest || tracing_selftest_disabled)
1145                 return 0;
1146
1147         /*
1148          * Run a selftest on this tracer.
1149          * Here we reset the trace buffer, and set the current
1150          * tracer to be this tracer. The tracer can then run some
1151          * internal tracing to verify that everything is in order.
1152          * If we fail, we do not register this tracer.
1153          */
1154         tracing_reset_online_cpus(&tr->trace_buffer);
1155
1156         tr->current_trace = type;
1157
1158 #ifdef CONFIG_TRACER_MAX_TRACE
1159         if (type->use_max_tr) {
1160                 /* If we expanded the buffers, make sure the max is expanded too */
1161                 if (ring_buffer_expanded)
1162                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1163                                            RING_BUFFER_ALL_CPUS);
1164                 tr->allocated_snapshot = true;
1165         }
1166 #endif
1167
1168         /* the test is responsible for initializing and enabling */
1169         pr_info("Testing tracer %s: ", type->name);
1170         ret = type->selftest(type, tr);
1171         /* the test is responsible for resetting too */
1172         tr->current_trace = saved_tracer;
1173         if (ret) {
1174                 printk(KERN_CONT "FAILED!\n");
1175                 /* Add the warning after printing 'FAILED' */
1176                 WARN_ON(1);
1177                 return -1;
1178         }
1179         /* Only reset on passing, to avoid touching corrupted buffers */
1180         tracing_reset_online_cpus(&tr->trace_buffer);
1181
1182 #ifdef CONFIG_TRACER_MAX_TRACE
1183         if (type->use_max_tr) {
1184                 tr->allocated_snapshot = false;
1185
1186                 /* Shrink the max buffer again */
1187                 if (ring_buffer_expanded)
1188                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1189                                            RING_BUFFER_ALL_CPUS);
1190         }
1191 #endif
1192
1193         printk(KERN_CONT "PASSED\n");
1194         return 0;
1195 }
1196 #else
1197 static inline int run_tracer_selftest(struct tracer *type)
1198 {
1199         return 0;
1200 }
1201 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1202
1203 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1204
1205 static void __init apply_trace_boot_options(void);
1206
1207 /**
1208  * register_tracer - register a tracer with the ftrace system.
1209  * @type - the plugin for the tracer
1210  *
1211  * Register a new plugin tracer.
1212  */
1213 int __init register_tracer(struct tracer *type)
1214 {
1215         struct tracer *t;
1216         int ret = 0;
1217
1218         if (!type->name) {
1219                 pr_info("Tracer must have a name\n");
1220                 return -1;
1221         }
1222
1223         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1224                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1225                 return -1;
1226         }
1227
1228         mutex_lock(&trace_types_lock);
1229
1230         tracing_selftest_running = true;
1231
1232         for (t = trace_types; t; t = t->next) {
1233                 if (strcmp(type->name, t->name) == 0) {
1234                         /* already found */
1235                         pr_info("Tracer %s already registered\n",
1236                                 type->name);
1237                         ret = -1;
1238                         goto out;
1239                 }
1240         }
1241
1242         if (!type->set_flag)
1243                 type->set_flag = &dummy_set_flag;
1244         if (!type->flags) {
1245                 /* allocate a dummy tracer_flags */
1246                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1247                 if (!type->flags) {
1248                         ret = -ENOMEM;
1249                         goto out;
1250                 }
1251                 type->flags->val = 0;
1252                 type->flags->opts = dummy_tracer_opt;
1253         } else
1254                 if (!type->flags->opts)
1255                         type->flags->opts = dummy_tracer_opt;
1256
1257         /* store the tracer for __set_tracer_option */
1258         type->flags->trace = type;
1259
1260         ret = run_tracer_selftest(type);
1261         if (ret < 0)
1262                 goto out;
1263
1264         type->next = trace_types;
1265         trace_types = type;
1266         add_tracer_options(&global_trace, type);
1267
1268  out:
1269         tracing_selftest_running = false;
1270         mutex_unlock(&trace_types_lock);
1271
1272         if (ret || !default_bootup_tracer)
1273                 goto out_unlock;
1274
1275         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1276                 goto out_unlock;
1277
1278         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1279         /* Do we want this tracer to start on bootup? */
1280         tracing_set_tracer(&global_trace, type->name);
1281         default_bootup_tracer = NULL;
1282
1283         apply_trace_boot_options();
1284
1285         /* disable other selftests, since this will break it. */
1286         tracing_selftest_disabled = true;
1287 #ifdef CONFIG_FTRACE_STARTUP_TEST
1288         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1289                type->name);
1290 #endif
1291
1292  out_unlock:
1293         return ret;
1294 }
1295
1296 void tracing_reset(struct trace_buffer *buf, int cpu)
1297 {
1298         struct ring_buffer *buffer = buf->buffer;
1299
1300         if (!buffer)
1301                 return;
1302
1303         ring_buffer_record_disable(buffer);
1304
1305         /* Make sure all commits have finished */
1306         synchronize_sched();
1307         ring_buffer_reset_cpu(buffer, cpu);
1308
1309         ring_buffer_record_enable(buffer);
1310 }
1311
1312 void tracing_reset_online_cpus(struct trace_buffer *buf)
1313 {
1314         struct ring_buffer *buffer = buf->buffer;
1315         int cpu;
1316
1317         if (!buffer)
1318                 return;
1319
1320         ring_buffer_record_disable(buffer);
1321
1322         /* Make sure all commits have finished */
1323         synchronize_sched();
1324
1325         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1326
1327         for_each_online_cpu(cpu)
1328                 ring_buffer_reset_cpu(buffer, cpu);
1329
1330         ring_buffer_record_enable(buffer);
1331 }
1332
1333 /* Must have trace_types_lock held */
1334 void tracing_reset_all_online_cpus(void)
1335 {
1336         struct trace_array *tr;
1337
1338         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1339                 tracing_reset_online_cpus(&tr->trace_buffer);
1340 #ifdef CONFIG_TRACER_MAX_TRACE
1341                 tracing_reset_online_cpus(&tr->max_buffer);
1342 #endif
1343         }
1344 }
1345
1346 #define SAVED_CMDLINES_DEFAULT 128
1347 #define NO_CMDLINE_MAP UINT_MAX
1348 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1349 struct saved_cmdlines_buffer {
1350         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1351         unsigned *map_cmdline_to_pid;
1352         unsigned cmdline_num;
1353         int cmdline_idx;
1354         char *saved_cmdlines;
1355 };
1356 static struct saved_cmdlines_buffer *savedcmd;
1357
1358 /* temporary disable recording */
1359 static atomic_t trace_record_cmdline_disabled __read_mostly;
1360
1361 static inline char *get_saved_cmdlines(int idx)
1362 {
1363         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1364 }
1365
1366 static inline void set_cmdline(int idx, const char *cmdline)
1367 {
1368         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1369 }
1370
1371 static int allocate_cmdlines_buffer(unsigned int val,
1372                                     struct saved_cmdlines_buffer *s)
1373 {
1374         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1375                                         GFP_KERNEL);
1376         if (!s->map_cmdline_to_pid)
1377                 return -ENOMEM;
1378
1379         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1380         if (!s->saved_cmdlines) {
1381                 kfree(s->map_cmdline_to_pid);
1382                 return -ENOMEM;
1383         }
1384
1385         s->cmdline_idx = 0;
1386         s->cmdline_num = val;
1387         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1388                sizeof(s->map_pid_to_cmdline));
1389         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1390                val * sizeof(*s->map_cmdline_to_pid));
1391
1392         return 0;
1393 }
1394
1395 static int trace_create_savedcmd(void)
1396 {
1397         int ret;
1398
1399         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1400         if (!savedcmd)
1401                 return -ENOMEM;
1402
1403         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1404         if (ret < 0) {
1405                 kfree(savedcmd);
1406                 savedcmd = NULL;
1407                 return -ENOMEM;
1408         }
1409
1410         return 0;
1411 }
1412
1413 int is_tracing_stopped(void)
1414 {
1415         return global_trace.stop_count;
1416 }
1417
1418 /**
1419  * tracing_start - quick start of the tracer
1420  *
1421  * If tracing is enabled but was stopped by tracing_stop,
1422  * this will start the tracer back up.
1423  */
1424 void tracing_start(void)
1425 {
1426         struct ring_buffer *buffer;
1427         unsigned long flags;
1428
1429         if (tracing_disabled)
1430                 return;
1431
1432         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1433         if (--global_trace.stop_count) {
1434                 if (global_trace.stop_count < 0) {
1435                         /* Someone screwed up their debugging */
1436                         WARN_ON_ONCE(1);
1437                         global_trace.stop_count = 0;
1438                 }
1439                 goto out;
1440         }
1441
1442         /* Prevent the buffers from switching */
1443         arch_spin_lock(&global_trace.max_lock);
1444
1445         buffer = global_trace.trace_buffer.buffer;
1446         if (buffer)
1447                 ring_buffer_record_enable(buffer);
1448
1449 #ifdef CONFIG_TRACER_MAX_TRACE
1450         buffer = global_trace.max_buffer.buffer;
1451         if (buffer)
1452                 ring_buffer_record_enable(buffer);
1453 #endif
1454
1455         arch_spin_unlock(&global_trace.max_lock);
1456
1457  out:
1458         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1459 }
1460
1461 static void tracing_start_tr(struct trace_array *tr)
1462 {
1463         struct ring_buffer *buffer;
1464         unsigned long flags;
1465
1466         if (tracing_disabled)
1467                 return;
1468
1469         /* If global, we need to also start the max tracer */
1470         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1471                 return tracing_start();
1472
1473         raw_spin_lock_irqsave(&tr->start_lock, flags);
1474
1475         if (--tr->stop_count) {
1476                 if (tr->stop_count < 0) {
1477                         /* Someone screwed up their debugging */
1478                         WARN_ON_ONCE(1);
1479                         tr->stop_count = 0;
1480                 }
1481                 goto out;
1482         }
1483
1484         buffer = tr->trace_buffer.buffer;
1485         if (buffer)
1486                 ring_buffer_record_enable(buffer);
1487
1488  out:
1489         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1490 }
1491
1492 /**
1493  * tracing_stop - quick stop of the tracer
1494  *
1495  * Light weight way to stop tracing. Use in conjunction with
1496  * tracing_start.
1497  */
1498 void tracing_stop(void)
1499 {
1500         struct ring_buffer *buffer;
1501         unsigned long flags;
1502
1503         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1504         if (global_trace.stop_count++)
1505                 goto out;
1506
1507         /* Prevent the buffers from switching */
1508         arch_spin_lock(&global_trace.max_lock);
1509
1510         buffer = global_trace.trace_buffer.buffer;
1511         if (buffer)
1512                 ring_buffer_record_disable(buffer);
1513
1514 #ifdef CONFIG_TRACER_MAX_TRACE
1515         buffer = global_trace.max_buffer.buffer;
1516         if (buffer)
1517                 ring_buffer_record_disable(buffer);
1518 #endif
1519
1520         arch_spin_unlock(&global_trace.max_lock);
1521
1522  out:
1523         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1524 }
1525
1526 static void tracing_stop_tr(struct trace_array *tr)
1527 {
1528         struct ring_buffer *buffer;
1529         unsigned long flags;
1530
1531         /* If global, we need to also stop the max tracer */
1532         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1533                 return tracing_stop();
1534
1535         raw_spin_lock_irqsave(&tr->start_lock, flags);
1536         if (tr->stop_count++)
1537                 goto out;
1538
1539         buffer = tr->trace_buffer.buffer;
1540         if (buffer)
1541                 ring_buffer_record_disable(buffer);
1542
1543  out:
1544         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1545 }
1546
1547 void trace_stop_cmdline_recording(void);
1548
1549 static int trace_save_cmdline(struct task_struct *tsk)
1550 {
1551         unsigned pid, idx;
1552
1553         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1554                 return 0;
1555
1556         /*
1557          * It's not the end of the world if we don't get
1558          * the lock, but we also don't want to spin
1559          * nor do we want to disable interrupts,
1560          * so if we miss here, then better luck next time.
1561          */
1562         if (!arch_spin_trylock(&trace_cmdline_lock))
1563                 return 0;
1564
1565         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1566         if (idx == NO_CMDLINE_MAP) {
1567                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1568
1569                 /*
1570                  * Check whether the cmdline buffer at idx has a pid
1571                  * mapped. We are going to overwrite that entry so we
1572                  * need to clear the map_pid_to_cmdline. Otherwise we
1573                  * would read the new comm for the old pid.
1574                  */
1575                 pid = savedcmd->map_cmdline_to_pid[idx];
1576                 if (pid != NO_CMDLINE_MAP)
1577                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1578
1579                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1580                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1581
1582                 savedcmd->cmdline_idx = idx;
1583         }
1584
1585         set_cmdline(idx, tsk->comm);
1586
1587         arch_spin_unlock(&trace_cmdline_lock);
1588
1589         return 1;
1590 }
1591
1592 static void __trace_find_cmdline(int pid, char comm[])
1593 {
1594         unsigned map;
1595
1596         if (!pid) {
1597                 strcpy(comm, "<idle>");
1598                 return;
1599         }
1600
1601         if (WARN_ON_ONCE(pid < 0)) {
1602                 strcpy(comm, "<XXX>");
1603                 return;
1604         }
1605
1606         if (pid > PID_MAX_DEFAULT) {
1607                 strcpy(comm, "<...>");
1608                 return;
1609         }
1610
1611         map = savedcmd->map_pid_to_cmdline[pid];
1612         if (map != NO_CMDLINE_MAP)
1613                 strcpy(comm, get_saved_cmdlines(map));
1614         else
1615                 strcpy(comm, "<...>");
1616 }
1617
1618 void trace_find_cmdline(int pid, char comm[])
1619 {
1620         preempt_disable();
1621         arch_spin_lock(&trace_cmdline_lock);
1622
1623         __trace_find_cmdline(pid, comm);
1624
1625         arch_spin_unlock(&trace_cmdline_lock);
1626         preempt_enable();
1627 }
1628
1629 void tracing_record_cmdline(struct task_struct *tsk)
1630 {
1631         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1632                 return;
1633
1634         if (!__this_cpu_read(trace_cmdline_save))
1635                 return;
1636
1637         if (trace_save_cmdline(tsk))
1638                 __this_cpu_write(trace_cmdline_save, false);
1639 }
1640
1641 void
1642 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1643                              int pc)
1644 {
1645         struct task_struct *tsk = current;
1646
1647         entry->preempt_count            = pc & 0xff;
1648         entry->pid                      = (tsk) ? tsk->pid : 0;
1649         entry->flags =
1650 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1651                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1652 #else
1653                 TRACE_FLAG_IRQS_NOSUPPORT |
1654 #endif
1655                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1656                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1657                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1658                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1659                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1660 }
1661 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1662
1663 static __always_inline void
1664 trace_event_setup(struct ring_buffer_event *event,
1665                   int type, unsigned long flags, int pc)
1666 {
1667         struct trace_entry *ent = ring_buffer_event_data(event);
1668
1669         tracing_generic_entry_update(ent, flags, pc);
1670         ent->type = type;
1671 }
1672
1673 struct ring_buffer_event *
1674 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1675                           int type,
1676                           unsigned long len,
1677                           unsigned long flags, int pc)
1678 {
1679         struct ring_buffer_event *event;
1680
1681         event = ring_buffer_lock_reserve(buffer, len);
1682         if (event != NULL)
1683                 trace_event_setup(event, type, flags, pc);
1684
1685         return event;
1686 }
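
/*
 * Illustrative sketch (not part of the original file): writing an entry
 * follows a reserve / fill / commit pattern. trace_function() below is a
 * real instance of it; a minimal version looks like:
 *
 *	struct ring_buffer_event *event;
 *	struct ftrace_entry *entry;
 *
 *	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
 *					  flags, pc);
 *	if (!event)
 *		return;
 *	entry = ring_buffer_event_data(event);
 *	entry->ip = ip;
 *	entry->parent_ip = parent_ip;
 *	__buffer_unlock_commit(buffer, event);
 *
 * A NULL return means the ring buffer refused the reservation and the
 * event must simply be dropped.
 */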
1687
1688 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1689 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1690 static int trace_buffered_event_ref;
1691
1692 /**
1693  * trace_buffered_event_enable - enable buffering events
1694  *
1695  * When events are being filtered, it is quicker to write the event
1696  * data into a temporary buffer when there is a likely chance that it
1697  * will not be committed. Discarding an event from the ring buffer is
1698  * not as fast as committing one, and is much slower than copying a
1699  * buffered event and committing the copy.
1700  *
1701  * When events are to be filtered, allocate per-CPU buffers to write
1702  * the event data into. If an event is filtered and discarded, it is
1703  * simply dropped; otherwise, the entire data is committed to the ring
1704  * buffer in one shot.
1705  */
1706 void trace_buffered_event_enable(void)
1707 {
1708         struct ring_buffer_event *event;
1709         struct page *page;
1710         int cpu;
1711
1712         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1713
1714         if (trace_buffered_event_ref++)
1715                 return;
1716
1717         for_each_tracing_cpu(cpu) {
1718                 page = alloc_pages_node(cpu_to_node(cpu),
1719                                         GFP_KERNEL | __GFP_NORETRY, 0);
1720                 if (!page)
1721                         goto failed;
1722
1723                 event = page_address(page);
1724                 memset(event, 0, sizeof(*event));
1725
1726                 per_cpu(trace_buffered_event, cpu) = event;
1727
1728                 preempt_disable();
1729                 if (cpu == smp_processor_id() &&
1730                     this_cpu_read(trace_buffered_event) !=
1731                     per_cpu(trace_buffered_event, cpu))
1732                         WARN_ON_ONCE(1);
1733                 preempt_enable();
1734         }
1735
1736         return;
1737  failed:
1738         trace_buffered_event_disable();
1739 }
1740
1741 static void enable_trace_buffered_event(void *data)
1742 {
1743         /* Probably not needed, but do it anyway */
1744         smp_rmb();
1745         this_cpu_dec(trace_buffered_event_cnt);
1746 }
1747
1748 static void disable_trace_buffered_event(void *data)
1749 {
1750         this_cpu_inc(trace_buffered_event_cnt);
1751 }
1752
1753 /**
1754  * trace_buffered_event_disable - disable buffering events
1755  *
1756  * When a filter is removed, it is faster to not use the buffered
1757  * events, and to commit directly into the ring buffer. Free up
1758  * the temp buffers when there are no more users. This requires
1759  * special synchronization with current events.
1760  */
1761 void trace_buffered_event_disable(void)
1762 {
1763         int cpu;
1764
1765         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1766
1767         if (WARN_ON_ONCE(!trace_buffered_event_ref))
1768                 return;
1769
1770         if (--trace_buffered_event_ref)
1771                 return;
1772
1773         preempt_disable();
1774         /* For each CPU, set the buffer as used. */
1775         smp_call_function_many(tracing_buffer_mask,
1776                                disable_trace_buffered_event, NULL, 1);
1777         preempt_enable();
1778
1779         /* Wait for all current users to finish */
1780         synchronize_sched();
1781
1782         for_each_tracing_cpu(cpu) {
1783                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
1784                 per_cpu(trace_buffered_event, cpu) = NULL;
1785         }
1786         /*
1787          * Make sure trace_buffered_event is NULL before clearing
1788          * trace_buffered_event_cnt.
1789          */
1790         smp_wmb();
1791
1792         preempt_disable();
1793         /* Do the work on each cpu */
1794         smp_call_function_many(tracing_buffer_mask,
1795                                enable_trace_buffered_event, NULL, 1);
1796         preempt_enable();
1797 }
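
/*
 * Illustrative sketch (an assumption about the expected caller, not taken
 * from this file): the event filter code is expected to bracket filter
 * installation and removal with this refcounted pair, under event_mutex:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		(filter added)
 *	...
 *	trace_buffered_event_disable();		(filter removed)
 *	mutex_unlock(&event_mutex);
 *
 * Both functions warn if event_mutex is not held; only the first enable
 * allocates the per-CPU pages and only the last disable frees them.
 */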
1798
1799 void
1800 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1801 {
1802         __this_cpu_write(trace_cmdline_save, true);
1803
1804         /* If this is the temp buffer, we need to commit fully */
1805         if (this_cpu_read(trace_buffered_event) == event) {
1806                 /* Length is in event->array[0] */
1807                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1808                 /* Release the temp buffer */
1809                 this_cpu_dec(trace_buffered_event_cnt);
1810         } else
1811                 ring_buffer_unlock_commit(buffer, event);
1812 }
1813
1814 static struct ring_buffer *temp_buffer;
1815
1816 struct ring_buffer_event *
1817 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1818                           struct trace_event_file *trace_file,
1819                           int type, unsigned long len,
1820                           unsigned long flags, int pc)
1821 {
1822         struct ring_buffer_event *entry;
1823         int val;
1824
1825         *current_rb = trace_file->tr->trace_buffer.buffer;
1826
1827         if ((trace_file->flags &
1828              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
1829             (entry = this_cpu_read(trace_buffered_event))) {
1830                 /* Try to use the per cpu buffer first */
1831                 val = this_cpu_inc_return(trace_buffered_event_cnt);
1832                 if (val == 1) {
1833                         trace_event_setup(entry, type, flags, pc);
1834                         entry->array[0] = len;
1835                         return entry;
1836                 }
1837                 this_cpu_dec(trace_buffered_event_cnt);
1838         }
1839
1840         entry = trace_buffer_lock_reserve(*current_rb,
1841                                          type, len, flags, pc);
1842         /*
1843          * If tracing is off, but we have triggers enabled,
1844          * we still need to look at the event data. Use the temp_buffer
1845          * to store the trace event for the trigger to use. It's recursion
1846          * safe and will not be recorded anywhere.
1847          */
1848         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
1849                 *current_rb = temp_buffer;
1850                 entry = trace_buffer_lock_reserve(*current_rb,
1851                                                   type, len, flags, pc);
1852         }
1853         return entry;
1854 }
1855 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
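
/*
 * Illustrative sketch (a simplification, not the generated trace event
 * code): an event probe reserves through this helper so that an event that
 * is likely to be filtered out lands in the cheap per-CPU page first:
 *
 *	event = trace_event_buffer_lock_reserve(&buffer, trace_file, type,
 *						sizeof(*entry), flags, pc);
 *	if (!event)
 *		return;
 *	entry = ring_buffer_event_data(event);
 *	(fill in the event fields, run the filter)
 *	__buffer_unlock_commit(buffer, event);
 *
 * __buffer_unlock_commit() above copies a buffered event into the ring
 * buffer, or commits in place when the reservation came from the ring
 * buffer directly.
 */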
1856
1857 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
1858                                      struct ring_buffer *buffer,
1859                                      struct ring_buffer_event *event,
1860                                      unsigned long flags, int pc,
1861                                      struct pt_regs *regs)
1862 {
1863         __buffer_unlock_commit(buffer, event);
1864
1865         ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
1866         ftrace_trace_userstack(buffer, flags, pc);
1867 }
1868
1869 void
1870 trace_function(struct trace_array *tr,
1871                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1872                int pc)
1873 {
1874         struct trace_event_call *call = &event_function;
1875         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1876         struct ring_buffer_event *event;
1877         struct ftrace_entry *entry;
1878
1879         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1880                                           flags, pc);
1881         if (!event)
1882                 return;
1883         entry   = ring_buffer_event_data(event);
1884         entry->ip                       = ip;
1885         entry->parent_ip                = parent_ip;
1886
1887         if (!call_filter_check_discard(call, entry, buffer, event))
1888                 __buffer_unlock_commit(buffer, event);
1889 }
1890
1891 #ifdef CONFIG_STACKTRACE
1892
1893 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1894 struct ftrace_stack {
1895         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1896 };
1897
1898 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1899 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1900
1901 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1902                                  unsigned long flags,
1903                                  int skip, int pc, struct pt_regs *regs)
1904 {
1905         struct trace_event_call *call = &event_kernel_stack;
1906         struct ring_buffer_event *event;
1907         struct stack_entry *entry;
1908         struct stack_trace trace;
1909         int use_stack;
1910         int size = FTRACE_STACK_ENTRIES;
1911
1912         trace.nr_entries        = 0;
1913         trace.skip              = skip;
1914
1915         /*
1916          * Since events can happen in NMIs, there's no safe way to
1917          * use the per-CPU ftrace_stack. We reserve it, and if an interrupt
1918          * or NMI comes in, it will just have to use the default
1919          * FTRACE_STACK_SIZE.
1920          */
1921         preempt_disable_notrace();
1922
1923         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1924         /*
1925          * We don't need any atomic variables, just a barrier.
1926          * If an interrupt comes in, we don't care, because it would
1927          * have exited and put the counter back to what we want.
1928          * We just need a barrier to keep gcc from moving things
1929          * around.
1930          */
1931         barrier();
1932         if (use_stack == 1) {
1933                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1934                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1935
1936                 if (regs)
1937                         save_stack_trace_regs(regs, &trace);
1938                 else
1939                         save_stack_trace(&trace);
1940
1941                 if (trace.nr_entries > size)
1942                         size = trace.nr_entries;
1943         } else
1944                 /* From now on, use_stack is a boolean */
1945                 use_stack = 0;
1946
1947         size *= sizeof(unsigned long);
1948
1949         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1950                                           sizeof(*entry) + size, flags, pc);
1951         if (!event)
1952                 goto out;
1953         entry = ring_buffer_event_data(event);
1954
1955         memset(&entry->caller, 0, size);
1956
1957         if (use_stack)
1958                 memcpy(&entry->caller, trace.entries,
1959                        trace.nr_entries * sizeof(unsigned long));
1960         else {
1961                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1962                 trace.entries           = entry->caller;
1963                 if (regs)
1964                         save_stack_trace_regs(regs, &trace);
1965                 else
1966                         save_stack_trace(&trace);
1967         }
1968
1969         entry->size = trace.nr_entries;
1970
1971         if (!call_filter_check_discard(call, entry, buffer, event))
1972                 __buffer_unlock_commit(buffer, event);
1973
1974  out:
1975         /* Again, don't let gcc optimize things here */
1976         barrier();
1977         __this_cpu_dec(ftrace_stack_reserve);
1978         preempt_enable_notrace();
1979
1980 }
1981
1982 static inline void ftrace_trace_stack(struct trace_array *tr,
1983                                       struct ring_buffer *buffer,
1984                                       unsigned long flags,
1985                                       int skip, int pc, struct pt_regs *regs)
1986 {
1987         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
1988                 return;
1989
1990         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1991 }
1992
1993 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1994                    int pc)
1995 {
1996         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1997 }
1998
1999 /**
2000  * trace_dump_stack - record a stack back trace in the trace buffer
2001  * @skip: Number of functions to skip (helper handlers)
2002  */
2003 void trace_dump_stack(int skip)
2004 {
2005         unsigned long flags;
2006
2007         if (tracing_disabled || tracing_selftest_running)
2008                 return;
2009
2010         local_save_flags(flags);
2011
2012         /*
2013          * Skip 3 more frames; that seems to get us to the
2014          * caller of this function.
2015          */
2016         skip += 3;
2017         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2018                              flags, skip, preempt_count(), NULL);
2019 }
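
/*
 * Example (illustrative, not part of the original file): a debugging site
 * anywhere in the kernel can record a kernel stack back trace into the
 * trace buffer instead of the console:
 *
 *	trace_dump_stack(0);
 *
 * Passing 0 records starting at the caller of trace_dump_stack(); a
 * positive @skip drops that many additional frames from the top.
 */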
2020
2021 static DEFINE_PER_CPU(int, user_stack_count);
2022
2023 void
2024 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2025 {
2026         struct trace_event_call *call = &event_user_stack;
2027         struct ring_buffer_event *event;
2028         struct userstack_entry *entry;
2029         struct stack_trace trace;
2030
2031         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2032                 return;
2033
2034         /*
2035          * NMIs cannot handle page faults, even with fixups.
2036          * Saving the user stack can (and often does) fault.
2037          */
2038         if (unlikely(in_nmi()))
2039                 return;
2040
2041         /*
2042          * prevent recursion, since the user stack tracing may
2043          * trigger other kernel events.
2044          */
2045         preempt_disable();
2046         if (__this_cpu_read(user_stack_count))
2047                 goto out;
2048
2049         __this_cpu_inc(user_stack_count);
2050
2051         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2052                                           sizeof(*entry), flags, pc);
2053         if (!event)
2054                 goto out_drop_count;
2055         entry   = ring_buffer_event_data(event);
2056
2057         entry->tgid             = current->tgid;
2058         memset(&entry->caller, 0, sizeof(entry->caller));
2059
2060         trace.nr_entries        = 0;
2061         trace.max_entries       = FTRACE_STACK_ENTRIES;
2062         trace.skip              = 0;
2063         trace.entries           = entry->caller;
2064
2065         save_stack_trace_user(&trace);
2066         if (!call_filter_check_discard(call, entry, buffer, event))
2067                 __buffer_unlock_commit(buffer, event);
2068
2069  out_drop_count:
2070         __this_cpu_dec(user_stack_count);
2071  out:
2072         preempt_enable();
2073 }
2074
2075 #ifdef UNUSED
2076 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2077 {
2078         ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
2079 }
2080 #endif /* UNUSED */
2081
2082 #endif /* CONFIG_STACKTRACE */
2083
2084 /* created for use with alloc_percpu */
2085 struct trace_buffer_struct {
2086         char buffer[TRACE_BUF_SIZE];
2087 };
2088
2089 static struct trace_buffer_struct *trace_percpu_buffer;
2090 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
2091 static struct trace_buffer_struct *trace_percpu_irq_buffer;
2092 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
2093
2094 /*
2095  * The buffer used depends on the context. There is a per-CPU
2096  * buffer for normal context, softirq context, hard irq context and
2097  * NMI context. This allows for lockless recording.
2098  *
2099  * Note, if the buffers failed to be allocated, then this returns NULL.
2100  */
2101 static char *get_trace_buf(void)
2102 {
2103         struct trace_buffer_struct *percpu_buffer;
2104
2105         /*
2106          * If we have allocated per cpu buffers, then we do not
2107          * need to do any locking.
2108          */
2109         if (in_nmi())
2110                 percpu_buffer = trace_percpu_nmi_buffer;
2111         else if (in_irq())
2112                 percpu_buffer = trace_percpu_irq_buffer;
2113         else if (in_softirq())
2114                 percpu_buffer = trace_percpu_sirq_buffer;
2115         else
2116                 percpu_buffer = trace_percpu_buffer;
2117
2118         if (!percpu_buffer)
2119                 return NULL;
2120
2121         return this_cpu_ptr(&percpu_buffer->buffer[0]);
2122 }
2123
2124 static int alloc_percpu_trace_buffer(void)
2125 {
2126         struct trace_buffer_struct *buffers;
2127         struct trace_buffer_struct *sirq_buffers;
2128         struct trace_buffer_struct *irq_buffers;
2129         struct trace_buffer_struct *nmi_buffers;
2130
2131         buffers = alloc_percpu(struct trace_buffer_struct);
2132         if (!buffers)
2133                 goto err_warn;
2134
2135         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2136         if (!sirq_buffers)
2137                 goto err_sirq;
2138
2139         irq_buffers = alloc_percpu(struct trace_buffer_struct);
2140         if (!irq_buffers)
2141                 goto err_irq;
2142
2143         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2144         if (!nmi_buffers)
2145                 goto err_nmi;
2146
2147         trace_percpu_buffer = buffers;
2148         trace_percpu_sirq_buffer = sirq_buffers;
2149         trace_percpu_irq_buffer = irq_buffers;
2150         trace_percpu_nmi_buffer = nmi_buffers;
2151
2152         return 0;
2153
2154  err_nmi:
2155         free_percpu(irq_buffers);
2156  err_irq:
2157         free_percpu(sirq_buffers);
2158  err_sirq:
2159         free_percpu(buffers);
2160  err_warn:
2161         WARN(1, "Could not allocate percpu trace_printk buffer");
2162         return -ENOMEM;
2163 }
2164
2165 static int buffers_allocated;
2166
2167 void trace_printk_init_buffers(void)
2168 {
2169         if (buffers_allocated)
2170                 return;
2171
2172         if (alloc_percpu_trace_buffer())
2173                 return;
2174
2175         /* trace_printk() is for debug use only. Don't use it in production. */
2176
2177         pr_warn("\n");
2178         pr_warn("**********************************************************\n");
2179         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2180         pr_warn("**                                                      **\n");
2181         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2182         pr_warn("**                                                      **\n");
2183         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2184         pr_warn("** unsafe for production use.                           **\n");
2185         pr_warn("**                                                      **\n");
2186         pr_warn("** If you see this message and you are not debugging    **\n");
2187         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2188         pr_warn("**                                                      **\n");
2189         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2190         pr_warn("**********************************************************\n");
2191
2192         /* Expand the buffers to set size */
2193         tracing_update_buffers();
2194
2195         buffers_allocated = 1;
2196
2197         /*
2198          * trace_printk_init_buffers() can be called by modules.
2199          * If that happens, then we need to start cmdline recording
2200          * directly here. If the global_trace.buffer is already
2201          * allocated here, then this was called by module code.
2202          */
2203         if (global_trace.trace_buffer.buffer)
2204                 tracing_start_cmdline_record();
2205 }
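
/*
 * Example (illustrative): trace_printk() is what normally triggers this
 * allocation. A debug statement such as
 *
 *	trace_printk("processing item %d, state %lu\n", id, state);
 *
 * writes into the ring buffer rather than the printk log, which is why the
 * banner above is printed whenever the buffers get set up. "id" and
 * "state" are hypothetical variables used only for illustration.
 */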
2206
2207 void trace_printk_start_comm(void)
2208 {
2209         /* Start tracing comms if trace printk is set */
2210         if (!buffers_allocated)
2211                 return;
2212         tracing_start_cmdline_record();
2213 }
2214
2215 static void trace_printk_start_stop_comm(int enabled)
2216 {
2217         if (!buffers_allocated)
2218                 return;
2219
2220         if (enabled)
2221                 tracing_start_cmdline_record();
2222         else
2223                 tracing_stop_cmdline_record();
2224 }
2225
2226 /**
2227  * trace_vbprintk - write binary msg to tracing buffer
2228  *
2229  */
2230 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2231 {
2232         struct trace_event_call *call = &event_bprint;
2233         struct ring_buffer_event *event;
2234         struct ring_buffer *buffer;
2235         struct trace_array *tr = &global_trace;
2236         struct bprint_entry *entry;
2237         unsigned long flags;
2238         char *tbuffer;
2239         int len = 0, size, pc;
2240
2241         if (unlikely(tracing_selftest_running || tracing_disabled))
2242                 return 0;
2243
2244         /* Don't pollute graph traces with trace_vprintk internals */
2245         pause_graph_tracing();
2246
2247         pc = preempt_count();
2248         preempt_disable_notrace();
2249
2250         tbuffer = get_trace_buf();
2251         if (!tbuffer) {
2252                 len = 0;
2253                 goto out;
2254         }
2255
2256         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2257
2258         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2259                 goto out;
2260
2261         local_save_flags(flags);
2262         size = sizeof(*entry) + sizeof(u32) * len;
2263         buffer = tr->trace_buffer.buffer;
2264         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2265                                           flags, pc);
2266         if (!event)
2267                 goto out;
2268         entry = ring_buffer_event_data(event);
2269         entry->ip                       = ip;
2270         entry->fmt                      = fmt;
2271
2272         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2273         if (!call_filter_check_discard(call, entry, buffer, event)) {
2274                 __buffer_unlock_commit(buffer, event);
2275                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2276         }
2277
2278 out:
2279         preempt_enable_notrace();
2280         unpause_graph_tracing();
2281
2282         return len;
2283 }
2284 EXPORT_SYMBOL_GPL(trace_vbprintk);
2285
2286 static int
2287 __trace_array_vprintk(struct ring_buffer *buffer,
2288                       unsigned long ip, const char *fmt, va_list args)
2289 {
2290         struct trace_event_call *call = &event_print;
2291         struct ring_buffer_event *event;
2292         int len = 0, size, pc;
2293         struct print_entry *entry;
2294         unsigned long flags;
2295         char *tbuffer;
2296
2297         if (tracing_disabled || tracing_selftest_running)
2298                 return 0;
2299
2300         /* Don't pollute graph traces with trace_vprintk internals */
2301         pause_graph_tracing();
2302
2303         pc = preempt_count();
2304         preempt_disable_notrace();
2305
2306
2307         tbuffer = get_trace_buf();
2308         if (!tbuffer) {
2309                 len = 0;
2310                 goto out;
2311         }
2312
2313         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2314
2315         local_save_flags(flags);
2316         size = sizeof(*entry) + len + 1;
2317         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2318                                           flags, pc);
2319         if (!event)
2320                 goto out;
2321         entry = ring_buffer_event_data(event);
2322         entry->ip = ip;
2323
2324         memcpy(&entry->buf, tbuffer, len + 1);
2325         if (!call_filter_check_discard(call, entry, buffer, event)) {
2326                 __buffer_unlock_commit(buffer, event);
2327                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2328         }
2329  out:
2330         preempt_enable_notrace();
2331         unpause_graph_tracing();
2332
2333         return len;
2334 }
2335
2336 int trace_array_vprintk(struct trace_array *tr,
2337                         unsigned long ip, const char *fmt, va_list args)
2338 {
2339         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2340 }
2341
2342 int trace_array_printk(struct trace_array *tr,
2343                        unsigned long ip, const char *fmt, ...)
2344 {
2345         int ret;
2346         va_list ap;
2347
2348         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2349                 return 0;
2350
2351         va_start(ap, fmt);
2352         ret = trace_array_vprintk(tr, ip, fmt, ap);
2353         va_end(ap);
2354         return ret;
2355 }
2356
2357 int trace_array_printk_buf(struct ring_buffer *buffer,
2358                            unsigned long ip, const char *fmt, ...)
2359 {
2360         int ret;
2361         va_list ap;
2362
2363         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2364                 return 0;
2365
2366         va_start(ap, fmt);
2367         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2368         va_end(ap);
2369         return ret;
2370 }
2371
2372 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2373 {
2374         return trace_array_vprintk(&global_trace, ip, fmt, args);
2375 }
2376 EXPORT_SYMBOL_GPL(trace_vprintk);
2377
2378 static void trace_iterator_increment(struct trace_iterator *iter)
2379 {
2380         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2381
2382         iter->idx++;
2383         if (buf_iter)
2384                 ring_buffer_read(buf_iter, NULL);
2385 }
2386
2387 static struct trace_entry *
2388 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2389                 unsigned long *lost_events)
2390 {
2391         struct ring_buffer_event *event;
2392         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2393
2394         if (buf_iter)
2395                 event = ring_buffer_iter_peek(buf_iter, ts);
2396         else
2397                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2398                                          lost_events);
2399
2400         if (event) {
2401                 iter->ent_size = ring_buffer_event_length(event);
2402                 return ring_buffer_event_data(event);
2403         }
2404         iter->ent_size = 0;
2405         return NULL;
2406 }
2407
2408 static struct trace_entry *
2409 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2410                   unsigned long *missing_events, u64 *ent_ts)
2411 {
2412         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2413         struct trace_entry *ent, *next = NULL;
2414         unsigned long lost_events = 0, next_lost = 0;
2415         int cpu_file = iter->cpu_file;
2416         u64 next_ts = 0, ts;
2417         int next_cpu = -1;
2418         int next_size = 0;
2419         int cpu;
2420
2421         /*
2422          * If we are in a per_cpu trace file, don't bother iterating over
2423          * all CPUs; just peek at that CPU directly.
2424          */
2425         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2426                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2427                         return NULL;
2428                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2429                 if (ent_cpu)
2430                         *ent_cpu = cpu_file;
2431
2432                 return ent;
2433         }
2434
2435         for_each_tracing_cpu(cpu) {
2436
2437                 if (ring_buffer_empty_cpu(buffer, cpu))
2438                         continue;
2439
2440                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2441
2442                 /*
2443                  * Pick the entry with the smallest timestamp:
2444                  */
2445                 if (ent && (!next || ts < next_ts)) {
2446                         next = ent;
2447                         next_cpu = cpu;
2448                         next_ts = ts;
2449                         next_lost = lost_events;
2450                         next_size = iter->ent_size;
2451                 }
2452         }
2453
2454         iter->ent_size = next_size;
2455
2456         if (ent_cpu)
2457                 *ent_cpu = next_cpu;
2458
2459         if (ent_ts)
2460                 *ent_ts = next_ts;
2461
2462         if (missing_events)
2463                 *missing_events = next_lost;
2464
2465         return next;
2466 }
2467
2468 /* Find the next real entry, without updating the iterator itself */
2469 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2470                                           int *ent_cpu, u64 *ent_ts)
2471 {
2472         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2473 }
2474
2475 /* Find the next real entry, and increment the iterator to the next entry */
2476 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2477 {
2478         iter->ent = __find_next_entry(iter, &iter->cpu,
2479                                       &iter->lost_events, &iter->ts);
2480
2481         if (iter->ent)
2482                 trace_iterator_increment(iter);
2483
2484         return iter->ent ? iter : NULL;
2485 }
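
/*
 * Illustrative sketch (not from the original file): readers walk the merged
 * stream of all per-CPU buffers by repeatedly asking for the oldest entry,
 * which is the pattern s_next() below builds on:
 *
 *	while (trace_find_next_entry_inc(iter))
 *		print_trace_line(iter);
 *
 * __find_next_entry() picks the entry with the smallest timestamp across
 * all CPUs, so the combined output stays globally time ordered. Note that
 * print_trace_line() must be called with trace_event_read_lock() held.
 */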
2486
2487 static void trace_consume(struct trace_iterator *iter)
2488 {
2489         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2490                             &iter->lost_events);
2491 }
2492
2493 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2494 {
2495         struct trace_iterator *iter = m->private;
2496         int i = (int)*pos;
2497         void *ent;
2498
2499         WARN_ON_ONCE(iter->leftover);
2500
2501         (*pos)++;
2502
2503         /* can't go backwards */
2504         if (iter->idx > i)
2505                 return NULL;
2506
2507         if (iter->idx < 0)
2508                 ent = trace_find_next_entry_inc(iter);
2509         else
2510                 ent = iter;
2511
2512         while (ent && iter->idx < i)
2513                 ent = trace_find_next_entry_inc(iter);
2514
2515         iter->pos = *pos;
2516
2517         return ent;
2518 }
2519
2520 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2521 {
2522         struct ring_buffer_event *event;
2523         struct ring_buffer_iter *buf_iter;
2524         unsigned long entries = 0;
2525         u64 ts;
2526
2527         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2528
2529         buf_iter = trace_buffer_iter(iter, cpu);
2530         if (!buf_iter)
2531                 return;
2532
2533         ring_buffer_iter_reset(buf_iter);
2534
2535         /*
2536          * With the max latency tracers, we could have the case that
2537          * a reset never took place on a cpu. This is evident from
2538          * the timestamp being before the start of the buffer.
2539          */
2540         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2541                 if (ts >= iter->trace_buffer->time_start)
2542                         break;
2543                 entries++;
2544                 ring_buffer_read(buf_iter, NULL);
2545         }
2546
2547         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2548 }
2549
2550 /*
2551  * The current tracer is copied to avoid taking a global lock
2552  * all around.
2553  */
2554 static void *s_start(struct seq_file *m, loff_t *pos)
2555 {
2556         struct trace_iterator *iter = m->private;
2557         struct trace_array *tr = iter->tr;
2558         int cpu_file = iter->cpu_file;
2559         void *p = NULL;
2560         loff_t l = 0;
2561         int cpu;
2562
2563         /*
2564          * Copy the tracer to avoid using a global lock all around.
2565          * iter->trace is a copy of current_trace; the name pointer
2566          * may be compared instead of using strcmp(), as iter->trace->name
2567          * will point to the same string as current_trace->name.
2568          */
2569         mutex_lock(&trace_types_lock);
2570         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2571                 *iter->trace = *tr->current_trace;
2572         mutex_unlock(&trace_types_lock);
2573
2574 #ifdef CONFIG_TRACER_MAX_TRACE
2575         if (iter->snapshot && iter->trace->use_max_tr)
2576                 return ERR_PTR(-EBUSY);
2577 #endif
2578
2579         if (!iter->snapshot)
2580                 atomic_inc(&trace_record_cmdline_disabled);
2581
2582         if (*pos != iter->pos) {
2583                 iter->ent = NULL;
2584                 iter->cpu = 0;
2585                 iter->idx = -1;
2586
2587                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2588                         for_each_tracing_cpu(cpu)
2589                                 tracing_iter_reset(iter, cpu);
2590                 } else
2591                         tracing_iter_reset(iter, cpu_file);
2592
2593                 iter->leftover = 0;
2594                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2595                         ;
2596
2597         } else {
2598                 /*
2599                  * If we overflowed the seq_file before, then we want
2600                  * to just reuse the trace_seq buffer again.
2601                  */
2602                 if (iter->leftover)
2603                         p = iter;
2604                 else {
2605                         l = *pos - 1;
2606                         p = s_next(m, p, &l);
2607                 }
2608         }
2609
2610         trace_event_read_lock();
2611         trace_access_lock(cpu_file);
2612         return p;
2613 }
2614
2615 static void s_stop(struct seq_file *m, void *p)
2616 {
2617         struct trace_iterator *iter = m->private;
2618
2619 #ifdef CONFIG_TRACER_MAX_TRACE
2620         if (iter->snapshot && iter->trace->use_max_tr)
2621                 return;
2622 #endif
2623
2624         if (!iter->snapshot)
2625                 atomic_dec(&trace_record_cmdline_disabled);
2626
2627         trace_access_unlock(iter->cpu_file);
2628         trace_event_read_unlock();
2629 }
2630
2631 static void
2632 get_total_entries(struct trace_buffer *buf,
2633                   unsigned long *total, unsigned long *entries)
2634 {
2635         unsigned long count;
2636         int cpu;
2637
2638         *total = 0;
2639         *entries = 0;
2640
2641         for_each_tracing_cpu(cpu) {
2642                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2643                 /*
2644                  * If this buffer has skipped entries, then we hold all
2645                  * entries for the trace and we need to ignore the
2646                  * ones before the time stamp.
2647                  */
2648                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2649                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2650                         /* total is the same as the entries */
2651                         *total += count;
2652                 } else
2653                         *total += count +
2654                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2655                 *entries += count;
2656         }
2657 }
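
/*
 * Worked example (hypothetical numbers): if a CPU buffer currently holds
 * 100 entries and the ring buffer reports 40 overruns (entries that were
 * overwritten), this yields *entries = 100 and *total = 140 for that CPU.
 * When the latency tracers marked 20 entries as skipped instead, both
 * counts drop to 80, since those entries are still in the buffer but
 * predate the start of the trace window.
 */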
2658
2659 static void print_lat_help_header(struct seq_file *m)
2660 {
2661         seq_puts(m, "#                  _------=> CPU#            \n"
2662                     "#                 / _-----=> irqs-off        \n"
2663                     "#                | / _----=> need-resched    \n"
2664                     "#                || / _---=> hardirq/softirq \n"
2665                     "#                ||| / _--=> preempt-depth   \n"
2666                     "#                |||| /     delay            \n"
2667                     "#  cmd     pid   ||||| time  |   caller      \n"
2668                     "#     \\   /      |||||  \\    |   /         \n");
2669 }
2670
2671 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2672 {
2673         unsigned long total;
2674         unsigned long entries;
2675
2676         get_total_entries(buf, &total, &entries);
2677         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2678                    entries, total, num_online_cpus());
2679         seq_puts(m, "#\n");
2680 }
2681
2682 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2683 {
2684         print_event_info(buf, m);
2685         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2686                     "#              | |       |          |         |\n");
2687 }
2688
2689 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2690 {
2691         print_event_info(buf, m);
2692         seq_puts(m, "#                              _-----=> irqs-off\n"
2693                     "#                             / _----=> need-resched\n"
2694                     "#                            | / _---=> hardirq/softirq\n"
2695                     "#                            || / _--=> preempt-depth\n"
2696                     "#                            ||| /     delay\n"
2697                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2698                     "#              | |       |   ||||       |         |\n");
2699 }
2700
2701 void
2702 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2703 {
2704         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2705         struct trace_buffer *buf = iter->trace_buffer;
2706         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2707         struct tracer *type = iter->trace;
2708         unsigned long entries;
2709         unsigned long total;
2710         const char *name = "preemption";
2711
2712         name = type->name;
2713
2714         get_total_entries(buf, &total, &entries);
2715
2716         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2717                    name, UTS_RELEASE);
2718         seq_puts(m, "# -----------------------------------"
2719                  "---------------------------------\n");
2720         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2721                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2722                    nsecs_to_usecs(data->saved_latency),
2723                    entries,
2724                    total,
2725                    buf->cpu,
2726 #if defined(CONFIG_PREEMPT_NONE)
2727                    "server",
2728 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2729                    "desktop",
2730 #elif defined(CONFIG_PREEMPT)
2731                    "preempt",
2732 #else
2733                    "unknown",
2734 #endif
2735                    /* These are reserved for later use */
2736                    0, 0, 0, 0);
2737 #ifdef CONFIG_SMP
2738         seq_printf(m, " #P:%d)\n", num_online_cpus());
2739 #else
2740         seq_puts(m, ")\n");
2741 #endif
2742         seq_puts(m, "#    -----------------\n");
2743         seq_printf(m, "#    | task: %.16s-%d "
2744                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2745                    data->comm, data->pid,
2746                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2747                    data->policy, data->rt_priority);
2748         seq_puts(m, "#    -----------------\n");
2749
2750         if (data->critical_start) {
2751                 seq_puts(m, "#  => started at: ");
2752                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2753                 trace_print_seq(m, &iter->seq);
2754                 seq_puts(m, "\n#  => ended at:   ");
2755                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2756                 trace_print_seq(m, &iter->seq);
2757                 seq_puts(m, "\n#\n");
2758         }
2759
2760         seq_puts(m, "#\n");
2761 }
2762
2763 static void test_cpu_buff_start(struct trace_iterator *iter)
2764 {
2765         struct trace_seq *s = &iter->seq;
2766         struct trace_array *tr = iter->tr;
2767
2768         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
2769                 return;
2770
2771         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2772                 return;
2773
2774         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
2775                 return;
2776
2777         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2778                 return;
2779
2780         if (iter->started)
2781                 cpumask_set_cpu(iter->cpu, iter->started);
2782
2783         /* Don't print started cpu buffer for the first entry of the trace */
2784         if (iter->idx > 1)
2785                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2786                                 iter->cpu);
2787 }
2788
2789 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2790 {
2791         struct trace_array *tr = iter->tr;
2792         struct trace_seq *s = &iter->seq;
2793         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
2794         struct trace_entry *entry;
2795         struct trace_event *event;
2796
2797         entry = iter->ent;
2798
2799         test_cpu_buff_start(iter);
2800
2801         event = ftrace_find_event(entry->type);
2802
2803         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2804                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2805                         trace_print_lat_context(iter);
2806                 else
2807                         trace_print_context(iter);
2808         }
2809
2810         if (trace_seq_has_overflowed(s))
2811                 return TRACE_TYPE_PARTIAL_LINE;
2812
2813         if (event)
2814                 return event->funcs->trace(iter, sym_flags, event);
2815
2816         trace_seq_printf(s, "Unknown type %d\n", entry->type);
2817
2818         return trace_handle_return(s);
2819 }
2820
2821 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2822 {
2823         struct trace_array *tr = iter->tr;
2824         struct trace_seq *s = &iter->seq;
2825         struct trace_entry *entry;
2826         struct trace_event *event;
2827
2828         entry = iter->ent;
2829
2830         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
2831                 trace_seq_printf(s, "%d %d %llu ",
2832                                  entry->pid, iter->cpu, iter->ts);
2833
2834         if (trace_seq_has_overflowed(s))
2835                 return TRACE_TYPE_PARTIAL_LINE;
2836
2837         event = ftrace_find_event(entry->type);
2838         if (event)
2839                 return event->funcs->raw(iter, 0, event);
2840
2841         trace_seq_printf(s, "%d ?\n", entry->type);
2842
2843         return trace_handle_return(s);
2844 }
2845
2846 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2847 {
2848         struct trace_array *tr = iter->tr;
2849         struct trace_seq *s = &iter->seq;
2850         unsigned char newline = '\n';
2851         struct trace_entry *entry;
2852         struct trace_event *event;
2853
2854         entry = iter->ent;
2855
2856         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2857                 SEQ_PUT_HEX_FIELD(s, entry->pid);
2858                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
2859                 SEQ_PUT_HEX_FIELD(s, iter->ts);
2860                 if (trace_seq_has_overflowed(s))
2861                         return TRACE_TYPE_PARTIAL_LINE;
2862         }
2863
2864         event = ftrace_find_event(entry->type);
2865         if (event) {
2866                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2867                 if (ret != TRACE_TYPE_HANDLED)
2868                         return ret;
2869         }
2870
2871         SEQ_PUT_FIELD(s, newline);
2872
2873         return trace_handle_return(s);
2874 }
2875
2876 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2877 {
2878         struct trace_array *tr = iter->tr;
2879         struct trace_seq *s = &iter->seq;
2880         struct trace_entry *entry;
2881         struct trace_event *event;
2882
2883         entry = iter->ent;
2884
2885         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2886                 SEQ_PUT_FIELD(s, entry->pid);
2887                 SEQ_PUT_FIELD(s, iter->cpu);
2888                 SEQ_PUT_FIELD(s, iter->ts);
2889                 if (trace_seq_has_overflowed(s))
2890                         return TRACE_TYPE_PARTIAL_LINE;
2891         }
2892
2893         event = ftrace_find_event(entry->type);
2894         return event ? event->funcs->binary(iter, 0, event) :
2895                 TRACE_TYPE_HANDLED;
2896 }
2897
2898 int trace_empty(struct trace_iterator *iter)
2899 {
2900         struct ring_buffer_iter *buf_iter;
2901         int cpu;
2902
2903         /* If we are looking at one CPU buffer, only check that one */
2904         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2905                 cpu = iter->cpu_file;
2906                 buf_iter = trace_buffer_iter(iter, cpu);
2907                 if (buf_iter) {
2908                         if (!ring_buffer_iter_empty(buf_iter))
2909                                 return 0;
2910                 } else {
2911                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2912                                 return 0;
2913                 }
2914                 return 1;
2915         }
2916
2917         for_each_tracing_cpu(cpu) {
2918                 buf_iter = trace_buffer_iter(iter, cpu);
2919                 if (buf_iter) {
2920                         if (!ring_buffer_iter_empty(buf_iter))
2921                                 return 0;
2922                 } else {
2923                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2924                                 return 0;
2925                 }
2926         }
2927
2928         return 1;
2929 }
2930
2931 /*  Called with trace_event_read_lock() held. */
2932 enum print_line_t print_trace_line(struct trace_iterator *iter)
2933 {
2934         struct trace_array *tr = iter->tr;
2935         unsigned long trace_flags = tr->trace_flags;
2936         enum print_line_t ret;
2937
2938         if (iter->lost_events) {
2939                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2940                                  iter->cpu, iter->lost_events);
2941                 if (trace_seq_has_overflowed(&iter->seq))
2942                         return TRACE_TYPE_PARTIAL_LINE;
2943         }
2944
2945         if (iter->trace && iter->trace->print_line) {
2946                 ret = iter->trace->print_line(iter);
2947                 if (ret != TRACE_TYPE_UNHANDLED)
2948                         return ret;
2949         }
2950
2951         if (iter->ent->type == TRACE_BPUTS &&
2952                         trace_flags & TRACE_ITER_PRINTK &&
2953                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2954                 return trace_print_bputs_msg_only(iter);
2955
2956         if (iter->ent->type == TRACE_BPRINT &&
2957                         trace_flags & TRACE_ITER_PRINTK &&
2958                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2959                 return trace_print_bprintk_msg_only(iter);
2960
2961         if (iter->ent->type == TRACE_PRINT &&
2962                         trace_flags & TRACE_ITER_PRINTK &&
2963                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2964                 return trace_print_printk_msg_only(iter);
2965
2966         if (trace_flags & TRACE_ITER_BIN)
2967                 return print_bin_fmt(iter);
2968
2969         if (trace_flags & TRACE_ITER_HEX)
2970                 return print_hex_fmt(iter);
2971
2972         if (trace_flags & TRACE_ITER_RAW)
2973                 return print_raw_fmt(iter);
2974
2975         return print_trace_fmt(iter);
2976 }
2977
2978 void trace_latency_header(struct seq_file *m)
2979 {
2980         struct trace_iterator *iter = m->private;
2981         struct trace_array *tr = iter->tr;
2982
2983         /* print nothing if the buffers are empty */
2984         if (trace_empty(iter))
2985                 return;
2986
2987         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2988                 print_trace_header(m, iter);
2989
2990         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
2991                 print_lat_help_header(m);
2992 }
2993
2994 void trace_default_header(struct seq_file *m)
2995 {
2996         struct trace_iterator *iter = m->private;
2997         struct trace_array *tr = iter->tr;
2998         unsigned long trace_flags = tr->trace_flags;
2999
3000         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3001                 return;
3002
3003         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3004                 /* print nothing if the buffers are empty */
3005                 if (trace_empty(iter))
3006                         return;
3007                 print_trace_header(m, iter);
3008                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3009                         print_lat_help_header(m);
3010         } else {
3011                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3012                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3013                                 print_func_help_header_irq(iter->trace_buffer, m);
3014                         else
3015                                 print_func_help_header(iter->trace_buffer, m);
3016                 }
3017         }
3018 }
3019
3020 static void test_ftrace_alive(struct seq_file *m)
3021 {
3022         if (!ftrace_is_dead())
3023                 return;
3024         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3025                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3026 }
3027
3028 #ifdef CONFIG_TRACER_MAX_TRACE
3029 static void show_snapshot_main_help(struct seq_file *m)
3030 {
3031         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3032                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3033                     "#                      Takes a snapshot of the main buffer.\n"
3034                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3035                     "#                      (Doesn't have to be '2'; works with any number that\n"
3036                     "#                       is not a '0' or '1')\n");
3037 }
3038
3039 static void show_snapshot_percpu_help(struct seq_file *m)
3040 {
3041         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3042 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3043         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3044                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3045 #else
3046         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3047                     "#                     Must use main snapshot file to allocate.\n");
3048 #endif
3049         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3050                     "#                      (Doesn't have to be '2'; works with any number that\n"
3051                     "#                       is not a '0' or '1')\n");
3052 }
3053
3054 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3055 {
3056         if (iter->tr->allocated_snapshot)
3057                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3058         else
3059                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3060
3061         seq_puts(m, "# Snapshot commands:\n");
3062         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3063                 show_snapshot_main_help(m);
3064         else
3065                 show_snapshot_percpu_help(m);
3066 }
3067 #else
3068 /* Should never be called */
3069 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3070 #endif
3071
3072 static int s_show(struct seq_file *m, void *v)
3073 {
3074         struct trace_iterator *iter = v;
3075         int ret;
3076
3077         if (iter->ent == NULL) {
3078                 if (iter->tr) {
3079                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3080                         seq_puts(m, "#\n");
3081                         test_ftrace_alive(m);
3082                 }
3083                 if (iter->snapshot && trace_empty(iter))
3084                         print_snapshot_help(m, iter);
3085                 else if (iter->trace && iter->trace->print_header)
3086                         iter->trace->print_header(m);
3087                 else
3088                         trace_default_header(m);
3089
3090         } else if (iter->leftover) {
3091                 /*
3092                  * If we filled the seq_file buffer earlier, we
3093                  * want to just show it now.
3094                  */
3095                 ret = trace_print_seq(m, &iter->seq);
3096
3097                 /* ret should this time be zero, but you never know */
3098                 iter->leftover = ret;
3099
3100         } else {
3101                 print_trace_line(iter);
3102                 ret = trace_print_seq(m, &iter->seq);
3103                 /*
3104                  * If we overflow the seq_file buffer, then it will
3105                  * ask us for this data again at start up.
3106                  * Use that instead.
3107                  *  ret is 0 if seq_file write succeeded.
3108                  *        -1 otherwise.
3109                  */
3110                 iter->leftover = ret;
3111         }
3112
3113         return 0;
3114 }
3115
3116 /*
3117  * Should be used after trace_array_get(), trace_types_lock
3118  * ensures that i_cdev was already initialized.
3119  */
3120 static inline int tracing_get_cpu(struct inode *inode)
3121 {
3122         if (inode->i_cdev) /* See trace_create_cpu_file() */
3123                 return (long)inode->i_cdev - 1;
3124         return RING_BUFFER_ALL_CPUS;
3125 }
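
/*
 * Illustrative note (an assumption based on the decode above and the
 * reference to trace_create_cpu_file()): per-CPU files presumably store
 * "cpu + 1" in inode->i_cdev so that a NULL i_cdev keeps meaning "all
 * CPUs". A typical open routine then does:
 *
 *	int cpu = tracing_get_cpu(inode);
 *
 *	if (cpu == RING_BUFFER_ALL_CPUS)
 *		(operate on every CPU buffer)
 *	else
 *		(operate on the single buffer for "cpu")
 */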
3126
3127 static const struct seq_operations tracer_seq_ops = {
3128         .start          = s_start,
3129         .next           = s_next,
3130         .stop           = s_stop,
3131         .show           = s_show,
3132 };
3133
3134 static struct trace_iterator *
3135 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3136 {
3137         struct trace_array *tr = inode->i_private;
3138         struct trace_iterator *iter;
3139         int cpu;
3140
3141         if (tracing_disabled)
3142                 return ERR_PTR(-ENODEV);
3143
3144         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3145         if (!iter)
3146                 return ERR_PTR(-ENOMEM);
3147
3148         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3149                                     GFP_KERNEL);
3150         if (!iter->buffer_iter)
3151                 goto release;
3152
3153         /*
3154          * We make a copy of the current tracer to avoid concurrent
3155          * changes on it while we are reading.
3156          */
3157         mutex_lock(&trace_types_lock);
3158         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3159         if (!iter->trace)
3160                 goto fail;
3161
3162         *iter->trace = *tr->current_trace;
3163
3164         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3165                 goto fail;
3166
3167         iter->tr = tr;
3168
3169 #ifdef CONFIG_TRACER_MAX_TRACE
3170         /* Currently only the top directory has a snapshot */
3171         if (tr->current_trace->print_max || snapshot)
3172                 iter->trace_buffer = &tr->max_buffer;
3173         else
3174 #endif
3175                 iter->trace_buffer = &tr->trace_buffer;
3176         iter->snapshot = snapshot;
3177         iter->pos = -1;
3178         iter->cpu_file = tracing_get_cpu(inode);
3179         mutex_init(&iter->mutex);
3180
3181         /* Notify the tracer early; before we stop tracing. */
3182         if (iter->trace && iter->trace->open)
3183                 iter->trace->open(iter);
3184
3185         /* Annotate start of buffers if we had overruns */
3186         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3187                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3188
3189         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3190         if (trace_clocks[tr->clock_id].in_ns)
3191                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3192
3193         /* stop the trace while dumping if we are not opening "snapshot" */
3194         if (!iter->snapshot)
3195                 tracing_stop_tr(tr);
3196
3197         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3198                 for_each_tracing_cpu(cpu) {
3199                         iter->buffer_iter[cpu] =
3200                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3201                 }
3202                 ring_buffer_read_prepare_sync();
3203                 for_each_tracing_cpu(cpu) {
3204                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3205                         tracing_iter_reset(iter, cpu);
3206                 }
3207         } else {
3208                 cpu = iter->cpu_file;
3209                 iter->buffer_iter[cpu] =
3210                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3211                 ring_buffer_read_prepare_sync();
3212                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3213                 tracing_iter_reset(iter, cpu);
3214         }
3215
3216         mutex_unlock(&trace_types_lock);
3217
3218         return iter;
3219
3220  fail:
3221         mutex_unlock(&trace_types_lock);
3222         kfree(iter->trace);
3223         kfree(iter->buffer_iter);
3224 release:
3225         seq_release_private(inode, file);
3226         return ERR_PTR(-ENOMEM);
3227 }
3228
3229 int tracing_open_generic(struct inode *inode, struct file *filp)
3230 {
3231         if (tracing_disabled)
3232                 return -ENODEV;
3233
3234         filp->private_data = inode->i_private;
3235         return 0;
3236 }
3237
3238 bool tracing_is_disabled(void)
3239 {
3240         return (tracing_disabled) ? true : false;
3241 }
3242
3243 /*
3244  * Open and update trace_array ref count.
3245  * Must have the current trace_array passed to it.
3246  */
3247 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3248 {
3249         struct trace_array *tr = inode->i_private;
3250
3251         if (tracing_disabled)
3252                 return -ENODEV;
3253
3254         if (trace_array_get(tr) < 0)
3255                 return -ENODEV;
3256
3257         filp->private_data = inode->i_private;
3258
3259         return 0;
3260 }
3261
3262 static int tracing_release(struct inode *inode, struct file *file)
3263 {
3264         struct trace_array *tr = inode->i_private;
3265         struct seq_file *m = file->private_data;
3266         struct trace_iterator *iter;
3267         int cpu;
3268
3269         if (!(file->f_mode & FMODE_READ)) {
3270                 trace_array_put(tr);
3271                 return 0;
3272         }
3273
3274         /* Writes do not use seq_file */
3275         iter = m->private;
3276         mutex_lock(&trace_types_lock);
3277
3278         for_each_tracing_cpu(cpu) {
3279                 if (iter->buffer_iter[cpu])
3280                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3281         }
3282
3283         if (iter->trace && iter->trace->close)
3284                 iter->trace->close(iter);
3285
3286         if (!iter->snapshot)
3287                 /* reenable tracing if it was previously enabled */
3288                 tracing_start_tr(tr);
3289
3290         __trace_array_put(tr);
3291
3292         mutex_unlock(&trace_types_lock);
3293
3294         mutex_destroy(&iter->mutex);
3295         free_cpumask_var(iter->started);
3296         kfree(iter->trace);
3297         kfree(iter->buffer_iter);
3298         seq_release_private(inode, file);
3299
3300         return 0;
3301 }
3302
3303 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3304 {
3305         struct trace_array *tr = inode->i_private;
3306
3307         trace_array_put(tr);
3308         return 0;
3309 }
3310
3311 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3312 {
3313         struct trace_array *tr = inode->i_private;
3314
3315         trace_array_put(tr);
3316
3317         return single_release(inode, file);
3318 }
3319
3320 static int tracing_open(struct inode *inode, struct file *file)
3321 {
3322         struct trace_array *tr = inode->i_private;
3323         struct trace_iterator *iter;
3324         int ret = 0;
3325
3326         if (trace_array_get(tr) < 0)
3327                 return -ENODEV;
3328
3329         /* If this file was open for write, then erase contents */
3330         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3331                 int cpu = tracing_get_cpu(inode);
3332
3333                 if (cpu == RING_BUFFER_ALL_CPUS)
3334                         tracing_reset_online_cpus(&tr->trace_buffer);
3335                 else
3336                         tracing_reset(&tr->trace_buffer, cpu);
3337         }
3338
3339         if (file->f_mode & FMODE_READ) {
3340                 iter = __tracing_open(inode, file, false);
3341                 if (IS_ERR(iter))
3342                         ret = PTR_ERR(iter);
3343                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3344                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3345         }
3346
3347         if (ret < 0)
3348                 trace_array_put(tr);
3349
3350         return ret;
3351 }
3352
3353 /*
3354  * Some tracers are not suitable for instance buffers.
3355  * A tracer is always available for the global array (toplevel)
3356  * or if it explicitly states that it is.
3357  */
3358 static bool
3359 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3360 {
3361         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3362 }
3363
3364 /* Find the next tracer that this trace array may use */
3365 static struct tracer *
3366 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3367 {
3368         while (t && !trace_ok_for_array(t, tr))
3369                 t = t->next;
3370
3371         return t;
3372 }
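
/*
 * Illustrative sketch: a tracer opts in to instance buffers by setting
 * ->allow_instances when it registers, e.g. (hypothetical tracer):
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name		 = "my_tracer",
 *		.init		 = my_tracer_init,
 *		.allow_instances = true,
 *	};
 *
 * Anything else is filtered out of instance directories by
 * trace_ok_for_array() above.
 */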
3373
3374 static void *
3375 t_next(struct seq_file *m, void *v, loff_t *pos)
3376 {
3377         struct trace_array *tr = m->private;
3378         struct tracer *t = v;
3379
3380         (*pos)++;
3381
3382         if (t)
3383                 t = get_tracer_for_array(tr, t->next);
3384
3385         return t;
3386 }
3387
3388 static void *t_start(struct seq_file *m, loff_t *pos)
3389 {
3390         struct trace_array *tr = m->private;
3391         struct tracer *t;
3392         loff_t l = 0;
3393
3394         mutex_lock(&trace_types_lock);
3395
3396         t = get_tracer_for_array(tr, trace_types);
3397         for (; t && l < *pos; t = t_next(m, t, &l))
3398                         ;
3399
3400         return t;
3401 }
3402
3403 static void t_stop(struct seq_file *m, void *p)
3404 {
3405         mutex_unlock(&trace_types_lock);
3406 }
3407
3408 static int t_show(struct seq_file *m, void *v)
3409 {
3410         struct tracer *t = v;
3411
3412         if (!t)
3413                 return 0;
3414
3415         seq_puts(m, t->name);
3416         if (t->next)
3417                 seq_putc(m, ' ');
3418         else
3419                 seq_putc(m, '\n');
3420
3421         return 0;
3422 }
3423
3424 static const struct seq_operations show_traces_seq_ops = {
3425         .start          = t_start,
3426         .next           = t_next,
3427         .stop           = t_stop,
3428         .show           = t_show,
3429 };
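
/*
 * Example (illustrative): a read of "available_tracers" yields a single
 * space-separated line, e.g.
 *
 *	# cat /sys/kernel/tracing/available_tracers
 *	blk function_graph function nop
 *
 * because t_show() emits a space after every name and a newline only
 * after the last registered tracer.
 */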
3430
3431 static int show_traces_open(struct inode *inode, struct file *file)
3432 {
3433         struct trace_array *tr = inode->i_private;
3434         struct seq_file *m;
3435         int ret;
3436
3437         if (tracing_disabled)
3438                 return -ENODEV;
3439
3440         ret = seq_open(file, &show_traces_seq_ops);
3441         if (ret)
3442                 return ret;
3443
3444         m = file->private_data;
3445         m->private = tr;
3446
3447         return 0;
3448 }
3449
3450 static ssize_t
3451 tracing_write_stub(struct file *filp, const char __user *ubuf,
3452                    size_t count, loff_t *ppos)
3453 {
3454         return count;
3455 }
3456
3457 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3458 {
3459         int ret;
3460
3461         if (file->f_mode & FMODE_READ)
3462                 ret = seq_lseek(file, offset, whence);
3463         else
3464                 file->f_pos = ret = 0;
3465
3466         return ret;
3467 }
3468
3469 static const struct file_operations tracing_fops = {
3470         .open           = tracing_open,
3471         .read           = seq_read,
3472         .write          = tracing_write_stub,
3473         .llseek         = tracing_lseek,
3474         .release        = tracing_release,
3475 };
3476
3477 static const struct file_operations show_traces_fops = {
3478         .open           = show_traces_open,
3479         .read           = seq_read,
3480         .release        = seq_release,
3481         .llseek         = seq_lseek,
3482 };
3483
3484 /*
3485  * The tracer itself will not take this lock, but still we want
3486  * to provide a consistent cpumask to user-space:
3487  */
3488 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3489
3490 /*
3491  * Temporary storage for the character representation of the
3492  * CPU bitmask (and one more byte for the newline):
3493  */
3494 static char mask_str[NR_CPUS + 1];
3495
3496 static ssize_t
3497 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3498                      size_t count, loff_t *ppos)
3499 {
3500         struct trace_array *tr = file_inode(filp)->i_private;
3501         int len;
3502
3503         mutex_lock(&tracing_cpumask_update_lock);
3504
3505         len = snprintf(mask_str, sizeof(mask_str), "%*pb\n",
3506                        cpumask_pr_args(tr->tracing_cpumask));
3507         if (len >= count) {
3508                 count = -EINVAL;
3509                 goto out_err;
3510         }
3511         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3512
3513 out_err:
3514         mutex_unlock(&tracing_cpumask_update_lock);
3515
3516         return count;
3517 }
3518
3519 static ssize_t
3520 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3521                       size_t count, loff_t *ppos)
3522 {
3523         struct trace_array *tr = file_inode(filp)->i_private;
3524         cpumask_var_t tracing_cpumask_new;
3525         int err, cpu;
3526
3527         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3528                 return -ENOMEM;
3529
3530         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3531         if (err)
3532                 goto err_unlock;
3533
3534         mutex_lock(&tracing_cpumask_update_lock);
3535
3536         local_irq_disable();
3537         arch_spin_lock(&tr->max_lock);
3538         for_each_tracing_cpu(cpu) {
3539                 /*
3540                  * Increase/decrease the disabled counter if we are
3541                  * about to flip a bit in the cpumask:
3542                  */
3543                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3544                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3545                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3546                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3547                 }
3548                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3549                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3550                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3551                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3552                 }
3553         }
3554         arch_spin_unlock(&tr->max_lock);
3555         local_irq_enable();
3556
3557         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3558
3559         mutex_unlock(&tracing_cpumask_update_lock);
3560         free_cpumask_var(tracing_cpumask_new);
3561
3562         return count;
3563
3564 err_unlock:
3565         free_cpumask_var(tracing_cpumask_new);
3566
3567         return err;
3568 }
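
/*
 * Usage sketch (illustrative): tracing_cpumask takes a hex CPU mask, so
 * on a 4-CPU machine
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * restricts tracing to CPUs 0 and 1; CPUs leaving the mask have their
 * per-cpu buffers disabled via ring_buffer_record_disable_cpu() above.
 */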
3569
3570 static const struct file_operations tracing_cpumask_fops = {
3571         .open           = tracing_open_generic_tr,
3572         .read           = tracing_cpumask_read,
3573         .write          = tracing_cpumask_write,
3574         .release        = tracing_release_generic_tr,
3575         .llseek         = generic_file_llseek,
3576 };
3577
3578 static int tracing_trace_options_show(struct seq_file *m, void *v)
3579 {
3580         struct tracer_opt *trace_opts;
3581         struct trace_array *tr = m->private;
3582         u32 tracer_flags;
3583         int i;
3584
3585         mutex_lock(&trace_types_lock);
3586         tracer_flags = tr->current_trace->flags->val;
3587         trace_opts = tr->current_trace->flags->opts;
3588
3589         for (i = 0; trace_options[i]; i++) {
3590                 if (tr->trace_flags & (1 << i))
3591                         seq_printf(m, "%s\n", trace_options[i]);
3592                 else
3593                         seq_printf(m, "no%s\n", trace_options[i]);
3594         }
3595
3596         for (i = 0; trace_opts[i].name; i++) {
3597                 if (tracer_flags & trace_opts[i].bit)
3598                         seq_printf(m, "%s\n", trace_opts[i].name);
3599                 else
3600                         seq_printf(m, "no%s\n", trace_opts[i].name);
3601         }
3602         mutex_unlock(&trace_types_lock);
3603
3604         return 0;
3605 }
3606
3607 static int __set_tracer_option(struct trace_array *tr,
3608                                struct tracer_flags *tracer_flags,
3609                                struct tracer_opt *opts, int neg)
3610 {
3611         struct tracer *trace = tracer_flags->trace;
3612         int ret;
3613
3614         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3615         if (ret)
3616                 return ret;
3617
3618         if (neg)
3619                 tracer_flags->val &= ~opts->bit;
3620         else
3621                 tracer_flags->val |= opts->bit;
3622         return 0;
3623 }
3624
3625 /* Try to assign a tracer specific option */
3626 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3627 {
3628         struct tracer *trace = tr->current_trace;
3629         struct tracer_flags *tracer_flags = trace->flags;
3630         struct tracer_opt *opts = NULL;
3631         int i;
3632
3633         for (i = 0; tracer_flags->opts[i].name; i++) {
3634                 opts = &tracer_flags->opts[i];
3635
3636                 if (strcmp(cmp, opts->name) == 0)
3637                         return __set_tracer_option(tr, trace->flags, opts, neg);
3638         }
3639
3640         return -EINVAL;
3641 }
3642
3643 /* Some tracers require overwrite to stay enabled */
3644 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3645 {
3646         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3647                 return -1;
3648
3649         return 0;
3650 }
3651
3652 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3653 {
3654         /* do nothing if flag is already set */
3655         if (!!(tr->trace_flags & mask) == !!enabled)
3656                 return 0;
3657
3658         /* Give the tracer a chance to approve the change */
3659         if (tr->current_trace->flag_changed)
3660                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3661                         return -EINVAL;
3662
3663         if (enabled)
3664                 tr->trace_flags |= mask;
3665         else
3666                 tr->trace_flags &= ~mask;
3667
3668         if (mask == TRACE_ITER_RECORD_CMD)
3669                 trace_event_enable_cmd_record(enabled);
3670
3671         if (mask == TRACE_ITER_EVENT_FORK)
3672                 trace_event_follow_fork(tr, enabled);
3673
3674         if (mask == TRACE_ITER_OVERWRITE) {
3675                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3676 #ifdef CONFIG_TRACER_MAX_TRACE
3677                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3678 #endif
3679         }
3680
3681         if (mask == TRACE_ITER_PRINTK) {
3682                 trace_printk_start_stop_comm(enabled);
3683                 trace_printk_control(enabled);
3684         }
3685
3686         return 0;
3687 }
3688
3689 static int trace_set_options(struct trace_array *tr, char *option)
3690 {
3691         char *cmp;
3692         int neg = 0;
3693         int ret = -ENODEV;
3694         int i;
3695         size_t orig_len = strlen(option);
3696
3697         cmp = strstrip(option);
3698
3699         if (strncmp(cmp, "no", 2) == 0) {
3700                 neg = 1;
3701                 cmp += 2;
3702         }
3703
3704         mutex_lock(&trace_types_lock);
3705
3706         for (i = 0; trace_options[i]; i++) {
3707                 if (strcmp(cmp, trace_options[i]) == 0) {
3708                         ret = set_tracer_flag(tr, 1 << i, !neg);
3709                         break;
3710                 }
3711         }
3712
3713         /* If no option could be set, test the specific tracer options */
3714         if (!trace_options[i])
3715                 ret = set_tracer_option(tr, cmp, neg);
3716
3717         mutex_unlock(&trace_types_lock);
3718
3719         /*
3720          * If the first trailing whitespace is replaced with '\0' by strstrip,
3721          * turn it back into a space.
3722          */
3723         if (orig_len > strlen(option))
3724                 option[strlen(option)] = ' ';
3725
3726         return ret;
3727 }
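
/*
 * Usage sketch (illustrative): an option is toggled by writing its name,
 * or the name prefixed with "no", to trace_options, e.g.
 *
 *	# echo sym-offset > /sys/kernel/tracing/trace_options
 *	# echo nooverwrite > /sys/kernel/tracing/trace_options
 *
 * Names not found in trace_options[] fall through to the current
 * tracer's private flags via set_tracer_option().
 */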
3728
3729 static void __init apply_trace_boot_options(void)
3730 {
3731         char *buf = trace_boot_options_buf;
3732         char *option;
3733
3734         while (true) {
3735                 option = strsep(&buf, ",");
3736
3737                 if (!option)
3738                         break;
3739
3740                 if (*option)
3741                         trace_set_options(&global_trace, option);
3742
3743                 /* Put back the comma to allow this to be called again */
3744                 if (buf)
3745                         *(buf - 1) = ',';
3746         }
3747 }
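
/*
 * Illustrative example: the buffer walked above is typically filled from
 * a kernel command line parameter such as
 *
 *	trace_options=sym-addr,stacktrace
 *
 * strsep() splits it on ',', and each comma is restored afterwards so
 * the same buffer can be parsed again later.
 */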
3748
3749 static ssize_t
3750 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3751                         size_t cnt, loff_t *ppos)
3752 {
3753         struct seq_file *m = filp->private_data;
3754         struct trace_array *tr = m->private;
3755         char buf[64];
3756         int ret;
3757
3758         if (cnt >= sizeof(buf))
3759                 return -EINVAL;
3760
3761         if (copy_from_user(buf, ubuf, cnt))
3762                 return -EFAULT;
3763
3764         buf[cnt] = 0;
3765
3766         ret = trace_set_options(tr, buf);
3767         if (ret < 0)
3768                 return ret;
3769
3770         *ppos += cnt;
3771
3772         return cnt;
3773 }
3774
3775 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3776 {
3777         struct trace_array *tr = inode->i_private;
3778         int ret;
3779
3780         if (tracing_disabled)
3781                 return -ENODEV;
3782
3783         if (trace_array_get(tr) < 0)
3784                 return -ENODEV;
3785
3786         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3787         if (ret < 0)
3788                 trace_array_put(tr);
3789
3790         return ret;
3791 }
3792
3793 static const struct file_operations tracing_iter_fops = {
3794         .open           = tracing_trace_options_open,
3795         .read           = seq_read,
3796         .llseek         = seq_lseek,
3797         .release        = tracing_single_release_tr,
3798         .write          = tracing_trace_options_write,
3799 };
3800
3801 static const char readme_msg[] =
3802         "tracing mini-HOWTO:\n\n"
3803         "# echo 0 > tracing_on : quick way to disable tracing\n"
3804         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3805         " Important files:\n"
3806         "  trace\t\t\t- The static contents of the buffer\n"
3807         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
3808         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3809         "  current_tracer\t- function and latency tracers\n"
3810         "  available_tracers\t- list of configured tracers for current_tracer\n"
3811         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3812         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3813         "  trace_clock\t\t- change the clock used to order events\n"
3814         "       local:   Per cpu clock but may not be synced across CPUs\n"
3815         "      global:   Synced across CPUs but slows tracing down.\n"
3816         "     counter:   Not a clock, but just an increment\n"
3817         "      uptime:   Jiffy counter from time of boot\n"
3818         "        perf:   Same clock that perf events use\n"
3819 #ifdef CONFIG_X86_64
3820         "     x86-tsc:   TSC cycle counter\n"
3821 #endif
3822         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
3823         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3824         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3825         "\t\t\t  Remove sub-buffer with rmdir\n"
3826         "  trace_options\t\t- Set format or modify how tracing happens\n"
3827         "\t\t\t  Disable an option by prefixing the option name\n"
3828         "\t\t\t  with 'no'\n"
3829         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
3830 #ifdef CONFIG_DYNAMIC_FTRACE
3831         "\n  available_filter_functions - list of functions that can be filtered on\n"
3832         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3833         "\t\t\t  functions\n"
3834         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3835         "\t     modules: Can select a group via module\n"
3836         "\t      Format: :mod:<module-name>\n"
3837         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3838         "\t    triggers: a command to perform when function is hit\n"
3839         "\t      Format: <function>:<trigger>[:count]\n"
3840         "\t     trigger: traceon, traceoff\n"
3841         "\t\t      enable_event:<system>:<event>\n"
3842         "\t\t      disable_event:<system>:<event>\n"
3843 #ifdef CONFIG_STACKTRACE
3844         "\t\t      stacktrace\n"
3845 #endif
3846 #ifdef CONFIG_TRACER_SNAPSHOT
3847         "\t\t      snapshot\n"
3848 #endif
3849         "\t\t      dump\n"
3850         "\t\t      cpudump\n"
3851         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3852         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3853         "\t     The first one will disable tracing every time do_fault is hit\n"
3854         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3855         "\t       The first time do_trap is hit and it disables tracing, the\n"
3856         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3857         "\t       the counter will not decrement. It only decrements when the\n"
3858         "\t       trigger did work\n"
3859         "\t     To remove trigger without count:\n"
3860         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3861         "\t     To remove trigger with a count:\n"
3862         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3863         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3864         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3865         "\t    modules: Can select a group via module command :mod:\n"
3866         "\t    Does not accept triggers\n"
3867 #endif /* CONFIG_DYNAMIC_FTRACE */
3868 #ifdef CONFIG_FUNCTION_TRACER
3869         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3870         "\t\t    (function)\n"
3871 #endif
3872 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3873         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3874         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3875         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3876 #endif
3877 #ifdef CONFIG_TRACER_SNAPSHOT
3878         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3879         "\t\t\t  snapshot buffer. Read the contents for more\n"
3880         "\t\t\t  information\n"
3881 #endif
3882 #ifdef CONFIG_STACK_TRACER
3883         "  stack_trace\t\t- Shows the max stack trace when active\n"
3884         "  stack_max_size\t- Shows current max stack size that was traced\n"
3885         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3886         "\t\t\t  new trace)\n"
3887 #ifdef CONFIG_DYNAMIC_FTRACE
3888         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3889         "\t\t\t  traces\n"
3890 #endif
3891 #endif /* CONFIG_STACK_TRACER */
3892         "  events/\t\t- Directory containing all trace event subsystems:\n"
3893         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3894         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3895         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3896         "\t\t\t  events\n"
3897         "      filter\t\t- If set, only events passing filter are traced\n"
3898         "  events/<system>/<event>/\t- Directory containing control files for\n"
3899         "\t\t\t  <event>:\n"
3900         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3901         "      filter\t\t- If set, only events passing filter are traced\n"
3902         "      trigger\t\t- If set, a command to perform when event is hit\n"
3903         "\t    Format: <trigger>[:count][if <filter>]\n"
3904         "\t   trigger: traceon, traceoff\n"
3905         "\t            enable_event:<system>:<event>\n"
3906         "\t            disable_event:<system>:<event>\n"
3907 #ifdef CONFIG_HIST_TRIGGERS
3908         "\t            enable_hist:<system>:<event>\n"
3909         "\t            disable_hist:<system>:<event>\n"
3910 #endif
3911 #ifdef CONFIG_STACKTRACE
3912         "\t\t    stacktrace\n"
3913 #endif
3914 #ifdef CONFIG_TRACER_SNAPSHOT
3915         "\t\t    snapshot\n"
3916 #endif
3917 #ifdef CONFIG_HIST_TRIGGERS
3918         "\t\t    hist (see below)\n"
3919 #endif
3920         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3921         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3922         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3923         "\t                  events/block/block_unplug/trigger\n"
3924         "\t   The first disables tracing every time block_unplug is hit.\n"
3925         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3926         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3927         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
3928         "\t   Like function triggers, the counter is only decremented if it\n"
3929         "\t    enabled or disabled tracing.\n"
3930         "\t   To remove a trigger without a count:\n"
3931         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3932         "\t   To remove a trigger with a count:\n"
3933         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3934         "\t   Filters can be ignored when removing a trigger.\n"
3935 #ifdef CONFIG_HIST_TRIGGERS
3936         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
3937         "\t    Format: hist:keys=<field1[,field2,...]>\n"
3938         "\t            [:values=<field1[,field2,...]>]\n"
3939         "\t            [:sort=<field1[,field2,...]>]\n"
3940         "\t            [:size=#entries]\n"
3941         "\t            [:pause][:continue][:clear]\n"
3942         "\t            [:name=histname1]\n"
3943         "\t            [if <filter>]\n\n"
3944         "\t    When a matching event is hit, an entry is added to a hash\n"
3945         "\t    table using the key(s) and value(s) named, and the value of a\n"
3946         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
3947         "\t    correspond to fields in the event's format description.  Keys\n"
3948         "\t    can be any field, or the special string 'stacktrace'.\n"
3949         "\t    Compound keys consisting of up to two fields can be specified\n"
3950         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
3951         "\t    fields.  Sort keys consisting of up to two fields can be\n"
3952         "\t    specified using the 'sort' keyword.  The sort direction can\n"
3953         "\t    be modified by appending '.descending' or '.ascending' to a\n"
3954         "\t    sort field.  The 'size' parameter can be used to specify more\n"
3955         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
3956         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
3957         "\t    its histogram data will be shared with other triggers of the\n"
3958         "\t    same name, and trigger hits will update this common data.\n\n"
3959         "\t    Reading the 'hist' file for the event will dump the hash\n"
3960         "\t    table in its entirety to stdout.  If there are multiple hist\n"
3961         "\t    triggers attached to an event, there will be a table for each\n"
3962         "\t    trigger in the output.  The table displayed for a named\n"
3963         "\t    trigger will be the same as any other instance having the\n"
3964         "\t    same name.  The default format used to display a given field\n"
3965         "\t    can be modified by appending any of the following modifiers\n"
3966         "\t    to the field name, as applicable:\n\n"
3967         "\t            .hex        display a number as a hex value\n"
3968         "\t            .sym        display an address as a symbol\n"
3969         "\t            .sym-offset display an address as a symbol and offset\n"
3970         "\t            .execname   display a common_pid as a program name\n"
3971         "\t            .syscall    display a syscall id as a syscall name\n"
3972         "\t            .log2       display log2 value rather than raw number\n\n"
3973         "\t    The 'pause' parameter can be used to pause an existing hist\n"
3974         "\t    trigger or to start a hist trigger but not log any events\n"
3975         "\t    until told to do so.  'continue' can be used to start or\n"
3976         "\t    restart a paused hist trigger.\n\n"
3977         "\t    The 'clear' parameter will clear the contents of a running\n"
3978         "\t    hist trigger and leave its current paused/active state\n"
3979         "\t    unchanged.\n\n"
3980         "\t    The enable_hist and disable_hist triggers can be used to\n"
3981         "\t    have one event conditionally start and stop another event's\n"
3982         "\t    already-attached hist trigger.  The syntax is analogous to\n"
3983         "\t    the enable_event and disable_event triggers.\n"
3984 #endif
3985 ;
3986
3987 static ssize_t
3988 tracing_readme_read(struct file *filp, char __user *ubuf,
3989                        size_t cnt, loff_t *ppos)
3990 {
3991         return simple_read_from_buffer(ubuf, cnt, ppos,
3992                                         readme_msg, strlen(readme_msg));
3993 }
3994
3995 static const struct file_operations tracing_readme_fops = {
3996         .open           = tracing_open_generic,
3997         .read           = tracing_readme_read,
3998         .llseek         = generic_file_llseek,
3999 };
4000
4001 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4002 {
4003         unsigned int *ptr = v;
4004
4005         if (*pos || m->count)
4006                 ptr++;
4007
4008         (*pos)++;
4009
4010         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4011              ptr++) {
4012                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4013                         continue;
4014
4015                 return ptr;
4016         }
4017
4018         return NULL;
4019 }
4020
4021 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4022 {
4023         void *v;
4024         loff_t l = 0;
4025
4026         preempt_disable();
4027         arch_spin_lock(&trace_cmdline_lock);
4028
4029         v = &savedcmd->map_cmdline_to_pid[0];
4030         while (l <= *pos) {
4031                 v = saved_cmdlines_next(m, v, &l);
4032                 if (!v)
4033                         return NULL;
4034         }
4035
4036         return v;
4037 }
4038
4039 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4040 {
4041         arch_spin_unlock(&trace_cmdline_lock);
4042         preempt_enable();
4043 }
4044
4045 static int saved_cmdlines_show(struct seq_file *m, void *v)
4046 {
4047         char buf[TASK_COMM_LEN];
4048         unsigned int *pid = v;
4049
4050         __trace_find_cmdline(*pid, buf);
4051         seq_printf(m, "%d %s\n", *pid, buf);
4052         return 0;
4053 }
4054
4055 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4056         .start          = saved_cmdlines_start,
4057         .next           = saved_cmdlines_next,
4058         .stop           = saved_cmdlines_stop,
4059         .show           = saved_cmdlines_show,
4060 };
4061
4062 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4063 {
4064         if (tracing_disabled)
4065                 return -ENODEV;
4066
4067         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4068 }
4069
4070 static const struct file_operations tracing_saved_cmdlines_fops = {
4071         .open           = tracing_saved_cmdlines_open,
4072         .read           = seq_read,
4073         .llseek         = seq_lseek,
4074         .release        = seq_release,
4075 };
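
/*
 * Example (illustrative): each line of saved_cmdlines pairs a recorded
 * pid with its comm, as printed by saved_cmdlines_show():
 *
 *	# cat /sys/kernel/tracing/saved_cmdlines
 *	1 systemd
 *	642 sshd
 */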
4076
4077 static ssize_t
4078 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4079                                  size_t cnt, loff_t *ppos)
4080 {
4081         char buf[64];
4082         int r;
4083
4084         arch_spin_lock(&trace_cmdline_lock);
4085         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4086         arch_spin_unlock(&trace_cmdline_lock);
4087
4088         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4089 }
4090
4091 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4092 {
4093         kfree(s->saved_cmdlines);
4094         kfree(s->map_cmdline_to_pid);
4095         kfree(s);
4096 }
4097
4098 static int tracing_resize_saved_cmdlines(unsigned int val)
4099 {
4100         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4101
4102         s = kmalloc(sizeof(*s), GFP_KERNEL);
4103         if (!s)
4104                 return -ENOMEM;
4105
4106         if (allocate_cmdlines_buffer(val, s) < 0) {
4107                 kfree(s);
4108                 return -ENOMEM;
4109         }
4110
4111         arch_spin_lock(&trace_cmdline_lock);
4112         savedcmd_temp = savedcmd;
4113         savedcmd = s;
4114         arch_spin_unlock(&trace_cmdline_lock);
4115         free_saved_cmdlines_buffer(savedcmd_temp);
4116
4117         return 0;
4118 }
4119
4120 static ssize_t
4121 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4122                                   size_t cnt, loff_t *ppos)
4123 {
4124         unsigned long val;
4125         int ret;
4126
4127         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4128         if (ret)
4129                 return ret;
4130
4131         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4132         if (!val || val > PID_MAX_DEFAULT)
4133                 return -EINVAL;
4134
4135         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4136         if (ret < 0)
4137                 return ret;
4138
4139         *ppos += cnt;
4140
4141         return cnt;
4142 }
4143
4144 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4145         .open           = tracing_open_generic,
4146         .read           = tracing_saved_cmdlines_size_read,
4147         .write          = tracing_saved_cmdlines_size_write,
4148 };
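
/*
 * Usage sketch (illustrative): the comm cache can be resized at run time,
 * bounded above by PID_MAX_DEFAULT:
 *
 *	# echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * The old buffer is swapped out under trace_cmdline_lock and then freed.
 */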
4149
4150 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4151 static union trace_enum_map_item *
4152 update_enum_map(union trace_enum_map_item *ptr)
4153 {
4154         if (!ptr->map.enum_string) {
4155                 if (ptr->tail.next) {
4156                         ptr = ptr->tail.next;
4157                         /* Set ptr to the next real item (skip head) */
4158                         ptr++;
4159                 } else
4160                         return NULL;
4161         }
4162         return ptr;
4163 }
4164
4165 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4166 {
4167         union trace_enum_map_item *ptr = v;
4168
4169         /*
4170          * Paranoid! If ptr points to end, we don't want to increment past it.
4171          * This really should never happen.
4172          */
4173         ptr = update_enum_map(ptr);
4174         if (WARN_ON_ONCE(!ptr))
4175                 return NULL;
4176
4177         ptr++;
4178
4179         (*pos)++;
4180
4181         ptr = update_enum_map(ptr);
4182
4183         return ptr;
4184 }
4185
4186 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4187 {
4188         union trace_enum_map_item *v;
4189         loff_t l = 0;
4190
4191         mutex_lock(&trace_enum_mutex);
4192
4193         v = trace_enum_maps;
4194         if (v)
4195                 v++;
4196
4197         while (v && l < *pos) {
4198                 v = enum_map_next(m, v, &l);
4199         }
4200
4201         return v;
4202 }
4203
4204 static void enum_map_stop(struct seq_file *m, void *v)
4205 {
4206         mutex_unlock(&trace_enum_mutex);
4207 }
4208
4209 static int enum_map_show(struct seq_file *m, void *v)
4210 {
4211         union trace_enum_map_item *ptr = v;
4212
4213         seq_printf(m, "%s %ld (%s)\n",
4214                    ptr->map.enum_string, ptr->map.enum_value,
4215                    ptr->map.system);
4216
4217         return 0;
4218 }
4219
4220 static const struct seq_operations tracing_enum_map_seq_ops = {
4221         .start          = enum_map_start,
4222         .next           = enum_map_next,
4223         .stop           = enum_map_stop,
4224         .show           = enum_map_show,
4225 };
4226
4227 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4228 {
4229         if (tracing_disabled)
4230                 return -ENODEV;
4231
4232         return seq_open(filp, &tracing_enum_map_seq_ops);
4233 }
4234
4235 static const struct file_operations tracing_enum_map_fops = {
4236         .open           = tracing_enum_map_open,
4237         .read           = seq_read,
4238         .llseek         = seq_lseek,
4239         .release        = seq_release,
4240 };
4241
4242 static inline union trace_enum_map_item *
4243 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4244 {
4245         /* Return tail of array given the head */
4246         return ptr + ptr->head.length + 1;
4247 }
4248
4249 static void
4250 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4251                            int len)
4252 {
4253         struct trace_enum_map **stop;
4254         struct trace_enum_map **map;
4255         union trace_enum_map_item *map_array;
4256         union trace_enum_map_item *ptr;
4257
4258         stop = start + len;
4259
4260         /*
4261          * The trace_enum_maps contains the map plus a head and tail item,
4262          * where the head holds the module and length of array, and the
4263          * tail holds a pointer to the next list.
4264          */
4265         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4266         if (!map_array) {
4267                 pr_warn("Unable to allocate trace enum mapping\n");
4268                 return;
4269         }
4270
4271         mutex_lock(&trace_enum_mutex);
4272
4273         if (!trace_enum_maps)
4274                 trace_enum_maps = map_array;
4275         else {
4276                 ptr = trace_enum_maps;
4277                 for (;;) {
4278                         ptr = trace_enum_jmp_to_tail(ptr);
4279                         if (!ptr->tail.next)
4280                                 break;
4281                         ptr = ptr->tail.next;
4282
4283                 }
4284                 ptr->tail.next = map_array;
4285         }
4286         map_array->head.mod = mod;
4287         map_array->head.length = len;
4288         map_array++;
4289
4290         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4291                 map_array->map = **map;
4292                 map_array++;
4293         }
4294         memset(map_array, 0, sizeof(*map_array));
4295
4296         mutex_unlock(&trace_enum_mutex);
4297 }
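
/*
 * Illustrative layout of one map_array allocation for len == 3:
 *
 *	[ head: mod, length ][ map 0 ][ map 1 ][ map 2 ][ tail: next ]
 *
 * trace_enum_jmp_to_tail() reaches the tail slot by skipping the head
 * plus head.length map entries; tail.next (zeroed by the memset above)
 * later chains to the next module's array.
 */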
4298
4299 static void trace_create_enum_file(struct dentry *d_tracer)
4300 {
4301         trace_create_file("enum_map", 0444, d_tracer,
4302                           NULL, &tracing_enum_map_fops);
4303 }
4304
4305 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4306 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4307 static inline void trace_insert_enum_map_file(struct module *mod,
4308                               struct trace_enum_map **start, int len) { }
4309 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4310
4311 static void trace_insert_enum_map(struct module *mod,
4312                                   struct trace_enum_map **start, int len)
4313 {
4314         struct trace_enum_map **map;
4315
4316         if (len <= 0)
4317                 return;
4318
4319         map = start;
4320
4321         trace_event_enum_update(map, len);
4322
4323         trace_insert_enum_map_file(mod, start, len);
4324 }
4325
4326 static ssize_t
4327 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4328                        size_t cnt, loff_t *ppos)
4329 {
4330         struct trace_array *tr = filp->private_data;
4331         char buf[MAX_TRACER_SIZE+2];
4332         int r;
4333
4334         mutex_lock(&trace_types_lock);
4335         r = sprintf(buf, "%s\n", tr->current_trace->name);
4336         mutex_unlock(&trace_types_lock);
4337
4338         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4339 }
4340
4341 int tracer_init(struct tracer *t, struct trace_array *tr)
4342 {
4343         tracing_reset_online_cpus(&tr->trace_buffer);
4344         return t->init(tr);
4345 }
4346
4347 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4348 {
4349         int cpu;
4350
4351         for_each_tracing_cpu(cpu)
4352                 per_cpu_ptr(buf->data, cpu)->entries = val;
4353 }
4354
4355 #ifdef CONFIG_TRACER_MAX_TRACE
4356 /* resize @trace_buf's entries to match @size_buf's per-cpu entry counts */
4357 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4358                                         struct trace_buffer *size_buf, int cpu_id)
4359 {
4360         int cpu, ret = 0;
4361
4362         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4363                 for_each_tracing_cpu(cpu) {
4364                         ret = ring_buffer_resize(trace_buf->buffer,
4365                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4366                         if (ret < 0)
4367                                 break;
4368                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4369                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4370                 }
4371         } else {
4372                 ret = ring_buffer_resize(trace_buf->buffer,
4373                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4374                 if (ret == 0)
4375                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4376                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4377         }
4378
4379         return ret;
4380 }
4381 #endif /* CONFIG_TRACER_MAX_TRACE */
4382
4383 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4384                                         unsigned long size, int cpu)
4385 {
4386         int ret;
4387
4388         /*
4389          * If kernel or user changes the size of the ring buffer
4390          * we use the size that was given, and we can forget about
4391          * expanding it later.
4392          */
4393         ring_buffer_expanded = true;
4394
4395         /* May be called before buffers are initialized */
4396         if (!tr->trace_buffer.buffer)
4397                 return 0;
4398
4399         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4400         if (ret < 0)
4401                 return ret;
4402
4403 #ifdef CONFIG_TRACER_MAX_TRACE
4404         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4405             !tr->current_trace->use_max_tr)
4406                 goto out;
4407
4408         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4409         if (ret < 0) {
4410                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4411                                                      &tr->trace_buffer, cpu);
4412                 if (r < 0) {
4413                         /*
4414                          * AARGH! We are left with different
4415                          * size max buffer!!!!
4416                          * The max buffer is our "snapshot" buffer.
4417                          * When a tracer needs a snapshot (one of the
4418                          * latency tracers), it swaps the max buffer
4419                          * with the saved snap shot. We succeeded to
4420                          * update the size of the main buffer, but failed to
4421                          * update the size of the max buffer. But when we tried
4422                          * to reset the main buffer to the original size, we
4423                          * failed there too. This is very unlikely to
4424                          * happen, but if it does, warn and kill all
4425                          * tracing.
4426                          */
4427                         WARN_ON(1);
4428                         tracing_disabled = 1;
4429                 }
4430                 return ret;
4431         }
4432
4433         if (cpu == RING_BUFFER_ALL_CPUS)
4434                 set_buffer_entries(&tr->max_buffer, size);
4435         else
4436                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4437
4438  out:
4439 #endif /* CONFIG_TRACER_MAX_TRACE */
4440
4441         if (cpu == RING_BUFFER_ALL_CPUS)
4442                 set_buffer_entries(&tr->trace_buffer, size);
4443         else
4444                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4445
4446         return ret;
4447 }
4448
4449 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4450                                           unsigned long size, int cpu_id)
4451 {
4452         int ret = size;
4453
4454         mutex_lock(&trace_types_lock);
4455
4456         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4457                 /* make sure this cpu is enabled in the mask */
4458                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4459                         ret = -EINVAL;
4460                         goto out;
4461                 }
4462         }
4463
4464         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4465         if (ret < 0)
4466                 ret = -ENOMEM;
4467
4468 out:
4469         mutex_unlock(&trace_types_lock);
4470
4471         return ret;
4472 }
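
/*
 * Usage sketch (illustrative): user space drives these resizes through
 * the buffer_size_kb files, e.g.
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	# echo 4096 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 *
 * The per-CPU variant resolves its cpu via tracing_get_cpu(), and the
 * write handler converts kilobytes to a byte count before calling the
 * resize helpers above.
 */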
4473
4474
4475 /**
4476  * tracing_update_buffers - used by tracing facility to expand ring buffers
4477  *
4478  * To save memory when tracing is configured in but never used, the
4479  * ring buffers are initially set to a minimum size. Once a user
4480  * starts to use the tracing facility, they need to grow to
4481  * their default size.
4482  *
4483  * This function is to be called when a tracer is about to be used.
4484  */
4485 int tracing_update_buffers(void)
4486 {
4487         int ret = 0;
4488
4489         mutex_lock(&trace_types_lock);
4490         if (!ring_buffer_expanded)
4491                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4492                                                 RING_BUFFER_ALL_CPUS);
4493         mutex_unlock(&trace_types_lock);
4494
4495         return ret;
4496 }
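
/*
 * Caller sketch (illustrative): enable paths typically do
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *
 * before turning tracing on, so the first real user pays the allocation
 * cost instead of every booted system.
 */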
4497
4498 struct trace_option_dentry;
4499
4500 static void
4501 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4502
4503 /*
4504  * Used to clear out the tracer before deletion of an instance.
4505  * Must have trace_types_lock held.
4506  */
4507 static void tracing_set_nop(struct trace_array *tr)
4508 {
4509         if (tr->current_trace == &nop_trace)
4510                 return;
4511
4512         tr->current_trace->enabled--;
4513
4514         if (tr->current_trace->reset)
4515                 tr->current_trace->reset(tr);
4516
4517         tr->current_trace = &nop_trace;
4518 }
4519
4520 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4521 {
4522         /* Only enable if the directory has been created already. */
4523         if (!tr->dir)
4524                 return;
4525
4526         create_trace_option_files(tr, t);
4527 }
4528
4529 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4530 {
4531         struct tracer *t;
4532 #ifdef CONFIG_TRACER_MAX_TRACE
4533         bool had_max_tr;
4534 #endif
4535         int ret = 0;
4536
4537         mutex_lock(&trace_types_lock);
4538
4539         if (!ring_buffer_expanded) {
4540                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4541                                                 RING_BUFFER_ALL_CPUS);
4542                 if (ret < 0)
4543                         goto out;
4544                 ret = 0;
4545         }
4546
4547         for (t = trace_types; t; t = t->next) {
4548                 if (strcmp(t->name, buf) == 0)
4549                         break;
4550         }
4551         if (!t) {
4552                 ret = -EINVAL;
4553                 goto out;
4554         }
4555         if (t == tr->current_trace)
4556                 goto out;
4557
4558         /* Some tracers are only allowed for the top level buffer */
4559         if (!trace_ok_for_array(t, tr)) {
4560                 ret = -EINVAL;
4561                 goto out;
4562         }
4563
4564         /* If trace pipe files are being read, we can't change the tracer */
4565         if (tr->current_trace->ref) {
4566                 ret = -EBUSY;
4567                 goto out;
4568         }
4569
4570         trace_branch_disable();
4571
4572         tr->current_trace->enabled--;
4573
4574         if (tr->current_trace->reset)
4575                 tr->current_trace->reset(tr);
4576
4577         /* Current trace needs to be nop_trace before synchronize_sched */
4578         tr->current_trace = &nop_trace;
4579
4580 #ifdef CONFIG_TRACER_MAX_TRACE
4581         had_max_tr = tr->allocated_snapshot;
4582
4583         if (had_max_tr && !t->use_max_tr) {
4584                 /*
4585                  * We need to make sure that the update_max_tr sees that
4586                  * current_trace changed to nop_trace to keep it from
4587                  * swapping the buffers after we resize it.
4588                  * update_max_tr() is called with interrupts disabled,
4589                  * so a synchronize_sched() is sufficient.
4590                  */
4591                 synchronize_sched();
4592                 free_snapshot(tr);
4593         }
4594 #endif
4595
4596 #ifdef CONFIG_TRACER_MAX_TRACE
4597         if (t->use_max_tr && !had_max_tr) {
4598                 ret = alloc_snapshot(tr);
4599                 if (ret < 0)
4600                         goto out;
4601         }
4602 #endif
4603
4604         if (t->init) {
4605                 ret = tracer_init(t, tr);
4606                 if (ret)
4607                         goto out;
4608         }
4609
4610         tr->current_trace = t;
4611         tr->current_trace->enabled++;
4612         trace_branch_enable(tr);
4613  out:
4614         mutex_unlock(&trace_types_lock);
4615
4616         return ret;
4617 }
4618
4619 static ssize_t
4620 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4621                         size_t cnt, loff_t *ppos)
4622 {
4623         struct trace_array *tr = filp->private_data;
4624         char buf[MAX_TRACER_SIZE+1];
4625         int i;
4626         size_t ret;
4627         int err;
4628
4629         ret = cnt;
4630
4631         if (cnt > MAX_TRACER_SIZE)
4632                 cnt = MAX_TRACER_SIZE;
4633
4634         if (copy_from_user(buf, ubuf, cnt))
4635                 return -EFAULT;
4636
4637         buf[cnt] = 0;
4638
4639         /* strip trailing whitespace. */
4640         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4641                 buf[i] = 0;
4642
4643         err = tracing_set_tracer(tr, buf);
4644         if (err)
4645                 return err;
4646
4647         *ppos += ret;
4648
4649         return ret;
4650 }
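
/*
 * Usage sketch (illustrative):
 *
 *	# echo function > /sys/kernel/tracing/current_tracer
 *	# cat /sys/kernel/tracing/current_tracer
 *	function
 *
 * The write fails with -EBUSY while trace_pipe readers hold a reference
 * and with -EINVAL for tracers not allowed in this instance.
 */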
4651
4652 static ssize_t
4653 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4654                    size_t cnt, loff_t *ppos)
4655 {
4656         char buf[64];
4657         int r;
4658
4659         r = snprintf(buf, sizeof(buf), "%ld\n",
4660                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4661         if (r > sizeof(buf))
4662                 r = sizeof(buf);
4663         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4664 }
4665
4666 static ssize_t
4667 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4668                     size_t cnt, loff_t *ppos)
4669 {
4670         unsigned long val;
4671         int ret;
4672
4673         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4674         if (ret)
4675                 return ret;
4676
4677         *ptr = val * 1000;
4678
4679         return cnt;
4680 }
4681
4682 static ssize_t
4683 tracing_thresh_read(struct file *filp, char __user *ubuf,
4684                     size_t cnt, loff_t *ppos)
4685 {
4686         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4687 }
4688
4689 static ssize_t
4690 tracing_thresh_write(struct file *filp, const char __user *ubuf,
4691                      size_t cnt, loff_t *ppos)
4692 {
4693         struct trace_array *tr = filp->private_data;
4694         int ret;
4695
4696         mutex_lock(&trace_types_lock);
4697         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4698         if (ret < 0)
4699                 goto out;
4700
4701         if (tr->current_trace->update_thresh) {
4702                 ret = tr->current_trace->update_thresh(tr);
4703                 if (ret < 0)
4704                         goto out;
4705         }
4706
4707         ret = cnt;
4708 out:
4709         mutex_unlock(&trace_types_lock);
4710
4711         return ret;
4712 }
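
/*
 * Usage sketch (illustrative): tracing_thresh is read and written in
 * microseconds but stored in nanoseconds (note the "* 1000" above), so
 *
 *	# echo 100 > /sys/kernel/tracing/tracing_thresh
 *
 * sets a 100us threshold and lets the current tracer react through its
 * ->update_thresh() callback.
 */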
4713
4714 #ifdef CONFIG_TRACER_MAX_TRACE
4715
4716 static ssize_t
4717 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4718                      size_t cnt, loff_t *ppos)
4719 {
4720         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4721 }
4722
4723 static ssize_t
4724 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4725                       size_t cnt, loff_t *ppos)
4726 {
4727         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4728 }
4729
4730 #endif
4731
4732 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4733 {
4734         struct trace_array *tr = inode->i_private;
4735         struct trace_iterator *iter;
4736         int ret = 0;
4737
4738         if (tracing_disabled)
4739                 return -ENODEV;
4740
4741         if (trace_array_get(tr) < 0)
4742                 return -ENODEV;
4743
4744         mutex_lock(&trace_types_lock);
4745
4746         /* create a buffer to store the information to pass to userspace */
4747         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4748         if (!iter) {
4749                 ret = -ENOMEM;
4750                 __trace_array_put(tr);
4751                 goto out;
4752         }
4753
4754         trace_seq_init(&iter->seq);
4755         iter->trace = tr->current_trace;
4756
4757         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4758                 ret = -ENOMEM;
4759                 goto fail;
4760         }
4761
4762         /* trace pipe does not show start of buffer */
4763         cpumask_setall(iter->started);
4764
4765         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4766                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4767
4768         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4769         if (trace_clocks[tr->clock_id].in_ns)
4770                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4771
4772         iter->tr = tr;
4773         iter->trace_buffer = &tr->trace_buffer;
4774         iter->cpu_file = tracing_get_cpu(inode);
4775         mutex_init(&iter->mutex);
4776         filp->private_data = iter;
4777
4778         if (iter->trace->pipe_open)
4779                 iter->trace->pipe_open(iter);
4780
4781         nonseekable_open(inode, filp);
4782
4783         tr->current_trace->ref++;
4784 out:
4785         mutex_unlock(&trace_types_lock);
4786         return ret;
4787
4788 fail:
4789         /* iter->trace points at the live tr->current_trace; do not free it */
4790         kfree(iter);
4791         __trace_array_put(tr);
4792         mutex_unlock(&trace_types_lock);
4793         return ret;
4794 }
4795
4796 static int tracing_release_pipe(struct inode *inode, struct file *file)
4797 {
4798         struct trace_iterator *iter = file->private_data;
4799         struct trace_array *tr = inode->i_private;
4800
4801         mutex_lock(&trace_types_lock);
4802
4803         tr->current_trace->ref--;
4804
4805         if (iter->trace->pipe_close)
4806                 iter->trace->pipe_close(iter);
4807
4808         mutex_unlock(&trace_types_lock);
4809
4810         free_cpumask_var(iter->started);
4811         mutex_destroy(&iter->mutex);
4812         kfree(iter);
4813
4814         trace_array_put(tr);
4815
4816         return 0;
4817 }
4818
4819 static unsigned int
4820 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4821 {
4822         struct trace_array *tr = iter->tr;
4823
4824         /* Static iterators (reads of the "trace" file) are either filled or empty */
4825         if (trace_buffer_iter(iter, iter->cpu_file))
4826                 return POLLIN | POLLRDNORM;
4827
4828         if (tr->trace_flags & TRACE_ITER_BLOCK)
4829                 /*
4830                  * Always select as readable when in blocking mode
4831                  */
4832                 return POLLIN | POLLRDNORM;
4833         else
4834                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4835                                              filp, poll_table);
4836 }
4837
4838 static unsigned int
4839 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4840 {
4841         struct trace_iterator *iter = filp->private_data;
4842
4843         return trace_poll(iter, filp, poll_table);
4844 }
4845
4846 /* Must be called with iter->mutex held. */
4847 static int tracing_wait_pipe(struct file *filp)
4848 {
4849         struct trace_iterator *iter = filp->private_data;
4850         int ret;
4851
4852         while (trace_empty(iter)) {
4853
4854                 if ((filp->f_flags & O_NONBLOCK)) {
4855                         return -EAGAIN;
4856                 }
4857
4858                 /*
4859                  * We block until we have read something and tracing has been
4860                  * disabled. If tracing is disabled but nothing has been read
4861                  * yet, we keep blocking. This lets a user cat this file and
4862                  * then enable tracing; once something has been read, we give
4863                  * an EOF the next time tracing is disabled. (A userspace usage
4864                  * sketch follows this function.)
4865                  * iter->pos will be 0 if we haven't read anything.
4866                  */
4867                 if (!tracing_is_on() && iter->pos)
4868                         break;
4869
4870                 mutex_unlock(&iter->mutex);
4871
4872                 ret = wait_on_pipe(iter, false);
4873
4874                 mutex_lock(&iter->mutex);
4875
4876                 if (ret)
4877                         return ret;
4878         }
4879
4880         return 1;
4881 }
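
/*
 * The usage sketch referenced in the comment above (illustrative; assumes
 * tracefs at /sys/kernel/tracing): a consumer of trace_pipe blocks in
 * read() until data arrives, and each event is returned only once.
 * Opening with O_NONBLOCK would make an empty read fail with EAGAIN
 * instead of blocking.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *
 *		if (fd < 0) {
 *			perror("trace_pipe");
 *			return 1;
 *		}
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)	// blocking, consuming
 *			fwrite(buf, 1, n, stdout);
 *		close(fd);
 *		return 0;
 *	}
 */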
4882
4883 /*
4884  * Consumer reader.
4885  */
4886 static ssize_t
4887 tracing_read_pipe(struct file *filp, char __user *ubuf,
4888                   size_t cnt, loff_t *ppos)
4889 {
4890         struct trace_iterator *iter = filp->private_data;
4891         ssize_t sret;
4892
4893         /*
4894          * Avoid more than one consumer on a single file descriptor.
4895          * This is just a matter of trace coherency; the ring buffer itself
4896          * is protected.
4897          */
4898         mutex_lock(&iter->mutex);
4899
4900         /* return any leftover data */
4901         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4902         if (sret != -EBUSY)
4903                 goto out;
4904
4905         trace_seq_init(&iter->seq);
4906
4907         if (iter->trace->read) {
4908                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4909                 if (sret)
4910                         goto out;
4911         }
4912
4913 waitagain:
4914         sret = tracing_wait_pipe(filp);
4915         if (sret <= 0)
4916                 goto out;
4917
4918         /* stop when tracing is finished */
4919         if (trace_empty(iter)) {
4920                 sret = 0;
4921                 goto out;
4922         }
4923
4924         if (cnt >= PAGE_SIZE)
4925                 cnt = PAGE_SIZE - 1;
4926
4927         /* reset everything from iter->seq onward (tr, trace, etc. are kept) */
4928         memset(&iter->seq, 0,
4929                sizeof(struct trace_iterator) -
4930                offsetof(struct trace_iterator, seq));
4931         cpumask_clear(iter->started);
4932         iter->pos = -1;
4933
4934         trace_event_read_lock();
4935         trace_access_lock(iter->cpu_file);
4936         while (trace_find_next_entry_inc(iter) != NULL) {
4937                 enum print_line_t ret;
4938                 int save_len = iter->seq.seq.len;
4939
4940                 ret = print_trace_line(iter);
4941                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4942                         /* don't print partial lines */
4943                         iter->seq.seq.len = save_len;
4944                         break;
4945                 }
4946                 if (ret != TRACE_TYPE_NO_CONSUME)
4947                         trace_consume(iter);
4948
4949                 if (trace_seq_used(&iter->seq) >= cnt)
4950                         break;
4951
4952                 /*
4953                  * Setting the full flag means we reached the trace_seq buffer
4954                  * size and we should have left via the partial-output condition
4955                  * above: one of the trace_seq_* functions is not being used properly.
4956                  */
4957                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4958                           iter->ent->type);
4959         }
4960         trace_access_unlock(iter->cpu_file);
4961         trace_event_read_unlock();
4962
4963         /* Now copy what we have to the user */
4964         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4965         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4966                 trace_seq_init(&iter->seq);
4967
4968         /*
4969          * If there was nothing to send to user, in spite of consuming trace
4970          * entries, go back to wait for more entries.
4971          */
4972         if (sret == -EBUSY)
4973                 goto waitagain;
4974
4975 out:
4976         mutex_unlock(&iter->mutex);
4977
4978         return sret;
4979 }
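
/*
 * The memset() in tracing_read_pipe() above zeroes everything from
 * iter->seq to the end of struct trace_iterator while leaving the members
 * declared before ->seq untouched. A stand-alone sketch of that pattern
 * (the struct and names below are invented for illustration):
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *
 *	struct example_iter {
 *		void *owner;		// preserved across resets
 *		long pos;		// first member that gets cleared
 *		char scratch[64];	// ... and everything after it
 *	};
 *
 *	static void example_reset(struct example_iter *it)
 *	{
 *		memset(&it->pos, 0,
 *		       sizeof(struct example_iter) -
 *		       offsetof(struct example_iter, pos));
 *	}
 */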
4980
4981 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4982                                      unsigned int idx)
4983 {
4984         __free_page(spd->pages[idx]);
4985 }
4986
4987 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4988         .can_merge              = 0,
4989         .confirm                = generic_pipe_buf_confirm,
4990         .release                = generic_pipe_buf_release,
4991         .steal                  = generic_pipe_buf_steal,
4992         .get                    = generic_pipe_buf_get,
4993 };
4994
4995 static size_t
4996 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4997 {
4998         size_t count;
4999         int save_len;
5000         int ret;
5001
5002         /* Seq buffer is page-sized, exactly what we need. */
5003         for (;;) {
5004                 save_len = iter->seq.seq.len;
5005                 ret = print_trace_line(iter);
5006
5007                 if (trace_seq_has_overflowed(&iter->seq)) {
5008                         iter->seq.seq.len = save_len;
5009                         break;
5010                 }
5011
5012                 /*
5013                  * This should not be hit: a partial line is only returned
5014                  * when iter->seq has overflowed, which was already checked
5015                  * above. Check it anyway to be safe.
5016                  */
5017                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5018                         iter->seq.seq.len = save_len;
5019                         break;
5020                 }
5021
5022                 count = trace_seq_used(&iter->seq) - save_len;
5023                 if (rem < count) {
5024                         rem = 0;
5025                         iter->seq.seq.len = save_len;
5026                         break;
5027                 }
5028
5029                 if (ret != TRACE_TYPE_NO_CONSUME)
5030                         trace_consume(iter);
5031                 rem -= count;
5032                 if (!trace_find_next_entry_inc(iter))   {
5033                         rem = 0;
5034                         iter->ent = NULL;
5035                         break;
5036                 }
5037         }
5038
5039         return rem;
5040 }
5041
5042 static ssize_t tracing_splice_read_pipe(struct file *filp,
5043                                         loff_t *ppos,
5044                                         struct pipe_inode_info *pipe,
5045                                         size_t len,
5046                                         unsigned int flags)
5047 {
5048         struct page *pages_def[PIPE_DEF_BUFFERS];
5049         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5050         struct trace_iterator *iter = filp->private_data;
5051         struct splice_pipe_desc spd = {
5052                 .pages          = pages_def,
5053                 .partial        = partial_def,
5054                 .nr_pages       = 0, /* This gets updated below. */
5055                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5056                 .flags          = flags,
5057                 .ops            = &tracing_pipe_buf_ops,
5058                 .spd_release    = tracing_spd_release_pipe,
5059         };
5060         ssize_t ret;
5061         size_t rem;
5062         unsigned int i;
5063
5064         if (splice_grow_spd(pipe, &spd))
5065                 return -ENOMEM;
5066
5067         mutex_lock(&iter->mutex);
5068
5069         if (iter->trace->splice_read) {
5070                 ret = iter->trace->splice_read(iter, filp,
5071                                                ppos, pipe, len, flags);
5072                 if (ret)
5073                         goto out_err;
5074         }
5075
5076         ret = tracing_wait_pipe(filp);
5077         if (ret <= 0)
5078                 goto out_err;
5079
5080         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5081                 ret = -EFAULT;
5082                 goto out_err;
5083         }
5084
5085         trace_event_read_lock();
5086         trace_access_lock(iter->cpu_file);
5087
5088         /* Fill as many pages as possible. */
5089         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5090                 spd.pages[i] = alloc_page(GFP_KERNEL);
5091                 if (!spd.pages[i])
5092                         break;
5093
5094                 rem = tracing_fill_pipe_page(rem, iter);
5095
5096                 /* Copy the data into the page, so we can start over. */
5097                 ret = trace_seq_to_buffer(&iter->seq,
5098                                           page_address(spd.pages[i]),
5099                                           trace_seq_used(&iter->seq));
5100                 if (ret < 0) {
5101                         __free_page(spd.pages[i]);
5102                         break;
5103                 }
5104                 spd.partial[i].offset = 0;
5105                 spd.partial[i].len = trace_seq_used(&iter->seq);
5106
5107                 trace_seq_init(&iter->seq);
5108         }
5109
5110         trace_access_unlock(iter->cpu_file);
5111         trace_event_read_unlock();
5112         mutex_unlock(&iter->mutex);
5113
5114         spd.nr_pages = i;
5115
5116         if (i)
5117                 ret = splice_to_pipe(pipe, &spd);
5118         else
5119                 ret = 0;
5120 out:
5121         splice_shrink_spd(&spd);
5122         return ret;
5123
5124 out_err:
5125         mutex_unlock(&iter->mutex);
5126         goto out;
5127 }
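
/*
 * Usage sketch for the splice path above (illustrative; assumes tracefs at
 * /sys/kernel/tracing): splice(2) moves trace_pipe data through a pipe and
 * into a file a page at a time, without an intermediate userspace copy.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int pfd[2];
 *		int in = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
 *		int out = open("trace.out", O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *
 *		if (in < 0 || out < 0 || pipe(pfd) < 0) {
 *			perror("setup");
 *			return 1;
 *		}
 *		for (;;) {
 *			ssize_t n = splice(in, NULL, pfd[1], NULL, 4096, 0);
 *
 *			if (n <= 0)
 *				break;
 *			if (splice(pfd[0], NULL, out, NULL, n, 0) < 0)
 *				break;
 *		}
 *		return 0;
 *	}
 */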
5128
5129 static ssize_t
5130 tracing_entries_read(struct file *filp, char __user *ubuf,
5131                      size_t cnt, loff_t *ppos)
5132 {
5133         struct inode *inode = file_inode(filp);
5134         struct trace_array *tr = inode->i_private;
5135         int cpu = tracing_get_cpu(inode);
5136         char buf[64];
5137         int r = 0;
5138         ssize_t ret;
5139
5140         mutex_lock(&trace_types_lock);
5141
5142         if (cpu == RING_BUFFER_ALL_CPUS) {
5143                 int cpu, buf_size_same;
5144                 unsigned long size;
5145
5146                 size = 0;
5147                 buf_size_same = 1;
5148                 /* check if all per-cpu buffer sizes are the same */
5149                 for_each_tracing_cpu(cpu) {
5150                         /* fill in the size from first enabled cpu */
5151                         if (size == 0)
5152                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5153                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5154                                 buf_size_same = 0;
5155                                 break;
5156                         }
5157                 }
5158
5159                 if (buf_size_same) {
5160                         if (!ring_buffer_expanded)
5161                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5162                                             size >> 10,
5163                                             trace_buf_size >> 10);
5164                         else
5165                                 r = sprintf(buf, "%lu\n", size >> 10);
5166                 } else
5167                         r = sprintf(buf, "X\n");
5168         } else
5169                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5170
5171         mutex_unlock(&trace_types_lock);
5172
5173         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5174         return ret;
5175 }
5176
5177 static ssize_t
5178 tracing_entries_write(struct file *filp, const char __user *ubuf,
5179                       size_t cnt, loff_t *ppos)
5180 {
5181         struct inode *inode = file_inode(filp);
5182         struct trace_array *tr = inode->i_private;
5183         unsigned long val;
5184         int ret;
5185
5186         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5187         if (ret)
5188                 return ret;
5189
5190         /* must have at least 1 entry */
5191         if (!val)
5192                 return -EINVAL;
5193
5194         /* value is in KB */
5195         val <<= 10;
5196         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5197         if (ret < 0)
5198                 return ret;
5199
5200         *ppos += cnt;
5201
5202         return cnt;
5203 }
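
/*
 * A small reader for the interface above (illustrative; the tracefs mount
 * point is assumed): buffer_size_kb reports the per-CPU buffer size in KB,
 * prints "X" when the CPUs disagree, and appends "(expanded: N)" while the
 * ring buffer is still at its boot-time minimum. Writing a number resizes
 * every per-CPU buffer to that many KB (val <<= 10 above converts KB to
 * bytes).
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[64];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/buffer_size_kb", O_RDONLY);
 *
 *		if (fd < 0) {
 *			perror("buffer_size_kb");
 *			return 1;
 *		}
 *		n = read(fd, buf, sizeof(buf) - 1);
 *		if (n > 0) {
 *			buf[n] = '\0';
 *			printf("per-cpu size: %s", buf);	// e.g. "1408\n"
 *		}
 *		close(fd);
 *		return 0;
 *	}
 */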
5204
5205 static ssize_t
5206 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5207                                 size_t cnt, loff_t *ppos)
5208 {
5209         struct trace_array *tr = filp->private_data;
5210         char buf[64];
5211         int r, cpu;
5212         unsigned long size = 0, expanded_size = 0;
5213
5214         mutex_lock(&trace_types_lock);
5215         for_each_tracing_cpu(cpu) {
5216                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5217                 if (!ring_buffer_expanded)
5218                         expanded_size += trace_buf_size >> 10;
5219         }
5220         if (ring_buffer_expanded)
5221                 r = sprintf(buf, "%lu\n", size);
5222         else
5223                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5224         mutex_unlock(&trace_types_lock);
5225
5226         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5227 }
5228
5229 static ssize_t
5230 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5231                           size_t cnt, loff_t *ppos)
5232 {
5233         /*
5234          * There is no need to read what the user has written; this function
5235          * only exists so that an "echo" to this file does not return an error.
5236          */
5237
5238         *ppos += cnt;
5239
5240         return cnt;
5241 }
5242
5243 static int
5244 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5245 {
5246         struct trace_array *tr = inode->i_private;
5247
5248         /* disable tracing ? */
5249         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5250                 tracer_tracing_off(tr);
5251         /* resize the ring buffer to 0 */
5252         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5253
5254         trace_array_put(tr);
5255
5256         return 0;
5257 }
5258
5259 static ssize_t
5260 tracing_mark_write(struct file *filp, const char __user *ubuf,
5261                                         size_t cnt, loff_t *fpos)
5262 {
5263         unsigned long addr = (unsigned long)ubuf;
5264         struct trace_array *tr = filp->private_data;
5265         struct ring_buffer_event *event;
5266         struct ring_buffer *buffer;
5267         struct print_entry *entry;
5268         unsigned long irq_flags;
5269         struct page *pages[2];
5270         void *map_page[2];
5271         int nr_pages = 1;
5272         ssize_t written;
5273         int offset;
5274         int size;
5275         int len;
5276         int ret;
5277         int i;
5278
5279         if (tracing_disabled)
5280                 return -EINVAL;
5281
5282         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5283                 return -EINVAL;
5284
5285         if (cnt > TRACE_BUF_SIZE)
5286                 cnt = TRACE_BUF_SIZE;
5287
5288         /*
5289          * Userspace is injecting traces into the kernel trace buffer.
5290          * We want to be as non-intrusive as possible.
5291          * To do so, we do not want to allocate any special buffers
5292          * or take any locks, but instead write the userspace data
5293          * straight into the ring buffer.
5294          *
5295          * First we need to pin the userspace buffer into memory, which it
5296          * most likely already is, because userspace just referenced it; but
5297          * there is no guarantee. Using get_user_pages_fast() and
5298          * kmap_atomic()/kunmap_atomic() we can access the pages directly
5299          * and write the data straight into the ring buffer. (A userspace
5300          * usage sketch follows this function.)
5301          */
5302         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5303
5304         /* check if the user buffer crosses a page boundary */
5305         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5306                 nr_pages = 2;
5307
5308         offset = addr & (PAGE_SIZE - 1);
5309         addr &= PAGE_MASK;
5310
5311         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5312         if (ret < nr_pages) {
5313                 while (--ret >= 0)
5314                         put_page(pages[ret]);
5315                 written = -EFAULT;
5316                 goto out;
5317         }
5318
5319         for (i = 0; i < nr_pages; i++)
5320                 map_page[i] = kmap_atomic(pages[i]);
5321
5322         local_save_flags(irq_flags);
5323         size = sizeof(*entry) + cnt + 2; /* possible \n added */
5324         buffer = tr->trace_buffer.buffer;
5325         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5326                                           irq_flags, preempt_count());
5327         if (!event) {
5328                 /* Ring buffer disabled, return as if not open for write */
5329                 written = -EBADF;
5330                 goto out_unlock;
5331         }
5332
5333         entry = ring_buffer_event_data(event);
5334         entry->ip = _THIS_IP_;
5335
5336         if (nr_pages == 2) {
5337                 len = PAGE_SIZE - offset;
5338                 memcpy(&entry->buf, map_page[0] + offset, len);
5339                 memcpy(&entry->buf[len], map_page[1], cnt - len);
5340         } else
5341                 memcpy(&entry->buf, map_page[0] + offset, cnt);
5342
5343         if (entry->buf[cnt - 1] != '\n') {
5344                 entry->buf[cnt] = '\n';
5345                 entry->buf[cnt + 1] = '\0';
5346         } else
5347                 entry->buf[cnt] = '\0';
5348
5349         __buffer_unlock_commit(buffer, event);
5350
5351         written = cnt;
5352
5353         *fpos += written;
5354
5355  out_unlock:
5356         for (i = nr_pages - 1; i >= 0; i--) {
5357                 kunmap_atomic(map_page[i]);
5358                 put_page(pages[i]);
5359         }
5360  out:
5361         return written;
5362 }
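
/*
 * The usage sketch referenced in the comment above (illustrative; assumes
 * tracefs at /sys/kernel/tracing): anything written to trace_marker shows
 * up in the trace as a print event, and the kernel appends a newline if
 * the write did not end with one.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char msg[64];
 *		int len = snprintf(msg, sizeof(msg),
 *				   "hello from pid %d", (int)getpid());
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd < 0 || write(fd, msg, len) < 0) {
 *			perror("trace_marker");
 *			return 1;
 *		}
 *		close(fd);
 *		return 0;
 *	}
 */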
5363
5364 static int tracing_clock_show(struct seq_file *m, void *v)
5365 {
5366         struct trace_array *tr = m->private;
5367         int i;
5368
5369         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5370                 seq_printf(m,
5371                         "%s%s%s%s", i ? " " : "",
5372                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5373                         i == tr->clock_id ? "]" : "");
5374         seq_putc(m, '\n');
5375
5376         return 0;
5377 }
5378
5379 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5380 {
5381         int i;
5382
5383         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5384                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5385                         break;
5386         }
5387         if (i == ARRAY_SIZE(trace_clocks))
5388                 return -EINVAL;
5389
5390         mutex_lock(&trace_types_lock);
5391
5392         tr->clock_id = i;
5393
5394         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5395
5396         /*
5397          * The new clock may not be consistent with the previous clock;
5398          * reset the buffer so that it does not have incomparable timestamps.
5399          */
5400         tracing_reset_online_cpus(&tr->trace_buffer);
5401
5402 #ifdef CONFIG_TRACER_MAX_TRACE
5403         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5404                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5405         tracing_reset_online_cpus(&tr->max_buffer);
5406 #endif
5407
5408         mutex_unlock(&trace_types_lock);
5409
5410         return 0;
5411 }
5412
5413 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5414                                    size_t cnt, loff_t *fpos)
5415 {
5416         struct seq_file *m = filp->private_data;
5417         struct trace_array *tr = m->private;
5418         char buf[64];
5419         const char *clockstr;
5420         int ret;
5421
5422         if (cnt >= sizeof(buf))
5423                 return -EINVAL;
5424
5425         if (copy_from_user(buf, ubuf, cnt))
5426                 return -EFAULT;
5427
5428         buf[cnt] = 0;
5429
5430         clockstr = strstrip(buf);
5431
5432         ret = tracing_set_clock(tr, clockstr);
5433         if (ret)
5434                 return ret;
5435
5436         *fpos += cnt;
5437
5438         return cnt;
5439 }
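
/*
 * Usage sketch for the clock files above (illustrative; assumes tracefs at
 * /sys/kernel/tracing and that a "mono" clock entry exists): reading
 * trace_clock lists the available clocks with the current one in square
 * brackets; writing a name switches to it, which resets the buffers as
 * noted in tracing_set_clock().
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[256];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/trace_clock", O_RDWR);
 *
 *		if (fd < 0) {
 *			perror("trace_clock");
 *			return 1;
 *		}
 *		n = read(fd, buf, sizeof(buf) - 1);	// e.g. "[local] global counter ..."
 *		if (n > 0)
 *			fwrite(buf, 1, n, stdout);
 *		if (write(fd, "mono\n", 5) < 0)
 *			perror("set clock");
 *		close(fd);
 *		return 0;
 *	}
 */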
5440
5441 static int tracing_clock_open(struct inode *inode, struct file *file)
5442 {
5443         struct trace_array *tr = inode->i_private;
5444         int ret;
5445
5446         if (tracing_disabled)
5447                 return -ENODEV;
5448
5449         if (trace_array_get(tr))
5450                 return -ENODEV;
5451
5452         ret = single_open(file, tracing_clock_show, inode->i_private);
5453         if (ret < 0)
5454                 trace_array_put(tr);
5455
5456         return ret;
5457 }
5458
5459 struct ftrace_buffer_info {
5460         struct trace_iterator   iter;
5461         void                    *spare;
5462         unsigned int            read;
5463 };
5464
5465 #ifdef CONFIG_TRACER_SNAPSHOT
5466 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5467 {
5468         struct trace_array *tr = inode->i_private;
5469         struct trace_iterator *iter;
5470         struct seq_file *m;
5471         int ret = 0;
5472
5473         if (trace_array_get(tr) < 0)
5474                 return -ENODEV;
5475
5476         if (file->f_mode & FMODE_READ) {
5477                 iter = __tracing_open(inode, file, true);
5478                 if (IS_ERR(iter))
5479                         ret = PTR_ERR(iter);
5480         } else {
5481                 /* Writes still need the seq_file to hold the private data */
5482                 ret = -ENOMEM;
5483                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5484                 if (!m)
5485                         goto out;
5486                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5487                 if (!iter) {
5488                         kfree(m);
5489                         goto out;
5490                 }
5491                 ret = 0;
5492
5493                 iter->tr = tr;
5494                 iter->trace_buffer = &tr->max_buffer;
5495                 iter->cpu_file = tracing_get_cpu(inode);
5496                 m->private = iter;
5497                 file->private_data = m;
5498         }
5499 out:
5500         if (ret < 0)
5501                 trace_array_put(tr);
5502
5503         return ret;
5504 }
5505
5506 static ssize_t
5507 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5508                        loff_t *ppos)
5509 {
5510         struct seq_file *m = filp->private_data;
5511         struct trace_iterator *iter = m->private;
5512         struct trace_array *tr = iter->tr;
5513         unsigned long val;
5514         int ret;
5515
5516         ret = tracing_update_buffers();
5517         if (ret < 0)
5518                 return ret;
5519
5520         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5521         if (ret)
5522                 return ret;
5523
5524         mutex_lock(&trace_types_lock);
5525
5526         if (tr->current_trace->use_max_tr) {
5527                 ret = -EBUSY;
5528                 goto out;
5529         }
5530
5531         switch (val) {
5532         case 0:
5533                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5534                         ret = -EINVAL;
5535                         break;
5536                 }
5537                 if (tr->allocated_snapshot)
5538                         free_snapshot(tr);
5539                 break;
5540         case 1:
5541 /* Only allow per-cpu swap if the ring buffer supports it */
5542 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5543                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5544                         ret = -EINVAL;
5545                         break;
5546                 }
5547 #endif
5548                 if (!tr->allocated_snapshot) {
5549                         ret = alloc_snapshot(tr);
5550                         if (ret < 0)
5551                                 break;
5552                 }
5553                 local_irq_disable();
5554                 /* Now, we're going to swap */
5555                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5556                         update_max_tr(tr, current, smp_processor_id());
5557                 else
5558                         update_max_tr_single(tr, current, iter->cpu_file);
5559                 local_irq_enable();
5560                 break;
5561         default:
5562                 if (tr->allocated_snapshot) {
5563                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5564                                 tracing_reset_online_cpus(&tr->max_buffer);
5565                         else
5566                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5567                 }
5568                 break;
5569         }
5570
5571         if (ret >= 0) {
5572                 *ppos += cnt;
5573                 ret = cnt;
5574         }
5575 out:
5576         mutex_unlock(&trace_types_lock);
5577         return ret;
5578 }
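
/*
 * Usage sketch for the write handler above (illustrative; assumes tracefs
 * at /sys/kernel/tracing): per the switch statement, writing "0" frees the
 * snapshot buffer, "1" allocates it if necessary and swaps it with the
 * live buffer, and any other number clears the snapshot. Reading the
 * snapshot file afterwards shows the captured events.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *
 *		if (fd < 0 || write(fd, "1\n", 2) < 0) {	// capture
 *			perror("snapshot");
 *			return 1;
 *		}
 *		close(fd);
 *
 *		fd = open("/sys/kernel/tracing/snapshot", O_RDONLY);
 *		if (fd < 0)
 *			return 1;
 *		while ((n = read(fd, buf, sizeof(buf))) > 0)	// dump the capture
 *			fwrite(buf, 1, n, stdout);
 *		close(fd);
 *		return 0;
 *	}
 */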
5579
5580 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5581 {
5582         struct seq_file *m = file->private_data;
5583         int ret;
5584
5585         ret = tracing_release(inode, file);
5586
5587         if (file->f_mode & FMODE_READ)
5588                 return ret;
5589
5590         /* If write only, the seq_file is just a stub */
5591         if (m)
5592                 kfree(m->private);
5593         kfree(m);
5594
5595         return 0;
5596 }
5597
5598 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5599 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5600                                     size_t count, loff_t *ppos);
5601 static int tracing_buffers_release(struct inode *inode, struct file *file);
5602 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5603                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5604
5605 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5606 {
5607         struct ftrace_buffer_info *info;
5608         int ret;
5609
5610         ret = tracing_buffers_open(inode, filp);
5611         if (ret < 0)
5612                 return ret;
5613
5614         info = filp->private_data;
5615
5616         if (info->iter.trace->use_max_tr) {
5617                 tracing_buffers_release(inode, filp);
5618                 return -EBUSY;
5619         }
5620
5621         info->iter.snapshot = true;
5622         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5623
5624         return ret;
5625 }
5626
5627 #endif /* CONFIG_TRACER_SNAPSHOT */
5628
5629
5630 static const struct file_operations tracing_thresh_fops = {
5631         .open           = tracing_open_generic,
5632         .read           = tracing_thresh_read,
5633         .write          = tracing_thresh_write,
5634         .llseek         = generic_file_llseek,
5635 };
5636
5637 #ifdef CONFIG_TRACER_MAX_TRACE
5638 static const struct file_operations tracing_max_lat_fops = {
5639         .open           = tracing_open_generic,
5640         .read           = tracing_max_lat_read,
5641         .write          = tracing_max_lat_write,
5642         .llseek         = generic_file_llseek,
5643 };
5644 #endif
5645
5646 static const struct file_operations set_tracer_fops = {
5647         .open           = tracing_open_generic,
5648         .read           = tracing_set_trace_read,
5649         .write          = tracing_set_trace_write,
5650         .llseek         = generic_file_llseek,
5651 };
5652
5653 static const struct file_operations tracing_pipe_fops = {
5654         .open           = tracing_open_pipe,
5655         .poll           = tracing_poll_pipe,
5656         .read           = tracing_read_pipe,
5657         .splice_read    = tracing_splice_read_pipe,
5658         .release        = tracing_release_pipe,
5659         .llseek         = no_llseek,
5660 };
5661
5662 static const struct file_operations tracing_entries_fops = {
5663         .open           = tracing_open_generic_tr,
5664         .read           = tracing_entries_read,
5665         .write          = tracing_entries_write,
5666         .llseek         = generic_file_llseek,
5667         .release        = tracing_release_generic_tr,
5668 };
5669
5670 static const struct file_operations tracing_total_entries_fops = {
5671         .open           = tracing_open_generic_tr,
5672         .read           = tracing_total_entries_read,
5673         .llseek         = generic_file_llseek,
5674         .release        = tracing_release_generic_tr,
5675 };
5676
5677 static const struct file_operations tracing_free_buffer_fops = {
5678         .open           = tracing_open_generic_tr,
5679         .write          = tracing_free_buffer_write,
5680         .release        = tracing_free_buffer_release,
5681 };
5682
5683 static const struct file_operations tracing_mark_fops = {
5684         .open           = tracing_open_generic_tr,
5685         .write          = tracing_mark_write,
5686         .llseek         = generic_file_llseek,
5687         .release        = tracing_release_generic_tr,
5688 };
5689
5690 static const struct file_operations trace_clock_fops = {
5691         .open           = tracing_clock_open,
5692         .read           = seq_read,
5693         .llseek         = seq_lseek,
5694         .release        = tracing_single_release_tr,
5695         .write          = tracing_clock_write,
5696 };
5697
5698 #ifdef CONFIG_TRACER_SNAPSHOT
5699 static const struct file_operations snapshot_fops = {
5700         .open           = tracing_snapshot_open,
5701         .read           = seq_read,
5702         .write          = tracing_snapshot_write,
5703         .llseek         = tracing_lseek,
5704         .release        = tracing_snapshot_release,
5705 };
5706
5707 static const struct file_operations snapshot_raw_fops = {
5708         .open           = snapshot_raw_open,
5709         .read           = tracing_buffers_read,
5710         .release        = tracing_buffers_release,
5711         .splice_read    = tracing_buffers_splice_read,
5712         .llseek         = no_llseek,
5713 };
5714
5715 #endif /* CONFIG_TRACER_SNAPSHOT */
5716
5717 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5718 {
5719         struct trace_array *tr = inode->i_private;
5720         struct ftrace_buffer_info *info;
5721         int ret;
5722
5723         if (tracing_disabled)
5724                 return -ENODEV;
5725
5726         if (trace_array_get(tr) < 0)
5727                 return -ENODEV;
5728
5729         info = kzalloc(sizeof(*info), GFP_KERNEL);
5730         if (!info) {
5731                 trace_array_put(tr);
5732                 return -ENOMEM;
5733         }
5734
5735         mutex_lock(&trace_types_lock);
5736
5737         info->iter.tr           = tr;
5738         info->iter.cpu_file     = tracing_get_cpu(inode);
5739         info->iter.trace        = tr->current_trace;
5740         info->iter.trace_buffer = &tr->trace_buffer;
5741         info->spare             = NULL;
5742         /* Force reading ring buffer for first read */
5743         info->read              = (unsigned int)-1;
5744
5745         filp->private_data = info;
5746
5747         tr->current_trace->ref++;
5748
5749         mutex_unlock(&trace_types_lock);
5750
5751         ret = nonseekable_open(inode, filp);
5752         if (ret < 0)
5753                 trace_array_put(tr);
5754
5755         return ret;
5756 }
5757
5758 static unsigned int
5759 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5760 {
5761         struct ftrace_buffer_info *info = filp->private_data;
5762         struct trace_iterator *iter = &info->iter;
5763
5764         return trace_poll(iter, filp, poll_table);
5765 }
5766
5767 static ssize_t
5768 tracing_buffers_read(struct file *filp, char __user *ubuf,
5769                      size_t count, loff_t *ppos)
5770 {
5771         struct ftrace_buffer_info *info = filp->private_data;
5772         struct trace_iterator *iter = &info->iter;
5773         ssize_t ret;
5774         ssize_t size;
5775
5776         if (!count)
5777                 return 0;
5778
5779 #ifdef CONFIG_TRACER_MAX_TRACE
5780         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5781                 return -EBUSY;
5782 #endif
5783
5784         if (!info->spare)
5785                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5786                                                           iter->cpu_file);
5787         if (!info->spare)
5788                 return -ENOMEM;
5789
5790         /* Do we have previous read data to read? */
5791         if (info->read < PAGE_SIZE)
5792                 goto read;
5793
5794  again:
5795         trace_access_lock(iter->cpu_file);
5796         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5797                                     &info->spare,
5798                                     count,
5799                                     iter->cpu_file, 0);
5800         trace_access_unlock(iter->cpu_file);
5801
5802         if (ret < 0) {
5803                 if (trace_empty(iter)) {
5804                         if ((filp->f_flags & O_NONBLOCK))
5805                                 return -EAGAIN;
5806
5807                         ret = wait_on_pipe(iter, false);
5808                         if (ret)
5809                                 return ret;
5810
5811                         goto again;
5812                 }
5813                 return 0;
5814         }
5815
5816         info->read = 0;
5817  read:
5818         size = PAGE_SIZE - info->read;
5819         if (size > count)
5820                 size = count;
5821
5822         ret = copy_to_user(ubuf, info->spare + info->read, size);
5823         if (ret == size)
5824                 return -EFAULT;
5825
5826         size -= ret;
5827
5828         *ppos += size;
5829         info->read += size;
5830
5831         return size;
5832 }
5833
5834 static int tracing_buffers_release(struct inode *inode, struct file *file)
5835 {
5836         struct ftrace_buffer_info *info = file->private_data;
5837         struct trace_iterator *iter = &info->iter;
5838
5839         mutex_lock(&trace_types_lock);
5840
5841         iter->tr->current_trace->ref--;
5842
5843         __trace_array_put(iter->tr);
5844
5845         if (info->spare)
5846                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5847         kfree(info);
5848
5849         mutex_unlock(&trace_types_lock);
5850
5851         return 0;
5852 }
5853
5854 struct buffer_ref {
5855         struct ring_buffer      *buffer;
5856         void                    *page;
5857         int                     ref;
5858 };
5859
5860 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5861                                     struct pipe_buffer *buf)
5862 {
5863         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5864
5865         if (--ref->ref)
5866                 return;
5867
5868         ring_buffer_free_read_page(ref->buffer, ref->page);
5869         kfree(ref);
5870         buf->private = 0;
5871 }
5872
5873 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5874                                 struct pipe_buffer *buf)
5875 {
5876         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5877
5878         ref->ref++;
5879 }
5880
5881 /* Pipe buffer operations for ring buffer pages spliced to userspace. */
5882 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5883         .can_merge              = 0,
5884         .confirm                = generic_pipe_buf_confirm,
5885         .release                = buffer_pipe_buf_release,
5886         .steal                  = generic_pipe_buf_steal,
5887         .get                    = buffer_pipe_buf_get,
5888 };
5889
5890 /*
5891  * Callback from splice_to_pipe(), used to release the pages left in
5892  * the spd if we errored out while filling the pipe.
5893  */
5894 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5895 {
5896         struct buffer_ref *ref =
5897                 (struct buffer_ref *)spd->partial[i].private;
5898
5899         if (--ref->ref)
5900                 return;
5901
5902         ring_buffer_free_read_page(ref->buffer, ref->page);
5903         kfree(ref);
5904         spd->partial[i].private = 0;
5905 }
5906
5907 static ssize_t
5908 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5909                             struct pipe_inode_info *pipe, size_t len,
5910                             unsigned int flags)
5911 {
5912         struct ftrace_buffer_info *info = file->private_data;
5913         struct trace_iterator *iter = &info->iter;
5914         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5915         struct page *pages_def[PIPE_DEF_BUFFERS];
5916         struct splice_pipe_desc spd = {
5917                 .pages          = pages_def,
5918                 .partial        = partial_def,
5919                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5920                 .flags          = flags,
5921                 .ops            = &buffer_pipe_buf_ops,
5922                 .spd_release    = buffer_spd_release,
5923         };
5924         struct buffer_ref *ref;
5925         int entries, size, i;
5926         ssize_t ret = 0;
5927
5928 #ifdef CONFIG_TRACER_MAX_TRACE
5929         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5930                 return -EBUSY;
5931 #endif
5932
5933         if (*ppos & (PAGE_SIZE - 1))
5934                 return -EINVAL;
5935
5936         if (len & (PAGE_SIZE - 1)) {
5937                 if (len < PAGE_SIZE)
5938                         return -EINVAL;
5939                 len &= PAGE_MASK;
5940         }
5941
5942         if (splice_grow_spd(pipe, &spd))
5943                 return -ENOMEM;
5944
5945  again:
5946         trace_access_lock(iter->cpu_file);
5947         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5948
5949         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5950                 struct page *page;
5951                 int r;
5952
5953                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5954                 if (!ref) {
5955                         ret = -ENOMEM;
5956                         break;
5957                 }
5958
5959                 ref->ref = 1;
5960                 ref->buffer = iter->trace_buffer->buffer;
5961                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5962                 if (!ref->page) {
5963                         ret = -ENOMEM;
5964                         kfree(ref);
5965                         break;
5966                 }
5967
5968                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5969                                           len, iter->cpu_file, 1);
5970                 if (r < 0) {
5971                         ring_buffer_free_read_page(ref->buffer, ref->page);
5972                         kfree(ref);
5973                         break;
5974                 }
5975
5976                 /*
5977                  * Zero out any leftover data; this page is going
5978                  * to user land.
5979                  */
5980                 size = ring_buffer_page_len(ref->page);
5981                 if (size < PAGE_SIZE)
5982                         memset(ref->page + size, 0, PAGE_SIZE - size);
5983
5984                 page = virt_to_page(ref->page);
5985
5986                 spd.pages[i] = page;
5987                 spd.partial[i].len = PAGE_SIZE;
5988                 spd.partial[i].offset = 0;
5989                 spd.partial[i].private = (unsigned long)ref;
5990                 spd.nr_pages++;
5991                 *ppos += PAGE_SIZE;
5992
5993                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5994         }
5995
5996         trace_access_unlock(iter->cpu_file);
5997         spd.nr_pages = i;
5998
5999         /* did we read anything? */
6000         if (!spd.nr_pages) {
6001                 if (ret)
6002                         goto out;
6003
6004                 ret = -EAGAIN;
6005                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6006                         goto out;
6007
6008                 ret = wait_on_pipe(iter, true);
6009                 if (ret)
6010                         goto out;
6011                 goto again;
6012         }
6013
6014         ret = splice_to_pipe(pipe, &spd);
6015 out:
6016         splice_shrink_spd(&spd);
6017         return ret;
6018 }
6019
6020 static const struct file_operations tracing_buffers_fops = {
6021         .open           = tracing_buffers_open,
6022         .read           = tracing_buffers_read,
6023         .poll           = tracing_buffers_poll,
6024         .release        = tracing_buffers_release,
6025         .splice_read    = tracing_buffers_splice_read,
6026         .llseek         = no_llseek,
6027 };
6028
6029 static ssize_t
6030 tracing_stats_read(struct file *filp, char __user *ubuf,
6031                    size_t count, loff_t *ppos)
6032 {
6033         struct inode *inode = file_inode(filp);
6034         struct trace_array *tr = inode->i_private;
6035         struct trace_buffer *trace_buf = &tr->trace_buffer;
6036         int cpu = tracing_get_cpu(inode);
6037         struct trace_seq *s;
6038         unsigned long cnt;
6039         unsigned long long t;
6040         unsigned long usec_rem;
6041
6042         s = kmalloc(sizeof(*s), GFP_KERNEL);
6043         if (!s)
6044                 return -ENOMEM;
6045
6046         trace_seq_init(s);
6047
6048         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6049         trace_seq_printf(s, "entries: %ld\n", cnt);
6050
6051         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6052         trace_seq_printf(s, "overrun: %ld\n", cnt);
6053
6054         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6055         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6056
6057         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6058         trace_seq_printf(s, "bytes: %ld\n", cnt);
6059
6060         if (trace_clocks[tr->clock_id].in_ns) {
6061                 /* local or global for trace_clock */
6062                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6063                 usec_rem = do_div(t, USEC_PER_SEC);
6064                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6065                                                                 t, usec_rem);
6066
6067                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6068                 usec_rem = do_div(t, USEC_PER_SEC);
6069                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6070         } else {
6071                 /* counter or tsc mode for trace_clock */
6072                 trace_seq_printf(s, "oldest event ts: %llu\n",
6073                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6074
6075                 trace_seq_printf(s, "now ts: %llu\n",
6076                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6077         }
6078
6079         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6080         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6081
6082         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6083         trace_seq_printf(s, "read events: %ld\n", cnt);
6084
6085         count = simple_read_from_buffer(ubuf, count, ppos,
6086                                         s->buffer, trace_seq_used(s));
6087
6088         kfree(s);
6089
6090         return count;
6091 }
6092
6093 static const struct file_operations tracing_stats_fops = {
6094         .open           = tracing_open_generic_tr,
6095         .read           = tracing_stats_read,
6096         .llseek         = generic_file_llseek,
6097         .release        = tracing_release_generic_tr,
6098 };
6099
6100 #ifdef CONFIG_DYNAMIC_FTRACE
6101
6102 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6103 {
6104         return 0;
6105 }
6106
6107 static ssize_t
6108 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6109                   size_t cnt, loff_t *ppos)
6110 {
6111         static char ftrace_dyn_info_buffer[1024];
6112         static DEFINE_MUTEX(dyn_info_mutex);
6113         unsigned long *p = filp->private_data;
6114         char *buf = ftrace_dyn_info_buffer;
6115         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6116         int r;
6117
6118         mutex_lock(&dyn_info_mutex);
6119         r = sprintf(buf, "%ld ", *p);
6120
6121         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6122         buf[r++] = '\n';
6123
6124         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6125
6126         mutex_unlock(&dyn_info_mutex);
6127
6128         return r;
6129 }
6130
6131 static const struct file_operations tracing_dyn_info_fops = {
6132         .open           = tracing_open_generic,
6133         .read           = tracing_read_dyn_info,
6134         .llseek         = generic_file_llseek,
6135 };
6136 #endif /* CONFIG_DYNAMIC_FTRACE */
6137
6138 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6139 static void
6140 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6141 {
6142         tracing_snapshot();
6143 }
6144
6145 static void
6146 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6147 {
6148         unsigned long *count = (unsigned long *)data;
6149
6150         if (!*count)
6151                 return;
6152
6153         if (*count != -1)
6154                 (*count)--;
6155
6156         tracing_snapshot();
6157 }
6158
6159 static int
6160 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6161                       struct ftrace_probe_ops *ops, void *data)
6162 {
6163         long count = (long)data;
6164
6165         seq_printf(m, "%ps:", (void *)ip);
6166
6167         seq_puts(m, "snapshot");
6168
6169         if (count == -1)
6170                 seq_puts(m, ":unlimited\n");
6171         else
6172                 seq_printf(m, ":count=%ld\n", count);
6173
6174         return 0;
6175 }
6176
6177 static struct ftrace_probe_ops snapshot_probe_ops = {
6178         .func                   = ftrace_snapshot,
6179         .print                  = ftrace_snapshot_print,
6180 };
6181
6182 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6183         .func                   = ftrace_count_snapshot,
6184         .print                  = ftrace_snapshot_print,
6185 };
6186
6187 static int
6188 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6189                                char *glob, char *cmd, char *param, int enable)
6190 {
6191         struct ftrace_probe_ops *ops;
6192         void *count = (void *)-1;
6193         char *number;
6194         int ret;
6195
6196         /* hash funcs only work with set_ftrace_filter */
6197         if (!enable)
6198                 return -EINVAL;
6199
6200         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6201
6202         if (glob[0] == '!') {
6203                 unregister_ftrace_function_probe_func(glob+1, ops);
6204                 return 0;
6205         }
6206
6207         if (!param)
6208                 goto out_reg;
6209
6210         number = strsep(&param, ":");
6211
6212         if (!strlen(number))
6213                 goto out_reg;
6214
6215         /*
6216          * We use the callback data field (which is a pointer)
6217          * as our counter.
6218          */
6219         ret = kstrtoul(number, 0, (unsigned long *)&count);
6220         if (ret)
6221                 return ret;
6222
6223  out_reg:
6224         ret = register_ftrace_function_probe(glob, ops, count);
6225
6226         if (ret >= 0)
6227                 alloc_snapshot(&global_trace);
6228
6229         return ret < 0 ? ret : 0;
6230 }
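
/*
 * Usage sketch for the "snapshot" command registered below (illustrative;
 * the mount point /sys/kernel/tracing and the use of schedule() as the
 * traced function are assumptions): appending "<func>:snapshot[:count]"
 * to set_ftrace_filter arms the probe, and a '!' prefix removes it again.
 * The count parameter must be present when removing a counted probe so
 * that the matching probe_ops are selected.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int append(const char *path, const char *cmd)
 *	{
 *		int fd = open(path, O_WRONLY | O_APPEND);
 *		ssize_t n;
 *
 *		if (fd < 0)
 *			return -1;
 *		n = write(fd, cmd, strlen(cmd));
 *		close(fd);
 *		return n < 0 ? -1 : 0;
 *	}
 *
 *	int main(void)
 *	{
 *		const char *filter = "/sys/kernel/tracing/set_ftrace_filter";
 *
 *		// Snapshot on the first three hits of schedule() ...
 *		if (append(filter, "schedule:snapshot:3\n"))
 *			perror("arm snapshot");
 *		// ... and later remove the probe again.
 *		if (append(filter, "!schedule:snapshot:0\n"))
 *			perror("disarm snapshot");
 *		return 0;
 *	}
 */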
6231
6232 static struct ftrace_func_command ftrace_snapshot_cmd = {
6233         .name                   = "snapshot",
6234         .func                   = ftrace_trace_snapshot_callback,
6235 };
6236
6237 static __init int register_snapshot_cmd(void)
6238 {
6239         return register_ftrace_command(&ftrace_snapshot_cmd);
6240 }
6241 #else
6242 static inline __init int register_snapshot_cmd(void) { return 0; }
6243 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6244
6245 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6246 {
6247         if (WARN_ON(!tr->dir))
6248                 return ERR_PTR(-ENODEV);
6249
6250         /* Top directory uses NULL as the parent */
6251         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6252                 return NULL;
6253
6254         /* All sub buffers have a descriptor */
6255         return tr->dir;
6256 }
6257
6258 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6259 {
6260         struct dentry *d_tracer;
6261
6262         if (tr->percpu_dir)
6263                 return tr->percpu_dir;
6264
6265         d_tracer = tracing_get_dentry(tr);
6266         if (IS_ERR(d_tracer))
6267                 return NULL;
6268
6269         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6270
6271         WARN_ONCE(!tr->percpu_dir,
6272                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6273
6274         return tr->percpu_dir;
6275 }
6276
6277 static struct dentry *
6278 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6279                       void *data, long cpu, const struct file_operations *fops)
6280 {
6281         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6282
6283         if (ret) /* See tracing_get_cpu() */
6284                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6285         return ret;
6286 }
6287
6288 static void
6289 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6290 {
6291         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6292         struct dentry *d_cpu;
6293         char cpu_dir[30]; /* 30 characters should be more than enough */
6294
6295         if (!d_percpu)
6296                 return;
6297
6298         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6299         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6300         if (!d_cpu) {
6301                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6302                 return;
6303         }
6304
6305         /* per cpu trace_pipe */
6306         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6307                                 tr, cpu, &tracing_pipe_fops);
6308
6309         /* per cpu trace */
6310         trace_create_cpu_file("trace", 0644, d_cpu,
6311                                 tr, cpu, &tracing_fops);
6312
6313         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6314                                 tr, cpu, &tracing_buffers_fops);
6315
6316         trace_create_cpu_file("stats", 0444, d_cpu,
6317                                 tr, cpu, &tracing_stats_fops);
6318
6319         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6320                                 tr, cpu, &tracing_entries_fops);
6321
6322 #ifdef CONFIG_TRACER_SNAPSHOT
6323         trace_create_cpu_file("snapshot", 0644, d_cpu,
6324                                 tr, cpu, &snapshot_fops);
6325
6326         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6327                                 tr, cpu, &snapshot_raw_fops);
6328 #endif
6329 }
6330
6331 #ifdef CONFIG_FTRACE_SELFTEST
6332 /* Let selftest have access to static functions in this file */
6333 #include "trace_selftest.c"
6334 #endif
6335
6336 static ssize_t
6337 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6338                         loff_t *ppos)
6339 {
6340         struct trace_option_dentry *topt = filp->private_data;
6341         char *buf;
6342
6343         if (topt->flags->val & topt->opt->bit)
6344                 buf = "1\n";
6345         else
6346                 buf = "0\n";
6347
6348         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6349 }
6350
6351 static ssize_t
6352 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6353                          loff_t *ppos)
6354 {
6355         struct trace_option_dentry *topt = filp->private_data;
6356         unsigned long val;
6357         int ret;
6358
6359         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6360         if (ret)
6361                 return ret;
6362
6363         if (val != 0 && val != 1)
6364                 return -EINVAL;
6365
6366         if (!!(topt->flags->val & topt->opt->bit) != val) {
6367                 mutex_lock(&trace_types_lock);
6368                 ret = __set_tracer_option(topt->tr, topt->flags,
6369                                           topt->opt, !val);
6370                 mutex_unlock(&trace_types_lock);
6371                 if (ret)
6372                         return ret;
6373         }
6374
6375         *ppos += cnt;
6376
6377         return cnt;
6378 }
6379
6380
6381 static const struct file_operations trace_options_fops = {
6382         .open = tracing_open_generic,
6383         .read = trace_options_read,
6384         .write = trace_options_write,
6385         .llseek = generic_file_llseek,
6386 };
6387
6388 /*
6389  * In order to pass in both the trace_array descriptor as well as the index
6390  * to the flag that the trace option file represents, the trace_array
6391  * has a character array of trace_flags_index[], which holds the index
6392  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6393  * The address of this character array is passed to the flag option file
6394  * read/write callbacks.
6395  *
6396  * In order to extract both the index and the trace_array descriptor,
6397  * get_tr_index() uses the following algorithm.
6398  *
6399  *   idx = *ptr;
6400  *
6401  * Since the pointer holds the address of an index entry, and each entry
6402  * stores its own index (remember index[1] == 1), *ptr yields the index.
6403  *
6404  * Then, to get the trace_array descriptor, we subtract that index from
6405  * the pointer, which brings us back to the start of the index array:
6406  *
6407  *   ptr - idx == &index[0]
6408  *
6409  * Then a simple container_of() from that pointer gets us to the
6410  * trace_array descriptor.
6411  */
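/*
 * For example (illustrative values): if the option file for bit 3 was
 * created with data == &tr->trace_flags_index[3], then
 * *(unsigned char *)data == 3 and data - 3 == &tr->trace_flags_index[0],
 * so container_of() over the trace_flags_index member recovers the
 * enclosing trace_array.
 */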
6412 static void get_tr_index(void *data, struct trace_array **ptr,
6413                          unsigned int *pindex)
6414 {
6415         *pindex = *(unsigned char *)data;
6416
6417         *ptr = container_of(data - *pindex, struct trace_array,
6418                             trace_flags_index);
6419 }
6420
6421 static ssize_t
6422 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6423                         loff_t *ppos)
6424 {
6425         void *tr_index = filp->private_data;
6426         struct trace_array *tr;
6427         unsigned int index;
6428         char *buf;
6429
6430         get_tr_index(tr_index, &tr, &index);
6431
6432         if (tr->trace_flags & (1 << index))
6433                 buf = "1\n";
6434         else
6435                 buf = "0\n";
6436
6437         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6438 }
6439
6440 static ssize_t
6441 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6442                          loff_t *ppos)
6443 {
6444         void *tr_index = filp->private_data;
6445         struct trace_array *tr;
6446         unsigned int index;
6447         unsigned long val;
6448         int ret;
6449
6450         get_tr_index(tr_index, &tr, &index);
6451
6452         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6453         if (ret)
6454                 return ret;
6455
6456         if (val != 0 && val != 1)
6457                 return -EINVAL;
6458
6459         mutex_lock(&trace_types_lock);
6460         ret = set_tracer_flag(tr, 1 << index, val);
6461         mutex_unlock(&trace_types_lock);
6462
6463         if (ret < 0)
6464                 return ret;
6465
6466         *ppos += cnt;
6467
6468         return cnt;
6469 }
6470
6471 static const struct file_operations trace_options_core_fops = {
6472         .open = tracing_open_generic,
6473         .read = trace_options_core_read,
6474         .write = trace_options_core_write,
6475         .llseek = generic_file_llseek,
6476 };
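/*
 * Both kinds of option files end up under the instance's "options"
 * directory (typically /sys/kernel/tracing/options for the top level
 * instance) and accept only "0" or "1" writes, for example:
 *
 *   echo 1 > options/sym-offset
 *
 * The flag name above is just an example; the files that actually exist
 * depend on trace_options[] and on the current tracer's own flags.
 */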
6477
6478 struct dentry *trace_create_file(const char *name,
6479                                  umode_t mode,
6480                                  struct dentry *parent,
6481                                  void *data,
6482                                  const struct file_operations *fops)
6483 {
6484         struct dentry *ret;
6485
6486         ret = tracefs_create_file(name, mode, parent, data, fops);
6487         if (!ret)
6488                 pr_warn("Could not create tracefs '%s' entry\n", name);
6489
6490         return ret;
6491 }
6492
6493
6494 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6495 {
6496         struct dentry *d_tracer;
6497
6498         if (tr->options)
6499                 return tr->options;
6500
6501         d_tracer = tracing_get_dentry(tr);
6502         if (IS_ERR(d_tracer))
6503                 return NULL;
6504
6505         tr->options = tracefs_create_dir("options", d_tracer);
6506         if (!tr->options) {
6507                 pr_warn("Could not create tracefs directory 'options'\n");
6508                 return NULL;
6509         }
6510
6511         return tr->options;
6512 }
6513
6514 static void
6515 create_trace_option_file(struct trace_array *tr,
6516                          struct trace_option_dentry *topt,
6517                          struct tracer_flags *flags,
6518                          struct tracer_opt *opt)
6519 {
6520         struct dentry *t_options;
6521
6522         t_options = trace_options_init_dentry(tr);
6523         if (!t_options)
6524                 return;
6525
6526         topt->flags = flags;
6527         topt->opt = opt;
6528         topt->tr = tr;
6529
6530         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6531                                     &trace_options_fops);
6532
6533 }
6534
6535 static void
6536 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6537 {
6538         struct trace_option_dentry *topts;
6539         struct trace_options *tr_topts;
6540         struct tracer_flags *flags;
6541         struct tracer_opt *opts;
6542         int cnt;
6543         int i;
6544
6545         if (!tracer)
6546                 return;
6547
6548         flags = tracer->flags;
6549
6550         if (!flags || !flags->opts)
6551                 return;
6552
6553         /*
6554          * If this is an instance, only create flags for tracers
6555          * the instance may have.
6556          */
6557         if (!trace_ok_for_array(tracer, tr))
6558                 return;
6559
6560         for (i = 0; i < tr->nr_topts; i++) {
6561                 /* Make sure there are no duplicate flags. */
6562                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
6563                         return;
6564         }
6565
6566         opts = flags->opts;
6567
6568         for (cnt = 0; opts[cnt].name; cnt++)
6569                 ;
6570
6571         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6572         if (!topts)
6573                 return;
6574
6575         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6576                             GFP_KERNEL);
6577         if (!tr_topts) {
6578                 kfree(topts);
6579                 return;
6580         }
6581
6582         tr->topts = tr_topts;
6583         tr->topts[tr->nr_topts].tracer = tracer;
6584         tr->topts[tr->nr_topts].topts = topts;
6585         tr->nr_topts++;
6586
6587         for (cnt = 0; opts[cnt].name; cnt++) {
6588                 create_trace_option_file(tr, &topts[cnt], flags,
6589                                          &opts[cnt]);
6590                 WARN_ONCE(topts[cnt].entry == NULL,
6591                           "Failed to create trace option: %s",
6592                           opts[cnt].name);
6593         }
6594 }
6595
6596 static struct dentry *
6597 create_trace_option_core_file(struct trace_array *tr,
6598                               const char *option, long index)
6599 {
6600         struct dentry *t_options;
6601
6602         t_options = trace_options_init_dentry(tr);
6603         if (!t_options)
6604                 return NULL;
6605
6606         return trace_create_file(option, 0644, t_options,
6607                                  (void *)&tr->trace_flags_index[index],
6608                                  &trace_options_core_fops);
6609 }
6610
6611 static void create_trace_options_dir(struct trace_array *tr)
6612 {
6613         struct dentry *t_options;
6614         bool top_level = tr == &global_trace;
6615         int i;
6616
6617         t_options = trace_options_init_dentry(tr);
6618         if (!t_options)
6619                 return;
6620
6621         for (i = 0; trace_options[i]; i++) {
6622                 if (top_level ||
6623                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6624                         create_trace_option_core_file(tr, trace_options[i], i);
6625         }
6626 }
6627
6628 static ssize_t
6629 rb_simple_read(struct file *filp, char __user *ubuf,
6630                size_t cnt, loff_t *ppos)
6631 {
6632         struct trace_array *tr = filp->private_data;
6633         char buf[64];
6634         int r;
6635
6636         r = tracer_tracing_is_on(tr);
6637         r = sprintf(buf, "%d\n", r);
6638
6639         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6640 }
6641
6642 static ssize_t
6643 rb_simple_write(struct file *filp, const char __user *ubuf,
6644                 size_t cnt, loff_t *ppos)
6645 {
6646         struct trace_array *tr = filp->private_data;
6647         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6648         unsigned long val;
6649         int ret;
6650
6651         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6652         if (ret)
6653                 return ret;
6654
6655         if (buffer) {
6656                 mutex_lock(&trace_types_lock);
6657                 if (val) {
6658                         tracer_tracing_on(tr);
6659                         if (tr->current_trace->start)
6660                                 tr->current_trace->start(tr);
6661                 } else {
6662                         tracer_tracing_off(tr);
6663                         if (tr->current_trace->stop)
6664                                 tr->current_trace->stop(tr);
6665                 }
6666                 mutex_unlock(&trace_types_lock);
6667         }
6668
6669         (*ppos)++;
6670
6671         return cnt;
6672 }
6673
6674 static const struct file_operations rb_simple_fops = {
6675         .open           = tracing_open_generic_tr,
6676         .read           = rb_simple_read,
6677         .write          = rb_simple_write,
6678         .release        = tracing_release_generic_tr,
6679         .llseek         = default_llseek,
6680 };
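/*
 * rb_simple_fops backs the per-instance "tracing_on" file created in
 * init_tracer_tracefs() below: reads report whether the ring buffer is
 * currently recording, and writing "0" or "1" stops or resumes recording
 * while also invoking the current tracer's stop()/start() callbacks.
 */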
6681
6682 struct dentry *trace_instance_dir;
6683
6684 static void
6685 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6686
6687 static int
6688 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6689 {
6690         enum ring_buffer_flags rb_flags;
6691
6692         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6693
6694         buf->tr = tr;
6695
6696         buf->buffer = ring_buffer_alloc(size, rb_flags);
6697         if (!buf->buffer)
6698                 return -ENOMEM;
6699
6700         buf->data = alloc_percpu(struct trace_array_cpu);
6701         if (!buf->data) {
6702                 ring_buffer_free(buf->buffer);
6703                 return -ENOMEM;
6704         }
6705
6706         /* Allocate the first page for all buffers */
6707         set_buffer_entries(&tr->trace_buffer,
6708                            ring_buffer_size(tr->trace_buffer.buffer, 0));
6709
6710         return 0;
6711 }
6712
6713 static int allocate_trace_buffers(struct trace_array *tr, int size)
6714 {
6715         int ret;
6716
6717         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6718         if (ret)
6719                 return ret;
6720
6721 #ifdef CONFIG_TRACER_MAX_TRACE
6722         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6723                                     allocate_snapshot ? size : 1);
6724         if (WARN_ON(ret)) {
6725                 ring_buffer_free(tr->trace_buffer.buffer);
6726                 free_percpu(tr->trace_buffer.data);
6727                 return -ENOMEM;
6728         }
6729         tr->allocated_snapshot = allocate_snapshot;
6730
6731         /*
6732          * Only the top level trace array gets its snapshot allocated
6733          * from the kernel command line.
6734          */
6735         allocate_snapshot = false;
6736 #endif
6737         return 0;
6738 }
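/*
 * Note on the sizes above: with CONFIG_TRACER_MAX_TRACE the max_buffer is
 * the swap target for snapshots and max-latency traces.  It is kept at the
 * minimum size (effectively one page per cpu) until a snapshot is actually
 * requested, unless a boot-time snapshot was asked for (allocate_snapshot
 * set from the kernel command line).
 */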
6739
6740 static void free_trace_buffer(struct trace_buffer *buf)
6741 {
6742         if (buf->buffer) {
6743                 ring_buffer_free(buf->buffer);
6744                 buf->buffer = NULL;
6745                 free_percpu(buf->data);
6746                 buf->data = NULL;
6747         }
6748 }
6749
6750 static void free_trace_buffers(struct trace_array *tr)
6751 {
6752         if (!tr)
6753                 return;
6754
6755         free_trace_buffer(&tr->trace_buffer);
6756
6757 #ifdef CONFIG_TRACER_MAX_TRACE
6758         free_trace_buffer(&tr->max_buffer);
6759 #endif
6760 }
6761
6762 static void init_trace_flags_index(struct trace_array *tr)
6763 {
6764         int i;
6765
6766         /* Used by the trace options files */
6767         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
6768                 tr->trace_flags_index[i] = i;
6769 }
6770
6771 static void __update_tracer_options(struct trace_array *tr)
6772 {
6773         struct tracer *t;
6774
6775         for (t = trace_types; t; t = t->next)
6776                 add_tracer_options(tr, t);
6777 }
6778
6779 static void update_tracer_options(struct trace_array *tr)
6780 {
6781         mutex_lock(&trace_types_lock);
6782         __update_tracer_options(tr);
6783         mutex_unlock(&trace_types_lock);
6784 }
6785
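/*
 * instance_mkdir() and instance_rmdir() are registered as the mkdir/rmdir
 * callbacks of the "instances" directory (see create_trace_instances()
 * below), so for example
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *
 * (with tracefs typically mounted at /sys/kernel/tracing) creates a new
 * trace_array with its own buffers, events and option files, and rmdir
 * tears it down again.
 */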
6786 static int instance_mkdir(const char *name)
6787 {
6788         struct trace_array *tr;
6789         int ret;
6790
6791         mutex_lock(&trace_types_lock);
6792
6793         ret = -EEXIST;
6794         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6795                 if (tr->name && strcmp(tr->name, name) == 0)
6796                         goto out_unlock;
6797         }
6798
6799         ret = -ENOMEM;
6800         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6801         if (!tr)
6802                 goto out_unlock;
6803
6804         tr->name = kstrdup(name, GFP_KERNEL);
6805         if (!tr->name)
6806                 goto out_free_tr;
6807
6808         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6809                 goto out_free_tr;
6810
6811         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
6812
6813         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6814
6815         raw_spin_lock_init(&tr->start_lock);
6816
6817         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6818
6819         tr->current_trace = &nop_trace;
6820
6821         INIT_LIST_HEAD(&tr->systems);
6822         INIT_LIST_HEAD(&tr->events);
6823
6824         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6825                 goto out_free_tr;
6826
6827         tr->dir = tracefs_create_dir(name, trace_instance_dir);
6828         if (!tr->dir)
6829                 goto out_free_tr;
6830
6831         ret = event_trace_add_tracer(tr->dir, tr);
6832         if (ret) {
6833                 tracefs_remove_recursive(tr->dir);
6834                 goto out_free_tr;
6835         }
6836
6837         init_tracer_tracefs(tr, tr->dir);
6838         init_trace_flags_index(tr);
6839         __update_tracer_options(tr);
6840
6841         list_add(&tr->list, &ftrace_trace_arrays);
6842
6843         mutex_unlock(&trace_types_lock);
6844
6845         return 0;
6846
6847  out_free_tr:
6848         free_trace_buffers(tr);
6849         free_cpumask_var(tr->tracing_cpumask);
6850         kfree(tr->name);
6851         kfree(tr);
6852
6853  out_unlock:
6854         mutex_unlock(&trace_types_lock);
6855
6856         return ret;
6857
6858 }
6859
6860 static int instance_rmdir(const char *name)
6861 {
6862         struct trace_array *tr;
6863         int found = 0;
6864         int ret;
6865         int i;
6866
6867         mutex_lock(&trace_types_lock);
6868
6869         ret = -ENODEV;
6870         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6871                 if (tr->name && strcmp(tr->name, name) == 0) {
6872                         found = 1;
6873                         break;
6874                 }
6875         }
6876         if (!found)
6877                 goto out_unlock;
6878
6879         ret = -EBUSY;
6880         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6881                 goto out_unlock;
6882
6883         list_del(&tr->list);
6884
6885         /* Disable all the flags that were enabled coming in */
6886         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
6887                 if ((1 << i) & ZEROED_TRACE_FLAGS)
6888                         set_tracer_flag(tr, 1 << i, 0);
6889         }
6890
6891         tracing_set_nop(tr);
6892         event_trace_del_tracer(tr);
6893         ftrace_destroy_function_files(tr);
6894         tracefs_remove_recursive(tr->dir);
6895         free_trace_buffers(tr);
6896
6897         for (i = 0; i < tr->nr_topts; i++) {
6898                 kfree(tr->topts[i].topts);
6899         }
6900         kfree(tr->topts);
6901
6902         kfree(tr->name);
6903         kfree(tr);
6904
6905         ret = 0;
6906
6907  out_unlock:
6908         mutex_unlock(&trace_types_lock);
6909
6910         return ret;
6911 }
6912
6913 static __init void create_trace_instances(struct dentry *d_tracer)
6914 {
6915         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
6916                                                          instance_mkdir,
6917                                                          instance_rmdir);
6918         if (WARN_ON(!trace_instance_dir))
6919                 return;
6920 }
6921
6922 static void
6923 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
6924 {
6925         int cpu;
6926
6927         trace_create_file("available_tracers", 0444, d_tracer,
6928                         tr, &show_traces_fops);
6929
6930         trace_create_file("current_tracer", 0644, d_tracer,
6931                         tr, &set_tracer_fops);
6932
6933         trace_create_file("tracing_cpumask", 0644, d_tracer,
6934                           tr, &tracing_cpumask_fops);
6935
6936         trace_create_file("trace_options", 0644, d_tracer,
6937                           tr, &tracing_iter_fops);
6938
6939         trace_create_file("trace", 0644, d_tracer,
6940                           tr, &tracing_fops);
6941
6942         trace_create_file("trace_pipe", 0444, d_tracer,
6943                           tr, &tracing_pipe_fops);
6944
6945         trace_create_file("buffer_size_kb", 0644, d_tracer,
6946                           tr, &tracing_entries_fops);
6947
6948         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6949                           tr, &tracing_total_entries_fops);
6950
6951         trace_create_file("free_buffer", 0200, d_tracer,
6952                           tr, &tracing_free_buffer_fops);
6953
6954         trace_create_file("trace_marker", 0220, d_tracer,
6955                           tr, &tracing_mark_fops);
6956
6957         trace_create_file("trace_clock", 0644, d_tracer, tr,
6958                           &trace_clock_fops);
6959
6960         trace_create_file("tracing_on", 0644, d_tracer,
6961                           tr, &rb_simple_fops);
6962
6963         create_trace_options_dir(tr);
6964
6965 #ifdef CONFIG_TRACER_MAX_TRACE
6966         trace_create_file("tracing_max_latency", 0644, d_tracer,
6967                         &tr->max_latency, &tracing_max_lat_fops);
6968 #endif
6969
6970         if (ftrace_create_function_files(tr, d_tracer))
6971                 WARN(1, "Could not allocate function filter files");
6972
6973 #ifdef CONFIG_TRACER_SNAPSHOT
6974         trace_create_file("snapshot", 0644, d_tracer,
6975                           tr, &snapshot_fops);
6976 #endif
6977
6978         for_each_tracing_cpu(cpu)
6979                 tracing_init_tracefs_percpu(tr, cpu);
6980
6981 }
6982
6983 static struct vfsmount *trace_automount(void *ignore)
6984 {
6985         struct vfsmount *mnt;
6986         struct file_system_type *type;
6987
6988         /*
6989          * To maintain backward compatibility for tools that mount
6990          * debugfs to get to the tracing facility, tracefs is automatically
6991          * mounted to the debugfs/tracing directory.
6992          */
6993         type = get_fs_type("tracefs");
6994         if (!type)
6995                 return NULL;
6996         mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
6997         put_filesystem(type);
6998         if (IS_ERR(mnt))
6999                 return NULL;
7000         mntget(mnt);
7001
7002         return mnt;
7003 }
7004
7005 /**
7006  * tracing_init_dentry - initialize top level trace array
7007  *
7008  * This is called when creating files or directories in the tracing
7009  * directory. It is called via fs_initcall() by any of the boot up code
7010  * and expects to return the dentry of the top level tracing directory.
7011  */
7012 struct dentry *tracing_init_dentry(void)
7013 {
7014         struct trace_array *tr = &global_trace;
7015
7016         /* The top level trace array uses NULL as parent */
7017         if (tr->dir)
7018                 return NULL;
7019
7020         if (WARN_ON(!tracefs_initialized()) ||
7021                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7022                  WARN_ON(!debugfs_initialized())))
7023                 return ERR_PTR(-ENODEV);
7024
7025         /*
7026          * As there may still be users that expect the tracing
7027          * files to exist in debugfs/tracing, we must automount
7028          * the tracefs file system there, so older tools still
7029          * work with the newer kernel.
7030          */
7031         tr->dir = debugfs_create_automount("tracing", NULL,
7032                                            trace_automount, NULL);
7033         if (!tr->dir) {
7034                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7035                 return ERR_PTR(-ENOMEM);
7036         }
7037
7038         return NULL;
7039 }
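/*
 * A NULL return from tracing_init_dentry() means success: the top level
 * files are created directly in the tracefs root, since tracefs, like
 * debugfs, treats a NULL parent as the root of the filesystem.  An
 * ERR_PTR return means tracefs is not usable, which is why callers such
 * as tracer_init_tracefs() only check for IS_ERR().
 */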
7040
7041 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7042 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7043
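/*
 * __start_ftrace_enum_maps and __stop_ftrace_enum_maps above are
 * linker-provided bounds of the section that built-in TRACE_DEFINE_ENUM()
 * entries are collected into, so their difference is the number of
 * built-in enum maps to register.
 */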
7044 static void __init trace_enum_init(void)
7045 {
7046         int len;
7047
7048         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7049         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7050 }
7051
7052 #ifdef CONFIG_MODULES
7053 static void trace_module_add_enums(struct module *mod)
7054 {
7055         if (!mod->num_trace_enums)
7056                 return;
7057
7058         /*
7059          * Modules with bad taint do not have events created, so do
7060          * not bother with enums either.
7061          */
7062         if (trace_module_has_bad_taint(mod))
7063                 return;
7064
7065         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7066 }
7067
7068 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7069 static void trace_module_remove_enums(struct module *mod)
7070 {
7071         union trace_enum_map_item *map;
7072         union trace_enum_map_item **last = &trace_enum_maps;
7073
7074         if (!mod->num_trace_enums)
7075                 return;
7076
7077         mutex_lock(&trace_enum_mutex);
7078
7079         map = trace_enum_maps;
7080
7081         while (map) {
7082                 if (map->head.mod == mod)
7083                         break;
7084                 map = trace_enum_jmp_to_tail(map);
7085                 last = &map->tail.next;
7086                 map = map->tail.next;
7087         }
7088         if (!map)
7089                 goto out;
7090
7091         *last = trace_enum_jmp_to_tail(map)->tail.next;
7092         kfree(map);
7093  out:
7094         mutex_unlock(&trace_enum_mutex);
7095 }
7096 #else
7097 static inline void trace_module_remove_enums(struct module *mod) { }
7098 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7099
7100 static int trace_module_notify(struct notifier_block *self,
7101                                unsigned long val, void *data)
7102 {
7103         struct module *mod = data;
7104
7105         switch (val) {
7106         case MODULE_STATE_COMING:
7107                 trace_module_add_enums(mod);
7108                 break;
7109         case MODULE_STATE_GOING:
7110                 trace_module_remove_enums(mod);
7111                 break;
7112         }
7113
7114         return 0;
7115 }
7116
7117 static struct notifier_block trace_module_nb = {
7118         .notifier_call = trace_module_notify,
7119         .priority = 0,
7120 };
7121 #endif /* CONFIG_MODULES */
7122
7123 static __init int tracer_init_tracefs(void)
7124 {
7125         struct dentry *d_tracer;
7126
7127         trace_access_lock_init();
7128
7129         d_tracer = tracing_init_dentry();
7130         if (IS_ERR(d_tracer))
7131                 return 0;
7132
7133         init_tracer_tracefs(&global_trace, d_tracer);
7134
7135         trace_create_file("tracing_thresh", 0644, d_tracer,
7136                         &global_trace, &tracing_thresh_fops);
7137
7138         trace_create_file("README", 0444, d_tracer,
7139                         NULL, &tracing_readme_fops);
7140
7141         trace_create_file("saved_cmdlines", 0444, d_tracer,
7142                         NULL, &tracing_saved_cmdlines_fops);
7143
7144         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7145                           NULL, &tracing_saved_cmdlines_size_fops);
7146
7147         trace_enum_init();
7148
7149         trace_create_enum_file(d_tracer);
7150
7151 #ifdef CONFIG_MODULES
7152         register_module_notifier(&trace_module_nb);
7153 #endif
7154
7155 #ifdef CONFIG_DYNAMIC_FTRACE
7156         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7157                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7158 #endif
7159
7160         create_trace_instances(d_tracer);
7161
7162         update_tracer_options(&global_trace);
7163
7164         return 0;
7165 }
7166
7167 static int trace_panic_handler(struct notifier_block *this,
7168                                unsigned long event, void *unused)
7169 {
7170         if (ftrace_dump_on_oops)
7171                 ftrace_dump(ftrace_dump_on_oops);
7172         return NOTIFY_OK;
7173 }
7174
7175 static struct notifier_block trace_panic_notifier = {
7176         .notifier_call  = trace_panic_handler,
7177         .next           = NULL,
7178         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7179 };
7180
7181 static int trace_die_handler(struct notifier_block *self,
7182                              unsigned long val,
7183                              void *data)
7184 {
7185         switch (val) {
7186         case DIE_OOPS:
7187                 if (ftrace_dump_on_oops)
7188                         ftrace_dump(ftrace_dump_on_oops);
7189                 break;
7190         default:
7191                 break;
7192         }
7193         return NOTIFY_OK;
7194 }
7195
7196 static struct notifier_block trace_die_notifier = {
7197         .notifier_call = trace_die_handler,
7198         .priority = 200
7199 };
7200
7201 /*
7202  * printk is set to a max of 1024; we really don't need it that big.
7203  * Nothing should be printing 1000 characters anyway.
7204  */
7205 #define TRACE_MAX_PRINT         1000
7206
7207 /*
7208  * Define here KERN_TRACE so that we have one place to modify
7209  * it if we decide to change what log level the ftrace dump
7210  * should be at.
7211  */
7212 #define KERN_TRACE              KERN_EMERG
7213
7214 void
7215 trace_printk_seq(struct trace_seq *s)
7216 {
7217         /* Probably should print a warning here. */
7218         if (s->seq.len >= TRACE_MAX_PRINT)
7219                 s->seq.len = TRACE_MAX_PRINT;
7220
7221         /*
7222          * More paranoid code. Although the buffer size is set to
7223          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7224          * an extra layer of protection.
7225          */
7226         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7227                 s->seq.len = s->seq.size - 1;
7228
7229         /* should be zero terminated, but we are paranoid. */
7230         s->buffer[s->seq.len] = 0;
7231
7232         printk(KERN_TRACE "%s", s->buffer);
7233
7234         trace_seq_init(s);
7235 }
7236
7237 void trace_init_global_iter(struct trace_iterator *iter)
7238 {
7239         iter->tr = &global_trace;
7240         iter->trace = iter->tr->current_trace;
7241         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7242         iter->trace_buffer = &global_trace.trace_buffer;
7243
7244         if (iter->trace && iter->trace->open)
7245                 iter->trace->open(iter);
7246
7247         /* Annotate start of buffers if we had overruns */
7248         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7249                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7250
7251         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7252         if (trace_clocks[iter->tr->clock_id].in_ns)
7253                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7254 }
7255
7256 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7257 {
7258         /* use static because iter can be a bit big for the stack */
7259         static struct trace_iterator iter;
7260         static atomic_t dump_running;
7261         struct trace_array *tr = &global_trace;
7262         unsigned int old_userobj;
7263         unsigned long flags;
7264         int cnt = 0, cpu;
7265
7266         /* Only allow one dump user at a time. */
7267         if (atomic_inc_return(&dump_running) != 1) {
7268                 atomic_dec(&dump_running);
7269                 return;
7270         }
7271
7272         /*
7273          * Always turn off tracing when we dump.
7274          * We don't need to show trace output of what happens
7275          * between multiple crashes.
7276          *
7277          * If the user does a sysrq-z, then they can re-enable
7278          * tracing with echo 1 > tracing_on.
7279          */
7280         tracing_off();
7281
7282         local_irq_save(flags);
7283
7284         /* Simulate the iterator */
7285         trace_init_global_iter(&iter);
7286
7287         for_each_tracing_cpu(cpu) {
7288                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7289         }
7290
7291         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7292
7293         /* don't look at user memory in panic mode */
7294         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7295
7296         switch (oops_dump_mode) {
7297         case DUMP_ALL:
7298                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7299                 break;
7300         case DUMP_ORIG:
7301                 iter.cpu_file = raw_smp_processor_id();
7302                 break;
7303         case DUMP_NONE:
7304                 goto out_enable;
7305         default:
7306                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7307                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7308         }
7309
7310         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7311
7312         /* Did function tracer already get disabled? */
7313         if (ftrace_is_dead()) {
7314                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7315                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7316         }
7317
7318         /*
7319          * We need to stop all tracing on all CPUS to read
7320          * the next buffer. This is a bit expensive, but is
7321          * not done often. We fill all that we can read,
7322          * and then release the locks again.
7323          */
7324
7325         while (!trace_empty(&iter)) {
7326
7327                 if (!cnt)
7328                         printk(KERN_TRACE "---------------------------------\n");
7329
7330                 cnt++;
7331
7332                 /* reset all but tr, trace, and overruns */
7333                 memset(&iter.seq, 0,
7334                        sizeof(struct trace_iterator) -
7335                        offsetof(struct trace_iterator, seq));
7336                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7337                 iter.pos = -1;
7338
7339                 if (trace_find_next_entry_inc(&iter) != NULL) {
7340                         int ret;
7341
7342                         ret = print_trace_line(&iter);
7343                         if (ret != TRACE_TYPE_NO_CONSUME)
7344                                 trace_consume(&iter);
7345                 }
7346                 touch_nmi_watchdog();
7347
7348                 trace_printk_seq(&iter.seq);
7349         }
7350
7351         if (!cnt)
7352                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7353         else
7354                 printk(KERN_TRACE "---------------------------------\n");
7355
7356  out_enable:
7357         tr->trace_flags |= old_userobj;
7358
7359         for_each_tracing_cpu(cpu) {
7360                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7361         }
7362         atomic_dec(&dump_running);
7363         local_irq_restore(flags);
7364 }
7365 EXPORT_SYMBOL_GPL(ftrace_dump);
7366
7367 __init static int tracer_alloc_buffers(void)
7368 {
7369         int ring_buf_size;
7370         int ret = -ENOMEM;
7371
7372         /*
7373          * Make sure we don't accidentally add more trace options
7374          * than we have bits for.
7375          */
7376         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7377
7378         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7379                 goto out;
7380
7381         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7382                 goto out_free_buffer_mask;
7383
7384         /* Only allocate trace_printk buffers if a trace_printk exists */
7385         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7386                 /* Must be called before global_trace.buffer is allocated */
7387                 trace_printk_init_buffers();
7388
7389         /* To save memory, keep the ring buffer size to its minimum */
7390         if (ring_buffer_expanded)
7391                 ring_buf_size = trace_buf_size;
7392         else
7393                 ring_buf_size = 1;
7394
7395         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7396         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7397
7398         raw_spin_lock_init(&global_trace.start_lock);
7399
7400         /* Used for event triggers */
7401         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7402         if (!temp_buffer)
7403                 goto out_free_cpumask;
7404
7405         if (trace_create_savedcmd() < 0)
7406                 goto out_free_temp_buffer;
7407
7408         /* TODO: make the number of buffers hot pluggable with CPUs */
7409         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7410                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7411                 WARN_ON(1);
7412                 goto out_free_savedcmd;
7413         }
7414
7415         if (global_trace.buffer_disabled)
7416                 tracing_off();
7417
7418         if (trace_boot_clock) {
7419                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7420                 if (ret < 0)
7421                         pr_warn("Trace clock %s not defined, going back to default\n",
7422                                 trace_boot_clock);
7423         }
7424
7425         /*
7426          * register_tracer() might reference current_trace, so it
7427          * needs to be set before we register anything. This is
7428          * just a bootstrap of current_trace anyway.
7429          */
7430         global_trace.current_trace = &nop_trace;
7431
7432         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7433
7434         ftrace_init_global_array_ops(&global_trace);
7435
7436         init_trace_flags_index(&global_trace);
7437
7438         register_tracer(&nop_trace);
7439
7440         /* All seems OK, enable tracing */
7441         tracing_disabled = 0;
7442
7443         atomic_notifier_chain_register(&panic_notifier_list,
7444                                        &trace_panic_notifier);
7445
7446         register_die_notifier(&trace_die_notifier);
7447
7448         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7449
7450         INIT_LIST_HEAD(&global_trace.systems);
7451         INIT_LIST_HEAD(&global_trace.events);
7452         list_add(&global_trace.list, &ftrace_trace_arrays);
7453
7454         apply_trace_boot_options();
7455
7456         register_snapshot_cmd();
7457
7458         return 0;
7459
7460 out_free_savedcmd:
7461         free_saved_cmdlines_buffer(savedcmd);
7462 out_free_temp_buffer:
7463         ring_buffer_free(temp_buffer);
7464 out_free_cpumask:
7465         free_cpumask_var(global_trace.tracing_cpumask);
7466 out_free_buffer_mask:
7467         free_cpumask_var(tracing_buffer_mask);
7468 out:
7469         return ret;
7470 }
7471
7472 void __init trace_init(void)
7473 {
7474         if (tracepoint_printk) {
7475                 tracepoint_print_iter =
7476                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7477                 if (WARN_ON(!tracepoint_print_iter))
7478                         tracepoint_printk = 0;
7479         }
7480         tracer_alloc_buffers();
7481         trace_event_init();
7482 }
7483
7484 __init static int clear_boot_tracer(void)
7485 {
7486         /*
7487          * The buffer holding the default bootup tracer name is in an init
7488          * section. This function is called via late_initcall(). If we did not
7489          * find the boot tracer, then clear it out, to prevent
7490          * later registration from accessing the buffer that is
7491          * about to be freed.
7492          */
7493         if (!default_bootup_tracer)
7494                 return 0;
7495
7496         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7497                default_bootup_tracer);
7498         default_bootup_tracer = NULL;
7499
7500         return 0;
7501 }
7502
7503 fs_initcall(tracer_init_tracefs);
7504 late_initcall(clear_boot_tracer);