perf script: Add stat-cpi.py script
authorJiri Olsa <jolsa@kernel.org>
Wed, 6 Jan 2016 10:49:57 +0000 (11:49 +0100)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Wed, 6 Jan 2016 23:11:16 +0000 (20:11 -0300)
Adding stat-cpi.py as an example of how to do stat scripting.

It computes the CPI metrics from cycles and instructions events.

The CPI is based performance metric showing the Cycles Per Instructions
ratio, which helps to identify cycles-hungry code.

Following stat record/report/script combinations could be used:

- get CPI for given workload

    $ perf stat -e cycles,instructions record ls

    SNIP

     Performance counter stats for 'ls':

             2,904,431      cycles
             3,346,878      instructions              #    1.15  insns per cycle

           0.001782686 seconds time elapsed

    $ perf script -s ./scripts/python/stat-cpi.py
           0.001783: cpu -1, thread -1 -> cpi 0.867803 (2904431/3346878)

    $ perf stat -e cycles,instructions record ls | perf script -s ./scripts/python/stat-cpi.py

    SNIP

           0.001730: cpu -1, thread -1 -> cpi 0.869026 (2928292/3369627)

- get CPI systemwide:

    $ perf stat -e cycles,instructions -a -I 1000 record sleep 3
    #           time             counts unit events
         1.000158618        594,274,711      cycles                     (100.00%)
         1.000158618        441,898,250      instructions
         2.000350973        567,649,705      cycles                     (100.00%)
         2.000350973        432,669,206      instructions
         3.000559210        561,940,430      cycles                     (100.00%)
         3.000559210        420,403,465      instructions
         3.000670798            780,105      cycles                     (100.00%)
         3.000670798            326,516      instructions

    $ perf script -s ./scripts/python/stat-cpi.py
           1.000159: cpu -1, thread -1 -> cpi 1.344823 (594274711/441898250)
           2.000351: cpu -1, thread -1 -> cpi 1.311972 (567649705/432669206)
           3.000559: cpu -1, thread -1 -> cpi 1.336669 (561940430/420403465)
           3.000671: cpu -1, thread -1 -> cpi 2.389178 (780105/326516)

    $ perf stat -e cycles,instructions -a -I 1000 record sleep 3 | perf script -s ./scripts/python/stat-cpi.py
           1.000202: cpu -1, thread -1 -> cpi 1.035091 (940778881/908885530)
           2.000392: cpu -1, thread -1 -> cpi 1.442600 (627493992/434974455)
           3.000545: cpu -1, thread -1 -> cpi 1.353612 (741463930/547766890)
           3.000622: cpu -1, thread -1 -> cpi 2.642110 (784083/296764)

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Kan Liang <kan.liang@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1452077397-31958-4-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/scripts/python/stat-cpi.py [new file with mode: 0644]

diff --git a/tools/perf/scripts/python/stat-cpi.py b/tools/perf/scripts/python/stat-cpi.py
new file mode 100644 (file)
index 0000000..8b60f34
--- /dev/null
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+
+data    = {}
+times   = []
+threads = []
+cpus    = []
+
+def get_key(time, event, cpu, thread):
+    return "%d-%s-%d-%d" % (time, event, cpu, thread)
+
+def store_key(time, cpu, thread):
+    if (time not in times):
+        times.append(time)
+
+    if (cpu not in cpus):
+        cpus.append(cpu)
+
+    if (thread not in threads):
+        threads.append(thread)
+
+def store(time, event, cpu, thread, val, ena, run):
+    #print "event %s cpu %d, thread %d, time %d, val %d, ena %d, run %d" % \
+    #      (event, cpu, thread, time, val, ena, run)
+
+    store_key(time, cpu, thread)
+    key = get_key(time, event, cpu, thread)
+    data[key] = [ val, ena, run]
+
+def get(time, event, cpu, thread):
+    key = get_key(time, event, cpu, thread)
+    return data[key][0]
+
+def stat__cycles_k(cpu, thread, time, val, ena, run):
+    store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions_k(cpu, thread, time, val, ena, run):
+    store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__cycles_u(cpu, thread, time, val, ena, run):
+    store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions_u(cpu, thread, time, val, ena, run):
+    store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__cycles(cpu, thread, time, val, ena, run):
+    store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions(cpu, thread, time, val, ena, run):
+    store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__interval(time):
+    for cpu in cpus:
+        for thread in threads:
+            cyc = get(time, "cycles", cpu, thread)
+            ins = get(time, "instructions", cpu, thread)
+            cpi = 0
+
+            if ins != 0:
+                cpi = cyc/float(ins)
+
+            print "%15f: cpu %d, thread %d -> cpi %f (%d/%d)" % (time/(float(1000000000)), cpu, thread, cpi, cyc, ins)
+
+def trace_end():
+    pass
+# XXX trace_end callback could be used as an alternative place
+#     to compute same values as in the script above:
+#
+#    for time in times:
+#        for cpu in cpus:
+#            for thread in threads:
+#                cyc = get(time, "cycles", cpu, thread)
+#                ins = get(time, "instructions", cpu, thread)
+#
+#                if ins != 0:
+#                    cpi = cyc/float(ins)
+#
+#                print "time %.9f, cpu %d, thread %d -> cpi %f" % (time/(float(1000000000)), cpu, thread, cpi)