lib: add a hardware performance counter access library
author Andy Zhou <azhou@nicira.com>
Sat, 21 Mar 2015 07:00:48 +0000 (00:00 -0700)
committer Andy Zhou <azhou@nicira.com>
Tue, 14 Apr 2015 00:23:09 +0000 (17:23 -0700)
First cut of a performance library that provides access to hardware
performance counters.  Please see the comments in perf-counter.h for
more details.
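
A minimal usage sketch (a hypothetical caller, not part of this patch):
annotate a function with the macros from perf-counter.h, after the
process has called perf_counters_init() once at startup:

    static void
    hot_path(void)
    {
        PERF_FUNCTION_COUNT_BEGIN;

        /* Work whose instruction count should be attributed here. */

        PERF_FUNCTION_COUNT_END;
    }

The accumulated per-function statistics can then be retrieved and
reset, for example:

    char *report = perf_counters_to_string();

    fputs(report, stdout);    /* One line per annotated function. */
    free(report);
    perf_counters_clear();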

Signed-off-by: Andy Zhou <azhou@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
lib/automake.mk
lib/perf-counter.c [new file with mode: 0644]
lib/perf-counter.h [new file with mode: 0644]

diff --git a/lib/automake.mk b/lib/automake.mk
index 3629079..7a34c1a 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -185,6 +185,8 @@ lib_libopenvswitch_la_SOURCES = \
        lib/packets.h \
        lib/pcap-file.c \
        lib/pcap-file.h \
+       lib/perf-counter.h \
+       lib/perf-counter.c \
        lib/poll-loop.c \
        lib/poll-loop.h \
        lib/process.c \
diff --git a/lib/perf-counter.c b/lib/perf-counter.c
new file mode 100644
index 0000000..e2eaefc
--- /dev/null
+++ b/lib/perf-counter.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This implementation only applies to the Linux platform.  */
+#ifdef __linux__
+
+#include <config.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#include <asm/unistd.h>
+#include "dynamic-string.h"
+#include "openvswitch/vlog.h"
+#include "perf-counter.h"
+#include "shash.h"
+#include "util.h"
+
+VLOG_DEFINE_THIS_MODULE(perf_counter);
+
+static struct shash perf_counters;
+static int fd__ = 0;
+
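+/* Reads the current value of the hardware counter set up by
+ * perf_counters_init() into '*counter' and also returns it.  If the
+ * counter is not available, stores and returns 0. */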
+uint64_t
+perf_counter_read(uint64_t *counter)
+{
+    if (fd__ > 0) {
+        read(fd__, counter, sizeof(*counter));
+    } else {
+        *counter = 0;
+    }
+
+    return *counter;
+}
+
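+/* Thin wrapper around the perf_event_open() system call, which has no
+ * glibc wrapper. */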
+static long
+perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
+                int cpu, int group_fd, unsigned long flags)
+{
+    int ret;
+
+    ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
+                  group_fd, flags);
+    return ret;
+}
+
+/* Set up a perf event counter to count user space instructions executed
+ * by this process, on any CPU. */
+static void
+perf_event_setup(void)
+{
+    struct perf_event_attr pe;
+
+    memset(&pe, 0, sizeof(struct perf_event_attr));
+    pe.type = PERF_TYPE_HARDWARE;
+    pe.size = sizeof(struct perf_event_attr);
+    pe.config = PERF_COUNT_HW_INSTRUCTIONS;
+    pe.disabled = 1;
+    pe.exclude_kernel = 1;
+    pe.exclude_hv = 1;
+
+    fd__ = perf_event_open(&pe, 0, -1, -1, 0);
+    if (fd__ == -1) {
+        VLOG_ERR("Peformance counter is not available on this platform.\n");
+    } else {
+        ioctl(fd__, PERF_EVENT_IOC_RESET, 0);
+        ioctl(fd__, PERF_EVENT_IOC_ENABLE, 0);
+    }
+}
+
+static void
+perf_counter_init(struct perf_counter *counter)
+{
+    counter->once = true;
+    shash_add_assert(&perf_counters, counter->name, counter);
+}
+
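+/* Reads the current counter value and adds the delta since 'start_count'
+ * to 'counter', registering 'counter' the first time it is used. */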
+void
+perf_counter_accumulate(struct perf_counter *counter, uint64_t start_count)
+{
+    uint64_t end_count;
+
+    if (!counter->once) {
+        perf_counter_init(counter);
+    }
+
+    counter->n_events++;
+    perf_counter_read(&end_count);
+    counter->total_count += end_count - start_count;
+}
+
+static void
+perf_counter_to_ds(struct ds *ds, struct perf_counter *pfc)
+{
+    double ratio;
+
+    if (pfc->n_events) {
+        ratio = (double)pfc->total_count / (double)pfc->n_events;
+    } else {
+        ratio = 0.0;
+    }
+
+    ds_put_format(ds, "%-40s%12"PRIu64"%12"PRIu64"%12.1f\n", pfc->name,
+                  pfc->n_events, pfc->total_count, ratio);
+}
+
+static void
+perf_counters_to_ds(struct ds *ds)
+{
+    const char *err_str;
+    const struct shash_node **sorted;
+    int i;
+
+    err_str = NULL;
+    if (fd__ == -1) {
+        err_str = "performance counter is not supported on this platfrom";
+    } else if (!shash_count(&perf_counters)) {
+        err_str = "performance counter has never been hit";
+    }
+
+    if (err_str) {
+        ds_put_format(ds, "%s\n", err_str);
+        return;
+    }
+
+    /* Display counters in alphabetical order.  */
+    sorted = shash_sort(&perf_counters);
+    for (i = 0; i < shash_count(&perf_counters); i++) {
+        perf_counter_to_ds(ds, sorted[i]->data);
+    }
+    free(sorted);
+}
+
+/*
+ * Caller is responsible for freeing the returned string.
+ */
+char *
+perf_counters_to_string(void)
+{
+    struct ds ds;
+
+    ds_init(&ds);
+    perf_counters_to_ds(&ds);
+    return ds_steal_cstr(&ds);
+}
+
+void
+perf_counters_init(void)
+{
+    shash_init(&perf_counters);
+    perf_event_setup();
+}
+
+void
+perf_counters_clear(void)
+{
+    struct shash_node *node;
+
+    SHASH_FOR_EACH (node, &perf_counters) {
+        struct perf_counter *perf = node->data;
+
+        perf->n_events = 0;
+        perf->total_count = 0;
+    }
+}
+
+void
+perf_counters_destroy(void)
+{
+    struct shash_node *node, *next;
+
+    if (fd__ != -1) {
+        ioctl(fd__, PERF_EVENT_IOC_DISABLE, 0);
+        close(fd__);
+    }
+
+    SHASH_FOR_EACH_SAFE (node, next, &perf_counters) {
+        shash_delete(&perf_counters, node);
+    }
+
+    shash_destroy(&perf_counters);
+}
+#endif
diff --git a/lib/perf-counter.h b/lib/perf-counter.h
new file mode 100644
index 0000000..b5b72e5
--- /dev/null
+++ b/lib/perf-counter.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PERF_COUNTER_H
+#define __PERF_COUNTER_H 1
+
+#include <stdbool.h>
+#include <stdint.h>
+
+/* Motivation
+ * ==========
+ *
+ * It is sometimes desirable to gain performance insight into a program
+ * by using hardware counters.  Recent Linux kernels provide a portable
+ * API for configuring and accessing those counters across multiple
+ * platforms.
+ *
+ * perf-counter.h provides a set of APIs that are semi-integrated into
+ * OVS user space.  The infrastructure that initializes, cleans up,
+ * displays and clears the counters at run time is provided; the sample
+ * points are not.  A programmer needs to insert sample points where
+ * needed.
+ *
+ * Since there are no preconfigured sample points, there is no run time
+ * overhead in the released product.
+ *
+ * Limitations
+ * ===========
+ * - Hard coded to sample the user space instruction count only.
+ * - Only one counter is sampled.
+ * - Useful macros are only provided for function profiling.
+ * - The show and clear operations apply to all counters; there is no
+ *   way to select a subset of the counters.
+ *
+ * These are not fundamental limits, only limitations of the current
+ * implementation.
+ *
+ * Function instruction counter sample point Usage
+ * ================================================
+ *
+ * There are two macros provided:
+ *
+ * Macro 'PERF_FUNCTION_COUNT_BEGIN' needs to be inserted towards the
+ * beginning of the function, where local variables are declared.
+ *
+ * Macro 'PERF_FUNCTION_COUNT_END' needs to appear in the same function,
+ * somewhere below 'PERF_FUNCTION_COUNT_BEGIN', usually towards the end
+ * of the function.
+ *
+ * For example:
+ *
+ *    void my_func() {
+ *      int some_local_variable;
+ *
+ *      PERF_FUNCTION_COUNT_BEGIN;
+ *
+ *      < implementation >
+ *
+ *      PERF_FUNCTION_COUNT_END;
+ *    }
+ *
+ * This will maintain the number of times 'my_func()' is called, and the
+ * total number of instructions '<implementation>' executed during all
+ * those calls.
+ *
+ * Currently there are two limitations:
+ * 1). At most one pair can appear in the same variable scope.
+ * 2). The macros use the function name as the counter name for display.
+ *     Thus, all functions in one annotation session are required to
+ *     have unique names.
+ *
+ * Note that there is no requirement for those macros to be balanced.
+ * For example:
+ *
+ *    void my_func(int i){
+ *
+ *      PERF_FUNCTION_COUNT_BEGIN;
+ *
+ *      if (i == 300) {
+ *          PERF_FUNCTION_COUNT_END;
+ *          return;
+ *      } else {
+ *           <some code>
+ *      }
+ *    }
+ * will work just fine.
+ */
+
+#ifdef __linux__
+struct perf_counter {
+    const char *name;
+    bool once;
+    uint64_t n_events;
+    uint64_t total_count;
+};
+
+#define PERF_COUNTER_ONCE_INITIALIZER(name)  \
+    {                                        \
+        name,                                \
+        false,                               \
+        0,                                   \
+        0,                                   \
+    }
+
+void perf_counters_init(void);
+void perf_counters_destroy(void);
+void perf_counters_clear(void);
+
+uint64_t perf_counter_read(uint64_t *counter);
+void perf_counter_accumulate(struct perf_counter *counter,
+                             uint64_t start_count);
+char *perf_counters_to_string(void);
+
+/* User access macros. */
+#define PERF_FUNCTION_COUNT_BEGIN \
+    static struct perf_counter x__ = PERF_COUNTER_ONCE_INITIALIZER(__func__); \
+    uint64_t start_count__ = perf_counter_read(&start_count__);               \
+
+#define PERF_FUNCTION_COUNT_END \
+    perf_counter_accumulate(&x__, start_count__);
+
+#else
+
+#define PERF_FUNCTION_COUNT_BEGIN
+#define PERF_FUNCTION_COUNT_END
+#endif
+
+#endif