#define SO_INCOMING_CPU 49
+#define SO_ATTACH_BPF 50
+#define SO_DETACH_BPF SO_DETACH_FILTER
+
#endif /* _UAPI_ASM_SOCKET_H */
#define SO_INCOMING_CPU 49
+#define SO_ATTACH_BPF 50
+#define SO_DETACH_BPF SO_DETACH_FILTER
+
#endif /* _UAPI__ASM_AVR32_SOCKET_H */
#define SO_INCOMING_CPU 49
+#define SO_ATTACH_BPF 50
+#define SO_DETACH_BPF SO_DETACH_FILTER
+
#endif /* _ASM_SOCKET_H */
#define SO_INCOMING_CPU 49
+#define SO_ATTACH_BPF 50
+#define SO_DETACH_BPF SO_DETACH_FILTER
+
#endif /* _ASM_SOCKET_H */
#define SO_INCOMING_CPU 49
+#define SO_ATTACH_BPF 50
+#define SO_DETACH_BPF SO_DETACH_FILTER
+
#endif /* _ASM_IA64_SOCKET_H */
#define SO_INCOMING_CPU 49
+#define SO_ATTACH_BPF 50
+#define SO_DETACH_BPF SO_DETACH_FILTER
+
#endif /* _ASM_M32R_SOCKET_H */
#define SO_INCOMING_CPU 49
+#define SO_ATTACH_BPF 50
+#define SO_DETACH_BPF SO_DETACH_FILTER
+
#endif /* _UAPI_ASM_SOCKET_H */
#define SO_INCOMING_CPU 49
+#define SO_ATTACH_BPF 50
+#define SO_DETACH_BPF SO_DETACH_FILTER
+
#endif /* _ASM_SOCKET_H */
#define SO_INCOMING_CPU 0x402A
+#define SO_ATTACH_BPF 0x402B
+#define SO_DETACH_BPF SO_DETACH_FILTER
+
#endif /* _UAPI_ASM_SOCKET_H */
#define SO_INCOMING_CPU 49
+#define SO_ATTACH_BPF 50
+#define SO_DETACH_BPF SO_DETACH_FILTER
+
#endif /* _ASM_POWERPC_SOCKET_H */
#define SO_INCOMING_CPU 49
+#define SO_ATTACH_BPF 50
+#define SO_DETACH_BPF SO_DETACH_FILTER
+
#endif /* _ASM_SOCKET_H */
#define SO_INCOMING_CPU 0x0033
+#define SO_ATTACH_BPF 0x0034
+#define SO_DETACH_BPF SO_DETACH_FILTER
+
/* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
#define SO_INCOMING_CPU 49
+#define SO_ATTACH_BPF 50
+#define SO_DETACH_BPF SO_DETACH_FILTER
+
#endif /* _XTENSA_SOCKET_H */
struct work_struct work;
};
+#ifdef CONFIG_BPF_SYSCALL
void bpf_prog_put(struct bpf_prog *prog);
+#else
+static inline void bpf_prog_put(struct bpf_prog *prog) {}
+#endif
struct bpf_prog *bpf_prog_get(u32 ufd);
/* verify correctness of eBPF program */
int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
void bpf_prog_destroy(struct bpf_prog *fp);
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
+int sk_attach_bpf(u32 ufd, struct sock *sk);
int sk_detach_filter(struct sock *sk);
int bpf_check_classic(const struct sock_filter *filter, unsigned int flen);
#define SO_INCOMING_CPU 49
+#define SO_ATTACH_BPF 50
+#define SO_DETACH_BPF SO_DETACH_FILTER
+
#endif /* __ASM_GENERIC_SOCKET_H */
enum bpf_prog_type {
BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
};
/* flags for BPF_MAP_UPDATE_ELEM command */
return 0;
}
+/* Validate a BPF_LD | BPF_ABS or BPF_LD | BPF_IND instruction.
+ *
+ * Such loads are accepted only in programs whose context is an skb. They
+ * are wrappers around function calls, so they scratch R1-R5, preserve
+ * R6-R9 and leave their result in R0.
+ *
+ * Implicit input:
+ *   ctx == skb == R6 == CTX
+ *
+ * Explicit input:
+ *   SRC == any register
+ *   IMM == 32-bit immediate
+ *
+ * Output:
+ *   R0 - 8/16/32-bit skb data converted to cpu endianness
+ *
+ * Returns 0 if the instruction is acceptable, -EINVAL or the error reported
+ * by check_reg_arg() otherwise.
+ */
+static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
+{
+	struct reg_state *regs = env->cur_state.regs;
+	const u8 mode = BPF_MODE(insn->code);
+	bool reserved_set;
+	int idx, err;
+
+	if (env->prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) {
+		verbose("BPF_LD_ABS|IND instructions are only allowed in socket filters\n");
+		return -EINVAL;
+	}
+
+	/* dst_reg and off must be zero; src_reg must be zero for BPF_ABS */
+	reserved_set = insn->dst_reg != BPF_REG_0 || insn->off != 0;
+	if (mode == BPF_ABS && insn->src_reg != BPF_REG_0)
+		reserved_set = true;
+	if (reserved_set) {
+		verbose("BPF_LD_ABS uses reserved fields\n");
+		return -EINVAL;
+	}
+
+	/* the implicit source operand R6 has to be readable ... */
+	err = check_reg_arg(regs, BPF_REG_6, SRC_OP);
+	if (err)
+		return err;
+
+	/* ... and has to carry the context pointer */
+	if (regs[BPF_REG_6].type != PTR_TO_CTX) {
+		verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
+		return -EINVAL;
+	}
+
+	/* only the indirect form has an explicit source operand to check */
+	if (mode == BPF_IND) {
+		err = check_reg_arg(regs, insn->src_reg, SRC_OP);
+		if (err)
+			return err;
+	}
+
+	/* caller saved registers become unreadable, as after a call */
+	for (idx = 0; idx < CALLER_SAVED_REGS; idx++) {
+		regs[caller_saved[idx]].type = NOT_INIT;
+		regs[caller_saved[idx]].imm = 0;
+	}
+
+	/* R0 now holds the value fetched from the packet, so it is readable */
+	regs[BPF_REG_0].type = UNKNOWN_VALUE;
+	return 0;
+}
+
/* non-recursive DFS pseudo code
* 1 procedure DFS-iterative(G,v):
* 2 label v as discovered
u8 mode = BPF_MODE(insn->code);
if (mode == BPF_ABS || mode == BPF_IND) {
- verbose("LD_ABS is not supported yet\n");
- return -EINVAL;
+ err = check_ld_abs(env, insn);
+ if (err)
+ return err;
+
} else if (mode == BPF_IMM) {
err = check_ld_imm(env, insn);
if (err)
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
+#include <linux/bpf.h>
+#include <linux/err.h>
/**
* sk_filter - run a packet through a socket filter
+/* Release the program behind a socket filter. Programs whose type is
+ * BPF_PROG_TYPE_SOCKET_FILTER are handed to bpf_prog_put(); every other
+ * program goes through bpf_release_orig_filter() and bpf_prog_free().
+ */
static void __bpf_prog_release(struct bpf_prog *prog)
{
- bpf_release_orig_filter(prog);
- bpf_prog_free(prog);
+ if (prog->aux->prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
+ bpf_prog_put(prog);
+ } else {
+ bpf_release_orig_filter(prog);
+ bpf_prog_free(prog);
+ }
}
static void __sk_filter_release(struct sk_filter *fp)
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
+#ifdef CONFIG_BPF_SYSCALL
+/* Attach the eBPF program referred to by file descriptor @ufd as the
+ * filter of socket @sk, replacing whatever filter was installed before.
+ *
+ * Returns 0 on success, -EPERM when SOCK_FILTER_LOCKED is set on the
+ * socket, -EINVAL (or the error encoded by bpf_prog_get()) when @ufd does
+ * not yield a BPF_PROG_TYPE_SOCKET_FILTER program, and -ENOMEM when the
+ * filter cannot be allocated or charged to the socket.
+ */
+int sk_attach_bpf(u32 ufd, struct sock *sk)
+{
+	struct sk_filter *fp, *old_fp;
+	struct bpf_prog *prog;
+
+	if (sock_flag(sk, SOCK_FILTER_LOCKED))
+		return -EPERM;
+
+	prog = bpf_prog_get(ufd);
+	/* NOTE(review): bpf_prog_get() is expected to report failure as an
+	 * ERR_PTR() value; a bare NULL test would let such a pointer be
+	 * dereferenced below. Accept both conventions.
+	 */
+	if (IS_ERR_OR_NULL(prog))
+		return prog ? PTR_ERR(prog) : -EINVAL;
+
+	if (prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) {
+		/* valid fd, but invalid program type */
+		bpf_prog_put(prog);
+		return -EINVAL;
+	}
+
+	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
+	if (!fp) {
+		bpf_prog_put(prog);
+		return -ENOMEM;
+	}
+	fp->prog = prog;
+
+	atomic_set(&fp->refcnt, 0);
+
+	if (!sk_filter_charge(sk, fp)) {
+		/* releases both fp and the program it points to */
+		__sk_filter_release(fp);
+		return -ENOMEM;
+	}
+
+	old_fp = rcu_dereference_protected(sk->sk_filter,
+					   sock_owned_by_user(sk));
+	rcu_assign_pointer(sk->sk_filter, fp);
+
+	if (old_fp)
+		sk_filter_uncharge(sk, old_fp);
+
+	return 0;
+}
+
+/* Helper whitelist for socket filter programs: only
+ * bpf_map_lookup_elem(), bpf_map_update_elem() and bpf_map_delete_elem()
+ * are exposed; every other helper id yields NULL.
+ */
+static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func_id)
+{
+	if (func_id == BPF_FUNC_map_lookup_elem)
+		return &bpf_map_lookup_elem_proto;
+
+	if (func_id == BPF_FUNC_map_update_elem)
+		return &bpf_map_update_elem_proto;
+
+	if (func_id == BPF_FUNC_map_delete_elem)
+		return &bpf_map_delete_elem_proto;
+
+	return NULL;
+}
+
+/* Context access check for socket filters: every (off, size, type)
+ * combination is refused because skb fields cannot be accessed yet.
+ */
+static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type)
+{
+	return false;
+}
+
+/* verifier callbacks used for BPF_PROG_TYPE_SOCKET_FILTER programs */
+static struct bpf_verifier_ops sock_filter_ops = {
+	.is_valid_access = sock_filter_is_valid_access,
+	.get_func_proto = sock_filter_func_proto,
+};
+
+/* ties the callbacks above to the socket filter program type */
+static struct bpf_prog_type_list tl = {
+	.type = BPF_PROG_TYPE_SOCKET_FILTER,
+	.ops = &sock_filter_ops,
+};
+
+/* hand the socket filter program type to bpf_register_prog_type() */
+static int __init register_sock_filter_ops(void)
+{
+	bpf_register_prog_type(&tl);
+
+	return 0;
+}
+late_initcall(register_sock_filter_ops);
+#else
+/* Built without CONFIG_BPF_SYSCALL: attaching an eBPF program is refused. */
+int sk_attach_bpf(u32 ufd, struct sock *sk)
+{
+	return -EOPNOTSUPP;
+}
+#endif
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
}
break;
+ case SO_ATTACH_BPF:
+ ret = -EINVAL;
+ if (optlen == sizeof(u32)) {
+ u32 ufd;
+
+ ret = -EFAULT;
+ if (copy_from_user(&ufd, optval, sizeof(ufd)))
+ break;
+
+ ret = sk_attach_bpf(ufd, sk);
+ }
+ break;
+
case SO_DETACH_FILTER:
ret = sk_detach_filter(sk);
break;
# List of programs to build
hostprogs-y := test_verifier test_maps
+hostprogs-y += sock_example
+hostprogs-y += sockex1
+hostprogs-y += sockex2
test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
+sock_example-objs := sock_example.o libbpf.o
+sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
+sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
+always += sockex1_kern.o
+always += sockex2_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
+
+HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
+HOSTLOADLIBES_sockex1 += -lelf
+HOSTLOADLIBES_sockex2 += -lelf
+
+# point this to your LLVM backend with bpf support
+LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
+
+%.o: %.c
+ clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
+ -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
+ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
--- /dev/null
+#ifndef __BPF_HELPERS_H
+#define __BPF_HELPERS_H
+
+/* Put the annotated object (program, map definition, license string) into
+ * the ELF section called NAME and keep it even when it is not referenced.
+ * The section names are interpreted by the elf_bpf loader.
+ */
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+/* Helper functions callable from eBPF programs written in C. Each pointer
+ * is initialised with the BPF_FUNC_* id of the helper it stands for.
+ */
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+	(void *) BPF_FUNC_map_lookup_elem;
+
+static int (*bpf_map_update_elem)(void *map, void *key, void *value,
+				  unsigned long long flags) =
+	(void *) BPF_FUNC_map_update_elem;
+
+static int (*bpf_map_delete_elem)(void *map, void *key) =
+	(void *) BPF_FUNC_map_delete_elem;
+
+/* llvm builtins through which an eBPF C program emits BPF_LD_ABS and
+ * BPF_LD_IND instructions for byte, half-word and word sized loads
+ */
+struct sk_buff;
+
+unsigned long long load_byte(void *skb, unsigned long long off)
+	asm("llvm.bpf.load.byte");
+
+unsigned long long load_half(void *skb, unsigned long long off)
+	asm("llvm.bpf.load.half");
+
+unsigned long long load_word(void *skb, unsigned long long off)
+	asm("llvm.bpf.load.word");
+
+/* Map attributes as an eBPF C program describes them to the elf_bpf
+ * loader
+ */
+struct bpf_map_def {
+	unsigned int type;
+	unsigned int key_size;
+	unsigned int value_size;
+	unsigned int max_entries;
+};
+
+#endif
--- /dev/null
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include "libbpf.h"
+#include "bpf_helpers.h"
+#include "bpf_load.h"
+
+static char license[128];
+static bool processed_sec[128];
+int map_fd[MAX_MAPS];
+int prog_fd[MAX_PROGS];
+int prog_cnt;
+
+/* Load one program section through the BPF syscall and record its
+ * descriptor in prog_fd[].
+ *
+ * @event is the ELF section name; only names starting with "socket" are
+ * handled, they are loaded as BPF_PROG_TYPE_SOCKET_FILTER. @prog and @size
+ * are the instructions and their size in bytes.
+ *
+ * Returns 0 on success and -1 on failure, including when prog_fd[] already
+ * holds MAX_PROGS entries.
+ */
+static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
+{
+	int fd;
+
+	if (strncmp(event, "socket", 6) != 0)
+		/* tracing events tbd */
+		return -1;
+
+	/* refuse before loading so that no descriptor is leaked and
+	 * prog_fd[] is never written past its end
+	 */
+	if (prog_cnt >= MAX_PROGS) {
+		printf("too many programs, at most %d are supported\n",
+		       MAX_PROGS);
+		return -1;
+	}
+
+	fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER,
+			   prog, size, license);
+
+	if (fd < 0) {
+		printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
+		return -1;
+	}
+
+	prog_fd[prog_cnt++] = fd;
+
+	return 0;
+}
+
+/* Create every map described in the "maps" ELF section.
+ *
+ * @maps points at an array of struct bpf_map_def and @len is the size of
+ * that array in bytes. The descriptor of map i is stored in map_fd[i].
+ *
+ * Returns 0 on success, 1 when @len is negative, when more than MAX_MAPS
+ * maps are described or when a map cannot be created.
+ */
+static int load_maps(struct bpf_map_def *maps, int len)
+{
+	size_t nr_maps, i;
+
+	if (len < 0)
+		return 1;
+
+	/* map_fd[] has room for MAX_MAPS descriptors only */
+	nr_maps = len / sizeof(struct bpf_map_def);
+	if (nr_maps > MAX_MAPS) {
+		printf("too many maps: %zu, at most %d are supported\n",
+		       nr_maps, MAX_MAPS);
+		return 1;
+	}
+
+	for (i = 0; i < nr_maps; i++) {
+		map_fd[i] = bpf_create_map(maps[i].type,
+					   maps[i].key_size,
+					   maps[i].value_size,
+					   maps[i].max_entries);
+		if (map_fd[i] < 0)
+			return 1;
+	}
+	return 0;
+}
+
+/* Fetch header, name and contents of ELF section number @i.
+ *
+ * On success *@shdr, *@shname and *@data describe the section and 0 is
+ * returned. A non-zero return value identifies the step that failed:
+ *   1 - elf_getscn() found no such section
+ *   2 - the section header could not be read
+ *   3 - the section has no name or its size is zero
+ *   4 - the section has no data, or more than one data descriptor
+ */
+static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname,
+		   GElf_Shdr *shdr, Elf_Data **data)
+{
+	Elf_Scn *section = elf_getscn(elf, i);
+	Elf_Data *contents;
+	char *name;
+
+	if (!section)
+		return 1;
+
+	if (gelf_getshdr(section, shdr) != shdr)
+		return 2;
+
+	name = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name);
+	*shname = name;
+	if (!name || !shdr->sh_size)
+		return 3;
+
+	contents = elf_getdata(section, 0);
+	*data = contents;
+	if (!contents || elf_getdata(section, contents) != NULL)
+		return 4;
+
+	return 0;
+}
+
+/* Apply the map relocations of one SHT_REL section to a program.
+ *
+ * @data is the relocation section, @shdr its header, @symbols the symbol
+ * table data and @insn the instructions being patched. Every relocated
+ * instruction must be a BPF_LD | BPF_IMM | BPF_DW; it is marked with
+ * BPF_PSEUDO_MAP_FD and receives the descriptor of the map the symbol
+ * refers to.
+ *
+ * Returns 0 on success and 1 when the symbol table is missing, the
+ * section is malformed or a relocation cannot be applied.
+ */
+static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
+				GElf_Shdr *shdr, struct bpf_insn *insn)
+{
+	int i, nrels;
+
+	/* no symbol table was found, or the division below would trap */
+	if (!symbols || !shdr->sh_entsize)
+		return 1;
+
+	nrels = shdr->sh_size / shdr->sh_entsize;
+
+	for (i = 0; i < nrels; i++) {
+		GElf_Sym sym;
+		GElf_Rel rel;
+		unsigned int insn_idx;
+		size_t map_idx;
+
+		/* both calls leave their output untouched on failure */
+		if (gelf_getrel(data, i, &rel) != &rel)
+			return 1;
+
+		insn_idx = rel.r_offset / sizeof(struct bpf_insn);
+
+		if (gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym) != &sym)
+			return 1;
+
+		if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
+			printf("invalid relo for insn[%u].code 0x%x\n",
+			       insn_idx, insn[insn_idx].code);
+			return 1;
+		}
+
+		/* the symbol value is the offset of the map definition
+		 * inside the "maps" section; it must name a map_fd[] slot
+		 */
+		map_idx = sym.st_value / sizeof(struct bpf_map_def);
+		if (map_idx >= MAX_MAPS) {
+			printf("invalid map index %zu in relo for insn[%u]\n",
+			       map_idx, insn_idx);
+			return 1;
+		}
+
+		insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
+		insn[insn_idx].imm = map_fd[map_idx];
+	}
+
+	return 0;
+}
+
+/* Load every map and program found in the ELF object at @path.
+ *
+ * Pass 1 copies the "license" section, creates the maps listed in the
+ * "maps" section and remembers the symbol table. Pass 2 applies the map
+ * relocations and loads the programs they belong to. Pass 3 loads the
+ * remaining programs, i.e. those that do not use maps.
+ *
+ * Returns 0 on success and 1 on failure. The ELF handle and the file
+ * descriptor of @path are released on every path once the file was opened.
+ */
+int load_bpf_file(char *path)
+{
+	const size_t max_sec = sizeof(processed_sec) / sizeof(processed_sec[0]);
+	int fd, i, ret = 1;
+	Elf *elf;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr, shdr_prog;
+	Elf_Data *data, *data_prog, *symbols = NULL;
+	char *shname, *shname_prog;
+
+	if (elf_version(EV_CURRENT) == EV_NONE)
+		return 1;
+
+	fd = open(path, O_RDONLY, 0);
+	if (fd < 0)
+		return 1;
+
+	elf = elf_begin(fd, ELF_C_READ, NULL);
+	if (!elf)
+		goto out_close;
+
+	if (gelf_getehdr(elf, &ehdr) != &ehdr)
+		goto out_elf;
+
+	/* processed_sec[] is indexed by section number */
+	if (ehdr.e_shnum > max_sec) {
+		printf("too many ELF sections: %d\n", (int) ehdr.e_shnum);
+		goto out_elf;
+	}
+
+	/* scan over all elf sections to get license and map info */
+	for (i = 1; i < ehdr.e_shnum; i++) {
+
+		if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
+			continue;
+
+		if (0) /* helpful for llvm debugging */
+			printf("section %d:%s data %p size %zd link %d flags %d\n",
+			       i, shname, data->d_buf, data->d_size,
+			       shdr.sh_link, (int) shdr.sh_flags);
+
+		if (strcmp(shname, "license") == 0) {
+			size_t n = data->d_size;
+
+			/* never write past license[] and keep it terminated */
+			if (n > sizeof(license) - 1)
+				n = sizeof(license) - 1;
+
+			processed_sec[i] = true;
+			memcpy(license, data->d_buf, n);
+			license[n] = '\0';
+		} else if (strcmp(shname, "maps") == 0) {
+			processed_sec[i] = true;
+			if (load_maps(data->d_buf, data->d_size))
+				goto out_elf;
+		} else if (shdr.sh_type == SHT_SYMTAB) {
+			symbols = data;
+		}
+	}
+
+	/* load programs that need map fixup (relocations) */
+	for (i = 1; i < ehdr.e_shnum; i++) {
+
+		if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
+			continue;
+		if (shdr.sh_type == SHT_REL) {
+			struct bpf_insn *insns;
+
+			/* sh_info names the section the relocations apply
+			 * to; get_sec() fails if there is no such section
+			 */
+			if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog,
+				    &shdr_prog, &data_prog))
+				continue;
+
+			insns = (struct bpf_insn *) data_prog->d_buf;
+
+			processed_sec[shdr.sh_info] = true;
+			processed_sec[i] = true;
+
+			/* relocations cannot be resolved without symbols */
+			if (!symbols ||
+			    parse_relo_and_apply(data, symbols, &shdr, insns))
+				continue;
+
+			/* strncmp() stops at the end of a short name */
+			if (strncmp(shname_prog, "events/", 7) == 0 ||
+			    strncmp(shname_prog, "socket", 6) == 0)
+				load_and_attach(shname_prog, insns, data_prog->d_size);
+		}
+	}
+
+	/* load programs that don't use maps */
+	for (i = 1; i < ehdr.e_shnum; i++) {
+
+		if (processed_sec[i])
+			continue;
+
+		if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
+			continue;
+
+		if (strncmp(shname, "events/", 7) == 0 ||
+		    strncmp(shname, "socket", 6) == 0)
+			load_and_attach(shname, data->d_buf, data->d_size);
+	}
+
+	ret = 0;
+out_elf:
+	elf_end(elf);
+out_close:
+	close(fd);
+	return ret;
+}
--- /dev/null
+#ifndef __BPF_LOAD_H
+#define __BPF_LOAD_H
+
+#define MAX_MAPS 32
+#define MAX_PROGS 32
+
+extern int map_fd[MAX_MAPS];
+extern int prog_fd[MAX_PROGS];
+
+/* parses elf file compiled by llvm .c->.o
+ * . parses 'maps' section and creates maps via BPF syscall
+ * . parses 'license' section and passes it to syscall
+ * . parses elf relocations for BPF maps and adjusts BPF_LD_IMM64 insns by
+ * storing map_fd into insn->imm and marking such insns as BPF_PSEUDO_MAP_FD
+ * . loads eBPF programs via BPF syscall
+ *
+ * One ELF file can contain multiple BPF programs which will be loaded
+ * and their FDs stored in the prog_fd array
+ *
+ * returns zero on success
+ */
+int load_bpf_file(char *path);
+
+#endif
#include <linux/netlink.h>
#include <linux/bpf.h>
#include <errno.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <linux/if_packet.h>
+#include <arpa/inet.h>
#include "libbpf.h"
static __u64 ptr_to_u64(void *ptr)
return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
+
+/* Create a non-blocking, close-on-exec PF_PACKET raw socket for all
+ * protocols (ETH_P_ALL) and bind it to the interface called @name.
+ *
+ * Returns the socket descriptor, or -1 after printing a diagnostic when
+ * the interface is unknown, the socket cannot be created or bind() fails.
+ */
+int open_raw_sock(const char *name)
+{
+	struct sockaddr_ll sll;
+	unsigned int ifindex;
+	int sock;
+
+	/* if_nametoindex() returns 0 for an unknown interface, and a packet
+	 * socket bound with sll_ifindex 0 is not tied to @name at all
+	 */
+	ifindex = if_nametoindex(name);
+	if (!ifindex) {
+		printf("unknown interface %s: %s\n", name, strerror(errno));
+		return -1;
+	}
+
+	sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL));
+	if (sock < 0) {
+		printf("cannot create raw socket\n");
+		return -1;
+	}
+
+	memset(&sll, 0, sizeof(sll));
+	sll.sll_family = AF_PACKET;
+	sll.sll_ifindex = ifindex;
+	sll.sll_protocol = htons(ETH_P_ALL);
+	if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
+		printf("bind to %s: %s\n", name, strerror(errno));
+		close(sock);
+		return -1;
+	}
+
+	return sock;
+}
const struct bpf_insn *insns, int insn_len,
const char *license);
-#define LOG_BUF_SIZE 8192
+#define LOG_BUF_SIZE 65536
extern char bpf_log_buf[LOG_BUF_SIZE];
/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32)
+ *
+ * SIZE is the BPF size modifier of the load (e.g. BPF_B), IMM the
+ * immediate added to skb->data. dst_reg, src_reg and off are left zero.
+ */
+
+#define BPF_LD_ABS(SIZE, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
.off = 0, \
.imm = 0 })
+/* create RAW socket and bind to interface 'name' */
+int open_raw_sock(const char *name);
+
#endif
--- /dev/null
+/* eBPF example program:
+ * - creates arraymap in kernel with key 4 bytes and value 8 bytes
+ *
+ * - loads eBPF program:
+ * r0 = skb->data[ETH_HLEN + offsetof(struct iphdr, protocol)];
+ * *(u32*)(fp - 4) = r0;
+ * // assuming packet is IPv4, lookup ip->proto in a map
+ * value = bpf_map_lookup_elem(map_fd, fp - 4);
+ * if (value)
+ * (*(u64*)value) += 1;
+ *
+ * - attaches this program to a raw socket bound to the loopback
+ *   interface (lo)
+ *
+ * - every second user space reads map[tcp], map[udp], map[icmp] to see
+ *   how many packets of given protocol were seen on lo
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <assert.h>
+#include <linux/bpf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <stddef.h>
+#include "libbpf.h"
+
+/* Count packets per IP protocol on the "lo" interface.
+ *
+ * Creates an array map, loads a socket filter program that increments
+ * map[ip->protocol] for every packet, attaches it to a raw socket bound to
+ * "lo" and prints the TCP, UDP and ICMP counters once a second, ten times.
+ *
+ * Returns 0 on success and 1 when any setup step fails.
+ */
+static int test_sock(void)
+{
+	int sock = -1, map_fd, prog_fd, i, key, ret = 1;
+	long long value = 0, tcp_cnt, udp_cnt, icmp_cnt;
+
+	map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
+				256);
+	if (map_fd < 0) {
+		printf("failed to create map '%s'\n", strerror(errno));
+		goto cleanup;
+	}
+
+	struct bpf_insn prog[] = {
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+		BPF_LD_ABS(BPF_B, ETH_HLEN + offsetof(struct iphdr, protocol) /* R0 = ip->proto */),
+		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+		BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+		BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
+		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+		BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 */
+		BPF_EXIT_INSN(),
+	};
+
+	prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, sizeof(prog),
+				"GPL");
+	if (prog_fd < 0) {
+		printf("failed to load prog '%s'\n", strerror(errno));
+		goto cleanup;
+	}
+
+	sock = open_raw_sock("lo");
+	if (sock < 0)
+		/* do not hand an invalid descriptor to setsockopt() */
+		goto cleanup;
+
+	if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd,
+		       sizeof(prog_fd)) < 0) {
+		printf("setsockopt %s\n", strerror(errno));
+		goto cleanup;
+	}
+
+	for (i = 0; i < 10; i++) {
+		key = IPPROTO_TCP;
+		assert(bpf_lookup_elem(map_fd, &key, &tcp_cnt) == 0);
+
+		key = IPPROTO_UDP;
+		assert(bpf_lookup_elem(map_fd, &key, &udp_cnt) == 0);
+
+		key = IPPROTO_ICMP;
+		assert(bpf_lookup_elem(map_fd, &key, &icmp_cnt) == 0);
+
+		printf("TCP %lld UDP %lld ICMP %lld packets\n",
+		       tcp_cnt, udp_cnt, icmp_cnt);
+		sleep(1);
+	}
+
+	ret = 0;
+cleanup:
+	/* maps, programs, raw sockets will auto cleanup on process exit */
+	return ret;
+}
+
+/* Start "ping -c5 localhost" and run the socket test; the stream returned
+ * by popen() is not read or closed here.
+ */
+int main(void)
+{
+	FILE *ping = popen("ping -c5 localhost", "r");
+
+	(void)ping;
+
+	return test_sock();
+}
--- /dev/null
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/ip.h>
+#include "bpf_helpers.h"
+
+/* array map with 256 entries, u32 key and long value; bpf_prog1() below
+ * indexes it with the byte loaded from the IP protocol field
+ */
+struct bpf_map_def SEC("maps") my_map = {
+	.max_entries = 256,
+	.value_size = sizeof(long),
+	.key_size = sizeof(u32),
+	.type = BPF_MAP_TYPE_ARRAY,
+};
+
+/* Socket filter: atomically bump my_map[protocol] for each packet, where
+ * protocol is the byte at ETH_HLEN + offsetof(struct iphdr, protocol).
+ * Always returns 0.
+ */
+SEC("socket1")
+int bpf_prog1(struct sk_buff *skb)
+{
+	int index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
+	long *counter = bpf_map_lookup_elem(&my_map, &index);
+
+	if (!counter)
+		return 0;
+
+	__sync_fetch_and_add(counter, 1);
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
--- /dev/null
+#include <stdio.h>
+#include <assert.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <unistd.h>
+#include <arpa/inet.h>
+
+/* Load "<argv[0]>_kern.o", attach its first program to a raw socket bound
+ * to "lo", start a ping and print the TCP, UDP and ICMP counters of
+ * map_fd[0] once a second, five times.
+ *
+ * Returns 0 on success and 1 when a setup step or a map lookup fails.
+ */
+int main(int ac, char **argv)
+{
+	static const int protos[] = { IPPROTO_TCP, IPPROTO_UDP, IPPROTO_ICMP };
+	char filename[256];
+	FILE *f;
+	int i, sock;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	sock = open_raw_sock("lo");
+	if (sock < 0)
+		return 1;
+
+	/* kept out of assert(): the call must not vanish under NDEBUG */
+	if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd,
+		       sizeof(prog_fd[0])) < 0) {
+		perror("setsockopt(SO_ATTACH_BPF)");
+		return 1;
+	}
+
+	f = popen("ping -c5 localhost", "r");
+	(void) f;
+
+	for (i = 0; i < 5; i++) {
+		long long cnt[3];
+		int key, j;
+
+		/* cnt[] follows the order of protos[]: TCP, UDP, ICMP */
+		for (j = 0; j < 3; j++) {
+			key = protos[j];
+			if (bpf_lookup_elem(map_fd[0], &key, &cnt[j]) != 0) {
+				printf("map lookup failed for key %d\n", key);
+				return 1;
+			}
+		}
+
+		printf("TCP %lld UDP %lld ICMP %lld packets\n",
+		       cnt[0], cnt[1], cnt[2]);
+		sleep(1);
+	}
+
+	return 0;
+}
--- /dev/null
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+#include <uapi/linux/in.h>
+#include <uapi/linux/if.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/if_tunnel.h>
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1FFF
+
+/* VLAN tag layout; only used through offsetof()/sizeof() in this file */
+struct vlan_hdr {
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+
+/* Result of flow_dissector():
+ * src, dst  - IPv4 addresses, or the XOR hash of the IPv6 addresses
+ * ports     - the word loaded at the computed transport offset
+ * thoff     - that offset, truncated to 16 bits
+ * ip_proto  - the last L4 protocol number seen
+ */
+struct flow_keys {
+ __be32 src;
+ __be32 dst;
+ union {
+ __be32 ports;
+ __be16 port16[2];
+ };
+ __u16 thoff;
+ __u8 ip_proto;
+};
+
+/* Offset of the port words inside the transport header of @proto:
+ * 4 for IPPROTO_AH, 0 for every other protocol.
+ */
+static inline int proto_ports_offset(__u64 proto)
+{
+	return proto == IPPROTO_AH ? 4 : 0;
+}
+
+/* Non-zero when the frag_off field of the IPv4 header at @nhoff has the
+ * IP_MF bit or any IP_OFFSET bit set.
+ */
+static inline int ip_is_fragment(struct sk_buff *ctx, __u64 nhoff)
+{
+	__u64 frag_off = load_half(ctx, nhoff + offsetof(struct iphdr, frag_off));
+
+	return frag_off & (IP_MF | IP_OFFSET);
+}
+
+/* Fold the 16 bytes starting at @off into 32 bits by XOR-ing the four
+ * words loaded from them.
+ */
+static inline __u32 ipv6_addr_hash(struct sk_buff *ctx, __u64 off)
+{
+	__u64 hash = load_word(ctx, off);
+
+	hash ^= load_word(ctx, off + 4);
+	hash ^= load_word(ctx, off + 8);
+	hash ^= load_word(ctx, off + 12);
+
+	return (__u32)hash;
+}
+
+/* Parse the IPv4 header at @nhoff.
+ *
+ * *@ip_proto becomes 0 for fragments and the protocol field otherwise.
+ * Unless that value is IPPROTO_GRE, source and destination address are
+ * stored in @flow. Returns the offset just past the header: 20 for the
+ * common 0x45 version/length byte, else the low nibble times four.
+ */
+static inline __u64 parse_ip(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
+			     struct flow_keys *flow)
+{
+	__u64 verlen;
+
+	if (likely(!ip_is_fragment(skb, nhoff)))
+		*ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
+	else
+		*ip_proto = 0;
+
+	if (*ip_proto != IPPROTO_GRE) {
+		flow->src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
+		flow->dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
+	}
+
+	verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
+	if (unlikely(verlen != 0x45))
+		return nhoff + ((verlen & 0xF) << 2);
+
+	return nhoff + 20;
+}
+
+/* Parse the IPv6 header at @nhoff: *@ip_proto receives the nexthdr field,
+ * @flow the hashed source and destination addresses. Returns the offset
+ * just past the fixed-size header.
+ */
+static inline __u64 parse_ipv6(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
+			       struct flow_keys *flow)
+{
+	const __u64 nexthdr_off = nhoff + offsetof(struct ipv6hdr, nexthdr);
+	const __u64 saddr_off = nhoff + offsetof(struct ipv6hdr, saddr);
+	const __u64 daddr_off = nhoff + offsetof(struct ipv6hdr, daddr);
+
+	*ip_proto = load_byte(skb, nexthdr_off);
+	flow->src = ipv6_addr_hash(skb, saddr_off);
+	flow->dst = ipv6_addr_hash(skb, daddr_off);
+
+	return nhoff + sizeof(struct ipv6hdr);
+}
+
+/* Walk the packet headers and fill *@flow.
+ *
+ * Handles an optional 802.1ad tag followed by an optional 802.1Q tag, then
+ * IPv4 or IPv6, and one level of GRE, IPIP or IPv6-in-IP encapsulation.
+ * Returns false when the outer (or GRE-encapsulated) protocol is neither
+ * IPv4 nor IPv6, true otherwise.
+ */
+static inline bool flow_dissector(struct sk_buff *skb, struct flow_keys *flow)
+{
+ __u64 nhoff = ETH_HLEN;
+ __u64 ip_proto;
+ /* half-word at byte 12 is compared against the ETH_P_* values below */
+ __u64 proto = load_half(skb, 12);
+ int poff;
+
+ if (proto == ETH_P_8021AD) {
+ proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
+ h_vlan_encapsulated_proto));
+ nhoff += sizeof(struct vlan_hdr);
+ }
+
+ if (proto == ETH_P_8021Q) {
+ proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
+ h_vlan_encapsulated_proto));
+ nhoff += sizeof(struct vlan_hdr);
+ }
+
+ if (likely(proto == ETH_P_IP))
+ nhoff = parse_ip(skb, nhoff, &ip_proto, flow);
+ else if (proto == ETH_P_IPV6)
+ nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow);
+ else
+ return false;
+
+ /* look one level into tunnels */
+ switch (ip_proto) {
+ case IPPROTO_GRE: {
+ struct gre_hdr {
+ __be16 flags;
+ __be16 proto;
+ };
+
+ __u64 gre_flags = load_half(skb,
+ nhoff + offsetof(struct gre_hdr, flags));
+ __u64 gre_proto = load_half(skb,
+ nhoff + offsetof(struct gre_hdr, proto));
+
+ /* version or routing bits set: do not look inside the tunnel */
+ if (gre_flags & (GRE_VERSION|GRE_ROUTING))
+ break;
+
+ proto = gre_proto;
+ /* 4 bytes of flags + proto, plus 4 per optional field present */
+ nhoff += 4;
+ if (gre_flags & GRE_CSUM)
+ nhoff += 4;
+ if (gre_flags & GRE_KEY)
+ nhoff += 4;
+ if (gre_flags & GRE_SEQ)
+ nhoff += 4;
+
+ if (proto == ETH_P_8021Q) {
+ proto = load_half(skb,
+ nhoff + offsetof(struct vlan_hdr,
+ h_vlan_encapsulated_proto));
+ nhoff += sizeof(struct vlan_hdr);
+ }
+
+ if (proto == ETH_P_IP)
+ nhoff = parse_ip(skb, nhoff, &ip_proto, flow);
+ else if (proto == ETH_P_IPV6)
+ nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow);
+ else
+ return false;
+ break;
+ }
+ case IPPROTO_IPIP:
+ nhoff = parse_ip(skb, nhoff, &ip_proto, flow);
+ break;
+ case IPPROTO_IPV6:
+ nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow);
+ break;
+ default:
+ break;
+ }
+
+ flow->ip_proto = ip_proto;
+ /* proto_ports_offset() only returns 0 or 4, so the ports word is
+ * always loaded
+ */
+ poff = proto_ports_offset(ip_proto);
+ if (poff >= 0) {
+ nhoff += poff;
+ flow->ports = load_word(skb, nhoff);
+ }
+
+ flow->thoff = (__u16) nhoff;
+
+ return true;
+}
+
+/* hash map with up to 1024 entries, __be32 sized key and long value;
+ * bpf_prog2() below keys it by flow.dst
+ */
+struct bpf_map_def SEC("maps") hash_map = {
+	.max_entries = 1024,
+	.value_size = sizeof(long),
+	.key_size = sizeof(__be32),
+	.type = BPF_MAP_TYPE_HASH,
+};
+
+/* Socket filter: dissect the packet and count it in hash_map under its
+ * flow.dst value. An existing counter is incremented atomically, a
+ * missing one is created with the value 1. Always returns 0.
+ */
+SEC("socket2")
+int bpf_prog2(struct sk_buff *skb)
+{
+	struct flow_keys flow;
+	long *counter;
+	u32 key;
+
+	if (!flow_dissector(skb, &flow))
+		return 0;
+
+	key = flow.dst;
+	counter = bpf_map_lookup_elem(&hash_map, &key);
+	if (!counter) {
+		long first = 1;
+
+		bpf_map_update_elem(&hash_map, &key, &first, BPF_ANY);
+		return 0;
+	}
+
+	__sync_fetch_and_add(counter, 1);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
--- /dev/null
+#include <stdio.h>
+#include <assert.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <unistd.h>
+#include <arpa/inet.h>
+
+/* Load "<argv[0]>_kern.o", attach its first program to a raw socket bound
+ * to "lo", start a ping and dump every entry of map_fd[0] once a second,
+ * five times.
+ *
+ * Returns 0 on success and 1 when a setup step fails.
+ */
+int main(int ac, char **argv)
+{
+	char filename[256];
+	FILE *f;
+	int i, sock;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	sock = open_raw_sock("lo");
+	if (sock < 0)
+		return 1;
+
+	/* kept out of assert(): the call must not vanish under NDEBUG */
+	if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd,
+		       sizeof(prog_fd[0])) < 0) {
+		perror("setsockopt(SO_ATTACH_BPF)");
+		return 1;
+	}
+
+	f = popen("ping -c5 localhost", "r");
+	(void) f;
+
+	for (i = 0; i < 5; i++) {
+		int key = 0, next_key;
+		long long value;
+
+		while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) {
+			/* value is only meaningful when the lookup succeeded */
+			if (bpf_lookup_elem(map_fd[0], &next_key, &value) == 0)
+				printf("ip %s count %lld\n",
+				       inet_ntoa((struct in_addr){htonl(next_key)}),
+				       value);
+			key = next_key;
+		}
+		sleep(1);
+	}
+	return 0;
+}