cloudflare · bobrik · Jul 18, 2023 · Jul 18, 2023 · Jul 18, 2023 · Jul 18, 2023
diff --git a/benchmark/README.md b/benchmark/README.md
@@ -8,8 +8,9 @@ We're using `getpid()` as a simple syscall to provide a reference. To measure
 the overhead of eBPF probes we measure performance in the following cases:
 
 * No probes attached
-* Simple kprobe incrementing hash map with pid as a key
-* Complex kprobe incrementing hash map with a complex key:
+* Empty probe doing nothing at all
+* Simple probe incrementing hash map with pid as a key
+* Complex probe incrementing hash map with a complex key:
   * PID
   * Random number obtained as `time_in_ns % 1024`
   * Command name up to 32 chars
@@ -18,28 +19,48 @@ You can see exact code of probes in [`probes` directory](probes).
 
 ## Results
 
-The results below are from MacBook Air (M1, 2020) running Linux 6.1-rc1
-in QEMU with ftrace direct call patches applied to enable `fentry`:
-
-* https://patchwork.kernel.org/project/netdevbpf/cover/[email protected]/
-
-We see the following results:
+The results below are from MacBook Air (M1, 2020) running Linux 6.5-rc1
+in QEMU. We see the following results:
 
 ```
-BenchmarkGetpidWithoutAnyProbes/getpid         	10949119	       106.3 ns/op
-BenchmarkGetpidFentryWithSimpleMap/getpid      	 8035327	       149.7 ns/op
-BenchmarkGetpidFentryWithComplexMap/getpid     	 5566742	       214.9 ns/op
-BenchmarkGetpidKprobeWithSimpleMap/getpid      	 4605552	       260.6 ns/op
-BenchmarkGetpidKprobeWithComplexMap/getpid     	 3604656	       330.3 ns/op
+BenchmarkGetpidWithoutAnyProbes/getpid             9954225       117.3 ns/op
+BenchmarkGetpidTracepointWithNoMap/getpid          9098228       132.2 ns/op
+BenchmarkGetpidTracepointWithSimpleMap/getpid      7995439       152.2 ns/op
+BenchmarkGetpidTracepointWithComplexMap/getpid     5655841       212.8 ns/op
+BenchmarkGetpidFentryWithNoMap/getpid              8481037       141.0 ns/op
+BenchmarkGetpidFentryWithSimpleMap/getpid          7582813       159.1 ns/op
+BenchmarkGetpidFentryWithComplexMap/getpid         4579310       220.7 ns/op
+BenchmarkGetpidKprobeWithNoMap/getpid              4725835       253.8 ns/op
+BenchmarkGetpidKprobeWithSimpleMap/getpid          4306387       277.1 ns/op
+BenchmarkGetpidKprobeWithComplexMap/getpid         3460576       346.3 ns/op
 ```
 
-| Case            | ns/op | overhead ns/op | overhead percent |
-|:----------------|------:|---------------:|-----------------:|
-| no probe        |   106 |              0 |               0% |
-| fentry simple   |   150 |             44 |              42% |
-| fentry complex  |   215 |            109 |             103% |
-| kprobe simple   |   261 |            155 |             146% |
-| kprobe complex  |   330 |            224 |             211% |
+Empty probe attached:
+
+| Case               | ns/op | Overhead ns/op | Overhead percent |
+|:-------------------|------:|---------------:|-----------------:|
+| no probe attached  |   117 |              0 |               0% |
+| tracepoint empty   |   132 |             15 |              13% |
+| fentry empty       |   141 |             24 |              21% |
+| kprobe empty       |   254 |            137 |             117% |
+
+Probe with a simple map increment attached:
+
+| Case               | ns/op | Overhead ns/op | Overhead percent |
+|:-------------------|------:|---------------:|-----------------:|
+| no probe attached  |   117 |              0 |               0% |
+| tracepoint simple  |   152 |             35 |              30% |
+| fentry simple      |   159 |             42 |              36% |
+| kprobe simple      |   277 |            160 |             136% |
+
+Probe with a complex map increment attached:
+
+| Case               | ns/op | Overhead ns/op | Overhead percent |
+|:-------------------|------:|---------------:|-----------------:|
+| no probe attached  |   117 |              0 |               0% |
+| tracepoint complex |   213 |             96 |              82% |
+| fentry complex     |   220 |            103 |              88% |
+| kprobe complex     |   346 |            229 |             196% |
 
 Big slowdown in terms of % for complex case may sounds like terrible,
 but you have to remember that we're using a fast `getpid()` syscall.
@@ -49,6 +70,8 @@ what you're going to pay no matter how fast or frequent function you're
 probing is. 200-300ns overhead for the complex case of `getpid` is a lot, but
 for tracing operations like disk access it's nothing compared to baseline.
 
+Note that tracepoints are faster than fentry, and fentry is faster than kprobe.
+
 Keep in mind that these numbers are for a single logical CPU core.
 
 Your mileage may vary depending on your hardware, make sure to test

diff --git a/benchmark/getpid_test.go b/benchmark/getpid_test.go
@@ -10,32 +10,60 @@ import (
 	"github.com/cloudflare/ebpf_exporter/v2/util"
 )
 
+func init() { // installs libbpfgo logger callbacks once, when the test package is initialised
+	libbpfgoCallbacks := libbpfgo.Callbacks{}
+	libbpfgoCallbacks.LogFilters = append(libbpfgoCallbacks.LogFilters, func(libLevel int, msg string) bool {
+		return libLevel == libbpfgo.LibbpfDebugLevel // matches debug-level messages only; presumably a match means "drop it" (confirm against libbpfgo LogFilters docs)
+	})
+
+	libbpfgo.SetLoggerCbs(libbpfgoCallbacks)
+}
+
 func BenchmarkGetpidWithoutAnyProbes(b *testing.B) {
 	b.Run("getpid", func(b *testing.B) {
 		for n := 0; n < b.N; n++ {
 			os.Getpid()
 		}
 	})
+}
+
+func BenchmarkGetpidTracepointWithNoMap(b *testing.B) {
+	benchmarkWithProbe(b, "tracepoint", "probes/tracepoint-empty.bpf.o", false) // checkMap=false: benchmarkWithProbe returns before fetching the "counts" map
+}
 
+func BenchmarkGetpidTracepointWithSimpleMap(b *testing.B) {
+	benchmarkWithProbe(b, "tracepoint", "probes/tracepoint-simple.bpf.o", true) // checkMap=true: benchmarkWithProbe goes on to fetch the "counts" map after the run
+}
+
+func BenchmarkGetpidTracepointWithComplexMap(b *testing.B) {
+	benchmarkWithProbe(b, "tracepoint", "probes/tracepoint-complex.bpf.o", true) // checkMap=true: benchmarkWithProbe goes on to fetch the "counts" map after the run
+}
+
+func BenchmarkGetpidFentryWithNoMap(b *testing.B) {
+	benchmarkWithProbe(b, "fentry", "probes/fentry-empty.bpf.o", false) // checkMap=false: the empty probe defines no "counts" map, so the map lookup is skipped
 }
 
 func BenchmarkGetpidFentryWithSimpleMap(b *testing.B) {
-	benchmarkWithProbe(b, "fentry", "probes/fentry-simple.bpf.o")
+	benchmarkWithProbe(b, "fentry", "probes/fentry-simple.bpf.o", true) // checkMap=true: benchmarkWithProbe goes on to fetch the "counts" map after the run
 }
 
 func BenchmarkGetpidFentryWithComplexMap(b *testing.B) {
-	benchmarkWithProbe(b, "fentry", "probes/fentry-complex.bpf.o")
+	benchmarkWithProbe(b, "fentry", "probes/fentry-complex.bpf.o", true) // checkMap=true: benchmarkWithProbe goes on to fetch the "counts" map after the run
+}
+
+func BenchmarkGetpidKprobeWithNoMap(b *testing.B) {
+	benchmarkWithProbe(b, "kprobe", "probes/kprobe-empty.bpf.o", false) // checkMap=false: benchmarkWithProbe returns before fetching the "counts" map
 }
 
 func BenchmarkGetpidKprobeWithSimpleMap(b *testing.B) {
-	benchmarkWithProbe(b, "kprobe", "probes/kprobe-simple.bpf.o")
+	benchmarkWithProbe(b, "kprobe", "probes/kprobe-simple.bpf.o", true) // checkMap=true: benchmarkWithProbe goes on to fetch the "counts" map after the run
 }
 
 func BenchmarkGetpidKprobeWithComplexMap(b *testing.B) {
-	benchmarkWithProbe(b, "kprobe", "probes/kprobe-complex.bpf.o")
+	benchmarkWithProbe(b, "kprobe", "probes/kprobe-complex.bpf.o", true) // checkMap=true: benchmarkWithProbe goes on to fetch the "counts" map after the run
 }
 
-func benchmarkWithProbe(b *testing.B, kind string, file string) {
+func benchmarkWithProbe(b *testing.B, kind string, file string, checkMap bool) {
 	byteOrder := util.GetHostByteOrder()
 
 	m, link, err := setupGetpidProbe(kind, file)
@@ -58,6 +86,10 @@ func benchmarkWithProbe(b *testing.B, kind string, file string) {
 		}
 	})
 
+	if !checkMap {
+		return
+	}
+
 	counts, err := m.GetMap("counts")
 	if err != nil {
 		b.Fatalf("Error getting map from bpf: %v", err)

diff --git a/benchmark/probes/Makefile b/benchmark/probes/Makefile
@@ -7,7 +7,7 @@ ARCH := $(shell uname -m | sed -e 's/x86_64/x86/' -e 's/aarch64/arm64/')
 SRC = ${wildcard *.bpf.c}
 OBJ = ${patsubst %.bpf.c, %.bpf.o, $(SRC)}
 
-$(OBJ): %.bpf.o: %.bpf.c ../../include/$(ARCH)/vmlinux.h
+$(OBJ): %.bpf.o: %.bpf.c benchmark.bpf.h ../../include/$(ARCH)/vmlinux.h
 	$(CC) -mcpu=v3 -g -O2 -Wall -Werror -D__TARGET_ARCH_$(ARCH) -I../../include/$(ARCH) -c -target bpf $< -o $@
 
 .PHONY: clean

diff --git a/benchmark/probes/benchmark.bpf.h b/benchmark/probes/benchmark.bpf.h
@@ -0,0 +1,102 @@
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#if defined(__TARGET_ARCH_x86) /* __TARGET_ARCH_* is supplied by the probes Makefile via -D__TARGET_ARCH_$(ARCH) */
+#define FENTRY_SEC() SEC("fentry/__x64_sys_getpid") /* fentry section naming the x86 getpid syscall symbol */
+#elif defined(__TARGET_ARCH_arm64)
+#define FENTRY_SEC() SEC("fentry/__arm64_sys_getpid") /* same hook, arm64 symbol name */
+#else
+#error Unknown target for this architecture
+#endif
+
+#if defined(__TARGET_ARCH_x86) /* the getpid syscall symbol carries an architecture-specific prefix */
+#define KPROBE_SEC() SEC("kprobe/__x64_sys_getpid") /* kprobe section naming the x86 getpid syscall symbol */
+#elif defined(__TARGET_ARCH_arm64)
+#define KPROBE_SEC() SEC("kprobe/__arm64_sys_getpid") /* same hook, arm64 symbol name */
+#else
+#error Unknown target for this architecture
+#endif
+
+#define TRACEPOINT_SEC() SEC("tp_btf/sys_enter") /* NOTE(review): section name is not getpid-specific, unlike the fentry/kprobe ones; presumably runs on every syscall entry (confirm) */
+
+#define BENCHMARK_PROBE(sec, impl) /* emits `probe` in section sec(); it just returns impl() */                        \
+    sec() int probe()                                                                                                  \
+    {                                                                                                                  \
+        return impl();                                                                                                 \
+    }
+
+static u64 zero = 0; /* initial counter value passed to bpf_map_update_elem when a key is first seen */
+
+#ifdef BENCHMARK_NO_MAP /* defined by the including .bpf.c file before this header is included */
+static inline int empty_probe() /* baseline probe body: no map and no helper calls */
+{
+    return 0;
+}
+#endif
+
+#ifdef BENCHMARK_SIMPLE_MAP /* defined by the including .bpf.c file before this header is included */
+struct { /* "counts": hash map from a u32 key to a u64 counter */
+    __uint(type, BPF_MAP_TYPE_HASH);
+    __uint(max_entries, 1024);
+    __type(key, u32);
+    __type(value, u64);
+} counts SEC(".maps");
+
+static inline int simple_probe() /* bumps the counter for the current task's key; always returns 0 */
+{
+    u32 key = bpf_get_current_pid_tgid(); /* README calls this key the pid; the helper result is narrowed to u32 here */
+    u64 *count;
+
+    count = bpf_map_lookup_elem(&counts, &key);
+    if (!count) {
+        bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST); /* key not present yet: try to insert 0; result ignored, the re-lookup decides */
+        count = bpf_map_lookup_elem(&counts, &key);
+        if (!count) {
+            return 0; /* still absent after the insert attempt: skip counting */
+        }
+    }
+    __sync_fetch_and_add(count, 1); /* atomic add directly on the map value */
+
+    return 0;
+}
+#endif
+
+#ifdef BENCHMARK_COMPLEX_MAP /* defined by the including .bpf.c file before this header is included */
+struct key_t { /* composite map key: pid, pseudo-random bucket and command name */
+    u64 pid;
+    u64 random;
+    char command[32];
+};
+
+struct { /* "counts": hash map from struct key_t to a u64 counter */
+    __uint(type, BPF_MAP_TYPE_HASH);
+    __uint(max_entries, 1024);
+    __type(key, struct key_t);
+    __type(value, u64);
+} counts SEC(".maps");
+
+static inline int complex_probe() /* builds the composite key and bumps its counter; always returns 0 */
+{
+    u64 *count;
+    struct key_t key = {}; /* zero-initialised before the fields are filled in */
+
+    key.pid = bpf_get_current_pid_tgid(); /* stored as the full u64 helper result, unlike the u32 key of the simple variant */
+    key.random = bpf_ktime_get_ns() % 1024; /* the README's "random number": time_in_ns % 1024 */
+    bpf_get_current_comm(&key.command, sizeof(key.command)); /* command name, limited to the 32-byte buffer */
+
+    count = bpf_map_lookup_elem(&counts, &key);
+    if (!count) {
+        bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST); /* key not present yet: try to insert 0; result ignored, the re-lookup decides */
+        count = bpf_map_lookup_elem(&counts, &key);
+        if (!count) {
+            return 0; /* still absent after the insert attempt: skip counting */
+        }
+    }
+    __sync_fetch_and_add(count, 1); /* atomic add directly on the map value */
+
+    return 0;
+}
+#endif
+
+char LICENSE[] SEC("license") = "GPL"; /* license string, defined here so every probe including this header carries it */
diff --git a/benchmark/probes/fentry-complex.bpf.c b/benchmark/probes/fentry-complex.bpf.c
@@ -1,48 +1,4 @@
-#include "vmlinux.h"
-#include <bpf/bpf_helpers.h>
+#define BENCHMARK_COMPLEX_MAP /* selects struct key_t, its counts map and complex_probe() in benchmark.bpf.h */
+#include "benchmark.bpf.h"
 
-static u64 zero = 0;
-
-struct key_t {
-    u64 pid;
-    u64 random;
-    char command[32];
-};
-
-struct {
-    __uint(type, BPF_MAP_TYPE_HASH);
-    __uint(max_entries, 1024);
-    __type(key, struct key_t);
-    __type(value, u64);
-} counts SEC(".maps");
-
-#if defined(__TARGET_ARCH_x86)
-SEC("fentry/__x64_sys_getpid")
-#elif defined(__TARGET_ARCH_arm64)
-SEC("fentry/__arm64_sys_getpid")
-#else
-#error Unknown target for this architecture
-#endif
-int probe(struct pt_regs *ctx)
-{
-    u64 *count;
-    struct key_t key = {};
-
-    key.pid = bpf_get_current_pid_tgid();
-    key.random = bpf_ktime_get_ns() % 1024;
-    bpf_get_current_comm(&key.command, sizeof(key.command));
-
-    count = bpf_map_lookup_elem(&counts, &key);
-    if (!count) {
-        bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
-        count = bpf_map_lookup_elem(&counts, &key);
-        if (!count) {
-            return 0;
-        }
-    }
-    __sync_fetch_and_add(count, 1);
-
-    return 0;
-}
-
-char LICENSE[] SEC("license") = "GPL";
+BENCHMARK_PROBE(FENTRY_SEC, complex_probe); /* expands to `probe` in the FENTRY_SEC() section, returning complex_probe() */
diff --git a/benchmark/probes/fentry-empty.bpf.c b/benchmark/probes/fentry-empty.bpf.c
@@ -0,0 +1,4 @@
+#define BENCHMARK_NO_MAP /* selects empty_probe() in benchmark.bpf.h; no map is defined in this mode */
+#include "benchmark.bpf.h"
+
+BENCHMARK_PROBE(FENTRY_SEC, empty_probe); /* expands to `probe` in the FENTRY_SEC() section, returning empty_probe() */
diff --git a/benchmark/probes/fentry-simple.bpf.c b/benchmark/probes/fentry-simple.bpf.c
@@ -1,38 +1,4 @@
-#include "vmlinux.h"
-#include <bpf/bpf_helpers.h>
+#define BENCHMARK_SIMPLE_MAP /* selects the u32-keyed counts map and simple_probe() in benchmark.bpf.h */
+#include "benchmark.bpf.h"
 
-static u64 zero = 0;
-
-struct {
-    __uint(type, BPF_MAP_TYPE_HASH);
-    __uint(max_entries, 1024);
-    __type(key, u32);
-    __type(value, u64);
-} counts SEC(".maps");
-
-#if defined(__TARGET_ARCH_x86)
-SEC("fentry/__x64_sys_getpid")
-#elif defined(__TARGET_ARCH_arm64)
-SEC("fentry/__arm64_sys_getpid")
-#else
-#error Unknown target for this architecture
-#endif
-int probe(struct pt_regs *ctx)
-{
-    u32 key = bpf_get_current_pid_tgid();
-    u64 *count;
-
-    count = bpf_map_lookup_elem(&counts, &key);
-    if (!count) {
-        bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST);
-        count = bpf_map_lookup_elem(&counts, &key);
-        if (!count) {
-            return 0;
-        }
-    }
-    __sync_fetch_and_add(count, 1);
-
-    return 0;
-}
-
-char LICENSE[] SEC("license") = "GPL";
+BENCHMARK_PROBE(FENTRY_SEC, simple_probe); /* expands to `probe` in the FENTRY_SEC() section, returning simple_probe() */