Skip to content

Commit

Permalink
Support BCC perf maps
Browse files Browse the repository at this point in the history
  • Loading branch information
zuzzas committed Sep 1, 2021
1 parent 7c1da66 commit dfc20e3
Show file tree
Hide file tree
Showing 5 changed files with 193 additions and 4 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -800,10 +800,14 @@ See [Counters](#counters) section for more details.
name: <prometheus counter name>
help: <prometheus metric help>
table: <eBPF table name to track>
perf_map: <name for a BPF_PERF_OUTPUT map> # optional
perf_map_flush_duration: <how often should we flush metrics from perf_map: time.Duration> # optional
labels:
[ - label ]
```

An example of `perf_map` can be found [here](examples/oomkill.yaml).

#### `histogram`

See [Histograms](#histograms) section for more details.
Expand Down
11 changes: 7 additions & 4 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package config
import (
"errors"
"fmt"
"time"
)

// Config defines exporter configuration
Expand Down Expand Up @@ -40,10 +41,12 @@ type Metrics struct {

// Counter is a metric defining prometheus counter
type Counter struct {
Name string `yaml:"name"`
Help string `yaml:"help"`
Table string `yaml:"table"`
Labels []Label `yaml:"labels"`
Name string `yaml:"name"`
Help string `yaml:"help"`
Table string `yaml:"table"`
PerfMap string `yaml:"perf_map"`
PerfMapFlushDuration time.Duration `yaml:"perf_map_flush_duration"`
Labels []Label `yaml:"labels"`
}

// Histogram is a metric defining prometheus histogram
Expand Down
48 changes: 48 additions & 0 deletions examples/oomkill.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
programs:
# See:
# * https://github.com/iovisor/bcc/blob/master/tools/oomkill.py
# * https://github.com/iovisor/bcc/blob/master/tools/oomkill_example.txt
- name: oomkill
metrics:
counters:
- name: oom_kills
help: Count global and cgroup level OOMs
perf_map: events
labels:
- name: cgroup_path
size: 8
decoders:
- name: uint
- name: cgroup
- name: global_oom
size: 1
decoders:
- name: uint
kprobes:
oom_kill_process: count_ooms
code: |
#include <uapi/linux/ptrace.h>
#include <linux/oom.h>
#include <linux/memcontrol.h>
// we'll use the "BPF_PERF_OUTPUT" map type here to avoid unbounded cardinality
BPF_PERF_OUTPUT(events);
struct data_t {
u64 cgroup_id;
u8 global_oom;
};
void count_ooms(struct pt_regs *ctx, struct oom_control *oc, const char *message) {
struct data_t data = {};
struct mem_cgroup *mcg = oc->memcg;
if (!mcg) {
data.global_oom = 1;
events.perf_submit(ctx, &data, sizeof(data));
return;
}
data.cgroup_id = mcg->css.cgroup->kn->id.id;
events.perf_submit(ctx, &data, sizeof(data));
}
14 changes: 14 additions & 0 deletions exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ const prometheusNamespace = "ebpf_exporter"
type Exporter struct {
config config.Config
modules map[string]*bcc.Module
perfMapCollectors []*PerfMapSink
ksyms map[uint64]string
enabledProgramsDesc *prometheus.Desc
programInfoDesc *prometheus.Desc
Expand Down Expand Up @@ -124,6 +125,11 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
}

for _, counter := range program.Metrics.Counters {
if len(counter.PerfMap) != 0 {
perfSink := NewPerfMapSink(e.decoders, e.modules[program.Name], counter)
e.perfMapCollectors = append(e.perfMapCollectors, perfSink)
}

addDescs(program.Name, counter.Name, counter.Help, counter.Labels)
}

Expand All @@ -145,6 +151,10 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
}
}

for _, perfMapCollector := range e.perfMapCollectors {
perfMapCollector.Collect(ch)
}

e.collectCounters(ch)
e.collectHistograms(ch)
}
Expand All @@ -153,6 +163,10 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
func (e *Exporter) collectCounters(ch chan<- prometheus.Metric) {
for _, program := range e.config.Programs {
for _, counter := range program.Metrics.Counters {
if len(counter.PerfMap) != 0 {
continue
}

tableValues, err := e.tableValues(e.modules[program.Name], counter.Table, counter.Labels)
if err != nil {
log.Printf("Error getting table %q values for metric %q of program %q: %s", counter.Table, counter.Name, program.Name, err)
Expand Down
120 changes: 120 additions & 0 deletions exporter/perf_map.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
package exporter

import (
"log"
"time"

"github.com/cloudflare/ebpf_exporter/config"
"github.com/cloudflare/ebpf_exporter/decoder"
"github.com/iovisor/gobpf/bcc"
"github.com/prometheus/client_golang/prometheus"
)

// PerfMapSink turns events received from a BPF perf map into a prometheus
// counter vector, according to the counter configuration it was created with.
type PerfMapSink struct {
// counterConfig is the counter definition this sink was built from.
counterConfig config.Counter
// counterVec holds the per-label-set counters; it is assigned by resetCounterVec.
counterVec *prometheus.CounterVec
// dropCounter accumulates the lost-event counts reported for the perf map.
dropCounter prometheus.Counter
}

// NewPerfMapSink creates a sink for the perf map named by
// counterConfig.PerfMap in module and starts consuming it.
//
// Three goroutines are spawned:
//   - one decodes every received event into label values using decoders and
//     increments the matching counter;
//   - one adds the lost-event counts reported on the lost channel to the
//     drop counter;
//   - one calls resetCounterVec every counterConfig.PerfMapFlushDuration
//     (every hour when that duration is zero or negative) and runs for the
//     lifetime of the process.
//
// The process is terminated through log.Fatalf if the perf map cannot be
// initialised.
func NewPerfMapSink(decoders *decoder.Set, module *bcc.Module, counterConfig config.Counter) *PerfMapSink {
	var (
		receiveCh = make(chan []byte)
		lostCh    = make(chan uint64)
	)

	sink := &PerfMapSink{
		counterConfig: counterConfig,
		dropCounter:   createDropCounterForPerfMap(counterConfig),
	}
	sink.resetCounterVec()

	table := bcc.NewTable(module.TableId(counterConfig.PerfMap), module)

	perfMap, err := bcc.InitPerfMap(table, receiveCh, lostCh)
	if err != nil {
		log.Fatalf("Can't init PerfMap: %s", err)
	}

	// The raw sample can be longer than the label data, so only the leading
	// bytes covered by the configured labels are decoded. See:
	// https://github.com/cilium/ebpf/pull/94#discussion_r425823371
	// https://lore.kernel.org/patchwork/patch/1244339/
	// The size depends only on the configuration, so compute it once.
	var validDataSize uint
	for _, labelConfig := range counterConfig.Labels {
		validDataSize += labelConfig.Size
	}

	go func(sink *PerfMapSink, receiveCh <-chan []byte) {
		for rawBytes := range receiveCh {
			// Slicing a short event would panic and kill the exporter.
			if uint(len(rawBytes)) < validDataSize {
				log.Printf("Perf map %q event is too short: got %d bytes, need %d", sink.counterConfig.PerfMap, len(rawBytes), validDataSize)
				continue
			}

			labelValues, err := decoders.DecodeLabels(rawBytes[:validDataSize], sink.counterConfig.Labels)
			if err != nil {
				if err != decoder.ErrSkipLabelSet {
					log.Printf("Failed to decode labels: %s", err)
				}

				// Never count an event whose labels could not be decoded:
				// WithLabelValues panics when the number of values does not
				// match the number of label names.
				continue
			}

			sink.counterVec.WithLabelValues(labelValues...).Inc()
		}
	}(sink, receiveCh)

	go func(sink *PerfMapSink, lostCh <-chan uint64) {
		for droppedEvents := range lostCh {
			sink.dropCounter.Add(float64(droppedEvents))
		}
	}(sink, lostCh)

	go func(sink *PerfMapSink) {
		flushDuration := time.Hour
		if sink.counterConfig.PerfMapFlushDuration > 0 {
			flushDuration = sink.counterConfig.PerfMapFlushDuration
		}

		ticker := time.NewTicker(flushDuration)

		for {
			<-ticker.C
			sink.resetCounterVec()
		}
	}(sink)

	perfMap.Start()

	return sink
}

// Collect forwards every metric currently held by the sink's counter vector
// to ch. Together with Describe it gives PerfMapSink the method set of
// prometheus.Collector.
func (s *PerfMapSink) Collect(ch chan<- prometheus.Metric) {
	vec := s.counterVec
	vec.Collect(ch)
}

// Describe forwards the descriptor of the sink's counter vector to ch.
func (s *PerfMapSink) Describe(ch chan<- *prometheus.Desc) {
	vec := s.counterVec
	vec.Describe(ch)
}

// resetCounterVec leaves the sink with an empty counter vector.
//
// The first call creates the vector from the sink's counter configuration.
// Later calls clear it in place with Reset instead of assigning a new
// vector: the field is read by other goroutines (event decoding, Collect)
// while the periodic flush runs, so replacing the pointer would be an
// unsynchronised write, whereas Reset is safe for concurrent use.
func (s *PerfMapSink) resetCounterVec() {
	if s.counterVec == nil {
		s.counterVec = createCounterVecForPerfMap(s.counterConfig, labelNamesFromCounterConfig(s.counterConfig))
		return
	}

	s.counterVec.Reset()
}

// createCounterVecForPerfMap builds a counter vector in the exporter's
// prometheus namespace, taking its name and help text from counterConfig and
// its label names from labelNames.
func createCounterVecForPerfMap(counterConfig config.Counter, labelNames []string) *prometheus.CounterVec {
	opts := prometheus.CounterOpts{
		Namespace: prometheusNamespace,
		Name:      counterConfig.Name,
		Help:      counterConfig.Help,
	}

	return prometheus.NewCounterVec(opts, labelNames)
}

// createDropCounterForPerfMap builds the label-less counter used to record
// dropped perf map events. It is named after counterConfig.Name and placed in
// the "dropped_perf_map_events" namespace.
func createDropCounterForPerfMap(counterConfig config.Counter) prometheus.Counter {
	opts := prometheus.CounterOpts{
		Namespace: "dropped_perf_map_events",
		Name:      counterConfig.Name,
		Help:      "Dropped perf map events",
	}

	// A vector without labels has exactly one child; hand that child out.
	vec := prometheus.NewCounterVec(opts, []string{})

	return vec.WithLabelValues()
}

// labelNamesFromCounterConfig returns the names of the labels configured for
// the counter, in configuration order. The result is nil when the counter
// has no labels.
func labelNamesFromCounterConfig(counterConfig config.Counter) (labelNames []string) {
	labels := counterConfig.Labels
	for i := range labels {
		labelNames = append(labelNames, labels[i].Name)
	}

	return labelNames
}

0 comments on commit dfc20e3

Please sign in to comment.