From d2d9709400aaada3a235db07788055ab88ea1cca Mon Sep 17 00:00:00 2001
From: Andrey Klimentyev
Date: Wed, 1 Sep 2021 13:11:52 +0300
Subject: [PATCH] Support BCC perf maps

---
 README.md             |   2 +
 config/config.go      |  11 ++--
 examples/oomkill.yaml |  48 +++++++++++++++++
 exporter/exporter.go  |  14 +++++
 exporter/perf_map.go  | 156 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 227 insertions(+), 4 deletions(-)
 create mode 100644 examples/oomkill.yaml
 create mode 100644 exporter/perf_map.go

diff --git a/README.md b/README.md
index 8fbff1dc..1561c63c 100644
--- a/README.md
+++ b/README.md
@@ -800,6 +800,8 @@ See [Counters](#counters) section for more details.
 name:
 help:
 table:
+perf_map: # optional
+perf_map_flush_duration: # optional
 labels:
   [ - label ]
 ```
diff --git a/config/config.go b/config/config.go
index 97572a7d..7e9dbbf2 100644
--- a/config/config.go
+++ b/config/config.go
@@ -3,6 +3,7 @@ package config
 import (
 	"errors"
 	"fmt"
+	"time"
 )
 
 // Config defines exporter configuration
@@ -40,10 +41,12 @@ type Metrics struct {
 
 // Counter is a metric defining prometheus counter
 type Counter struct {
-	Name   string  `yaml:"name"`
-	Help   string  `yaml:"help"`
-	Table  string  `yaml:"table"`
-	Labels []Label `yaml:"labels"`
+	Name                 string        `yaml:"name"`
+	Help                 string        `yaml:"help"`
+	Table                string        `yaml:"table"`
+	PerfMap              string        `yaml:"perf_map"`
+	PerfMapFlushDuration time.Duration `yaml:"perf_map_flush_duration"`
+	Labels               []Label       `yaml:"labels"`
 }
 
 // Histogram is a metric defining prometheus histogram
diff --git a/examples/oomkill.yaml b/examples/oomkill.yaml
new file mode 100644
index 00000000..cf3f14a9
--- /dev/null
+++ b/examples/oomkill.yaml
@@ -0,0 +1,48 @@
+programs:
+  # See:
+  # * https://github.com/iovisor/bcc/blob/master/tools/oomkill.py
+  # * https://github.com/iovisor/bcc/blob/master/tools/oomkill_example.txt
+  - name: oomkill
+    metrics:
+      counters:
+        - name: oom_kills
+          help: Count global and cgroup level OOMs
+          perf_map: events
+          labels:
+            - name: cgroup_path
+              size: 8
+              decoders:
+                - name: uint
+                - name: cgroup
+            - name: global_oom
+              size: 1
+              decoders:
+                - name: uint
+    kprobes:
+      oom_kill_process: count_ooms
+    code: |
+      #include <uapi/linux/ptrace.h>
+      #include <linux/oom.h>
+      #include <linux/memcontrol.h>
+
+      // we'll use "BPF_PERF_OUTPUT" map type here to avoid unbound cardinality
+      BPF_PERF_OUTPUT(events);
+
+      struct data_t {
+          u64 cgroup_id;
+          u8 global_oom;
+      };
+
+      void count_ooms(struct pt_regs *ctx, struct oom_control *oc, const char *message) {
+          struct data_t data = {};
+
+          struct mem_cgroup *mcg = oc->memcg;
+          if (!mcg) {
+              data.global_oom = 1;
+              events.perf_submit(ctx, &data, sizeof(data));
+              return;
+          }
+
+          data.cgroup_id = mcg->css.cgroup->kn->id.id;
+          events.perf_submit(ctx, &data, sizeof(data));
+      }
diff --git a/exporter/exporter.go b/exporter/exporter.go
index 54576615..2c4ac001 100644
--- a/exporter/exporter.go
+++ b/exporter/exporter.go
@@ -19,6 +19,7 @@ const prometheusNamespace = "ebpf_exporter"
 type Exporter struct {
 	config              config.Config
 	modules             map[string]*bcc.Module
+	perfMapCollectors   []*PerfMapSink
 	ksyms               map[uint64]string
 	enabledProgramsDesc *prometheus.Desc
 	programInfoDesc     *prometheus.Desc
@@ -124,6 +125,11 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
 		}
 
 		for _, counter := range program.Metrics.Counters {
+			if len(counter.PerfMap) != 0 {
+				perfSink := NewPerfMapSink(e.decoders, e.modules[program.Name], counter)
+				e.perfMapCollectors = append(e.perfMapCollectors, perfSink)
+			}
+
 			addDescs(program.Name, counter.Name, counter.Help, counter.Labels)
 		}
 
@@ -145,6 +151,10 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
 		}
 	}
 
+	for _, perfMapCollector := range e.perfMapCollectors {
+		perfMapCollector.Collect(ch)
+	}
+
 	e.collectCounters(ch)
 	e.collectHistograms(ch)
 }
@@ -153,6 +163,10 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
 func (e *Exporter) collectCounters(ch chan<- prometheus.Metric) {
 	for _, program := range e.config.Programs {
 		for _, counter := range program.Metrics.Counters {
+			if len(counter.PerfMap) != 0 {
+				continue
+			}
+
 			tableValues, err := e.tableValues(e.modules[program.Name], counter.Table, counter.Labels)
 			if err != nil {
 				log.Printf("Error getting table %q values for metric %q of program %q: %s", counter.Table, counter.Name, program.Name, err)
diff --git a/exporter/perf_map.go b/exporter/perf_map.go
new file mode 100644
index 00000000..bc475643
--- /dev/null
+++ b/exporter/perf_map.go
@@ -0,0 +1,156 @@
+package exporter
+
+import (
+	"log"
+	"time"
+
+	"github.com/cloudflare/ebpf_exporter/config"
+	"github.com/cloudflare/ebpf_exporter/decoder"
+	"github.com/iovisor/gobpf/bcc"
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+// defaultPerfMapFlushDuration is the flush interval used when the counter
+// config does not set a positive perf_map_flush_duration.
+const defaultPerfMapFlushDuration = time.Hour
+
+// PerfMapSink counts events received from a BCC perf map in a prometheus
+// counter vector, one series per decoded label set.
+type PerfMapSink struct {
+	counterConfig config.Counter
+	counterVec    *prometheus.CounterVec
+	dropCounter   prometheus.Counter
+}
+
+// NewPerfMapSink attaches to the perf map named by counterConfig.PerfMap in
+// module and starts goroutines that count received events, count lost events
+// and periodically flush the counters. The process exits if the perf map
+// can't be initialized.
+func NewPerfMapSink(decoders *decoder.Set, module *bcc.Module, counterConfig config.Counter) *PerfMapSink {
+	var (
+		receiveCh = make(chan []byte)
+		lostCh    = make(chan uint64)
+	)
+
+	sink := &PerfMapSink{
+		counterConfig: counterConfig,
+		counterVec:    createCounterVecForPerfMap(counterConfig, labelNamesFromCounterConfig(counterConfig)),
+		dropCounter:   createDropCounterForPerfMap(counterConfig),
+	}
+
+	table := bcc.NewTable(module.TableId(counterConfig.PerfMap), module)
+
+	perfMap, err := bcc.InitPerfMap(table, receiveCh, lostCh)
+	if err != nil {
+		log.Fatalf("Can't init PerfMap: %s", err)
+	}
+
+	go sink.receive(decoders, receiveCh)
+	go sink.countLost(lostCh)
+	go sink.flushPeriodically()
+
+	perfMap.Start()
+
+	return sink
+}
+
+// receive decodes each event from receiveCh into label values and
+// increments the matching series. Events that can't be decoded are skipped.
+func (s *PerfMapSink) receive(decoders *decoder.Set, receiveCh <-chan []byte) {
+	// Only the leading bytes described by the labels are decoded; see:
+	// https://github.com/cilium/ebpf/pull/94#discussion_r425823371
+	// https://lore.kernel.org/patchwork/patch/1244339/
+	var validDataSize uint
+	for _, labelConfig := range s.counterConfig.Labels {
+		validDataSize += labelConfig.Size
+	}
+
+	for rawBytes := range receiveCh {
+		if uint(len(rawBytes)) < validDataSize {
+			log.Printf("Skipping perf map event of %d bytes, need at least %d", len(rawBytes), validDataSize)
+			continue
+		}
+
+		labelValues, err := decoders.DecodeLabels(rawBytes[:validDataSize], s.counterConfig.Labels)
+		if err != nil {
+			if err != decoder.ErrSkipLabelSet {
+				log.Printf("Failed to decode labels: %s", err)
+			}
+
+			// WithLabelValues panics on an incomplete label set, so the
+			// event must not be counted after any decoding error.
+			continue
+		}
+
+		s.counterVec.WithLabelValues(labelValues...).Inc()
+	}
+}
+
+// countLost adds the numbers of lost events reported on lostCh to dropCounter.
+func (s *PerfMapSink) countLost(lostCh <-chan uint64) {
+	for droppedEvents := range lostCh {
+		s.dropCounter.Add(float64(droppedEvents))
+	}
+}
+
+// flushPeriodically resets the counters every PerfMapFlushDuration, or every
+// defaultPerfMapFlushDuration if that is not positive. It never returns.
+func (s *PerfMapSink) flushPeriodically() {
+	flushDuration := defaultPerfMapFlushDuration
+	if s.counterConfig.PerfMapFlushDuration > 0 {
+		flushDuration = s.counterConfig.PerfMapFlushDuration
+	}
+
+	ticker := time.NewTicker(flushDuration)
+	for range ticker.C {
+		s.resetCounterVec()
+	}
+}
+
+// Collect sends the current counter values to ch.
+func (s *PerfMapSink) Collect(ch chan<- prometheus.Metric) {
+	s.counterVec.Collect(ch)
+}
+
+// Describe sends the descriptor of the counter vector to ch.
+func (s *PerfMapSink) Describe(ch chan<- *prometheus.Desc) {
+	s.counterVec.Describe(ch)
+}
+
+// resetCounterVec drops all series collected so far. The vector is reset in
+// place rather than replaced, because receive and Collect use it from other
+// goroutines.
+func (s *PerfMapSink) resetCounterVec() {
+	s.counterVec.Reset()
+}
+
+// createCounterVecForPerfMap builds the counter vector for counterConfig.
+func createCounterVecForPerfMap(counterConfig config.Counter, labelNames []string) *prometheus.CounterVec {
+	return prometheus.NewCounterVec(prometheus.CounterOpts{
+		Namespace: prometheusNamespace,
+		Name:      counterConfig.Name,
+		Help:      counterConfig.Help,
+	}, labelNames)
+}
+
+// createDropCounterForPerfMap builds the counter of events lost by the perf
+// map behind counterConfig.
+// NOTE(review): PerfMapSink.Collect does not export this counter yet.
+func createDropCounterForPerfMap(counterConfig config.Counter) prometheus.Counter {
+	return prometheus.NewCounter(prometheus.CounterOpts{
+		Namespace: "dropped_perf_map_events",
+		Name:      counterConfig.Name,
+		Help:      "Dropped perf map events",
+	})
+}
+
+// labelNamesFromCounterConfig returns the label names of counterConfig in
+// configuration order.
+func labelNamesFromCounterConfig(counterConfig config.Counter) []string {
+	labelNames := make([]string, 0, len(counterConfig.Labels))
+	for _, label := range counterConfig.Labels {
+		labelNames = append(labelNames, label.Name)
+	}
+
+	return labelNames
+}