Skip to content

Commit

Permalink
Support BCC perf maps
Browse files Browse the repository at this point in the history
  • Loading branch information
zuzzas committed May 11, 2021
1 parent b77a6dd commit 5ab5698
Show file tree
Hide file tree
Showing 5 changed files with 207 additions and 49 deletions.
9 changes: 5 additions & 4 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,11 @@ type Metrics struct {

// Counter is a metric defining prometheus counter
type Counter struct {
Name string `yaml:"name"`
Help string `yaml:"help"`
Table string `yaml:"table"`
Labels []Label `yaml:"labels"`
Name string `yaml:"name"`
Help string `yaml:"help"`
Table string `yaml:"table"`
PerfMap string `yaml:"perf_map"`
Labels []Label `yaml:"labels"`
}

// Histogram is a metric defining prometheus histogram
Expand Down
87 changes: 42 additions & 45 deletions decoder/cgroup.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
package decoder
package decoder

import (
"os"
"fmt"
"log"
"strconv"
"path/filepath"
"fmt"
"log"
"os"
"path/filepath"
"strconv"

"golang.org/x/sys/unix"
"github.com/cloudflare/ebpf_exporter/config"
"github.com/iovisor/gobpf/bcc"
"github.com/iovisor/gobpf/bcc"
"golang.org/x/sys/unix"
)

// CGroup is a decoder that transforms cgroup id to path in cgroupfs
Expand All @@ -18,55 +18,52 @@ type CGroup struct {
}

// Decode transforms cgroup id to path in cgroupfs
func (c *CGroup) Decode(in []byte, conf config.Decoder) ([]byte, error) {
if c.cache == nil {
c.cache = map[uint64][]byte{}
}
func (c *CGroup) Decode(in []byte, _ config.Decoder) ([]byte, error) {
if c.cache == nil {
c.cache = map[uint64][]byte{}
}

cgroupID, err := strconv.Atoi(string(in))
if err != nil {
return nil, err
}
cgroupID, err := strconv.Atoi(string(in))
if err != nil {
return nil, err
}

if path, ok := c.cache[uint64(cgroupID)]; ok {
return path, nil
}
if path, ok := c.cache[uint64(cgroupID)]; ok {
return path, nil
}

if err = c.refreshCache(); err != nil {
log.Printf("Error refreshing cgroup id to path map: %s", err)
}
if err = c.refreshCache(); err != nil {
log.Printf("Error refreshing cgroup id to path map: %s", err)
}

if path, ok := c.cache[uint64(cgroupID)]; ok {
return path, nil
}
if path, ok := c.cache[uint64(cgroupID)]; ok {
return path, nil
}

return []byte(fmt.Sprintf("unknown_cgroup_id:%d", cgroupID)), nil
return []byte(fmt.Sprintf("unknown_cgroup_id:%d", cgroupID)), nil
}

func (c *CGroup) refreshCache() error {
byteOrder := bcc.GetHostByteOrder()
byteOrder := bcc.GetHostByteOrder()

cgroupPath := "/sys/fs/cgroup/unified"
if _, err := os.Stat(cgroupPath); os.IsNotExist(err) {
cgroupPath = "/sys/fs/cgroup"
}
cgroupPath := "/sys/fs/cgroup"

return filepath.Walk(cgroupPath, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
return filepath.Walk(cgroupPath, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}

if !info.IsDir() {
return nil
}
if !info.IsDir() {
return nil
}

handle, _, err := unix.NameToHandleAt(unix.AT_FDCWD, path, 0)
if err != nil {
log.Printf("Error resolving handle of %s: %s", path, err)
}
handle, _, err := unix.NameToHandleAt(unix.AT_FDCWD, path, 0)
if err != nil {
log.Printf("Error resolving handle of %s: %s", path, err)
}

c.cache[byteOrder.Uint64(handle.Bytes())] = []byte(path)
c.cache[byteOrder.Uint64(handle.Bytes())] = []byte(path)

return nil
})
return nil
})
}
47 changes: 47 additions & 0 deletions examples/oomkill.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
programs:
# See:
# * https://github.com/iovisor/bcc/blob/master/tools/oomkill.py
# * https://github.com/iovisor/bcc/blob/master/tools/oomkill_example.txt
- name: oomkill
metrics:
counters:
- name: oom_kills
help: Count global and cgroup level OOMs
perf_map: events
labels:
- name: cgroup_path
size: 8
decoders:
- name: uint
- name: cgroup
- name: global_oom
size: 1
decoders:
- name: uint
kprobes:
oom_kill_process: count_ooms
code: |
#include <uapi/linux/ptrace.h>
#include <linux/oom.h>
#include <linux/memcontrol.h>
BPF_PERF_OUTPUT(events);
struct data_t {
u64 cgroup_id;
u8 global_oom;
};
void count_ooms(struct pt_regs *ctx, struct oom_control *oc, const char *message) {
struct data_t data = {};
struct mem_cgroup *mcg = oc->memcg;
if (!mcg) {
data.global_oom = 1;
events.perf_submit(ctx, &data, sizeof(data));
return;
}
data.cgroup_id = mcg->css.cgroup->kn->id.id;
events.perf_submit(ctx, &data, sizeof(data));
}
14 changes: 14 additions & 0 deletions exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ const prometheusNamespace = "ebpf_exporter"
type Exporter struct {
config config.Config
modules map[string]*bcc.Module
perfMapCollectors []*PerfMapSink
ksyms map[uint64]string
enabledProgramsDesc *prometheus.Desc
programInfoDesc *prometheus.Desc
Expand Down Expand Up @@ -119,6 +120,11 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
}

for _, counter := range program.Metrics.Counters {
if len(counter.PerfMap) != 0 {
perfSink := NewPerfMapSink(e.decoders, e.modules[program.Name], counter)
e.perfMapCollectors = append(e.perfMapCollectors, perfSink)
}

addDescs(program.Name, counter.Name, counter.Help, counter.Labels)
}

Expand All @@ -140,6 +146,10 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
}
}

for _, perfMapCollector := range e.perfMapCollectors {
perfMapCollector.Collect(ch)
}

e.collectCounters(ch)
e.collectHistograms(ch)
}
Expand All @@ -148,6 +158,10 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
func (e *Exporter) collectCounters(ch chan<- prometheus.Metric) {
for _, program := range e.config.Programs {
for _, counter := range program.Metrics.Counters {
if len(counter.PerfMap) != 0 {
continue
}

tableValues, err := e.tableValues(e.modules[program.Name], counter.Table, counter.Labels)
if err != nil {
log.Printf("Error getting table %q values for metric %q of program %q: %s", counter.Table, counter.Name, program.Name, err)
Expand Down
99 changes: 99 additions & 0 deletions exporter/perf_map.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package exporter

import (
"log"
"time"

"github.com/cloudflare/ebpf_exporter/config"
"github.com/cloudflare/ebpf_exporter/decoder"
"github.com/iovisor/gobpf/bcc"
"github.com/prometheus/client_golang/prometheus"
)

// PerfMapSink exposes events received from a BCC perf map as a prometheus
// counter vector.
type PerfMapSink struct {
// counterConfig is the counter definition this sink was created for; it
// supplies the metric name, help text and labels.
counterConfig config.Counter
// counterVec holds the counters reported by Collect and Describe.
counterVec *prometheus.CounterVec
}

// NewPerfMapSink creates a PerfMapSink for counterConfig and starts consuming
// events from the perf map named counterConfig.PerfMap in module.
//
// Every received event is decoded into label values with decoders and
// increments the matching counter of the sink. Events that are too short or
// fail to decode are logged and dropped. The counters are reset once an hour
// through resetCounterVec.
//
// The function panics (via log.Panicf) when the perf map cannot be
// initialised. The goroutines started here are never stopped.
func NewPerfMapSink(decoders *decoder.Set, module *bcc.Module, counterConfig config.Counter) *PerfMapSink {
	var (
		receiveCh = make(chan []byte)
		lostCh    = make(chan uint64)
	)

	sink := &PerfMapSink{
		counterConfig: counterConfig,
	}
	sink.resetCounterVec()

	table := bcc.NewTable(module.TableId(counterConfig.PerfMap), module)

	perfMap, err := bcc.InitPerfMap(table, receiveCh, lostCh)
	if err != nil {
		log.Panicf("Can't init PerfMap: %s", err)
	}

	// Only the leading bytes covered by the configured labels are decoded;
	// a raw sample may carry additional trailing bytes. See:
	// https://github.com/cilium/ebpf/pull/94#discussion_r425823371
	// https://lore.kernel.org/patchwork/patch/1244339/
	// The size depends only on the configuration, so compute it once.
	var validDataSize uint
	for _, labelConfig := range counterConfig.Labels {
		validDataSize += labelConfig.Size
	}

	go func() {
		for rawBytes := range receiveCh {
			// Guard the slice expression below against short samples.
			if uint(len(rawBytes)) < validDataSize {
				log.Printf("perf map %q event is too short: got %d bytes, want at least %d", counterConfig.PerfMap, len(rawBytes), validDataSize)
				continue
			}

			labelValues, err := decoders.DecodeLabels(rawBytes[:validDataSize], counterConfig.Labels)
			if err != nil {
				if err != decoder.ErrSkipLabelSet {
					log.Printf("failed to decode labels: %s", err)
				}

				// Never touch the counter with label values that failed
				// to decode: WithLabelValues panics on invalid input.
				continue
			}

			sink.counterVec.WithLabelValues(labelValues...).Inc()
		}
	}()

	// lostCh is unbuffered, so any send on it would block forever unless
	// something receives from it. Drain it and report the losses.
	go func() {
		for lost := range lostCh {
			log.Printf("Lost %d events from perf map %q", lost, counterConfig.PerfMap)
		}
	}()

	go func() {
		ticker := time.NewTicker(time.Hour)

		for range ticker.C {
			sink.resetCounterVec()
		}
	}()

	perfMap.Start()

	return sink
}

// resetCounterVec drops all counters accumulated so far.
//
// The first call creates the counter vector from the sink's counter config.
// Later calls clear the existing vector in place instead of replacing the
// pointer, so goroutines that read s.counterVec concurrently never observe
// the field being written.
func (s *PerfMapSink) resetCounterVec() {
	if s.counterVec == nil {
		s.counterVec = createCounterVecForPerfMap(s.counterConfig, labelNamesFromCounterConfig(s.counterConfig))
		return
	}

	s.counterVec.Reset()
}

// Collect sends the current metrics of the sink's counter vector to ch.
func (s *PerfMapSink) Collect(ch chan<- prometheus.Metric) {
s.counterVec.Collect(ch)
}

// Describe sends the descriptor of the sink's counter vector to ch.
func (s *PerfMapSink) Describe(ch chan<- *prometheus.Desc) {
s.counterVec.Describe(ch)
}

// createCounterVecForPerfMap builds a counter vector in the exporter's
// prometheus namespace. The metric name and help text are taken from
// counterConfig, and labelNames become the vector's variable labels.
func createCounterVecForPerfMap(counterConfig config.Counter, labelNames []string) *prometheus.CounterVec {
	opts := prometheus.CounterOpts{
		Namespace: prometheusNamespace,
		Name:      counterConfig.Name,
		Help:      counterConfig.Help,
	}

	return prometheus.NewCounterVec(opts, labelNames)
}

// labelNamesFromCounterConfig returns the names of the labels configured for
// counterConfig, in configuration order. The result is nil when the counter
// has no labels.
func labelNamesFromCounterConfig(counterConfig config.Counter) []string {
	var names []string

	for i := range counterConfig.Labels {
		names = append(names, counterConfig.Labels[i].Name)
	}

	return names
}

0 comments on commit 5ab5698

Please sign in to comment.