Skip to content

Commit

Permalink
Add envoy metrics discovery bundle (#5780)
Browse files Browse the repository at this point in the history
* Add envoy metrics discovery bundle

* add changelog

* use correct image, use endpoint instead of hardcoding the port

* do not skip the test

* fix test

* fix test

* fix test

* push envoy discovery to its own test

* use a separate test run

* fix docker test again

* fix the test some more
  • Loading branch information
atoulme authored Jan 12, 2025
1 parent 469980b commit 4fe7405
Show file tree
Hide file tree
Showing 17 changed files with 569 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .github/workflows/integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ jobs:
id: get-matrix
run: |
includes=""
for service in "apache" "jmx/cassandra" "kafkametrics" "mongodb" "nginx"; do
for service in "apache" "jmx/cassandra" "kafkametrics" "mongodb" "nginx" "envoy"; do
for arch in "amd64" "arm64"; do
if [ "$service" = "mongodb" ]; then
# tests for mongo "6.0" and "7.0" are flaky, skipping for now
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

- (Splunk) Add `metricsgeneration` processor ([#5769](https://github.com/signalfx/splunk-otel-collector/pull/5769))

### 💡 Enhancements 💡

- (Splunk) Add a new discovery bundle for Envoy proxy metrics ([#5780](https://github.com/signalfx/splunk-otel-collector/pull/5780))

## v0.116.0

This Splunk OpenTelemetry Collector release includes changes from the [opentelemetry-collector v0.116.0](https://github.com/open-telemetry/opentelemetry-collector/releases/tag/v0.116.0) and the [opentelemetry-collector-contrib v0.116.0](https://github.com/open-telemetry/opentelemetry-collector-contrib/releases/tag/v0.116.0) releases where appropriate.
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ integration-test-jmx/cassandra-discovery:
integration-test-apache-discovery:
@set -e; cd tests && $(GOTEST_SERIAL) $(BUILD_INFO_TESTS) --tags=discovery_integration_apachewebserver -v -timeout 5m -count 1 ./...

.PHONY: integration-test-envoy-discovery
integration-test-envoy-discovery:
@set -e; cd tests && $(GOTEST_SERIAL) $(BUILD_INFO_TESTS) --tags=discovery_integration_envoy -v -timeout 5m -count 1 ./...

.PHONY: integration-test-nginx-discovery
integration-test-nginx-discovery:
@set -e; cd tests && $(GOTEST_SERIAL) $(BUILD_INFO_TESTS) --tags=discovery_integration_nginx -v -timeout 5m -count 1 ./...
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#####################################################################################
# This file is generated by the Splunk Distribution of the OpenTelemetry Collector. #
# #
# It reflects the default configuration bundled in the Collector executable for use #
# in discovery mode (--discovery) and is provided for reference or customization. #
# Please note that any changes made to this file will need to be reconciled during #
# upgrades of the Collector. #
#####################################################################################
# prometheus:
# enabled: true
# rule:
# docker_observer: type == "container" and any([name, image, command], {# matches "(?i)envoy"}) and not (command matches "splunk.discovery")
# host_observer: type == "hostport" and command matches "(?i)envoy" and not (command matches "splunk.discovery")
# k8s_observer: type == "port" and pod.name matches "(?i)envoy"
# config:
# default:
# config:
# scrape_configs:
# - job_name: 'envoy'
# metrics_path: /stats/prometheus
# scrape_interval: 10s
# static_configs:
# - targets: ['`endpoint`']
# metric_relabel_configs:
# - source_labels: [__name__]
# action: keep
# regex: '(envoy_cluster_upstream_cx_active|envoy_cluster_upstream_cx_total|envoy_cluster_upstream_cx_connect_fail|envoy_cluster_upstream_cx_connect_ms|envoy_cluster_upstream_rq_active|envoy_cluster_upstream_rq_total|envoy_cluster_upstream_rq_timeout|envoy_cluster_upstream_rq_pending_active|envoy_cluster_upstream_rq_pending_overflow|envoy_cluster_upstream_rq_time|envoy_cluster_membership_total|envoy_cluster_membership_degraded|envoy_cluster_membership_excluded|envoy_listener_downstream_cx_active|envoy_listener_downstream_cx_total|envoy_listener_downstream_cx_transport_socket_connect_timeout|envoy_listener_downstream_cx_overflow|envoy_listener_downstream_cx_overload_reject|envoy_listener_downstream_global_cx_overflow)'
# status:
# metrics:
# - status: successful
# strict: envoy_cluster_upstream_cx_active
# message: envoy prometheus receiver is working!
# statements:
# - status: failed
# regexp: "connection refused"
# message: The container is not serving http connections.
# - status: failed
# regexp: "dial tcp: lookup"
# message: Unable to resolve envoy prometheus tcp endpoint
7 changes: 7 additions & 0 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,13 @@ services:
- integration
environment:
- ELASTIC_PASSWORD=$ELASTIC_PASSWORD
envoy:
image: quay.io/splunko11ytest/envoy:latest
profiles:
- integration-test-envoy-discovery
build: ./envoy
ports:
- "9901:9901"
# Haproxy image for haproxy test:
haproxy:
image: quay.io/splunko11ytest/haproxy:latest
Expand Down
1 change: 1 addition & 0 deletions docker/envoy/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FROM envoyproxy/envoy:v1.32-latest
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#####################################################################################
# Do not edit manually! #
# All changes must be made to associated .tmpl file before running 'make bundle.d'. #
#####################################################################################
prometheus:
enabled: true
rule:
docker_observer: type == "container" and any([name, image, command], {# matches "(?i)envoy"}) and not (command matches "splunk.discovery")
host_observer: type == "hostport" and command matches "(?i)envoy" and not (command matches "splunk.discovery")
k8s_observer: type == "port" and pod.name matches "(?i)envoy"
config:
default:
config:
scrape_configs:
- job_name: 'envoy'
metrics_path: /stats/prometheus
scrape_interval: 10s
static_configs:
- targets: ['`endpoint`']
metric_relabel_configs:
- source_labels: [__name__]
action: keep
regex: '(envoy_cluster_upstream_cx_active|envoy_cluster_upstream_cx_total|envoy_cluster_upstream_cx_connect_fail|envoy_cluster_upstream_cx_connect_ms|envoy_cluster_upstream_rq_active|envoy_cluster_upstream_rq_total|envoy_cluster_upstream_rq_timeout|envoy_cluster_upstream_rq_pending_active|envoy_cluster_upstream_rq_pending_overflow|envoy_cluster_upstream_rq_time|envoy_cluster_membership_total|envoy_cluster_membership_degraded|envoy_cluster_membership_excluded|envoy_listener_downstream_cx_active|envoy_listener_downstream_cx_total|envoy_listener_downstream_cx_transport_socket_connect_timeout|envoy_listener_downstream_cx_overflow|envoy_listener_downstream_cx_overload_reject|envoy_listener_downstream_global_cx_overflow)'
status:
metrics:
- status: successful
strict: envoy_cluster_upstream_cx_active
message: envoy prometheus receiver is working!
statements:
- status: failed
regexp: "connection refused"
message: The container is not serving http connections.
- status: failed
regexp: "dial tcp: lookup"
message: Unable to resolve envoy prometheus tcp endpoint
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{{ receiver "prometheus" }}:
enabled: true
rule:
docker_observer: type == "container" and any([name, image, command], {# matches "(?i)envoy"}) and not (command matches "splunk.discovery")
host_observer: type == "hostport" and command matches "(?i)envoy" and not (command matches "splunk.discovery")
k8s_observer: type == "port" and pod.name matches "(?i)envoy"
config:
default:
config:
scrape_configs:
- job_name: 'envoy'
metrics_path: /stats/prometheus
scrape_interval: 10s
static_configs:
- targets: ['`endpoint`']
metric_relabel_configs:
- source_labels: [__name__]
action: keep
regex: '(envoy_cluster_upstream_cx_active|envoy_cluster_upstream_cx_total|envoy_cluster_upstream_cx_connect_fail|envoy_cluster_upstream_cx_connect_ms|envoy_cluster_upstream_rq_active|envoy_cluster_upstream_rq_total|envoy_cluster_upstream_rq_timeout|envoy_cluster_upstream_rq_pending_active|envoy_cluster_upstream_rq_pending_overflow|envoy_cluster_upstream_rq_time|envoy_cluster_membership_total|envoy_cluster_membership_degraded|envoy_cluster_membership_excluded|envoy_listener_downstream_cx_active|envoy_listener_downstream_cx_total|envoy_listener_downstream_cx_transport_socket_connect_timeout|envoy_listener_downstream_cx_overflow|envoy_listener_downstream_cx_overload_reject|envoy_listener_downstream_global_cx_overflow)'
status:
metrics:
- status: successful
strict: envoy_cluster_upstream_cx_active
message: envoy prometheus receiver is working!
statements:
- status: failed
regexp: "connection refused"
message: The container is not serving http connections.
- status: failed
regexp: "dial tcp: lookup"
message: Unable to resolve envoy prometheus tcp endpoint
2 changes: 2 additions & 0 deletions internal/confmapprovider/discovery/bundle/bundle_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

//go:generate discoverybundler --render --template bundle.d/receivers/apache.discovery.yaml.tmpl
//go:generate discoverybundler --render --commented --dir ../../../../cmd/otelcol/config/collector/config.d.linux/receivers -t bundle.d/receivers/apache.discovery.yaml.tmpl
//go:generate discoverybundler --render --template bundle.d/receivers/envoy.discovery.yaml.tmpl
//go:generate discoverybundler --render --commented --dir ../../../../cmd/otelcol/config/collector/config.d.linux/receivers -t bundle.d/receivers/envoy.discovery.yaml.tmpl
//go:generate discoverybundler --render --template bundle.d/receivers/jmx-cassandra.discovery.yaml.tmpl
//go:generate discoverybundler --render --commented --dir ../../../../cmd/otelcol/config/collector/config.d.linux/receivers -t bundle.d/receivers/jmx-cassandra.discovery.yaml.tmpl
//go:generate discoverybundler --render --template bundle.d/receivers/kafkametrics.discovery.yaml.tmpl
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ func TestBundleDir(t *testing.T) {
require.NoError(t, err)
require.Equal(t, []string{
"bundle.d/receivers/apache.discovery.yaml",
"bundle.d/receivers/envoy.discovery.yaml",
"bundle.d/receivers/jmx-cassandra.discovery.yaml",
"bundle.d/receivers/kafkametrics.discovery.yaml",
"bundle.d/receivers/mongodb.discovery.yaml",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
//go:embed bundle.d/extensions/host-observer.discovery.yaml
//go:embed bundle.d/extensions/k8s-observer.discovery.yaml
//go:embed bundle.d/receivers/apache.discovery.yaml
//go:embed bundle.d/receivers/envoy.discovery.yaml
//go:embed bundle.d/receivers/jmx-cassandra.discovery.yaml
//go:embed bundle.d/receivers/kafkametrics.discovery.yaml
//go:embed bundle.d/receivers/mongodb.discovery.yaml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
//go:embed bundle.d/extensions/host-observer.discovery.yaml
//go:embed bundle.d/extensions/k8s-observer.discovery.yaml
//go:embed bundle.d/receivers/apache.discovery.yaml
//go:embed bundle.d/receivers/envoy.discovery.yaml
//go:embed bundle.d/receivers/jmx-cassandra.discovery.yaml
//go:embed bundle.d/receivers/kafkametrics.discovery.yaml
//go:embed bundle.d/receivers/mongodb.discovery.yaml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ func TestBundleDir(t *testing.T) {
require.NoError(t, err)
require.Equal(t, []string{
"bundle.d/receivers/apache.discovery.yaml",
"bundle.d/receivers/envoy.discovery.yaml",
"bundle.d/receivers/jmx-cassandra.discovery.yaml",
"bundle.d/receivers/kafkametrics.discovery.yaml",
"bundle.d/receivers/mongodb.discovery.yaml",
Expand Down
2 changes: 2 additions & 0 deletions internal/confmapprovider/discovery/bundle/components.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ var (
// in Components.Linux. If desired in windows BundledFS, ensure they are included in Components.Windows.
receivers = []string{
"apache",
"envoy",
"jmx-cassandra",
"kafkametrics",
"mongodb",
Expand Down Expand Up @@ -66,6 +67,7 @@ var (
Windows: func() map[string]struct{} {
windows := map[string]struct{}{
"apache": {},
"envoy": {},
"jmx-cassandra": {},
"kafkametrics": {},
"mongodb": {},
Expand Down
89 changes: 89 additions & 0 deletions tests/receivers/envoy/bundled_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Copyright Splunk, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build discovery_integration_envoy

package tests

import (
"fmt"
"path/filepath"
"testing"
"time"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest/pmetrictest"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/signalfx/splunk-otel-collector/tests/testutils"
)

func TestEnvoyDockerObserver(t *testing.T) {
testutils.SkipIfNotContainerTest(t)
dockerSocket := testutils.CreateDockerSocketProxy(t)
require.NoError(t, dockerSocket.Start())
t.Cleanup(func() {
dockerSocket.Stop()
})

tc := testutils.NewTestcase(t)
defer tc.PrintLogsOnFailure()
defer tc.ShutdownOTLPReceiverSink()
_, shutdown := tc.SplunkOtelCollectorContainer("otlp_exporter.yaml", func(collector testutils.Collector) testutils.Collector {
cc := collector.(*testutils.CollectorContainer)
return cc.WithEnv(map[string]string{
"SPLUNK_DISCOVERY_DURATION": "20s",
"SPLUNK_DISCOVERY_LOG_LEVEL": "debug",
}).WithArgs(
"--discovery",
"--set", `splunk.discovery.extensions.k8s_observer.enabled=false`,
"--set", `splunk.discovery.extensions.host_observer.enabled=false`,
"--set", fmt.Sprintf("splunk.discovery.extensions.docker_observer.config.endpoint=tcp://%s", dockerSocket.ContainerEndpoint),
)
})
defer shutdown()

expected, err := golden.ReadMetrics(filepath.Join("testdata", "expected.yaml"))
require.NoError(t, err)
require.EventuallyWithT(t, func(tt *assert.CollectT) {
if len(tc.OTLPReceiverSink.AllMetrics()) == 0 {
assert.Fail(tt, "No metrics collected")
return
}
err := pmetrictest.CompareMetrics(expected, tc.OTLPReceiverSink.AllMetrics()[len(tc.OTLPReceiverSink.AllMetrics())-1],
pmetrictest.IgnoreResourceAttributeValue("service.instance.id"),
pmetrictest.IgnoreResourceAttributeValue("net.host.port"),
pmetrictest.IgnoreResourceAttributeValue("net.host.name"),
pmetrictest.IgnoreResourceAttributeValue("server.address"),
pmetrictest.IgnoreResourceAttributeValue("container.name"),
pmetrictest.IgnoreResourceAttributeValue("server.port"),
pmetrictest.IgnoreResourceAttributeValue("service.name"),
pmetrictest.IgnoreResourceAttributeValue("service_instance_id"),
pmetrictest.IgnoreResourceAttributeValue("service_version"),
pmetrictest.IgnoreMetricAttributeValue("service_version"),
pmetrictest.IgnoreMetricAttributeValue("service_instance_id"),
pmetrictest.IgnoreResourceAttributeValue("server.address"),
pmetrictest.IgnoreTimestamp(),
pmetrictest.IgnoreStartTimestamp(),
pmetrictest.IgnoreMetricDataPointsOrder(),
pmetrictest.IgnoreScopeMetricsOrder(),
pmetrictest.IgnoreScopeVersion(),
pmetrictest.IgnoreResourceMetricsOrder(),
pmetrictest.IgnoreMetricsOrder(),
pmetrictest.IgnoreMetricValues(),
)
assert.NoError(tt, err)
}, 30*time.Second, 1*time.Second)
}
Loading

0 comments on commit 4fe7405

Please sign in to comment.