Skip to content

Commit

Permalink
[processor/tailsampling] Add metric for sampled/not sampled spans (#3…
Browse files Browse the repository at this point in the history
…0485)

**Description:** <Describe what has changed.>
Add metrics to measure sampled/not sampled spans.

**Link to tracking Issue:** 
Fixes
#30482

**Testing:** <Describe what testing was performed and which tests were
added.>
None

**Documentation:** <Describe the documentation added.>
None

---------

Signed-off-by: Arthur Silva Sens <[email protected]>
  • Loading branch information
ArthurSens authored Feb 16, 2024
1 parent a852c86 commit 30eda26
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 83 deletions.
27 changes: 27 additions & 0 deletions .chloggen/sampled_spans_metrics.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: processor/tail_sampling

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: "Add metrics that measure the number of sampled spans and the number of spans that are dropped due to sampling decisions."

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [30482]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
11 changes: 11 additions & 0 deletions processor/tailsamplingprocessor/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,24 @@ import (
"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/config/configtelemetry"
"go.opentelemetry.io/collector/consumer"
"go.opentelemetry.io/collector/featuregate"
"go.opentelemetry.io/collector/processor"

"github.com/open-telemetry/opentelemetry-collector-contrib/processor/tailsamplingprocessor/internal/metadata"
)

var onceMetrics sync.Once

var metricStatCountSpansSampledFeatureGate = featuregate.GlobalRegistry().MustRegister(
"processor.tailsamplingprocessor.metricstatcountspanssampled",
featuregate.StageAlpha,
featuregate.WithRegisterDescription("When enabled, a new metric stat_count_spans_sampled will be available in the tail sampling processor. Differently from stat_count_traces_sampled, this metric will count the number of spans sampled or not per sampling policy, where the original counts traces."),
)

func isMetricStatCountSpansSampledEnabled() bool {
return metricStatCountSpansSampledFeatureGate.IsEnabled()
}

// NewFactory returns a new factory for the Tail Sampling processor.
func NewFactory() processor.Factory {
onceMetrics.Do(func() {
Expand Down
2 changes: 2 additions & 0 deletions processor/tailsamplingprocessor/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ require (
go.opentelemetry.io/collector/config/configtelemetry v0.94.1
go.opentelemetry.io/collector/confmap v0.94.1
go.opentelemetry.io/collector/consumer v0.94.1
go.opentelemetry.io/collector/featuregate v1.1.0
go.opentelemetry.io/collector/pdata v1.1.0
go.opentelemetry.io/collector/processor v0.94.1
go.opentelemetry.io/otel/metric v1.23.1
Expand All @@ -33,6 +34,7 @@ require (
github.com/gobwas/glob v0.2.3 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/hashicorp/go-version v1.6.0 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/iancoleman/strcase v0.3.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
Expand Down
4 changes: 4 additions & 0 deletions processor/tailsamplingprocessor/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

159 changes: 76 additions & 83 deletions processor/tailsamplingprocessor/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ var (
statPolicyEvaluationErrorCount = stats.Int64("sampling_policy_evaluation_error", "Count of sampling policy evaluation errors", stats.UnitDimensionless)

statCountTracesSampled = stats.Int64("count_traces_sampled", "Count of traces that were sampled or not per sampling policy", stats.UnitDimensionless)
statCountSpansSampled = stats.Int64("count_spans_sampled", "Count of spans that were sampled or not per sampling policy", stats.UnitDimensionless)
statCountGlobalTracesSampled = stats.Int64("global_count_traces_sampled", "Global count of traces that were sampled or not by at least one policy", stats.UnitDimensionless)

statDroppedTooEarlyCount = stats.Int64("sampling_trace_dropped_too_early", "Count of traces that needed to be dropped before the configured wait time", stats.UnitDimensionless)
Expand All @@ -46,90 +47,82 @@ func samplingProcessorMetricViews(level configtelemetry.Level) []*view.View {
latencyDistributionAggregation := view.Distribution(1, 2, 5, 10, 25, 50, 75, 100, 150, 200, 300, 400, 500, 750, 1000, 2000, 3000, 4000, 5000, 10000, 20000, 30000, 50000)
ageDistributionAggregation := view.Distribution(1, 2, 5, 10, 20, 30, 40, 50, 60, 90, 120, 180, 300, 600, 1800, 3600, 7200)

decisionLatencyView := &view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statDecisionLatencyMicroSec.Name()),
Measure: statDecisionLatencyMicroSec,
Description: statDecisionLatencyMicroSec.Description(),
TagKeys: policyTagKeys,
Aggregation: latencyDistributionAggregation,
}
overallDecisionLatencyView := &view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statOverallDecisionLatencyUs.Name()),
Measure: statOverallDecisionLatencyUs,
Description: statOverallDecisionLatencyUs.Description(),
Aggregation: latencyDistributionAggregation,
}

traceRemovalAgeView := &view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statTraceRemovalAgeSec.Name()),
Measure: statTraceRemovalAgeSec,
Description: statTraceRemovalAgeSec.Description(),
Aggregation: ageDistributionAggregation,
}
lateSpanArrivalView := &view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statLateSpanArrivalAfterDecision.Name()),
Measure: statLateSpanArrivalAfterDecision,
Description: statLateSpanArrivalAfterDecision.Description(),
Aggregation: ageDistributionAggregation,
}

countPolicyEvaluationErrorView := &view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statPolicyEvaluationErrorCount.Name()),
Measure: statPolicyEvaluationErrorCount,
Description: statPolicyEvaluationErrorCount.Description(),
Aggregation: view.Sum(),
}

views := make([]*view.View, 0)
sampledTagKeys := []tag.Key{tagPolicyKey, tagSampledKey}
countTracesSampledView := &view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statCountTracesSampled.Name()),
Measure: statCountTracesSampled,
Description: statCountTracesSampled.Description(),
TagKeys: sampledTagKeys,
Aggregation: view.Sum(),
}

countGlobalTracesSampledView := &view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statCountGlobalTracesSampled.Name()),
Measure: statCountGlobalTracesSampled,
Description: statCountGlobalTracesSampled.Description(),
TagKeys: []tag.Key{tagSampledKey},
Aggregation: view.Sum(),
views = append(views,
&view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statDecisionLatencyMicroSec.Name()),
Measure: statDecisionLatencyMicroSec,
Description: statDecisionLatencyMicroSec.Description(),
TagKeys: policyTagKeys,
Aggregation: latencyDistributionAggregation,
},
&view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statOverallDecisionLatencyUs.Name()),
Measure: statOverallDecisionLatencyUs,
Description: statOverallDecisionLatencyUs.Description(),
Aggregation: latencyDistributionAggregation,
},
&view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statTraceRemovalAgeSec.Name()),
Measure: statTraceRemovalAgeSec,
Description: statTraceRemovalAgeSec.Description(),
Aggregation: ageDistributionAggregation,
},
&view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statLateSpanArrivalAfterDecision.Name()),
Measure: statLateSpanArrivalAfterDecision,
Description: statLateSpanArrivalAfterDecision.Description(),
Aggregation: ageDistributionAggregation,
},
&view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statPolicyEvaluationErrorCount.Name()),
Measure: statPolicyEvaluationErrorCount,
Description: statPolicyEvaluationErrorCount.Description(),
Aggregation: view.Sum(),
},
&view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statCountTracesSampled.Name()),
Measure: statCountTracesSampled,
Description: statCountTracesSampled.Description(),
TagKeys: sampledTagKeys,
Aggregation: view.Sum(),
},
&view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statCountGlobalTracesSampled.Name()),
Measure: statCountGlobalTracesSampled,
Description: statCountGlobalTracesSampled.Description(),
TagKeys: []tag.Key{tagSampledKey},
Aggregation: view.Sum(),
},
&view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statDroppedTooEarlyCount.Name()),
Measure: statDroppedTooEarlyCount,
Description: statDroppedTooEarlyCount.Description(),
Aggregation: view.Sum(),
},
&view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statNewTraceIDReceivedCount.Name()),
Measure: statNewTraceIDReceivedCount,
Description: statNewTraceIDReceivedCount.Description(),
Aggregation: view.Sum(),
},
&view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statTracesOnMemoryGauge.Name()),
Measure: statTracesOnMemoryGauge,
Description: statTracesOnMemoryGauge.Description(),
Aggregation: view.LastValue(),
})

if isMetricStatCountSpansSampledEnabled() {
views = append(views, &view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statCountSpansSampled.Name()),
Measure: statCountSpansSampled,
Description: statCountSpansSampled.Description(),
TagKeys: sampledTagKeys,
Aggregation: view.Sum(),
})
}

countTraceDroppedTooEarlyView := &view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statDroppedTooEarlyCount.Name()),
Measure: statDroppedTooEarlyCount,
Description: statDroppedTooEarlyCount.Description(),
Aggregation: view.Sum(),
}
countTraceIDArrivalView := &view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statNewTraceIDReceivedCount.Name()),
Measure: statNewTraceIDReceivedCount,
Description: statNewTraceIDReceivedCount.Description(),
Aggregation: view.Sum(),
}
trackTracesOnMemorylView := &view.View{
Name: processorhelper.BuildCustomMetricName(metadata.Type.String(), statTracesOnMemoryGauge.Name()),
Measure: statTracesOnMemoryGauge,
Description: statTracesOnMemoryGauge.Description(),
Aggregation: view.LastValue(),
}

return []*view.View{
decisionLatencyView,
overallDecisionLatencyView,

traceRemovalAgeView,
lateSpanArrivalView,

countPolicyEvaluationErrorView,

countTracesSampledView,
countGlobalTracesSampledView,

countTraceDroppedTooEarlyView,
countTraceIDArrivalView,
trackTracesOnMemorylView,
}
return views
}
14 changes: 14 additions & 0 deletions processor/tailsamplingprocessor/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,13 @@ func (tsp *tailSamplingSpanProcessor) makeDecision(id pcommon.TraceID, trace *sa
mutators,
statCountTracesSampled.M(int64(1)),
)
if isMetricStatCountSpansSampledEnabled() {
_ = stats.RecordWithTags(
p.ctx,
mutators,
statCountSpansSampled.M(trace.SpanCount.Load()),
)
}
metrics.decisionSampled++

case sampling.NotSampled:
Expand All @@ -317,6 +324,13 @@ func (tsp *tailSamplingSpanProcessor) makeDecision(id pcommon.TraceID, trace *sa
mutators,
statCountTracesSampled.M(int64(1)),
)
if isMetricStatCountSpansSampledEnabled() {
_ = stats.RecordWithTags(
p.ctx,
mutators,
statCountSpansSampled.M(trace.SpanCount.Load()),
)
}
metrics.decisionNotSampled++
}
}
Expand Down

0 comments on commit 30eda26

Please sign in to comment.