From 5f75137242bd198c0a205a78c21dddfd79418791 Mon Sep 17 00:00:00 2001
From: Florian Bacher <florian.bacher@dynatrace.com>
Date: Tue, 2 Jul 2024 19:59:00 +0200
Subject: [PATCH] [processor/transform] Instrument the transform processor to
 emit traces (#33508)

**Description:** Currently Work in Progress - This PR adds
instrumentation to the ottl statement sequence execution by emitting
traces for statement sequence executions. Each statement execution
within that trace is represented as a child span with the respective
statement as an attribute.

Currently, the generated spans will be child spans of the
`opentelemetry.proto.collector.trace.v1.TraceService/Export` trace, due
to the context provided to the `StatementExecution.Execute()` method.
Alternatively, we can also create a new root span for the statement
sequence execution. Feel free to comment on what option you would prefer
in this case.

As for relating the debug logs to the traces, it seems like exporting
logs is currently not supported for internal observability, as
documented here:
https://github.com/open-telemetry/opentelemetry-collector/blob/1889d589d5933623b4a4a1831379e95ca02225fb/docs/observability.md?plain=1#L123
- If this is in fact possible in the meantime, I appreciate any pointers
towards how this could be done.

**Link to tracking Issue:** #33433

**Testing:** Tested by running the collector with the
`--feature-gates=telemetry.useOtelWithSDKConfigurationForInternalTelemetry`
feature flag, and the following configuration:

```
receivers:
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317

processors:
  transform:
    error_mode: ignore
    trace_statements:
      - context: span
        statements:
          - keep_matching_keys(attributes, "foo.*")

exporters:
  debug:
    verbosity: detailed
  otlphttp:
    endpoint: ${env:OTLP_ENDPOINT}
    headers:
      Authorization: "Api-Token ${env:API_TOKEN}"

service:
  telemetry:
    traces:
      processors:
        - batch:
            exporter:
              otlp:
                protocol: http/protobuf
                endpoint: ${env:OTLP_ENDPOINT}/v1/traces
                headers:
                  Authorization: "Api-Token ${env:API_TOKEN}"

  pipelines:
    traces:
      receivers: [otlp]
      processors: [transform]
      exporters: [otlphttp, debug]
```

And sending test spans with attributes (to trigger the statement
sequence execution) using the `telemetrygen` CLI:

```
telemetrygen traces --otlp-insecure --traces 1 --status-code Ok --span-duration 1s --telemetry-attributes http.foo=\"value1\" --telemetry-attributes http.bar=\"value2\" --telemetry-attributes foo=\"bar\"
```

**Documentation:** Added a paragraph in the Troubleshooting section of
the README about emitting traces

---------

Signed-off-by: Florian Bacher <florian.bacher@dynatrace.com>
---
 .chloggen/ottl-emit-traces.yaml |  27 +++++
 pkg/ottl/README.md              |  18 ++++
 pkg/ottl/go.mod                 |   4 +-
 pkg/ottl/parser.go              |  64 ++++++++++-
 pkg/ottl/parser_test.go         | 184 +++++++++++++++++++++++++++++++-
 5 files changed, 287 insertions(+), 10 deletions(-)
 create mode 100644 .chloggen/ottl-emit-traces.yaml

diff --git a/.chloggen/ottl-emit-traces.yaml b/.chloggen/ottl-emit-traces.yaml
new file mode 100644
index 000000000000..66644cc2ee88
--- /dev/null
+++ b/.chloggen/ottl-emit-traces.yaml
@@ -0,0 +1,27 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: enhancement
+
+# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
+component: pkg/ottl
+
+# A brief description of the change.  Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Emit traces for statement sequence executions to troubleshoot OTTL statements/conditions
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [33433]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext:
+
+# If your change doesn't affect end users or the exported elements of any package,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: []
diff --git a/pkg/ottl/README.md b/pkg/ottl/README.md
index 57a53ac9e053..b4962f84a2d7 100644
--- a/pkg/ottl/README.md
+++ b/pkg/ottl/README.md
@@ -151,3 +151,21 @@ service:
 2024-05-29T16:38:09.600-0600    debug   ottl@v0.101.0/parser.go:268     TransformContext after statement execution      {"kind": "processor", "name": "transform", "pipeline": "logs", "statement": "set(instrumentation_scope.attributes[\"test\"], [\"pass\"])", "condition matched": true, "TransformContext": {"resource": {"attributes": {"test": "pass"}, "dropped_attribute_count": 0}, "scope": {"attributes": {"test": ["pass"]}, "dropped_attribute_count": 0, "name": "", "version": ""}, "log_record": {"attributes": {"log.file.name": "test.log"}, "body": "test", "dropped_attribute_count": 0, "flags": 0, "observed_time_unix_nano": 1717022289500721000, "severity_number": 0, "severity_text": "", "span_id": "", "time_unix_nano": 0, "trace_id": ""}, "cache": {}}}
 2024-05-29T16:38:09.601-0600    debug   ottl@v0.101.0/parser.go:268     TransformContext after statement execution      {"kind": "processor", "name": "transform", "pipeline": "logs", "statement": "set(attributes[\"test\"], true)", "condition matched": true, "TransformContext": {"resource": {"attributes": {"test": "pass"}, "dropped_attribute_count": 0}, "scope": {"attributes": {"test": ["pass"]}, "dropped_attribute_count": 0, "name": "", "version": ""}, "log_record": {"attributes": {"log.file.name": "test.log", "test": true}, "body": "test", "dropped_attribute_count": 0, "flags": 0, "observed_time_unix_nano": 1717022289500721000, "severity_number": 0, "severity_text": "", "span_id": "", "time_unix_nano": 0, "trace_id": ""}, "cache": {}}}
 ```
+
+If configured to do so, the collector also emits traces for the execution of OTTL statement sequences.
+These traces contain spans for the execution of each statement, including the statement itself and whether it has
+been applied or not. To make use of this, enable the self monitoring of the collector by setting the
+`--feature-gates=telemetry.useOtelWithSDKConfigurationForInternalTelemetry` flag, and using the following configuration
+to export the traces to e.g. an OTLP API endpoint:
+
+```yaml
+service:
+  telemetry:
+    traces:
+      processors:
+        - batch:
+            exporter:
+              otlp:
+                protocol: http/protobuf
+                endpoint: ${env:OTLP_ENDPOINT}/v1/traces
+```
diff --git a/pkg/ottl/go.mod b/pkg/ottl/go.mod
index 7fec131e523e..6eafe57857de 100644
--- a/pkg/ottl/go.mod
+++ b/pkg/ottl/go.mod
@@ -14,6 +14,8 @@ require (
 	go.opentelemetry.io/collector/component v0.104.0
 	go.opentelemetry.io/collector/pdata v1.11.0
 	go.opentelemetry.io/collector/semconv v0.104.0
+	go.opentelemetry.io/otel v1.27.0
+	go.opentelemetry.io/otel/sdk v1.27.0
 	go.opentelemetry.io/otel/trace v1.27.0
 	go.uber.org/goleak v1.3.0
 	go.uber.org/zap v1.27.0
@@ -37,10 +39,8 @@ require (
 	github.com/prometheus/common v0.54.0 // indirect
 	github.com/prometheus/procfs v0.15.0 // indirect
 	go.opentelemetry.io/collector/config/configtelemetry v0.104.0 // indirect
-	go.opentelemetry.io/otel v1.27.0 // indirect
 	go.opentelemetry.io/otel/exporters/prometheus v0.49.0 // indirect
 	go.opentelemetry.io/otel/metric v1.27.0 // indirect
-	go.opentelemetry.io/otel/sdk v1.27.0 // indirect
 	go.opentelemetry.io/otel/sdk/metric v1.27.0 // indirect
 	go.uber.org/multierr v1.11.0 // indirect
 	golang.org/x/sys v0.21.0 // indirect
diff --git a/pkg/ottl/parser.go b/pkg/ottl/parser.go
index 4a0b3cabe7be..f8d5ab7dabc8 100644
--- a/pkg/ottl/parser.go
+++ b/pkg/ottl/parser.go
@@ -10,9 +10,18 @@ import (
 
 	"github.com/alecthomas/participle/v2"
 	"go.opentelemetry.io/collector/component"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
+	"go.opentelemetry.io/otel/trace/noop"
 	"go.uber.org/zap"
 )
 
+const (
+	logAttributeTraceID = "trace_id"
+	logAttributeSpanID  = "span_id"
+)
+
 // Statement holds a top level Statement for processing telemetry data. A Statement is a combination of a function
 // invocation and the boolean expression to match telemetry for invoking the function.
 type Statement[K any] struct {
@@ -231,6 +240,7 @@ type StatementSequence[K any] struct {
 	statements        []*Statement[K]
 	errorMode         ErrorMode
 	telemetrySettings component.TelemetrySettings
+	tracer            trace.Tracer
 }
 
 type StatementSequenceOption[K any] func(*StatementSequence[K])
@@ -250,6 +260,10 @@ func NewStatementSequence[K any](statements []*Statement[K], telemetrySettings c
 		statements:        statements,
 		errorMode:         PropagateError,
 		telemetrySettings: telemetrySettings,
+		tracer:            &noop.Tracer{},
+	}
+	if telemetrySettings.TracerProvider != nil {
+		s.tracer = telemetrySettings.TracerProvider.Tracer("ottl")
 	}
 	for _, op := range options {
 		op(&s)
@@ -262,20 +276,62 @@ func NewStatementSequence[K any](statements []*Statement[K], telemetrySettings c
 // When the ErrorMode of the StatementSequence is `ignore`, errors are logged and execution continues to the next statement.
 // When the ErrorMode of the StatementSequence is `silent`, errors are not logged and execution continues to the next statement.
 func (s *StatementSequence[K]) Execute(ctx context.Context, tCtx K) error {
-	s.telemetrySettings.Logger.Debug("initial TransformContext", zap.Any("TransformContext", tCtx))
+	ctx, sequenceSpan := s.tracer.Start(ctx, "ottl/StatementSequenceExecution")
+	defer sequenceSpan.End()
+	s.telemetrySettings.Logger.Debug(
+		"initial TransformContext",
+		zap.Any("TransformContext", tCtx),
+		zap.String(logAttributeTraceID, sequenceSpan.SpanContext().TraceID().String()),
+		zap.String(logAttributeSpanID, sequenceSpan.SpanContext().SpanID().String()),
+	)
 	for _, statement := range s.statements {
-		_, condition, err := statement.Execute(ctx, tCtx)
-		s.telemetrySettings.Logger.Debug("TransformContext after statement execution", zap.String("statement", statement.origText), zap.Bool("condition matched", condition), zap.Any("TransformContext", tCtx))
+		statementCtx, statementSpan := s.tracer.Start(ctx, "ottl/StatementExecution")
+		statementSpan.SetAttributes(
+			attribute.KeyValue{
+				Key:   "statement",
+				Value: attribute.StringValue(statement.origText),
+			},
+		)
+		_, condition, err := statement.Execute(statementCtx, tCtx)
+		statementSpan.SetAttributes(
+			attribute.KeyValue{
+				Key:   "condition.matched",
+				Value: attribute.BoolValue(condition),
+			},
+		)
+		s.telemetrySettings.Logger.Debug(
+			"TransformContext after statement execution",
+			zap.String("statement", statement.origText),
+			zap.Bool("condition matched", condition),
+			zap.Any("TransformContext", tCtx),
+			zap.String(logAttributeTraceID, statementSpan.SpanContext().TraceID().String()),
+			zap.String(logAttributeSpanID, statementSpan.SpanContext().SpanID().String()),
+		)
 		if err != nil {
+			statementSpan.RecordError(err)
+			errMsg := fmt.Sprintf("failed to execute statement '%s': %v", statement.origText, err)
+			statementSpan.SetStatus(codes.Error, errMsg)
 			if s.errorMode == PropagateError {
+				sequenceSpan.SetStatus(codes.Error, errMsg)
+				statementSpan.End()
 				err = fmt.Errorf("failed to execute statement: %v, %w", statement.origText, err)
 				return err
 			}
 			if s.errorMode == IgnoreError {
-				s.telemetrySettings.Logger.Warn("failed to execute statement", zap.Error(err), zap.String("statement", statement.origText))
+				s.telemetrySettings.Logger.Warn(
+					"failed to execute statement",
+					zap.Error(err),
+					zap.String("statement", statement.origText),
+					zap.String(logAttributeTraceID, statementSpan.SpanContext().TraceID().String()),
+					zap.String(logAttributeSpanID, statementSpan.SpanContext().SpanID().String()),
+				)
 			}
+		} else {
+			statementSpan.SetStatus(codes.Ok, "statement executed successfully")
 		}
+		statementSpan.End()
 	}
+	sequenceSpan.SetStatus(codes.Ok, "statement sequence executed successfully")
 	return nil
 }
 
diff --git a/pkg/ottl/parser_test.go b/pkg/ottl/parser_test.go
index 10da41ead069..679ceddd1872 100644
--- a/pkg/ottl/parser_test.go
+++ b/pkg/ottl/parser_test.go
@@ -13,7 +13,12 @@ import (
 	"time"
 
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 	"go.opentelemetry.io/collector/component/componenttest"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/sdk/trace"
+	"go.opentelemetry.io/otel/sdk/trace/tracetest"
 
 	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottltest"
 )
@@ -2049,10 +2054,11 @@ func Test_Condition_Eval(t *testing.T) {
 
 func Test_Statements_Execute_Error(t *testing.T) {
 	tests := []struct {
-		name      string
-		condition boolExpressionEvaluator[any]
-		function  ExprFunc[any]
-		errorMode ErrorMode
+		name          string
+		condition     boolExpressionEvaluator[any]
+		function      ExprFunc[any]
+		errorMode     ErrorMode
+		expectedSpans []expectedSpan
 	}{
 		{
 			name: "IgnoreError error from condition",
@@ -2063,6 +2069,31 @@ func Test_Statements_Execute_Error(t *testing.T) {
 				return 1, nil
 			},
 			errorMode: IgnoreError,
+			expectedSpans: []expectedSpan{
+				{
+					name: "ottl/StatementExecution",
+					attributes: []attribute.KeyValue{
+						{
+							Key:   "statement",
+							Value: attribute.StringValue("test"),
+						},
+						{
+							Key:   "condition.matched",
+							Value: attribute.BoolValue(false),
+						},
+					},
+					status: trace.Status{
+						Code:        codes.Error,
+						Description: "failed to execute statement 'test': test",
+					},
+				},
+				{
+					name: "ottl/StatementSequenceExecution",
+					status: trace.Status{
+						Code: codes.Ok,
+					},
+				},
+			},
 		},
 		{
 			name: "PropagateError error from condition",
@@ -2073,6 +2104,32 @@ func Test_Statements_Execute_Error(t *testing.T) {
 				return 1, nil
 			},
 			errorMode: PropagateError,
+			expectedSpans: []expectedSpan{
+				{
+					name: "ottl/StatementExecution",
+					attributes: []attribute.KeyValue{
+						{
+							Key:   "statement",
+							Value: attribute.StringValue("test"),
+						},
+						{
+							Key:   "condition.matched",
+							Value: attribute.BoolValue(false),
+						},
+					},
+					status: trace.Status{
+						Code:        codes.Error,
+						Description: "failed to execute statement 'test': test",
+					},
+				},
+				{
+					name: "ottl/StatementSequenceExecution",
+					status: trace.Status{
+						Code:        codes.Error,
+						Description: "failed to execute statement 'test': test",
+					},
+				},
+			},
 		},
 		{
 			name: "IgnoreError error from function",
@@ -2083,6 +2140,31 @@ func Test_Statements_Execute_Error(t *testing.T) {
 				return 1, fmt.Errorf("test")
 			},
 			errorMode: IgnoreError,
+			expectedSpans: []expectedSpan{
+				{
+					name: "ottl/StatementExecution",
+					attributes: []attribute.KeyValue{
+						{
+							Key:   "statement",
+							Value: attribute.StringValue("test"),
+						},
+						{
+							Key:   "condition.matched",
+							Value: attribute.BoolValue(true),
+						},
+					},
+					status: trace.Status{
+						Code:        codes.Error,
+						Description: "failed to execute statement 'test': test",
+					},
+				},
+				{
+					name: "ottl/StatementSequenceExecution",
+					status: trace.Status{
+						Code: codes.Ok,
+					},
+				},
+			},
 		},
 		{
 			name: "PropagateError error from function",
@@ -2093,6 +2175,32 @@ func Test_Statements_Execute_Error(t *testing.T) {
 				return 1, fmt.Errorf("test")
 			},
 			errorMode: PropagateError,
+			expectedSpans: []expectedSpan{
+				{
+					name: "ottl/StatementExecution",
+					attributes: []attribute.KeyValue{
+						{
+							Key:   "statement",
+							Value: attribute.StringValue("test"),
+						},
+						{
+							Key:   "condition.matched",
+							Value: attribute.BoolValue(true),
+						},
+					},
+					status: trace.Status{
+						Code:        codes.Error,
+						Description: "failed to execute statement 'test': test",
+					},
+				},
+				{
+					name: "ottl/StatementSequenceExecution",
+					status: trace.Status{
+						Code:        codes.Error,
+						Description: "failed to execute statement 'test': test",
+					},
+				},
+			},
 		},
 		{
 			name: "SilentError error from condition",
@@ -2103,6 +2211,31 @@ func Test_Statements_Execute_Error(t *testing.T) {
 				return 1, nil
 			},
 			errorMode: SilentError,
+			expectedSpans: []expectedSpan{
+				{
+					name: "ottl/StatementExecution",
+					attributes: []attribute.KeyValue{
+						{
+							Key:   "statement",
+							Value: attribute.StringValue("test"),
+						},
+						{
+							Key:   "condition.matched",
+							Value: attribute.BoolValue(false),
+						},
+					},
+					status: trace.Status{
+						Code:        codes.Error,
+						Description: "failed to execute statement 'test': test",
+					},
+				},
+				{
+					name: "ottl/StatementSequenceExecution",
+					status: trace.Status{
+						Code: codes.Ok,
+					},
+				},
+			},
 		},
 		{
 			name: "SilentError error from function",
@@ -2113,6 +2246,31 @@ func Test_Statements_Execute_Error(t *testing.T) {
 				return 1, fmt.Errorf("test")
 			},
 			errorMode: SilentError,
+			expectedSpans: []expectedSpan{
+				{
+					name: "ottl/StatementExecution",
+					attributes: []attribute.KeyValue{
+						{
+							Key:   "statement",
+							Value: attribute.StringValue("test"),
+						},
+						{
+							Key:   "condition.matched",
+							Value: attribute.BoolValue(true),
+						},
+					},
+					status: trace.Status{
+						Code:        codes.Error,
+						Description: "failed to execute statement 'test': test",
+					},
+				},
+				{
+					name: "ottl/StatementSequenceExecution",
+					status: trace.Status{
+						Code: codes.Ok,
+					},
+				},
+			},
 		},
 	}
 	for _, tt := range tests {
@@ -2122,11 +2280,15 @@ func Test_Statements_Execute_Error(t *testing.T) {
 					{
 						condition: BoolExpr[any]{tt.condition},
 						function:  Expr[any]{exprFunc: tt.function},
+						origText:  "test",
 					},
 				},
 				errorMode:         tt.errorMode,
 				telemetrySettings: componenttest.NewNopTelemetrySettings(),
 			}
+			spanRecorder := tracetest.NewSpanRecorder()
+			statements.telemetrySettings.TracerProvider = trace.NewTracerProvider(trace.WithSpanProcessor(spanRecorder))
+			statements.tracer = statements.telemetrySettings.TracerProvider.Tracer("ottl")
 
 			err := statements.Execute(context.Background(), nil)
 			if tt.errorMode == PropagateError {
@@ -2134,6 +2296,14 @@ func Test_Statements_Execute_Error(t *testing.T) {
 			} else {
 				assert.NoError(t, err)
 			}
+
+			require.Len(t, spanRecorder.Ended(), len(tt.expectedSpans))
+
+			for i, es := range tt.expectedSpans {
+				require.Equal(t, es.name, spanRecorder.Ended()[i].Name())
+				require.Equal(t, es.attributes, spanRecorder.Ended()[i].Attributes())
+				require.Equal(t, es.status, spanRecorder.Ended()[i].Status())
+			}
 		})
 	}
 }
@@ -2337,3 +2507,9 @@ func Test_ConditionSequence_Eval_Error(t *testing.T) {
 		})
 	}
 }
+
+type expectedSpan struct {
+	name       string
+	attributes []attribute.KeyValue
+	status     trace.Status
+}