Skip to content

Commit

Permalink
[exporterhelper] Add default batching for OTLP data type
Browse files Browse the repository at this point in the history
Make the exporter batching capability available to the regular exporter helpers, without requiring custom request types.
  • Loading branch information
dmitryax committed Mar 12, 2024
1 parent ef9caab commit c113899
Show file tree
Hide file tree
Showing 13 changed files with 1,098 additions and 20 deletions.
5 changes: 4 additions & 1 deletion .chloggen/batch-exporter-helper.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ issues: [8122]
# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:
subtext: |
`WithBatcher` can be used with both regular exporter helper (e.g. NewTracesExporter) and the request-based exporter
helper (e.g. NewTracesRequestExporter). The request-based exporter helpers require `WithRequestBatchFuncs` option
providing batching functions.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
Expand Down
17 changes: 10 additions & 7 deletions exporter/exporterhelper/batch_sender.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,17 @@ type batchSender struct {
}

// newBatchSender returns a new batch consumer component.
func newBatchSender(cfg exporterbatcher.Config, set exporter.CreateSettings) *batchSender {
func newBatchSender(cfg exporterbatcher.Config, set exporter.CreateSettings,
mf exporterbatcher.BatchMergeFunc[Request], msf exporterbatcher.BatchMergeSplitFunc[Request]) *batchSender {
bs := &batchSender{
activeBatch: newEmptyBatch(),
cfg: cfg,
logger: set.Logger,
shutdownCh: make(chan struct{}),
stopped: &atomic.Bool{},
resetTimerCh: make(chan struct{}),
activeBatch: newEmptyBatch(),
cfg: cfg,
logger: set.Logger,
mergeFunc: mf,
mergeSplitFunc: msf,
shutdownCh: make(chan struct{}),
stopped: &atomic.Bool{},
resetTimerCh: make(chan struct{}),
}
return bs
}
Expand Down
45 changes: 45 additions & 0 deletions exporter/exporterhelper/batch_sender_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,51 @@ func TestBatchSender_DrainActiveRequests(t *testing.T) {
assert.Equal(t, uint64(3), sink.itemsCount.Load())
}

// TestBatchSender_WithBatcherOption checks which combinations of WithBatcher, WithRequestBatchFuncs and the
// internal withBatchFuncs option let newBaseExporter succeed and which make it return an error.
func TestBatchSender_WithBatcherOption(t *testing.T) {
	testCases := []struct {
		name        string
		opts        []Option
		expectedErr bool
	}{
		{
			// No merge/split functions are supplied at all.
			name:        "no_funcs_set",
			opts:        []Option{WithBatcher(exporterbatcher.NewDefaultConfig())},
			expectedErr: true,
		},
		{
			// Functions come only from the internal option.
			name: "funcs_set_internally",
			opts: []Option{
				withBatchFuncs(fakeBatchMergeFunc, fakeBatchMergeSplitFunc),
				WithBatcher(exporterbatcher.NewDefaultConfig()),
			},
			expectedErr: false,
		},
		{
			// Functions are supplied both internally and through WithRequestBatchFuncs.
			name: "funcs_set_twice",
			opts: []Option{
				withBatchFuncs(fakeBatchMergeFunc, fakeBatchMergeSplitFunc),
				WithBatcher(
					exporterbatcher.NewDefaultConfig(),
					WithRequestBatchFuncs(fakeBatchMergeFunc, fakeBatchMergeSplitFunc),
				),
			},
			expectedErr: true,
		},
		{
			// WithRequestBatchFuncs is given nil functions.
			name:        "nil_funcs",
			opts:        []Option{WithBatcher(exporterbatcher.NewDefaultConfig(), WithRequestBatchFuncs(nil, nil))},
			expectedErr: true,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			be, err := newBaseExporter(defaultSettings, defaultType, newNoopObsrepSender, tc.opts...)
			if !tc.expectedErr {
				assert.NotNil(t, be)
				assert.NoError(t, err)
				return
			}
			assert.Nil(t, be)
			assert.Error(t, err)
		})
	}
}

func queueBatchExporter(t *testing.T, batchOption Option) *baseExporter {
be, err := newBaseExporter(defaultSettings, defaultType, newNoopObsrepSender, batchOption,
WithRequestQueue(exporterqueue.NewDefaultConfig(), exporterqueue.NewMemoryQueueFactory[Request]()))
Expand Down
42 changes: 33 additions & 9 deletions exporter/exporterhelper/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,27 +148,35 @@ func WithCapabilities(capabilities consumer.Capabilities) Option {
}

// BatcherOption apply changes to batcher sender.
type BatcherOption func(*batchSender)
type BatcherOption func(*batchSender) error

// WithRequestBatchFuncs sets the functions for merging and splitting batches for an exporter built for custom request types.
func WithRequestBatchFuncs(mf exporterbatcher.BatchMergeFunc[Request], msf exporterbatcher.BatchMergeSplitFunc[Request]) BatcherOption {
return func(bs *batchSender) {
return func(bs *batchSender) error {
if mf == nil || msf == nil {
return fmt.Errorf("WithRequestBatchFuncs must be provided with non-nil functions")
}
if bs.mergeFunc != nil || bs.mergeSplitFunc != nil {
return fmt.Errorf("WithRequestBatchFuncs can only be used once with request-based exporters")
}
bs.mergeFunc = mf
bs.mergeSplitFunc = msf
return nil
}
}

// WithBatcher enables batching for an exporter based on custom request types.
// For now, it can be used only with the New[Traces|Metrics|Logs]RequestExporter exporter helpers and
// WithRequestBatchFuncs provided.
// TODO: Add OTLP-based batch functions applied by default so it can be used with New[Traces|Metrics|Logs]Exporter exporter helpers.
// This API is at the early stage of development and may change without backward compatibility
// until https://github.com/open-telemetry/opentelemetry-collector/issues/8122 is resolved.
func WithBatcher(cfg exporterbatcher.Config, opts ...BatcherOption) Option {
return func(o *baseExporter) error {
bs := newBatchSender(cfg, o.set)
bs := newBatchSender(cfg, o.set, o.batchMergeFunc, o.batchMergeSplitfunc)
for _, opt := range opts {
opt(bs)
if err := opt(bs); err != nil {
return err
}
}
if bs.mergeFunc == nil || bs.mergeSplitFunc == nil {
return fmt.Errorf("WithRequestBatchFuncs must be provided for the batcher applied to the request-based exporters")
Expand Down Expand Up @@ -196,14 +204,28 @@ func withUnmarshaler(unmarshaler exporterqueue.Unmarshaler[Request]) Option {
}
}

// withBatchFuncs stores the default merge and merge-split functions on the base exporter for
// OTLP-based exporters.
// It must be provided as the first option when creating a new exporter helper.
func withBatchFuncs(mf exporterbatcher.BatchMergeFunc[Request], msf exporterbatcher.BatchMergeSplitFunc[Request]) Option {
	return func(o *baseExporter) error {
		o.batchMergeFunc, o.batchMergeSplitfunc = mf, msf
		return nil
	}
}

// baseExporter contains common fields between different exporter types.
type baseExporter struct {
component.StartFunc
component.ShutdownFunc

signal component.DataType

batchMergeFunc exporterbatcher.BatchMergeFunc[Request]
batchMergeSplitfunc exporterbatcher.BatchMergeSplitFunc[Request]

marshaler exporterqueue.Marshaler[Request]
unmarshaler exporterqueue.Unmarshaler[Request]
signal component.DataType

set exporter.CreateSettings
obsrep *ObsReport
Expand Down Expand Up @@ -251,11 +273,13 @@ func newBaseExporter(set exporter.CreateSettings, signal component.DataType, osf

be.connectSenders()

// If queue sender is enabled assign to the batch sender the same number of workers.
if qs, ok := be.queueSender.(*queueSender); ok {
if bs, ok := be.batchSender.(*batchSender); ok {
if bs, ok := be.batchSender.(*batchSender); ok {
// If queue sender is enabled assign to the batch sender the same number of workers.
if qs, ok := be.queueSender.(*queueSender); ok {
bs.concurrencyLimit = uint64(qs.numConsumers)
}
// Batcher sender mutates the data.
be.consumerOptions = append(be.consumerOptions, consumer.WithCapabilities(consumer.Capabilities{MutatesData: true}))
}

return be, nil
Expand Down
5 changes: 4 additions & 1 deletion exporter/exporterhelper/logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,10 @@ func NewLogsExporter(
if pusher == nil {
return nil, errNilPushLogsData
}
logsOpts := []Option{withMarshaler(logsRequestMarshaler), withUnmarshaler(newLogsRequestUnmarshalerFunc(pusher))}
logsOpts := []Option{
withMarshaler(logsRequestMarshaler), withUnmarshaler(newLogsRequestUnmarshalerFunc(pusher)),
withBatchFuncs(mergeLogs, mergeSplitLogs),
}
return NewLogsRequestExporter(ctx, set, requestFromLogs(pusher), append(logsOpts, options...)...)
}

Expand Down
136 changes: 136 additions & 0 deletions exporter/exporterhelper/logs_batch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package exporterhelper // import "go.opentelemetry.io/collector/exporter/exporterhelper"

import (
"context"
"errors"

"go.opentelemetry.io/collector/exporter/exporterbatcher"
"go.opentelemetry.io/collector/pdata/plog"
)

// mergeLogs combines two logs requests into one by moving every resource logs entry of r2 onto the
// end of r1 and returning r1. Both arguments must be *logsRequest values; otherwise an error is
// returned and nothing is moved.
func mergeLogs(_ context.Context, r1 Request, r2 Request) (Request, error) {
	dst, dstOK := r1.(*logsRequest)
	src, srcOK := r2.(*logsRequest)
	if !(dstOK && srcOK) {
		return nil, errors.New("invalid input type")
	}

	src.ld.ResourceLogs().MoveAndAppendTo(dst.ld.ResourceLogs())
	return dst, nil
}

// mergeSplitLogs merges and/or splits the log records of r1 and r2 into a list of requests that each
// hold at most cfg.MaxSizeItems log records.
//
// Either input may be nil, in which case it is skipped. The inputs are consumed: their data is moved
// into the returned requests, and an input request that fits as a whole may itself be reused as an
// output request. An error is returned when a non-nil input is not a *logsRequest.
//
// A non-positive cfg.MaxSizeItems is treated as "no size limit" and the inputs are simply merged into
// a single request (NOTE(review): this mirrors how a zero limit is described for MaxSizeConfig; confirm).
func mergeSplitLogs(_ context.Context, cfg exporterbatcher.MaxSizeConfig, r1 Request, r2 Request) ([]Request, error) {
	var (
		res          []Request
		destReq      *logsRequest
		capacityLeft = cfg.MaxSizeItems
	)
	// With a non-positive limit extractLogs would never extract anything, so splitting would silently
	// drop every non-empty request. Merge without splitting instead.
	unlimited := cfg.MaxSizeItems <= 0

	for _, req := range []Request{r1, r2} {
		if req == nil {
			continue
		}
		srcReq, ok := req.(*logsRequest)
		if !ok {
			return nil, errors.New("invalid input type")
		}

		// The whole request fits into the batch being built: take it as is.
		if unlimited || srcReq.ld.LogRecordCount() <= capacityLeft {
			if destReq == nil {
				destReq = srcReq
			} else {
				srcReq.ld.ResourceLogs().MoveAndAppendTo(destReq.ld.ResourceLogs())
			}
			capacityLeft = cfg.MaxSizeItems - destReq.ld.LogRecordCount()
			continue
		}

		// The request does not fit: carve it up into pieces of at most capacityLeft records.
		for {
			// Close the current batch once it is full. Doing this before extracting (rather than
			// only after) also covers a batch that was filled exactly by a previous whole request;
			// otherwise extractLogs would be asked for zero records and the rest of this request
			// would be lost.
			if destReq != nil && capacityLeft <= 0 {
				res = append(res, destReq)
				destReq = nil
				capacityLeft = cfg.MaxSizeItems
			}

			extractedLogs := extractLogs(srcReq.ld, capacityLeft)
			if extractedLogs.LogRecordCount() == 0 {
				// Nothing left to take from this request.
				break
			}
			capacityLeft -= extractedLogs.LogRecordCount()
			if destReq == nil {
				destReq = &logsRequest{ld: extractedLogs, pusher: srcReq.pusher}
			} else {
				extractedLogs.ResourceLogs().MoveAndAppendTo(destReq.ld.ResourceLogs())
			}
		}
	}

	// Emit the trailing, possibly partially filled batch.
	if destReq != nil {
		res = append(res, destReq)
	}
	return res, nil
}

// extractLogs moves up to count log records out of srcLogs into a newly created plog.Logs and returns it.
// Resource logs that fit entirely are moved and removed from srcLogs; the first one that does not fit is
// split via extractResourceLogs and its remainder stays in srcLogs.
func extractLogs(srcLogs plog.Logs, count int) plog.Logs {
	extracted := plog.NewLogs()
	remaining := count
	srcLogs.ResourceLogs().RemoveIf(func(rl plog.ResourceLogs) bool {
		if remaining <= 0 {
			// Budget exhausted: keep everything else in the source.
			return false
		}
		if resourceLogsCount(rl) <= remaining {
			// Fits as a whole: move it over and drop it from the source.
			remaining -= resourceLogsCount(rl)
			rl.MoveTo(extracted.ResourceLogs().AppendEmpty())
			return true
		}
		// Too large: take only part of it and keep the rest in the source.
		part := extractResourceLogs(rl, remaining)
		remaining -= resourceLogsCount(part)
		part.MoveTo(extracted.ResourceLogs().AppendEmpty())
		return false
	})
	return extracted
}

// extractResourceLogs builds a new plog.ResourceLogs carrying a copy of srcRL's schema URL and resource,
// and moves up to count log records from srcRL's scope logs into it. Scope logs that fit entirely are
// moved and removed from srcRL; the first one that does not fit is split via extractScopeLogs.
func extractResourceLogs(srcRL plog.ResourceLogs, count int) plog.ResourceLogs {
	extracted := plog.NewResourceLogs()
	extracted.SetSchemaUrl(srcRL.SchemaUrl())
	srcRL.Resource().CopyTo(extracted.Resource())

	remaining := count
	srcRL.ScopeLogs().RemoveIf(func(sl plog.ScopeLogs) bool {
		if remaining <= 0 {
			// Budget exhausted: keep everything else in the source.
			return false
		}
		if sl.LogRecords().Len() <= remaining {
			// Fits as a whole: move it over and drop it from the source.
			remaining -= sl.LogRecords().Len()
			sl.MoveTo(extracted.ScopeLogs().AppendEmpty())
			return true
		}
		// Too large: take only part of it and keep the rest in the source.
		part := extractScopeLogs(sl, remaining)
		remaining -= part.LogRecords().Len()
		part.MoveTo(extracted.ScopeLogs().AppendEmpty())
		return false
	})
	return extracted
}

// extractScopeLogs builds a new plog.ScopeLogs carrying a copy of srcSL's schema URL and scope, and
// moves up to count log records from srcSL into it. Moved records are removed from srcSL.
func extractScopeLogs(srcSL plog.ScopeLogs, count int) plog.ScopeLogs {
	extracted := plog.NewScopeLogs()
	extracted.SetSchemaUrl(srcSL.SchemaUrl())
	srcSL.Scope().CopyTo(extracted.Scope())

	moved := 0
	srcSL.LogRecords().RemoveIf(func(lr plog.LogRecord) bool {
		if moved >= count {
			// Requested number of records already taken (or none requested).
			return false
		}
		lr.MoveTo(extracted.LogRecords().AppendEmpty())
		moved++
		return true
	})
	return extracted
}

// resourceLogsCount returns the number of log records summed over all scope logs of rl.
func resourceLogsCount(rl plog.ResourceLogs) int {
	scopes := rl.ScopeLogs()
	total := 0
	for i, n := 0, scopes.Len(); i < n; i++ {
		total += scopes.At(i).LogRecords().Len()
	}
	return total
}
Loading

0 comments on commit c113899

Please sign in to comment.