-
Notifications
You must be signed in to change notification settings - Fork 2.5k
/
Copy pathparser.go
107 lines (92 loc) · 3.46 KB
/
parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0
package csv // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/parser/csv"
import (
"context"
"fmt"
"strings"
"go.uber.org/zap"
"github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/parseutils"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/entry"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/helper"
)
// Parser is an operator that parses csv in an entry.
type Parser struct {
helper.ParserOperator
fieldDelimiter rune
headerDelimiter rune
headerAttribute string
lazyQuotes bool
ignoreQuotes bool
parse parseFunc
}
type parseFunc func(any) (any, error)
// Process will parse an entry for csv.
func (p *Parser) Process(ctx context.Context, e *entry.Entry) error {
// Static parse function
if p.parse != nil {
return p.ParserOperator.ProcessWith(ctx, e, p.parse)
}
// Dynamically generate the parse function based on a header attribute
h, ok := e.Attributes[p.headerAttribute]
if !ok {
p.Logger().Error("read dynamic header attribute", zap.String("attribute", p.headerAttribute))
return fmt.Errorf("failed to read dynamic header attribute %s", p.headerAttribute)
}
headerString, ok := h.(string)
if !ok {
p.Logger().Error("header must be string", zap.String("type", fmt.Sprintf("%T", h)))
return fmt.Errorf("header is expected to be a string but is %T", h)
}
headers := strings.Split(headerString, string([]rune{p.headerDelimiter}))
parse := generateParseFunc(headers, p.fieldDelimiter, p.lazyQuotes, p.ignoreQuotes)
return p.ParserOperator.ProcessWith(ctx, e, parse)
}
// generateParseFunc returns a parse function for a given header, allowing
// each entry to have a potentially unique set of fields when using dynamic
// field names retrieved from an entry's attribute
func generateParseFunc(headers []string, fieldDelimiter rune, lazyQuotes bool, ignoreQuotes bool) parseFunc {
if ignoreQuotes {
return generateSplitParseFunc(headers, fieldDelimiter)
}
return generateCSVParseFunc(headers, fieldDelimiter, lazyQuotes)
}
// generateCSVParseFunc returns a parse function for a given header and field delimiter, which parses a line of CSV text.
func generateCSVParseFunc(headers []string, fieldDelimiter rune, lazyQuotes bool) parseFunc {
return func(value any) (any, error) {
csvLine, err := valueAsString(value)
if err != nil {
return nil, err
}
joinedLine, err := parseutils.ReadCSVRow(csvLine, fieldDelimiter, lazyQuotes)
if err != nil {
return nil, err
}
return parseutils.MapCSVHeaders(headers, joinedLine)
}
}
// generateSplitParseFunc returns a parse function (which ignores quotes) for a given header and field delimiter.
func generateSplitParseFunc(headers []string, fieldDelimiter rune) parseFunc {
return func(value any) (any, error) {
csvLine, err := valueAsString(value)
if err != nil {
return nil, err
}
// This parse function does not do any special quote handling; Splitting on the delimiter is sufficient.
fields := strings.Split(csvLine, string(fieldDelimiter))
return parseutils.MapCSVHeaders(headers, fields)
}
}
// valueAsString interprets the given value as a string.
func valueAsString(value any) (string, error) {
var s string
switch t := value.(type) {
case string:
s += t
case []byte:
s += string(t)
default:
return s, fmt.Errorf("type '%T' cannot be parsed as csv", value)
}
return s, nil
}