From 2e319b595106a347d034141d304fdd2447b51f71 Mon Sep 17 00:00:00 2001 From: adatzer Date: Thu, 7 Apr 2022 11:24:45 +0300 Subject: [PATCH] Add custom transformation layer (closes #146) --- cmd/cli/cli.go | 3 +- cmd/serverless.go | 3 +- config/config.go | 93 +- config/config_test.go | 117 +- config/examples/README.md | 18 +- config/test-fixtures/invalids.hcl | 4 +- .../transform-invalid-layer-js.hcl | 7 + .../transform-invalid-layer-lua.hcl | 7 + .../test-fixtures/transform-js-extended.hcl | 12 + config/test-fixtures/transform-js-simple.hcl | 9 + .../test-fixtures/transform-lua-extended.hcl | 11 + config/test-fixtures/transform-lua-simple.hcl | 9 + go.mod | 9 +- go.sum | 19 +- pkg/transform/engine.go | 35 + pkg/transform/engine_javascript.go | 277 +++ pkg/transform/engine_javascript_test.go | 1937 ++++++++++++++++ pkg/transform/engine_lua.go | 427 ++++ pkg/transform/engine_lua_test.go | 1958 +++++++++++++++++ .../snowplow_enriched_filter_test.go | 46 +- .../transformconfig/transform_config.go | 272 +++ .../transformconfig/transform_config_test.go | 491 +++++ 22 files changed, 5639 insertions(+), 125 deletions(-) create mode 100644 config/test-fixtures/transform-invalid-layer-js.hcl create mode 100644 config/test-fixtures/transform-invalid-layer-lua.hcl create mode 100644 config/test-fixtures/transform-js-extended.hcl create mode 100644 config/test-fixtures/transform-js-simple.hcl create mode 100644 config/test-fixtures/transform-lua-extended.hcl create mode 100644 config/test-fixtures/transform-lua-simple.hcl create mode 100644 pkg/transform/engine.go create mode 100644 pkg/transform/engine_javascript.go create mode 100644 pkg/transform/engine_javascript_test.go create mode 100644 pkg/transform/engine_lua.go create mode 100644 pkg/transform/engine_lua_test.go create mode 100644 pkg/transform/transformconfig/transform_config.go create mode 100644 pkg/transform/transformconfig/transform_config_test.go diff --git a/cmd/cli/cli.go b/cmd/cli/cli.go index 
39a665fc..f6dfa9f1 100644 --- a/cmd/cli/cli.go +++ b/cmd/cli/cli.go @@ -31,6 +31,7 @@ import ( "github.com/snowplow-devops/stream-replicator/pkg/target/targetiface" "github.com/snowplow-devops/stream-replicator/pkg/telemetry" "github.com/snowplow-devops/stream-replicator/pkg/transform" + "github.com/snowplow-devops/stream-replicator/pkg/transform/transformconfig" ) const ( @@ -80,7 +81,7 @@ func RunCli(supportedSourceConfigPairs []sourceconfig.ConfigPair) { return err } - tr, err := cfg.GetTransformations() + tr, err := transformconfig.GetTransformations(cfg) if err != nil { return err } diff --git a/cmd/serverless.go b/cmd/serverless.go index 06b19776..ba50a016 100644 --- a/cmd/serverless.go +++ b/cmd/serverless.go @@ -13,6 +13,7 @@ import ( log "github.com/sirupsen/logrus" "github.com/snowplow-devops/stream-replicator/pkg/models" + "github.com/snowplow-devops/stream-replicator/pkg/transform/transformconfig" ) // ServerlessRequestHandler is a common function for all @@ -34,7 +35,7 @@ func ServerlessRequestHandler(messages []*models.Message) error { } t.Open() - tr, err := cfg.GetTransformations() + tr, err := transformconfig.GetTransformations(cfg) if err != nil { return err } diff --git a/config/config.go b/config/config.go index e231367d..ff1aa4ce 100644 --- a/config/config.go +++ b/config/config.go @@ -25,7 +25,6 @@ import ( "github.com/snowplow-devops/stream-replicator/pkg/statsreceiver/statsreceiveriface" "github.com/snowplow-devops/stream-replicator/pkg/target" "github.com/snowplow-devops/stream-replicator/pkg/target/targetiface" - "github.com/snowplow-devops/stream-replicator/pkg/transform" ) // Config holds the configuration data along with the decoder to decode them @@ -36,15 +35,15 @@ type Config struct { // ConfigurationData for holding all configuration options type ConfigurationData struct { - Source *Component `hcl:"source,block" envPrefix:"SOURCE_"` - Target *Component `hcl:"target,block" envPrefix:"TARGET_"` - FailureTarget *FailureConfig 
`hcl:"failure_target,block"` - Sentry *SentryConfig `hcl:"sentry,block"` - StatsReceiver *StatsConfig `hcl:"stats_receiver,block"` - Transformation string `hcl:"message_transformation,optional" env:"MESSAGE_TRANSFORMATION"` - LogLevel string `hcl:"log_level,optional" env:"LOG_LEVEL"` - GoogleServiceAccountB64 string `hcl:"google_application_credentials_b64,optional" env:"GOOGLE_APPLICATION_CREDENTIALS_B64"` - UserProvidedID string `hcl:"user_provided_id,optional" env:"USER_PROVIDED_ID"` + Source *Component `hcl:"source,block" envPrefix:"SOURCE_"` + Target *Component `hcl:"target,block" envPrefix:"TARGET_"` + FailureTarget *FailureConfig `hcl:"failure_target,block"` + Sentry *SentryConfig `hcl:"sentry,block"` + StatsReceiver *StatsConfig `hcl:"stats_receiver,block"` + Transform *TransformConfig `hcl:"transform,block"` + LogLevel string `hcl:"log_level,optional" env:"LOG_LEVEL"` + GoogleServiceAccountB64 string `hcl:"google_application_credentials_b64,optional" env:"GOOGLE_APPLICATION_CREDENTIALS_B64"` + UserProvidedID string `hcl:"user_provided_id,optional" env:"USER_PROVIDED_ID"` } // Component is a type to abstract over configuration blocks. @@ -80,6 +79,12 @@ type StatsConfig struct { BufferSec int `hcl:"buffer_sec,optional" env:"STATS_RECEIVER_BUFFER_SEC"` } +// TransformConfig holds configuration for transformations. +type TransformConfig struct { + Message string `hcl:"message_transformation,optional" env:"MESSAGE_TRANSFORMATION"` + Layer *Use `hcl:"use,block" envPrefix:"TRANSFORMATION_LAYER_"` +} + // defaultConfigData returns the initial main configuration target. 
func defaultConfigData() *ConfigurationData { return &ConfigurationData{ @@ -98,8 +103,11 @@ func defaultConfigData() *ConfigurationData { TimeoutSec: 1, BufferSec: 15, }, - Transformation: "none", - LogLevel: "info", + Transform: &TransformConfig{ + Message: "none", + Layer: &Use{}, + }, + LogLevel: "info", } } @@ -299,48 +307,6 @@ func (c *Config) GetFailureTarget(AppName string, AppVersion string) (failureifa return nil, fmt.Errorf("could not interpret failure target configuration for %q", useFailureTarget.Name) } -// GetTransformations builds and returns transformationApplyFunction from the transformations configured -func (c *Config) GetTransformations() (transform.TransformationApplyFunction, error) { - funcs := make([]transform.TransformationFunction, 0, 0) - - // Parse list of transformations - transformations := strings.Split(c.Data.Transformation, ",") - - for _, transformation := range transformations { - // Parse function name-option sets - funcOpts := strings.Split(transformation, ":") - - switch funcOpts[0] { - case "spEnrichedToJson": - funcs = append(funcs, transform.SpEnrichedToJSON) - case "spEnrichedSetPk": - funcs = append(funcs, transform.NewSpEnrichedSetPkFunction(funcOpts[1])) - case "spEnrichedFilter": - filterFunc, err := transform.NewSpEnrichedFilterFunction(funcOpts[1]) - if err != nil { - return nil, err - } - funcs = append(funcs, filterFunc) - case "spEnrichedFilterContext": - filterFunc, err := transform.NewSpEnrichedFilterFunctionContext(funcOpts[1]) - if err != nil { - return nil, err - } - funcs = append(funcs, filterFunc) - case "spEnrichedFilterUnstructEvent": - filterFunc, err := transform.NewSpEnrichedFilterFunctionUnstructEvent(funcOpts[1]) - if err != nil { - return nil, err - } - funcs = append(funcs, filterFunc) - case "none": - default: - return nil, errors.New(fmt.Sprintf("Invalid transformation found; expected one of 'spEnrichedToJson', 'spEnrichedSetPk:{option}', spEnrichedFilter:{option} and got '%s'", 
c.Data.Transformation)) - } - } - return transform.NewTransformation(funcs...), nil -} - // GetTags returns a list of tags to use in identifying this instance of stream-replicator with enough // entropy so as to avoid collisions as it should not be possible to have both the host and process_id be // the same. @@ -398,3 +364,22 @@ func (c *Config) GetStatsReceiver(tags map[string]string) (statsreceiveriface.St return nil, errors.New(fmt.Sprintf("Invalid stats receiver found; expected one of 'statsd' and got '%s'", useReceiver.Name)) } } + +// ProvideTransformMessage implements transformconfig.configProvider +func (c *Config) ProvideTransformMessage() string { + return c.Data.Transform.Message +} + +// ProvideTransformLayerName implements transformconfig.configProvider +func (c *Config) ProvideTransformLayerName() string { + return c.Data.Transform.Layer.Name +} + +// ProvideTransformComponent implements transformconfig.configProvider +func (c *Config) ProvideTransformComponent(p Pluggable) (interface{}, error) { + decoderOpts := &DecoderOptions{ + Input: c.Data.Transform.Layer.Body, + } + + return c.CreateComponent(p, decoderOpts) +} diff --git a/config/config_test.go b/config/config_test.go index 3cdec05a..3635bc2f 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -11,6 +11,7 @@ import ( "path/filepath" "testing" + "github.com/snowplow-devops/stream-replicator/pkg/transform" "github.com/stretchr/testify/assert" ) @@ -26,7 +27,7 @@ func TestNewConfig(t *testing.T) { assert.Equal("info", c.Data.LogLevel) assert.Equal("stdout", c.Data.Target.Use.Name) - assert.Equal("none", c.Data.Transform.Transformation) + assert.Equal("none", c.Data.Transform.Message) assert.Equal("stdin", c.Data.Source.Use.Name) // Tests on sources moved to the source package. 
@@ -35,10 +36,6 @@ func TestNewConfig(t *testing.T) { assert.NotNil(target) assert.Nil(err) - transformation, err := c.GetTransformations() - assert.NotNil(transformation) - assert.Nil(err) - failureTarget, err := c.GetFailureTarget("testAppName", "0.0.0") assert.NotNil(failureTarget) assert.Nil(err) @@ -83,44 +80,6 @@ func TestNewConfig_FromEnvInvalid(t *testing.T) { assert.NotNil(err) } -func TestNewConfig_InvalidTransformation(t *testing.T) { - assert := assert.New(t) - - defer os.Unsetenv("MESSAGE_TRANSFORMATION") - - os.Setenv("MESSAGE_TRANSFORMATION", "fake") - - c, err := NewConfig() - assert.NotNil(c) - if err != nil { - t.Fatalf("function NewConfig failed with error: %q", err.Error()) - } - - transformation, err := c.GetTransformations() - assert.Nil(transformation) - assert.NotNil(err) - assert.Equal("Invalid transformation found; expected one of 'spEnrichedToJson', 'spEnrichedSetPk:{option}', spEnrichedFilter:{option} and got 'fake'", err.Error()) -} - -func TestNewConfig_FilterFailure(t *testing.T) { - assert := assert.New(t) - - defer os.Unsetenv("MESSAGE_TRANSFORMATION") - - os.Setenv("MESSAGE_TRANSFORMATION", "spEnrichedFilter:incompatibleArg") - - c, err := NewConfig() - assert.NotNil(c) - if err != nil { - t.Fatalf("function NewConfig failed with error: %q", err.Error()) - } - - transformation, err := c.GetTransformations() - assert.Nil(transformation) - assert.NotNil(err) - assert.Equal(`invalid filter function config, must be of the format {field name}=={value}[|{value}|...] 
or {field name}!={value}[|{value}|...]`, err.Error()) -} - func TestNewConfig_InvalidTarget(t *testing.T) { assert := assert.New(t) @@ -230,13 +189,6 @@ func TestNewConfig_Hcl_invalids(t *testing.T) { t.Fatalf("function NewConfig failed with error: %q", err.Error()) } - t.Run("invalid_transformation", func(t *testing.T) { - transformation, err := c.GetTransformations() - assert.Nil(transformation) - assert.NotNil(err) - assert.Equal("Invalid transformation found; expected one of 'spEnrichedToJson', 'spEnrichedSetPk:{option}', spEnrichedFilter:{option} and got 'fakeHCL'", err.Error()) - }) - t.Run("invalid_target", func(t *testing.T) { target, err := c.GetTarget() assert.Nil(target) @@ -273,7 +225,7 @@ func TestNewConfig_Hcl_defaults(t *testing.T) { assert.Equal(c.Data.Sentry.Debug, false) assert.Equal(c.Data.StatsReceiver.TimeoutSec, 1) assert.Equal(c.Data.StatsReceiver.BufferSec, 15) - assert.Equal(c.Data.Transform.Transformation, "none") + assert.Equal(c.Data.Transform.Message, "none") assert.Equal(c.Data.LogLevel, "info") } @@ -293,3 +245,66 @@ func TestNewConfig_Hcl_sentry(t *testing.T) { assert.Equal(c.Data.Sentry.Tags, "{\"testKey\":\"testValue\"}") assert.Equal(c.Data.Sentry.Dsn, "testDsn") } + +func TestDefaultTransformation(t *testing.T) { + assert := assert.New(t) + + t.Setenv("STREAM_REPLICATOR_CONFIG_FILE", "") + t.Setenv("MESSAGE_TRANSFORMATION", "") + + c, err := NewConfig() + assert.NotNil(c) + if err != nil { + t.Fatalf("function NewConfig failed with error: %q", err.Error()) + } + + assert.Equal("none", c.Data.Transform.Message) + assert.Equal("none", c.ProvideTransformMessage()) + assert.Equal("", c.ProvideTransformLayerName()) +} + +func TestTransformationProviderImplementation(t *testing.T) { + testFixPath := "./test-fixtures" + testCases := []struct { + File string + Plug Pluggable + Message string + LayerName string + }{ + { + File: "transform-lua-simple.hcl", + Plug: transform.LuaLayer().(Pluggable), + Message: "lua:fun", + LayerName: "lua", 
+ }, + { + File: "transform-js-simple.hcl", + Plug: transform.JSLayer().(Pluggable), + Message: "js:fun", + LayerName: "js", + }, + } + + for _, tt := range testCases { + t.Run(tt.File, func(t *testing.T) { + assert := assert.New(t) + + configFile := filepath.Join(testFixPath, tt.File) + t.Setenv("STREAM_REPLICATOR_CONFIG_FILE", configFile) + + c, err := NewConfig() + assert.NotNil(c) + if err != nil { + t.Fatalf("function NewConfig failed with error: %q", err.Error()) + } + + assert.Equal(tt.Message, c.ProvideTransformMessage()) + assert.Equal(tt.LayerName, c.ProvideTransformLayerName()) + + component, err := c.ProvideTransformComponent(tt.Plug) + assert.Nil(err) + assert.NotNil(component) + + }) + } +} diff --git a/config/examples/README.md b/config/examples/README.md index 3ce0646b..066e56b0 100644 --- a/config/examples/README.md +++ b/config/examples/README.md @@ -49,8 +49,11 @@ stats_receiver { // block for configuring sentry sentry {} -// string to configure message transformation (default: "none") -message_transformation = "none" +// block for configuring transformations +transform { + // string to configure message transformation (default: "none") + message_transformation = "none" +} // log level configuration (default: "info") log_level = "info" @@ -99,6 +102,17 @@ sentry { debug = true } +transform { + message_transformation = "spEnrichedFilter:app_id==myApp,js:customFunction" + + use "js" { + source_b64 = "CmZ1bmN0aW9uIGN1c3RvbUZ1bmN0aW9uKGlucHV0KSB7CiAgICByZXR1cm4gaW5wdXQ7Cn0K" + timeout_sec = 2 + disable_source_maps = false + snowplow_mode = true + } +} + log_level = "debug" user_provided_id = "my-example-id" diff --git a/config/test-fixtures/invalids.hcl b/config/test-fixtures/invalids.hcl index 7075e475..1944444e 100644 --- a/config/test-fixtures/invalids.hcl +++ b/config/test-fixtures/invalids.hcl @@ -1,6 +1,8 @@ # configuration with various invalid options -message_transformation = "fakeHCL" +transform { + message_transformation = "fakeHCL" +} 
target { use "fakeHCL" {} diff --git a/config/test-fixtures/transform-invalid-layer-js.hcl b/config/test-fixtures/transform-invalid-layer-js.hcl new file mode 100644 index 00000000..fb4a2191 --- /dev/null +++ b/config/test-fixtures/transform-invalid-layer-js.hcl @@ -0,0 +1,7 @@ +# transform configuration + +transform { + message_transformation = "js:fun" + + use "fake" {} +} diff --git a/config/test-fixtures/transform-invalid-layer-lua.hcl b/config/test-fixtures/transform-invalid-layer-lua.hcl new file mode 100644 index 00000000..f8c46865 --- /dev/null +++ b/config/test-fixtures/transform-invalid-layer-lua.hcl @@ -0,0 +1,7 @@ +# transform configuration + +transform { + message_transformation = "lua:fun" + + use "fake" {} +} diff --git a/config/test-fixtures/transform-js-extended.hcl b/config/test-fixtures/transform-js-extended.hcl new file mode 100644 index 00000000..6292e2fe --- /dev/null +++ b/config/test-fixtures/transform-js-extended.hcl @@ -0,0 +1,12 @@ +# transform configuration - js - extended + +transform { + message_transformation = "js:fun" + + use "js" { + source_b64 = "CglmdW5jdGlvbiBmb28oeCkgewoJICAgIHJldHVybiB4OwoJfQoJ" + timeout_sec = 10 + disable_source_maps = false + snowplow_mode = true + } +} diff --git a/config/test-fixtures/transform-js-simple.hcl b/config/test-fixtures/transform-js-simple.hcl new file mode 100644 index 00000000..3e766b10 --- /dev/null +++ b/config/test-fixtures/transform-js-simple.hcl @@ -0,0 +1,9 @@ +# transform configuration - js - simple + +transform { + message_transformation = "js:fun" + + use "js" { + source_b64 = "CglmdW5jdGlvbiBmb28oeCkgewoJICAgIHJldHVybiB4OwoJfQoJ" + } +} diff --git a/config/test-fixtures/transform-lua-extended.hcl b/config/test-fixtures/transform-lua-extended.hcl new file mode 100644 index 00000000..f728b995 --- /dev/null +++ b/config/test-fixtures/transform-lua-extended.hcl @@ -0,0 +1,11 @@ +# transform configuration - lua - extended + +transform { + message_transformation = "lua:fun" + + use "lua" 
{ + source_b64 = "CglmdW5jdGlvbiBmb28oeCkKICAgICAgICAgICByZXR1cm4geAogICAgICAgIGVuZAoJ" + timeout_sec = 10 + sandbox = false + } +} diff --git a/config/test-fixtures/transform-lua-simple.hcl b/config/test-fixtures/transform-lua-simple.hcl new file mode 100644 index 00000000..3f8c2ba0 --- /dev/null +++ b/config/test-fixtures/transform-lua-simple.hcl @@ -0,0 +1,9 @@ +# transform configuration - lua - simple + +transform { + message_transformation = "lua:fun" + + use "lua" { + source_b64 = "CglmdW5jdGlvbiBmb28oeCkKICAgICAgICAgICByZXR1cm4geAogICAgICAgIGVuZAoJ" + } +} diff --git a/go.mod b/go.mod index 29b7739b..45fd6602 100644 --- a/go.mod +++ b/go.mod @@ -24,7 +24,7 @@ require ( github.com/hashicorp/go-multierror v1.1.1 github.com/jpillora/backoff v1.0.0 // indirect github.com/klauspost/compress v1.15.0 // indirect - github.com/mitchellh/mapstructure v1.4.1 // indirect + github.com/mitchellh/mapstructure v1.4.1 github.com/myesui/uuid v1.0.0 // indirect github.com/pkg/errors v0.9.1 github.com/sirupsen/logrus v1.8.1 @@ -51,9 +51,14 @@ require ( require ( github.com/davecgh/go-spew v1.1.1 + github.com/dop251/goja v0.0.0-20220405120441-9037c2b61cbf + github.com/goccy/go-json v0.9.7 github.com/hashicorp/hcl/v2 v2.11.1 github.com/snowplow/snowplow-golang-tracker/v2 v2.4.1 + github.com/yuin/gluamapper v0.0.0-20150323120927-d836955830e7 + github.com/yuin/gopher-lua v0.0.0-20210529063254-f4c35e4016d9 github.com/zclconf/go-cty v1.10.0 + layeh.com/gopher-json v0.0.0-20201124131017-552bb3c4c3bf ) require ( @@ -66,9 +71,11 @@ require ( github.com/agext/levenshtein v1.2.1 // indirect github.com/apparentlymart/go-textseg/v13 v13.0.0 // indirect github.com/devigned/tab v0.1.1 // indirect + github.com/dlclark/regexp2 v1.4.1-0.20201116162257-a2a8dda75c91 // indirect github.com/eapache/go-resiliency v1.2.0 // indirect github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21 // indirect github.com/eapache/queue v1.1.0 // indirect + github.com/go-sourcemap/sourcemap 
v2.1.3+incompatible // indirect github.com/golang/protobuf v1.5.2 // indirect github.com/google/go-cmp v0.5.6 // indirect github.com/googleapis/gax-go/v2 v2.0.5 // indirect diff --git a/go.sum b/go.sum index 7a4bcc94..cfea9f12 100644 --- a/go.sum +++ b/go.sum @@ -159,6 +159,11 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dimchansky/utfbom v1.1.0 h1:FcM3g+nofKgUteL8dm/UpdRXNC9KmADgTpLKsu0TRo4= github.com/dimchansky/utfbom v1.1.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQvIirEdv+8= +github.com/dlclark/regexp2 v1.4.1-0.20201116162257-a2a8dda75c91 h1:Izz0+t1Z5nI16/II7vuEo/nHjodOg0p7+OiDpjX5t1E= +github.com/dlclark/regexp2 v1.4.1-0.20201116162257-a2a8dda75c91/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= +github.com/dop251/goja v0.0.0-20220405120441-9037c2b61cbf h1:Yt+4K30SdjOkRoRRm3vYNQgR+/ZIy0RmeUDZo7Y8zeQ= +github.com/dop251/goja v0.0.0-20220405120441-9037c2b61cbf/go.mod h1:R9ET47fwRVRPZnOGvHxxhuZcbrMCuiqOz3Rlrh4KSnk= +github.com/dop251/goja_nodejs v0.0.0-20210225215109-d91c329300e7/go.mod h1:hn7BA7c8pLvoGndExHudxTDKZ84Pyvv+90pbBjbTz0Y= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/eapache/go-resiliency v1.2.0 h1:v7g92e/KSN71Rq7vSThKaWIq68fL4YHvWyiUKorFR1Q= github.com/eapache/go-resiliency v1.2.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= @@ -198,11 +203,15 @@ github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AEU963A2AYjv4d1V5eVL1CQbEJq6aCNHDDjibzu8= 
+github.com/go-sourcemap/sourcemap v2.1.3+incompatible h1:W1iEw64niKVGogNgBN3ePyLFfuisuzeidWPMPWmECqU= +github.com/go-sourcemap/sourcemap v2.1.3+incompatible/go.mod h1:F8jJfvm2KbVjc5NqelyYJmf/v5J0dwNLS2mL4sNA1Jg= github.com/go-test/deep v1.0.3 h1:ZrJSEWsXzPOxaZnFteGEfooLba+ju3FYIbOrS+rQd68= github.com/go-test/deep v1.0.3/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= +github.com/goccy/go-json v0.9.7 h1:IcB+Aqpx/iMHu5Yooh7jEzJk1JZ7Pjtmys2ukPr7EeM= +github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -506,11 +515,15 @@ github.com/yalp/jsonpath v0.0.0-20180802001716-5cc68e5049a0/go.mod h1:/LWChgwKmv github.com/yudai/gojsondiff v1.0.0/go.mod h1:AY32+k2cwILAkW1fbgxQ5mUmMiZFgLIV+FBNExI05xg= github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82/go.mod h1:lgjkn3NuSvDfVJdfcVVdX+jpBxNmX4rDAzaS45IcYoM= github.com/yudai/pp v2.0.1+incompatible/go.mod h1:PuxR/8QJ7cyCkFp/aUDS+JY727OFEZkTdatxwunjIkc= +github.com/yuin/gluamapper v0.0.0-20150323120927-d836955830e7 h1:noHsffKZsNfU38DwcXWEPldrTjIZ8FPNKx8mYMGnqjs= +github.com/yuin/gluamapper v0.0.0-20150323120927-d836955830e7/go.mod h1:bbMEM6aU1WDF1ErA5YJ0p91652pGv140gGw4Ww3RGp8= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yuin/gopher-lua v0.0.0-20210529063254-f4c35e4016d9 h1:k/gmLsJDWwWqbLCur2yWnJzwQEKRcAHXo6seXGuSwWw= +github.com/yuin/gopher-lua v0.0.0-20210529063254-f4c35e4016d9/go.mod h1:E1AXubJBdNmFERAOucpDIxNzeGfLzg0mYh+UfMWdChA= github.com/zclconf/go-cty v1.2.0/go.mod h1:hOPWgoHbaTUnI5k4D2ld+GRpFJSCe6bCM7m1q/N4PQ8= github.com/zclconf/go-cty v1.8.0/go.mod h1:vVKLxnk3puL4qRAv72AO+W99LUD4da90g3uUAzyuvAk= github.com/zclconf/go-cty v1.10.0 h1:mp9ZXQeIcN8kAwuqorjH+Q+njbJKjLrvB2yIh4q7U+0= @@ -656,6 +669,7 @@ golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190204203706-41f3e6584952/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -930,8 +944,9 @@ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWD gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.8 
h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20191120175047-4206685974f2/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= @@ -943,6 +958,8 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +layeh.com/gopher-json v0.0.0-20201124131017-552bb3c4c3bf h1:rRz0YsF7VXj9fXRF6yQgFI7DzST+hsI3TeFSGupntu0= +layeh.com/gopher-json v0.0.0-20201124131017-552bb3c4c3bf/go.mod h1:ivKkcY8Zxw5ba0jldhZCYYQfGdb2K6u9tbYK1AwMIBc= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= diff --git a/pkg/transform/engine.go b/pkg/transform/engine.go new file mode 100644 index 00000000..4b4af505 --- /dev/null +++ b/pkg/transform/engine.go @@ -0,0 +1,35 @@ +// PROPRIETARY AND CONFIDENTIAL +// +// Unauthorized copying of this file via any medium is strictly prohibited. +// +// Copyright (c) 2020-2022 Snowplow Analytics Ltd. All rights reserved. 
+ +package transform + +// FunctionMaker is the interface that wraps the MakeFunction method +type FunctionMaker interface { + // MakeFunction returns a TransformationFunction that runs + // a given function in a runtime engine. + MakeFunction(funcName string) TransformationFunction +} + +// SmokeTester is the interface that wraps the SmokeTest method. +type SmokeTester interface { + // SmokeTest runs a test spin of the engine trying to get as close to + // running the given function as possible. + SmokeTest(funcName string) error +} + +// Engine is the interface that groups +// FunctionMaker and SmokeTester. +type Engine interface { + FunctionMaker + SmokeTester +} + +// EngineProtocol is the I/O type of an Engine. +type EngineProtocol struct { + FilterOut bool + PartitionKey string + Data interface{} +} diff --git a/pkg/transform/engine_javascript.go b/pkg/transform/engine_javascript.go new file mode 100644 index 00000000..5df77df1 --- /dev/null +++ b/pkg/transform/engine_javascript.go @@ -0,0 +1,277 @@ +// PROPRIETARY AND CONFIDENTIAL +// +// Unauthorized copying of this file via any medium is strictly prohibited. +// +// Copyright (c) 2020-2022 Snowplow Analytics Ltd. All rights reserved. + +package transform + +import ( + "encoding/base64" + "fmt" + "time" + + goja "github.com/dop251/goja" + gojaparser "github.com/dop251/goja/parser" + gojson "github.com/goccy/go-json" + "github.com/mitchellh/mapstructure" + + "github.com/snowplow-devops/stream-replicator/pkg/models" +) + +// jsEngineConfig configures the JavaScript Engine. 
+type jsEngineConfig struct { + SourceB64 string `hcl:"source_b64" env:"TRANSFORMATION_JS_SOURCE_B64"` + RunTimeout int `hcl:"timeout_sec,optional" env:"TRANSFORMATION_JS_TIMEOUT_SEC"` + DisableSourceMaps bool `hcl:"disable_source_maps,optional" env:"TRANSFORMATION_JS_DISABLE_SOURCE_MAPS"` + SpMode bool `hcl:"snowplow_mode,optional" env:"TRANSFORMATION_JS_SNOWPLOW_MODE"` +} + +// jsEngine handles the provision of a JavaScript runtime to run transformations. +type jsEngine struct { + Code *goja.Program + RunTimeout time.Duration + SpMode bool +} + +// newJSEngine returns a JavaScript Engine from a jsEngineConfig. +func newJSEngine(c *jsEngineConfig) (*jsEngine, error) { + jsSrc, err := base64.StdEncoding.DecodeString(c.SourceB64) + if err != nil { + return nil, err + } + + compiledCode, err := compileJS(string(jsSrc), c.SourceB64, c.DisableSourceMaps) + if err != nil { + return nil, err + } + + eng := &jsEngine{ + Code: compiledCode, + RunTimeout: time.Duration(c.RunTimeout) * time.Second, + SpMode: c.SpMode, + } + + return eng, nil +} + +// The jsEngineAdapter type is an adapter for functions to be used as +// pluggable components for JavaScript Engine. Implements the Pluggable interface. +type jsEngineAdapter func(i interface{}) (interface{}, error) + +// Create implements the ComponentCreator interface. +func (f jsEngineAdapter) Create(i interface{}) (interface{}, error) { + return f(i) +} + +// ProvideDefault implements the ComponentConfigurable interface. +func (f jsEngineAdapter) ProvideDefault() (interface{}, error) { + // Provide defaults for the optional parameters + // whose default is not their zero value. + cfg := &jsEngineConfig{ + RunTimeout: 5, + DisableSourceMaps: true, + } + + return cfg, nil +} + +// adaptJSEngineFunc returns a jsEngineAdapter. 
+func adaptJSEngineFunc(f func(c *jsEngineConfig) (*jsEngine, error)) jsEngineAdapter { + return func(i interface{}) (interface{}, error) { + cfg, ok := i.(*jsEngineConfig) + if !ok { + return nil, fmt.Errorf("invalid input, expected jsEngineConfig") + } + + return f(cfg) + } +} + +// JSLayer returns the Pluggable transformation layer implemented in JavaScript. +func JSLayer() interface{} { + return adaptJSEngineFunc(newJSEngine) +} + +// SmokeTest implements SmokeTester. +func (e *jsEngine) SmokeTest(funcName string) error { + _, _, err := initRuntime(e, funcName) + return err +} + +// MakeFunction implements FunctionMaker. +func (e *jsEngine) MakeFunction(funcName string) TransformationFunction { + + return func(message *models.Message, interState interface{}) (*models.Message, *models.Message, *models.Message, interface{}) { + // making input + input, err := mkJSEngineInput(e, message, interState) + if err != nil { + message.SetError(fmt.Errorf("failed making input for the JavaScript runtime: %q", err.Error())) + return nil, nil, message, nil + } + + // initializing + vm, fun, err := initRuntime(e, funcName) + if err != nil { + message.SetError(fmt.Errorf("failed initializing JavaScript runtime: %q", err.Error())) + return nil, nil, message, nil + } + + timer := time.AfterFunc(e.RunTimeout, func() { + vm.Interrupt("runtime deadline exceeded") + }) + defer timer.Stop() + + // running + res, err := fun(goja.Undefined(), vm.ToValue(input)) + + if err != nil { + // runtime error counts as failure + runErr := fmt.Errorf("error running JavaScript function %q: %q", funcName, err.Error()) + message.SetError(runErr) + return nil, nil, message, nil + } + + // validating output + protocol, err := validateJSEngineOut(res.Export()) + if err != nil { + message.SetError(err) + return nil, nil, message, nil + } + + // filtering - keeping same behaviour with spEnrichedFilter + if protocol.FilterOut == true { + return nil, message, nil, nil + } + + // handling data + switch 
protoData := protocol.Data.(type) { + case string: + message.Data = []byte(protoData) + case map[string]interface{}: + // encode + encoded, err := gojson.MarshalWithOption(protoData, gojson.DisableHTMLEscape()) + if err != nil { + message.SetError(fmt.Errorf("error encoding message data")) + return nil, nil, message, nil + } + message.Data = encoded + default: + message.SetError(fmt.Errorf("invalid return type from JavaScript transformation; expected string or object")) + return nil, nil, message, nil + } + + // setting pk if needed + pk := protocol.PartitionKey + if pk != "" && message.PartitionKey != pk { + message.PartitionKey = pk + } + + return message, nil, nil, protocol + } +} + +// compileJS compiles JavaScript code. +// Since a goja.Runtime is not goroutine-safe, we spin up a new runtime for every +// transformation. This function lets us at least share the compiled program, +// so that the parse and compile steps run only once instead of on every call, +// as they implicitly would with the alternative RunString. +// See also: +// https://pkg.go.dev/github.com/dop251/goja#CompileAST +func compileJS(code, name string, disableSrcMaps bool) (*goja.Program, error) { + parserOpts := make([]gojaparser.Option, 0, 1) + + if disableSrcMaps == true { + parserOpts = append(parserOpts, gojaparser.WithDisableSourceMaps) + } + + ast, err := goja.Parse(name, code, parserOpts...) + if err != nil { + return nil, err + } + + // 'use strict' + prog, err := goja.CompileAST(ast, true) + if err != nil { + return nil, err + } + + return prog, nil +} + +// initRuntime initializes and returns an instance of a JavaScript runtime. 
+func initRuntime(e *jsEngine, funcName string) (*goja.Runtime, goja.Callable, error) { + // goja.New returns *goja.Runtime + vm := goja.New() + timer := time.AfterFunc(e.RunTimeout, func() { + vm.Interrupt("runtime deadline exceeded") + }) + defer timer.Stop() + + _, err := vm.RunProgram(e.Code) + if err != nil { + return nil, nil, fmt.Errorf("could not load JavaScript code: %q", err) + } + + if fun, ok := goja.AssertFunction(vm.Get(funcName)); ok { + return vm, fun, nil + } + + return nil, nil, fmt.Errorf("could not assert as function: %q", funcName) +} + +// mkJSEngineInput describes the logic for constructing the input to JS engine. +// No side effects. +func mkJSEngineInput(e *jsEngine, message *models.Message, interState interface{}) (*EngineProtocol, error) { + if interState != nil { + if i, ok := interState.(*EngineProtocol); ok { + return i, nil + } + } + + candidate := &EngineProtocol{ + Data: string(message.Data), + } + + if !e.SpMode { + return candidate, nil + } + + parsedMessage, err := intermediateAsSpEnrichedParsed(interState, message) + if err != nil { + // if spMode, error for non Snowplow enriched event data + return nil, err + } + + spMap, err := parsedMessage.ToMap() + if err != nil { + return nil, err + } + + candidate.Data = spMap + return candidate, nil +} + +// validateJSEngineOut validates the value returned by the js engine. 
+func validateJSEngineOut(output interface{}) (*EngineProtocol, error) { + if output == nil { + return nil, fmt.Errorf("invalid return type from JavaScript transformation; got null or undefined") + } + + if out, ok := output.(*EngineProtocol); ok { + return out, nil + } + + outMap, ok := output.(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("invalid return type from JavaScript transformation") + } + + result := &EngineProtocol{} + err := mapstructure.Decode(outMap, result) + if err != nil { + return nil, fmt.Errorf("protocol violation in return value from JavaScript transformation") + } + + return result, nil +} diff --git a/pkg/transform/engine_javascript_test.go b/pkg/transform/engine_javascript_test.go new file mode 100644 index 00000000..4f434c00 --- /dev/null +++ b/pkg/transform/engine_javascript_test.go @@ -0,0 +1,1937 @@ +// PROPRIETARY AND CONFIDENTIAL +// +// Unauthorized copying of this file via any medium is strictly prohibited. +// +// Copyright (c) 2020-2022 Snowplow Analytics Ltd. All rights reserved. 
+ +package transform + +import ( + "encoding/base64" + "fmt" + "path/filepath" + "reflect" + "strings" + "testing" + "time" + + "github.com/davecgh/go-spew/spew" + "github.com/stretchr/testify/assert" + + config "github.com/snowplow-devops/stream-replicator/config" + "github.com/snowplow-devops/stream-replicator/pkg/models" +) + +func TestJSEngineConfig_ENV(t *testing.T) { + testCases := []struct { + Name string + Plug config.Pluggable + Expected interface{} + }{ + { + Name: "transform-js-from-env", + Plug: testJSEngineAdapter(testJSEngineFunc), + Expected: &jsEngineConfig{ + SourceB64: "CglmdW5jdGlvbiBmb28oeCkgewoJICAgIHJldHVybiB4OwoJfQoJ", + RunTimeout: 10, + DisableSourceMaps: false, + SpMode: false, + }, + }, + } + + for _, tt := range testCases { + t.Run(tt.Name, func(t *testing.T) { + assert := assert.New(t) + + t.Setenv("STREAM_REPLICATOR_CONFIG_FILE", "") + + t.Setenv("MESSAGE_TRANSFORMATION", "js") + t.Setenv("TRANSFORMATION_LAYER_NAME", "js") + + t.Setenv("TRANSFORMATION_JS_SOURCE_B64", "CglmdW5jdGlvbiBmb28oeCkgewoJICAgIHJldHVybiB4OwoJfQoJ") + t.Setenv("TRANSFORMATION_JS_TIMEOUT_SEC", "10") + t.Setenv("TRANSFORMATION_JS_DISABLE_SOURCE_MAPS", "false") + t.Setenv("TRANSFORMATION_JS_SNOWPLOW_MODE", "false") + + c, err := config.NewConfig() + assert.NotNil(c) + if err != nil { + t.Fatalf("function NewConfig failed with error: %q", err.Error()) + } + + engine := c.Data.Transform.Layer + decoderOpts := &config.DecoderOptions{ + Input: engine.Body, + } + + result, err := c.CreateComponent(tt.Plug, decoderOpts) + assert.NotNil(result) + assert.Nil(err) + + if !reflect.DeepEqual(result, tt.Expected) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(result), + spew.Sdump(tt.Expected)) + } + }) + } +} + +func TestJSEngineConfig_HCL(t *testing.T) { + testFixPath := "../../config/test-fixtures" + testCases := []struct { + File string + Plug config.Pluggable + Expected interface{} + }{ + { + File: "transform-js-simple.hcl", + Plug: 
testJSEngineAdapter(testJSEngineFunc), + Expected: &jsEngineConfig{ + SourceB64: "CglmdW5jdGlvbiBmb28oeCkgewoJICAgIHJldHVybiB4OwoJfQoJ", + RunTimeout: 5, + DisableSourceMaps: true, + SpMode: false, + }, + }, + { + File: "transform-js-extended.hcl", + Plug: testJSEngineAdapter(testJSEngineFunc), + Expected: &jsEngineConfig{ + SourceB64: "CglmdW5jdGlvbiBmb28oeCkgewoJICAgIHJldHVybiB4OwoJfQoJ", + RunTimeout: 10, + DisableSourceMaps: false, + SpMode: true, + }, + }, + } + + for _, tt := range testCases { + t.Run(tt.File, func(t *testing.T) { + assert := assert.New(t) + + filename := filepath.Join(testFixPath, tt.File) + t.Setenv("STREAM_REPLICATOR_CONFIG_FILE", filename) + + c, err := config.NewConfig() + assert.NotNil(c) + if err != nil { + t.Fatalf("function NewConfig failed with error: %q", err.Error()) + } + + engine := c.Data.Transform.Layer + decoderOpts := &config.DecoderOptions{ + Input: engine.Body, + } + + result, err := c.CreateComponent(tt.Plug, decoderOpts) + assert.NotNil(result) + assert.Nil(err) + + if !reflect.DeepEqual(result, tt.Expected) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(result), + spew.Sdump(tt.Expected)) + } + }) + } +} + +func TestJSLayer(t *testing.T) { + layer := JSLayer() + if _, ok := layer.(config.Pluggable); !ok { + t.Errorf("invalid interface returned from JSLayer") + } +} + +func TestJSEngineMakeFunction_SpModeFalse_IntermediateNil(t *testing.T) { + var testInterState interface{} = nil + var testSpMode bool = false + testCases := []struct { + Src string + FunName string + DisableSourceMaps bool + Input *models.Message + Expected map[string]*models.Message + ExpInterState interface{} + Error error + }{ + { + Src: ` +function identity(x) { + return x; +} +`, + FunName: "identity", + DisableSourceMaps: true, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + "filtered": 
nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: "asdf", + }, + Error: nil, + }, + { + Src: ` +function concatHello(x) { + let newVal = "Hello:" + x.Data; + x.Data = newVal; + return x; +} +`, + FunName: "concatHello", + DisableSourceMaps: true, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": { + Data: []byte("Hello:asdf"), + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: "Hello:asdf", + }, + Error: nil, + }, + { + Src: ` +function filterIn(x) { + x.FilterOut = false + return x; +} +`, + FunName: "filterIn", + DisableSourceMaps: true, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: "asdf", + }, + Error: nil, + }, + { + Src: ` +function filterOut(x) { + if (Object.prototype.toString.call(x.Data) === '[object String]') { + return { + FilterOut: true, + }; + } + + return { + FilterOut: false, + Data: x.Data + }; +} +`, + FunName: "filterOut", + DisableSourceMaps: true, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + "failed": nil, + }, + ExpInterState: nil, + Error: nil, + }, + { + Src: ` +function jsonIdentity(x) { + var jsonObj = JSON.parse(x.Data); + var result = JSON.stringify(jsonObj); + + return { + Data: result + }; +} +`, + FunName: "jsonIdentity", + DisableSourceMaps: false, + Input: &models.Message{ + Data: testJsJSON, + PartitionKey: 
"some-test-key", + }, + Expected: map[string]*models.Message{ + "success": { + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(testJsJSON), + }, + Error: nil, + }, + { + Src: ` +function jsonTransformFieldNameRegex(x) { + var jsonObj = JSON.parse(x.Data); + + if (jsonObj.hasOwnProperty("app_id")) { + x.Data = x.Data.replace(/app_id/, 'app_id_CHANGED'); + } + + return x; +} +`, + FunName: "jsonTransformFieldNameRegex", + DisableSourceMaps: false, + Input: &models.Message{ + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": { + Data: testJsJSONChanged1, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(testJsJSONChanged1), + }, + Error: nil, + }, + { + Src: ` +function jsonTransformFieldNameObj(x) { + + var jsonObj = JSON.parse(x.Data); + + var descriptor = Object.getOwnPropertyDescriptor(jsonObj, "app_id"); + Object.defineProperty(jsonObj, "app_id_CHANGED", descriptor); + delete jsonObj["app_id"]; + + return { + Data: JSON.stringify(jsonObj) + }; +} +`, + FunName: "jsonTransformFieldNameObj", + DisableSourceMaps: false, + Input: &models.Message{ + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": { + Data: testJsJSONChanged2, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(testJsJSONChanged2), + }, + Error: nil, + }, + { + Src: ` +function jsonFilterOut(x) { + var jsonObj = JSON.parse(x.Data); + + if (jsonObj.hasOwnProperty("app_id") && jsonObj["app_id"] === "filterMeOut") { + x.FilterOut = false; + } else { + x.FilterOut = true; + } + + return x; +} +`, + FunName: "jsonFilterOut", 
+ DisableSourceMaps: false, + Input: &models.Message{ + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": { + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + "failed": nil, + }, + ExpInterState: nil, + Error: nil, + }, + { + Src: ` +function returnWrongType(x) { + return 0; +} +`, + FunName: "returnWrongType", + DisableSourceMaps: true, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("invalid return type from JavaScript transformation"), + }, + { + Src: ` +function returnUndefined(x) {} +`, + FunName: "returnUndefined", + DisableSourceMaps: true, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("invalid return type from JavaScript transformation; got null or undefined"), + }, + { + Src: ` +function returnNull(x) { + return null; +} +`, + FunName: "returnNull", + DisableSourceMaps: true, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("invalid return type from JavaScript transformation; got null or undefined"), + }, + { + Src: ` +function causeRuntimeError(x) { + return x.toExponential(2); +} +`, + FunName: "causeRuntimeError", + DisableSourceMaps: true, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: 
map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("error running JavaScript function \"causeRuntimeError\""), + }, + { + Src: ` +function callError(x) { + throw("Failed"); +} +`, + FunName: "callError", + DisableSourceMaps: false, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("error running JavaScript function \"callError\""), + }, + { + Src: ` +function sleepTenSecs(x) { + var now = new Date().getTime(); + while(new Date().getTime() < now + 10000) { + } +} +`, + FunName: "sleepTenSecs", + DisableSourceMaps: false, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("runtime deadline exceeded"), + }, + } + + for _, tt := range testCases { + t.Run(tt.FunName, func(t *testing.T) { + assert := assert.New(t) + + src := base64.StdEncoding.EncodeToString([]byte(tt.Src)) + jsConfig := &jsEngineConfig{ + SourceB64: src, + RunTimeout: 1, + DisableSourceMaps: tt.DisableSourceMaps, + SpMode: testSpMode, + } + + jsEngine, err := newJSEngine(jsConfig) + assert.NotNil(jsEngine) + if err != nil { + t.Fatalf("function newJSEngine failed with error: %q", err.Error()) + } + + if err := jsEngine.SmokeTest(tt.FunName); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + + transFunction := jsEngine.MakeFunction(tt.FunName) + s, f, e, i := transFunction(tt.Input, testInterState) + + if !reflect.DeepEqual(i, tt.ExpInterState) { + 
t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(i), + spew.Sdump(tt.ExpInterState)) + } + + if e != nil { + gotErr := e.GetError() + expErr := tt.Error + if expErr == nil { + t.Fatalf("got unexpected error: %s", gotErr.Error()) + } + + if !strings.Contains(gotErr.Error(), expErr.Error()) { + t.Errorf("GOT_ERROR:\n%s\n does not contain\nEXPECTED_ERROR:\n%s", + gotErr.Error(), + expErr.Error()) + } + } + + assertMessagesCompareJs(t, s, tt.Expected["success"]) + assertMessagesCompareJs(t, f, tt.Expected["filtered"]) + assertMessagesCompareJs(t, e, tt.Expected["failed"]) + }) + } +} + +func TestJSEngineMakeFunction_SpModeTrue_IntermediateNil(t *testing.T) { + var testInterState interface{} = nil + var testSpMode bool = true + testCases := []struct { + Scenario string + Src string + FunName string + DisableSourceMaps bool + Input *models.Message + Expected map[string]*models.Message + ExpInterState interface{} + Error error + }{ + { + Scenario: "identity", + Src: ` +function identity(x) { + return x; +} +`, + FunName: "identity", + DisableSourceMaps: true, + Input: &models.Message{ + Data: testJsTsv, + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": { + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: testJSMap, + }, + Error: nil, + }, + { + Scenario: "filtering", + Src: ` +function filterOut(input) { + // input is an object + var spData = input.Data; + if (spData["app_id"] === "myApp") { + return input; + } + return { + FilterOut: true + }; +} +`, + FunName: "filterOut", + DisableSourceMaps: true, + Input: &models.Message{ + Data: testJsTsv, + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": { + Data: testJsTsv, + PartitionKey: "some-test-key", + }, + "failed": nil, + }, + ExpInterState: nil, + Error: nil, + }, + { + Scenario: 
"filteringOut_ignoresData", + Src: ` +function filterOutIgnores(x) { + return { + FilterOut: true, + Data: "shouldNotAppear", + PartitionKey: "notThis" + }; +} +`, + FunName: "filterOutIgnores", + DisableSourceMaps: true, + Input: &models.Message{ + Data: testJsTsv, + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": { + Data: testJsTsv, + PartitionKey: "some-test-key", + }, + "failed": nil, + }, + ExpInterState: nil, + Error: nil, + }, + { + Scenario: "non_Snowplow_enriched_to_failed", + Src: ` +function willNotRun(x) { + return x; +} +`, + FunName: "willNotRun", + DisableSourceMaps: false, + Input: &models.Message{ + Data: []byte("nonSpEnrichedEvent"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + Data: []byte("nonSpEnrichedEvent"), + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("Cannot parse"), + }, + { + Scenario: "return_wrong_type", + Src: ` +function returnWrongType(x) { + return 0; +} +`, + FunName: "returnWrongType", + DisableSourceMaps: true, + Input: &models.Message{ + Data: testJsTsv, + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + Data: testJsTsv, + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("invalid return type from JavaScript transformation"), + }, + } + + for _, tt := range testCases { + t.Run(tt.Scenario, func(t *testing.T) { + assert := assert.New(t) + + src := base64.StdEncoding.EncodeToString([]byte(tt.Src)) + jsConfig := &jsEngineConfig{ + SourceB64: src, + RunTimeout: 1, + DisableSourceMaps: tt.DisableSourceMaps, + SpMode: testSpMode, + } + + jsEngine, err := newJSEngine(jsConfig) + assert.NotNil(jsEngine) + if err != nil { + t.Fatalf("function newJSEngine failed with error: %q", err.Error()) + } + + if err := jsEngine.SmokeTest(tt.FunName); err 
!= nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + + transFunction := jsEngine.MakeFunction(tt.FunName) + s, f, e, i := transFunction(tt.Input, testInterState) + + if !reflect.DeepEqual(i, tt.ExpInterState) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(i), + spew.Sdump(tt.ExpInterState)) + } + + if e != nil { + gotErr := e.GetError() + expErr := tt.Error + if expErr == nil { + t.Fatalf("got unexpected error: %s", gotErr.Error()) + } + + if !strings.Contains(gotErr.Error(), expErr.Error()) { + t.Errorf("GOT_ERROR:\n%s\n does not contain\nEXPECTED_ERROR:\n%s", + gotErr.Error(), + expErr.Error()) + } + } + + assertMessagesCompareJs(t, s, tt.Expected["success"]) + assertMessagesCompareJs(t, f, tt.Expected["filtered"]) + assertMessagesCompareJs(t, e, tt.Expected["failed"]) + }) + } +} + +func TestJSEngineMakeFunction_IntermediateState_SpModeFalse(t *testing.T) { + testSpMode := false + testCases := []struct { + Scenario string + Src string + FunName string + DisableSourceMaps bool + Input *models.Message + InterState interface{} + Expected map[string]*models.Message + ExpInterState interface{} + Error error + }{ + { + Scenario: "intermediateState_EngineProtocol_Map", + Src: ` +function identity(x) { + return x; +} +`, + FunName: "identity", + DisableSourceMaps: true, + Input: &models.Message{ + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + InterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: testJSMap, + }, + Expected: map[string]*models.Message{ + "success": { + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: testJSMap, + }, + Error: nil, + }, + { + Scenario: "intermediateState_EngineProtocol_String", + Src: ` +function identity(x) { + return x; +} +`, + FunName: "identity", + DisableSourceMaps: true, + Input: &models.Message{ + Data: testJsJSON, + PartitionKey: 
"some-test-key", + }, + InterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(testJsJSON), + }, + Expected: map[string]*models.Message{ + "success": { + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(testJsJSON), + }, + Error: nil, + }, + { + Scenario: "intermediateState_not_EngineProtocol_spMode_true", + Src: ` +function identity(x) { + return x; +} +`, + FunName: "identity", + DisableSourceMaps: true, + Input: &models.Message{ + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + InterState: "notEngineProtocol", + Expected: map[string]*models.Message{ + "success": { + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(testJsJSON), + }, + Error: nil, + }, + { + Scenario: "intermediateState_not_EngineProtocol_spMode_false", + Src: ` +function identity(x) { + return x; +} +`, + FunName: "identity", + DisableSourceMaps: true, + Input: &models.Message{ + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + InterState: "notEngineProtocol", + Expected: map[string]*models.Message{ + "success": { + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(testJsJSON), + }, + Error: nil, + }, + } + + for _, tt := range testCases { + t.Run(tt.Scenario, func(t *testing.T) { + assert := assert.New(t) + + src := base64.StdEncoding.EncodeToString([]byte(tt.Src)) + jsConfig := &jsEngineConfig{ + SourceB64: src, + RunTimeout: 1, + DisableSourceMaps: tt.DisableSourceMaps, + SpMode: testSpMode, + } + + jsEngine, err := newJSEngine(jsConfig) + assert.NotNil(jsEngine) + if err != nil { + t.Fatalf("function newJSEngine failed with error: %q", 
err.Error()) + } + + if err := jsEngine.SmokeTest(tt.FunName); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + + transFunction := jsEngine.MakeFunction(tt.FunName) + s, f, e, i := transFunction(tt.Input, tt.InterState) + + if !reflect.DeepEqual(i, tt.ExpInterState) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(i), + spew.Sdump(tt.ExpInterState)) + } + + if e != nil { + gotErr := e.GetError() + expErr := tt.Error + if expErr == nil { + t.Fatalf("got unexpected error: %s", gotErr.Error()) + } + + if !strings.Contains(gotErr.Error(), expErr.Error()) { + t.Errorf("GOT_ERROR:\n%s\n does not contain\nEXPECTED_ERROR:\n%s", + gotErr.Error(), + expErr.Error()) + } + } + + assertMessagesCompareJs(t, s, tt.Expected["success"]) + assertMessagesCompareJs(t, f, tt.Expected["filtered"]) + assertMessagesCompareJs(t, e, tt.Expected["failed"]) + }) + } +} + +func TestJSEngineMakeFunction_IntermediateState_SpModeTrue(t *testing.T) { + testSpMode := true + testCases := []struct { + Scenario string + Src string + FunName string + DisableSourceMaps bool + Input *models.Message + InterState interface{} + Expected map[string]*models.Message + ExpInterState interface{} + Error error + }{ + { + Scenario: "intermediateState_EngineProtocol_Map", + Src: ` +function identity(x) { + return x; +} +`, + FunName: "identity", + DisableSourceMaps: true, + Input: &models.Message{ + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + InterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: testJSMap, + }, + Expected: map[string]*models.Message{ + "success": { + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: testJSMap, + }, + Error: nil, + }, + { + Scenario: "intermediateState_EngineProtocol_String", + Src: ` +function identity(x) { + return x; +} +`, + FunName: "identity", + DisableSourceMaps: true, + Input: 
&models.Message{ + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + InterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(testJsJSON), + }, + Expected: map[string]*models.Message{ + "success": { + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(testJsJSON), + }, + Error: nil, + }, + { + Scenario: "intermediateState_notEngineProtocol_notSpEnriched", + Src: ` +function willNotRun(x) { + return x; +} +`, + FunName: "willNotRun", + DisableSourceMaps: true, + Input: &models.Message{ + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + InterState: "notEngineProtocol", + Expected: map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("Cannot parse"), + }, + { + Scenario: "intermediateState_notEngineProtocol_SpEnriched", + Src: ` +function identity(x) { + return x; +} +`, + FunName: "identity", + DisableSourceMaps: true, + Input: &models.Message{ + Data: testJsTsv, + PartitionKey: "some-test-key", + }, + InterState: "notEngineProtocol", + Expected: map[string]*models.Message{ + "success": { + Data: testJsJSON, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: testJSMap, + }, + Error: nil, + }, + } + + for _, tt := range testCases { + t.Run(tt.Scenario, func(t *testing.T) { + assert := assert.New(t) + + src := base64.StdEncoding.EncodeToString([]byte(tt.Src)) + jsConfig := &jsEngineConfig{ + SourceB64: src, + RunTimeout: 1, + DisableSourceMaps: tt.DisableSourceMaps, + SpMode: testSpMode, + } + + jsEngine, err := newJSEngine(jsConfig) + assert.NotNil(jsEngine) + if err != nil { + t.Fatalf("function newJSEngine failed with error: %q", err.Error()) + } + + if 
err := jsEngine.SmokeTest(tt.FunName); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + + transFunction := jsEngine.MakeFunction(tt.FunName) + s, f, e, i := transFunction(tt.Input, tt.InterState) + + if !reflect.DeepEqual(i, tt.ExpInterState) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(i), + spew.Sdump(tt.ExpInterState)) + } + + if e != nil { + gotErr := e.GetError() + expErr := tt.Error + if expErr == nil { + t.Fatalf("got unexpected error: %s", gotErr.Error()) + } + + if !strings.Contains(gotErr.Error(), expErr.Error()) { + t.Errorf("GOT_ERROR:\n%s\n does not contain\nEXPECTED_ERROR:\n%s", + gotErr.Error(), + expErr.Error()) + } + } + + assertMessagesCompareJs(t, s, tt.Expected["success"]) + assertMessagesCompareJs(t, f, tt.Expected["filtered"]) + assertMessagesCompareJs(t, e, tt.Expected["failed"]) + }) + } +} + +func TestJSEngineMakeFunction_SetPK(t *testing.T) { + var testInterState interface{} = nil + testCases := []struct { + Scenario string + Src string + FunName string + DisableSourceMaps bool + SpMode bool + Input *models.Message + Expected map[string]*models.Message + ExpInterState interface{} + Error error + }{ + { + Scenario: "onlySetPk_spModeTrue", + Src: ` +function onlySetPk(x) { + x.PartitionKey = "newPk"; + return x; +} +`, + FunName: "onlySetPk", + DisableSourceMaps: true, + SpMode: true, + Input: &models.Message{ + Data: testJsTsv, + PartitionKey: "oldPK", + }, + Expected: map[string]*models.Message{ + "success": { + Data: testJsJSON, + PartitionKey: "newPk", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "newPk", + Data: testJSMap, + }, + Error: nil, + }, + { + Scenario: "onlySetPk_spModeFalse", + Src: ` +function onlySetPk(x) { + x.PartitionKey = "newPk"; + return x; +} +`, + FunName: "onlySetPk", + DisableSourceMaps: true, + SpMode: false, + Input: &models.Message{ + Data: testJsTsv, + PartitionKey: "oldPK", + }, + Expected: 
map[string]*models.Message{ + "success": { + Data: testJsTsv, + PartitionKey: "newPk", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "newPk", + Data: string(testJsTsv), + }, + Error: nil, + }, + { + Scenario: "filterOutIgnores", + Src: ` +function filterOutIgnores(x) { + return { + FilterOut: true, + Data: "shouldNotAppear", + PartitionKey: "notThis" + }; +} +`, + FunName: "filterOutIgnores", + DisableSourceMaps: true, + SpMode: true, + Input: &models.Message{ + Data: testJsTsv, + PartitionKey: "oldPk", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": { + Data: testJsTsv, + PartitionKey: "oldPk", + }, + "failed": nil, + }, + ExpInterState: nil, + Error: nil, + }, + } + + for _, tt := range testCases { + t.Run(tt.Scenario, func(t *testing.T) { + assert := assert.New(t) + + src := base64.StdEncoding.EncodeToString([]byte(tt.Src)) + jsConfig := &jsEngineConfig{ + SourceB64: src, + RunTimeout: 1, + DisableSourceMaps: tt.DisableSourceMaps, + SpMode: tt.SpMode, + } + + jsEngine, err := newJSEngine(jsConfig) + assert.NotNil(jsEngine) + if err != nil { + t.Fatalf("function newJSEngine failed with error: %q", err.Error()) + } + + if err := jsEngine.SmokeTest(tt.FunName); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + + transFunction := jsEngine.MakeFunction(tt.FunName) + s, f, e, i := transFunction(tt.Input, testInterState) + + if !reflect.DeepEqual(i, tt.ExpInterState) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(i), + spew.Sdump(tt.ExpInterState)) + } + + if e != nil { + gotErr := e.GetError() + expErr := tt.Error + if expErr == nil { + t.Fatalf("got unexpected error: %s", gotErr.Error()) + } + + if !strings.Contains(gotErr.Error(), expErr.Error()) { + t.Errorf("GOT_ERROR:\n%s\n does not contain\nEXPECTED_ERROR:\n%s", + gotErr.Error(), + expErr.Error()) + } + } + + assertMessagesCompareJs(t, s, tt.Expected["success"]) + 
assertMessagesCompareJs(t, f, tt.Expected["filtered"]) + assertMessagesCompareJs(t, e, tt.Expected["failed"]) + }) + } +} + +func TestJSEngineSmokeTest(t *testing.T) { + testCases := []struct { + Src string + FunName string + DisableSourceMaps bool + CompileError error + SmokeError error + }{ + { + Src: ` +function identity(x) { + return x; +} +`, + FunName: "identity", + DisableSourceMaps: true, + CompileError: nil, + SmokeError: nil, + }, + { + Src: ` +function notThisOne(x) { + return x; +} +`, + FunName: "notExists", + DisableSourceMaps: true, + CompileError: nil, + SmokeError: fmt.Errorf("could not assert as function"), + }, + { + Src: ` +function syntaxError(x) { + loca y = 0; +} +`, + FunName: "syntaxError", + DisableSourceMaps: false, + CompileError: fmt.Errorf("SyntaxError"), + SmokeError: nil, + }, + } + + for _, tt := range testCases { + t.Run(tt.FunName, func(t *testing.T) { + assert := assert.New(t) + + src := base64.StdEncoding.EncodeToString([]byte(tt.Src)) + jsConfig := &jsEngineConfig{ + SourceB64: src, + RunTimeout: 1, + DisableSourceMaps: tt.DisableSourceMaps, + } + + jsEngine, compileErr := newJSEngine(jsConfig) + + if compileErr != nil { + if tt.CompileError == nil { + t.Fatalf("got unexpected error while creating newJSEngine: %s", compileErr.Error()) + } + + if !strings.Contains(compileErr.Error(), tt.CompileError.Error()) { + t.Errorf("newJSEngine error mismatch\nGOT_ERROR:\n%q\n does not contain\nEXPECTED_ERROR:\n%q", + compileErr.Error(), + tt.CompileError.Error()) + } + } else { + assert.NotNil(jsEngine) + + smoke := jsEngine.SmokeTest(tt.FunName) + expErr := tt.SmokeError + if smoke != nil { + if expErr == nil { + t.Fatalf("got unexpected smoke-test error: %q", smoke.Error()) + } + + if !strings.Contains(smoke.Error(), expErr.Error()) { + t.Errorf("smoke error mismatch\nGOT_ERROR:\n%q\ndoes not contain\nEXPECTED_ERROR:\n%q", + smoke.Error(), + expErr.Error()) + } + } else { + assert.Nil(tt.SmokeError) + } + } + }) + } +} + +func 
TestJSEngineWithBuiltinsSpModeFalse(t *testing.T) { + srcCode := ` +function identity(x) { + return x; +} + +function setPk(x) { + x.PartitionKey = "testKey"; + return x; +} +` + // JS + src := base64.StdEncoding.EncodeToString([]byte(srcCode)) + jsConfig := &jsEngineConfig{ + SourceB64: src, + RunTimeout: 1, + SpMode: false, + } + + jsEngine, err := newJSEngine(jsConfig) + if err != nil { + t.Fatalf("newJSEngine failed with error: %q", err) + } + + if err := jsEngine.SmokeTest("identity"); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + if err := jsEngine.SmokeTest("setPk"); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + + jsFuncID := jsEngine.MakeFunction("identity") + jsFuncPk := jsEngine.MakeFunction("setPk") + + // Builtins + setPkToAppID := NewSpEnrichedSetPkFunction("app_id") + spEnrichedToJSON := SpEnrichedToJSON + + testCases := []struct { + Name string + Transformation TransformationApplyFunction + Input []*models.Message + ExpectedGood []*models.Message + }{ + { + Name: "identity0", + Input: messages, + Transformation: NewTransformation( + jsFuncID, + setPkToAppID, + spEnrichedToJSON, + ), + ExpectedGood: []*models.Message{ + { + Data: snowplowJSON1, + PartitionKey: "test-data1", + }, + { + Data: snowplowJSON2, + PartitionKey: "test-data2", + }, + { + Data: snowplowJSON3, + PartitionKey: "test-data3", + }, + }, + }, + { + Name: "identity2", + Input: messages, + Transformation: NewTransformation( + setPkToAppID, + spEnrichedToJSON, + jsFuncID, + ), + ExpectedGood: []*models.Message{ + { + Data: snowplowJSON1, + PartitionKey: "test-data1", + }, + { + Data: snowplowJSON2, + PartitionKey: "test-data2", + }, + { + Data: snowplowJSON3, + PartitionKey: "test-data3", + }, + }, + }, + { + Name: "setPk1", + Input: messages, + Transformation: NewTransformation( + setPkToAppID, + jsFuncPk, + spEnrichedToJSON, + ), + ExpectedGood: []*models.Message{ + { + Data: snowplowJSON1, + PartitionKey: "testKey", + 
}, + { + Data: snowplowJSON2, + PartitionKey: "testKey", + }, + { + Data: snowplowJSON3, + PartitionKey: "testKey", + }, + }, + }, + } + + for _, tt := range testCases { + t.Run(tt.Name, func(t *testing.T) { + assert := assert.New(t) + + result := tt.Transformation(tt.Input) + assert.NotNil(result) + assert.Equal(len(tt.ExpectedGood), len(result.Result)) + for i, res := range result.Result { + if i < len(tt.ExpectedGood) { + exp := tt.ExpectedGood[i] + if !reflect.DeepEqual(res.Data, exp.Data) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(res.Data), + spew.Sdump(exp.Data)) + } + assert.Equal(res.PartitionKey, exp.PartitionKey) + } + } + }) + } +} + +func TestJSEngineWithBuiltinsSpModeTrue(t *testing.T) { + srcCode := ` +function identity(x) { + return x; +} + +function setPk(x) { + x.PartitionKey = "testKey"; + return x; +} +` + // JS + src := base64.StdEncoding.EncodeToString([]byte(srcCode)) + jsConfig := &jsEngineConfig{ + SourceB64: src, + RunTimeout: 1, + SpMode: true, + } + + jsEngine, err := newJSEngine(jsConfig) + if err != nil { + t.Fatalf("newJSEngine failed with error: %q", err) + } + + if err := jsEngine.SmokeTest("identity"); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + if err := jsEngine.SmokeTest("setPk"); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + + jsFuncID := jsEngine.MakeFunction("identity") + jsFuncPk := jsEngine.MakeFunction("setPk") + + // Builtins + setPkToAppID := NewSpEnrichedSetPkFunction("app_id") + spEnrichedToJSON := SpEnrichedToJSON + + testCases := []struct { + Name string + Transformation TransformationApplyFunction + Input []*models.Message + ExpectedGood []*models.Message + }{ + { + Name: "identity", + Input: []*models.Message{ + { + Data: testJsTsv, + PartitionKey: "prevKey", + }, + }, + Transformation: NewTransformation( + setPkToAppID, + spEnrichedToJSON, + jsFuncID, + ), + ExpectedGood: []*models.Message{ + { + Data: testJsJSON, + PartitionKey: 
"test-data<>", + }, + }, + }, + { + Name: "setPk", + Input: []*models.Message{ + { + Data: testJsTsv, + PartitionKey: "prevKey", + }, + }, + Transformation: NewTransformation( + setPkToAppID, + jsFuncPk, + ), + ExpectedGood: []*models.Message{ + { + Data: testJsJSON, + PartitionKey: "testKey", + }, + }, + }, + { + Name: "mix", + Input: []*models.Message{ + { + Data: testJsTsv, + PartitionKey: "prevKey", + }, + }, + Transformation: NewTransformation( + setPkToAppID, + jsFuncID, + jsFuncPk, + jsFuncID, + ), + ExpectedGood: []*models.Message{ + { + Data: testJsJSON, + PartitionKey: "testKey", + }, + }, + }, + } + + for _, tt := range testCases { + t.Run(tt.Name, func(t *testing.T) { + assert := assert.New(t) + + result := tt.Transformation(tt.Input) + assert.NotNil(result) + assert.Equal(len(tt.ExpectedGood), len(result.Result)) + for i, res := range result.Result { + if i < len(tt.ExpectedGood) { + exp := tt.ExpectedGood[i] + if !reflect.DeepEqual(res.Data, exp.Data) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(res.Data), + spew.Sdump(exp.Data)) + } + assert.Equal(res.PartitionKey, exp.PartitionKey) + } + } + }) + } +} + +func Benchmark_JSEngine_Passthrough_DisabledSrcMaps(b *testing.B) { + b.ReportAllocs() + + srcCode := ` +function identity(x) { + return x; +} +` + src := base64.StdEncoding.EncodeToString([]byte(srcCode)) + inputMsg := &models.Message{ + Data: testJsJSON, + PartitionKey: "some-test-key", + } + + jsConfig := &jsEngineConfig{ + SourceB64: src, + RunTimeout: 5, + DisableSourceMaps: true, + } + + jsEngine, err := newJSEngine(jsConfig) + if err != nil { + b.Fatalf("function newJSEngine failed with error: %q", err.Error()) + } + + // not Smoke-Tested + transFunction := jsEngine.MakeFunction("identity") + + for n := 0; n < b.N; n++ { + transFunction(inputMsg, nil) + } +} + +func Benchmark_JSEngine_Passthrough(b *testing.B) { + b.ReportAllocs() + + srcCode := ` +function identity(x) { + return x; +} +` + src := 
base64.StdEncoding.EncodeToString([]byte(srcCode)) + inputMsg := &models.Message{ + Data: testJsJSON, + PartitionKey: "some-test-key", + } + + jsConfig := &jsEngineConfig{ + SourceB64: src, + RunTimeout: 5, + DisableSourceMaps: false, + } + + jsEngine, err := newJSEngine(jsConfig) + if err != nil { + b.Fatalf("function newJSEngine failed with error: %q", err.Error()) + } + + // not Smoke-Tested + transFunction := jsEngine.MakeFunction("identity") + + for n := 0; n < b.N; n++ { + transFunction(inputMsg, nil) + } +} + +func Benchmark_JSEngine_PassthroughSpMode(b *testing.B) { + b.ReportAllocs() + + srcCode := ` +function identity(x) { + return x; +} +` + src := base64.StdEncoding.EncodeToString([]byte(srcCode)) + inputMsg := &models.Message{ + Data: testJsTsv, + PartitionKey: "some-test-key", + } + + jsConfig := &jsEngineConfig{ + SourceB64: src, + RunTimeout: 5, + DisableSourceMaps: false, + } + + jsEngine, err := newJSEngine(jsConfig) + if err != nil { + b.Fatalf("function newJSEngine failed with error: %q", err.Error()) + } + + // not Smoke-Tested + transFunction := jsEngine.MakeFunction("identity") + + for n := 0; n < b.N; n++ { + transFunction(inputMsg, nil) + } +} + +func Benchmark_JSEngine_Passthrough_JsJson(b *testing.B) { + b.ReportAllocs() + + srcCode := ` +function jsonIdentity(x) { + var jsonObj = JSON.parse(x.Data); + var result = JSON.stringify(jsonObj); + + return { + Data: result + }; +} +` + src := base64.StdEncoding.EncodeToString([]byte(srcCode)) + inputMsg := &models.Message{ + Data: testJsJSON, + PartitionKey: "some-test-key", + } + + jsConfig := &jsEngineConfig{ + SourceB64: src, + RunTimeout: 5, + DisableSourceMaps: false, + } + + jsEngine, err := newJSEngine(jsConfig) + if err != nil { + b.Fatalf("function newJSEngine failed with error: %q", err.Error()) + } + + // not Smoke-Tested + transFunction := jsEngine.MakeFunction("jsonIdentity") + + for n := 0; n < b.N; n++ { + transFunction(inputMsg, nil) + } +} + +// Test helpers +func 
testJSEngineAdapter(f func(c *jsEngineConfig) (*jsEngineConfig, error)) jsEngineAdapter { + return func(i interface{}) (interface{}, error) { + cfg, ok := i.(*jsEngineConfig) + if !ok { + return nil, fmt.Errorf("invalid input, expected jsEngineConfig") + } + + return f(cfg) + } + +} + +func testJSEngineFunc(c *jsEngineConfig) (*jsEngineConfig, error) { + return c, nil +} + +// Helper function to compare messages and avoid using reflect.DeepEqual +// on errors. Compares all but the error field of messages. +func assertMessagesCompareJs(t *testing.T, act, exp *models.Message) { + t.Helper() + + ok := false + switch { + case act == nil: + ok = exp == nil + case exp == nil: + default: + pkOk := act.PartitionKey == exp.PartitionKey + dataOk := reflect.DeepEqual(act.Data, exp.Data) + cTimeOk := reflect.DeepEqual(act.TimeCreated, exp.TimeCreated) + pTimeOk := reflect.DeepEqual(act.TimePulled, exp.TimePulled) + tTimeOk := reflect.DeepEqual(act.TimeTransformed, exp.TimeTransformed) + ackOk := reflect.DeepEqual(act.AckFunc, exp.AckFunc) + + if pkOk && dataOk && cTimeOk && pTimeOk && tTimeOk && ackOk { + ok = true + } + } + + if !ok { + t.Errorf("\nGOT:\n%s\nEXPECTED:\n%s\n", + spew.Sdump(act), + spew.Sdump(exp)) + } +} + +// helper variables +var testJsDvceCreatedTstamp, _ = time.Parse("2006-01-02 15:04:05.999", "2019-05-10 14:40:35.551") +var testJsEtlTstamp, _ = time.Parse("2006-01-02 15:04:05.999", "2019-05-10 14:40:37.436") +var testJsDerivedTstamp, _ = time.Parse("2006-01-02 15:04:05.999", "2019-05-10 14:40:35.972") +var testJsCollectorTstamp, _ = time.Parse("2006-01-02 15:04:05.999", "2019-05-10 14:40:35.972") +var testJsDvceSentTstamp, _ = time.Parse("2006-01-02 15:04:05.999", "2019-05-10 14:40:35") +var testJSMap = map[string]interface{}{ + "event_version": "1-0-0", + "app_id": "test-data<>", + "dvce_created_tstamp": testJsDvceCreatedTstamp, + "event": "unstruct", + "v_collector": "ssc-0.15.0-googlepubsub", + "network_userid": "d26822f5-52cc-4292-8f77-14ef6b7a27e2", 
+ "event_name": "add_to_cart", + "event_vendor": "com.snowplowanalytics.snowplow", + "event_format": "jsonschema", + "platform": "pc", + "etl_tstamp": testJsEtlTstamp, + "collector_tstamp": testJsCollectorTstamp, + "user_id": "user", + "dvce_sent_tstamp": testJsDvceSentTstamp, + "derived_tstamp": testJsDerivedTstamp, + "event_id": "e9234345-f042-46ad-b1aa-424464066a33", + "v_tracker": "py-0.8.2", + "v_etl": "beam-enrich-0.2.0-common-0.36.0", + "user_ipaddress": "1.2.3.4", + "unstruct_event_com_snowplowanalytics_snowplow_add_to_cart_1": map[string]interface{}{ + "quantity": float64(2), + "unitPrice": 32.4, + "currency": "GBP", + "sku": "item41", + }, + "contexts_nl_basjes_yauaa_context_1": []interface{}{ + map[string]interface{}{ + "deviceName": "Unknown", + "layoutEngineVersionMajor": "??", + "operatingSystemName": "Unknown", + "deviceClass": "Unknown", + "agentVersion": "2.21.0", + "layoutEngineName": "Unknown", + "layoutEngineClass": "Unknown", + "agentName": "python-requests", + "agentNameVersion": "python-requests 2.21.0", + "operatingSystemVersion": "??", + "agentClass": "Special", + "deviceBrand": "Unknown", + "agentVersionMajor": "2", + "agentNameVersionMajor": "python-requests 2", + "operatingSystemClass": "Unknown", + "layoutEngineVersion": "??", + }, + }, + "useragent": "python-requests/2.21.0", +} + +var testJsTsv = []byte(`test-data<> pc 2019-05-10 14:40:37.436 2019-05-10 14:40:35.972 2019-05-10 14:40:35.551 unstruct e9234345-f042-46ad-b1aa-424464066a33 py-0.8.2 ssc-0.15.0-googlepubsub beam-enrich-0.2.0-common-0.36.0 user 1.2.3.4 d26822f5-52cc-4292-8f77-14ef6b7a27e2 {"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/add_to_cart/jsonschema/1-0-0","data":{"sku":"item41","quantity":2,"unitPrice":32.4,"currency":"GBP"}}} python-requests/2.21.0 2019-05-10 14:40:35.000 
{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:nl.basjes/yauaa_context/jsonschema/1-0-0","data":{"deviceBrand":"Unknown","deviceName":"Unknown","operatingSystemName":"Unknown","agentVersionMajor":"2","layoutEngineVersionMajor":"??","deviceClass":"Unknown","agentNameVersionMajor":"python-requests 2","operatingSystemClass":"Unknown","layoutEngineName":"Unknown","agentName":"python-requests","agentVersion":"2.21.0","layoutEngineClass":"Unknown","agentNameVersion":"python-requests 2.21.0","operatingSystemVersion":"??","agentClass":"Special","layoutEngineVersion":"??"}}]} 2019-05-10 14:40:35.972 com.snowplowanalytics.snowplow add_to_cart jsonschema 1-0-0 `) + +// corresponding JSON to previous TSV +var testJsJSON = []byte(`{"app_id":"test-data<>","collector_tstamp":"2019-05-10T14:40:35.972Z","contexts_nl_basjes_yauaa_context_1":[{"agentClass":"Special","agentName":"python-requests","agentNameVersion":"python-requests 2.21.0","agentNameVersionMajor":"python-requests 
2","agentVersion":"2.21.0","agentVersionMajor":"2","deviceBrand":"Unknown","deviceClass":"Unknown","deviceName":"Unknown","layoutEngineClass":"Unknown","layoutEngineName":"Unknown","layoutEngineVersion":"??","layoutEngineVersionMajor":"??","operatingSystemClass":"Unknown","operatingSystemName":"Unknown","operatingSystemVersion":"??"}],"derived_tstamp":"2019-05-10T14:40:35.972Z","dvce_created_tstamp":"2019-05-10T14:40:35.551Z","dvce_sent_tstamp":"2019-05-10T14:40:35Z","etl_tstamp":"2019-05-10T14:40:37.436Z","event":"unstruct","event_format":"jsonschema","event_id":"e9234345-f042-46ad-b1aa-424464066a33","event_name":"add_to_cart","event_vendor":"com.snowplowanalytics.snowplow","event_version":"1-0-0","network_userid":"d26822f5-52cc-4292-8f77-14ef6b7a27e2","platform":"pc","unstruct_event_com_snowplowanalytics_snowplow_add_to_cart_1":{"currency":"GBP","quantity":2,"sku":"item41","unitPrice":32.4},"user_id":"user","user_ipaddress":"1.2.3.4","useragent":"python-requests/2.21.0","v_collector":"ssc-0.15.0-googlepubsub","v_etl":"beam-enrich-0.2.0-common-0.36.0","v_tracker":"py-0.8.2"}`) + +// json's changed and stringified inside JS +var testJsJSONChanged1 = []byte(`{"app_id_CHANGED":"test-data<>","collector_tstamp":"2019-05-10T14:40:35.972Z","contexts_nl_basjes_yauaa_context_1":[{"agentClass":"Special","agentName":"python-requests","agentNameVersion":"python-requests 2.21.0","agentNameVersionMajor":"python-requests 
2","agentVersion":"2.21.0","agentVersionMajor":"2","deviceBrand":"Unknown","deviceClass":"Unknown","deviceName":"Unknown","layoutEngineClass":"Unknown","layoutEngineName":"Unknown","layoutEngineVersion":"??","layoutEngineVersionMajor":"??","operatingSystemClass":"Unknown","operatingSystemName":"Unknown","operatingSystemVersion":"??"}],"derived_tstamp":"2019-05-10T14:40:35.972Z","dvce_created_tstamp":"2019-05-10T14:40:35.551Z","dvce_sent_tstamp":"2019-05-10T14:40:35Z","etl_tstamp":"2019-05-10T14:40:37.436Z","event":"unstruct","event_format":"jsonschema","event_id":"e9234345-f042-46ad-b1aa-424464066a33","event_name":"add_to_cart","event_vendor":"com.snowplowanalytics.snowplow","event_version":"1-0-0","network_userid":"d26822f5-52cc-4292-8f77-14ef6b7a27e2","platform":"pc","unstruct_event_com_snowplowanalytics_snowplow_add_to_cart_1":{"currency":"GBP","quantity":2,"sku":"item41","unitPrice":32.4},"user_id":"user","user_ipaddress":"1.2.3.4","useragent":"python-requests/2.21.0","v_collector":"ssc-0.15.0-googlepubsub","v_etl":"beam-enrich-0.2.0-common-0.36.0","v_tracker":"py-0.8.2"}`) + +var testJsJSONChanged2 = []byte(`{"collector_tstamp":"2019-05-10T14:40:35.972Z","contexts_nl_basjes_yauaa_context_1":[{"agentClass":"Special","agentName":"python-requests","agentNameVersion":"python-requests 2.21.0","agentNameVersionMajor":"python-requests 
2","agentVersion":"2.21.0","agentVersionMajor":"2","deviceBrand":"Unknown","deviceClass":"Unknown","deviceName":"Unknown","layoutEngineClass":"Unknown","layoutEngineName":"Unknown","layoutEngineVersion":"??","layoutEngineVersionMajor":"??","operatingSystemClass":"Unknown","operatingSystemName":"Unknown","operatingSystemVersion":"??"}],"derived_tstamp":"2019-05-10T14:40:35.972Z","dvce_created_tstamp":"2019-05-10T14:40:35.551Z","dvce_sent_tstamp":"2019-05-10T14:40:35Z","etl_tstamp":"2019-05-10T14:40:37.436Z","event":"unstruct","event_format":"jsonschema","event_id":"e9234345-f042-46ad-b1aa-424464066a33","event_name":"add_to_cart","event_vendor":"com.snowplowanalytics.snowplow","event_version":"1-0-0","network_userid":"d26822f5-52cc-4292-8f77-14ef6b7a27e2","platform":"pc","unstruct_event_com_snowplowanalytics_snowplow_add_to_cart_1":{"currency":"GBP","quantity":2,"sku":"item41","unitPrice":32.4},"user_id":"user","user_ipaddress":"1.2.3.4","useragent":"python-requests/2.21.0","v_collector":"ssc-0.15.0-googlepubsub","v_etl":"beam-enrich-0.2.0-common-0.36.0","v_tracker":"py-0.8.2","app_id_CHANGED":"test-data<>"}`) diff --git a/pkg/transform/engine_lua.go b/pkg/transform/engine_lua.go new file mode 100644 index 00000000..bd1f5ba5 --- /dev/null +++ b/pkg/transform/engine_lua.go @@ -0,0 +1,427 @@ +// PROPRIETARY AND CONFIDENTIAL +// +// Unauthorized copying of this file via any medium is strictly prohibited. +// +// Copyright (c) 2020-2022 Snowplow Analytics Ltd. All rights reserved. + +package transform + +import ( + "context" + "encoding/base64" + "fmt" + "strings" + "time" + + gojson "github.com/goccy/go-json" + "github.com/mitchellh/mapstructure" + "github.com/yuin/gluamapper" + lua "github.com/yuin/gopher-lua" + luaparse "github.com/yuin/gopher-lua/parse" + luajson "layeh.com/gopher-json" + + "github.com/snowplow-devops/stream-replicator/pkg/models" +) + +// luaEngineConfig configures the Lua Engine. 
+type luaEngineConfig struct { + SourceB64 string `hcl:"source_b64" env:"TRANSFORMATION_LUA_SOURCE_B64"` + RunTimeout int `hcl:"timeout_sec,optional" env:"TRANSFORMATION_LUA_TIMEOUT_SEC"` + Sandbox bool `hcl:"sandbox,optional" env:"TRANSFORMATION_LUA_SANDBOX"` + SpMode bool `hcl:"snowplow_mode,optional" env:"TRANSFORMATION_LUA_SNOWPLOW_MODE"` +} + +// luaEngine handles the provision of a Lua runtime to run transformations. +type luaEngine struct { + Code *lua.FunctionProto + RunTimeout time.Duration + Options *lua.Options + SpMode bool +} + +// newLuaEngine returns a Lua Engine from a luaEngineConfig. +func newLuaEngine(c *luaEngineConfig) (*luaEngine, error) { + luaSrc, err := base64.StdEncoding.DecodeString(c.SourceB64) + if err != nil { + return nil, err + } + + compiledCode, err := compileLuaCode(string(luaSrc), c.SourceB64) + if err != nil { + return nil, err + } + + eng := &luaEngine{ + Code: compiledCode, + RunTimeout: time.Duration(c.RunTimeout) * time.Second, + Options: &lua.Options{SkipOpenLibs: c.Sandbox}, + SpMode: c.SpMode, + } + + return eng, nil +} + +// The luaEngineAdapter type is an adapter for functions to be used as +// pluggable components for Lua Engine. It implements the Pluggable interface. +type luaEngineAdapter func(i interface{}) (interface{}, error) + +// Create implements the ComponentCreator interface. +func (f luaEngineAdapter) Create(i interface{}) (interface{}, error) { + return f(i) +} + +// ProvideDefault implements the ComponentConfigurable interface. +func (f luaEngineAdapter) ProvideDefault() (interface{}, error) { + // Provide defaults for the optional parameters + // whose default is not their zero value. + cfg := &luaEngineConfig{ + RunTimeout: 5, + Sandbox: true, + } + + return cfg, nil +} + +// adaptLuaEngineFunc returns a luaEngineAdapter. 
+func adaptLuaEngineFunc(f func(c *luaEngineConfig) (*luaEngine, error)) luaEngineAdapter { + return func(i interface{}) (interface{}, error) { + cfg, ok := i.(*luaEngineConfig) + if !ok { + return nil, fmt.Errorf("invalid input, expected luaEngineConfig") + } + + return f(cfg) + } +} + +// LuaLayer returns the Pluggable transformation layer implemented in Lua. +func LuaLayer() interface{} { + return adaptLuaEngineFunc(newLuaEngine) +} + +// SmokeTest implements SmokeTester. +func (e *luaEngine) SmokeTest(funcName string) error { + // setup the Lua state + L := lua.NewState(*e.Options) // L is ptr + defer L.Close() + + d := time.Now().Add(e.RunTimeout) + ctx, cancel := context.WithDeadline(context.Background(), d) + defer cancel() + L.SetContext(ctx) + + return initVM(e, L, funcName) +} + +// MakeFunction implements FunctionMaker. +func (e *luaEngine) MakeFunction(funcName string) TransformationFunction { + + return func(message *models.Message, interState interface{}) (*models.Message, *models.Message, *models.Message, interface{}) { + // making input + input, err := mkLuaEngineInput(e, message, interState) + if err != nil { + message.SetError(fmt.Errorf("failed making input for the Lua runtime: %q", err.Error())) + return nil, nil, message, nil + } + + // setup the Lua state + L := lua.NewState(*e.Options) + defer L.Close() + + d := time.Now().Add(e.RunTimeout) + ctx, cancel := context.WithDeadline(context.Background(), d) + defer cancel() + L.SetContext(ctx) + + err = initVM(e, L, funcName) + if err != nil { + message.SetError(fmt.Errorf("failed initializing Lua runtime: %q", err.Error())) + return nil, nil, message, nil + } + + // running + err = L.CallByParam(lua.P{ + Fn: L.GetGlobal(funcName), // name of Lua function + NRet: 1, // num of return values + Protect: true, // don't panic + }, input) + if err != nil { + // runtime error counts as failure + runErr := fmt.Errorf("error running Lua function %q: %q", funcName, err.Error()) + message.SetError(runErr) + 
return nil, nil, message, nil + } + + // validating output + protocol, err := validateLuaEngineOut(L.Get(-1)) + if err != nil { + message.SetError(err) + return nil, nil, message, nil + } + + // filtering - keeping same behaviour with spEnrichedFilter + if protocol.FilterOut == true { + return nil, message, nil, nil + } + + // handling data + encode := false + switch protoData := protocol.Data.(type) { + case string: + message.Data = []byte(protoData) + case map[string]interface{}: + encode = true + case map[interface{}]interface{}: + encode = true + siData := toStringIfaceMap(protoData) + protocol.Data = siData + default: + message.SetError(fmt.Errorf("invalid return type from Lua transformation; expected string or table")) + return nil, nil, message, nil + } + + // encode + if encode { + encoded, err := gojson.MarshalWithOption(protocol.Data, gojson.DisableHTMLEscape()) + if err != nil { + message.SetError(fmt.Errorf("error encoding message data")) + return nil, nil, message, nil + } + message.Data = encoded + } + + // setting pk if needed + pk := protocol.PartitionKey + if pk != "" && message.PartitionKey != pk { + message.PartitionKey = pk + } + + return message, nil, nil, protocol + + } +} + +// compileLuaCode compiles lua code. +// Since lua.NewState is not goroutine-safe, we spin a new state for every +// transformation. The reason for this function is to allow us to at least share +// the compiled bytecode (which is read-only and thus safe) and so run only once +// the load, parse and compile steps, which are implicitly run by the alternative +// lua.DoString. 
+// see also: +// https://github.com/yuin/gopher-lua/pull/193 +// https://github.com/yuin/gopher-lua#sharing-lua-byte-code-between-lstates +func compileLuaCode(code, name string) (*lua.FunctionProto, error) { + reader := strings.NewReader(code) + chunk, err := luaparse.Parse(reader, code) + if err != nil { + return nil, err + } + proto, err := lua.Compile(chunk, name) + if err != nil { + return nil, err + } + return proto, nil +} + +// loadLuaCode loads compiled Lua code into a lua state +func loadLuaCode(ls *lua.LState, proto *lua.FunctionProto) error { + lfunc := ls.NewFunctionFromProto(proto) + ls.Push(lfunc) + + // https://github.com/yuin/gopher-lua/blob/f4c35e4016d9d8580b007ebaeb68ecd8e0b09f1c/_state.go#L1811 + return ls.PCall(0, lua.MultRet, nil) +} + +// initVM performs the initialization steps for a Lua state. +func initVM(e *luaEngine, L *lua.LState, funcName string) error { + if e.Options.SkipOpenLibs == false { + luajson.Preload(L) + } + + err := loadLuaCode(L, e.Code) + if err != nil { + return fmt.Errorf("could not load lua code: %q", err) + } + + if _, ok := L.GetGlobal(funcName).(*lua.LFunction); !ok { + return fmt.Errorf("global Lua function not found: %q", funcName) + } + + return nil +} + +// mkLuaEngineInput describes the process of constructing input to Lua engine. +// No side effects. 
+func mkLuaEngineInput(e *luaEngine, message *models.Message, interState interface{}) (*lua.LTable, error) { + if interState != nil { + if i, ok := interState.(*EngineProtocol); ok { + return toLuaTable(i) + } + } + + candidate := &EngineProtocol{ + Data: string(message.Data), + } + + if !e.SpMode { + return toLuaTable(candidate) + } + + parsedMessage, err := intermediateAsSpEnrichedParsed(interState, message) + if err != nil { + // if spMode, error for non Snowplow enriched event data + return nil, err + } + + spMap, err := parsedMessage.ToMap() + if err != nil { + return nil, err + } + candidate.Data = spMap + + return toLuaTable(candidate) +} + +// toLuaTable +func toLuaTable(p *EngineProtocol) (*lua.LTable, error) { + var tmpMap map[string]interface{} + + err := mapstructure.Decode(p, &tmpMap) + if err != nil { + return nil, fmt.Errorf("error decoding to map") + } + + return mapToLTable(tmpMap) +} + +// mapToLTable converts a Go map to a lua table +// see: https://github.com/yuin/gopher-lua/issues/160#issuecomment-447608033 +func mapToLTable(m map[string]interface{}) (*lua.LTable, error) { + timeLayout := "2006-01-02T15:04:05.999Z07:00" + + // Main table pointer + ltbl := &lua.LTable{} + + // Loop map + for key, val := range m { + + switch val.(type) { + case float64: + ltbl.RawSetString(key, lua.LNumber(val.(float64))) + case int64: + ltbl.RawSetString(key, lua.LNumber(val.(int64))) + case string: + ltbl.RawSetString(key, lua.LString(val.(string))) + case bool: + ltbl.RawSetString(key, lua.LBool(val.(bool))) + case []byte: + ltbl.RawSetString(key, lua.LString(string(val.([]byte)))) + case map[string]interface{}: + // Get table from map + tmp, err := mapToLTable(val.(map[string]interface{})) + if err != nil { + return nil, err + } + ltbl.RawSetString(key, tmp) + case time.Time: + t := val.(time.Time).Format(timeLayout) + ltbl.RawSetString(key, lua.LString(t)) + case []map[string]interface{}: + // Create slice table + sliceTable := &lua.LTable{} + for _, vv := 
range val.([]map[string]interface{}) { + next, err := mapToLTable(vv) + if err != nil { + return nil, err + } + sliceTable.Append(next) + } + ltbl.RawSetString(key, sliceTable) + case []interface{}: + // Create slice table + sliceTable := &lua.LTable{} + for _, vv := range val.([]interface{}) { + switch vv.(type) { + case map[string]interface{}: + // Convert map to table + m, err := mapToLTable(vv.(map[string]interface{})) + if err != nil { + return nil, err + } + sliceTable.Append(m) + case float64: + sliceTable.Append(lua.LNumber(vv.(float64))) + case string: + sliceTable.Append(lua.LString(vv.(string))) + case bool: + sliceTable.Append(lua.LBool(vv.(bool))) + } + } + + // Append to main table + ltbl.RawSetString(key, sliceTable) + } + } + + return ltbl, nil +} + +// validateLuaEngineOut validates the value returned from the Lua engine is a +// Lua Table (lua.LTable) and that it maps to EngineProtocol. +func validateLuaEngineOut(output interface{}) (*EngineProtocol, error) { + if output == nil { + return nil, fmt.Errorf("invalid return type from Lua transformation; got nil") + } + + if luaTablePtr, ok := output.(*lua.LTable); ok { + result := &EngineProtocol{} + luaMapper := gluamapper.NewMapper(gluamapper.Option{ + NameFunc: gluamapper.Id, + }) + + err := luaMapper.Map(luaTablePtr, result) + if err != nil { + return nil, fmt.Errorf("protocol violation in return value from Lua transformation") + } + + return result, nil + } + + return nil, fmt.Errorf("invalid return type from Lua transformation; expected Lua Table") +} + +// toStringIfaceMap converts map[interface{}]interface{} to map[string]interface. +// This function is used in Lua Engine because of how gluamapper actually maps +// lua.LTable to Go map. 
+// see:https://github.com/yuin/gluamapper/blob/d836955830e75240d46ce9f0e6d148d94f2e1d3a/gluamapper.go#L44 +func toStringIfaceMap(interfaceMap map[interface{}]interface{}) map[string]interface{} { + result := make(map[string]interface{}) + for key, val := range interfaceMap { + result[fmt.Sprintf("%v", key)] = doValue(val) + } + + return result +} + +// doValue is a helper for toStringIfaceMap, to cover for values that are +// []interface{} and map[interface{}]interface. +func doValue(value interface{}) interface{} { + switch value := value.(type) { + case []interface{}: + return doIfaceSlice(value) + case map[interface{}]interface{}: + return toStringIfaceMap(value) + default: + return value + } +} + +// doIfaceSlice is a helper for doValue to handle interface slices. +func doIfaceSlice(iSlice []interface{}) []interface{} { + result := make([]interface{}, len(iSlice)) + for i, val := range iSlice { + result[i] = doValue(val) + } + + return result +} diff --git a/pkg/transform/engine_lua_test.go b/pkg/transform/engine_lua_test.go new file mode 100644 index 00000000..a1238ffd --- /dev/null +++ b/pkg/transform/engine_lua_test.go @@ -0,0 +1,1958 @@ +// PROPRIETARY AND CONFIDENTIAL +// +// Unauthorized copying of this file via any medium is strictly prohibited. +// +// Copyright (c) 2020-2022 Snowplow Analytics Ltd. All rights reserved. 
+ +package transform + +import ( + "encoding/base64" + "fmt" + "path/filepath" + "reflect" + "strings" + "testing" + + "github.com/davecgh/go-spew/spew" + "github.com/stretchr/testify/assert" + + config "github.com/snowplow-devops/stream-replicator/config" + "github.com/snowplow-devops/stream-replicator/pkg/models" +) + +func TestLuaEngineConfig_ENV(t *testing.T) { + testCases := []struct { + Name string + Plug config.Pluggable + Expected interface{} + }{ + { + Name: "transform-lua-from-env", + Plug: testLuaEngineAdapter(testLuaEngineFunc), + Expected: &luaEngineConfig{ + SourceB64: "CglmdW5jdGlvbiBmb28oeCkKICAgICAgICAgICByZXR1cm4geAogICAgICAgIGVuZAoJ", + RunTimeout: 10, + Sandbox: false, + }, + }, + } + + for _, tt := range testCases { + t.Run(tt.Name, func(t *testing.T) { + assert := assert.New(t) + + t.Setenv("STREAM_REPLICATOR_CONFIG_FILE", "") + + t.Setenv("MESSAGE_TRANSFORMATION", "lua:fun") + t.Setenv("TRANSFORMATION_LAYER_NAME", "lua") + + t.Setenv("TRANSFORMATION_LUA_SOURCE_B64", "CglmdW5jdGlvbiBmb28oeCkKICAgICAgICAgICByZXR1cm4geAogICAgICAgIGVuZAoJ") + t.Setenv("TRANSFORMATION_LUA_TIMEOUT_SEC", "10") + t.Setenv("TRANSFORMATION_LUA_SANDBOX", "false") + + c, err := config.NewConfig() + assert.NotNil(c) + if err != nil { + t.Fatalf("function NewConfig failed with error: %q", err.Error()) + } + + engine := c.Data.Transform.Layer + decoderOpts := &config.DecoderOptions{ + Input: engine.Body, + } + + result, err := c.CreateComponent(tt.Plug, decoderOpts) + assert.NotNil(result) + assert.Nil(err) + + if !reflect.DeepEqual(result, tt.Expected) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(result), + spew.Sdump(tt.Expected)) + } + }) + } +} + +func TestLuaEngineConfig_HCL(t *testing.T) { + fixturesDir := "../../config/test-fixtures" + testCases := []struct { + File string + Plug config.Pluggable + Expected interface{} + }{ + { + File: "transform-lua-simple.hcl", + Plug: testLuaEngineAdapter(testLuaEngineFunc), + Expected: &luaEngineConfig{ + SourceB64: 
"CglmdW5jdGlvbiBmb28oeCkKICAgICAgICAgICByZXR1cm4geAogICAgICAgIGVuZAoJ", + RunTimeout: 5, + Sandbox: true, + }, + }, + { + File: "transform-lua-extended.hcl", + Plug: testLuaEngineAdapter(testLuaEngineFunc), + Expected: &luaEngineConfig{ + SourceB64: "CglmdW5jdGlvbiBmb28oeCkKICAgICAgICAgICByZXR1cm4geAogICAgICAgIGVuZAoJ", + RunTimeout: 10, + Sandbox: false, + }, + }, + } + + for _, tt := range testCases { + t.Run(tt.File, func(t *testing.T) { + assert := assert.New(t) + + filename := filepath.Join(fixturesDir, tt.File) + t.Setenv("STREAM_REPLICATOR_CONFIG_FILE", filename) + + c, err := config.NewConfig() + assert.NotNil(c) + if err != nil { + t.Fatalf("function NewConfig failed with error: %q", err.Error()) + } + + engine := c.Data.Transform.Layer + decoderOpts := &config.DecoderOptions{ + Input: engine.Body, + } + + result, err := c.CreateComponent(tt.Plug, decoderOpts) + assert.NotNil(result) + assert.Nil(err) + + if !reflect.DeepEqual(result, tt.Expected) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(result), + spew.Sdump(tt.Expected)) + } + }) + } +} + +func TestLuaLayer(t *testing.T) { + layer := LuaLayer() + if _, ok := layer.(config.Pluggable); !ok { + t.Errorf("invalid interface returned from LuaLayer") + } +} + +func TestLuaEngineMakeFunction_SpModeFalse_IntermediateNil(t *testing.T) { + var testInterState interface{} = nil + var testSpMode bool = false + testCases := []struct { + Src string + FunName string + Sandbox bool + Input *models.Message + Expected map[string]*models.Message + ExpInterState interface{} + Error error + }{ + { + Src: ` +function identity(x) + return x +end +`, + FunName: "identity", + Sandbox: true, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: 
"asdf", + }, + Error: nil, + }, + { + Src: ` +function concatHello(x) + x.Data = "Hello:" .. x.Data + return x +end +`, + FunName: "concatHello", + Sandbox: true, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": { + Data: []byte("Hello:asdf"), + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: "Hello:asdf", + }, + Error: nil, + }, + { + Src: ` +function filterIn(x) + x.FilterOut = false + return x +end +`, + FunName: "filterIn", + Sandbox: true, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: "asdf", + }, + Error: nil, + }, + { + Src: ` +function filterOut(x) + if type(x.Data) == "string" then + return { FilterOut = true } + end + return { FilterOut = false } +end +`, + FunName: "filterOut", + Sandbox: false, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + "failed": nil, + }, + ExpInterState: nil, + Error: nil, + }, + { + Src: ` +local json = require("json") + +function jsonIdentity(x) + local dat = x["Data"] + local jsonObj, decodeErr = json.decode(dat) + if decodeErr then error(decodeErr) end + + local result, encodeErr = json.encode(jsonObj) + if encodeErr then error(encodeErr) end + + x.Data = result + return x +end +`, + FunName: "jsonIdentity", + Sandbox: false, + Input: &models.Message{ + Data: snowplowJSON1, + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": { + Data: 
snowplowJSON1, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(snowplowJSON1), + }, + Error: nil, + }, + { + Src: ` +local json = require("json") + +function jsonTransformFieldName(x) + local data = x["Data"] + local jsonObj, decodeErr = json.decode(data) + if decodeErr then error(decodeErr) end + + jsonObj["app_id_CHANGED"] = jsonObj["app_id"] + jsonObj["app_id"] = nil + + local result, encodeErr = json.encode(jsonObj) + if encodeErr then error(encodeErr) end + + return { Data = result } +end +`, + FunName: "jsonTransformFieldName", + Sandbox: false, + Input: &models.Message{ + Data: snowplowJSON1, + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": { + Data: snowplowJSON1ChangedLua, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(snowplowJSON1ChangedLua), + }, + Error: nil, + }, + { + Src: ` +local json = require("json") + +function jsonFilterOut(x) + local jsonObj, decodeErr = json.decode(x["Data"]) + if decodeErr then error(decodeErr) end + + if jsonObj["app_id"] == "filterMeOut" then + return { FilterOut = false, Data = x["Data"] } + else + return { FilterOut = true } + end +end +`, + FunName: "jsonFilterOut", + Sandbox: false, + Input: &models.Message{ + Data: snowplowJSON1, + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": { + Data: snowplowJSON1, + PartitionKey: "some-test-key", + }, + "failed": nil, + }, + ExpInterState: nil, + Error: nil, + }, + { + Src: ` +function retWrongType(x) + return 0 +end +`, + FunName: "retWrongType", + Sandbox: true, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + 
Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("invalid return type from Lua transformation; expected Lua Table"), + }, + { + Src: ` +function noReturn(x) +end +`, + FunName: "noReturn", + Sandbox: true, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("invalid return type from Lua transformation; expected Lua Table"), + }, + { + Src: ` +function returnNil(x) + return nil +end +`, + FunName: "returnNil", + Sandbox: true, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("invalid return type from Lua transformation; expected Lua Table"), + }, + { + Src: ` +function causeRuntimeError(x) + return 2 * x +end +`, + FunName: "causeRuntimeError", + Sandbox: true, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("error running Lua function \"causeRuntimeError\""), + }, + { + Src: ` +function callError(x) + error("Failed") +end +`, + FunName: "callError", + Sandbox: false, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("error running Lua function \"callError\""), + }, + { + 
Src: ` +local clock = os.clock + +function sleepTenSecs(x) + local t0 = clock() + while clock() - t0 <= 10 do end +end +`, + FunName: "sleepTenSecs", + Sandbox: false, + Input: &models.Message{ + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + Data: []byte("asdf"), + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("context deadline exceeded"), + }, + } + + for _, tt := range testCases { + t.Run(tt.FunName, func(t *testing.T) { + assert := assert.New(t) + + src := base64.StdEncoding.EncodeToString([]byte(tt.Src)) + luaConfig := &luaEngineConfig{ + SourceB64: src, + RunTimeout: 1, + Sandbox: tt.Sandbox, + SpMode: testSpMode, + } + + luaEngine, err := newLuaEngine(luaConfig) + assert.NotNil(luaEngine) + if err != nil { + t.Fatalf("function newLuaEngine failed with error: %q", err.Error()) + } + + if err := luaEngine.SmokeTest(tt.FunName); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + + transFunction := luaEngine.MakeFunction(tt.FunName) + s, f, e, i := transFunction(tt.Input, testInterState) + + if !reflect.DeepEqual(i, tt.ExpInterState) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(i), + spew.Sdump(tt.ExpInterState)) + } + + if e != nil { + gotErr := e.GetError() + expErr := tt.Error + if expErr == nil { + t.Fatalf("got unexpected error: %s", gotErr.Error()) + } + + if !strings.Contains(gotErr.Error(), expErr.Error()) { + t.Errorf("GOT_ERROR:\n%s\n does not contain\nEXPECTED_ERROR:\n%s", + gotErr.Error(), + expErr.Error()) + } + } + + assertMessagesCompareLua(t, s, tt.Expected["success"]) + assertMessagesCompareLua(t, f, tt.Expected["filtered"]) + assertMessagesCompareLua(t, e, tt.Expected["failed"]) + }) + } +} + +func TestLuaEngineMakeFunction_SpModeTrue_IntermediateNil(t *testing.T) { + var testInterState interface{} = nil + var testSpMode bool = true + testCases := []struct { + Scenario 
string + Src string + FunName string + Sandbox bool + Input *models.Message + Expected map[string]*models.Message + ExpInterState interface{} + Error error + }{ + { + Scenario: "identity", + Src: ` +function identity(x) + return x +end +`, + FunName: "identity", + Sandbox: false, + Input: &models.Message{ + Data: testLuaTsv, + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": { + Data: testLuaJSON, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: testLuaMap, + }, + Error: nil, + }, + { + Scenario: "filtering", + Src: ` +function filterOut(input) + -- input is a lua table + local spData = input["Data"] + if spData["app_id"] == "myApp" then + return input; + end + return { FilterOut = true } +end +`, + FunName: "filterOut", + Sandbox: false, + Input: &models.Message{ + Data: testLuaTsv, + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": { + Data: testLuaTsv, + PartitionKey: "some-test-key", + }, + "failed": nil, + }, + ExpInterState: nil, + Error: nil, + }, + { + Scenario: "filteringOut_ignoresData", + Src: ` +function filterOutIgnores(x) + local ret = { + FilterOut = true, + Data = "shouldNotAppear", + PartitionKey = "notThis" + } + return ret +end +`, + FunName: "filterOutIgnores", + Sandbox: false, + Input: &models.Message{ + Data: testLuaTsv, + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": { + Data: testLuaTsv, + PartitionKey: "some-test-key", + }, + "failed": nil, + }, + ExpInterState: nil, + Error: nil, + }, + { + Scenario: "non_Snowplow_enriched_to_failed", + Src: ` +function willNotRun(x) + return x +end +`, + FunName: "willNotRun", + Sandbox: false, + Input: &models.Message{ + Data: []byte("nonSpEnrichedEvent"), + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + 
"success": nil, + "filtered": nil, + "failed": { + Data: []byte("nonSpEnrichedEvent"), + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("Cannot parse"), + }, + { + Scenario: "return_wrong_type", + Src: ` +function returnWrongType(x) + return 0 +end +`, + FunName: "returnWrongType", + Sandbox: true, + Input: &models.Message{ + Data: testLuaTsv, + PartitionKey: "some-test-key", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + Data: testLuaTsv, + PartitionKey: "some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("invalid return type from Lua transformation; expected Lua Table"), + }, + } + + for _, tt := range testCases { + t.Run(tt.Scenario, func(t *testing.T) { + assert := assert.New(t) + + src := base64.StdEncoding.EncodeToString([]byte(tt.Src)) + luaConfig := &luaEngineConfig{ + SourceB64: src, + RunTimeout: 1, + Sandbox: tt.Sandbox, + SpMode: testSpMode, + } + + luaEngine, err := newLuaEngine(luaConfig) + assert.NotNil(luaEngine) + if err != nil { + t.Fatalf("function newLuaEngine failed with error: %q", err.Error()) + } + + if err := luaEngine.SmokeTest(tt.FunName); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + + transFunction := luaEngine.MakeFunction(tt.FunName) + s, f, e, i := transFunction(tt.Input, testInterState) + + if !reflect.DeepEqual(i, tt.ExpInterState) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(i), + spew.Sdump(tt.ExpInterState)) + } + + if e != nil { + gotErr := e.GetError() + expErr := tt.Error + if expErr == nil { + t.Fatalf("got unexpected error: %s", gotErr.Error()) + } + + if !strings.Contains(gotErr.Error(), expErr.Error()) { + t.Errorf("GOT_ERROR:\n%s\n does not contain\nEXPECTED_ERROR:\n%s", + gotErr.Error(), + expErr.Error()) + } + } + + assertMessagesCompareLua(t, s, tt.Expected["success"]) + assertMessagesCompareLua(t, f, tt.Expected["filtered"]) + assertMessagesCompareLua(t, e, 
tt.Expected["failed"]) + }) + } +} + +func TestLuaEngineMakeFunction_IntermediateState_SpModeFalse(t *testing.T) { + testSpMode := false + testCases := []struct { + Scenario string + Src string + FunName string + Sandbox bool + Input *models.Message + InterState interface{} + Expected map[string]*models.Message + ExpInterState interface{} + Error error + }{ + { + Scenario: "intermediateState_EngineProtocol_Map", + Src: ` +function identity(x) + return x +end +`, + FunName: "identity", + Sandbox: true, + Input: &models.Message{ + Data: testLuaJSON, + PartitionKey: "some-test-key", + }, + InterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: testLuaMap, + }, + Expected: map[string]*models.Message{ + "success": { + Data: testLuaJSON, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: testLuaMap, + }, + Error: nil, + }, + { + Scenario: "intermediateState_EngineProtocol_String", + Src: ` +function identity(x) + return x +end +`, + FunName: "identity", + Sandbox: true, + Input: &models.Message{ + Data: testLuaJSON, + PartitionKey: "some-test-key", + }, + InterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(testLuaJSON), + }, + Expected: map[string]*models.Message{ + "success": { + Data: testLuaJSON, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(testLuaJSON), + }, + Error: nil, + }, + { + Scenario: "intermediateState_not_EngineProtocol_nonSpEnriched", + Src: ` +function identity(x) + return x; +end +`, + FunName: "identity", + Sandbox: true, + Input: &models.Message{ + Data: testLuaJSON, + PartitionKey: "some-test-key", + }, + InterState: "notEngineProtocol", + Expected: map[string]*models.Message{ + "success": { + Data: testLuaJSON, + PartitionKey: "some-test-key", + }, + "filtered": nil, + 
"failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(testLuaJSON), + }, + Error: nil, + }, + { + Scenario: "intermediateState_not_EngineProtocol_SpEnriched", + Src: ` +function identity(x) + return x; +end +`, + FunName: "identity", + Sandbox: true, + Input: &models.Message{ + Data: testLuaTsv, + PartitionKey: "some-test-key", + }, + InterState: "notEngineProtocol", + Expected: map[string]*models.Message{ + "success": { + Data: testLuaTsv, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(testLuaTsv), + }, + Error: nil, + }, + } + + for _, tt := range testCases { + t.Run(tt.Scenario, func(t *testing.T) { + assert := assert.New(t) + + src := base64.StdEncoding.EncodeToString([]byte(tt.Src)) + luaConfig := &luaEngineConfig{ + SourceB64: src, + RunTimeout: 1, + Sandbox: tt.Sandbox, + SpMode: testSpMode, + } + + luaEngine, err := newLuaEngine(luaConfig) + assert.NotNil(luaEngine) + if err != nil { + t.Fatalf("function newLuaEngine failed with error: %q", err.Error()) + } + + if err := luaEngine.SmokeTest(tt.FunName); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + + transFunction := luaEngine.MakeFunction(tt.FunName) + s, f, e, i := transFunction(tt.Input, tt.InterState) + + if !reflect.DeepEqual(i, tt.ExpInterState) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(i), + spew.Sdump(tt.ExpInterState)) + } + + if e != nil { + gotErr := e.GetError() + expErr := tt.Error + if expErr == nil { + t.Fatalf("got unexpected error: %s", gotErr.Error()) + } + + if !strings.Contains(gotErr.Error(), expErr.Error()) { + t.Errorf("GOT_ERROR:\n%s\n does not contain\nEXPECTED_ERROR:\n%s", + gotErr.Error(), + expErr.Error()) + } + } + + assertMessagesCompareLua(t, s, tt.Expected["success"]) + assertMessagesCompareLua(t, f, tt.Expected["filtered"]) + assertMessagesCompareLua(t, e, 
tt.Expected["failed"]) + }) + } +} + +func TestLuaEngineMakeFunction_IntermediateState_SpModeTrue(t *testing.T) { + testSpMode := true + + testCases := []struct { + Scenario string + Src string + FunName string + Sandbox bool + Input *models.Message + InterState interface{} + Expected map[string]*models.Message + ExpInterState interface{} + Error error + }{ + { + Scenario: "intermediateState_EngineProtocol_Map", + Src: ` +function identity(x) + return x +end +`, + FunName: "identity", + Sandbox: true, + Input: &models.Message{ + Data: testLuaJSON, + PartitionKey: "some-test-key", + }, + InterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: testLuaMap, + }, + Expected: map[string]*models.Message{ + "success": { + Data: testLuaJSON, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: testLuaMap, + }, + Error: nil, + }, + { + Scenario: "intermediateState_EngineProtocol_String", + Src: ` +function identity(x) + return x +end +`, + FunName: "identity", + Sandbox: true, + Input: &models.Message{ + Data: testLuaJSON, + PartitionKey: "some-test-key", + }, + InterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(testLuaJSON), + }, + Expected: map[string]*models.Message{ + "success": { + Data: testLuaJSON, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: string(testLuaJSON), + }, + Error: nil, + }, + { + Scenario: "intermediateState_notEngineProtocol_notSpEnriched", + Src: ` +function willNotRun(x) + return x +end +`, + FunName: "willNotRun", + Sandbox: true, + Input: &models.Message{ + Data: testLuaJSON, + PartitionKey: "some-test-key", + }, + InterState: "notEngineProtocol", + Expected: map[string]*models.Message{ + "success": nil, + "filtered": nil, + "failed": { + Data: testLuaJSON, + PartitionKey: 
"some-test-key", + }, + }, + ExpInterState: nil, + Error: fmt.Errorf("Cannot parse"), + }, + { + Scenario: "intermediateState_notEngineProtocol_SpEnriched", + Src: ` +function identity(x) + return x +end +`, + FunName: "identity", + Sandbox: true, + Input: &models.Message{ + Data: testLuaTsv, + PartitionKey: "some-test-key", + }, + InterState: "notEngineProtocol", + Expected: map[string]*models.Message{ + "success": { + Data: testLuaJSON, + PartitionKey: "some-test-key", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "", + Data: testLuaMap, + }, + Error: nil, + }, + } + + for _, tt := range testCases { + t.Run(tt.Scenario, func(t *testing.T) { + assert := assert.New(t) + + src := base64.StdEncoding.EncodeToString([]byte(tt.Src)) + luaConfig := &luaEngineConfig{ + SourceB64: src, + RunTimeout: 1, + Sandbox: tt.Sandbox, + SpMode: testSpMode, + } + + luaEngine, err := newLuaEngine(luaConfig) + assert.NotNil(luaEngine) + if err != nil { + t.Fatalf("function newLuaEngine failed with error: %q", err.Error()) + } + + if err := luaEngine.SmokeTest(tt.FunName); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + + transFunction := luaEngine.MakeFunction(tt.FunName) + s, f, e, i := transFunction(tt.Input, tt.InterState) + + if !reflect.DeepEqual(i, tt.ExpInterState) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(i), + spew.Sdump(tt.ExpInterState)) + } + + if e != nil { + gotErr := e.GetError() + expErr := tt.Error + if expErr == nil { + t.Fatalf("got unexpected error: %s", gotErr.Error()) + } + + if !strings.Contains(gotErr.Error(), expErr.Error()) { + t.Errorf("GOT_ERROR:\n%s\n does not contain\nEXPECTED_ERROR:\n%s", + gotErr.Error(), + expErr.Error()) + } + } + + assertMessagesCompareLua(t, s, tt.Expected["success"]) + assertMessagesCompareLua(t, f, tt.Expected["filtered"]) + assertMessagesCompareLua(t, e, tt.Expected["failed"]) + }) + } +} + +func 
TestLuaEngineMakeFunction_SetPK(t *testing.T) { + var testInterState interface{} = nil + testCases := []struct { + Scenario string + Src string + FunName string + Sandbox bool + SpMode bool + Input *models.Message + Expected map[string]*models.Message + ExpInterState interface{} + Error error + }{ + { + Scenario: "onlySetPk_spModeTrue", + Src: ` +function onlySetPk(x) + x["PartitionKey"] = "newPk" + return x +end +`, + FunName: "onlySetPk", + Sandbox: true, + SpMode: true, + Input: &models.Message{ + Data: testLuaTsv, + PartitionKey: "oldPK", + }, + Expected: map[string]*models.Message{ + "success": { + Data: testLuaJSON, + PartitionKey: "newPk", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "newPk", + Data: testLuaMap, + }, + Error: nil, + }, + { + Scenario: "onlySetPk_spModeFalse", + Src: ` +function onlySetPk(x) + x["PartitionKey"] = "newPk" + return x +end +`, + FunName: "onlySetPk", + Sandbox: true, + SpMode: false, + Input: &models.Message{ + Data: testLuaTsv, + PartitionKey: "oldPK", + }, + Expected: map[string]*models.Message{ + "success": { + Data: testLuaTsv, + PartitionKey: "newPk", + }, + "filtered": nil, + "failed": nil, + }, + ExpInterState: &EngineProtocol{ + FilterOut: false, + PartitionKey: "newPk", + Data: string(testLuaTsv), + }, + Error: nil, + }, + { + Scenario: "filterOutIgnores", + Src: ` +function filterOutIgnores(x) + local ret = { + FilterOut = true, + Data = "shouldNotAppear", + PartitionKey = "notThis" + } + return ret +end +`, + FunName: "filterOutIgnores", + Sandbox: true, + SpMode: true, + Input: &models.Message{ + Data: testLuaTsv, + PartitionKey: "oldPk", + }, + Expected: map[string]*models.Message{ + "success": nil, + "filtered": { + Data: testLuaTsv, + PartitionKey: "oldPk", + }, + "failed": nil, + }, + ExpInterState: nil, + Error: nil, + }, + } + + for _, tt := range testCases { + t.Run(tt.Scenario, func(t *testing.T) { + assert := assert.New(t) + + src := 
base64.StdEncoding.EncodeToString([]byte(tt.Src)) + luaConfig := &luaEngineConfig{ + SourceB64: src, + RunTimeout: 1, + Sandbox: tt.Sandbox, + SpMode: tt.SpMode, + } + + luaEngine, err := newLuaEngine(luaConfig) + assert.NotNil(luaEngine) + if err != nil { + t.Fatalf("function newLuaEngine failed with error: %q", err.Error()) + } + + if err := luaEngine.SmokeTest(tt.FunName); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + + transFunction := luaEngine.MakeFunction(tt.FunName) + s, f, e, i := transFunction(tt.Input, testInterState) + + if !reflect.DeepEqual(i, tt.ExpInterState) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(i), + spew.Sdump(tt.ExpInterState)) + } + + if e != nil { + gotErr := e.GetError() + expErr := tt.Error + if expErr == nil { + t.Fatalf("got unexpected error: %s", gotErr.Error()) + } + + if !strings.Contains(gotErr.Error(), expErr.Error()) { + t.Errorf("GOT_ERROR:\n%s\n does not contain\nEXPECTED_ERROR:\n%s", + gotErr.Error(), + expErr.Error()) + } + } + + assertMessagesCompareLua(t, s, tt.Expected["success"]) + assertMessagesCompareLua(t, f, tt.Expected["filtered"]) + assertMessagesCompareLua(t, e, tt.Expected["failed"]) + }) + } +} + +func TestLuaEngineSmokeTest(t *testing.T) { + testCases := []struct { + Src string + FunName string + Sandbox bool + CompileError error + SmokeError error + }{ + { + Src: ` +function identity(x) + return x +end +`, + FunName: "identity", + Sandbox: true, + CompileError: nil, + SmokeError: nil, + }, + { + Src: ` +function notThisOne(x) + return "something" +end +`, + FunName: "notExists", + Sandbox: true, + CompileError: nil, + SmokeError: fmt.Errorf("global Lua function not found"), + }, + { + Src: ` +local json = require("json") +local clock = os.clock +`, + FunName: "notCalledMissingLibs", + Sandbox: true, + CompileError: nil, + SmokeError: fmt.Errorf("could not load lua code"), + }, + { + Src: ` +function syntaxError(x) + loca y = 0 +end +`, + FunName: "syntaxError", + 
Sandbox: false, + CompileError: fmt.Errorf("error"), + SmokeError: nil, + }, + } + + for _, tt := range testCases { + t.Run(tt.FunName, func(t *testing.T) { + assert := assert.New(t) + + src := base64.StdEncoding.EncodeToString([]byte(tt.Src)) + luaConfig := &luaEngineConfig{ + SourceB64: src, + RunTimeout: 1, + Sandbox: tt.Sandbox, + } + + luaEngine, compileErr := newLuaEngine(luaConfig) + + if compileErr != nil { + if tt.CompileError == nil { + t.Fatalf("got unexpected error while creating newLuaEngine: %s", compileErr.Error()) + } + + if !strings.Contains(compileErr.Error(), tt.CompileError.Error()) { + t.Errorf("newLuaEngine error mismatch\nGOT_ERROR:\n%q\n does not contain\nEXPECTED_ERROR:\n%q", + compileErr.Error(), + tt.CompileError.Error()) + } + } else { + assert.NotNil(luaEngine) + + smoke := luaEngine.SmokeTest(tt.FunName) + expErr := tt.SmokeError + if smoke != nil { + if expErr == nil { + t.Fatalf("got unexpected smoke-test error: %q", smoke.Error()) + } + + if !strings.Contains(smoke.Error(), expErr.Error()) { + t.Errorf("smoke error mismatch\nGOT_ERROR:\n%q\ndoes not contain\nEXPECTED_ERROR:\n%q", + smoke.Error(), + expErr.Error()) + } + } else { + assert.Nil(tt.SmokeError) + } + } + }) + } +} + +func TestLuaEngineWithBuiltins(t *testing.T) { + var expectedGood = []*models.Message{ + { + Data: snowplowJSON1, + PartitionKey: "test-data1", + }, + { + Data: snowplowJSON2, + PartitionKey: "test-data2", + }, + { + Data: snowplowJSON3, + PartitionKey: "test-data3", + }, + } + + srcCode := ` +function identity(x) + return x +end +` + funcName := "identity" + src := base64.StdEncoding.EncodeToString([]byte(srcCode)) + luaConfig := &luaEngineConfig{ + SourceB64: src, + RunTimeout: 1, + Sandbox: true, + } + + luaEngine, err := newLuaEngine(luaConfig) + if err != nil { + t.Fatalf("newLuaEngine failed with error: %q", err) + } + + if err := luaEngine.SmokeTest(funcName); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + + luaFunc := 
luaEngine.MakeFunction(funcName) + setPkToAppID := NewSpEnrichedSetPkFunction("app_id") + spEnrichedToJSON := SpEnrichedToJSON + + testCases := []struct { + Name string + Transformation TransformationApplyFunction + }{ + { + Name: "first", + Transformation: NewTransformation( + setPkToAppID, + spEnrichedToJSON, + luaFunc, + ), + }, + } + + for _, tt := range testCases { + t.Run(tt.Name, func(t *testing.T) { + assert := assert.New(t) + transformMultiple := tt.Transformation + + result := transformMultiple(messages) + assert.NotNil(result) + for i, res := range result.Result { + exp := expectedGood[i] + if !reflect.DeepEqual(res.Data, exp.Data) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(res.Data), + spew.Sdump(exp.Data)) + } + assert.Equal(res.PartitionKey, exp.PartitionKey) + + } + }) + } + +} + +func TestLuaEngineWithBuiltinsSpModeFalse(t *testing.T) { + srcCode := ` +function identity(x) + return x +end + +function setPk(x) + x["PartitionKey"] = "testKey" + return x +end +` + // Lua + src := base64.StdEncoding.EncodeToString([]byte(srcCode)) + luaConfig := &luaEngineConfig{ + SourceB64: src, + RunTimeout: 1, + Sandbox: true, + SpMode: false, + } + + luaEngine, err := newLuaEngine(luaConfig) + if err != nil { + t.Fatalf("newLuaEngine failed with error: %q", err) + } + + if err := luaEngine.SmokeTest("identity"); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + if err := luaEngine.SmokeTest("setPk"); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + + luaFuncID := luaEngine.MakeFunction("identity") + luaFuncPk := luaEngine.MakeFunction("setPk") + + // Builtins + setPkToAppID := NewSpEnrichedSetPkFunction("app_id") + spEnrichedToJSON := SpEnrichedToJSON + + testCases := []struct { + Name string + Transformation TransformationApplyFunction + Input []*models.Message + ExpectedGood []*models.Message + }{ + { + Name: "identity0", + Input: messages, + Transformation: NewTransformation( + luaFuncID, + 
setPkToAppID, + spEnrichedToJSON, + ), + ExpectedGood: []*models.Message{ + { + Data: snowplowJSON1, + PartitionKey: "test-data1", + }, + { + Data: snowplowJSON2, + PartitionKey: "test-data2", + }, + { + Data: snowplowJSON3, + PartitionKey: "test-data3", + }, + }, + }, + { + Name: "identity2", + Input: messages, + Transformation: NewTransformation( + setPkToAppID, + spEnrichedToJSON, + luaFuncID, + ), + ExpectedGood: []*models.Message{ + { + Data: snowplowJSON1, + PartitionKey: "test-data1", + }, + { + Data: snowplowJSON2, + PartitionKey: "test-data2", + }, + { + Data: snowplowJSON3, + PartitionKey: "test-data3", + }, + }, + }, + { + Name: "setPk1", + Input: messages, + Transformation: NewTransformation( + setPkToAppID, + luaFuncPk, + spEnrichedToJSON, + ), + ExpectedGood: []*models.Message{ + { + Data: snowplowJSON1, + PartitionKey: "testKey", + }, + { + Data: snowplowJSON2, + PartitionKey: "testKey", + }, + { + Data: snowplowJSON3, + PartitionKey: "testKey", + }, + }, + }, + } + + for _, tt := range testCases { + t.Run(tt.Name, func(t *testing.T) { + assert := assert.New(t) + + result := tt.Transformation(tt.Input) + assert.NotNil(result) + assert.Equal(len(tt.ExpectedGood), len(result.Result)) + for i, res := range result.Result { + if i < len(tt.ExpectedGood) { + exp := tt.ExpectedGood[i] + if !reflect.DeepEqual(res.Data, exp.Data) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(res.Data), + spew.Sdump(exp.Data)) + } + assert.Equal(res.PartitionKey, exp.PartitionKey) + } + } + }) + } +} + +func TestLuaEngineWithBuiltinsSpModeTrue(t *testing.T) { + srcCode := ` +function identity(x) + return x +end + +function setPk(x) + x["PartitionKey"] = "testKey" + return x +end +` + // Lua + src := base64.StdEncoding.EncodeToString([]byte(srcCode)) + luaConfig := &luaEngineConfig{ + SourceB64: src, + RunTimeout: 1, + Sandbox: true, + SpMode: true, + } + + luaEngine, err := newLuaEngine(luaConfig) + if err != nil { + t.Fatalf("newLuaEngine failed with error: %q", err) + 
} + + if err := luaEngine.SmokeTest("identity"); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + if err := luaEngine.SmokeTest("setPk"); err != nil { + t.Fatalf("smoke-test failed with error: %q", err.Error()) + } + + luaFuncID := luaEngine.MakeFunction("identity") + luaFuncPk := luaEngine.MakeFunction("setPk") + + // Builtins + setPkToAppID := NewSpEnrichedSetPkFunction("app_id") + spEnrichedToJSON := SpEnrichedToJSON + + testCases := []struct { + Name string + Transformation TransformationApplyFunction + Input []*models.Message + ExpectedGood []*models.Message + }{ + { + Name: "identity", + Input: []*models.Message{ + { + Data: testLuaTsv, + PartitionKey: "prevKey", + }, + }, + Transformation: NewTransformation( + setPkToAppID, + spEnrichedToJSON, + luaFuncID, + ), + ExpectedGood: []*models.Message{ + { + Data: testLuaJSON, + PartitionKey: "test-data<>", + }, + }, + }, + { + Name: "setPk", + Input: []*models.Message{ + { + Data: testLuaTsv, + PartitionKey: "prevKey", + }, + }, + Transformation: NewTransformation( + setPkToAppID, + luaFuncPk, + ), + ExpectedGood: []*models.Message{ + { + Data: testLuaJSON, + PartitionKey: "testKey", + }, + }, + }, + { + Name: "mix", + Input: []*models.Message{ + { + Data: testLuaTsv, + PartitionKey: "prevKey", + }, + }, + Transformation: NewTransformation( + setPkToAppID, + luaFuncID, + luaFuncPk, + luaFuncID, + ), + ExpectedGood: []*models.Message{ + { + Data: testLuaJSON, + PartitionKey: "testKey", + }, + }, + }, + } + + for _, tt := range testCases { + t.Run(tt.Name, func(t *testing.T) { + assert := assert.New(t) + + result := tt.Transformation(tt.Input) + assert.NotNil(result) + assert.Equal(len(tt.ExpectedGood), len(result.Result)) + for i, res := range result.Result { + if i < len(tt.ExpectedGood) { + exp := tt.ExpectedGood[i] + if !reflect.DeepEqual(res.Data, exp.Data) { + t.Errorf("GOT:\n%s\nEXPECTED:\n%s", + spew.Sdump(res.Data), + spew.Sdump(exp.Data)) + } + assert.Equal(res.PartitionKey, 
exp.PartitionKey) + } + } + }) + } +} + +func Benchmark_LuaEngine_Passthrough_Sandboxed(b *testing.B) { + b.ReportAllocs() + + srcCode := ` +function identity(x) + return x +end +` + src := base64.StdEncoding.EncodeToString([]byte(srcCode)) + + inputMsg := &models.Message{ + Data: snowplowJSON1, + PartitionKey: "some-test-key", + } + luaConfig := &luaEngineConfig{ + SourceB64: src, + RunTimeout: 5, + Sandbox: true, + } + + luaEngine, err := newLuaEngine(luaConfig) + if err != nil { + b.Fatalf("function newLuaEngine failed with error: %q", err.Error()) + } + + transFunction := luaEngine.MakeFunction("identity") + + for n := 0; n < b.N; n++ { + transFunction(inputMsg, nil) + } +} + +func Benchmark_LuaEngine_Passthrough(b *testing.B) { + b.ReportAllocs() + + srcCode := ` +function identity(x) + return x +end +` + src := base64.StdEncoding.EncodeToString([]byte(srcCode)) + + inputMsg := &models.Message{ + Data: snowplowJSON1, + PartitionKey: "some-test-key", + } + luaConfig := &luaEngineConfig{ + SourceB64: src, + RunTimeout: 5, + Sandbox: false, + } + + luaEngine, err := newLuaEngine(luaConfig) + if err != nil { + b.Fatalf("function newLuaEngine failed with error: %q", err.Error()) + } + + transFunction := luaEngine.MakeFunction("identity") + + for n := 0; n < b.N; n++ { + transFunction(inputMsg, nil) + } +} + +func Benchmark_LuaEngine_Passthrough_Json(b *testing.B) { + b.ReportAllocs() + + srcCode := ` +function jsonIdentity(x) + local jsonObj, _ = json.decode(x) + local result, _ = json.encode(jsonObj) + + return result +end +` + src := base64.StdEncoding.EncodeToString([]byte(srcCode)) + + inputMsg := &models.Message{ + Data: snowplowJSON1, + PartitionKey: "some-test-key", + } + luaConfig := &luaEngineConfig{ + SourceB64: src, + RunTimeout: 5, + Sandbox: false, + } + + luaEngine, err := newLuaEngine(luaConfig) + if err != nil { + b.Fatalf("function newLuaEngine failed with error: %q", err.Error()) + } + + transFunction := luaEngine.MakeFunction("jsonIdentity") + + 
for n := 0; n < b.N; n++ { + transFunction(inputMsg, nil) + } +} + +// Test helpers +func testLuaEngineAdapter(f func(c *luaEngineConfig) (*luaEngineConfig, error)) luaEngineAdapter { + return func(i interface{}) (interface{}, error) { + cfg, ok := i.(*luaEngineConfig) + if !ok { + return nil, fmt.Errorf("invalid input, expected luaEngineConfig") + } + + return f(cfg) + } + +} + +func testLuaEngineFunc(c *luaEngineConfig) (*luaEngineConfig, error) { + + return c, nil +} + +// Helper function to compare messages and avoid using reflect.DeepEqual +// on errors. Compares all but the error field of messages. +func assertMessagesCompareLua(t *testing.T, act, exp *models.Message) { + t.Helper() + + ok := false + switch { + case act == nil: + ok = exp == nil + case exp == nil: + default: + pkOk := act.PartitionKey == exp.PartitionKey + dataOk := reflect.DeepEqual(act.Data, exp.Data) + cTimeOk := reflect.DeepEqual(act.TimeCreated, exp.TimeCreated) + pTimeOk := reflect.DeepEqual(act.TimePulled, exp.TimePulled) + tTimeOk := reflect.DeepEqual(act.TimeTransformed, exp.TimeTransformed) + ackOk := reflect.DeepEqual(act.AckFunc, exp.AckFunc) + + if pkOk && dataOk && cTimeOk && pTimeOk && tTimeOk && ackOk { + ok = true + } + } + + if !ok { + t.Errorf("\nGOT:\n%s\nEXPECTED:\n%s\n", + spew.Sdump(act), + spew.Sdump(exp)) + } +} + +// helper variables +var testLuaTimes = map[string]string{ + "dvceCreatedTstamp": "2019-05-10T14:40:35.551Z", + "etlTstamp": "2019-05-10T14:40:37.436Z", + "derivedTstamp": "2019-05-10T14:40:35.972Z", + "collectorTstamp": "2019-05-10T14:40:35.972Z", + "dvceSentTstamp": "2019-05-10T14:40:35Z", +} + +var testLuaMap = map[string]interface{}{ + "event_version": "1-0-0", + "app_id": "test-data<>", + "dvce_created_tstamp": testLuaTimes["dvceCreatedTstamp"], + "event": "unstruct", + "v_collector": "ssc-0.15.0-googlepubsub", + "network_userid": "d26822f5-52cc-4292-8f77-14ef6b7a27e2", + "event_name": "add_to_cart", + "event_vendor": "com.snowplowanalytics.snowplow", 
+ "event_format": "jsonschema", + "platform": "pc", + "etl_tstamp": testLuaTimes["etlTstamp"], + "collector_tstamp": testLuaTimes["collectorTstamp"], + "user_id": "user", + "dvce_sent_tstamp": testLuaTimes["dvceSentTstamp"], + "derived_tstamp": testLuaTimes["derivedTstamp"], + "event_id": "e9234345-f042-46ad-b1aa-424464066a33", + "v_tracker": "py-0.8.2", + "v_etl": "beam-enrich-0.2.0-common-0.36.0", + "user_ipaddress": "1.2.3.4", + "unstruct_event_com_snowplowanalytics_snowplow_add_to_cart_1": map[string]interface{}{ + "quantity": float64(2), + "unitPrice": 32.4, + "currency": "GBP", + "sku": "item41", + }, + "contexts_nl_basjes_yauaa_context_1": []interface{}{ + map[string]interface{}{ + "deviceName": "Unknown", + "layoutEngineVersionMajor": "??", + "operatingSystemName": "Unknown", + "deviceClass": "Unknown", + "agentVersion": "2.21.0", + "layoutEngineName": "Unknown", + "layoutEngineClass": "Unknown", + "agentName": "python-requests", + "agentNameVersion": "python-requests 2.21.0", + "operatingSystemVersion": "??", + "agentClass": "Special", + "deviceBrand": "Unknown", + "agentVersionMajor": "2", + "agentNameVersionMajor": "python-requests 2", + "operatingSystemClass": "Unknown", + "layoutEngineVersion": "??", + }, + }, + "useragent": "python-requests/2.21.0", +} + +var testLuaTsv = []byte(`test-data<> pc 2019-05-10 14:40:37.436 2019-05-10 14:40:35.972 2019-05-10 14:40:35.551 unstruct e9234345-f042-46ad-b1aa-424464066a33 py-0.8.2 ssc-0.15.0-googlepubsub beam-enrich-0.2.0-common-0.36.0 user 1.2.3.4 d26822f5-52cc-4292-8f77-14ef6b7a27e2 {"schema":"iglu:com.snowplowanalytics.snowplow/unstruct_event/jsonschema/1-0-0","data":{"schema":"iglu:com.snowplowanalytics.snowplow/add_to_cart/jsonschema/1-0-0","data":{"sku":"item41","quantity":2,"unitPrice":32.4,"currency":"GBP"}}} python-requests/2.21.0 2019-05-10 14:40:35.000 
{"schema":"iglu:com.snowplowanalytics.snowplow/contexts/jsonschema/1-0-1","data":[{"schema":"iglu:nl.basjes/yauaa_context/jsonschema/1-0-0","data":{"deviceBrand":"Unknown","deviceName":"Unknown","operatingSystemName":"Unknown","agentVersionMajor":"2","layoutEngineVersionMajor":"??","deviceClass":"Unknown","agentNameVersionMajor":"python-requests 2","operatingSystemClass":"Unknown","layoutEngineName":"Unknown","agentName":"python-requests","agentVersion":"2.21.0","layoutEngineClass":"Unknown","agentNameVersion":"python-requests 2.21.0","operatingSystemVersion":"??","agentClass":"Special","layoutEngineVersion":"??"}}]} 2019-05-10 14:40:35.972 com.snowplowanalytics.snowplow add_to_cart jsonschema 1-0-0 `) + +// corresponding JSON to previous TSV +var testLuaJSON = []byte(`{"app_id":"test-data<>","collector_tstamp":"2019-05-10T14:40:35.972Z","contexts_nl_basjes_yauaa_context_1":[{"agentClass":"Special","agentName":"python-requests","agentNameVersion":"python-requests 2.21.0","agentNameVersionMajor":"python-requests 
2","agentVersion":"2.21.0","agentVersionMajor":"2","deviceBrand":"Unknown","deviceClass":"Unknown","deviceName":"Unknown","layoutEngineClass":"Unknown","layoutEngineName":"Unknown","layoutEngineVersion":"??","layoutEngineVersionMajor":"??","operatingSystemClass":"Unknown","operatingSystemName":"Unknown","operatingSystemVersion":"??"}],"derived_tstamp":"2019-05-10T14:40:35.972Z","dvce_created_tstamp":"2019-05-10T14:40:35.551Z","dvce_sent_tstamp":"2019-05-10T14:40:35Z","etl_tstamp":"2019-05-10T14:40:37.436Z","event":"unstruct","event_format":"jsonschema","event_id":"e9234345-f042-46ad-b1aa-424464066a33","event_name":"add_to_cart","event_vendor":"com.snowplowanalytics.snowplow","event_version":"1-0-0","network_userid":"d26822f5-52cc-4292-8f77-14ef6b7a27e2","platform":"pc","unstruct_event_com_snowplowanalytics_snowplow_add_to_cart_1":{"currency":"GBP","quantity":2,"sku":"item41","unitPrice":32.4},"user_id":"user","user_ipaddress":"1.2.3.4","useragent":"python-requests/2.21.0","v_collector":"ssc-0.15.0-googlepubsub","v_etl":"beam-enrich-0.2.0-common-0.36.0","v_tracker":"py-0.8.2"}`) + +// json encoded inside Lua +var snowplowJSON1ChangedLua = []byte(`{"app_id_CHANGED":"test-data1","collector_tstamp":"2019-05-10T14:40:35.972Z","contexts_nl_basjes_yauaa_context_1":[{"agentClass":"Special","agentName":"python-requests","agentNameVersion":"python-requests 2.21.0","agentNameVersionMajor":"python-requests 
2","agentVersion":"2.21.0","agentVersionMajor":"2","deviceBrand":"Unknown","deviceClass":"Unknown","deviceName":"Unknown","layoutEngineClass":"Unknown","layoutEngineName":"Unknown","layoutEngineVersion":"??","layoutEngineVersionMajor":"??","operatingSystemClass":"Unknown","operatingSystemName":"Unknown","operatingSystemVersion":"??"}],"derived_tstamp":"2019-05-10T14:40:35.972Z","dvce_created_tstamp":"2019-05-10T14:40:35.551Z","dvce_sent_tstamp":"2019-05-10T14:40:35Z","etl_tstamp":"2019-05-10T14:40:37.436Z","event":"unstruct","event_format":"jsonschema","event_id":"e9234345-f042-46ad-b1aa-424464066a33","event_name":"add_to_cart","event_vendor":"com.snowplowanalytics.snowplow","event_version":"1-0-0","network_userid":"d26822f5-52cc-4292-8f77-14ef6b7a27e2","platform":"pc","unstruct_event_com_snowplowanalytics_snowplow_add_to_cart_1":{"currency":"GBP","quantity":2,"sku":"item41","unitPrice":32.4},"user_id":"user\u003cbuilt-in function input\u003e","user_ipaddress":"18.194.133.57","useragent":"python-requests/2.21.0","v_collector":"ssc-0.15.0-googlepubsub","v_etl":"beam-enrich-0.2.0-common-0.36.0","v_tracker":"py-0.8.2"}`) diff --git a/pkg/transform/snowplow_enriched_filter_test.go b/pkg/transform/snowplow_enriched_filter_test.go index e8c0f825..ef1da676 100644 --- a/pkg/transform/snowplow_enriched_filter_test.go +++ b/pkg/transform/snowplow_enriched_filter_test.go @@ -192,23 +192,43 @@ func TestNewSpEnrichedFilterFunction(t *testing.T) { } func TestNewSpEnrichedFilterFunction_Error(t *testing.T) { - assert := assert.New(t) - error := `invalid filter function config, must be of the format {field name}=={value}[|{value}|...] or {field name}!={value}[|{value}|...]` - - filterFunc, err1 := NewSpEnrichedFilterFunction("") + filterError := `invalid filter function config, must be of the format {field name}=={value}[|{value}|...] 
or {field name}!={value}[|{value}|...]` - assert.Nil(filterFunc) - assert.Equal(error, err1.Error()) - - filterFunc, err2 := NewSpEnrichedFilterFunction("app_id==abc|") + testCases := []struct { + Name string + Arg string + }{ + { + Name: "incompatible_arg", + Arg: "incompatibleArg", + }, + { + Name: "empty_arg", + Arg: "", + }, + { + Name: "wrong_arg_pipe", + Arg: "app_id==abc|", + }, + { + Name: "wrong_arg_syntax", + Arg: "!=abc", + }, + } - assert.Nil(filterFunc) - assert.Equal(error, err2.Error()) + for _, tt := range testCases { + t.Run(tt.Name, func(t *testing.T) { + assert := assert.New(t) - filterFunc, err3 := NewSpEnrichedFilterFunction("!=abc") + filterFunc, err := NewSpEnrichedFilterFunction(tt.Arg) - assert.Nil(filterFunc) - assert.Equal(error, err3.Error()) + assert.Nil(filterFunc) + if err == nil { + t.Fatalf("expected error, got nil") + } + assert.Equal(filterError, err.Error()) + }) + } } func TestSpEnrichedFilterFunction_Slice(t *testing.T) { diff --git a/pkg/transform/transformconfig/transform_config.go b/pkg/transform/transformconfig/transform_config.go new file mode 100644 index 00000000..ac2fca1c --- /dev/null +++ b/pkg/transform/transformconfig/transform_config.go @@ -0,0 +1,272 @@ +// PROPRIETARY AND CONFIDENTIAL +// +// Unauthorized copying of this file via any medium is strictly prohibited. +// +// Copyright (c) 2020-2022 Snowplow Analytics Ltd. All rights reserved. + +package transformconfig + +import ( + "fmt" + "strings" + + "github.com/snowplow-devops/stream-replicator/config" + "github.com/snowplow-devops/stream-replicator/pkg/transform" +) + +// GetTransformations builds and returns transformationApplyFunction +// from the transformations configured. 
+func GetTransformations(c configProvider) (transform.TransformationApplyFunction, error) {
+	layers, err := getLayerRegistry()
+	if err != nil {
+		return nil, err
+	}
+
+	units, err := parseTransformations(c.ProvideTransformMessage())
+	if err != nil {
+		return nil, err
+	}
+
+	// Every parsed unit contributes exactly one step, in configuration order.
+	steps := make([]transform.TransformationFunction, 0, len(units))
+	for _, unit := range units {
+		var (
+			step    transform.TransformationFunction
+			stepErr error
+		)
+
+		switch unit.name {
+		// Builtin transformations
+		case "spEnrichedToJson":
+			step = transform.SpEnrichedToJSON
+		case "spEnrichedSetPk":
+			step = transform.NewSpEnrichedSetPkFunction(unit.option)
+		case "spEnrichedFilter":
+			step, stepErr = transform.NewSpEnrichedFilterFunction(unit.option)
+		case "spEnrichedFilterContext":
+			step, stepErr = transform.NewSpEnrichedFilterFunctionContext(unit.option)
+		case "spEnrichedFilterUnstructEvent":
+			step, stepErr = transform.NewSpEnrichedFilterFunctionUnstructEvent(unit.option)
+		// Custom transformations
+		case "lua", "js":
+			step, stepErr = mkEngineFunction(c, unit, layers)
+		default:
+			// parseTransformations drops 'none' and rejects unknown names,
+			// so any other name contributes no step.
+			continue
+		}
+
+		if stepErr != nil {
+			return nil, stepErr
+		}
+		steps = append(steps, step)
+	}
+
+	return transform.NewTransformation(steps...), nil
+}
+
+// configProvider is what GetTransformations needs from a configuration:
+// the transformation message, the name of the configured custom layer, and a
+// way to turn a Pluggable layer into a configured component.
+type configProvider interface {
+	ProvideTransformMessage() string
+	ProvideTransformLayerName() string
+	ProvideTransformComponent(p config.Pluggable) (interface{}, error)
+}
+
+// transformationUnit is one parsed entry of the transformation message.
+// An entry is split on ':', e.g. 'spEnrichedSetPk:{option}': name holds the
+// part before the colon and option the part after it (empty for entries that
+// take no option).
+type transformationUnit struct {
+	name   string
+	option string
+}
+
+// layerRegistry maps a custom transformation layer name to the Pluggable
+// that knows how to configure it.
+type layerRegistry map[string]config.Pluggable
+
+// getLayerRegistry builds the registry of supported custom layers ("lua" and
+// "js"). It returns an error if either layer does not implement
+// config.Pluggable.
+func getLayerRegistry() (layerRegistry, error) {
+	lua, isPluggable := transform.LuaLayer().(config.Pluggable)
+	if !isPluggable {
+		return nil, fmt.Errorf("non pluggable lua transformation layer")
+	}
+
+	js, isPluggable := transform.JSLayer().(config.Pluggable)
+	if !isPluggable {
+		return nil, fmt.Errorf("non pluggable js transformation layer")
+	}
+
+	registry := make(layerRegistry, 2)
+	registry["lua"] = lua
+	registry["js"] = js
+
+	return registry, nil
+}
+
+// parseTransformations validates the message_transformation according to rules.
+// The reason for this function is to make the validation part explicit and
+// separate it from GetTransformations.
+func parseTransformations(input string) ([]*transformationUnit, error) {
+	if input == "" {
+		return nil, fmt.Errorf("invalid message transformation found; empty string")
+	}
+
+	// Entries are comma-separated and each entry is 'name' or 'name:option'.
+	// Because both separators are split on unconditionally, an option can
+	// contain neither ',' nor ':'.
+	transformations := strings.Split(input, ",")
+	out := make([]*transformationUnit, 0, len(transformations))
+	for _, trans := range transformations {
+		splitTrans := strings.Split(trans, ":")
+		name := splitTrans[0] // safe: Split with a non-empty separator returns at least one element
+
+		switch name {
+		case "spEnrichedToJson":
+			if err := rejectOption(name, splitTrans); err != nil {
+				return nil, err
+			}
+
+			out = append(out, &transformationUnit{name: name})
+		case "spEnrichedSetPk",
+			"spEnrichedFilter",
+			"spEnrichedFilterContext",
+			"spEnrichedFilterUnstructEvent",
+			"lua",
+			"js":
+			option, err := requireOption(name, trans, splitTrans)
+			if err != nil {
+				return nil, err
+			}
+
+			out = append(out, &transformationUnit{
+				name:   name,
+				option: option,
+			})
+		case "none":
+			// none is treated like identity: it is validated but adds no unit
+			if err := rejectOption(name, splitTrans); err != nil {
+				return nil, err
+			}
+		case "":
+			// this could be caused by some trailing/excessive comma
+			// differentiating from default in order to generate a
+			// more helpful error message
+			return nil, fmt.Errorf("empty transformation found; please check the message transformation syntax")
+		default:
+			return nil, fmt.Errorf("invalid transformation found; expected one of 'spEnrichedToJson', 'spEnrichedSetPk', 'spEnrichedFilter', 'spEnrichedFilterContext', 'spEnrichedFilterUnstructEvent', 'lua', 'js' or 'none' but got %q", name)
+		}
+	}
+
+	return out, nil
+}
+
+// rejectOption returns an error when a transformation that takes no option
+// was written with a colon. parts is the entry split on ':'.
+func rejectOption(name string, parts []string) error {
+	if len(parts) > 1 {
+		return fmt.Errorf("invalid message transformation found; unexpected colon after %q", name)
+	}
+
+	return nil
+}
+
+// requireOption returns the option of a transformation that must be written
+// as 'name:{option}'. trans is the raw entry and parts is trans split on ':'.
+// It fails unless there is exactly one colon followed by a non-empty option.
+func requireOption(name, trans string, parts []string) (string, error) {
+	if len(parts) != 2 {
+		return "", fmt.Errorf("invalid message transformation found; expected '%s:{option}' but got %q", name, trans)
+	}
+
+	if parts[1] == "" {
+		return "", fmt.Errorf("invalid message transformation found; empty option for '%s'", name)
+	}
+
+	return parts[1], nil
+}
+
+// mkEngineFunction is a helper function used in GetTransformations.
+// It creates, smoke-tests and returns a custom transformation function.
+//
+// trans.name must match the layer name the configuration provides and must be
+// present in registry. trans.option is passed to the engine's SmokeTest and
+// MakeFunction. Errors from the config provider and from the smoke test are
+// returned unchanged.
+func mkEngineFunction(c configProvider, trans *transformationUnit, registry layerRegistry) (transform.TransformationFunction, error) {
+	// validate that the expected layer is specified in the configuration
+	if c.ProvideTransformLayerName() != trans.name {
+		return nil, fmt.Errorf("missing configuration for the custom transformation layer specified: %q", trans.name)
+	}
+
+	plug, ok := registry[trans.name]
+	if !ok {
+		return nil, fmt.Errorf("unknown transformation layer specified")
+	}
+
+	component, err := c.ProvideTransformComponent(plug)
+	if err != nil {
+		return nil, err
+	}
+
+	engine, ok := component.(transform.Engine)
+	if !ok {
+		return nil, fmt.Errorf("could not interpret custom transformation configuration")
+	}
+
+	// Run the smoke test before handing the function out, so that a
+	// failure surfaces while the transformations are being built.
+	if err := engine.SmokeTest(trans.option); err != nil {
+		return nil, err
+	}
+
+	return engine.MakeFunction(trans.option), nil
+}
diff --git a/pkg/transform/transformconfig/transform_config_test.go b/pkg/transform/transformconfig/transform_config_test.go
new file mode 100644
index 00000000..ae96c9c4
--- /dev/null
+++ b/pkg/transform/transformconfig/transform_config_test.go
@@ -0,0 +1,491 @@
+// PROPRIETARY AND CONFIDENTIAL
+//
+// Unauthorized copying of this file via any medium is strictly prohibited.
+//
+// Copyright (c) 2020-2022 Snowplow Analytics Ltd. All rights reserved.
+
+package transformconfig
+
+import (
+	"fmt"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/snowplow-devops/stream-replicator/config"
+	"github.com/snowplow-devops/stream-replicator/pkg/models"
+	"github.com/snowplow-devops/stream-replicator/pkg/transform"
+)
+
+// TestParseTransformations_InvalidMessage checks that every malformed
+// transformation message is rejected with its exact error message.
+func TestParseTransformations_InvalidMessage(t *testing.T) {
+	testCases := []struct {
+		Name     string
+		Message  string
+		ExpError string
+	}{
+		{
+			Name:     "message_empty",
+			Message:  "",
+			ExpError: "invalid message transformation found; empty string",
+		},
+		{
+			Name:     "message_not_found",
+			Message:  "fake",
+			ExpError: "invalid transformation found; expected one of 'spEnrichedToJson', 'spEnrichedSetPk', 'spEnrichedFilter', 'spEnrichedFilterContext', 'spEnrichedFilterUnstructEvent', 'lua', 'js' or 'none' but got \"fake\"",
+		},
+		{
+			Name:     "message_option_none_a",
+			Message:  "none:wrong",
+			ExpError: "invalid message transformation found; unexpected colon after \"none\"",
+		},
+		{
+			Name:     "message_option_none_b",
+			Message:  "none:",
+			ExpError: "invalid message transformation found; unexpected colon after \"none\"",
+		},
+		{
+			Name:     "message_option_spEnrichedToJson",
+			Message:  "spEnrichedToJson:wrong",
+			ExpError: "invalid message transformation found; unexpected colon after \"spEnrichedToJson\"",
+		},
+		{
+			Name:     "message_no_option_spEnrichedSetPk",
+			Message:  "spEnrichedSetPk",
+			ExpError: "invalid message transformation found; expected 'spEnrichedSetPk:{option}' but got \"spEnrichedSetPk\"",
+		},
+		{
+			Name:     "message_empty_option_spEnrichedSetPk",
+			Message:  "spEnrichedSetPk:",
+			ExpError: "invalid message transformation found; empty option for 'spEnrichedSetPk'",
+		},
+		{
+			Name:     "message_no_option_spEnrichedFilter",
+			Message:  "spEnrichedFilter:too:wrong",
+			ExpError: "invalid message transformation found; expected 'spEnrichedFilter:{option}' but got \"spEnrichedFilter:too:wrong\"",
+		},
+		{
+			Name:     "message_empty_option_spEnrichedFilter",
+			Message:  "spEnrichedFilter:",
+			ExpError: "invalid message transformation found; empty option for 'spEnrichedFilter'",
+		},
+		{
+			Name:     "message_no_option_spEnrichedFilterContext",
+			Message:  "spEnrichedFilterContext:too:wrong",
+			ExpError: "invalid message transformation found; expected 'spEnrichedFilterContext:{option}' but got \"spEnrichedFilterContext:too:wrong\"",
+		},
+		{
+			Name:     "message_empty_option_spEnrichedFilterContext",
+			Message:  "spEnrichedFilterContext:",
+			ExpError: "invalid message transformation found; empty option for 'spEnrichedFilterContext'",
+		},
+		{
+			Name:     "message_no_option_spEnrichedFilterUnstructEvent",
+			Message:  "spEnrichedFilterUnstructEvent:too:wrong",
+			ExpError: "invalid message transformation found; expected 'spEnrichedFilterUnstructEvent:{option}' but got \"spEnrichedFilterUnstructEvent:too:wrong\"",
+		},
+		{
+			Name:     "message_empty_option_spEnrichedFilterUnstructEvent",
+			Message:  "spEnrichedFilterUnstructEvent:",
+			ExpError: "invalid message transformation found; empty option for 'spEnrichedFilterUnstructEvent'",
+		},
+		{
+			Name:     "message_no_option_lua",
+			Message:  "lua",
+			ExpError: "invalid message transformation found; expected 'lua:{option}' but got \"lua\"",
+		},
+		{
+			Name:     "message_empty_option_lua",
+			Message:  "lua:",
+			ExpError: "invalid message transformation found; empty option for 'lua'",
+		},
+		{
+			Name:     "message_no_option_js",
+			Message:  "js",
+			ExpError: "invalid message transformation found; expected 'js:{option}' but got \"js\"",
+		},
+		{
+			Name:     "message_empty_option_js",
+			Message:  "js:",
+			ExpError: "invalid message transformation found; empty option for 'js'",
+		},
+		{
+			Name:     "invalid_transformation_syntax_a",
+			Message:  "spEnrichedToJson,",
+			ExpError: "empty transformation found; please check the message transformation syntax",
+		},
+		{
+			Name:     "invalid_transformation_syntax_b",
+			Message:  ":",
+			ExpError: "empty transformation found; please check the message transformation syntax",
+		},
+		{
+			Name:     "invalid_transformation_syntax_c",
+			Message:  ",",
+			ExpError: "empty transformation found; please check the message transformation syntax",
+		},
+	}
+
+	for _, tt := range testCases {
+		t.Run(tt.Name, func(t *testing.T) {
+			assert := assert.New(t)
+
+			parsed, err := parseTransformations(tt.Message)
+			assert.Nil(parsed)
+			if err == nil {
+				t.Fatalf("expected error; got nil")
+			}
+			assert.Equal(tt.ExpError, err.Error())
+		})
+	}
+}
+
+// TestGetTransformations_MissingLayerConfig loads fixture configurations
+// whose transformation message names a custom layer and expects
+// GetTransformations to fail with the missing-configuration error.
+func TestGetTransformations_MissingLayerConfig(t *testing.T) {
+	fixturesDir := "../../../config/test-fixtures"
+	testCases := []struct {
+		Filename      string
+		TransMessage  string
+		ExpectedError string
+	}{
+		{
+			Filename:      "transform-invalid-layer-lua.hcl",
+			TransMessage:  "lua:fun",
+			ExpectedError: "missing configuration for the custom transformation layer specified: \"lua\"",
+		},
+		{
+			Filename:      "transform-invalid-layer-js.hcl",
+			TransMessage:  "js:fun",
+			ExpectedError: "missing configuration for the custom transformation layer specified: \"js\"",
+		},
+	}
+
+	for _, tt := range testCases {
+		t.Run(tt.Filename, func(t *testing.T) {
+			assert := assert.New(t)
+
+			filename := filepath.Join(fixturesDir, tt.Filename)
+			t.Setenv("STREAM_REPLICATOR_CONFIG_FILE", filename)
+
+			// Check the error before touching c: the config is dereferenced
+			// right below, so a failed load must stop the test here.
+			c, err := config.NewConfig()
+			if err != nil {
+				t.Fatalf("function NewConfig failed with error: %q", err.Error())
+			}
+			if c == nil {
+				t.Fatalf("function NewConfig returned a nil config")
+			}
+
+			assert.Equal(tt.TransMessage, c.Data.Transform.Message)
+
+			transformation, err := GetTransformations(c)
+			assert.Nil(transformation)
+			if err == nil {
+				t.Fatalf("expected error; got nil")
+			}
+			assert.Equal(tt.ExpectedError, err.Error())
+		})
+	}
+}
+
+// TestGetTransformations_Builtins checks that each builtin transformation
+// name yields a transformation and that an unknown name is rejected.
+func TestGetTransformations_Builtins(t *testing.T) {
+	testCases := []struct {
+		Name        string
+		Provider    configProvider
+		ExpectedErr error
+	}{
+		{
+			Name: "invalid_transform_message",
+			Provider: &testConfigProvider{
+				message: "tooWrong",
+			},
+			ExpectedErr: fmt.Errorf("invalid transformation found; expected one of 'spEnrichedToJson', 'spEnrichedSetPk', 'spEnrichedFilter', 'spEnrichedFilterContext', 'spEnrichedFilterUnstructEvent', 'lua', 'js' or 'none' but got \"tooWrong\""),
+		},
+		{
+			Name: "spEnrichedToJson",
+			Provider: &testConfigProvider{
+				message: "spEnrichedToJson",
+			},
+			ExpectedErr: nil,
+		},
+		{
+			Name: "spEnrichedSetPk",
+			Provider: &testConfigProvider{
+				message: "spEnrichedSetPk:app_id",
+			},
+			ExpectedErr: nil,
+		},
+		{
+			Name: "spEnrichedFilter",
+			Provider: &testConfigProvider{
+				message: "spEnrichedFilter:app_id==xyz",
+			},
+			ExpectedErr: nil,
+		},
+		{
+			Name: "spEnrichedFilterContext",
+			Provider: &testConfigProvider{
+				message: "spEnrichedFilterContext:contexts_x_x_x_1.yz==xyz",
+			},
+			ExpectedErr: nil,
+		},
+		{
+			Name: "spEnrichedFilterUnstructEvent",
+			Provider: &testConfigProvider{
+				message: "spEnrichedFilterUnstructEvent:unstruct_event_x_x_x_1.yz==xyz",
+			},
+			ExpectedErr: nil,
+		},
+	}
+
+	for _, tt := range testCases {
+		t.Run(tt.Name, func(t *testing.T) {
+			assert := assert.New(t)
+
+			applyFun, err := GetTransformations(tt.Provider)
+
+			if tt.ExpectedErr == nil {
+				assert.Nil(err)
+				assert.NotNil(applyFun)
+				return
+			}
+
+			// Stop before err.Error(): a missing error must fail, not panic.
+			if err == nil {
+				t.Fatalf("expected error %q; got nil", tt.ExpectedErr.Error())
+			}
+			assert.Equal(tt.ExpectedErr.Error(), err.Error())
+			assert.Nil(applyFun)
+		})
+	}
+}
+
+// TestGetTransformations_Custom checks that the custom layers ("lua", "js")
+// yield a transformation when the provider supplies a matching layer name
+// and an engine component.
+func TestGetTransformations_Custom(t *testing.T) {
+	testCases := []struct {
+		Name        string
+		Provider    configProvider
+		ExpectedErr error
+	}{
+		{
+			Name: "lua",
+			Provider: &testConfigProvider{
+				message:   "lua:fun",
+				layerName: "lua",
+				component: &testEngine{
+					smokeTestErr: nil,
+					mkFunction:   testTransformationFunction,
+				},
+			},
+			ExpectedErr: nil,
+		},
+		{
+			Name: "js",
+			Provider: &testConfigProvider{
+				message:   "js:fun",
+				layerName: "js",
+				component: &testEngine{
+					smokeTestErr: nil,
+					mkFunction:   testTransformationFunction,
+				},
+			},
+			ExpectedErr: nil,
+		},
+	}
+
+	for _, tt := range testCases {
+		t.Run(tt.Name, func(t *testing.T) {
+			assert := assert.New(t)
+
+			applyFun, err := GetTransformations(tt.Provider)
+
+			if tt.ExpectedErr == nil {
+				assert.Nil(err)
+				assert.NotNil(applyFun)
+				return
+			}
+
+			// Stop before err.Error(): a missing error must fail, not panic.
+			if err == nil {
+				t.Fatalf("expected error %q; got nil", tt.ExpectedErr.Error())
+			}
+			assert.Equal(tt.ExpectedErr.Error(), err.Error())
+			assert.Nil(applyFun)
+		})
+	}
+}
+
+// TestLayerRegistry checks that the registry contains both supported layers.
+func TestLayerRegistry(t *testing.T) {
+	assert := assert.New(t)
+
+	registry, err := getLayerRegistry()
+	assert.Nil(err)
+
+	_, okLua := registry["lua"]
+	assert.True(okLua)
+
+	_, okJs := registry["js"]
+	assert.True(okJs)
+}
+
+// TestMkEngineFunction walks mkEngineFunction through each of its failure
+// branches in order, then through the happy path.
+func TestMkEngineFunction(t *testing.T) {
+	testCases := []struct {
+		Name        string
+		Provider    *testConfigProvider
+		Unit        *transformationUnit
+		Registry    layerRegistry
+		ExpectedErr error
+	}{
+		{
+			Name: "missing_layer_config",
+			Provider: &testConfigProvider{
+				layerName: "test",
+				component: "irrelevant",
+				err:       nil,
+			},
+			Unit: &transformationUnit{
+				name:   "noTest",
+				option: "testFun",
+			},
+			Registry:    map[string]config.Pluggable{},
+			ExpectedErr: fmt.Errorf("missing configuration for the custom transformation layer specified: \"noTest\""),
+		},
+		{
+			Name: "unknown_layer",
+			Provider: &testConfigProvider{
+				layerName: "test",
+				component: "irrelevant",
+				err:       nil,
+			},
+			Unit: &transformationUnit{
+				name:   "test",
+				option: "testFun",
+			},
+			Registry:    map[string]config.Pluggable{},
+			ExpectedErr: fmt.Errorf("unknown transformation layer specified"),
+		},
+		{
+			Name: "provider_error",
+			Provider: &testConfigProvider{
+				layerName: "test",
+				component: nil,
+				err:       fmt.Errorf("some error"),
+			},
+			Unit: &transformationUnit{
+				name:   "test",
+				option: "testFun",
+			},
+			Registry: map[string]config.Pluggable{
+				"test": &testPluggable{},
+			},
+			ExpectedErr: fmt.Errorf("some error"),
+		},
+		{
+			Name: "no_engine_component",
+			Provider: &testConfigProvider{
+				layerName: "test",
+				component: "notAnEngine",
+				err:       nil,
+			},
+			Unit: &transformationUnit{
+				name:   "test",
+				option: "testFun",
+			},
+			Registry: map[string]config.Pluggable{
+				"test": &testPluggable{},
+			},
+			ExpectedErr: fmt.Errorf("could not interpret custom transformation configuration"),
+		},
+		{
+			Name: "engine_smoke_test_error",
+			Provider: &testConfigProvider{
+				layerName: "test",
+				component: &testEngine{
+					smokeTestErr: fmt.Errorf("smoke error"),
+					mkFunction:   testTransformationFunction,
+				},
+				err: nil,
+			},
+			Unit: &transformationUnit{
+				name:   "test",
+				option: "testFun",
+			},
+			Registry: map[string]config.Pluggable{
+				"test": &testPluggable{},
+			},
+			ExpectedErr: fmt.Errorf("smoke error"),
+		},
+		{
+			Name: "happy_path",
+			Provider: &testConfigProvider{
+				layerName: "test",
+				component: &testEngine{
+					smokeTestErr: nil,
+					mkFunction:   testTransformationFunction,
+				},
+				err: nil,
+			},
+			Unit: &transformationUnit{
+				name:   "test",
+				option: "testFun",
+			},
+			Registry: map[string]config.Pluggable{
+				"test": &testPluggable{},
+			},
+			ExpectedErr: nil,
+		},
+	}
+
+	for _, tt := range testCases {
+		t.Run(tt.Name, func(t *testing.T) {
+			assert := assert.New(t)
+
+			fun, err := mkEngineFunction(
+				tt.Provider,
+				tt.Unit,
+				tt.Registry,
+			)
+
+			if tt.ExpectedErr == nil {
+				assert.Nil(err)
+				assert.NotNil(fun)
+				return
+			}
+
+			// Stop before err.Error(): a missing error must fail, not panic.
+			if err == nil {
+				t.Fatalf("expected error %q; got nil", tt.ExpectedErr.Error())
+			}
+			assert.Equal(tt.ExpectedErr.Error(), err.Error())
+			assert.Nil(fun)
+		})
+	}
+}
+
+// Helpers
+
+// testConfigProvider is a canned configProvider: every method returns the
+// corresponding field unchanged.
+type testConfigProvider struct {
+	message   string
+	layerName string
+	component interface{}
+	err       error
+}
+
+// *testConfigProvider implements configProvider
+func (tc *testConfigProvider) ProvideTransformMessage() string {
+	return tc.message
+}
+
+func (tc *testConfigProvider) ProvideTransformLayerName() string {
+	return tc.layerName
+}
+
+func (tc *testConfigProvider) ProvideTransformComponent(p config.Pluggable) (interface{}, error) {
+	return tc.component, tc.err
+}
+
+// testPluggable is a Pluggable stand-in used only as a registry value.
+type testPluggable struct{}
+
+// *testPluggable implements config.Pluggable
+func (tp *testPluggable) ProvideDefault() (interface{}, error) {
+	return "placeholder", nil
+}
+
+func (tp *testPluggable) Create(i interface{}) (interface{}, error) {
+	return "placeholder", nil
+}
+
+// testEngine is a canned engine: SmokeTest returns smokeTestErr and
+// MakeFunction returns mkFunction, both ignoring the function name.
+type testEngine struct {
+	smokeTestErr error
+	mkFunction   transform.TransformationFunction
+}
+
+// *testEngine implements transform.Engine
+func (te *testEngine) SmokeTest(funName string) error {
+	return te.smokeTestErr
+}
+
+func (te *testEngine) MakeFunction(funName string) transform.TransformationFunction {
+	return te.mkFunction
+}
+
+// testTransformationFunction is a no-op transformation that returns nil for
+// every result.
+func testTransformationFunction(*models.Message, interface{}) (*models.Message, *models.Message, *models.Message, interface{}) {
+	return nil, nil, nil, nil
+}