From e6aa5c2776e449f47194e705c9b244461cc769e6 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Wed, 8 Jan 2025 09:04:28 -0600 Subject: [PATCH 1/3] Add features nested structure and associated mapping optimizers --- .../configuration/events/entities/events.yaml | 58 +++++++++++++++++++ .../configuration/events/storages/errors.yaml | 31 ++++++++++ .../events/storages/errors_ro.yaml | 26 +++++++++ .../physical/empty_tag_condition_processor.py | 7 ++- 4 files changed, 120 insertions(+), 2 deletions(-) diff --git a/snuba/datasets/configuration/events/entities/events.yaml b/snuba/datasets/configuration/events/entities/events.yaml index b948d9bb3c1..9514d2ac73f 100644 --- a/snuba/datasets/configuration/events/entities/events.yaml +++ b/snuba/datasets/configuration/events/entities/events.yaml @@ -49,6 +49,24 @@ schema: inner_type: { type: UInt, args: { size: 64 } }, }, }, + { + name: features, + type: Nested, + args: + { + subcolumns: + [{ name: key, type: String }, { name: value, type: String }], + }, + }, + { + name: _features_hash_map, + type: Array, + args: + { + schema_modifiers: [readonly], + inner_type: { type: UInt, args: { size: 64 } }, + }, + }, { name: contexts, type: Nested, @@ -297,6 +315,18 @@ storages: from_col_name: tags_value to_function_name: arrayJoin to_function_column: tags.value + - mapper: ColumnToFunctionOnColumn + args: + from_table_name: null + from_col_name: features_key + to_function_name: arrayJoin + to_function_column: features.key + - mapper: ColumnToFunctionOnColumn + args: + from_table_name: null + from_col_name: features_value + to_function_name: arrayJoin + to_function_column: features.value subscriptables: - mapper: SubscriptableMapper args: @@ -314,6 +344,14 @@ storages: to_nested_col_name: contexts value_subcolumn_name: value nullable: false + - mapper: SubscriptableMapper + args: + from_column_table: null + from_column_name: features + to_nested_col_table: null + to_nested_col_name: features + value_subcolumn_name: value + nullable: false - storage: errors is_writable: true translation_mappers: @@ -408,6 +446,18 @@ storages: from_col_name: tags_value to_function_name: arrayJoin to_function_column: tags.value + - mapper: ColumnToFunctionOnColumn + args: + from_table_name: null + from_col_name: features_key + to_function_name: arrayJoin + to_function_column: features.key + - mapper: ColumnToFunctionOnColumn + args: + from_table_name: null + from_col_name: features_value + to_function_name: arrayJoin + to_function_column: features.value subscriptables: - mapper: SubscriptableMapper args: @@ -425,6 +475,14 @@ storages: to_nested_col_name: contexts value_subcolumn_name: value nullable: false + - mapper: SubscriptableMapper + args: + from_column_table: null + from_column_name: features + to_nested_col_table: null + to_nested_col_name: features + value_subcolumn_name: value + nullable: false storage_selector: selector: ErrorsQueryStorageSelector query_processors: diff --git a/snuba/datasets/configuration/events/storages/errors.yaml b/snuba/datasets/configuration/events/storages/errors.yaml index 26afb5ad48b..823f5434464 100644 --- a/snuba/datasets/configuration/events/storages/errors.yaml +++ b/snuba/datasets/configuration/events/storages/errors.yaml @@ -76,6 +76,24 @@ schema: inner_type: { type: UInt, args: { size: 64 } }, }, }, + { + name: features, + type: Nested, + args: + { + subcolumns: + [{ name: key, type: String }, { name: value, type: String }], + }, + }, + { + name: _features_hash_map, + type: Array, + args: + { + schema_modifiers: [readonly], + inner_type: { type: UInt, args: { size: 64 } }, + }, + }, { name: contexts, type: Nested, @@ -310,10 +328,23 @@ query_processors: column_name: tags hash_map_name: _tags_hash_map killswitch: events_tags_hash_map_enabled + - processor: MappingOptimizer + args: + column_name: features + hash_map_name: _features_hash_map + killswitch: events_features_hash_map_enabled - processor: EmptyTagConditionProcessor + args: + column_name: tags.key + - processor: EmptyTagConditionProcessor + args: + column_name: features.key - processor: ArrayJoinKeyValueOptimizer args: column_name: tags + - processor: ArrayJoinKeyValueOptimizer + args: + column_name: features - processor: PrewhereProcessor args: omit_if_final: diff --git a/snuba/datasets/configuration/events/storages/errors_ro.yaml b/snuba/datasets/configuration/events/storages/errors_ro.yaml index 32af5e145ea..6606d67355a 100644 --- a/snuba/datasets/configuration/events/storages/errors_ro.yaml +++ b/snuba/datasets/configuration/events/storages/errors_ro.yaml @@ -76,6 +76,24 @@ schema: inner_type: { type: UInt, args: { size: 64 } }, }, }, + { + name: features, + type: Nested, + args: + { + subcolumns: + [{ name: key, type: String }, { name: value, type: String }], + }, + }, + { + name: _features_hash_map, + type: Array, + args: + { + schema_modifiers: [readonly], + inner_type: { type: UInt, args: { size: 64 } }, + }, + }, { name: contexts, type: Nested, @@ -306,10 +324,18 @@ query_processors: column_name: tags hash_map_name: _tags_hash_map killswitch: events_tags_hash_map_enabled + - processor: MappingOptimizer + args: + column_name: features + hash_map_name: _features_hash_map + killswitch: events_features_hash_map_enabled - processor: EmptyTagConditionProcessor - processor: ArrayJoinKeyValueOptimizer args: column_name: tags + - processor: ArrayJoinKeyValueOptimizer + args: + column_name: features - processor: PrewhereProcessor args: omit_if_final: diff --git a/snuba/query/processors/physical/empty_tag_condition_processor.py b/snuba/query/processors/physical/empty_tag_condition_processor.py index f48ff12cedb..798c3f5b368 100644 --- a/snuba/query/processors/physical/empty_tag_condition_processor.py +++ b/snuba/query/processors/physical/empty_tag_condition_processor.py @@ -30,18 +30,21 @@ class EmptyTagConditionProcessor(ClickhouseQueryProcessor): the `has` function. """ + def __init__(self, column_name: str) -> None: + self.column_name = column_name + def process_query(self, query: Query, query_settings: QuerySettings) -> None: def process_condition(exp: Expression) -> Expression: result = CONDITION_PATTERN.match(exp) if result is not None: key_column = result.optional_string(KEY_COL_MAPPING_PARAM) - if key_column == "tags.key": + if key_column == self.column_name: rhs = result.optional_string(KEY_MAPPING_PARAM) table_name = result.optional_string(TABLE_MAPPING_PARAM) replacement = FunctionCall( exp.alias, "has", - (Column(None, table_name, "tags.key"), Literal(None, rhs)), + (Column(None, table_name, self.key_column), Literal(None, rhs)), ) assert isinstance(exp, FunctionCall) From 4c859278a85e60105815f68703bc4983ffa4ae03 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Wed, 8 Jan 2025 10:36:01 -0600 Subject: [PATCH 2/3] Fix definition --- .../processors/physical/empty_tag_condition_processor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/snuba/query/processors/physical/empty_tag_condition_processor.py b/snuba/query/processors/physical/empty_tag_condition_processor.py index 798c3f5b368..4aa0328d785 100644 --- a/snuba/query/processors/physical/empty_tag_condition_processor.py +++ b/snuba/query/processors/physical/empty_tag_condition_processor.py @@ -44,7 +44,10 @@ def process_condition(exp: Expression) -> Expression: replacement = FunctionCall( exp.alias, "has", - (Column(None, table_name, self.key_column), Literal(None, rhs)), + ( + Column(None, table_name, self.column_name), + Literal(None, rhs), + ), ) assert isinstance(exp, FunctionCall) From 74ea27177d82caa709db406f65557980b0435ee0 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Wed, 8 Jan 2025 10:37:34 -0600 Subject: [PATCH 3/3] Fix remaining definitions --- .../configuration/discover/storages/discover.yaml | 2 ++ snuba/datasets/configuration/events/storages/errors.yaml | 2 +- .../datasets/configuration/events/storages/errors_ro.yaml | 2 ++ .../configuration/issues/storages/search_issues.yaml | 2 ++ .../datasets/configuration/replays/storages/replays.yaml | 2 ++ snuba/datasets/configuration/spans/storages/spans.yaml | 2 ++ .../configuration/transactions/storages/transactions.yaml | 2 ++ .../test_mapping_optimizer_no_useless_conditions.py | 8 ++++++-- 8 files changed, 19 insertions(+), 3 deletions(-) diff --git a/snuba/datasets/configuration/discover/storages/discover.yaml b/snuba/datasets/configuration/discover/storages/discover.yaml index 4cceaf58e63..e42252cbc75 100644 --- a/snuba/datasets/configuration/discover/storages/discover.yaml +++ b/snuba/datasets/configuration/discover/storages/discover.yaml @@ -111,6 +111,8 @@ query_processors: hash_map_name: _tags_hash_map killswitch: tags_hash_map_enabled - processor: EmptyTagConditionProcessor + args: + column_name: tags.key - processor: ArrayJoinKeyValueOptimizer args: column_name: tags diff --git a/snuba/datasets/configuration/events/storages/errors.yaml b/snuba/datasets/configuration/events/storages/errors.yaml index 823f5434464..6db086c64f4 100644 --- a/snuba/datasets/configuration/events/storages/errors.yaml +++ b/snuba/datasets/configuration/events/storages/errors.yaml @@ -335,7 +335,7 @@ query_processors: killswitch: events_features_hash_map_enabled - processor: EmptyTagConditionProcessor args: - column_name: tags.key + column_name: tags.key.key - processor: EmptyTagConditionProcessor args: column_name: features.key diff --git a/snuba/datasets/configuration/events/storages/errors_ro.yaml b/snuba/datasets/configuration/events/storages/errors_ro.yaml index 6606d67355a..5fea3af85a6 100644 --- a/snuba/datasets/configuration/events/storages/errors_ro.yaml +++ b/snuba/datasets/configuration/events/storages/errors_ro.yaml @@ -330,6 +330,8 @@ query_processors: hash_map_name: _features_hash_map killswitch: events_features_hash_map_enabled - processor: EmptyTagConditionProcessor + args: + column_name: tags.key - processor: ArrayJoinKeyValueOptimizer args: column_name: tags diff --git a/snuba/datasets/configuration/issues/storages/search_issues.yaml b/snuba/datasets/configuration/issues/storages/search_issues.yaml index 9dce63d12e7..b2dc8c57551 100644 --- a/snuba/datasets/configuration/issues/storages/search_issues.yaml +++ b/snuba/datasets/configuration/issues/storages/search_issues.yaml @@ -105,6 +105,8 @@ query_processors: hash_map_name: _tags_hash_map killswitch: tags_hash_map_enabled - processor: EmptyTagConditionProcessor + args: + column_name: tags.key - processor: ArrayJoinKeyValueOptimizer args: column_name: tags diff --git a/snuba/datasets/configuration/replays/storages/replays.yaml b/snuba/datasets/configuration/replays/storages/replays.yaml index 71ede6ede1d..3ee9225c2b0 100644 --- a/snuba/datasets/configuration/replays/storages/replays.yaml +++ b/snuba/datasets/configuration/replays/storages/replays.yaml @@ -190,6 +190,8 @@ query_processors: hash_map_name: _tags_hash_map killswitch: tags_hash_map_enabled - processor: EmptyTagConditionProcessor + args: + column_name: tags.key - processor: ArrayJoinKeyValueOptimizer args: column_name: tags diff --git a/snuba/datasets/configuration/spans/storages/spans.yaml b/snuba/datasets/configuration/spans/storages/spans.yaml index 1e9c3ad6db7..d68a1fa1d94 100644 --- a/snuba/datasets/configuration/spans/storages/spans.yaml +++ b/snuba/datasets/configuration/spans/storages/spans.yaml @@ -156,6 +156,8 @@ query_processors: hash_map_name: _measurements_hash_map killswitch: measurements_hash_map_enabled - processor: EmptyTagConditionProcessor + args: + column_name: tags.key - processor: ArrayJoinKeyValueOptimizer args: column_name: tags diff --git a/snuba/datasets/configuration/transactions/storages/transactions.yaml b/snuba/datasets/configuration/transactions/storages/transactions.yaml index 2dd3e147d53..d6c4be5e02b 100644 --- a/snuba/datasets/configuration/transactions/storages/transactions.yaml +++ b/snuba/datasets/configuration/transactions/storages/transactions.yaml @@ -215,6 +215,8 @@ query_processors: hash_map_name: _tags_hash_map killswitch: tags_hash_map_enabled - processor: EmptyTagConditionProcessor + args: + column_name: tags.key - processor: ArrayJoinKeyValueOptimizer args: column_name: tags diff --git a/tests/query/processors/test_mapping_optimizer_no_useless_conditions.py b/tests/query/processors/test_mapping_optimizer_no_useless_conditions.py index b0b25fd7fb3..339352dc46c 100644 --- a/tests/query/processors/test_mapping_optimizer_no_useless_conditions.py +++ b/tests/query/processors/test_mapping_optimizer_no_useless_conditions.py @@ -269,8 +269,12 @@ def test_useless_has_condition( # change the existence expression to be a has(tags, 'my_tag') expression for boh queries # this allows reuse of the previous test cases - EmptyTagConditionProcessor().process_query(input_query, HTTPQuerySettings()) - EmptyTagConditionProcessor().process_query(expected_query, HTTPQuerySettings()) + EmptyTagConditionProcessor("tags.key").process_query( + input_query, HTTPQuerySettings() + ) + EmptyTagConditionProcessor("tags.key").process_query( + expected_query, HTTPQuerySettings() + ) MappingOptimizer( column_name="tags",