From 4535ce01c4dc9928354698bd19beb6fe1d5101c5 Mon Sep 17 00:00:00 2001 From: xurui-c <159840875+xurui-c@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:05:12 -0800 Subject: [PATCH] fix(inc984): scrub the correct bucket for sentry.user (#6693) The `user` field is sent in the sentry_tags dictionary, whose keys are automatically prefixed with `sentry.` by the message processor. Thus, the attribute is stored as `sentry.user`, and a different attribute column (`attr_str_11` instead of `attr_str_2`) has to be scrubbed. Co-authored-by: Rachel Chen --- snuba/manual_jobs/scrub_users_from_eap_spans.py | 2 +- tests/manual_jobs/test_scrub_users_from_eap_spans.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/snuba/manual_jobs/scrub_users_from_eap_spans.py b/snuba/manual_jobs/scrub_users_from_eap_spans.py index 043c85af54..89989a67a5 100644 --- a/snuba/manual_jobs/scrub_users_from_eap_spans.py +++ b/snuba/manual_jobs/scrub_users_from_eap_spans.py @@ -29,7 +29,7 @@ def _get_query(self, cluster_name: str | None) -> str: on_cluster = f"ON CLUSTER '{cluster_name}'" if cluster_name else "" return f"""ALTER TABLE eap_spans_2_local {on_cluster} -UPDATE `attr_str_2` = mapApply((k, v) -> (k, if(k = 'user' AND startsWith(v, '{_IP_PREFIX}') AND (isIPv4String(substring(v, 4)) OR isIPv6String(substring(v, 4))), 'ip:scrubbed', v)), `attr_str_2`) +UPDATE `attr_str_11` = mapApply((k, v) -> (k, if(k = 'sentry.user' AND startsWith(v, '{_IP_PREFIX}') AND (isIPv4String(substring(v, 4)) OR isIPv6String(substring(v, 4))), 'ip:scrubbed', v)), `attr_str_11`) WHERE organization_id IN [{organization_ids}] AND _sort_timestamp >= toDateTime('{start_datetime}') AND _sort_timestamp < toDateTime('{end_datetime}')""" diff --git a/tests/manual_jobs/test_scrub_users_from_eap_spans.py b/tests/manual_jobs/test_scrub_users_from_eap_spans.py index e5cf6c104d..fc40acab0b 100644 --- a/tests/manual_jobs/test_scrub_users_from_eap_spans.py +++ b/tests/manual_jobs/test_scrub_users_from_eap_spans.py @@ -111,7 +111,7 @@ def test_generate_query() -> None: job._get_query(None) == 
"""ALTER TABLE eap_spans_2_local -UPDATE `attr_str_2` = mapApply((k, v) -> (k, if(k = 'user' AND startsWith(v, 'ip:') AND (isIPv4String(substring(v, 4)) OR isIPv6String(substring(v, 4))), 'ip:scrubbed', v)), `attr_str_2`) +UPDATE `attr_str_11` = mapApply((k, v) -> (k, if(k = 'sentry.user' AND startsWith(v, 'ip:') AND (isIPv4String(substring(v, 4)) OR isIPv6String(substring(v, 4))), 'ip:scrubbed', v)), `attr_str_11`) WHERE organization_id IN [1,3,5,6] AND _sort_timestamp >= toDateTime('2024-12-01T00:00:00') AND _sort_timestamp < toDateTime('2024-12-10T00:00:00')""" @@ -185,8 +185,6 @@ def _gen_message( "relay_protocol_version": "3", "relay_use_post_or_schedule": "True", "relay_use_post_or_schedule_rejected": "version", - "user.ip": "192.168.0.45", - "user": user, "spans_over_limit": "False", "server_name": "blah", "color": random.choice(["red", "green", "blue"]), @@ -219,11 +217,13 @@ def _generate_request( key=AttributeKey(type=AttributeKey.TYPE_STRING, name="color") ) ), - columns=[Column(key=AttributeKey(type=AttributeKey.TYPE_STRING, name="user"))], + columns=[ + Column(key=AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.user")) + ], order_by=[ TraceItemTableRequest.OrderBy( column=Column( - key=AttributeKey(type=AttributeKey.TYPE_STRING, name="user") + key=AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.user") ) ) ], @@ -234,7 +234,7 @@ def _generate_expected_response(user: str) -> TraceItemTableResponse: return TraceItemTableResponse( column_values=[ TraceItemColumnValues( - attribute_name="user", + attribute_name="sentry.user", results=[AttributeValue(val_str=user) for _ in range(20)], ) ],