Skip to content

Commit

Permalink
fix(inc984): scrub the correct bucket for sentry.user (#6693)
Browse files Browse the repository at this point in the history
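The `user` field is sent in the `sentry_tags` dictionary, whose keys are
automatically prefixed with `sentry.` in the message processor. The value is
therefore stored under the key `sentry.user`, which lives in a different
attribute column (`attr_str_11` rather than `attr_str_2`), so that column is
the one that has to be scrubbed.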

Co-authored-by: Rachel Chen <[email protected]>
  • Loading branch information
xurui-c and Rachel Chen authored Dec 19, 2024
1 parent 6cb2a72 commit 4535ce0
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
2 changes: 1 addition & 1 deletion snuba/manual_jobs/scrub_users_from_eap_spans.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def _get_query(self, cluster_name: str | None) -> str:
on_cluster = f"ON CLUSTER '{cluster_name}'" if cluster_name else ""
return f"""ALTER TABLE eap_spans_2_local
{on_cluster}
UPDATE `attr_str_2` = mapApply((k, v) -> (k, if(k = 'user' AND startsWith(v, '{_IP_PREFIX}') AND (isIPv4String(substring(v, 4)) OR isIPv6String(substring(v, 4))), 'ip:scrubbed', v)), `attr_str_2`)
UPDATE `attr_str_11` = mapApply((k, v) -> (k, if(k = 'sentry.user' AND startsWith(v, '{_IP_PREFIX}') AND (isIPv4String(substring(v, 4)) OR isIPv6String(substring(v, 4))), 'ip:scrubbed', v)), `attr_str_11`)
WHERE organization_id IN [{organization_ids}]
AND _sort_timestamp >= toDateTime('{start_datetime}')
AND _sort_timestamp < toDateTime('{end_datetime}')"""
Expand Down
12 changes: 6 additions & 6 deletions tests/manual_jobs/test_scrub_users_from_eap_spans.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def test_generate_query() -> None:
job._get_query(None)
== """ALTER TABLE eap_spans_2_local
UPDATE `attr_str_2` = mapApply((k, v) -> (k, if(k = 'user' AND startsWith(v, 'ip:') AND (isIPv4String(substring(v, 4)) OR isIPv6String(substring(v, 4))), 'ip:scrubbed', v)), `attr_str_2`)
UPDATE `attr_str_11` = mapApply((k, v) -> (k, if(k = 'sentry.user' AND startsWith(v, 'ip:') AND (isIPv4String(substring(v, 4)) OR isIPv6String(substring(v, 4))), 'ip:scrubbed', v)), `attr_str_11`)
WHERE organization_id IN [1,3,5,6]
AND _sort_timestamp >= toDateTime('2024-12-01T00:00:00')
AND _sort_timestamp < toDateTime('2024-12-10T00:00:00')"""
Expand Down Expand Up @@ -185,8 +185,6 @@ def _gen_message(
"relay_protocol_version": "3",
"relay_use_post_or_schedule": "True",
"relay_use_post_or_schedule_rejected": "version",
"user.ip": "192.168.0.45",
"user": user,
"spans_over_limit": "False",
"server_name": "blah",
"color": random.choice(["red", "green", "blue"]),
Expand Down Expand Up @@ -219,11 +217,13 @@ def _generate_request(
key=AttributeKey(type=AttributeKey.TYPE_STRING, name="color")
)
),
columns=[Column(key=AttributeKey(type=AttributeKey.TYPE_STRING, name="user"))],
columns=[
Column(key=AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.user"))
],
order_by=[
TraceItemTableRequest.OrderBy(
column=Column(
key=AttributeKey(type=AttributeKey.TYPE_STRING, name="user")
key=AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.user")
)
)
],
Expand All @@ -234,7 +234,7 @@ def _generate_expected_response(user: str) -> TraceItemTableResponse:
return TraceItemTableResponse(
column_values=[
TraceItemColumnValues(
attribute_name="user",
attribute_name="sentry.user",
results=[AttributeValue(val_str=user) for _ in range(20)],
)
],
Expand Down

0 comments on commit 4535ce0

Please sign in to comment.