Skip to content

Commit

Permalink
everything works
Browse files Browse the repository at this point in the history
  • Loading branch information
Rachel Chen authored and Rachel Chen committed Dec 16, 2024
1 parent ebab5ff commit 0a9d96b
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 25 deletions.
19 changes: 0 additions & 19 deletions snuba/manual_jobs/scrub_ips_from_eap_spans.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,27 +42,8 @@ def execute(self, logger: JobLogger) -> None:
else:
cluster_name = None
query = self._get_query(cluster_name)
print("queryyyy", query)
logger.info("Executing query: {query}")
result = connection.execute(query=query, settings={"mutations_sync": 0})

# this shows that all the user.ip values are scrubbed, so the mapping works
print(
"mapApplyresult",
connection.execute(
query="SELECT mapApply((k, v) -> (k, if(k = 'user.ip', 'scrubbed', v)), `attr_str_1`) from eap_spans_2_local",
settings={"mutations_sync": 0},
),
)

# this shows the table is not updated even though the mapping is correct
print(
"selectresult",
connection.execute(
query="SELECT attr_str_1 from eap_spans_2_local LIMIT 1",
settings={"mutations_sync": 0},
),
)

logger.info("complete")
logger.info(repr(result))
1 change: 0 additions & 1 deletion snuba/web/db_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,6 @@ def execute_query_with_readthrough_caching(
clickhouse_query_settings["query_id"] = query_id
if span:
span.set_data("query_id", query_id)

return execute_query(
clickhouse_query,
query_settings,
Expand Down
16 changes: 11 additions & 5 deletions tests/manual_jobs/test_scrub_ips_from_eap_spans.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,13 @@ def _gen_message(
}


def _generate_request(ts: Any, hour_ago: int) -> TraceItemTableRequest:
def _generate_request(
ts: Any, hour_ago: int, project_ids: list[int]
) -> TraceItemTableRequest:
# project_ids is a parameter so that callers can vary it between requests; repeating an identical request could otherwise return a cached result
return TraceItemTableRequest(
meta=RequestMeta(
project_ids=[1, 2, 3],
project_ids=project_ids,
organization_id=1,
cogs_category="something",
referrer="something",
Expand Down Expand Up @@ -239,8 +242,9 @@ def test_span_is_scrubbed() -> None:

ts = Timestamp(seconds=int(BASE_TIME.timestamp()))
hour_ago = int((BASE_TIME - timedelta(hours=1)).timestamp())
message = _generate_request(ts, hour_ago)
response = EndpointTraceItemTable().execute(message)
response = EndpointTraceItemTable().execute(
_generate_request(ts, hour_ago, [1, 2, 3])
)
expected_response = TraceItemTableResponse(
column_values=[
TraceItemColumnValues(
Expand Down Expand Up @@ -271,7 +275,9 @@ def test_span_is_scrubbed() -> None:
)
)

print(response)
response = EndpointTraceItemTable().execute(
_generate_request(ts, hour_ago, [3, 2, 1])
)
assert response == TraceItemTableResponse(
column_values=[
TraceItemColumnValues(
Expand Down

0 comments on commit 0a9d96b

Please sign in to comment.