Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: add warning when encountering unknown field types #1989

Merged
merged 15 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions google/cloud/bigquery/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import math
import re
import os
import warnings
from typing import Optional, Union

from dateutil import relativedelta
Expand Down Expand Up @@ -297,12 +298,7 @@ def _record_from_json(value, field):
record = {}
record_iter = zip(field.fields, value["f"])
for subfield, cell in record_iter:
converter = _CELLDATA_FROM_JSON[subfield.field_type]
if subfield.mode == "REPEATED":
value = [converter(item["v"], subfield) for item in cell["v"]]
else:
value = converter(cell["v"], subfield)
record[subfield.name] = value
record[subfield.name] = _field_from_json(cell["v"], subfield)
return record


Expand Down Expand Up @@ -382,7 +378,16 @@ def _field_to_index_mapping(schema):


def _field_from_json(resource, field):
converter = _CELLDATA_FROM_JSON.get(field.field_type, lambda value, _: value)
def default_converter(value, field):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can move this function to outside, so we can reuse it for _scalar_field_to_json() as well?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ended up just moving the warning itself to a new function, since the converter in _scalar_field_to_json has a different function signature.

warnings.warn(
"Unknown type '{}' for field '{}'. Behavior reading this type is not officially supported and may change in the future.".format(
field.field_type, field.name
),
FutureWarning,
)
return value

converter = _CELLDATA_FROM_JSON.get(field.field_type, default_converter)
if field.mode == "REPEATED":
return [converter(item["v"], field) for item in resource]
else:
Expand Down Expand Up @@ -484,6 +489,11 @@ def _json_to_json(value):
return json.dumps(value)


def _string_to_json(value):
"""NOOP string -> string coercion"""
return value


def _timestamp_to_json_parameter(value):
"""Coerce 'value' to an JSON-compatible representation.

Expand Down Expand Up @@ -596,6 +606,7 @@ def _range_field_to_json(range_element_type, value):
"DATE": _date_to_json,
"TIME": _time_to_json,
"JSON": _json_to_json,
"STRING": _string_to_json,
# Make sure DECIMAL and BIGDECIMAL are handled, even though
# requests for them should be converted to NUMERIC. Better safe
# than sorry.
Expand All @@ -622,7 +633,13 @@ def _scalar_field_to_json(field, row_value):
Any: A JSON-serializable object.
"""
converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type)
if converter is None: # STRING doesn't need converting
if converter is None:
warnings.warn(
"Unknown type '{}' for field '{}'. Behavior writing this type is not officially supported and may change in the future.".format(
field.field_type, field.name
),
FutureWarning,
)
return row_value
return converter(row_value)

Expand Down
4 changes: 3 additions & 1 deletion google/cloud/bigquery/_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,9 @@ def bq_to_arrow_field(bq_field, array_type=None):
metadata=metadata,
)

warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name))
warnings.warn(
"Unable to determine Arrow type for field '{}'.".format(bq_field.name)
)
return None


Expand Down
62 changes: 61 additions & 1 deletion tests/unit/test__helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import decimal
import json
import os
import warnings
import pytest
import packaging
import unittest
Expand Down Expand Up @@ -640,6 +641,17 @@ def test_w_single_scalar_column(self):
row = {"f": [{"v": "1"}]}
self.assertEqual(self._call_fut(row, schema=[col]), (1,))

def test_w_unknown_type(self):
# SELECT 1 AS col
col = _Field("REQUIRED", "col", "UNKNOWN")
row = {"f": [{"v": "1"}]}
with warnings.catch_warnings(record=True) as warned:
self.assertEqual(self._call_fut(row, schema=[col]), ("1",))
self.assertEqual(len(warned), 1)
warning = warned[0]
self.assertTrue("UNKNOWN" in str(warning))
self.assertTrue("col" in str(warning))

def test_w_single_scalar_geography_column(self):
# SELECT 1 AS col
col = _Field("REQUIRED", "geo", "GEOGRAPHY")
Expand All @@ -660,6 +672,17 @@ def test_w_single_array_column(self):
row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]}
self.assertEqual(self._call_fut(row, schema=[col]), ([1, 2, 3],))

def test_w_unknown_type_repeated(self):
# SELECT 1 AS col
col = _Field("REPEATED", "col", "UNKNOWN")
row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]}
with warnings.catch_warnings(record=True) as warned:
self.assertEqual(self._call_fut(row, schema=[col]), (["1", "2", "3"],))
self.assertEqual(len(warned), 1)
warning = warned[0]
self.assertTrue("UNKNOWN" in str(warning))
self.assertTrue("col" in str(warning))

def test_w_struct_w_nested_array_column(self):
# SELECT ([1, 2], 3, [4, 5]) as col
first = _Field("REPEATED", "first", "INTEGER")
Expand All @@ -684,6 +707,39 @@ def test_w_struct_w_nested_array_column(self):
({"first": [1, 2], "second": 3, "third": [4, 5]},),
)

def test_w_unknown_type_subfield(self):
# SELECT [(1, 2, 3), (4, 5, 6)] as col
first = _Field("REPEATED", "first", "UNKNOWN1")
second = _Field("REQUIRED", "second", "UNKNOWN2")
third = _Field("REPEATED", "third", "INTEGER")
col = _Field("REQUIRED", "col", "RECORD", fields=[first, second, third])
row = {
"f": [
{
"v": {
"f": [
{"v": [{"v": "1"}, {"v": "2"}]},
{"v": "3"},
{"v": [{"v": "4"}, {"v": "5"}]},
]
}
}
]
}
with warnings.catch_warnings(record=True) as warned:
self.assertEqual(
self._call_fut(row, schema=[col]),
({"first": ["1", "2"], "second": "3", "third": [4, 5]},),
)
self.assertEqual(len(warned), 2) # 1 warning per unknown field.
warned = [str(warning) for warning in warned]
self.assertTrue(
any("first" in warning and "UNKNOWN1" in warning for warning in warned)
)
self.assertTrue(
any("second" in warning and "UNKNOWN2" in warning for warning in warned)
)

def test_w_array_of_struct(self):
# SELECT [(1, 2, 3), (4, 5, 6)] as col
first = _Field("REQUIRED", "first", "INTEGER")
Expand Down Expand Up @@ -1076,8 +1132,12 @@ def _call_fut(self, field, value):
def test_w_unknown_field_type(self):
field = _make_field("UNKNOWN")
original = object()
converted = self._call_fut(field, original)
with warnings.catch_warnings(record=True) as warned:
converted = self._call_fut(field, original)
self.assertIs(converted, original)
self.assertEqual(len(warned), 1)
warning = warned[0]
self.assertTrue("UNKNOWN" in str(warning))

def test_w_known_field_type(self):
field = _make_field("INT64")
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2751,9 +2751,9 @@ def test_to_arrow_w_unknown_type(self):
self.assertEqual(ages, [33, 29])
self.assertEqual(sports, ["volleyball", "basketball"])

self.assertEqual(len(warned), 1)
warning = warned[0]
self.assertTrue("sport" in str(warning))
# Expect warning from both the arrow conversion, and the json deserialization.
self.assertEqual(len(warned), 2)
self.assertTrue(all("sport" in str(warning) for warning in warned))

def test_to_arrow_w_empty_table(self):
pyarrow = pytest.importorskip(
Expand Down