diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 5ee5e1850..1eda80712 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -21,6 +21,7 @@ import math import re import os +import warnings from typing import Optional, Union from dateutil import relativedelta @@ -297,12 +298,7 @@ def _record_from_json(value, field): record = {} record_iter = zip(field.fields, value["f"]) for subfield, cell in record_iter: - converter = _CELLDATA_FROM_JSON[subfield.field_type] - if subfield.mode == "REPEATED": - value = [converter(item["v"], subfield) for item in cell["v"]] - else: - value = converter(cell["v"], subfield) - record[subfield.name] = value + record[subfield.name] = _field_from_json(cell["v"], subfield) return record @@ -382,7 +378,11 @@ def _field_to_index_mapping(schema): def _field_from_json(resource, field): - converter = _CELLDATA_FROM_JSON.get(field.field_type, lambda value, _: value) + def default_converter(value, field): + _warn_unknown_field_type(field) + return value + + converter = _CELLDATA_FROM_JSON.get(field.field_type, default_converter) if field.mode == "REPEATED": return [converter(item["v"], field) for item in resource] else: @@ -484,6 +484,11 @@ def _json_to_json(value): return json.dumps(value) +def _string_to_json(value): + """NOOP string -> string coercion""" + return value + + def _timestamp_to_json_parameter(value): """Coerce 'value' to an JSON-compatible representation. @@ -596,6 +601,7 @@ def _range_field_to_json(range_element_type, value): "DATE": _date_to_json, "TIME": _time_to_json, "JSON": _json_to_json, + "STRING": _string_to_json, # Make sure DECIMAL and BIGDECIMAL are handled, even though # requests for them should be converted to NUMERIC. Better safe # than sorry. @@ -609,6 +615,15 @@ def _range_field_to_json(range_element_type, value): _SCALAR_VALUE_TO_JSON_PARAM["TIMESTAMP"] = _timestamp_to_json_parameter +def _warn_unknown_field_type(field): + warnings.warn( + "Unknown type '{}' for field '{}'. Behavior reading and writing this type is not officially supported and may change in the future.".format( + field.field_type, field.name + ), + FutureWarning, + ) + + def _scalar_field_to_json(field, row_value): """Maps a field and value to a JSON-safe value. @@ -621,9 +636,12 @@ def _scalar_field_to_json(field, row_value): Returns: Any: A JSON-serializable object. """ - converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type) - if converter is None: # STRING doesn't need converting - return row_value + + def default_converter(value): + _warn_unknown_field_type(field) + return value + + converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type, default_converter) return converter(row_value) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 8395478fb..c21a02569 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -204,7 +204,9 @@ def bq_to_arrow_field(bq_field, array_type=None): metadata=metadata, ) - warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name)) + warnings.warn( + "Unable to determine Arrow type for field '{}'.".format(bq_field.name) + ) return None diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 9c59056fd..f1090a7dc 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -591,9 +591,8 @@ def to_api_repr(self) -> dict: Dict: JSON mapping """ value = self.value - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_) - if converter is not None: - value = converter(value) # type: ignore + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_, lambda value: value) + value = converter(value) # type: ignore resource: Dict[str, Any] = { "parameterType": {"type": self.type_}, "parameterValue": {"value": value}, @@ -748,9 +747,10 @@ def to_api_repr(self) -> dict: else: a_type = self.array_type.to_api_repr() - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(a_type["type"]) - if converter is not None: - values = [converter(value) for value in values] # type: ignore + converter = _SCALAR_VALUE_TO_JSON_PARAM.get( + a_type["type"], lambda value: value + ) + values = [converter(value) for value in values] # type: ignore a_values = [{"value": value} for value in values] resource = { @@ -792,7 +792,7 @@ def __repr__(self): class StructQueryParameter(_AbstractQueryParameter): - """Named / positional query parameters for struct values. + """Name / positional query parameters for struct values. Args: name (Optional[str]): @@ -897,10 +897,8 @@ def to_api_repr(self) -> dict: values[name] = repr_["parameterValue"] else: s_types[name] = {"name": name, "type": {"type": type_}} - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_) - if converter is not None: - value = converter(value) # type: ignore - values[name] = {"value": value} + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_, lambda value: value) + values[name] = {"value": converter(value)} resource = { "parameterType": { diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 1bf21479f..0a307498f 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -17,6 +17,7 @@ import decimal import json import os +import warnings import pytest import packaging import unittest @@ -640,6 +641,17 @@ def test_w_single_scalar_column(self): row = {"f": [{"v": "1"}]} self.assertEqual(self._call_fut(row, schema=[col]), (1,)) + def test_w_unknown_type(self): + # SELECT 1 AS col + col = _Field("REQUIRED", "col", "UNKNOWN") + row = {"f": [{"v": "1"}]} + with warnings.catch_warnings(record=True) as warned: + self.assertEqual(self._call_fut(row, schema=[col]), ("1",)) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("UNKNOWN" in str(warning)) + self.assertTrue("col" in str(warning)) + def test_w_single_scalar_geography_column(self): # SELECT 1 AS col col = _Field("REQUIRED", "geo", "GEOGRAPHY") @@ -660,6 +672,17 @@ def test_w_single_array_column(self): row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]} self.assertEqual(self._call_fut(row, schema=[col]), ([1, 2, 3],)) + def test_w_unknown_type_repeated(self): + # SELECT 1 AS col + col = _Field("REPEATED", "col", "UNKNOWN") + row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]} + with warnings.catch_warnings(record=True) as warned: + self.assertEqual(self._call_fut(row, schema=[col]), (["1", "2", "3"],)) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("UNKNOWN" in str(warning)) + self.assertTrue("col" in str(warning)) + def test_w_struct_w_nested_array_column(self): # SELECT ([1, 2], 3, [4, 5]) as col first = _Field("REPEATED", "first", "INTEGER") @@ -684,6 +707,39 @@ def test_w_struct_w_nested_array_column(self): ({"first": [1, 2], "second": 3, "third": [4, 5]},), ) + def test_w_unknown_type_subfield(self): + # SELECT [(1, 2, 3), (4, 5, 6)] as col + first = _Field("REPEATED", "first", "UNKNOWN1") + second = _Field("REQUIRED", "second", "UNKNOWN2") + third = _Field("REPEATED", "third", "INTEGER") + col = _Field("REQUIRED", "col", "RECORD", fields=[first, second, third]) + row = { + "f": [ + { + "v": { + "f": [ + {"v": [{"v": "1"}, {"v": "2"}]}, + {"v": "3"}, + {"v": [{"v": "4"}, {"v": "5"}]}, + ] + } + } + ] + } + with warnings.catch_warnings(record=True) as warned: + self.assertEqual( + self._call_fut(row, schema=[col]), + ({"first": ["1", "2"], "second": "3", "third": [4, 5]},), + ) + self.assertEqual(len(warned), 2) # 1 warning per unknown field. + warned = [str(warning) for warning in warned] + self.assertTrue( + any(["first" in warning and "UNKNOWN1" in warning for warning in warned]) + ) + self.assertTrue( + any(["second" in warning and "UNKNOWN2" in warning for warning in warned]) + ) + def test_w_array_of_struct(self): # SELECT [(1, 2, 3), (4, 5, 6)] as col first = _Field("REQUIRED", "first", "INTEGER") @@ -1076,8 +1132,12 @@ def _call_fut(self, field, value): def test_w_unknown_field_type(self): field = _make_field("UNKNOWN") original = object() - converted = self._call_fut(field, original) + with warnings.catch_warnings(record=True) as warned: + converted = self._call_fut(field, original) self.assertIs(converted, original) + self.assertEqual(len(warned), 1) + warning = warned[0] + self.assertTrue("UNKNOWN" in str(warning)) def test_w_known_field_type(self): field = _make_field("INT64") diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 7c36eb75b..40ef080f7 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -1780,6 +1780,25 @@ def test_to_api_repr_w_nested_struct(self): param = self._make_one("foo", scalar_1, sub) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_unknown_type(self): + EXPECTED = { + "name": "foo", + "parameterType": { + "type": "STRUCT", + "structTypes": [ + {"name": "bar", "type": {"type": "INT64"}}, + {"name": "baz", "type": {"type": "UNKNOWN_TYPE"}}, + ], + }, + "parameterValue": { + "structValues": {"bar": {"value": "123"}, "baz": {"value": "abc"}} + }, + } + sub_1 = _make_subparam("bar", "INT64", 123) + sub_2 = _make_subparam("baz", "UNKNOWN_TYPE", "abc") + param = self._make_one("foo", sub_1, sub_2) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test___eq___wrong_type(self): field = self._make_one("test", _make_subparam("bar", "STRING", "abc")) other = object() diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 7a97c7b78..d6febcfb1 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2751,9 +2751,9 @@ def test_to_arrow_w_unknown_type(self): self.assertEqual(ages, [33, 29]) self.assertEqual(sports, ["volleyball", "basketball"]) - self.assertEqual(len(warned), 1) - warning = warned[0] - self.assertTrue("sport" in str(warning)) + # Expect warning from both the arrow conversion, and the json deserialization. + self.assertEqual(len(warned), 2) + self.assertTrue(all("sport" in str(warning) for warning in warned)) def test_to_arrow_w_empty_table(self): pyarrow = pytest.importorskip(