diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 9c02538d..4ec7f804 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -13,6 +13,7 @@ bigquery_storage_v1beta1 = None from pandas_gbq.exceptions import AccessDenied +import pandas_gbq.schema logger = logging.getLogger(__name__) @@ -1269,12 +1270,7 @@ def create(self, table_id, schema): table_ref = self.client.dataset(self.dataset_id).table(table_id) table = Table(table_ref) - # Manually create the schema objects, adding NULLABLE mode - # as a workaround for - # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4456 - for field in schema["fields"]: - if "mode" not in field: - field["mode"] = "NULLABLE" + schema = pandas_gbq.schema.add_default_nullable_mode(schema) table.schema = [ SchemaField.from_api_repr(field) for field in schema["fields"] diff --git a/pandas_gbq/load.py b/pandas_gbq/load.py index cb190f86..3e9d570e 100644 --- a/pandas_gbq/load.py +++ b/pandas_gbq/load.py @@ -66,12 +66,7 @@ def load_chunks( if schema is None: schema = pandas_gbq.schema.generate_bq_schema(dataframe) - # Manually create the schema objects, adding NULLABLE mode - # as a workaround for - # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4456 - for field in schema["fields"]: - if "mode" not in field: - field["mode"] = "NULLABLE" + schema = pandas_gbq.schema.add_default_nullable_mode(schema) job_config.schema = [ bigquery.SchemaField.from_api_repr(field) for field in schema["fields"] diff --git a/pandas_gbq/schema.py b/pandas_gbq/schema.py index 91963b7c..bb18fabc 100644 --- a/pandas_gbq/schema.py +++ b/pandas_gbq/schema.py @@ -1,5 +1,7 @@ """Helper methods for BigQuery schemas""" +import copy + def generate_bq_schema(dataframe, default_type="STRING"): """Given a passed dataframe, generate the associated Google BigQuery schema. 
def add_default_nullable_mode(schema):
    """Return a copy of ``schema`` where every field has an explicit mode.

    Workaround for
    https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4456

    Parameters
    ----------
    schema : dict
        Schema resource of the form ``{"fields": [field, ...]}`` where each
        field is a dict. A field may carry its own ``"fields"`` list of
        sub-fields (nested records).

    Returns
    -------
    dict
        A deep copy of ``schema`` in which every field, including nested
        sub-fields, that had no ``"mode"`` key now has
        ``"mode": "NULLABLE"``. Modes that were already set are kept.

    Raises
    ------
    KeyError
        If ``schema`` has no ``"fields"`` key.
    """
    # Work on a copy rather than modifying the caller's mutable argument,
    # per Issue #277.
    result = copy.deepcopy(schema)

    # Walk the field tree iteratively so that sub-fields of nested records
    # receive the default as well, not only the top-level fields.
    pending = list(result["fields"])
    while pending:
        field = pending.pop()
        field.setdefault("mode", "NULLABLE")
        # A missing or None "fields" entry means there are no sub-fields.
        pending.extend(field.get("fields") or ())
    return result
+ if_exists="append", + ) + assert original_schema == original_schema_cp