Ensure table_schema arg is not modified inplace #278

Merged
merged 1 commit on May 29, 2019
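For context, a minimal sketch (with a made-up schema dict) of the in-place mutation this PR removes: the deleted loops in gbq.py and load.py wrote a default "mode" back into the caller's table_schema dicts, as tracked in Issue #277.

schema = {"fields": [{"name": "field1", "type": "STRING"}]}

# Pre-fix behavior (the loop removed below): fills in a default mode
# by mutating the caller's dicts directly.
for field in schema["fields"]:
    if "mode" not in field:
        field["mode"] = "NULLABLE"

print(schema["fields"][0])
# {'name': 'field1', 'type': 'STRING', 'mode': 'NULLABLE'}  -- the caller's input changed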
pandas_gbq/gbq.py (8 changes: 2 additions & 6 deletions)
@@ -13,6 +13,7 @@
bigquery_storage_v1beta1 = None

from pandas_gbq.exceptions import AccessDenied
+import pandas_gbq.schema

logger = logging.getLogger(__name__)

@@ -1269,12 +1270,7 @@ def create(self, table_id, schema):
table_ref = self.client.dataset(self.dataset_id).table(table_id)
table = Table(table_ref)

-# Manually create the schema objects, adding NULLABLE mode
-# as a workaround for
-# https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4456
-for field in schema["fields"]:
-    if "mode" not in field:
-        field["mode"] = "NULLABLE"
+schema = pandas_gbq.schema.add_default_nullable_mode(schema)

table.schema = [
SchemaField.from_api_repr(field) for field in schema["fields"]
pandas_gbq/load.py (7 changes: 1 addition & 6 deletions)
@@ -66,12 +66,7 @@ def load_chunks(
if schema is None:
schema = pandas_gbq.schema.generate_bq_schema(dataframe)

-# Manually create the schema objects, adding NULLABLE mode
-# as a workaround for
-# https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4456
-for field in schema["fields"]:
-    if "mode" not in field:
-        field["mode"] = "NULLABLE"
+schema = pandas_gbq.schema.add_default_nullable_mode(schema)

job_config.schema = [
bigquery.SchemaField.from_api_repr(field) for field in schema["fields"]
pandas_gbq/schema.py (15 changes: 15 additions & 0 deletions)
@@ -1,5 +1,7 @@
"""Helper methods for BigQuery schemas"""

import copy


def generate_bq_schema(dataframe, default_type="STRING"):
"""Given a passed dataframe, generate the associated Google BigQuery schema.
@@ -62,3 +64,16 @@ def update_schema(schema_old, schema_new):
output_fields.append(field)

return {"fields": output_fields}


def add_default_nullable_mode(schema):
    """Manually create the schema objects, adding NULLABLE mode."""
    # Workaround for:
    # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4456
    #
    # Returns a copy rather than modifying the mutable arg,
    # per Issue #277
    result = copy.deepcopy(schema)
    for field in result["fields"]:
        field.setdefault("mode", "NULLABLE")
    return result
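For illustration (not part of this diff), a minimal sketch of how the new helper behaves, again with a made-up schema dict: it fills in the default mode on a deep copy and leaves the caller's argument untouched.

import pandas_gbq.schema

schema = {"fields": [{"name": "field1", "type": "STRING"}]}
result = pandas_gbq.schema.add_default_nullable_mode(schema)

print(result["fields"][0]["mode"])    # "NULLABLE" is added on the returned copy
print("mode" in schema["fields"][0])  # False -- the original argument is unchanged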
tests/unit/test_gbq.py (44 changes: 44 additions & 0 deletions)
@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-

import copy
import datetime
from unittest import mock

import numpy
@@ -477,3 +479,45 @@ def test_generate_bq_schema_deprecated():
    with pytest.warns(FutureWarning):
        df = DataFrame([[1, "two"], [3, "four"]])
        gbq.generate_bq_schema(df)


def test_load_does_not_modify_schema_arg():
    # Test of Issue # 277
    df = DataFrame(
        {
            "field1": ["a", "b"],
            "field2": [1, 2],
            "field3": [datetime.date(2019, 1, 1), datetime.date(2019, 5, 1)],
        }
    )
    original_schema = [
        {"name": "field1", "type": "STRING", "mode": "REQUIRED"},
        {"name": "field2", "type": "INTEGER"},
        {"name": "field3", "type": "DATE"},
    ]
    original_schema_cp = copy.deepcopy(original_schema)
    gbq.to_gbq(
        df,
        "dataset.schematest",
        project_id="my-project",
        table_schema=original_schema,
        if_exists="fail",
    )
    assert original_schema == original_schema_cp

    # Test again now that table exists - behavior will differ internally
    # branch at if table.exists(table_id)
    original_schema = [
        {"name": "field1", "type": "STRING", "mode": "REQUIRED"},
        {"name": "field2", "type": "INTEGER"},
        {"name": "field3", "type": "DATE"},
    ]
    original_schema_cp = copy.deepcopy(original_schema)
    gbq.to_gbq(
        df,
        "dataset.schematest",
        project_id="my-project",
        table_schema=original_schema,
        if_exists="append",
    )
    assert original_schema == original_schema_cp