Skip to content

Commit

Permalink
BUG: Update pandas-gbq to latest version of google-cloud-bigquery
Browse files Browse the repository at this point in the history
There was a breaking change in 0.32.0.dev1 which changed the way
configuration for the query job gets loaded. Also, it added the
'description' field to the schema resource, so this change updates the
schema comparison logic to account for that.

Updates the MASTER build in CI to also build with google-cloud-bigquery
at MASTER.
  • Loading branch information
tswast committed Mar 26, 2018
1 parent 8f19fdc commit 9492275
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 58 deletions.
3 changes: 3 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ install:
conda install -q numpy pytz python-dateutil;
PRE_WHEELS="https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com";
pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS pandas;
pip install -e 'git+https://github.com/GoogleCloudPlatform/google-cloud-python.git#egg=version_subpkg&subdirectory=api_core';
pip install -e 'git+https://github.com/GoogleCloudPlatform/google-cloud-python.git#egg=version_subpkg&subdirectory=core';
pip install -e 'git+https://github.com/GoogleCloudPlatform/google-cloud-python.git#egg=version_subpkg&subdirectory=bigquery';
else
conda install -q pandas=$PANDAS;
fi
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements-3.5-0.18.1.pip
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
google-auth==1.0.2
google-auth-oauthlib==0.0.1
mock
google-cloud-bigquery==0.29.0
google-cloud-bigquery==0.32.0
1 change: 0 additions & 1 deletion ci/requirements-3.6-MASTER.pip
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
google-auth
google-auth-oauthlib
mock
google-cloud-bigquery
4 changes: 3 additions & 1 deletion docs/source/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
Changelog
=========

0.3.2 / [TBD]
0.4.0 / [TBD]
------------------
- Fix bug with querying for an array of floats (:issue:`123`)
- Fix bug with integer columns on Windows. Explicitly use 64bit integers when converting from BQ types. (:issue:`119`)
- Update ``google-cloud-bigquery`` dependency to version 0.32.0+ (:issue:`TBD`)

0.3.1 / 2018-02-13
------------------
Expand Down
85 changes: 35 additions & 50 deletions pandas_gbq/gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import time
import warnings
from datetime import datetime
from distutils.version import StrictVersion
from time import sleep

import numpy as np
Expand All @@ -23,17 +22,15 @@ def _check_google_client_version():
raise ImportError('Could not import pkg_resources (setuptools).')

# https://github.com/GoogleCloudPlatform/google-cloud-python/blob/master/bigquery/CHANGELOG.md
bigquery_client_minimum_version = '0.29.0'
bigquery_minimum_version = pkg_resources.parse_version('0.32.0.dev1')
bigquery_installed_version = pkg_resources.get_distribution(
'google-cloud-bigquery').parsed_version

_BIGQUERY_CLIENT_VERSION = pkg_resources.get_distribution(
'google-cloud-bigquery').version

if (StrictVersion(_BIGQUERY_CLIENT_VERSION) <
StrictVersion(bigquery_client_minimum_version)):
raise ImportError('pandas-gbq requires google-cloud-bigquery >= {0}, '
'current version {1}'
.format(bigquery_client_minimum_version,
_BIGQUERY_CLIENT_VERSION))
if bigquery_installed_version < bigquery_minimum_version:
raise ImportError(
'pandas-gbq requires google-cloud-bigquery >= {0}, '
'current version {1}'.format(
bigquery_minimum_version, bigquery_installed_version))


def _test_google_api_imports():
Expand Down Expand Up @@ -459,29 +456,23 @@ def run_query(self, query, **kwargs):
}
config = kwargs.get('configuration')
if config is not None:
if len(config) != 1:
raise ValueError("Only one job type must be specified, but "
"given {}".format(','.join(config.keys())))
if 'query' in config:
if 'query' in config['query']:
if query is not None:
raise ValueError("Query statement can't be specified "
"inside config while it is specified "
"as parameter")
query = config['query']['query']
del config['query']['query']

job_config['query'].update(config['query'])
else:
raise ValueError("Only 'query' job type is supported")
job_config.update(config)

if 'query' in config and 'query' in config['query']:
if query is not None:
raise ValueError("Query statement can't be specified "
"inside config while it is specified "
"as parameter")
query = config['query']['query']
del config['query']['query']

self._start_timer()
try:

logger.info('Requesting query... ')
query_reply = self.client.query(
query,
job_config=QueryJobConfig.from_api_repr(job_config['query']))
job_config=QueryJobConfig.from_api_repr(job_config))
logger.info('ok.\nQuery running...')
except (RefreshError, ValueError):
if self.private_key:
Expand Down Expand Up @@ -598,6 +589,15 @@ def schema(self, dataset_id, table_id):
except self.http_error as ex:
self.process_http_error(ex)

def _clean_schema_fields(self, fields):
"""Return a sanitized version of the schema for comparisons.

Sorts ``fields`` by each field's ``'name'`` entry and rebuilds every
field as a new dict holding only its ``'name'`` and ``'type'`` keys.
Any other key (for example ``'mode'`` or ``'description'``) is left
out, so it cannot affect an equality or membership comparison of the
result. The list and dicts passed in are not modified.

Each field must provide both ``'name'`` and ``'type'``; a missing key
raises ``KeyError``.
"""
fields_sorted = sorted(fields, key=lambda field: field['name'])
# Ignore mode and description when comparing schemas.
return [
{'name': field['name'], 'type': field['type']}
for field in fields_sorted
]

def verify_schema(self, dataset_id, table_id, schema):
"""Indicate whether schemas match exactly
Expand All @@ -621,17 +621,9 @@ def verify_schema(self, dataset_id, table_id, schema):
Whether the schemas match
"""

fields_remote = sorted(self.schema(dataset_id, table_id),
key=lambda x: x['name'])
fields_local = sorted(schema['fields'], key=lambda x: x['name'])

# Ignore mode when comparing schemas.
for field in fields_local:
if 'mode' in field:
del field['mode']
for field in fields_remote:
if 'mode' in field:
del field['mode']
fields_remote = self._clean_schema_fields(
self.schema(dataset_id, table_id))
fields_local = self._clean_schema_fields(schema['fields'])

return fields_remote == fields_local

Expand All @@ -658,16 +650,9 @@ def schema_is_subset(self, dataset_id, table_id, schema):
Whether the passed schema is a subset
"""

fields_remote = self.schema(dataset_id, table_id)
fields_local = schema['fields']

# Ignore mode when comparing schemas.
for field in fields_local:
if 'mode' in field:
del field['mode']
for field in fields_remote:
if 'mode' in field:
del field['mode']
fields_remote = self._clean_schema_fields(
self.schema(dataset_id, table_id))
fields_local = self._clean_schema_fields(schema['fields'])

return all(field in fields_remote for field in fields_local)

Expand Down Expand Up @@ -709,7 +694,7 @@ def _parse_data(schema, rows):
col_names = [str(field['name']) for field in fields]
col_dtypes = [
dtype_map.get(field['type'].upper(), object)
if field['mode'] != 'repeated'
if field['mode'].lower() != 'repeated'
else object
for field in fields
]
Expand Down Expand Up @@ -847,7 +832,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
for field in schema['fields']:
if field['type'].upper() in type_map and \
final_df[field['name']].notnull().all() and \
field['mode'] != 'repeated':
field['mode'].lower() != 'repeated':
final_df[field['name']] = \
final_df[field['name']].astype(type_map[field['type'].upper()])

Expand Down
28 changes: 24 additions & 4 deletions pandas_gbq/tests/test_gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -1266,10 +1266,30 @@ def test_retrieve_schema(self):
test_id = "15"
test_schema = {
'fields': [
{'name': 'A', 'type': 'FLOAT', 'mode': 'NULLABLE'},
{'name': 'B', 'type': 'FLOAT', 'mode': 'NULLABLE'},
{'name': 'C', 'type': 'STRING', 'mode': 'NULLABLE'},
{'name': 'D', 'type': 'TIMESTAMP', 'mode': 'NULLABLE'}
{
'name': 'A',
'type': 'FLOAT',
'mode': 'NULLABLE',
'description': None,
},
{
'name': 'B',
'type': 'FLOAT',
'mode': 'NULLABLE',
'description': None,
},
{
'name': 'C',
'type': 'STRING',
'mode': 'NULLABLE',
'description': None,
},
{
'name': 'D',
'type': 'TIMESTAMP',
'mode': 'NULLABLE',
'description': None,
},
]
}

Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@ def readme():


INSTALL_REQUIRES = [
'setuptools',
'pandas',
'google-auth>=1.0.0',
'google-auth-oauthlib>=0.0.1',
'google-cloud-bigquery>=0.29.0',
'google-cloud-bigquery>=0.32.0.dev1',
]


Expand Down

0 comments on commit 9492275

Please sign in to comment.