diff --git a/.travis.yml b/.travis.yml index 92129cc6..42378680 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,6 +28,9 @@ install: conda install -q numpy pytz python-dateutil; PRE_WHEELS="https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com"; pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS pandas; + pip install -e 'git+https://github.com/GoogleCloudPlatform/google-cloud-python.git#egg=version_subpkg&subdirectory=api_core'; + pip install -e 'git+https://github.com/GoogleCloudPlatform/google-cloud-python.git#egg=version_subpkg&subdirectory=core'; + pip install -e 'git+https://github.com/GoogleCloudPlatform/google-cloud-python.git#egg=version_subpkg&subdirectory=bigquery'; else conda install -q pandas=$PANDAS; fi diff --git a/ci/requirements-3.5-0.18.1.pip b/ci/requirements-3.5-0.18.1.pip index f895fb1f..3760d46b 100644 --- a/ci/requirements-3.5-0.18.1.pip +++ b/ci/requirements-3.5-0.18.1.pip @@ -1,4 +1,4 @@ google-auth==1.0.2 google-auth-oauthlib==0.0.1 mock -google-cloud-bigquery==0.29.0 +google-cloud-bigquery==0.32.0 diff --git a/ci/requirements-3.6-MASTER.pip b/ci/requirements-3.6-MASTER.pip index b52f2aeb..78f6834f 100644 --- a/ci/requirements-3.6-MASTER.pip +++ b/ci/requirements-3.6-MASTER.pip @@ -1,4 +1,3 @@ google-auth google-auth-oauthlib mock -google-cloud-bigquery diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index eca6f9ce..d62f714a 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -1,9 +1,11 @@ Changelog ========= -0.3.2 / [TBD] +0.4.0 / [TBD] ------------------ - Fix bug with querying for an array of floats (:issue:`123`) +- Fix bug with integer columns on Windows. Explicitly use 64bit integers when converting from BQ types. (:issue:`119`) +- Update ``google-cloud-python`` dependency to version 0.32.0+ (:issue:`TBD`) 0.3.1 / 2018-02-13 ------------------ diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 6d5aacf8..3b13a8de 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -4,7 +4,6 @@ import time import warnings from datetime import datetime -from distutils.version import StrictVersion from time import sleep import numpy as np @@ -23,17 +22,15 @@ def _check_google_client_version(): raise ImportError('Could not import pkg_resources (setuptools).') # https://github.com/GoogleCloudPlatform/google-cloud-python/blob/master/bigquery/CHANGELOG.md - bigquery_client_minimum_version = '0.29.0' + bigquery_minimum_version = pkg_resources.parse_version('0.32.0.dev1') + bigquery_installed_version = pkg_resources.get_distribution( + 'google-cloud-bigquery').parsed_version - _BIGQUERY_CLIENT_VERSION = pkg_resources.get_distribution( - 'google-cloud-bigquery').version - - if (StrictVersion(_BIGQUERY_CLIENT_VERSION) < - StrictVersion(bigquery_client_minimum_version)): - raise ImportError('pandas-gbq requires google-cloud-bigquery >= {0}, ' - 'current version {1}' - .format(bigquery_client_minimum_version, - _BIGQUERY_CLIENT_VERSION)) + if bigquery_installed_version < bigquery_minimum_version: + raise ImportError( + 'pandas-gbq requires google-cloud-bigquery >= {0}, ' + 'current version {1}'.format( + bigquery_minimum_version, bigquery_installed_version)) def _test_google_api_imports(): @@ -459,21 +456,15 @@ def run_query(self, query, **kwargs): } config = kwargs.get('configuration') if config is not None: - if len(config) != 1: - raise ValueError("Only one job type must be specified, but " - "given {}".format(','.join(config.keys()))) - if 'query' in config: - if 'query' in config['query']: - if query is not None: - raise ValueError("Query statement can't be specified " - "inside config while it is specified " - "as parameter") - query = config['query']['query'] - del config['query']['query'] - - job_config['query'].update(config['query']) - else: - raise ValueError("Only 'query' job type is supported") + job_config.update(config) + + if 'query' in config and 'query' in config['query']: + if query is not None: + raise ValueError("Query statement can't be specified " + "inside config while it is specified " + "as parameter") + query = config['query']['query'] + del config['query']['query'] self._start_timer() try: @@ -481,7 +472,7 @@ def run_query(self, query, **kwargs): logger.info('Requesting query... ') query_reply = self.client.query( query, - job_config=QueryJobConfig.from_api_repr(job_config['query'])) + job_config=QueryJobConfig.from_api_repr(job_config)) logger.info('ok.\nQuery running...') except (RefreshError, ValueError): if self.private_key: @@ -598,6 +589,15 @@ def schema(self, dataset_id, table_id): except self.http_error as ex: self.process_http_error(ex) + def _clean_schema_fields(self, fields): + """Return a sanitized version of the schema for comparisons.""" + fields_sorted = sorted(fields, key=lambda field: field['name']) + # Ignore mode and description when comparing schemas. + return [ + {'name': field['name'], 'type': field['type']} + for field in fields_sorted + ] + def verify_schema(self, dataset_id, table_id, schema): """Indicate whether schemas match exactly @@ -621,17 +621,9 @@ def verify_schema(self, dataset_id, table_id, schema): Whether the schemas match """ - fields_remote = sorted(self.schema(dataset_id, table_id), - key=lambda x: x['name']) - fields_local = sorted(schema['fields'], key=lambda x: x['name']) - - # Ignore mode when comparing schemas. - for field in fields_local: - if 'mode' in field: - del field['mode'] - for field in fields_remote: - if 'mode' in field: - del field['mode'] + fields_remote = self._clean_schema_fields( + self.schema(dataset_id, table_id)) + fields_local = self._clean_schema_fields(schema['fields']) return fields_remote == fields_local @@ -658,16 +650,9 @@ def schema_is_subset(self, dataset_id, table_id, schema): Whether the passed schema is a subset """ - fields_remote = self.schema(dataset_id, table_id) - fields_local = schema['fields'] - - # Ignore mode when comparing schemas. - for field in fields_local: - if 'mode' in field: - del field['mode'] - for field in fields_remote: - if 'mode' in field: - del field['mode'] + fields_remote = self._clean_schema_fields( + self.schema(dataset_id, table_id)) + fields_local = self._clean_schema_fields(schema['fields']) return all(field in fields_remote for field in fields_local) @@ -709,7 +694,7 @@ def _parse_data(schema, rows): col_names = [str(field['name']) for field in fields] col_dtypes = [ dtype_map.get(field['type'].upper(), object) - if field['mode'] != 'repeated' + if field['mode'].lower() != 'repeated' else object for field in fields ] @@ -847,7 +832,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, for field in schema['fields']: if field['type'].upper() in type_map and \ final_df[field['name']].notnull().all() and \ - field['mode'] != 'repeated': + field['mode'].lower() != 'repeated': final_df[field['name']] = \ final_df[field['name']].astype(type_map[field['type'].upper()]) diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index 2df1b9bd..8c74b595 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -1266,10 +1266,30 @@ def test_retrieve_schema(self): test_id = "15" test_schema = { 'fields': [ - {'name': 'A', 'type': 'FLOAT', 'mode': 'NULLABLE'}, - {'name': 'B', 'type': 'FLOAT', 'mode': 'NULLABLE'}, - {'name': 'C', 'type': 'STRING', 'mode': 'NULLABLE'}, - {'name': 'D', 'type': 'TIMESTAMP', 'mode': 'NULLABLE'} + { + 'name': 'A', + 'type': 'FLOAT', + 'mode': 'NULLABLE', + 'description': None, + }, + { + 'name': 'B', + 'type': 'FLOAT', + 'mode': 'NULLABLE', + 'description': None, + }, + { + 'name': 'C', + 'type': 'STRING', + 'mode': 'NULLABLE', + 'description': None, + }, + { + 'name': 'D', + 'type': 'TIMESTAMP', + 'mode': 'NULLABLE', + 'description': None, + }, ] } diff --git a/setup.py b/setup.py index a240cf45..6128c23d 100644 --- a/setup.py +++ b/setup.py @@ -17,10 +17,11 @@ def readme(): INSTALL_REQUIRES = [ + 'setuptools', 'pandas', 'google-auth>=1.0.0', 'google-auth-oauthlib>=0.0.1', - 'google-cloud-bigquery>=0.29.0', + 'google-cloud-bigquery>=0.32.0.dev1', ]