From 949227532dfa1cf07e44808454034d85312a2d49 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Thu, 22 Mar 2018 15:55:39 -0700
Subject: [PATCH] BUG: Update pandas-gbq to latest version of google-cloud-bigquery

There was a breaking change in 0.32.0.dev1 which changed the way
configuration for the query job gets loaded. Also, it added the
'description' field to the schema resource, so this change updates the
schema comparison logic to account for that.

Updates the MASTER build in CI to also build with google-cloud-bigquery
at MASTER.
---
 .travis.yml                    |  3 ++
 ci/requirements-3.5-0.18.1.pip |  2 +-
 ci/requirements-3.6-MASTER.pip |  1 -
 docs/source/changelog.rst      |  4 +-
 pandas_gbq/gbq.py              | 85 ++++++++++++++--------------------
 pandas_gbq/tests/test_gbq.py   | 28 +++++++++--
 setup.py                       |  3 +-
 7 files changed, 68 insertions(+), 58 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 92129cc6..42378680 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -28,6 +28,9 @@ install:
       conda install -q numpy pytz python-dateutil;
       PRE_WHEELS="https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com";
       pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS pandas;
+      pip install -e 'git+https://github.com/GoogleCloudPlatform/google-cloud-python.git#egg=version_subpkg&subdirectory=api_core';
+      pip install -e 'git+https://github.com/GoogleCloudPlatform/google-cloud-python.git#egg=version_subpkg&subdirectory=core';
+      pip install -e 'git+https://github.com/GoogleCloudPlatform/google-cloud-python.git#egg=version_subpkg&subdirectory=bigquery';
     else
       conda install -q pandas=$PANDAS;
     fi
diff --git a/ci/requirements-3.5-0.18.1.pip b/ci/requirements-3.5-0.18.1.pip
index f895fb1f..3760d46b 100644
--- a/ci/requirements-3.5-0.18.1.pip
+++ b/ci/requirements-3.5-0.18.1.pip
@@ -1,4 +1,4 @@
 google-auth==1.0.2
 google-auth-oauthlib==0.0.1
 mock
-google-cloud-bigquery==0.29.0
+google-cloud-bigquery==0.32.0
diff --git a/ci/requirements-3.6-MASTER.pip b/ci/requirements-3.6-MASTER.pip
index b52f2aeb..78f6834f 100644
--- a/ci/requirements-3.6-MASTER.pip
+++ b/ci/requirements-3.6-MASTER.pip
@@ -1,4 +1,3 @@
 google-auth
 google-auth-oauthlib
 mock
-google-cloud-bigquery
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index eca6f9ce..d62f714a 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -1,9 +1,11 @@
 Changelog
 =========
 
-0.3.2 / [TBD]
+0.4.0 / [TBD]
 ------------------
 - Fix bug with querying for an array of floats (:issue:`123`)
+- Fix bug with integer columns on Windows. Explicitly use 64bit integers when converting from BQ types. (:issue:`119`)
+- Update ``google-cloud-python`` dependency to version 0.32.0+ (:issue:`TBD`)
 
 0.3.1 / 2018-02-13
 ------------------
diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 6d5aacf8..3b13a8de 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -4,7 +4,6 @@
 import time
 import warnings
 from datetime import datetime
-from distutils.version import StrictVersion
 from time import sleep
 
 import numpy as np
@@ -23,17 +22,15 @@ def _check_google_client_version():
         raise ImportError('Could not import pkg_resources (setuptools).')
 
     # https://github.com/GoogleCloudPlatform/google-cloud-python/blob/master/bigquery/CHANGELOG.md
-    bigquery_client_minimum_version = '0.29.0'
+    bigquery_minimum_version = pkg_resources.parse_version('0.32.0.dev1')
+    bigquery_installed_version = pkg_resources.get_distribution(
+        'google-cloud-bigquery').parsed_version
 
-    _BIGQUERY_CLIENT_VERSION = pkg_resources.get_distribution(
-        'google-cloud-bigquery').version
-
-    if (StrictVersion(_BIGQUERY_CLIENT_VERSION) <
-            StrictVersion(bigquery_client_minimum_version)):
-        raise ImportError('pandas-gbq requires google-cloud-bigquery >= {0}, '
-                          'current version {1}'
-                          .format(bigquery_client_minimum_version,
-                                  _BIGQUERY_CLIENT_VERSION))
+    if bigquery_installed_version < bigquery_minimum_version:
+        raise ImportError(
+            'pandas-gbq requires google-cloud-bigquery >= {0}, '
+            'current version {1}'.format(
+                bigquery_minimum_version, bigquery_installed_version))
 
 
 def _test_google_api_imports():
@@ -459,21 +456,15 @@ def run_query(self, query, **kwargs):
         }
         config = kwargs.get('configuration')
         if config is not None:
-            if len(config) != 1:
-                raise ValueError("Only one job type must be specified, but "
-                                 "given {}".format(','.join(config.keys())))
-            if 'query' in config:
-                if 'query' in config['query']:
-                    if query is not None:
-                        raise ValueError("Query statement can't be specified "
-                                         "inside config while it is specified "
-                                         "as parameter")
-                    query = config['query']['query']
-                    del config['query']['query']
-
-                job_config['query'].update(config['query'])
-            else:
-                raise ValueError("Only 'query' job type is supported")
+            job_config.update(config)
+
+            if 'query' in config and 'query' in config['query']:
+                if query is not None:
+                    raise ValueError("Query statement can't be specified "
+                                     "inside config while it is specified "
+                                     "as parameter")
+                query = config['query']['query']
+                del config['query']['query']
 
         self._start_timer()
         try:
@@ -481,7 +472,7 @@ def run_query(self, query, **kwargs):
             logger.info('Requesting query... ')
             query_reply = self.client.query(
                 query,
-                job_config=QueryJobConfig.from_api_repr(job_config['query']))
+                job_config=QueryJobConfig.from_api_repr(job_config))
             logger.info('ok.\nQuery running...')
         except (RefreshError, ValueError):
             if self.private_key:
@@ -598,6 +589,15 @@ def schema(self, dataset_id, table_id):
         except self.http_error as ex:
             self.process_http_error(ex)
 
+    def _clean_schema_fields(self, fields):
+        """Return a sanitized version of the schema for comparisons."""
+        fields_sorted = sorted(fields, key=lambda field: field['name'])
+        # Ignore mode and description when comparing schemas.
+        return [
+            {'name': field['name'], 'type': field['type']}
+            for field in fields_sorted
+        ]
+
     def verify_schema(self, dataset_id, table_id, schema):
         """Indicate whether schemas match exactly
 
@@ -621,17 +621,9 @@ def verify_schema(self, dataset_id, table_id, schema):
             Whether the schemas match
         """
 
-        fields_remote = sorted(self.schema(dataset_id, table_id),
-                               key=lambda x: x['name'])
-        fields_local = sorted(schema['fields'], key=lambda x: x['name'])
-
-        # Ignore mode when comparing schemas.
-        for field in fields_local:
-            if 'mode' in field:
-                del field['mode']
-        for field in fields_remote:
-            if 'mode' in field:
-                del field['mode']
+        fields_remote = self._clean_schema_fields(
+            self.schema(dataset_id, table_id))
+        fields_local = self._clean_schema_fields(schema['fields'])
 
         return fields_remote == fields_local
 
@@ -658,16 +650,9 @@ def schema_is_subset(self, dataset_id, table_id, schema):
             Whether the passed schema is a subset
         """
 
-        fields_remote = self.schema(dataset_id, table_id)
-        fields_local = schema['fields']
-
-        # Ignore mode when comparing schemas.
-        for field in fields_local:
-            if 'mode' in field:
-                del field['mode']
-        for field in fields_remote:
-            if 'mode' in field:
-                del field['mode']
+        fields_remote = self._clean_schema_fields(
+            self.schema(dataset_id, table_id))
+        fields_local = self._clean_schema_fields(schema['fields'])
 
         return all(field in fields_remote for field in fields_local)
 
@@ -709,7 +694,7 @@ def _parse_data(schema, rows):
     col_names = [str(field['name']) for field in fields]
     col_dtypes = [
         dtype_map.get(field['type'].upper(), object)
-        if field['mode'] != 'repeated'
+        if field['mode'].lower() != 'repeated'
        else object
         for field in fields
     ]
@@ -847,7 +832,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
     for field in schema['fields']:
         if field['type'].upper() in type_map and \
                 final_df[field['name']].notnull().all() and \
-                field['mode'] != 'repeated':
+                field['mode'].lower() != 'repeated':
             final_df[field['name']] = \
                 final_df[field['name']].astype(type_map[field['type'].upper()])
 
diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py
index 2df1b9bd..8c74b595 100644
--- a/pandas_gbq/tests/test_gbq.py
+++ b/pandas_gbq/tests/test_gbq.py
@@ -1266,10 +1266,30 @@ def test_retrieve_schema(self):
         test_id = "15"
         test_schema = {
             'fields': [
-                {'name': 'A', 'type': 'FLOAT', 'mode': 'NULLABLE'},
-                {'name': 'B', 'type': 'FLOAT', 'mode': 'NULLABLE'},
-                {'name': 'C', 'type': 'STRING', 'mode': 'NULLABLE'},
-                {'name': 'D', 'type': 'TIMESTAMP', 'mode': 'NULLABLE'}
+                {
+                    'name': 'A',
+                    'type': 'FLOAT',
+                    'mode': 'NULLABLE',
+                    'description': None,
+                },
+                {
+                    'name': 'B',
+                    'type': 'FLOAT',
+                    'mode': 'NULLABLE',
+                    'description': None,
+                },
+                {
+                    'name': 'C',
+                    'type': 'STRING',
+                    'mode': 'NULLABLE',
+                    'description': None,
+                },
+                {
+                    'name': 'D',
+                    'type': 'TIMESTAMP',
+                    'mode': 'NULLABLE',
+                    'description': None,
+                },
             ]
         }
 
diff --git a/setup.py b/setup.py
index a240cf45..6128c23d 100644
--- a/setup.py
+++ b/setup.py
@@ -17,10 +17,11 @@ def readme():
 
 
 INSTALL_REQUIRES = [
+    'setuptools',
     'pandas',
     'google-auth>=1.0.0',
     'google-auth-oauthlib>=0.0.1',
-    'google-cloud-bigquery>=0.29.0',
+    'google-cloud-bigquery>=0.32.0.dev1',
 ]
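Note (illustration only, not part of the patch): the sketch below mirrors the new configuration handling in GbqConnector.run_query under google-cloud-bigquery >= 0.32.0. With the breaking change, QueryJobConfig.from_api_repr() receives the full job configuration resource rather than only the 'query' sub-resource, which is why the patch can drop the old "Only 'query' job type is supported" check and simply defer validation to the client library. The helper name build_query_job_config and the example configuration values are hypothetical; only the merge-and-extract logic reflects the patched code.

# Illustrative sketch, assuming google-cloud-bigquery >= 0.32.0 is installed.
# build_query_job_config is a hypothetical helper, not pandas-gbq public API.
from google.cloud.bigquery import QueryJobConfig


def build_query_job_config(query, configuration=None):
    """Merge a user ``configuration`` dict into a default job configuration."""
    # Default configuration in REST-resource (jobs.configuration) form; the
    # real run_query builds a similar dict based on the selected SQL dialect.
    job_config = {
        'query': {
            'useLegacySql': False,
        }
    }
    if configuration is not None:
        # Shallow-merge the user's configuration over the defaults, mirroring
        # the patched run_query; the whole dict (not just config['query'])
        # is later handed to QueryJobConfig.from_api_repr().
        job_config.update(configuration)

        if 'query' in configuration and 'query' in configuration['query']:
            if query is not None:
                raise ValueError("Query statement can't be specified "
                                 "inside config while it is specified "
                                 "as parameter")
            query = configuration['query']['query']
            del configuration['query']['query']

    return query, QueryJobConfig.from_api_repr(job_config)


if __name__ == '__main__':
    # Example: the SQL lives inside the configuration dict rather than being
    # passed as the query parameter; keys use the REST API's camelCase names.
    sql, job_config = build_query_job_config(
        None,
        configuration={'query': {'query': 'SELECT 1', 'useQueryCache': False}})
    print(sql)  # SELECT 1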