diff --git a/pandas_gbq/features.py b/pandas_gbq/features.py index 4b70a14a..d2fc33cb 100644 --- a/pandas_gbq/features.py +++ b/pandas_gbq/features.py @@ -5,11 +5,7 @@ """Module for checking dependency versions and supported features.""" # https://github.com/googleapis/python-bigquery/blob/master/CHANGELOG.md -BIGQUERY_MINIMUM_VERSION = "1.27.2" -BIGQUERY_ACCURATE_TIMESTAMP_VERSION = "2.6.0" -BIGQUERY_FROM_DATAFRAME_CSV_VERSION = "2.6.0" -BIGQUERY_SUPPORTS_BIGNUMERIC_VERSION = "2.10.0" -BIGQUERY_NO_DATE_AS_OBJECT_VERSION = "3.0.0dev" +BIGQUERY_MINIMUM_VERSION = "3.3.5" PANDAS_VERBOSITY_DEPRECATION_VERSION = "0.23.0" PANDAS_BOOLEAN_DTYPE_VERSION = "1.0.0" PANDAS_PARQUET_LOSSLESS_TIMESTAMP_VERSION = "1.1.0" @@ -31,9 +27,15 @@ def bigquery_installed_version(self): self._bigquery_installed_version = packaging.version.parse( google.cloud.bigquery.__version__ ) + return self._bigquery_installed_version + + def bigquery_try_import(self): + import google.cloud.bigquery + import packaging.version + bigquery_minimum_version = packaging.version.parse(BIGQUERY_MINIMUM_VERSION) - if self._bigquery_installed_version < bigquery_minimum_version: + if self.bigquery_installed_version < bigquery_minimum_version: raise ImportError( "pandas-gbq requires google-cloud-bigquery >= {0}, " "current version {1}".format( @@ -41,37 +43,7 @@ def bigquery_installed_version(self): ) ) - return self._bigquery_installed_version - - @property - def bigquery_has_accurate_timestamp(self): - import packaging.version - - min_version = packaging.version.parse(BIGQUERY_ACCURATE_TIMESTAMP_VERSION) - return self.bigquery_installed_version >= min_version - - @property - def bigquery_has_bignumeric(self): - import packaging.version - - min_version = packaging.version.parse(BIGQUERY_SUPPORTS_BIGNUMERIC_VERSION) - return self.bigquery_installed_version >= min_version - - @property - def bigquery_has_from_dataframe_with_csv(self): - import packaging.version - - bigquery_from_dataframe_version = packaging.version.parse( - BIGQUERY_FROM_DATAFRAME_CSV_VERSION - ) - return self.bigquery_installed_version >= bigquery_from_dataframe_version - - @property - def bigquery_needs_date_as_object(self): - import packaging.version - - max_version = packaging.version.parse(BIGQUERY_NO_DATE_AS_OBJECT_VERSION) - return self.bigquery_installed_version < max_version + return google.cloud.bigquery @property def pandas_installed_version(self): diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index dbb9e5b5..d090e287 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -367,9 +367,9 @@ def sizeof_fmt(num, suffix="B"): def get_client(self): import google.api_core.client_info - from google.cloud import bigquery import pandas + bigquery = FEATURES.bigquery_try_import() client_info = google.api_core.client_info.ClientInfo( user_agent="pandas-{}".format(pandas.__version__) ) @@ -563,10 +563,6 @@ def _download_results( if max_results is not None: create_bqstorage_client = False - to_dataframe_kwargs = {} - if FEATURES.bigquery_needs_date_as_object: - to_dataframe_kwargs["date_as_object"] = True - try: schema_fields = [field.to_api_repr() for field in rows_iter.schema] conversion_dtypes = _bqschema_to_nullsafe_dtypes(schema_fields) @@ -575,7 +571,6 @@ def _download_results( dtypes=conversion_dtypes, progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, - **to_dataframe_kwargs, ) except self.http_error as ex: self.process_http_error(ex) diff --git a/pandas_gbq/load.py b/pandas_gbq/load.py index bad99584..8243c7f3 100644 --- a/pandas_gbq/load.py +++ b/pandas_gbq/load.py @@ -14,7 +14,6 @@ from google.cloud import bigquery from pandas_gbq import exceptions -from pandas_gbq.features import FEATURES import pandas_gbq.schema @@ -252,28 +251,16 @@ def load_chunks( # TODO: yield progress depending on result() with timeout return [0] elif api_method == "load_csv": - if FEATURES.bigquery_has_from_dataframe_with_csv: - return load_csv_from_dataframe( - client, - dataframe, - destination_table_ref, - write_disposition, - location, - chunksize, - schema, - billing_project=billing_project, - ) - else: - return load_csv_from_file( - client, - dataframe, - destination_table_ref, - write_disposition, - location, - chunksize, - schema, - billing_project=billing_project, - ) + return load_csv_from_dataframe( + client, + dataframe, + destination_table_ref, + write_disposition, + location, + chunksize, + schema, + billing_project=billing_project, + ) else: raise ValueError( f"Got unexpected api_method: {api_method!r}, expected one of 'load_parquet', 'load_csv'." diff --git a/setup.py b/setup.py index d0b16c2e..2d09f41b 100644 --- a/setup.py +++ b/setup.py @@ -34,12 +34,9 @@ "google-api-core >= 2.10.2, <3.0.0dev", "google-auth >=2.13.0", "google-auth-oauthlib >=0.7.0", - # Require 1.27.* because it has a fix for out-of-bounds timestamps. See: - # https://github.com/googleapis/python-bigquery/pull/209 and - # https://github.com/googleapis/python-bigquery-pandas/issues/365 - # Exclude 2.4.* because it has a bug where waiting for the query can hang - # indefinitely. https://github.com/pydata/pandas-gbq/issues/343 - "google-cloud-bigquery >=3.3.5,<4.0.0dev,!=2.4.*", + # Please also update the minimum version in pandas_gbq/features.py to + # allow pandas-gbq to detect invalid package versions at runtime. + "google-cloud-bigquery >=3.3.5,<4.0.0dev", "google-cloud-bigquery-storage >=2.16.2,<3.0.0dev", "packaging >=20.0.0", ] diff --git a/tests/system/test_read_gbq.py b/tests/system/test_read_gbq.py index d57477b1..fada140b 100644 --- a/tests/system/test_read_gbq.py +++ b/tests/system/test_read_gbq.py @@ -454,10 +454,6 @@ def writable_table( ), ), id="bignumeric-normal-range", - marks=pytest.mark.skipif( - not FEATURES.bigquery_has_bignumeric, - reason="BIGNUMERIC not supported in this version of google-cloud-bigquery", - ), ), pytest.param( *QueryTestCase( @@ -538,9 +534,7 @@ def writable_table( ), } ), - use_bqstorage_apis={True, False} - if FEATURES.bigquery_has_accurate_timestamp - else {True}, + use_bqstorage_apis={True, False}, ), id="issue365-extreme-datetimes", ), diff --git a/tests/unit/test_features.py b/tests/unit/test_features.py index bfe2ea9b..8f17c78f 100644 --- a/tests/unit/test_features.py +++ b/tests/unit/test_features.py @@ -13,77 +13,6 @@ def fresh_bigquery_version(monkeypatch): monkeypatch.setattr(FEATURES, "_pandas_installed_version", None) -@pytest.mark.parametrize( - ["bigquery_version", "expected"], - [ - ("1.27.2", False), - ("1.99.100", False), - ("2.5.4", False), - ("2.6.0", True), - ("2.6.1", True), - ("2.12.0", True), - ], -) -def test_bigquery_has_accurate_timestamp(monkeypatch, bigquery_version, expected): - import google.cloud.bigquery - - monkeypatch.setattr(google.cloud.bigquery, "__version__", bigquery_version) - assert FEATURES.bigquery_has_accurate_timestamp == expected - - -@pytest.mark.parametrize( - ["bigquery_version", "expected"], - [ - ("1.27.2", False), - ("1.99.100", False), - ("2.9.999", False), - ("2.10.0", True), - ("2.12.0", True), - ("3.0.0", True), - ], -) -def test_bigquery_has_bignumeric(monkeypatch, bigquery_version, expected): - import google.cloud.bigquery - - monkeypatch.setattr(google.cloud.bigquery, "__version__", bigquery_version) - assert FEATURES.bigquery_has_bignumeric == expected - - -@pytest.mark.parametrize( - ["bigquery_version", "expected"], - [ - ("1.27.2", False), - ("1.99.100", False), - ("2.5.4", False), - ("2.6.0", True), - ("2.6.1", True), - ("2.12.0", True), - ], -) -def test_bigquery_has_from_dataframe_with_csv(monkeypatch, bigquery_version, expected): - import google.cloud.bigquery - - monkeypatch.setattr(google.cloud.bigquery, "__version__", bigquery_version) - assert FEATURES.bigquery_has_from_dataframe_with_csv == expected - - -@pytest.mark.parametrize( - ["bigquery_version", "expected"], - [ - ("1.27.2", True), - ("1.99.100", True), - ("2.12.0", True), - ("3.0.0", False), - ("3.1.0", False), - ], -) -def test_bigquery_needs_date_as_object(monkeypatch, bigquery_version, expected): - import google.cloud.bigquery - - monkeypatch.setattr(google.cloud.bigquery, "__version__", bigquery_version) - assert FEATURES.bigquery_needs_date_as_object == expected - - @pytest.mark.parametrize( ["pandas_version", "expected"], [ diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py index ba620686..703acf27 100644 --- a/tests/unit/test_gbq.py +++ b/tests/unit/test_gbq.py @@ -732,19 +732,11 @@ def test_read_gbq_use_bqstorage_api( assert df is not None mock_list_rows = mock_bigquery_client.list_rows("dest", max_results=100) - if FEATURES.bigquery_needs_date_as_object: - mock_list_rows.to_dataframe.assert_called_once_with( - create_bqstorage_client=True, - dtypes=mock.ANY, - progress_bar_type=mock.ANY, - date_as_object=True, - ) - else: - mock_list_rows.to_dataframe.assert_called_once_with( - create_bqstorage_client=True, - dtypes=mock.ANY, - progress_bar_type=mock.ANY, - ) + mock_list_rows.to_dataframe.assert_called_once_with( + create_bqstorage_client=True, + dtypes=mock.ANY, + progress_bar_type=mock.ANY, + ) def test_read_gbq_calls_tqdm(mock_bigquery_client, mock_service_account_credentials): diff --git a/tests/unit/test_load.py b/tests/unit/test_load.py index 1d99d9b4..b34b1378 100644 --- a/tests/unit/test_load.py +++ b/tests/unit/test_load.py @@ -8,7 +8,6 @@ import decimal from io import StringIO import textwrap -from unittest import mock import db_dtypes import numpy @@ -17,13 +16,10 @@ import pytest from pandas_gbq import exceptions -from pandas_gbq.features import FEATURES from pandas_gbq import load def load_method(bqclient, api_method): - if not FEATURES.bigquery_has_from_dataframe_with_csv and api_method == "load_csv": - return bqclient.load_table_from_file return bqclient.load_table_from_dataframe @@ -180,12 +176,10 @@ def test_load_csv_from_file_generates_schema(mock_bigquery_client): @pytest.mark.parametrize( - ["bigquery_has_from_dataframe_with_csv", "api_method"], - [(True, "load_parquet"), (True, "load_csv"), (False, "load_csv")], + ["api_method"], + [("load_parquet",), ("load_csv",)], ) -def test_load_chunks_omits_policy_tags( - monkeypatch, mock_bigquery_client, bigquery_has_from_dataframe_with_csv, api_method -): +def test_load_chunks_omits_policy_tags(monkeypatch, mock_bigquery_client, api_method): """Ensure that policyTags are omitted. We don't want to change the policyTags via a load job, as this can cause @@ -193,11 +187,6 @@ def test_load_chunks_omits_policy_tags( """ import google.cloud.bigquery - monkeypatch.setattr( - type(FEATURES), - "bigquery_has_from_dataframe_with_csv", - mock.PropertyMock(return_value=bigquery_has_from_dataframe_with_csv), - ) df = pandas.DataFrame({"col1": [1, 2, 3]}) destination = google.cloud.bigquery.TableReference.from_string( "my-project.my_dataset.my_table"