Skip to content

Commit

Permalink
Add progress bar for to_gbq function using tqdm
Browse files Browse the repository at this point in the history
  • Loading branch information
Amit Kumar authored and aktech committed Apr 27, 2018
1 parent 7711bb0 commit 8b00e9c
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 6 deletions.
22 changes: 16 additions & 6 deletions pandas_gbq/gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
BIGQUERY_INSTALLED_VERSION = None
SHOW_VERBOSE_DEPRECATION = False

try:
import tqdm # noqa
except ImportError:
tqdm = None


def _check_google_client_version():
global BIGQUERY_INSTALLED_VERSION, SHOW_VERBOSE_DEPRECATION
Expand Down Expand Up @@ -563,16 +568,19 @@ def run_query(self, query, **kwargs):

def load_data(
self, dataframe, dataset_id, table_id, chunksize=None,
schema=None):
schema=None, progress_bar=True):
from pandas_gbq import load

total_rows = len(dataframe)
logger.info("\n\n")

try:
for remaining_rows in load.load_chunks(
self.client, dataframe, dataset_id, table_id,
chunksize=chunksize, schema=schema):
chunks = load.load_chunks(self.client, dataframe, dataset_id,
table_id, chunksize=chunksize,
schema=schema)
if progress_bar and tqdm:
chunks = tqdm.tqdm(chunks)
for remaining_rows in chunks:
logger.info("\rLoad is {0}% Complete".format(
((total_rows - remaining_rows) * 100) / total_rows))
except self.http_error as ex:
Expand Down Expand Up @@ -870,7 +878,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,

def to_gbq(dataframe, destination_table, project_id=None, chunksize=None,
verbose=None, reauth=False, if_exists='fail', private_key=None,
auth_local_webserver=False, table_schema=None):
auth_local_webserver=False, table_schema=None, progress_bar=True):
"""Write a DataFrame to a Google BigQuery table.
The main method a user calls to export pandas DataFrame contents to
Expand Down Expand Up @@ -935,6 +943,8 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=None,
names of a field.
.. versionadded:: 0.3.1
verbose : None, deprecated
progress_bar : boolean, default True
Show a progress bar for the upload, chunk by chunk, using the `tqdm`
library. Ignored if `tqdm` is not installed.
"""

_test_google_api_imports()
Expand Down Expand Up @@ -987,7 +997,7 @@ def to_gbq(dataframe, destination_table, project_id=None, chunksize=None,

connector.load_data(
dataframe, dataset_id, table_id, chunksize=chunksize,
schema=table_schema)
schema=table_schema, progress_bar=progress_bar)


def generate_bq_schema(df, default_type='STRING'):
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ pandas
google-auth
google-auth-oauthlib
google-cloud-bigquery
tqdm
4 changes: 4 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ def readme():
'google-cloud-bigquery>=0.29.0',
]

# Optional dependency groups, keyed by extra name; passed to setup() as
# ``extras_require``. The ``tqdm`` extra pulls in the progress-bar library.
extras = dict(tqdm='tqdm>=4.23.0')

setup(
name=NAME,
Expand All @@ -50,6 +53,7 @@ def readme():
],
keywords='data',
install_requires=INSTALL_REQUIRES,
extras_require=extras,
packages=find_packages(exclude=['contrib', 'docs', 'tests*']),
test_suite='tests',
)

0 comments on commit 8b00e9c

Please sign in to comment.