refactor: move query and wait logic to separate module
This prepares the way for using the `query_and_wait` method built into the client library when it is available.
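For reference, a minimal sketch of what the built-in path might look like once it is adopted. This assumes google-cloud-bigquery >= 3.14, where Client.query_and_wait exists; the keyword arguments shown are from memory and may differ by version, so treat this as an illustration rather than part of the change itself:

from google.cloud import bigquery

client = bigquery.Client()

# Assumed built-in helper: starts the query, waits for it to finish,
# and returns a RowIterator in a single call.
rows = client.query_and_wait(
    "SELECT 17 AS answer",
    wait_timeout=60.0,  # seconds to wait for the query to complete
    max_results=10,     # cap on rows returned
)
print(rows.to_dataframe())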
Showing 6 changed files with 248 additions and 192 deletions.
@@ -0,0 +1,135 @@
# Copyright (c) 2017 pandas-gbq Authors All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

from __future__ import annotations

import concurrent.futures
import logging
from typing import Optional

from google.cloud import bigquery

import pandas_gbq.exceptions


logger = logging.getLogger(__name__)


# On-demand BQ Queries costs $6.25 per TB. First 1 TB per month is free
# see here for more: https://cloud.google.com/bigquery/pricing
QUERY_PRICE_FOR_TB = 6.25 / 2**40  # USD/TB


# http://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
def sizeof_fmt(num, suffix="B"):
    fmt = "%3.1f %s%s"
    for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
        if abs(num) < 1024.0:
            return fmt % (num, unit, suffix)
        num /= 1024.0
    return fmt % (num, "Y", suffix)


def _wait_for_query_job(
    connector,
    client: bigquery.Client,
    query_reply: bigquery.QueryJob,
    timeout_ms: Optional[float],
):
    """Wait for query to complete, pausing occasionally to update progress.

    Args:
        query_reply (QueryJob):
            A query job which has started.
        timeout_ms (Optional[int]):
            How long to wait before cancelling the query.
    """
    # Wait at most 10 seconds so we can show progress.
    # TODO(https://github.com/googleapis/python-bigquery-pandas/issues/327):
    # Include a tqdm progress bar here instead of a stream of log messages.
    timeout_sec = 10.0
    if timeout_ms:
        timeout_sec = min(timeout_sec, timeout_ms / 1000.0)

    while query_reply.state != "DONE":
        connector.log_elapsed_seconds("  Elapsed", "s. Waiting...")

        if timeout_ms and timeout_ms < connector.get_elapsed_seconds() * 1000:
            client.cancel_job(query_reply.job_id, location=query_reply.location)
            raise pandas_gbq.exceptions.QueryTimeout(
                "Query timeout: {} ms".format(timeout_ms)
            )

        try:
            query_reply.result(timeout=timeout_sec)
        except concurrent.futures.TimeoutError:
            # Use our own timeout logic
            pass
        except connector.http_error as ex:
            connector.process_http_error(ex)


def query_and_wait(
    connector,
    client: bigquery.Client,
    query: str,
    *,
    job_config: bigquery.QueryJobConfig,
    location: Optional[str],
    project_id: Optional[str],
    max_results: Optional[int],
    timeout_ms: Optional[int],
):
    from google.auth.exceptions import RefreshError

    try:
        logger.debug("Requesting query... ")
        query_reply = client.query(
            query,
            job_config=job_config,
            location=location,
            project=project_id,
        )
        logger.debug("Query running...")
    except (RefreshError, ValueError) as ex:
        if connector.private_key:
            raise pandas_gbq.exceptions.AccessDenied(
                f"The service account credentials are not valid: {ex}"
            )
        else:
            raise pandas_gbq.exceptions.AccessDenied(
                "The credentials have been revoked or expired, "
                f"please re-run the application to re-authorize: {ex}"
            )
    except connector.http_error as ex:
        connector.process_http_error(ex)

    job_id = query_reply.job_id
    logger.debug("Job ID: %s" % job_id)

    _wait_for_query_job(connector, connector.client, query_reply, timeout_ms)

    if query_reply.cache_hit:
        logger.debug("Query done.\nCache hit.\n")
    else:
        bytes_processed = query_reply.total_bytes_processed or 0
        bytes_billed = query_reply.total_bytes_billed or 0
        logger.debug(
            "Query done.\nProcessed: {} Billed: {}".format(
                sizeof_fmt(bytes_processed),
                sizeof_fmt(bytes_billed),
            )
        )
        logger.debug(
            "Standard price: ${:,.2f} USD\n".format(bytes_billed * QUERY_PRICE_FOR_TB)
        )

    # As of google-cloud-bigquery 2.3.0, QueryJob.result() uses
    # getQueryResults() instead of tabledata.list, which returns the correct
    # response with DML/DDL queries.
    try:
        return query_reply.result(max_results=max_results)
    except connector.http_error as ex:
        connector.process_http_error(ex)
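The new helper takes several keyword-only arguments, so a short usage sketch may help. The module path (pandas_gbq.query) and the GbqConnector entry point are assumptions based on the surrounding project layout; the diff above does not show the file name or the call sites:

from google.cloud import bigquery

import pandas_gbq.gbq
import pandas_gbq.query  # assumed module path for the file added above

# GbqConnector supplies the credentials, the BigQuery client, and the
# elapsed-time/error helpers that query_and_wait expects.
connector = pandas_gbq.gbq.GbqConnector("my-project")
rows = pandas_gbq.query.query_and_wait(
    connector,
    connector.client,
    "SELECT 1 AS x",
    job_config=bigquery.QueryJobConfig(),
    location="US",
    project_id="my-project",
    max_results=None,
    timeout_ms=30_000,
)
print(rows.to_dataframe())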