Skip to content

Commit

Permalink
Add default LoadJobConfig to Client
Browse files Browse the repository at this point in the history
  • Loading branch information
chelsea-lin committed Mar 14, 2023
1 parent 8270a10 commit 1cb30f1
Show file tree
Hide file tree
Showing 3 changed files with 349 additions and 8 deletions.
49 changes: 42 additions & 7 deletions google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,9 @@ class Client(ClientWithProject):
default_query_job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
Default ``QueryJobConfig``.
Will be merged into job configs passed into the ``query`` method.
default_load_job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]):
Default ``LoadJobConfig``.
Will be merged into job configs passed into the ``load_table_*`` methods.
client_info (Optional[google.api_core.client_info.ClientInfo]):
The client info used to send a user-agent string along with API
requests. If ``None``, then default info will be used. Generally,
Expand All @@ -235,6 +238,7 @@ def __init__(
_http=None,
location=None,
default_query_job_config=None,
default_load_job_config=None,
client_info=None,
client_options=None,
) -> None:
Expand All @@ -260,6 +264,7 @@ def __init__(
self._connection = Connection(self, **kw_args)
self._location = location
self._default_query_job_config = copy.deepcopy(default_query_job_config)
self._default_load_job_config = copy.deepcopy(default_load_job_config)

@property
def location(self):
Expand All @@ -277,6 +282,17 @@ def default_query_job_config(self):
def default_query_job_config(self, value: QueryJobConfig):
self._default_query_job_config = copy.deepcopy(value)

@property
def default_load_job_config(self):
    """Default ``LoadJobConfig``.

    Will be merged into job configs passed into the ``load_table_*`` methods.

    The getter hands back the object stored on the client as-is; the
    defensive copy is taken when the value is assigned, not when it is read.
    """
    return self._default_load_job_config

@default_load_job_config.setter
def default_load_job_config(self, value: LoadJobConfig) -> None:
    # Store a deep copy so that later mutation of the caller's ``value``
    # does not change this client's default.
    self._default_load_job_config = copy.deepcopy(value)

def close(self):
"""Close the underlying transport objects, releasing system resources.
Expand Down Expand Up @@ -2348,9 +2364,19 @@ def load_table_from_uri(

destination = _table_arg_to_table_ref(destination, default_project=self.project)

# Make a copy so that the job config isn't modified in-place.
if job_config:
job_config = copy.deepcopy(job_config)
_verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig)
job_config = copy.deepcopy(job_config)
else:
job_config = job.LoadJobConfig()

# Merge this job config with a default job config
if self._default_load_job_config:
_verify_job_config_type(
self._default_load_job_config, google.cloud.bigquery.job.LoadJobConfig
)
job_config = job_config._fill_from_default(self._default_load_job_config)

load_job = job.LoadJob(job_ref, source_uris, destination, self, job_config)
load_job._begin(retry=retry, timeout=timeout)
Expand Down Expand Up @@ -2437,9 +2463,21 @@ def load_table_from_file(

destination = _table_arg_to_table_ref(destination, default_project=self.project)
job_ref = job._JobReference(job_id, project=project, location=location)

# Make a copy so that the job config isn't modified in-place.
if job_config:
job_config = copy.deepcopy(job_config)
_verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig)
job_config = copy.deepcopy(job_config)
else:
job_config = job.LoadJobConfig()

# Merge this job config with a default job config
if self._default_load_job_config:
_verify_job_config_type(
self._default_load_job_config, google.cloud.bigquery.job.LoadJobConfig
)
job_config = job_config._fill_from_default(self._default_load_job_config)

load_job = job.LoadJob(job_ref, None, destination, self, job_config)
job_resource = load_job.to_api_repr()

Expand Down Expand Up @@ -2569,13 +2607,10 @@ def load_table_from_dataframe(
"""
job_id = _make_job_id(job_id, job_id_prefix)

# Make a copy so that the job config isn't modified in-place.
if job_config:
_verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig)
# Make a copy so that the job config isn't modified in-place.
job_config_properties = copy.deepcopy(job_config._properties)
job_config = job.LoadJobConfig()
job_config._properties = job_config_properties

job_config = copy.deepcopy(job_config)
else:
job_config = job.LoadJobConfig()

Expand Down
18 changes: 17 additions & 1 deletion tests/unit/job/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,7 +1104,7 @@ def test_ctor_with_unknown_property_raises_error(self):
config = self._make_one()
config.wrong_name = None

def test_fill_from_default(self):
def test_fill_query_job_config_from_default(self):
from google.cloud.bigquery import QueryJobConfig

job_config = QueryJobConfig()
Expand All @@ -1120,6 +1120,22 @@ def test_fill_from_default(self):
self.assertTrue(final_job_config.use_query_cache)
self.assertEqual(final_job_config.maximum_bytes_billed, 1000)

def test_fill_load_job_from_default(self):
    """Check ``_fill_from_default`` merging for ``LoadJobConfig``.

    Values set on the job config must be kept, and properties that only
    the default config sets must be filled in on the result.
    """
    from google.cloud.bigquery import LoadJobConfig

    # Job-specific config: ``encoding`` deliberately overlaps with the default.
    job_config = LoadJobConfig()
    job_config.create_session = True
    job_config.encoding = "UTF-8"

    # Default config: one property the job config lacks, plus a
    # conflicting ``encoding`` value.
    default_job_config = LoadJobConfig()
    default_job_config.ignore_unknown_values = True
    default_job_config.encoding = "ISO-8859-1"

    final_job_config = job_config._fill_from_default(default_job_config)
    # Set only on the job config -> preserved.
    self.assertTrue(final_job_config.create_session)
    # Set only on the default -> filled in.
    self.assertTrue(final_job_config.ignore_unknown_values)
    # Set on both -> the job config's value is expected to win.
    self.assertEqual(final_job_config.encoding, "UTF-8")

def test_fill_from_default_conflict(self):
from google.cloud.bigquery import QueryJobConfig

Expand Down
Loading

0 comments on commit 1cb30f1

Please sign in to comment.