Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: search statistics #1616

Merged
merged 56 commits into from
Sep 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
b491836
experimental tweaks
chalmerlowe Jul 19, 2023
0cbb7f4
feat: adds two search statistics classes and property
chalmerlowe Jul 20, 2023
1dbf528
removes several personal debugging sentinels
chalmerlowe Jul 20, 2023
9695712
Merge branch 'main' into feat-search-statistics
chalmerlowe Jul 20, 2023
fcf7012
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Jul 20, 2023
ae992bf
Merge branch 'feat-search-statistics' of https://github.com/googleapi…
gcf-owl-bot[bot] Jul 20, 2023
3815371
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Jul 20, 2023
48086bd
Merge branch 'feat-search-statistics' of https://github.com/googleapi…
gcf-owl-bot[bot] Jul 20, 2023
5b14d5d
adds tests
chalmerlowe Aug 9, 2023
9689a71
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Aug 9, 2023
6af7d6f
cleans up conflict
chalmerlowe Aug 10, 2023
53437f2
adds comment
chalmerlowe Aug 10, 2023
4fd77ba
adds some type hints, adds a test for SearchReasons
chalmerlowe Aug 11, 2023
bb2b52c
cleans up some comments
chalmerlowe Aug 11, 2023
e728883
Merge branch 'main' into feat-search-statistics
chalmerlowe Aug 11, 2023
966ddb1
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Aug 11, 2023
e9fca23
Merge branch 'feat-search-statistics' of https://github.com/googleapi…
gcf-owl-bot[bot] Aug 11, 2023
ba2eb65
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Aug 11, 2023
a0a0e5b
Merge branch 'feat-search-statistics' of https://github.com/googleapi…
gcf-owl-bot[bot] Aug 11, 2023
852514f
Update tests/unit/job/test_query_stats.py
chalmerlowe Aug 11, 2023
2a14bc4
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Aug 11, 2023
8493c3a
Merge branch 'main' into feat-search-statistics
chalmerlowe Aug 15, 2023
7991e4d
updated type checks to be isinstance checks per linter
chalmerlowe Aug 15, 2023
62f9bcc
update linting
chalmerlowe Aug 15, 2023
705084b
Update tests/unit/job/test_query_stats.py
chalmerlowe Aug 15, 2023
4871339
Update tests/unit/job/test_query_stats.py
chalmerlowe Aug 15, 2023
93af6d9
experiments with some tests that are failing
chalmerlowe Aug 17, 2023
b0196c1
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Aug 17, 2023
711d752
Fix linting
chalmerlowe Aug 18, 2023
ccf87a9
update package verification approach
chalmerlowe Aug 18, 2023
5bc0082
update pandas installed version constant
chalmerlowe Aug 18, 2023
0089524
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Aug 18, 2023
fe23c18
remove unused package
chalmerlowe Aug 18, 2023
9bb8f14
set pragma no cover
chalmerlowe Aug 18, 2023
ddd86bd
adds controls to skip testing if pandas exceeds 2.0
chalmerlowe Aug 18, 2023
c55ba91
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Aug 18, 2023
dc150bd
adds pragma no cover to a simple check
chalmerlowe Aug 18, 2023
7d68a8a
add checks against pandas 2.0 on system test
chalmerlowe Aug 18, 2023
2e438d3
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Aug 18, 2023
93c345b
Merge branch 'main' into feat-search-statistics
chalmerlowe Aug 21, 2023
0423fdc
Merge branch 'main' into feat-search-statistics
chalmerlowe Aug 23, 2023
496ab6d
experiments with some tests that are failing
chalmerlowe Aug 17, 2023
392db37
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Aug 17, 2023
a30f23c
resolves merge conflict
chalmerlowe Aug 24, 2023
c1dfc35
resolves merge conflict
chalmerlowe Aug 24, 2023
5a2f268
resolve conflicts
chalmerlowe Aug 24, 2023
744d932
resolve merge conflicts
chalmerlowe Aug 24, 2023
81f8aab
🦉 Updates from OwlBot post-processor
gcf-owl-bot[bot] Aug 24, 2023
861eb98
updates due to faulty conflict resolution
chalmerlowe Aug 24, 2023
20ec9e3
adds docstrings to two classes
chalmerlowe Aug 24, 2023
5659dd3
corrects formatting
chalmerlowe Aug 24, 2023
dd1b749
Update tests/unit/job/test_query_stats.py
chalmerlowe Aug 24, 2023
e056787
Update tests/unit/job/test_query_stats.py
chalmerlowe Aug 24, 2023
5ebcbb0
updates default values and corrects mypy errors
chalmerlowe Aug 28, 2023
faafac2
corrects linting
chalmerlowe Aug 28, 2023
9ab5c25
Update google/cloud/bigquery/job/query.py
chalmerlowe Sep 1, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 62 additions & 1 deletion google/cloud/bigquery/job/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,59 @@ def from_api_repr(cls, stats: Dict[str, str]) -> "DmlStats":
return cls(*args)


class IndexUnusedReason(typing.NamedTuple):
    """Explains why a search query (or sub-query) did not use a search index.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#indexunusedreason
    """

    code: Optional[str] = None
    """High-level reason for the scenario in which no search index was used."""

    message: Optional[str] = None
    """Free-form, human-readable reason that no search index was used."""

    baseTable: Optional[TableReference] = None
    """Base table involved in the reason that no search index was used."""

    indexName: Optional[str] = None
    """Name of the unused search index, if available."""

    @classmethod
    def from_api_repr(cls, reason):
        """Build an instance from an API resource mapping.

        Args:
            reason: Mapping that may contain the ``code``, ``message``,
                ``baseTable`` and ``indexName`` keys. Missing keys become
                ``None``; values are stored exactly as given (no conversion
                is applied, so ``baseTable`` stays whatever the mapping held).

        Returns:
            A new instance populated from ``reason``.
        """
        return cls(
            code=reason.get("code"),
            message=reason.get("message"),
            baseTable=reason.get("baseTable"),
            indexName=reason.get("indexName"),
        )


class SearchStats(typing.NamedTuple):
    """Statistics related to Search Queries. Populated as part of JobStatistics2.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#searchstatistics
    """

    mode: Optional[str] = None
    """Indicates the type of search index usage in the entire search query."""

    # NOTE(review): this default list is one object shared by every instance
    # created without an explicit ``reason``; treat it as read-only. Instances
    # built by ``from_api_repr`` always receive their own fresh list.
    reason: List[IndexUnusedReason] = []
    """Reasons why no search index was used in the search query (or sub-query)."""

    @classmethod
    def from_api_repr(cls, stats: Dict[str, Any]) -> "SearchStats":
        """Build an instance from an API resource mapping.

        Args:
            stats: Mapping that may contain ``indexUsageMode`` and
                ``indexUnusedReasons``.

        Returns:
            A new instance whose ``mode`` is the ``indexUsageMode`` value
            (``None`` when missing) and whose ``reason`` is a list with one
            ``IndexUnusedReason`` per entry of ``indexUnusedReasons``.
        """
        mode = stats.get("indexUsageMode")
        # ``or []`` covers a missing key as well as an explicit null value,
        # which would otherwise raise ``TypeError`` when iterated.
        raw_reasons = stats.get("indexUnusedReasons") or []
        reason = [IndexUnusedReason.from_api_repr(raw) for raw in raw_reasons]
        return cls(mode, reason)


class ScriptOptions:
"""Options controlling the execution of scripts.

Expand Down Expand Up @@ -724,7 +777,6 @@ def to_api_repr(self) -> dict:
Dict: A dictionary in the format used by the BigQuery API.
"""
resource = copy.deepcopy(self._properties)

# Query parameters have an additional property associated with them
# to indicate if the query is using named or positional parameters.
query_parameters = resource["query"].get("queryParameters")
Expand Down Expand Up @@ -858,6 +910,15 @@ def priority(self):
"""
return self.configuration.priority

@property
def search_stats(self) -> Optional[SearchStats]:
    """Search statistics reported for this job, if any.

    Returns:
        A ``SearchStats`` built from the ``searchStatistics`` entry of the
        job statistics, or ``None`` when that entry is missing or null.
    """
    raw_stats = self._job_statistics().get("searchStatistics")
    if raw_stats is None:
        return None
    return SearchStats.from_api_repr(raw_stats)

@property
def query(self):
"""str: The query text used in this query job.
Expand Down
22 changes: 22 additions & 0 deletions tests/unit/job/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -911,6 +911,28 @@ def test_dml_stats(self):
assert isinstance(job.dml_stats, DmlStats)
assert job.dml_stats.inserted_row_count == 35

def test_search_stats(self):
    """``search_stats`` is ``None`` until ``searchStatistics`` is populated."""
    from google.cloud.bigquery.job.query import SearchStats

    client = _make_client(project=self.PROJECT)
    job = self._make_one(self.JOB_ID, self.QUERY, client)

    # No "statistics" entry at all.
    assert job.search_stats is None

    # An empty "statistics" mapping.
    job_stats = {}
    job._properties["statistics"] = job_stats
    assert job.search_stats is None

    # A "query" section that lacks "searchStatistics".
    query_section = {}
    job_stats["query"] = query_section
    assert job.search_stats is None

    query_section["searchStatistics"] = {
        "indexUsageMode": "INDEX_USAGE_MODE_UNSPECIFIED",
        "indexUnusedReasons": [],
    }
    # The property is read through a chain of lookups:
    # job.search_stats << job._job_statistics << job._properties
    assert isinstance(job.search_stats, SearchStats)
    assert job.search_stats.mode == "INDEX_USAGE_MODE_UNSPECIFIED"

def test_result(self):
from google.cloud.bigquery.table import RowIterator

Expand Down
69 changes: 69 additions & 0 deletions tests/unit/job/test_query_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,75 @@ def test_from_api_repr_full_stats(self):
assert result.updated_row_count == 4


class TestSearchStatistics:
    """Unit tests for ``SearchStats``."""

    @staticmethod
    def _get_target_class():
        """Import lazily and return the class under test."""
        from google.cloud.bigquery.job.query import SearchStats

        return SearchStats

    def _make_one(self, *args, **kwargs):
        """Instantiate the class under test with the given arguments."""
        target = self._get_target_class()
        return target(*args, **kwargs)

    def test_ctor_defaults(self):
        instance = self._make_one()
        assert instance.mode is None
        assert instance.reason == []

    def test_from_api_repr_unspecified(self):
        target = self._get_target_class()
        resource = {
            "indexUsageMode": "INDEX_USAGE_MODE_UNSPECIFIED",
            "indexUnusedReasons": [],
        }

        parsed = target.from_api_repr(resource)

        assert isinstance(parsed, target)
        assert parsed.mode == "INDEX_USAGE_MODE_UNSPECIFIED"
        assert parsed.reason == []


class TestIndexUnusedReason:
    """Unit tests for ``IndexUnusedReason``."""

    @staticmethod
    def _get_target_class():
        """Import lazily and return the class under test."""
        from google.cloud.bigquery.job.query import IndexUnusedReason

        return IndexUnusedReason

    def _make_one(self, *args, **kwargs):
        """Instantiate the class under test with the given arguments."""
        target = self._get_target_class()
        return target(*args, **kwargs)

    def test_ctor_defaults(self):
        instance = self._make_one()
        assert instance.code is None
        assert instance.message is None
        assert instance.baseTable is None
        assert instance.indexName is None

    def test_from_api_repr_unspecified(self):
        target = self._get_target_class()
        resource = {
            "code": "INDEX_CONFIG_NOT_AVAILABLE",
            "message": "There is no search index...",
            "baseTable": {
                "projectId": "bigquery-public-data",
                "datasetId": "usa_names",
                "tableId": "usa_1910_current",
            },
            "indexName": None,
        }

        parsed = target.from_api_repr(resource)

        assert isinstance(parsed, target)
        assert parsed.code == "INDEX_CONFIG_NOT_AVAILABLE"
        assert parsed.message == "There is no search index..."
        # The base table is expected back as the plain mapping that was supplied.
        assert parsed.baseTable == {
            "projectId": "bigquery-public-data",
            "datasetId": "usa_names",
            "tableId": "usa_1910_current",
        }
        assert parsed.indexName is None


class TestQueryPlanEntryStep(_Base):
KIND = "KIND"
SUBSTEPS = ("SUB1", "SUB2")
Expand Down