From 605df48c5a711ec2cdb21f7c596b5be7694ff1ae Mon Sep 17 00:00:00 2001 From: Bob Hogg Date: Wed, 22 Feb 2023 19:39:38 +0000 Subject: [PATCH] fix: Query options were not respecting use_cache In certain circumstances, we were not respecting use_cache for queries, unlike legacy NDB, which is quite emphatic about supporting them. (See https://github.com/GoogleCloudPlatform/datastore-ndb-python/blob/59cb209ed95480025d26531fc91397575438d2fe/ndb/query.py#L186-L187) In #613 we tried to match legacy NDB behavior by updating the cache using the results of queries. We still do that, but now we respect use_cache, which was a valid keyword argument for Query.fetch() and friends, but was not passed down to the context cache when needed. As a result, the cache could mysteriously accumulate lots of memory usage and perhaps even cause you to hit memory limits, even if it was seemingly disabled and it didn't look like there were any objects holding references to your query results. This is a problem for certain batch-style workloads where you know you're only interested in processing a certain entity once. Fixes #752 --- google/cloud/ndb/_datastore_query.py | 12 ++-- tests/system/test_query.py | 26 +++++++++ tests/unit/test__datastore_query.py | 82 ++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 4 deletions(-) diff --git a/google/cloud/ndb/_datastore_query.py b/google/cloud/ndb/_datastore_query.py index b3615304..05d951c5 100644 --- a/google/cloud/ndb/_datastore_query.py +++ b/google/cloud/ndb/_datastore_query.py @@ -375,7 +375,7 @@ def _next_batch(self): self._start_cursor = query.start_cursor self._index = 0 self._batch = [ - _Result(result_type, result_pb, query.order_by) + _Result(result_type, result_pb, query.order_by, query_options=query) for result_pb in response.batch.entity_results ] @@ -755,17 +755,21 @@ class _Result(object): order_by (Optional[Sequence[query.PropertyOrder]]): Ordering for the query. Used to merge sorted result sets while maintaining sort order. + query_options (Optional[QueryOptions]): Other query_options. + use_cache is the only supported option. """ _key = None - def __init__(self, result_type, result_pb, order_by=None): + def __init__(self, result_type, result_pb, order_by=None, query_options=None): self.result_type = result_type self.result_pb = result_pb self.order_by = order_by self.cursor = Cursor(result_pb.cursor) + self._query_options = query_options + def __lt__(self, other): """For total ordering.""" return self._compare(other) == -1 @@ -854,7 +858,7 @@ def check_cache(self, context): will cause `None` to be recorded in the cache. """ key = self.key() - if context._use_cache(key): + if context._use_cache(key, self._query_options): try: return context.cache.get_and_validate(key) except KeyError: @@ -880,7 +884,7 @@ def entity(self): if entity is _KEY_NOT_IN_CACHE: # entity not in cache, create one, and then add it to cache entity = model._entity_from_protobuf(self.result_pb.entity) - if context._use_cache(entity.key): + if context._use_cache(entity.key, self._query_options): context.cache[entity.key] = entity return entity diff --git a/tests/system/test_query.py b/tests/system/test_query.py index 51d9ab52..506e5aba 100644 --- a/tests/system/test_query.py +++ b/tests/system/test_query.py @@ -2010,3 +2010,29 @@ class SomeKind(ndb.Model): # If there is a cache hit, we'll get back the same object, not just a copy assert key.get() is retrieved + + +def test_query_with_explicit_use_cache_updates_cache(dispose_of, client_context): + class SomeKind(ndb.Model): + foo = ndb.IntegerProperty() + + entity = SomeKind(foo=42) + key = entity.put(use_cache=False) + dispose_of(key._key) + assert len(client_context.cache) == 0 + + eventually(lambda: SomeKind.query().fetch(use_cache=True), length_equals(1)) + assert len(client_context.cache) == 1 + + +def test_query_with_use_cache_false_does_not_update_cache(dispose_of, client_context): + class SomeKind(ndb.Model): + foo = ndb.IntegerProperty() + + entity = SomeKind(foo=42) + key = entity.put(use_cache=False) + dispose_of(key._key) + assert len(client_context.cache) == 0 + + eventually(lambda: SomeKind.query().fetch(use_cache=False), length_equals(1)) + assert len(client_context.cache) == 0 diff --git a/tests/unit/test__datastore_query.py b/tests/unit/test__datastore_query.py index 4a0de9bc..fc4aca8a 100644 --- a/tests/unit/test__datastore_query.py +++ b/tests/unit/test__datastore_query.py @@ -1500,6 +1500,31 @@ def probably_has_next(self): class Test_Result: + @staticmethod + def test_constructor_defaults(): + result = _datastore_query._Result( + result_type=None, + result_pb=query_pb2.EntityResult(), + ) + assert result.order_by is None + assert result._query_options is None + + @staticmethod + def test_constructor_order_by(): + order = query_module.PropertyOrder("foo") + result = _datastore_query._Result( + result_type=None, result_pb=query_pb2.EntityResult(), order_by=[order] + ) + assert result.order_by == [order] + + @staticmethod + def test_constructor_query_options(): + options = query_module.QueryOptions(use_cache=False) + result = _datastore_query._Result( + result_type=None, result_pb=query_pb2.EntityResult(), query_options=options + ) + assert result._query_options == options + @staticmethod def test_total_ordering(): def result(foo, bar=0, baz=""): @@ -1660,9 +1685,15 @@ def test_entity_full_entity(model): mock.Mock(entity=entity_pb, cursor=b"123", spec=("entity", "cursor")), ) + context = context_module.get_context() + + assert len(context.cache) == 0 assert result.entity() is entity model._entity_from_protobuf.assert_called_once_with(entity_pb) + # Regression test for #752: ensure cache is updated after querying + assert len(context.cache) == 1 + @staticmethod @pytest.mark.usefixtures("in_context") @mock.patch("google.cloud.ndb._datastore_query.model") @@ -1703,6 +1734,57 @@ def test_entity_full_entity_no_cache(model): ) assert result.entity() is entity + # Regression test for #752: ensure cache does not grow (i.e. use up memory) + assert len(context.cache) == 0 + + @staticmethod + @pytest.mark.usefixtures("in_context") + @mock.patch("google.cloud.ndb._datastore_query.model") + def test_entity_full_entity_no_cache_via_cache_options(model): + context = context_module.get_context() + with context.new().use(): + key_pb = entity_pb2.Key( + partition_id=entity_pb2.PartitionId(project_id="testing"), + path=[entity_pb2.Key.PathElement(kind="ThisKind", id=42)], + ) + entity = mock.Mock(key=key_pb) + model._entity_from_protobuf.return_value = entity + result = _datastore_query._Result( + _datastore_query.RESULT_TYPE_FULL, + mock.Mock(entity=entity, cursor=b"123", spec=("entity", "cursor")), + query_options=query_module.QueryOptions(use_cache=False), + ) + assert result.entity() is entity + + # Regression test for #752: ensure cache does not grow (i.e. use up memory) + assert len(context.cache) == 0 + + @staticmethod + @pytest.mark.usefixtures("in_context") + @mock.patch("google.cloud.ndb._datastore_query.model") + def test_entity_full_entity_cache_options_true(model): + key_pb = entity_pb2.Key( + partition_id=entity_pb2.PartitionId(project_id="testing"), + path=[entity_pb2.Key.PathElement(kind="ThisKind", id=42)], + ) + entity_pb = mock.Mock(key=key_pb) + entity = mock.Mock(key=key_module.Key("ThisKind", 42)) + model._entity_from_protobuf.return_value = entity + result = _datastore_query._Result( + _datastore_query.RESULT_TYPE_FULL, + mock.Mock(entity=entity_pb, cursor=b"123", spec=("entity", "cursor")), + query_options=query_module.QueryOptions(use_cache=True), + ) + + context = context_module.get_context() + + assert len(context.cache) == 0 + assert result.entity() is entity + model._entity_from_protobuf.assert_called_once_with(entity_pb) + + # Regression test for #752: ensure cache is updated after querying + assert len(context.cache) == 1 + @staticmethod @pytest.mark.usefixtures("in_context") def test_entity_key_only():