From ec266d3b6d35ae6716fe9caa5ac22820e88d26a8 Mon Sep 17 00:00:00 2001 From: Bob Hogg Date: Wed, 22 Feb 2023 19:39:38 +0000 Subject: [PATCH] fix: Query options were not respecting use_cache In certain circumstances, we were not respecting use_cache for queries, unlike legacy NDB, which is quite emphatic about supporting them. (See https://github.com/GoogleCloudPlatform/datastore-ndb-python/blob/59cb209ed95480025d26531fc91397575438d2fe/ndb/query.py#L186-L187) In #613 we tried to match legacy NDB behavior by updating the cache using the results of queries. We still do that, but now we respect use_cache, which was a valid keyword argument for Query.fetch() and friends, but was not passed down to the context cache when needed. As a result, the cache could mysteriously accumulate lots of memory usage and perhaps even cause you to hit memory limits, even if it was seemingly disabled and it didn't look like there were any objects holding references to your query results. This is a problem for certain batch-style workloads where you know you're only interested in processing a certain entity once. Fixes #752 --- google/cloud/ndb/_datastore_query.py | 12 ++-- tests/system/test_crud.py | 69 +++++++++++++++++++++++ tests/system/test_query.py | 26 +++++++++ tests/unit/test__datastore_query.py | 82 ++++++++++++++++++++++++++++ 4 files changed, 185 insertions(+), 4 deletions(-) diff --git a/google/cloud/ndb/_datastore_query.py b/google/cloud/ndb/_datastore_query.py index b3615304..05d951c5 100644 --- a/google/cloud/ndb/_datastore_query.py +++ b/google/cloud/ndb/_datastore_query.py @@ -375,7 +375,7 @@ def _next_batch(self): self._start_cursor = query.start_cursor self._index = 0 self._batch = [ - _Result(result_type, result_pb, query.order_by) + _Result(result_type, result_pb, query.order_by, query_options=query) for result_pb in response.batch.entity_results ] @@ -755,17 +755,21 @@ class _Result(object): order_by (Optional[Sequence[query.PropertyOrder]]): Ordering for the query. Used to merge sorted result sets while maintaining sort order. + query_options (Optional[QueryOptions]): Other query_options. + use_cache is the only supported option. """ _key = None - def __init__(self, result_type, result_pb, order_by=None): + def __init__(self, result_type, result_pb, order_by=None, query_options=None): self.result_type = result_type self.result_pb = result_pb self.order_by = order_by self.cursor = Cursor(result_pb.cursor) + self._query_options = query_options + def __lt__(self, other): """For total ordering.""" return self._compare(other) == -1 @@ -854,7 +858,7 @@ def check_cache(self, context): will cause `None` to be recorded in the cache. """ key = self.key() - if context._use_cache(key): + if context._use_cache(key, self._query_options): try: return context.cache.get_and_validate(key) except KeyError: @@ -880,7 +884,7 @@ def entity(self): if entity is _KEY_NOT_IN_CACHE: # entity not in cache, create one, and then add it to cache entity = model._entity_from_protobuf(self.result_pb.entity) - if context._use_cache(entity.key): + if context._use_cache(entity.key, self._query_options): context.cache[entity.key] = entity return entity diff --git a/tests/system/test_crud.py b/tests/system/test_crud.py index 00342895..cff12c91 100644 --- a/tests/system/test_crud.py +++ b/tests/system/test_crud.py @@ -601,6 +601,36 @@ class SomeKind(ndb.Model): assert not cache_value +def test_insert_entity_with_use_global_cache_false(dispose_of, client_context): + class SomeKind(ndb.Model): + foo = ndb.IntegerProperty() + bar = ndb.StringProperty() + + global_cache = global_cache_module._InProcessGlobalCache() + with client_context.new(global_cache=global_cache).use() as context: + context.set_global_cache_policy(None) # Use default + + entity = SomeKind(foo=42, bar="none") + key = entity.put(use_global_cache=False) + dispose_of(key._key) + cache_key = _cache.global_cache_key(key._key) + cache_value = global_cache.get([cache_key])[0] + assert not cache_value + + retrieved = key.get(use_global_cache=False) + assert retrieved.foo == 42 + assert retrieved.bar == "none" + + cache_value = global_cache.get([cache_key])[0] + assert not cache_value + + entity.foo = 43 + entity.put(use_global_cache=False) + + cache_value = global_cache.get([cache_key])[0] + assert not cache_value + + @pytest.mark.skipif(not USE_REDIS_CACHE, reason="Redis is not configured") def test_insert_entity_with_redis_cache(dispose_of, redis_context): class SomeKind(ndb.Model): @@ -1873,3 +1903,42 @@ class SomeKind(ndb.Model): dispose_of(key._key) assert key.get().sub_model.data["test"] == 1 + + +def test_put_updates_cache(client_context, dispose_of): + class SomeKind(ndb.Model): + foo = ndb.IntegerProperty() + + client_context.set_cache_policy(None) # Use default + + entity = SomeKind(foo=42) + key = entity.put() + assert len(client_context.cache) == 1 + dispose_of(key._key) + + +def test_put_with_use_cache_true_updates_cache(client_context, dispose_of): + class SomeKind(ndb.Model): + foo = ndb.IntegerProperty() + + client_context.set_cache_policy(None) # Use default + + entity = SomeKind(foo=42) + key = entity.put(use_cache=True) + assert len(client_context.cache) == 1 + assert client_context.cache[key] is entity + + dispose_of(key._key) + + +def test_put_with_use_cache_false_does_not_update_cache(client_context, dispose_of): + class SomeKind(ndb.Model): + foo = ndb.IntegerProperty() + + client_context.set_cache_policy(None) # Use default + + entity = SomeKind(foo=42) + key = entity.put(use_cache=False) + assert len(client_context.cache) == 0 + + dispose_of(key._key) diff --git a/tests/system/test_query.py b/tests/system/test_query.py index 51d9ab52..506e5aba 100644 --- a/tests/system/test_query.py +++ b/tests/system/test_query.py @@ -2010,3 +2010,29 @@ class SomeKind(ndb.Model): # If there is a cache hit, we'll get back the same object, not just a copy assert key.get() is retrieved + + +def test_query_with_explicit_use_cache_updates_cache(dispose_of, client_context): + class SomeKind(ndb.Model): + foo = ndb.IntegerProperty() + + entity = SomeKind(foo=42) + key = entity.put(use_cache=False) + dispose_of(key._key) + assert len(client_context.cache) == 0 + + eventually(lambda: SomeKind.query().fetch(use_cache=True), length_equals(1)) + assert len(client_context.cache) == 1 + + +def test_query_with_use_cache_false_does_not_update_cache(dispose_of, client_context): + class SomeKind(ndb.Model): + foo = ndb.IntegerProperty() + + entity = SomeKind(foo=42) + key = entity.put(use_cache=False) + dispose_of(key._key) + assert len(client_context.cache) == 0 + + eventually(lambda: SomeKind.query().fetch(use_cache=False), length_equals(1)) + assert len(client_context.cache) == 0 diff --git a/tests/unit/test__datastore_query.py b/tests/unit/test__datastore_query.py index 4a0de9bc..fc4aca8a 100644 --- a/tests/unit/test__datastore_query.py +++ b/tests/unit/test__datastore_query.py @@ -1500,6 +1500,31 @@ def probably_has_next(self): class Test_Result: + @staticmethod + def test_constructor_defaults(): + result = _datastore_query._Result( + result_type=None, + result_pb=query_pb2.EntityResult(), + ) + assert result.order_by is None + assert result._query_options is None + + @staticmethod + def test_constructor_order_by(): + order = query_module.PropertyOrder("foo") + result = _datastore_query._Result( + result_type=None, result_pb=query_pb2.EntityResult(), order_by=[order] + ) + assert result.order_by == [order] + + @staticmethod + def test_constructor_query_options(): + options = query_module.QueryOptions(use_cache=False) + result = _datastore_query._Result( + result_type=None, result_pb=query_pb2.EntityResult(), query_options=options + ) + assert result._query_options == options + @staticmethod def test_total_ordering(): def result(foo, bar=0, baz=""): @@ -1660,9 +1685,15 @@ def test_entity_full_entity(model): mock.Mock(entity=entity_pb, cursor=b"123", spec=("entity", "cursor")), ) + context = context_module.get_context() + + assert len(context.cache) == 0 assert result.entity() is entity model._entity_from_protobuf.assert_called_once_with(entity_pb) + # Regression test for #752: ensure cache is updated after querying + assert len(context.cache) == 1 + @staticmethod @pytest.mark.usefixtures("in_context") @mock.patch("google.cloud.ndb._datastore_query.model") @@ -1703,6 +1734,57 @@ def test_entity_full_entity_no_cache(model): ) assert result.entity() is entity + # Regression test for #752: ensure cache does not grow (i.e. use up memory) + assert len(context.cache) == 0 + + @staticmethod + @pytest.mark.usefixtures("in_context") + @mock.patch("google.cloud.ndb._datastore_query.model") + def test_entity_full_entity_no_cache_via_cache_options(model): + context = context_module.get_context() + with context.new().use(): + key_pb = entity_pb2.Key( + partition_id=entity_pb2.PartitionId(project_id="testing"), + path=[entity_pb2.Key.PathElement(kind="ThisKind", id=42)], + ) + entity = mock.Mock(key=key_pb) + model._entity_from_protobuf.return_value = entity + result = _datastore_query._Result( + _datastore_query.RESULT_TYPE_FULL, + mock.Mock(entity=entity, cursor=b"123", spec=("entity", "cursor")), + query_options=query_module.QueryOptions(use_cache=False), + ) + assert result.entity() is entity + + # Regression test for #752: ensure cache does not grow (i.e. use up memory) + assert len(context.cache) == 0 + + @staticmethod + @pytest.mark.usefixtures("in_context") + @mock.patch("google.cloud.ndb._datastore_query.model") + def test_entity_full_entity_cache_options_true(model): + key_pb = entity_pb2.Key( + partition_id=entity_pb2.PartitionId(project_id="testing"), + path=[entity_pb2.Key.PathElement(kind="ThisKind", id=42)], + ) + entity_pb = mock.Mock(key=key_pb) + entity = mock.Mock(key=key_module.Key("ThisKind", 42)) + model._entity_from_protobuf.return_value = entity + result = _datastore_query._Result( + _datastore_query.RESULT_TYPE_FULL, + mock.Mock(entity=entity_pb, cursor=b"123", spec=("entity", "cursor")), + query_options=query_module.QueryOptions(use_cache=True), + ) + + context = context_module.get_context() + + assert len(context.cache) == 0 + assert result.entity() is entity + model._entity_from_protobuf.assert_called_once_with(entity_pb) + + # Regression test for #752: ensure cache is updated after querying + assert len(context.cache) == 1 + @staticmethod @pytest.mark.usefixtures("in_context") def test_entity_key_only():