From 605df48c5a711ec2cdb21f7c596b5be7694ff1ae Mon Sep 17 00:00:00 2001
From: Bob Hogg <rwhogg@google.com>
Date: Wed, 22 Feb 2023 19:39:38 +0000
Subject: [PATCH] fix: Query options were not respecting use_cache

In certain circumstances, we were not respecting use_cache
for queries, unlike legacy NDB, which is quite emphatic
about supporting them.
(See https://github.com/GoogleCloudPlatform/datastore-ndb-python/blob/59cb209ed95480025d26531fc91397575438d2fe/ndb/query.py#L186-L187)

In #613 we tried to match legacy NDB behavior by updating the cache
using the results of queries. We still do that, but now we respect
use_cache, which was a valid keyword argument for Query.fetch()
and friends, but was not passed down to the context cache when
needed.

As a result, the cache could mysteriously accumulate lots of memory
usage and perhaps even cause you to hit memory limits, even if it was
seemingly disabled and it didn't look like there were any objects
holding references to your query results.
This is a problem for certain batch-style workloads where you know
you're only interested in processing a certain entity once.

Fixes #752
---
 google/cloud/ndb/_datastore_query.py | 12 ++--
 tests/system/test_query.py           | 26 +++++++++
 tests/unit/test__datastore_query.py  | 82 ++++++++++++++++++++++++++++
 3 files changed, 116 insertions(+), 4 deletions(-)

diff --git a/google/cloud/ndb/_datastore_query.py b/google/cloud/ndb/_datastore_query.py
index b3615304..05d951c5 100644
--- a/google/cloud/ndb/_datastore_query.py
+++ b/google/cloud/ndb/_datastore_query.py
@@ -375,7 +375,7 @@ def _next_batch(self):
         self._start_cursor = query.start_cursor
         self._index = 0
         self._batch = [
-            _Result(result_type, result_pb, query.order_by)
+            _Result(result_type, result_pb, query.order_by, query_options=query)
             for result_pb in response.batch.entity_results
         ]
 
@@ -755,17 +755,21 @@ class _Result(object):
         order_by (Optional[Sequence[query.PropertyOrder]]): Ordering for the
             query. Used to merge sorted result sets while maintaining sort
             order.
+        query_options (Optional[QueryOptions]): Other query_options.
+            use_cache is the only supported option.
     """
 
     _key = None
 
-    def __init__(self, result_type, result_pb, order_by=None):
+    def __init__(self, result_type, result_pb, order_by=None, query_options=None):
         self.result_type = result_type
         self.result_pb = result_pb
         self.order_by = order_by
 
         self.cursor = Cursor(result_pb.cursor)
 
+        self._query_options = query_options
+
     def __lt__(self, other):
         """For total ordering."""
         return self._compare(other) == -1
@@ -854,7 +858,7 @@ def check_cache(self, context):
                 will cause `None` to be recorded in the cache.
         """
         key = self.key()
-        if context._use_cache(key):
+        if context._use_cache(key, self._query_options):
             try:
                 return context.cache.get_and_validate(key)
             except KeyError:
@@ -880,7 +884,7 @@ def entity(self):
             if entity is _KEY_NOT_IN_CACHE:
                 # entity not in cache, create one, and then add it to cache
                 entity = model._entity_from_protobuf(self.result_pb.entity)
-                if context._use_cache(entity.key):
+                if context._use_cache(entity.key, self._query_options):
                     context.cache[entity.key] = entity
             return entity
 
diff --git a/tests/system/test_query.py b/tests/system/test_query.py
index 51d9ab52..506e5aba 100644
--- a/tests/system/test_query.py
+++ b/tests/system/test_query.py
@@ -2010,3 +2010,29 @@ class SomeKind(ndb.Model):
 
         # If there is a cache hit, we'll get back the same object, not just a copy
         assert key.get() is retrieved
+
+
+def test_query_with_explicit_use_cache_updates_cache(dispose_of, client_context):
+    class SomeKind(ndb.Model):
+        foo = ndb.IntegerProperty()
+
+    entity = SomeKind(foo=42)
+    key = entity.put(use_cache=False)
+    dispose_of(key._key)
+    assert len(client_context.cache) == 0
+
+    eventually(lambda: SomeKind.query().fetch(use_cache=True), length_equals(1))
+    assert len(client_context.cache) == 1
+
+
+def test_query_with_use_cache_false_does_not_update_cache(dispose_of, client_context):
+    class SomeKind(ndb.Model):
+        foo = ndb.IntegerProperty()
+
+    entity = SomeKind(foo=42)
+    key = entity.put(use_cache=False)
+    dispose_of(key._key)
+    assert len(client_context.cache) == 0
+
+    eventually(lambda: SomeKind.query().fetch(use_cache=False), length_equals(1))
+    assert len(client_context.cache) == 0
diff --git a/tests/unit/test__datastore_query.py b/tests/unit/test__datastore_query.py
index 4a0de9bc..fc4aca8a 100644
--- a/tests/unit/test__datastore_query.py
+++ b/tests/unit/test__datastore_query.py
@@ -1500,6 +1500,31 @@ def probably_has_next(self):
 
 
 class Test_Result:
+    @staticmethod
+    def test_constructor_defaults():
+        result = _datastore_query._Result(
+            result_type=None,
+            result_pb=query_pb2.EntityResult(),
+        )
+        assert result.order_by is None
+        assert result._query_options is None
+
+    @staticmethod
+    def test_constructor_order_by():
+        order = query_module.PropertyOrder("foo")
+        result = _datastore_query._Result(
+            result_type=None, result_pb=query_pb2.EntityResult(), order_by=[order]
+        )
+        assert result.order_by == [order]
+
+    @staticmethod
+    def test_constructor_query_options():
+        options = query_module.QueryOptions(use_cache=False)
+        result = _datastore_query._Result(
+            result_type=None, result_pb=query_pb2.EntityResult(), query_options=options
+        )
+        assert result._query_options == options
+
     @staticmethod
     def test_total_ordering():
         def result(foo, bar=0, baz=""):
@@ -1660,9 +1685,15 @@ def test_entity_full_entity(model):
             mock.Mock(entity=entity_pb, cursor=b"123", spec=("entity", "cursor")),
         )
 
+        context = context_module.get_context()
+
+        assert len(context.cache) == 0
         assert result.entity() is entity
         model._entity_from_protobuf.assert_called_once_with(entity_pb)
 
+        # Regression test for #752: ensure cache is updated after querying
+        assert len(context.cache) == 1
+
     @staticmethod
     @pytest.mark.usefixtures("in_context")
     @mock.patch("google.cloud.ndb._datastore_query.model")
@@ -1703,6 +1734,57 @@ def test_entity_full_entity_no_cache(model):
             )
             assert result.entity() is entity
 
+            # Regression test for #752: ensure cache does not grow (i.e. use up memory)
+            assert len(context.cache) == 0
+
+    @staticmethod
+    @pytest.mark.usefixtures("in_context")
+    @mock.patch("google.cloud.ndb._datastore_query.model")
+    def test_entity_full_entity_no_cache_via_cache_options(model):
+        context = context_module.get_context()
+        with context.new().use():
+            key_pb = entity_pb2.Key(
+                partition_id=entity_pb2.PartitionId(project_id="testing"),
+                path=[entity_pb2.Key.PathElement(kind="ThisKind", id=42)],
+            )
+            entity = mock.Mock(key=key_pb)
+            model._entity_from_protobuf.return_value = entity
+            result = _datastore_query._Result(
+                _datastore_query.RESULT_TYPE_FULL,
+                mock.Mock(entity=entity, cursor=b"123", spec=("entity", "cursor")),
+                query_options=query_module.QueryOptions(use_cache=False),
+            )
+            assert result.entity() is entity
+
+            # Regression test for #752: ensure cache does not grow (i.e. use up memory)
+            assert len(context.cache) == 0
+
+    @staticmethod
+    @pytest.mark.usefixtures("in_context")
+    @mock.patch("google.cloud.ndb._datastore_query.model")
+    def test_entity_full_entity_cache_options_true(model):
+        key_pb = entity_pb2.Key(
+            partition_id=entity_pb2.PartitionId(project_id="testing"),
+            path=[entity_pb2.Key.PathElement(kind="ThisKind", id=42)],
+        )
+        entity_pb = mock.Mock(key=key_pb)
+        entity = mock.Mock(key=key_module.Key("ThisKind", 42))
+        model._entity_from_protobuf.return_value = entity
+        result = _datastore_query._Result(
+            _datastore_query.RESULT_TYPE_FULL,
+            mock.Mock(entity=entity_pb, cursor=b"123", spec=("entity", "cursor")),
+            query_options=query_module.QueryOptions(use_cache=True),
+        )
+
+        context = context_module.get_context()
+
+        assert len(context.cache) == 0
+        assert result.entity() is entity
+        model._entity_from_protobuf.assert_called_once_with(entity_pb)
+
+        # Regression test for #752: ensure cache is updated after querying
+        assert len(context.cache) == 1
+
     @staticmethod
     @pytest.mark.usefixtures("in_context")
     def test_entity_key_only():