Skip to content

Commit

Permalink
Avoid duplicate serialization in DynamoDB BatchWriter (#3504)
Browse files Browse the repository at this point in the history
* perform memory-less deepcopy for dynamodb payloads

* test case for double serialization in dynamodb table resource
  • Loading branch information
jonemo authored Nov 24, 2022
1 parent 47f348d commit 865ba34
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 6 deletions.
5 changes: 5 additions & 0 deletions .changes/next-release/bugfix-dynamodb-70676.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"type": "bugfix",
"category": "dynamodb",
"description": "Fixes duplicate serialization issue in DynamoDB BatchWriter"
}
12 changes: 11 additions & 1 deletion boto3/dynamodb/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,18 @@ def register_high_level_interface(base_classes, **kwargs):
base_classes.insert(0, DynamoDBHighLevelResource)


class _ForgetfulDict(dict):
    """A dictionary that discards any items set on it.

    For use as `memo` in `copy.deepcopy()` when every instance of a repeated
    object in the deepcopied data structure should result in a separate copy.

    Only item assignment (`d[key] = value`) is overridden here; lookups
    behave like those of a plain `dict`.

    NOTE(review): because nothing is ever memoized, deep-copying a
    structure that contains a reference cycle would presumably recurse
    without bound -- confirm that callers only pass acyclic data.
    """

    def __setitem__(self, key, value):
        # Intentionally drop the assignment instead of storing it.
        pass


def copy_dynamodb_params(params, **kwargs):
    """Return a deep copy of `params` in which no object is shared.

    A `_ForgetfulDict` is passed as the `memo`, so an object referenced
    from several places in `params` is copied once per reference rather
    than once overall. This keeps later in-place processing of one copy
    from affecting the others (the double-serialization problem this
    change addresses).

    :param params: The data structure to copy.
    :param kwargs: Accepted and ignored.
    :return: A deep copy of `params` with no shared sub-objects.
    """
    return copy.deepcopy(params, memo=_ForgetfulDict())


class DynamoDBHighLevelResource:
Expand Down
85 changes: 80 additions & 5 deletions tests/functional/dynamodb/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import json

from botocore.stub import Stubber

import boto3
Expand All @@ -22,20 +24,93 @@ class TestTableResourceCustomizations(unittest.TestCase):

def setUp(self):
    # Create the 'dynamodb' resource once, then expose both the resource
    # and a Table handle for 'mytable' to every test method.
    dynamodb = boto3.resource('dynamodb', 'us-east-1')
    self.resource = dynamodb
    self.table = dynamodb.Table('mytable')

def test_resource_has_batch_writer_added(self):
    # The Table resource built in setUp must expose a `batch_writer`
    # attribute.
    assert hasattr(self.table, 'batch_writer')

def test_operation_without_output(self):
    # Calling an operation whose stubbed response is an empty dict must
    # succeed and consume the queued response.
    stubber = Stubber(self.table.meta.client)
    stubber.add_response('tag_resource', {})
    arn = 'arn:aws:dynamodb:us-west-2:123456789:table/mytable'

    with stubber:
        self.table.meta.client.tag_resource(
            ResourceArn=arn, Tags=[{'Key': 'project', 'Value': 'val'}]
        )

    # Fails if the stubbed response was never used.
    stubber.assert_no_pending_responses()

def test_batch_write_does_not_double_serialize(self):
    # If multiple items reference the same Python object, the
    # object does not get double-serialized.
    # https://github.com/boto/boto3/issues/3474

    used_twice = {'pkey': 'foo1', 'otherfield': {'foo': 1, 'bar': 2}}
    batch_writer = self.table.batch_writer()

    # The default Stubber compares the request payload to the
    # "expected_params" before automatic serialization happens. This custom
    # event handler uses the same technique as the Stubber to record the
    # serialized request body, but later in the request lifecycle.
    class LateStubber:
        def __init__(self, client):
            # Stays None until the single expected request is seen.
            self.intercepted_request_body = None
            client.meta.events.register_first(
                'before-call.*.*',
                self.late_request_interceptor,
            )

        def late_request_interceptor(self, event_name, params, **kwargs):
            if self.intercepted_request_body is not None:
                raise AssertionError(
                    'LateStubber was called more than once, but only one '
                    'request is expected'
                )
            body_str = params.get('body', b'').decode('utf-8')
            try:
                self.intercepted_request_body = json.loads(body_str)
            except ValueError as err:
                # json.JSONDecodeError is a ValueError; keep the original
                # error attached as the cause for easier debugging.
                raise AssertionError(
                    'Expected JSON request body, but failed to JSON decode'
                ) from err

    late_stubber = LateStubber(self.table.meta.client)

    with Stubber(self.table.meta.client) as stubber:
        stubber.add_response(
            'batch_write_item',
            service_response={'UnprocessedItems': {}},
        )
        # Queue the very same Python object twice, then send the batch.
        batch_writer.put_item(Item=used_twice)
        batch_writer.put_item(Item=used_twice)
        batch_writer._flush()

    # Each put must carry the item serialized exactly once.
    expected_item = {
        'pkey': {'S': 'foo1'},
        'otherfield': {'M': {'foo': {'N': '1'}, 'bar': {'N': '2'}}},
    }
    expected_request_body = {
        'RequestItems': {
            'mytable': [
                {'PutRequest': {'Item': expected_item}},
                {'PutRequest': {'Item': expected_item}},
            ]
        }
    }

    assert late_stubber.intercepted_request_body == expected_request_body

0 comments on commit 865ba34

Please sign in to comment.