gh-148914: Fix memoization of in-band PickleBuffer in the Python implementation (GH-149052)

Previously, a PickleBuffer referenced more than once did not preserve its
identity when pickled in-band. Also, an empty writable PickleBuffer memoized
an empty bytearray object in place of b'', which is a singleton in CPython,
so subsequent references to b'' were unpickled as an empty bytearray object.
This commit is contained in:
Serhiy Storchaka
2026-05-02 12:04:05 +03:00
committed by GitHub
parent e635ad2c68
commit b89735625d
3 changed files with 54 additions and 9 deletions
+3 -9
View File
@@ -920,17 +920,11 @@ class _Pickler:
# Write data in-band
# XXX The C implementation avoids a copy here
buf = m.tobytes()
in_memo = id(buf) in self.memo
if m.readonly:
if in_memo:
self._save_bytes_no_memo(buf)
else:
self.save_bytes(buf)
self._save_bytes_no_memo(buf)
else:
if in_memo:
self._save_bytearray_no_memo(buf)
else:
self.save_bytearray(buf)
self._save_bytearray_no_memo(buf)
self.memoize(obj)
else:
# Write data out-of-band
self.write(NEXT_BUFFER)
+45
View File
@@ -3100,6 +3100,51 @@ class AbstractPickleTests:
self.assertIsNot(b2a, b2b)
self.assert_is_copy(b2a, b2b)
def test_picklebuffer_memoization(self):
    # Round-trip PickleBuffers through dumps()/loads() for protocol 5 and
    # up, over both bytes and bytearray backings and several payload
    # sizes (including the empty payload), and check object identity of
    # the results.
    if self.py_version < (3, 8):
        self.skipTest('not supported in Python < 3.8')
    payloads = (b'', b'xyz', b'xyz'*100)
    for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
        for array_type in (bytes, bytearray):
            for s in payloads:
                params = dict(proto=proto, array_type=array_type, s=s)

                # The very same PickleBuffer pickled twice must come
                # back as one single object.
                with self.subTest(**params, independent=False):
                    shared = pickle.PickleBuffer(array_type(s))
                    first, second = self.loads(
                        self.dumps((shared, shared), proto))
                    self.assertIs(first, second)

                # Two distinct PickleBuffers wrapping the same
                # underlying object must each be a copy of it.
                with self.subTest(**params, independent=True):
                    backing = array_type(s)
                    wrappers = (pickle.PickleBuffer(backing),
                                pickle.PickleBuffer(backing))
                    copy_a, copy_b = self.loads(
                        self.dumps(wrappers, proto))
                    # Distinctness is only required for the mutable
                    # type; for bytes no identity check is made
                    # (presumably because equal immutable bytes may
                    # legitimately be shared).
                    if array_type is not bytes:
                        self.assertIsNot(copy_a, copy_b)
                    for restored in (copy_a, copy_b):
                        self.assert_is_copy(restored, backing)
def test_empty_picklebuffer_memoization(self):
    # gh-148914: Empty writable PickleBuffer memoized an empty bytearray
    # with the id of b'' (a singleton in CPython).
    #
    # Pickle an empty PickleBuffer followed by b'' and bytearray() in
    # the same tuple, and check that every element is restored with its
    # own type and an empty value.
    if self.py_version < (3, 8):
        self.skipTest('not supported in Python < 3.8')
    for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
        for readonly in False, True:
            with self.subTest(proto=proto, readonly=readonly):
                empty_bytes = b''
                empty_bytearray = bytearray()
                # A bytes backing gives a read-only buffer, a bytearray
                # backing gives a writable one.
                if readonly:
                    backing, expected_type = empty_bytes, bytes
                else:
                    backing, expected_type = empty_bytearray, bytearray
                wrapped = pickle.PickleBuffer(backing)
                payload = self.dumps(
                    (wrapped, empty_bytes, empty_bytearray), proto)
                restored = self.loads(payload)
                out_buf, out_bytes, out_bytearray = restored

                # Types first, then values, in element order.
                self.assertIsInstance(out_buf, expected_type)
                self.assertIsInstance(out_bytes, bytes)
                self.assertIsInstance(out_bytearray, bytearray)
                for value in (out_buf, out_bytes, out_bytearray):
                    self.assertEqual(value, b'')
def test_ints(self):
for proto in protocols:
n = sys.maxsize
@@ -0,0 +1,6 @@
Fix memoization of in-band :class:`~pickle.PickleBuffer` in the Python
implementation of :mod:`pickle`. Previously, a :class:`!PickleBuffer`
referenced more than once did not preserve its identity, and an empty
writable :class:`!PickleBuffer` memoized an empty bytearray object in place
of ``b''``, so subsequent references to ``b''`` were unpickled as an empty
bytearray object.