[3.13] gh-148653: Fix some marshal errors related to recursive code objects (GH-148698) (GH-148711) (GH-148713)

(cherry picked from commit d496c637a3)

Forbid marshalling recursive code objects which
cannot be correctly unmarshalled.
Add multiple tests for recursive data structures.
(cherry picked from commit 2e37d83641)
This commit is contained in:
Serhiy Storchaka
2026-04-18 12:29:38 +03:00
committed by GitHub
parent 53e0725680
commit ac1c1e7ef0
3 changed files with 128 additions and 7 deletions
+90
View File
@@ -310,6 +310,96 @@ class BugsTestCase(unittest.TestCase):
last.append([0])
self.assertRaises(ValueError, marshal.dumps, head)
def test_reference_loop_list(self):
a = []
a.append(a)
for v in range(3):
self.assertRaises(ValueError, marshal.dumps, a, v)
for v in range(3, marshal.version + 1):
d = marshal.dumps(a, v)
b = marshal.loads(d)
self.assertIsInstance(b, list)
self.assertIs(b[0], b)
def test_reference_loop_dict(self):
a = {}
a[None] = a
for v in range(3):
self.assertRaises(ValueError, marshal.dumps, a, v)
for v in range(3, marshal.version + 1):
d = marshal.dumps(a, v)
b = marshal.loads(d)
self.assertIsInstance(b, dict)
self.assertIs(b[None], b)
def test_reference_loop_tuple(self):
a = ([],)
a[0].append(a)
for v in range(3):
self.assertRaises(ValueError, marshal.dumps, a, v)
for v in range(3, marshal.version + 1):
d = marshal.dumps(a, v)
b = marshal.loads(d)
self.assertIsInstance(b, tuple)
self.assertIsInstance(b[0], list)
self.assertIs(b[0][0], b)
def test_reference_loop_code(self):
def f():
return 1234.5
code = f.__code__
a = []
code = code.replace(co_consts=code.co_consts + (a,))
a.append(code)
for v in range(marshal.version + 1):
self.assertRaises(ValueError, marshal.dumps, code, v)
def test_loads_reference_loop_list(self):
data = b'\xdb\x01\x00\x00\x00r\x00\x00\x00\x00' # [<R>]
a = marshal.loads(data)
self.assertIsInstance(a, list)
self.assertIs(a[0], a)
def test_loads_reference_loop_dict(self):
data = b'\xfbNr\x00\x00\x00\x000' # {None: <R>}
a = marshal.loads(data)
self.assertIsInstance(a, dict)
self.assertIs(a[None], a)
def test_loads_abnormal_reference_loops(self):
# Indirect self-references of tuples.
data = b'\xa8\x01\x00\x00\x00[\x01\x00\x00\x00r\x00\x00\x00\x00' # ([<R>],)
a = marshal.loads(data)
self.assertIsInstance(a, tuple)
self.assertIsInstance(a[0], list)
self.assertIs(a[0][0], a)
data = b'\xa8\x01\x00\x00\x00{Nr\x00\x00\x00\x000' # ({None: <R>},)
a = marshal.loads(data)
self.assertIsInstance(a, tuple)
self.assertIsInstance(a[0], dict)
self.assertIs(a[0][None], a)
# Direct self-reference which cannot be created in Python.
data = b'\xa8\x01\x00\x00\x00r\x00\x00\x00\x00' # (<R>,)
a = marshal.loads(data)
self.assertIsInstance(a, tuple)
self.assertIs(a[0], a)
# Direct self-references which cannot be created in Python
# because of unhashability.
data = b'\xfbr\x00\x00\x00\x00N0' # {<R>: None}
self.assertRaises(TypeError, marshal.loads, data)
data = b'\xbc\x01\x00\x00\x00r\x00\x00\x00\x00' # {<R>}
self.assertRaises(TypeError, marshal.loads, data)
for data in [
# Direct self-references which cannot be created in Python.
b'\xbe\x01\x00\x00\x00r\x00\x00\x00\x00', # frozenset({<R>})
]:
with self.subTest(data=data):
self.assertRaises(ValueError, marshal.loads, data)
def test_exact_type_match(self):
# Former bug:
# >>> class Int(int): pass
@@ -0,0 +1,2 @@
Forbid :mod:`marshalling <marshal>` recursive code objects, which
cannot be correctly unmarshalled; attempting to dump one now raises
:exc:`ValueError`.
+36 -7
View File
@@ -310,7 +310,6 @@ static int
w_ref(PyObject *v, char *flag, WFILE *p)
{
_Py_hashtable_entry_t *entry;
int w;
if (p->version < 3 || p->hashtable == NULL)
return 0; /* not writing object references */
@@ -327,20 +326,28 @@ w_ref(PyObject *v, char *flag, WFILE *p)
entry = _Py_hashtable_get_entry(p->hashtable, v);
if (entry != NULL) {
/* write the reference index to the stream */
w = (int)(uintptr_t)entry->value;
uintptr_t w = (uintptr_t)entry->value;
if (w & 0x80000000LU) {
PyErr_Format(PyExc_ValueError, "cannot marshal recursion %T objects", v);
goto err;
}
/* we don't store "long" indices in the dict */
assert(0 <= w && w <= 0x7fffffff);
assert(w <= 0x7fffffff);
w_byte(TYPE_REF, p);
w_long(w, p);
w_long((int)w, p);
return 1;
} else {
size_t s = p->hashtable->nentries;
size_t w = p->hashtable->nentries;
/* we don't support long indices */
if (s >= 0x7fffffff) {
if (w >= 0x7fffffff) {
PyErr_SetString(PyExc_ValueError, "too many objects");
goto err;
}
w = (int)s;
// Corresponding code should call w_complete() after
// writing the object.
if (PyCode_Check(v)) {
w |= 0x80000000LU;
}
if (_Py_hashtable_set(p->hashtable, Py_NewRef(v),
(void *)(uintptr_t)w) < 0) {
Py_DECREF(v);
@@ -354,6 +361,27 @@ err:
return 1;
}
/* Mark object v as completely written.
 *
 * w_ref() stores the reference index of a code object with bit 31
 * (0x80000000) set, and raises ValueError if a reference to such a flagged
 * entry is requested.  This function clears that bit on v's hashtable
 * entry, and is meant to be called once the object has been written.
 */
static void
w_complete(PyObject *v, WFILE *p)
{
    /* No entry to update when object references are not being written.
       NOTE(review): the refcount test presumably mirrors a matching
       shortcut in w_ref() for unshared objects -- confirm. */
    if (p->version < 3 || p->hashtable == NULL || Py_REFCNT(v) == 1) {
        return;
    }

    _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(p->hashtable, v);
    if (entry != NULL) {
        uintptr_t index = (uintptr_t)entry->value;
        /* The entry must still carry the "being written" flag. */
        assert(index & 0x80000000LU);
        index &= ~0x80000000LU;
        entry->value = (void *)index;
    }
}
static void
w_complex_object(PyObject *v, char flag, WFILE *p);
@@ -603,6 +631,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p)
w_object(co->co_linetable, p);
w_object(co->co_exceptiontable, p);
Py_DECREF(co_code);
w_complete(v, p);
}
else if (PyObject_CheckBuffer(v)) {
/* Write unknown bytes-like objects as a bytes object */