diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py index 28f24d0fc59cb0..f6962f1e07053a 100644 --- a/Lib/test/test_marshal.py +++ b/Lib/test/test_marshal.py @@ -317,6 +317,123 @@ def test_recursion_limit(self): last.append([0]) self.assertRaises(ValueError, marshal.dumps, head) + def test_reference_loop_list(self): + a = [] + a.append(a) + for v in range(3): + self.assertRaises(ValueError, marshal.dumps, a, v) + for v in range(3, marshal.version + 1): + d = marshal.dumps(a, v) + b = marshal.loads(d) + self.assertIsInstance(b, list) + self.assertIs(b[0], b) + + def test_reference_loop_dict(self): + a = {} + a[None] = a + for v in range(3): + self.assertRaises(ValueError, marshal.dumps, a, v) + for v in range(3, marshal.version + 1): + d = marshal.dumps(a, v) + b = marshal.loads(d) + self.assertIsInstance(b, dict) + self.assertIs(b[None], b) + + def test_reference_loop_tuple(self): + a = ([],) + a[0].append(a) + for v in range(3): + self.assertRaises(ValueError, marshal.dumps, a, v) + for v in range(3, marshal.version + 1): + d = marshal.dumps(a, v) + b = marshal.loads(d) + self.assertIsInstance(b, tuple) + self.assertIsInstance(b[0], list) + self.assertIs(b[0][0], b) + + def test_reference_loop_code(self): + def f(): + return 1234.5 + code = f.__code__ + a = [] + code = code.replace(co_consts=code.co_consts + (a,)) + a.append(code) + for v in range(marshal.version + 1): + self.assertRaises(ValueError, marshal.dumps, code, v) + + def test_reference_loop_slice(self): + a = slice([], None) + a.start.append(a) + for v in range(marshal.version + 1): + self.assertRaises(ValueError, marshal.dumps, a, v) + + a = slice(None, []) + a.stop.append(a) + for v in range(marshal.version + 1): + self.assertRaises(ValueError, marshal.dumps, a, v) + + a = slice(None, None, []) + a.step.append(a) + for v in range(marshal.version + 1): + self.assertRaises(ValueError, marshal.dumps, a, v) + + def test_loads_reference_loop_list(self): + data = b'\xdb\x01\x00\x00\x00r\x00\x00\x00\x00' # [] + a = marshal.loads(data) + self.assertIsInstance(a, list) + self.assertIs(a[0], a) + + def test_loads_reference_loop_dict(self): + data = b'\xfbNr\x00\x00\x00\x000' # {None: } + a = marshal.loads(data) + self.assertIsInstance(a, dict) + self.assertIs(a[None], a) + + def test_loads_abnormal_reference_loops(self): + # Indirect self-references of tuples. + data = b'\xa8\x01\x00\x00\x00[\x01\x00\x00\x00r\x00\x00\x00\x00' # ([],) + a = marshal.loads(data) + self.assertIsInstance(a, tuple) + self.assertIsInstance(a[0], list) + self.assertIs(a[0][0], a) + + data = b'\xa8\x01\x00\x00\x00{Nr\x00\x00\x00\x000' # ({None: },) + a = marshal.loads(data) + self.assertIsInstance(a, tuple) + self.assertIsInstance(a[0], dict) + self.assertIs(a[0][None], a) + + # Direct self-reference which cannot be created in Python. + data = b'\xa8\x01\x00\x00\x00r\x00\x00\x00\x00' # (,) + a = marshal.loads(data) + self.assertIsInstance(a, tuple) + self.assertIs(a[0], a) + + # Direct self-references which cannot be created in Python + # because of unhashability. + data = b'\xfbr\x00\x00\x00\x00N0' # {: None} + self.assertRaises(TypeError, marshal.loads, data) + data = b'\xbc\x01\x00\x00\x00r\x00\x00\x00\x00' # {} + self.assertRaises(TypeError, marshal.loads, data) + + for data in [ + # Indirect self-references of immutable objects. + b'\xba[\x01\x00\x00\x00r\x00\x00\x00\x00NN', # slice([], None) + b'\xbaN[\x01\x00\x00\x00r\x00\x00\x00\x00N', # slice(None, []) + b'\xbaNN[\x01\x00\x00\x00r\x00\x00\x00\x00', # slice(None, None, []) + b'\xba{Nr\x00\x00\x00\x000NN', # slice({None: }, None) + b'\xbaN{Nr\x00\x00\x00\x000N', # slice(None, {None: }) + b'\xbaNN{Nr\x00\x00\x00\x000', # slice(None, None, {None: }) + + # Direct self-references which cannot be created in Python. + b'\xbe\x01\x00\x00\x00r\x00\x00\x00\x00', # frozenset({}) + b'\xbar\x00\x00\x00\x00NN', # slice(, None) + b'\xbaNr\x00\x00\x00\x00N', # slice(None, ) + b'\xbaNNr\x00\x00\x00\x00', # slice(None, None, ) + ]: + with self.subTest(data=data): + self.assertRaises(ValueError, marshal.loads, data) + def test_exact_type_match(self): # Former bug: # >>> class Int(int): pass diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-17-20-37-02.gh-issue-148653.nbbHMh.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-17-20-37-02.gh-issue-148653.nbbHMh.rst new file mode 100644 index 00000000000000..0f8bdd1ff7ee2a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-17-20-37-02.gh-issue-148653.nbbHMh.rst @@ -0,0 +1,2 @@ +Forbid :mod:`marshalling ` recursive code objects and :class:`slice` +objects which cannot be correctly unmarshalled. diff --git a/Python/marshal.c b/Python/marshal.c index 507e882e951a4e..ade5c2e2fbcc70 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -380,7 +380,6 @@ static int w_ref(PyObject *v, char *flag, WFILE *p) { _Py_hashtable_entry_t *entry; - int w; if (p->version < 3 || p->hashtable == NULL) return 0; /* not writing object references */ @@ -397,20 +396,28 @@ w_ref(PyObject *v, char *flag, WFILE *p) entry = _Py_hashtable_get_entry(p->hashtable, v); if (entry != NULL) { /* write the reference index to the stream */ - w = (int)(uintptr_t)entry->value; + uintptr_t w = (uintptr_t)entry->value; + if (w & 0x80000000LU) { + PyErr_Format(PyExc_ValueError, "cannot marshal recursion %T objects", v); + goto err; + } /* we don't store "long" indices in the dict */ - assert(0 <= w && w <= 0x7fffffff); + assert(w <= 0x7fffffff); w_byte(TYPE_REF, p); - w_long(w, p); + w_long((int)w, p); return 1; } else { - size_t s = p->hashtable->nentries; + size_t w = p->hashtable->nentries; /* we don't support long indices */ - if (s >= 0x7fffffff) { + if (w >= 0x7fffffff) { PyErr_SetString(PyExc_ValueError, "too many objects"); goto err; } - w = (int)s; + // Corresponding code should call w_complete() after + // writing the object. + if (PyCode_Check(v) || PySlice_Check(v)) { + w |= 0x80000000LU; + } if (_Py_hashtable_set(p->hashtable, Py_NewRef(v), (void *)(uintptr_t)w) < 0) { Py_DECREF(v); @@ -424,6 +431,27 @@ w_ref(PyObject *v, char *flag, WFILE *p) return 1; } +static void +w_complete(PyObject *v, WFILE *p) +{ + if (p->version < 3 || p->hashtable == NULL) { + return; + } + if (_PyObject_IsUniquelyReferenced(v)) { + return; + } + + _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(p->hashtable, v); + if (entry == NULL) { + return; + } + assert(entry != NULL); + uintptr_t w = (uintptr_t)entry->value; + assert(w & 0x80000000LU); + w &= ~0x80000000LU; + entry->value = (void *)(uintptr_t)w; +} + static void w_complex_object(PyObject *v, char flag, WFILE *p); @@ -673,6 +701,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p) w_object(co->co_linetable, p); w_object(co->co_exceptiontable, p); Py_DECREF(co_code); + w_complete(v, p); } else if (PyObject_CheckBuffer(v)) { /* Write unknown bytes-like objects as a bytes object */ @@ -698,6 +727,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p) w_object(slice->start, p); w_object(slice->stop, p); w_object(slice->step, p); + w_complete(v, p); } else { W_TYPE(TYPE_UNKNOWN, p);