Skip to content

Commit d496c63

Browse files
[3.14] gh-148653: Fix some marshal errors related to recursive immutable objects (GH-148698) (GH-148711)
Forbid marshalling recursive code and slice objects which cannot be correctly unmarshalled. Add multiple tests for recursive data structures. (cherry picked from commit 2e37d83)
1 parent 67100b3 commit d496c63

File tree

3 files changed

+156
-7
lines changed

3 files changed

+156
-7
lines changed

Lib/test/test_marshal.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,123 @@ def test_recursion_limit(self):
317317
last.append([0])
318318
self.assertRaises(ValueError, marshal.dumps, head)
319319

320+
def test_reference_loop_list(self):
321+
a = []
322+
a.append(a)
323+
for v in range(3):
324+
self.assertRaises(ValueError, marshal.dumps, a, v)
325+
for v in range(3, marshal.version + 1):
326+
d = marshal.dumps(a, v)
327+
b = marshal.loads(d)
328+
self.assertIsInstance(b, list)
329+
self.assertIs(b[0], b)
330+
331+
def test_reference_loop_dict(self):
332+
a = {}
333+
a[None] = a
334+
for v in range(3):
335+
self.assertRaises(ValueError, marshal.dumps, a, v)
336+
for v in range(3, marshal.version + 1):
337+
d = marshal.dumps(a, v)
338+
b = marshal.loads(d)
339+
self.assertIsInstance(b, dict)
340+
self.assertIs(b[None], b)
341+
342+
def test_reference_loop_tuple(self):
343+
a = ([],)
344+
a[0].append(a)
345+
for v in range(3):
346+
self.assertRaises(ValueError, marshal.dumps, a, v)
347+
for v in range(3, marshal.version + 1):
348+
d = marshal.dumps(a, v)
349+
b = marshal.loads(d)
350+
self.assertIsInstance(b, tuple)
351+
self.assertIsInstance(b[0], list)
352+
self.assertIs(b[0][0], b)
353+
354+
def test_reference_loop_code(self):
355+
def f():
356+
return 1234.5
357+
code = f.__code__
358+
a = []
359+
code = code.replace(co_consts=code.co_consts + (a,))
360+
a.append(code)
361+
for v in range(marshal.version + 1):
362+
self.assertRaises(ValueError, marshal.dumps, code, v)
363+
364+
def test_reference_loop_slice(self):
365+
a = slice([], None)
366+
a.start.append(a)
367+
for v in range(marshal.version + 1):
368+
self.assertRaises(ValueError, marshal.dumps, a, v)
369+
370+
a = slice(None, [])
371+
a.stop.append(a)
372+
for v in range(marshal.version + 1):
373+
self.assertRaises(ValueError, marshal.dumps, a, v)
374+
375+
a = slice(None, None, [])
376+
a.step.append(a)
377+
for v in range(marshal.version + 1):
378+
self.assertRaises(ValueError, marshal.dumps, a, v)
379+
380+
def test_loads_reference_loop_list(self):
381+
data = b'\xdb\x01\x00\x00\x00r\x00\x00\x00\x00' # [<R>]
382+
a = marshal.loads(data)
383+
self.assertIsInstance(a, list)
384+
self.assertIs(a[0], a)
385+
386+
def test_loads_reference_loop_dict(self):
387+
data = b'\xfbNr\x00\x00\x00\x000' # {None: <R>}
388+
a = marshal.loads(data)
389+
self.assertIsInstance(a, dict)
390+
self.assertIs(a[None], a)
391+
392+
def test_loads_abnormal_reference_loops(self):
393+
# Indirect self-references of tuples.
394+
data = b'\xa8\x01\x00\x00\x00[\x01\x00\x00\x00r\x00\x00\x00\x00' # ([<R>],)
395+
a = marshal.loads(data)
396+
self.assertIsInstance(a, tuple)
397+
self.assertIsInstance(a[0], list)
398+
self.assertIs(a[0][0], a)
399+
400+
data = b'\xa8\x01\x00\x00\x00{Nr\x00\x00\x00\x000' # ({None: <R>},)
401+
a = marshal.loads(data)
402+
self.assertIsInstance(a, tuple)
403+
self.assertIsInstance(a[0], dict)
404+
self.assertIs(a[0][None], a)
405+
406+
# Direct self-reference which cannot be created in Python.
407+
data = b'\xa8\x01\x00\x00\x00r\x00\x00\x00\x00' # (<R>,)
408+
a = marshal.loads(data)
409+
self.assertIsInstance(a, tuple)
410+
self.assertIs(a[0], a)
411+
412+
# Direct self-references which cannot be created in Python
413+
# because of unhashability.
414+
data = b'\xfbr\x00\x00\x00\x00N0' # {<R>: None}
415+
self.assertRaises(TypeError, marshal.loads, data)
416+
data = b'\xbc\x01\x00\x00\x00r\x00\x00\x00\x00' # {<R>}
417+
self.assertRaises(TypeError, marshal.loads, data)
418+
419+
for data in [
420+
# Indirect self-references of immutable objects.
421+
b'\xba[\x01\x00\x00\x00r\x00\x00\x00\x00NN', # slice([<R>], None)
422+
b'\xbaN[\x01\x00\x00\x00r\x00\x00\x00\x00N', # slice(None, [<R>])
423+
b'\xbaNN[\x01\x00\x00\x00r\x00\x00\x00\x00', # slice(None, None, [<R>])
424+
b'\xba{Nr\x00\x00\x00\x000NN', # slice({None: <R>}, None)
425+
b'\xbaN{Nr\x00\x00\x00\x000N', # slice(None, {None: <R>})
426+
b'\xbaNN{Nr\x00\x00\x00\x000', # slice(None, None, {None: <R>})
427+
428+
# Direct self-references which cannot be created in Python.
429+
b'\xbe\x01\x00\x00\x00r\x00\x00\x00\x00', # frozenset({<R>})
430+
b'\xbar\x00\x00\x00\x00NN', # slice(<R>, None)
431+
b'\xbaNr\x00\x00\x00\x00N', # slice(None, <R>)
432+
b'\xbaNNr\x00\x00\x00\x00', # slice(None, None, <R>)
433+
]:
434+
with self.subTest(data=data):
435+
self.assertRaises(ValueError, marshal.loads, data)
436+
320437
def test_exact_type_match(self):
321438
# Former bug:
322439
# >>> class Int(int): pass
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Forbid :mod:`marshalling <marshal>` recursive code objects and :class:`slice`
2+
objects which cannot be correctly unmarshalled.

Python/marshal.c

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,6 @@ static int
380380
w_ref(PyObject *v, char *flag, WFILE *p)
381381
{
382382
_Py_hashtable_entry_t *entry;
383-
int w;
384383

385384
if (p->version < 3 || p->hashtable == NULL)
386385
return 0; /* not writing object references */
@@ -397,20 +396,28 @@ w_ref(PyObject *v, char *flag, WFILE *p)
397396
entry = _Py_hashtable_get_entry(p->hashtable, v);
398397
if (entry != NULL) {
399398
/* write the reference index to the stream */
400-
w = (int)(uintptr_t)entry->value;
399+
uintptr_t w = (uintptr_t)entry->value;
400+
if (w & 0x80000000LU) {
401+
PyErr_Format(PyExc_ValueError, "cannot marshal recursion %T objects", v);
402+
goto err;
403+
}
401404
/* we don't store "long" indices in the dict */
402-
assert(0 <= w && w <= 0x7fffffff);
405+
assert(w <= 0x7fffffff);
403406
w_byte(TYPE_REF, p);
404-
w_long(w, p);
407+
w_long((int)w, p);
405408
return 1;
406409
} else {
407-
size_t s = p->hashtable->nentries;
410+
size_t w = p->hashtable->nentries;
408411
/* we don't support long indices */
409-
if (s >= 0x7fffffff) {
412+
if (w >= 0x7fffffff) {
410413
PyErr_SetString(PyExc_ValueError, "too many objects");
411414
goto err;
412415
}
413-
w = (int)s;
416+
// Corresponding code should call w_complete() after
417+
// writing the object.
418+
if (PyCode_Check(v) || PySlice_Check(v)) {
419+
w |= 0x80000000LU;
420+
}
414421
if (_Py_hashtable_set(p->hashtable, Py_NewRef(v),
415422
(void *)(uintptr_t)w) < 0) {
416423
Py_DECREF(v);
@@ -424,6 +431,27 @@ w_ref(PyObject *v, char *flag, WFILE *p)
424431
return 1;
425432
}
426433

434+
static void
435+
w_complete(PyObject *v, WFILE *p)
436+
{
437+
if (p->version < 3 || p->hashtable == NULL) {
438+
return;
439+
}
440+
if (_PyObject_IsUniquelyReferenced(v)) {
441+
return;
442+
}
443+
444+
_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(p->hashtable, v);
445+
if (entry == NULL) {
446+
return;
447+
}
448+
assert(entry != NULL);
449+
uintptr_t w = (uintptr_t)entry->value;
450+
assert(w & 0x80000000LU);
451+
w &= ~0x80000000LU;
452+
entry->value = (void *)(uintptr_t)w;
453+
}
454+
427455
static void
428456
w_complex_object(PyObject *v, char flag, WFILE *p);
429457

@@ -673,6 +701,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p)
673701
w_object(co->co_linetable, p);
674702
w_object(co->co_exceptiontable, p);
675703
Py_DECREF(co_code);
704+
w_complete(v, p);
676705
}
677706
else if (PyObject_CheckBuffer(v)) {
678707
/* Write unknown bytes-like objects as a bytes object */
@@ -698,6 +727,7 @@ w_complex_object(PyObject *v, char flag, WFILE *p)
698727
w_object(slice->start, p);
699728
w_object(slice->stop, p);
700729
w_object(slice->step, p);
730+
w_complete(v, p);
701731
}
702732
else {
703733
W_TYPE(TYPE_UNKNOWN, p);

0 commit comments

Comments
 (0)