Skip to content

Commit 4197762

Browse files
[3.11] gh-145986: Avoid unbounded C recursion in conv_content_model in pyexpat.c (CVE-2026-4224) (GH-145987)
Fix a C stack overflow (CVE-2026-4224) when an Expat parser with a registered `ElementDeclHandler` parses an inline DTD containing a deeply nested content model. (cherry picked from commit eb0e8be) (cherry picked from commit e5caf45) Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
1 parent 86a67f8 commit 4197762

File tree

3 files changed

+30
-1
lines changed

3 files changed

+30
-1
lines changed

Lib/test/test_pyexpat.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,24 @@ def test_change_size_2(self):
675675
parser.Parse(xml2, True)
676676
self.assertEqual(self.n, 4)
677677

678+
class ElementDeclHandlerTest(unittest.TestCase):
679+
def test_deeply_nested_content_model(self):
680+
# This should raise a RecursionError and not crash.
681+
# See https://github.com/python/cpython/issues/145986.
682+
N = 500_000
683+
data = (
684+
b'<!DOCTYPE root [\n<!ELEMENT root '
685+
+ b'(a, ' * N + b'a' + b')' * N
686+
+ b'>\n]>\n<root/>\n'
687+
)
688+
689+
parser = expat.ParserCreate()
690+
parser.ElementDeclHandler = lambda _1, _2: None
691+
with support.infinite_recursion():
692+
with self.assertRaises(RecursionError):
693+
parser.Parse(data)
694+
695+
678696
class MalformedInputTest(unittest.TestCase):
679697
def test1(self):
680698
xml = b"\0\r\n"
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
:mod:`xml.parsers.expat`: Fixed a crash caused by unbounded C recursion when
2+
converting deeply nested XML content models with
3+
:meth:`~xml.parsers.expat.xmlparser.ElementDeclHandler`.
4+
This addresses :cve:`2026-4224`.

Modules/pyexpat.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#endif
44

55
#include "Python.h"
6+
#include "pycore_ceval.h" // _Py_EnterRecursiveCall()
67
#include "pycore_runtime.h" // _Py_ID()
78
#include <ctype.h>
89

@@ -578,6 +579,10 @@ static PyObject *
578579
conv_content_model(XML_Content * const model,
579580
PyObject *(*conv_string)(const XML_Char *))
580581
{
582+
if (_Py_EnterRecursiveCall(" in conv_content_model")) {
583+
return NULL;
584+
}
585+
581586
PyObject *result = NULL;
582587
PyObject *children = PyTuple_New(model->numchildren);
583588
int i;
@@ -589,14 +594,16 @@ conv_content_model(XML_Content * const model,
589594
conv_string);
590595
if (child == NULL) {
591596
Py_XDECREF(children);
592-
return NULL;
597+
goto done;
593598
}
594599
PyTuple_SET_ITEM(children, i, child);
595600
}
596601
result = Py_BuildValue("(iiO&N)",
597602
model->type, model->quant,
598603
conv_string,model->name, children);
599604
}
605+
done:
606+
_Py_LeaveRecursiveCall();
600607
return result;
601608
}
602609

0 commit comments

Comments
 (0)