|
| 1 | +From 034dfbac902baa560423f1268dedf74e6730573a Mon Sep 17 00:00:00 2001 |
| 2 | +From: AllSpark <allspark@microsoft.com> |
| 3 | +Date: Wed, 29 Apr 2026 09:37:00 +0000 |
| 4 | +Subject: [PATCH] LP#2146291: Set resolve_entities='internal' as default for |
| 5 | + XMLParser, ETCompatXMLParser and iterparse; update signatures and docs. |
| 6 | + |
| 7 | +Signed-off-by: Azure Linux Security Servicing Account <azurelinux-security@microsoft.com> |
| 8 | +Upstream-reference: AI Backport of https://github.com/lxml/lxml/commit/ab431ea0b9a7357d968f1d1c5c614649e9aaf358.patch |
| 9 | +--- |
| 10 | + src/lxml/iterparse.pxi | 11 +++++++---- |
| 11 | + src/lxml/parser.pxi | 10 +++++----- |
| 12 | + 2 files changed, 12 insertions(+), 9 deletions(-) |
| 13 | + |
| 14 | +diff --git a/src/lxml/iterparse.pxi b/src/lxml/iterparse.pxi |
| 15 | +index a7299da..52d0ea7 100644 |
| 16 | +--- a/src/lxml/iterparse.pxi |
| 17 | ++++ b/src/lxml/iterparse.pxi |
| 18 | +@@ -6,7 +6,8 @@ cdef class iterparse: |
| 19 | + u"""iterparse(self, source, events=("end",), tag=None, \ |
| 20 | + attribute_defaults=False, dtd_validation=False, \ |
| 21 | + load_dtd=False, no_network=True, remove_blank_text=False, \ |
| 22 | +- remove_comments=False, remove_pis=False, encoding=None, \ |
| 23 | ++ compact=True, resolve_entities='internal', remove_comments=False, \ |
| 24 | ++ remove_pis=False, strip_cdata=True, encoding=None, \ |
| 25 | + html=False, recover=None, huge_tree=False, schema=None) |
| 26 | + |
| 27 | + Incremental parser. |
| 28 | +@@ -42,9 +43,11 @@ cdef class iterparse: |
| 29 | + - remove_blank_text: discard blank text nodes |
| 30 | + - remove_comments: discard comments |
| 31 | + - remove_pis: discard processing instructions |
| 32 | +- - strip_cdata: replace CDATA sections by normal text content (default: True) |
| 33 | ++ - strip_cdata: replace CDATA sections by normal text content (default: |
| 34 | ++ True for XML, ignored otherwise) |
| 35 | + - compact: safe memory for short text content (default: True) |
| 36 | +- - resolve_entities: replace entities by their text value (default: True) |
| 37 | ++ - resolve_entities: replace entities by their text value |
| 38 | ++ (default: 'internal' only) |
| 39 | + - huge_tree: disable security restrictions and support very deep trees |
| 40 | + and very long text content (only affects libxml2 2.7+) |
| 41 | + - html: parse input as HTML (default: XML) |
| 42 | +@@ -67,7 +70,7 @@ cdef class iterparse: |
| 43 | + def __init__(self, source, events=(u"end",), *, tag=None, |
| 44 | + attribute_defaults=False, dtd_validation=False, |
| 45 | + load_dtd=False, no_network=True, remove_blank_text=False, |
| 46 | +- compact=True, resolve_entities=True, remove_comments=False, |
| 47 | ++ compact=True, resolve_entities='internal', remove_comments=False, |
| 48 | + remove_pis=False, strip_cdata=True, encoding=None, |
| 49 | + html=False, recover=None, huge_tree=False, collect_ids=True, |
| 50 | + XMLSchema schema=None): |
| 51 | +diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi |
| 52 | +index 068cdd3..c00c524 100644 |
| 53 | +--- a/src/lxml/parser.pxi |
| 54 | ++++ b/src/lxml/parser.pxi |
| 55 | +@@ -1478,7 +1478,7 @@ _XML_DEFAULT_PARSE_OPTIONS = ( |
| 56 | + ) |
| 57 | + |
| 58 | + cdef class XMLParser(_FeedParser): |
| 59 | +- u"""XMLParser(self, encoding=None, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, schema: XMLSchema =None, huge_tree=False, remove_blank_text=False, resolve_entities=True, remove_comments=False, remove_pis=False, strip_cdata=True, collect_ids=True, target=None, compact=True) |
| 60 | ++ u"""XMLParser(self, encoding=None, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, schema: XMLSchema =None, huge_tree=False, remove_blank_text=False, resolve_entities='internal', remove_comments=False, remove_pis=False, strip_cdata=True, collect_ids=True, target=None, compact=True) |
| 61 | + |
| 62 | + The XML parser. |
| 63 | + |
| 64 | +@@ -1508,7 +1508,7 @@ cdef class XMLParser(_FeedParser): |
| 65 | + - strip_cdata - replace CDATA sections by normal text content (default: True) |
| 66 | + - compact - save memory for short text content (default: True) |
| 67 | + - collect_ids - use a hash table of XML IDs for fast access (default: True, always True with DTD validation) |
| 68 | +- - resolve_entities - replace entities by their text value (default: True) |
| 69 | ++ - resolve_entities - replace entities by their text value (default: 'internal') |
| 70 | + - huge_tree - disable security restrictions and support very deep trees |
| 71 | + and very long text content (only affects libxml2 2.7+) |
| 72 | + |
| 73 | +@@ -1525,7 +1525,7 @@ cdef class XMLParser(_FeedParser): |
| 74 | + def __init__(self, *, encoding=None, attribute_defaults=False, |
| 75 | + dtd_validation=False, load_dtd=False, no_network=True, |
| 76 | + ns_clean=False, recover=False, XMLSchema schema=None, |
| 77 | +- huge_tree=False, remove_blank_text=False, resolve_entities=True, |
| 78 | ++ huge_tree=False, remove_blank_text=False, resolve_entities='internal', |
| 79 | + remove_comments=False, remove_pis=False, strip_cdata=True, |
| 80 | + collect_ids=True, target=None, compact=True): |
| 81 | + cdef int parse_options |
| 82 | +@@ -1594,7 +1594,7 @@ cdef class ETCompatXMLParser(XMLParser): |
| 83 | + u"""ETCompatXMLParser(self, encoding=None, attribute_defaults=False, \ |
| 84 | + dtd_validation=False, load_dtd=False, no_network=True, \ |
| 85 | + ns_clean=False, recover=False, schema=None, \ |
| 86 | +- huge_tree=False, remove_blank_text=False, resolve_entities=True, \ |
| 87 | ++ huge_tree=False, remove_blank_text=False, resolve_entities='internal', \ |
| 88 | + remove_comments=True, remove_pis=True, strip_cdata=True, \ |
| 89 | + target=None, compact=True) |
| 90 | + |
| 91 | +@@ -1608,7 +1608,7 @@ cdef class ETCompatXMLParser(XMLParser): |
| 92 | + def __init__(self, *, encoding=None, attribute_defaults=False, |
| 93 | + dtd_validation=False, load_dtd=False, no_network=True, |
| 94 | + ns_clean=False, recover=False, schema=None, |
| 95 | +- huge_tree=False, remove_blank_text=False, resolve_entities=True, |
| 96 | ++ huge_tree=False, remove_blank_text=False, resolve_entities='internal', |
| 97 | + remove_comments=True, remove_pis=True, strip_cdata=True, |
| 98 | + target=None, compact=True): |
| 99 | + XMLParser.__init__(self, |
| 100 | +-- |
| 101 | +2.45.4 |
| 102 | + |
0 commit comments