1212use SimpleSAML \XPath \XPath ;
1313
1414use function file_get_contents ;
15- use function func_num_args ;
15+ use function restore_error_handler ;
16+ use function set_error_handler ;
1617use function sprintf ;
1718use function strpos ;
1819
2223final class DOMDocumentFactory
2324{
2425 /**
26+ * Base libxml options used when parsing XML.
27+ *
28+ * Note: We add LIBXML_NO_XXE automatically when available (libxml >= 2.13.0).
29+ *
2530 * @var non-negative-int
26- * TODO: Add LIBXML_NO_XXE to the defaults when libxml 2.13.0 become generally available
2731 */
28- public const int DEFAULT_OPTIONS = \LIBXML_COMPACT | \LIBXML_NOENT | \LIBXML_NONET | \LIBXML_NSCLEAN ;
32+ public const int DEFAULT_OPTIONS_BASE = \LIBXML_COMPACT | \LIBXML_NOENT | \LIBXML_NONET | \LIBXML_NSCLEAN ;
2933
3034
3135 /**
32- * @param string $xml
33- * @param non-negative-int $options
36+ * @return non-negative-int
37+ */
38+ public static function getDefaultOptions (): int
39+ {
40+ $ options = self ::DEFAULT_OPTIONS_BASE ;
41+
42+ // Add LIBXML_NO_XXE to the defaults when available (libxml >= 2.13.0)
43+ if (defined ('LIBXML_NO_XXE ' )) {
44+ $ options |= \LIBXML_NO_XXE ;
45+ }
46+
47+ return $ options ;
48+ }
49+
50+
51+ /**
52+ * Create a DOM XML document from an XML string.
53+ *
54+ * The input is validated to reject potentially dangerous constructs (e.g. DOCTYPE).
55+ * Parser warnings/notices are converted into {@see \DOMException}.
56+ *
57+ * @param non-empty-string $xml XML document as a string.
58+ * @param non-negative-int|null $options Libxml parser options. If {@see null}, default options will be used
59+ * (including {@see \LIBXML_NO_XXE} when available).
60+ *
61+ * @return \Dom\XMLDocument
62+ *
63+ * @throws \SimpleSAML\Assert\AssertionFailedException If $xml is empty/whitespace-only or contains a DOCTYPE.
64+ * @throws \SimpleSAML\XML\Exception\RuntimeException If dangerous XML is detected (DOCTYPE is not allowed).
65+ * @throws \DOMException If parsing emits warnings/notices or fails.
3466 */
3567 public static function fromString (
3668 string $ xml ,
37- int $ options = self :: DEFAULT_OPTIONS ,
69+ ? int $ options = null ,
3870 ): Dom \XMLDocument {
3971 Assert::notWhitespaceOnly ($ xml );
4072 Assert::notRegex (
@@ -44,13 +76,25 @@ public static function fromString(
4476 RuntimeException::class,
4577 );
4678
47- // If LIBXML_NO_XXE is available and option not set
48- if (func_num_args () === 1 && defined ('LIBXML_NO_XXE ' )) {
49- $ options |= \LIBXML_NO_XXE ;
50- }
79+ $ options = $ options ?? self ::getDefaultOptions ();
5180
5281 $ domDocument = self ::create ();
53- $ loaded = $ domDocument ->createFromString ($ xml , $ options );
82+
83+ // Convert parser warnings/notices into DOMException to avoid PHP warnings leaking into test output
84+ set_error_handler (
85+ /**
86+ * @throws \DOMException
87+ */
88+ static function (int $ severity , string $ message ): never {
89+ throw new \DOMException ($ message );
90+ },
91+ );
92+
93+ try {
94+ $ loaded = $ domDocument ->createFromString ($ xml , $ options );
95+ } finally {
96+ restore_error_handler ();
97+ }
5498
5599 foreach ($ domDocument ->childNodes as $ child ) {
56100 Assert::false (
@@ -65,12 +109,25 @@ public static function fromString(
65109
66110
67111 /**
68- * @param string $file
69- * @param non-negative-int $options
112+ * Create a DOM XML document from an XML file.
113+ *
114+ * The file is read into a string and then parsed using {@see self::fromString()}.
115+ *
116+ * @param non-empty-string $file Path to the XML file.
117+ * @param non-negative-int|null $options Libxml parser options. If {@see null}, default options will be used
118+ * (including {@see \LIBXML_NO_XXE} when available).
119+ *
120+ * @return \Dom\XMLDocument
121+ *
122+ * @throws \SimpleSAML\XML\Exception\IOException If the file cannot be read.
123+ * @throws \SimpleSAML\Assert\AssertionFailedException If the file content is empty/whitespace-only
124+ * or contains a DOCTYPE.
125+ * @throws \SimpleSAML\XML\Exception\RuntimeException If dangerous XML is detected (DOCTYPE is not allowed).
126+ * @throws \DOMException If parsing emits warnings/notices or fails.
70127 */
71128 public static function fromFile (
72129 string $ file ,
73- int $ options = self :: DEFAULT_OPTIONS ,
130+ ? int $ options = null ,
74131 ): Dom \XMLDocument {
75132 error_clear_last ();
76133 $ xml = @file_get_contents ($ file );
@@ -82,7 +139,8 @@ public static function fromFile(
82139 }
83140
84141 Assert::notWhitespaceOnly ($ xml , sprintf ('File "%s" does not have content ' , $ file ), RuntimeException::class);
85- return (func_num_args () < 2 ) ? static ::fromString ($ xml ) : static ::fromString ($ xml , $ options );
142+
143+ return static ::fromString ($ xml , $ options );
86144 }
87145
88146
@@ -96,18 +154,31 @@ public static function create(string $encoding = 'UTF-8'): Dom\XMLDocument
96154
97155
98156 /**
99- * @param \Dom\XMLDocument $doc
157+ * Normalize namespace declarations in an XML document.
158+ *
159+ * This method collects namespace declarations required by prefixed elements and moves the corresponding
160+ * {@code xmlns:prefix} declarations to the document root, removing {@code xmlns} / {@code xmlns:*} attributes
161+ * from descendant elements.
162+ *
163+ * Note: this mutates the provided document and is not a substitute for XML canonicalization (C14N).
164+ *
165+ * @param \Dom\XMLDocument $doc The XML document to normalize.
166+ *
167+ * @return \Dom\XMLDocument The same document instance, potentially modified. If the document has no root element
168+ * or no namespace declarations to normalize, it is returned unchanged.
100169 */
101170 public static function normalizeDocument (Dom \XMLDocument $ doc ): Dom \XMLDocument
102171 {
103172 // Get the root element
104173 $ root = $ doc ->documentElement ;
174+ if ($ root === null ) {
175+ return $ doc ;
176+ }
105177
106- // Collect all xmlns attributes from the document
107178 $ xpath = XPath::getXPath ($ doc );
108179 $ xmlnsAttributes = [];
109180
110- // Register all namespaces to ensure XPath can handle them
181+ // Collect namespace declarations needed for prefixed elements in the document
111182 foreach ($ xpath ->query ('//*[namespace::*] ' ) as $ node ) {
112183 if ($ node instanceof Dom \Element) {
113184 $ name = 'xmlns: ' . $ node ->prefix ;
@@ -123,40 +194,45 @@ public static function normalizeDocument(Dom\XMLDocument $doc): Dom\XMLDocument
123194 return $ doc ;
124195 }
125196
126- // Remove xmlns attributes from all elements
127- $ nodes = $ xpath ->query ('//*[namespace::*] ' );
128- foreach ($ nodes as $ node ) {
129- if ($ node instanceof Dom \Element) {
130- $ attributesToRemove = [];
131- foreach ($ node ->attributes as $ attr ) {
132- if (strpos ($ attr ->nodeName , 'xmlns ' ) === 0 || $ attr ->nodeName === 'xmlns ' ) {
133- $ attributesToRemove [] = $ attr ->namespaceURI ;
134- }
197+ // Remove xmlns attributes from all elements (proper XMLNS namespace removal)
198+ foreach ($ xpath ->query ('//*[namespace::*] ' ) as $ node ) {
199+ if (!$ node instanceof Dom \Element) {
200+ continue ;
201+ }
202+
203+ foreach ($ node ->attributes as $ attr ) {
204+ if ($ attr ->namespaceURI === C::NS_XMLNS ) {
205+ $ node ->removeAttributeNS (C::NS_XMLNS , $ attr ->localName );
206+ continue ;
135207 }
136208
137- foreach ($ attributesToRemove as $ attrName ) {
138- $ node ->removeAttribute ($ attrName );
209+ if (strpos ($ attr ->nodeName , 'xmlns ' ) === 0 || $ attr ->nodeName === 'xmlns ' ) {
210+ // Fallback for implementations that still expose xmlns attrs without namespaceURI
211+ $ node ->removeAttribute ($ attr ->nodeName );
139212 }
140213 }
141214 }
142215
143216 // Add all collected xmlns attributes to the root element
144217 foreach ($ xmlnsAttributes as $ name => $ value ) {
145- $ root ->setAttribute ( $ name , $ value );
218+ $ root ->setAttributeNS (C:: NS_XMLNS , $ name , $ value );
146219 }
147220
148- // Get the normalized string
149- /** @var \Dom\XMLDocument $ownerDocument */
150- $ ownerDocument = $ root ->ownerDocument ;
151-
152- // Return the normalized XML
153- return static ::fromString ($ ownerDocument ->saveXml ($ ownerDocument ->documentElement ));
221+ return $ doc ;
154222 }
155223
156224
157225 /**
158- * @param \Dom\Element $elt
159- * @param string|null $prefix
226+ * Resolve a namespace URI for a given prefix in the context of an element.
227+ *
228+ * The reserved prefixes {@code xml} and {@code xmlns} are mapped to their well-known namespace URIs.
229+ * For all other prefixes, this method inspects the in-scope namespaces of the document element.
230+ *
231+ * @param \Dom\Element $elt An element belonging to the document whose in-scope namespaces will be consulted.
232+ * @param string|null $prefix The namespace prefix to resolve. Use {@see null} to resolve the default namespace.
233+ *
234+ * @return string|null The namespace URI associated with the given prefix, or {@see null}
235+ * if the prefix is not bound.
160236 */
161237 public static function lookupNamespaceURI (Dom \Element $ elt , ?string $ prefix ): ?string
162238 {
@@ -167,11 +243,9 @@ public static function lookupNamespaceURI(Dom\Element $elt, ?string $prefix): ?s
167243 return C::NS_XMLNS ;
168244 }
169245
170-
171246 /** @var \Dom\NamespaceInfo[] $namespaces */
172247 $ namespaces = $ elt ->ownerDocument ->documentElement ->getInScopeNamespaces ();
173248
174- $ xmlnsAttributes = [];
175249 foreach ($ namespaces as $ ns ) {
176250 if ($ ns ->prefix === $ prefix ) {
177251 return $ ns ->namespaceURI ;
0 commit comments