Skip to content

Commit 8685e5e

Browse files
authored
Merge pull request #90 from ioigoume/feature/dom-migration-php84-ioigoume
Canonicalize XML assertions and harden DOM parsing/namespace handling
2 parents 2bb60e5 + af10c61 commit 8685e5e

67 files changed

Lines changed: 557 additions & 335 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

src/XML/Container/AbstractTestContainer.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,5 +31,9 @@ abstract public function getLanguageValue(string $language = 'en'): LanguageValu
3131
abstract public function getXMLAttribute(int $x = 1): XMLAttribute;
3232

3333

34+
/**
35+
* @param non-empty-string $text
36+
* @return \Dom\NodeList<\Dom\Node>
37+
*/
3438
abstract public function getDOMText(string $text): Dom\NodeList;
3539
}

src/XML/Container/XMLSchemaElementsTrait.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ trait XMLSchemaElementsTrait
2626
/** @var array<positive-int, \SimpleSAML\XMLSchema\XML\Appinfo> */
2727
protected array $appinfo = [];
2828

29-
/** @var array<non-empty-string, \Dom\Text> */
29+
/** @var array<non-empty-string, \Dom\NodeList> */
3030
protected array $domText = [];
3131

3232

src/XML/DOMDocumentFactory.php

Lines changed: 114 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
use SimpleSAML\XPath\XPath;
1313

1414
use function file_get_contents;
15-
use function func_num_args;
15+
use function restore_error_handler;
16+
use function set_error_handler;
1617
use function sprintf;
1718
use function strpos;
1819

@@ -22,19 +23,50 @@
2223
final class DOMDocumentFactory
2324
{
2425
/**
26+
* Base libxml options used when parsing XML.
27+
*
28+
* Note: We add LIBXML_NO_XXE automatically when available (libxml >= 2.13.0).
29+
*
2530
* @var non-negative-int
26-
* TODO: Add LIBXML_NO_XXE to the defaults when libxml 2.13.0 become generally available
2731
*/
28-
public const int DEFAULT_OPTIONS = \LIBXML_COMPACT | \LIBXML_NOENT | \LIBXML_NONET | \LIBXML_NSCLEAN;
32+
public const int DEFAULT_OPTIONS_BASE = \LIBXML_COMPACT | \LIBXML_NOENT | \LIBXML_NONET | \LIBXML_NSCLEAN;
2933

3034

3135
/**
32-
* @param string $xml
33-
* @param non-negative-int $options
36+
* @return non-negative-int
37+
*/
38+
public static function getDefaultOptions(): int
39+
{
40+
$options = self::DEFAULT_OPTIONS_BASE;
41+
42+
// Add LIBXML_NO_XXE to the defaults when available (libxml >= 2.13.0)
43+
if (defined('LIBXML_NO_XXE')) {
44+
$options |= \LIBXML_NO_XXE;
45+
}
46+
47+
return $options;
48+
}
49+
50+
51+
/**
52+
* Create a DOM XML document from an XML string.
53+
*
54+
* The input is validated to reject potentially dangerous constructs (e.g. DOCTYPE).
55+
* Parser warnings/notices are converted into {@see \DOMException}.
56+
*
57+
* @param non-empty-string $xml XML document as a string.
58+
* @param non-negative-int|null $options Libxml parser options. If {@see null}, default options will be used
59+
* (including {@see \LIBXML_NO_XXE} when available).
60+
*
61+
* @return \Dom\XMLDocument
62+
*
63+
* @throws \SimpleSAML\Assert\AssertionFailedException If $xml is empty/whitespace-only or contains a DOCTYPE.
64+
* @throws \SimpleSAML\XML\Exception\RuntimeException If dangerous XML is detected (DOCTYPE is not allowed).
65+
* @throws \DOMException If parsing emits warnings/notices or fails.
3466
*/
3567
public static function fromString(
3668
string $xml,
37-
int $options = self::DEFAULT_OPTIONS,
69+
?int $options = null,
3870
): Dom\XMLDocument {
3971
Assert::notWhitespaceOnly($xml);
4072
Assert::notRegex(
@@ -44,13 +76,25 @@ public static function fromString(
4476
RuntimeException::class,
4577
);
4678

47-
// If LIBXML_NO_XXE is available and option not set
48-
if (func_num_args() === 1 && defined('LIBXML_NO_XXE')) {
49-
$options |= \LIBXML_NO_XXE;
50-
}
79+
$options = $options ?? self::getDefaultOptions();
5180

5281
$domDocument = self::create();
53-
$loaded = $domDocument->createFromString($xml, $options);
82+
83+
// Convert parser warnings/notices into DOMException to avoid PHP warnings leaking into test output
84+
set_error_handler(
85+
/**
86+
* @throws \DOMException
87+
*/
88+
static function (int $severity, string $message): never {
89+
throw new \DOMException($message);
90+
},
91+
);
92+
93+
try {
94+
$loaded = $domDocument->createFromString($xml, $options);
95+
} finally {
96+
restore_error_handler();
97+
}
5498

5599
foreach ($domDocument->childNodes as $child) {
56100
Assert::false(
@@ -65,12 +109,25 @@ public static function fromString(
65109

66110

67111
/**
68-
* @param string $file
69-
* @param non-negative-int $options
112+
* Create a DOM XML document from an XML file.
113+
*
114+
* The file is read into a string and then parsed using {@see self::fromString()}.
115+
*
116+
* @param non-empty-string $file Path to the XML file.
117+
* @param non-negative-int|null $options Libxml parser options. If {@see null}, default options will be used
118+
* (including {@see \LIBXML_NO_XXE} when available).
119+
*
120+
* @return \Dom\XMLDocument
121+
*
122+
* @throws \SimpleSAML\XML\Exception\IOException If the file cannot be read.
123+
* @throws \SimpleSAML\Assert\AssertionFailedException If the file content is empty/whitespace-only
124+
* or contains a DOCTYPE.
125+
* @throws \SimpleSAML\XML\Exception\RuntimeException If dangerous XML is detected (DOCTYPE is not allowed).
126+
* @throws \DOMException If parsing emits warnings/notices or fails.
70127
*/
71128
public static function fromFile(
72129
string $file,
73-
int $options = self::DEFAULT_OPTIONS,
130+
?int $options = null,
74131
): Dom\XMLDocument {
75132
error_clear_last();
76133
$xml = @file_get_contents($file);
@@ -82,7 +139,8 @@ public static function fromFile(
82139
}
83140

84141
Assert::notWhitespaceOnly($xml, sprintf('File "%s" does not have content', $file), RuntimeException::class);
85-
return (func_num_args() < 2) ? static::fromString($xml) : static::fromString($xml, $options);
142+
143+
return static::fromString($xml, $options);
86144
}
87145

88146

@@ -96,18 +154,31 @@ public static function create(string $encoding = 'UTF-8'): Dom\XMLDocument
96154

97155

98156
/**
99-
* @param \Dom\XMLDocument $doc
157+
* Normalize namespace declarations in an XML document.
158+
*
159+
* This method collects namespace declarations required by prefixed elements and moves the corresponding
160+
* {@code xmlns:prefix} declarations to the document root, removing {@code xmlns} / {@code xmlns:*} attributes
161+
* from descendant elements.
162+
*
163+
* Note: this mutates the provided document and is not a substitute for XML canonicalization (C14N).
164+
*
165+
* @param \Dom\XMLDocument $doc The XML document to normalize.
166+
*
167+
* @return \Dom\XMLDocument The same document instance, potentially modified. If the document has no root element
168+
* or no namespace declarations to normalize, it is returned unchanged.
100169
*/
101170
public static function normalizeDocument(Dom\XMLDocument $doc): Dom\XMLDocument
102171
{
103172
// Get the root element
104173
$root = $doc->documentElement;
174+
if ($root === null) {
175+
return $doc;
176+
}
105177

106-
// Collect all xmlns attributes from the document
107178
$xpath = XPath::getXPath($doc);
108179
$xmlnsAttributes = [];
109180

110-
// Register all namespaces to ensure XPath can handle them
181+
// Collect namespace declarations needed for prefixed elements in the document
111182
foreach ($xpath->query('//*[namespace::*]') as $node) {
112183
if ($node instanceof Dom\Element) {
113184
$name = 'xmlns:' . $node->prefix;
@@ -123,40 +194,45 @@ public static function normalizeDocument(Dom\XMLDocument $doc): Dom\XMLDocument
123194
return $doc;
124195
}
125196

126-
// Remove xmlns attributes from all elements
127-
$nodes = $xpath->query('//*[namespace::*]');
128-
foreach ($nodes as $node) {
129-
if ($node instanceof Dom\Element) {
130-
$attributesToRemove = [];
131-
foreach ($node->attributes as $attr) {
132-
if (strpos($attr->nodeName, 'xmlns') === 0 || $attr->nodeName === 'xmlns') {
133-
$attributesToRemove[] = $attr->namespaceURI;
134-
}
197+
// Remove xmlns attributes from all elements (proper XMLNS namespace removal)
198+
foreach ($xpath->query('//*[namespace::*]') as $node) {
199+
if (!$node instanceof Dom\Element) {
200+
continue;
201+
}
202+
203+
foreach ($node->attributes as $attr) {
204+
if ($attr->namespaceURI === C::NS_XMLNS) {
205+
$node->removeAttributeNS(C::NS_XMLNS, $attr->localName);
206+
continue;
135207
}
136208

137-
foreach ($attributesToRemove as $attrName) {
138-
$node->removeAttribute($attrName);
209+
if (strpos($attr->nodeName, 'xmlns') === 0 || $attr->nodeName === 'xmlns') {
210+
// Fallback for implementations that still expose xmlns attrs without namespaceURI
211+
$node->removeAttribute($attr->nodeName);
139212
}
140213
}
141214
}
142215

143216
// Add all collected xmlns attributes to the root element
144217
foreach ($xmlnsAttributes as $name => $value) {
145-
$root->setAttribute($name, $value);
218+
$root->setAttributeNS(C::NS_XMLNS, $name, $value);
146219
}
147220

148-
// Get the normalized string
149-
/** @var \Dom\XMLDocument $ownerDocument */
150-
$ownerDocument = $root->ownerDocument;
151-
152-
// Return the normalized XML
153-
return static::fromString($ownerDocument->saveXml($ownerDocument->documentElement));
221+
return $doc;
154222
}
155223

156224

157225
/**
158-
* @param \Dom\Element $elt
159-
* @param string|null $prefix
226+
* Resolve a namespace URI for a given prefix in the context of an element.
227+
*
228+
* The reserved prefixes {@code xml} and {@code xmlns} are mapped to their well-known namespace URIs.
229+
* For all other prefixes, this method inspects the in-scope namespaces of the document element.
230+
*
231+
* @param \Dom\Element $elt An element belonging to the document whose in-scope namespaces will be consulted.
232+
* @param string|null $prefix The namespace prefix to resolve. Use {@see null} to resolve the default namespace.
233+
*
234+
* @return string|null The namespace URI associated with the given prefix, or {@see null}
235+
* if the prefix is not bound.
160236
*/
161237
public static function lookupNamespaceURI(Dom\Element $elt, ?string $prefix): ?string
162238
{
@@ -167,11 +243,9 @@ public static function lookupNamespaceURI(Dom\Element $elt, ?string $prefix): ?s
167243
return C::NS_XMLNS;
168244
}
169245

170-
171246
/** @var \Dom\NamespaceInfo[] $namespaces */
172247
$namespaces = $elt->ownerDocument->documentElement->getInScopeNamespaces();
173248

174-
$xmlnsAttributes = [];
175249
foreach ($namespaces as $ns) {
176250
if ($ns->prefix === $prefix) {
177251
return $ns->namespaceURI;

src/XML/ExtendableAttributesTrait.php

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
use Dom;
88
use RuntimeException;
99
use SimpleSAML\XML\Assert\Assert;
10-
use SimpleSAML\XML\Attribute;
1110
use SimpleSAML\XML\Constants as C;
1211
use SimpleSAML\XMLSchema\Exception\InvalidDOMAttributeException;
1312
use SimpleSAML\XMLSchema\Exception\SchemaViolationException;
@@ -109,6 +108,10 @@ protected static function getAttributesNSFromXML(
109108
Assert::oneOf($namespace, NS::$PREDEFINED);
110109

111110
foreach ($xml->attributes as $a) {
111+
if ($a->namespaceURI === C::NS_XMLNS) {
112+
continue;
113+
}
114+
112115
if (
113116
$exclusionList
114117
&& (in_array([$a->namespaceURI, $a->localName], $exclusionList, true)
@@ -148,6 +151,10 @@ protected static function getAttributesNSFromXML(
148151
}
149152

150153
foreach ($xml->attributes as $a) {
154+
if ($a->namespaceURI === C::NS_XMLNS) {
155+
continue;
156+
}
157+
151158
if (in_array([$a->namespaceURI, $a->localName], $exclusionList, true)) {
152159
continue;
153160
} elseif (!in_array($a->namespaceURI, $namespace, true)) {

src/XML/TestUtils/SerializableElementTestTrait.php

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,12 @@
55
namespace SimpleSAML\XML\TestUtils;
66

77
use Dom;
8+
use PHPUnit\Framework\Assert;
89
use PHPUnit\Framework\Attributes\Depends;
10+
use SimpleSAML\XML\DOMDocumentFactory;
911

1012
use function class_exists;
13+
use function strval;
1114

1215
/**
1316
* Test for Serializable XML classes to perform default serialization tests.
@@ -48,7 +51,7 @@ public function testUnmarshalling(): void
4851
} else {
4952
$elt = self::$testedClass::fromXML(self::$xmlRepresentation->documentElement);
5053

51-
$this->assertEquals(
54+
$this->assertXmlStringEquals(
5255
self::$xmlRepresentation->saveXml(self::$xmlRepresentation->documentElement),
5356
strval($elt),
5457
);
@@ -74,10 +77,25 @@ public function testSerialization(): void
7477
. ':$xmlRepresentation to a DOMDocument representing the XML-class being tested',
7578
);
7679
} else {
77-
$this->assertEquals(
80+
$this->assertXmlStringEquals(
7881
self::$xmlRepresentation->saveXml(self::$xmlRepresentation->documentElement),
7982
strval(unserialize(serialize(self::$testedClass::fromXML(self::$xmlRepresentation->documentElement)))),
8083
);
8184
}
8285
}
86+
87+
88+
private function assertXmlStringEquals(string $expectedXml, string $actualXml): void
89+
{
90+
$expectedDoc = DOMDocumentFactory::fromString($expectedXml);
91+
$actualDoc = DOMDocumentFactory::fromString($actualXml);
92+
93+
Assert::assertNotNull($expectedDoc->documentElement);
94+
Assert::assertNotNull($actualDoc->documentElement);
95+
96+
Assert::assertSame(
97+
$expectedDoc->documentElement->C14N(),
98+
$actualDoc->documentElement->C14N(),
99+
);
100+
}
83101
}

src/XPath/XPath.php

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,11 @@ private static function registerAncestorNamespaces(Dom\XPath $xp, Dom\Node $node
125125
$uri = (string) $attr->nodeValue;
126126

127127
if (
128-
$prefix === '' || $prefix === null || $prefix === 'xmlns' || $uri === '' || isset($prefixToUri[$prefix])
128+
$prefix === ''
129+
|| $prefix === null
130+
|| $prefix === 'xmlns'
131+
|| $uri === ''
132+
|| isset($prefixToUri[$prefix])
129133
) {
130134
continue;
131135
}

0 commit comments

Comments
 (0)