Skip to content

Commit 40c291c

Browse files
committed
Fix GH-20444: Dom\XMLDocument::C14N() seems broken compared to DOMDocument::C14N()
C14N code expects namespace to be in-tree, but we store namespaces in a different way out-of-tree to avoid reconciliations that break the tree structure in a way unexpected by the DOM spec. In the DOM spec, namespace nodes don't exist; they're regular attributes. To solve this, we temporarily make fake namespace nodes that we later remove. Closes GH-20457.
1 parent 7d4ba80 commit 40c291c

File tree

5 files changed

+204
-12
lines changed

5 files changed

+204
-12
lines changed

NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ PHP NEWS
1818
- DOM:
1919
. Fixed bug GH-20722 (Null pointer dereference in DOM namespace node cloning
2020
via clone on malformed objects). (ndossche)
21+
. Fixed bug GH-20444 (Dom\XMLDocument::C14N() seems broken compared
22+
to DOMDocument::C14N()). (ndossche)
2123

2224
- GD:
2325
. Fixed bug GH-20622 (imagestring/imagestringup overflow). (David Carlier)

ext/dom/node.c

Lines changed: 117 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2081,6 +2081,97 @@ PHP_METHOD(DOMNode, lookupNamespaceURI)
20812081
}
20822082
/* }}} end dom_node_lookup_namespace_uri */
20832083

2084+
static void dom_relink_ns_decls_element(HashTable *links, xmlNodePtr node)
2085+
{
2086+
if (node->type == XML_ELEMENT_NODE) {
2087+
for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
2088+
if (php_dom_ns_is_fast((const xmlNode *) attr, php_dom_ns_is_xmlns_magic_token)) {
2089+
xmlNsPtr ns = xmlMalloc(sizeof(*ns));
2090+
if (!ns) {
2091+
return;
2092+
}
2093+
2094+
zval *zv = zend_hash_index_lookup(links, (zend_ulong) node);
2095+
if (Z_ISNULL_P(zv)) {
2096+
ZVAL_LONG(zv, 1);
2097+
} else {
2098+
Z_LVAL_P(zv)++;
2099+
}
2100+
2101+
bool should_free;
2102+
xmlChar *attr_value = php_libxml_attr_value(attr, &should_free);
2103+
2104+
memset(ns, 0, sizeof(*ns));
2105+
ns->type = XML_LOCAL_NAMESPACE;
2106+
ns->href = should_free ? attr_value : xmlStrdup(attr_value);
2107+
ns->prefix = attr->ns->prefix ? xmlStrdup(attr->name) : NULL;
2108+
ns->next = node->nsDef;
2109+
node->nsDef = ns;
2110+
2111+
ns->_private = attr;
2112+
if (attr->prev) {
2113+
attr->prev = attr->next;
2114+
} else {
2115+
node->properties = attr->next;
2116+
}
2117+
if (attr->next) {
2118+
attr->next->prev = attr->prev;
2119+
}
2120+
}
2121+
}
2122+
2123+
/* The default namespace is handled separately from the other namespaces in C14N.
2124+
* The default namespace is explicitly looked up while the other namespaces are
2125+
* deduplicated and compared to a list of visible namespaces. */
2126+
if (node->ns && !node->ns->prefix) {
2127+
/* Workaround for the behaviour where the xmlSearchNs() call inside c14n.c
2128+
* can return the current namespace. */
2129+
zend_hash_index_add_new_ptr(links, (zend_ulong) node | 1, node->ns);
2130+
node->ns = xmlSearchNs(node->doc, node, NULL);
2131+
}
2132+
}
2133+
}
2134+
2135+
static void dom_relink_ns_decls(HashTable *links, xmlNodePtr root)
2136+
{
2137+
dom_relink_ns_decls_element(links, root);
2138+
2139+
xmlNodePtr base = root;
2140+
xmlNodePtr node = base->children;
2141+
while (node != NULL) {
2142+
dom_relink_ns_decls_element(links, node);
2143+
node = php_dom_next_in_tree_order(node, base);
2144+
}
2145+
}
2146+
2147+
static void dom_unlink_ns_decls(HashTable *links)
2148+
{
2149+
ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(links, zend_ulong h, zval *data) {
2150+
if (h & 1) {
2151+
xmlNodePtr node = (xmlNodePtr) (h ^ 1);
2152+
node->ns = Z_PTR_P(data);
2153+
} else {
2154+
xmlNodePtr node = (xmlNodePtr) h;
2155+
while (Z_LVAL_P(data)-- > 0) {
2156+
xmlNsPtr ns = node->nsDef;
2157+
node->nsDef = ns->next;
2158+
2159+
xmlAttrPtr attr = ns->_private;
2160+
if (attr->prev) {
2161+
attr->prev->next = attr;
2162+
} else {
2163+
node->properties = attr;
2164+
}
2165+
if (attr->next) {
2166+
attr->next->prev = attr;
2167+
}
2168+
2169+
xmlFreeNs(ns);
2170+
}
2171+
}
2172+
} ZEND_HASH_FOREACH_END();
2173+
}
2174+
20842175
static int dom_canonicalize_node_parent_lookup_cb(void *user_data, xmlNodePtr node, xmlNodePtr parent)
20852176
{
20862177
xmlNodePtr root = user_data;
@@ -2136,7 +2227,23 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
21362227

21372228
docp = nodep->doc;
21382229

2139-
if (! docp) {
2230+
HashTable links;
2231+
bool modern = php_dom_follow_spec_node(nodep);
2232+
if (modern) {
2233+
xmlNodePtr root = nodep;
2234+
while (root->parent) {
2235+
root = root->parent;
2236+
}
2237+
2238+
if (UNEXPECTED(root->type != XML_DOCUMENT_NODE && root->type != XML_HTML_DOCUMENT_NODE)) {
2239+
php_dom_throw_error_with_message(HIERARCHY_REQUEST_ERR, "Canonicalization can only happen on nodes attached to a document.", /* strict */ true);
2240+
RETURN_THROWS();
2241+
}
2242+
2243+
zend_hash_init(&links, 0, NULL, NULL, false);
2244+
dom_relink_ns_decls(&links, xmlDocGetRootElement(docp));
2245+
} else if (!docp) {
2246+
/* Note: not triggerable with modern DOM */
21402247
zend_throw_error(NULL, "Node must be associated with a document");
21412248
RETURN_THROWS();
21422249
}
@@ -2158,12 +2265,12 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
21582265
if (!tmp) {
21592266
/* if mode == 0 then $xpath arg is 3, if mode == 1 then $xpath is 4 */
21602267
zend_argument_value_error(3 + mode, "must have a \"query\" key");
2161-
RETURN_THROWS();
2268+
goto clean_links;
21622269
}
21632270
if (Z_TYPE_P(tmp) != IS_STRING) {
21642271
/* if mode == 0 then $xpath arg is 3, if mode == 1 then $xpath is 4 */
21652272
zend_argument_type_error(3 + mode, "\"query\" option must be a string, %s given", zend_zval_value_name(tmp));
2166-
RETURN_THROWS();
2273+
goto clean_links;
21672274
}
21682275
xquery = Z_STRVAL_P(tmp);
21692276

@@ -2195,7 +2302,7 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
21952302
}
21962303
xmlXPathFreeContext(ctxp);
21972304
zend_throw_error(NULL, "XPath query did not return a nodeset");
2198-
RETURN_THROWS();
2305+
goto clean_links;
21992306
}
22002307
}
22012308

@@ -2264,6 +2371,12 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
22642371
RETURN_LONG(bytes);
22652372
}
22662373
}
2374+
2375+
clean_links:
2376+
if (modern) {
2377+
dom_unlink_ns_decls(&links);
2378+
zend_hash_destroy(&links);
2379+
}
22672380
}
22682381
/* }}} */
22692382

ext/dom/tests/canonicalization.phpt

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,32 +21,46 @@ $dom = new DOMDocument();
2121
$dom->loadXML($xml);
2222
$doc = $dom->documentElement->firstChild;
2323

24+
$newDom = Dom\XMLDocument::createFromString($xml);
25+
$newDoc = $newDom->documentElement->firstChild;
26+
$counter = 0;
27+
28+
function check($doc, $newDoc, ...$args) {
29+
global $counter;
30+
$counter++;
31+
echo $doc->C14N(...$args)."\n\n";
32+
if ($doc->C14N(...$args) !== $newDoc->C14N(...$args)) {
33+
var_dump($doc->C14N(...$args), $newDoc->C14N(...$args));
34+
throw new Error("mismatch: $counter");
35+
}
36+
}
37+
2438
/* inclusive/without comments first child element of doc element is context. */
25-
echo $doc->C14N()."\n\n";
39+
check($doc, $newDoc);
2640

2741
/* exclusive/without comments first child element of doc element is context. */
28-
echo $doc->c14N(TRUE)."\n\n";
42+
check($doc, $newDoc, TRUE);
2943

3044
/* inclusive/with comments first child element of doc element is context. */
31-
echo $doc->C14N(FALSE, TRUE)."\n\n";
45+
check($doc, $newDoc, FALSE, TRUE);
3246

3347
/* exclusive/with comments first child element of doc element is context. */
34-
echo $doc->C14N(TRUE, TRUE)."\n\n";
48+
check($doc, $newDoc, TRUE, TRUE);
3549

3650
/* exclusive/without comments using xpath query. */
37-
echo $doc->c14N(TRUE, FALSE, array('query'=>'(//. | //@* | //namespace::*)'))."\n\n";
51+
check($doc, $newDoc, TRUE, FALSE, array('query'=>'(//. | //@* | //namespace::*)'))."\n\n";
3852

3953
/* exclusive/without comments first child element of doc element is context.
4054
using xpath query with registered namespace.
4155
test namespace prefix is also included. */
42-
echo $doc->c14N(TRUE, FALSE,
56+
check($doc, $newDoc, TRUE, FALSE,
4357
array('query'=>'(//a:contain | //a:bar | .//namespace::*)',
4458
'namespaces'=>array('a'=>'http://www.example.com/ns/foo')),
45-
array('test'))."\n\n";
59+
array('test'));
4660

4761
/* exclusive/without comments first child element of doc element is context.
4862
test namespace prefix is also included */
49-
echo $doc->C14N(TRUE, FALSE, NULL, array('test'));
63+
check($doc, $newDoc, TRUE, FALSE, NULL, array('test'));
5064
?>
5165
--EXPECT--
5266
<contain xmlns="http://www.example.com/ns/foo" xmlns:fubar="http://www.example.com/ns/fubar" xmlns:test="urn::test">
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
--TEST--
2+
Canonicalize unattached node should fail
3+
--EXTENSIONS--
4+
dom
5+
--FILE--
6+
<?php
7+
8+
$d = \Dom\XMLDocument::createFromString('<root><child/></root>');
9+
$child = $d->documentElement->firstChild;
10+
$child->remove();
11+
12+
try {
13+
$child->C14N();
14+
} catch (Dom\DOMException $e) {
15+
echo $e->getMessage(), "\n";
16+
}
17+
18+
?>
19+
--EXPECT--
20+
Canonicalization can only happen on nodes attached to a document.
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
--TEST--
2+
GH-20444 (Dom\XMLDocument::C14N() seems broken compared to DOMDocument::C14N())
3+
--EXTENSIONS--
4+
dom
5+
--FILE--
6+
<?php
7+
8+
$xml = <<<EOF
9+
<?xml version="1.0" encoding="UTF-8"?>
10+
<test:root xmlns:test="http://example.com/dummy/ns">
11+
<test:a xmlns:test="http://example.com/dummy/ns"/>
12+
<test:b test:kind="123">abc</test:b>
13+
</test:root>
14+
EOF;
15+
16+
$d = \Dom\XMLDocument::createFromString($xml);
17+
var_dump($d->C14N(true));
18+
19+
$xml = <<<EOF
20+
<?xml version="1.0" encoding="UTF-8"?>
21+
<ns1:root xmlns:ns1="http://example.com/dummy/ns">
22+
<ns1:a/>
23+
<ns1:b>
24+
<ns1:c>123</ns1:c>
25+
</ns1:b>
26+
</ns1:root>
27+
EOF;
28+
29+
$d = \Dom\XMLDocument::createFromString($xml);
30+
var_dump($d->C14N());
31+
32+
?>
33+
--EXPECT--
34+
string(128) "<test:root xmlns:test="http://example.com/dummy/ns">
35+
<test:a></test:a>
36+
<test:b test:kind="123">abc</test:b>
37+
</test:root>"
38+
string(134) "<ns1:root xmlns:ns1="http://example.com/dummy/ns">
39+
<ns1:a></ns1:a>
40+
<ns1:b>
41+
<ns1:c>123</ns1:c>
42+
</ns1:b>
43+
</ns1:root>"

0 commit comments

Comments
 (0)