Skip to content

Commit fb9b9f6

Browse files
Marc-Etienne Barrut and claude committed
sync data/cdata children when setting element value (fixes #6)
With parse_cdata=True the document is parsed with data nodes, so the element text lives in node_data/node_cdata children, which take precedence over the element's own value during printing. Setting node.value only updated the element value, leaving serialization unchanged. Now the first data/cdata child is updated as well, and any extra data/cdata children are removed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 213d165 commit fb9b9f6

2 files changed

Lines changed: 61 additions & 1 deletion

File tree

rapidxml/c_ext/src/base_object.cpp

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,35 @@ static int rapidxml_BaseObject_setvalue(rapidxml_BaseObject* self,
163163
if (!PyArg_Parse(arg, "s*", &value)) {
164164
return -1;
165165
}
166-
self->underlying_obj->value(self->document->allocate_string(static_cast<const char*>(value.buf)));
166+
char* new_value = self->document->allocate_string(static_cast<const char*>(value.buf));
167+
self->underlying_obj->value(new_value);
168+
if (IS_NODE(reinterpret_cast<PyObject*>(self))) {
169+
rapidxml::xml_node<>* node =
170+
static_cast<rapidxml::xml_node<>*>(self->underlying_obj);
171+
if (node->type() == rapidxml::node_element) {
172+
/*
173+
** When the document is parsed with data nodes (parse_cdata=True),
174+
** the element value is held by data/cdata children, which take
175+
** precedence over the element value when printing.
176+
** Keep them in sync: update the first one, drop the others.
177+
*/
178+
rapidxml::xml_node<>* child = node->first_node();
179+
bool found = false;
180+
while (child != NULL) {
181+
rapidxml::xml_node<>* next = child->next_sibling();
182+
if (child->type() == rapidxml::node_data ||
183+
child->type() == rapidxml::node_cdata) {
184+
if (!found) {
185+
child->value(new_value);
186+
found = true;
187+
} else {
188+
node->remove_node(child);
189+
}
190+
}
191+
child = next;
192+
}
193+
}
194+
}
167195
PyBuffer_Release(&value);
168196
return 0;
169197
}

tests/test_issue6.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
#
4+
# File: test_issue6.py
5+
# by Arzaroth Lekva
6+
# arzaroth@arzaroth.com
7+
#
8+
9+
import rapidxml
10+
11+
def test_set_value_with_parse_cdata():
    """Assign a new value to an element parsed with parse_cdata=True.

    Checks that the new text is visible both when reading ``value`` back
    and in the output of ``unparse()``.
    """
    document = rapidxml.RapidXml(
        b'<root><loc>old value</loc></root>',
        parse_cdata=True,
    )
    root = document.first_node()
    target = root.first_node("loc")

    target.value = "new value"

    assert target.value == "new value"
    assert document.unparse() == '<root><loc>new value</loc></root>'
18+
19+
def test_set_value_with_cdata_node():
20+
r = rapidxml.RapidXml(b'<root><loc><![CDATA[old value]]></loc></root>',
21+
parse_cdata=True)
22+
loc = r.first_node().first_node("loc")
23+
loc.value = "new value"
24+
assert loc.value == "new value"
25+
assert r.unparse() == '<root><loc><![CDATA[new value]]></loc></root>'
26+
27+
def test_set_value_without_parse_cdata():
28+
r = rapidxml.RapidXml(b'<root><loc>old value</loc></root>')
29+
loc = r.first_node().first_node("loc")
30+
loc.value = "new value"
31+
assert loc.value == "new value"
32+
assert r.unparse() == '<root><loc>new value</loc></root>'

0 commit comments

Comments
 (0)