Skip to content

Commit d8f8c70

Browse files
committed
Se agrega soporte para serializar XmlDocument.
1 parent 8506a4f commit d8f8c70

2 files changed

Lines changed: 317 additions & 0 deletions

File tree

src/XmlDocument.php

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,4 +312,37 @@ public function jsonSerialize(): array
312312
{
313313
return $this->toArray();
314314
}
315+
316+
/**
 * Builds the payload that PHP's serialize() stores for this document.
 *
 * The payload has a single entry: the markup returned by saveXml(), kept
 * under the 'xml' key. __unserialize() reads that same key to rebuild the
 * document.
 *
 * @return array{xml: string}
 */
public function __serialize(): array
{
    $markup = $this->saveXml();

    return ['xml' => $markup];
}
329+
330+
/**
 * Rebuilds the document from the payload produced by __serialize().
 *
 * Order of operations:
 *  1. Validate that the payload carries the markup as a string under 'xml',
 *     so corrupted or foreign payloads fail with an explicit error instead
 *     of an "undefined array key" warning and an incidental type error.
 *  2. Call the parent constructor before any property is assigned.
 *  3. Re-apply the formatOutput and preserveWhiteSpace settings.
 *  4. Reload the stored markup.
 *
 * @param array{xml: string} $data Data produced by __serialize().
 *
 * @throws \UnexpectedValueException If the 'xml' entry is missing or is not
 *                                   a string.
 */
public function __unserialize(array $data): void
{
    $xml = $data['xml'] ?? null;

    if (!is_string($xml)) {
        throw new \UnexpectedValueException(
            'Incomplete or ill-typed serialization data for ' . static::class
            . ': expected a string under the "xml" key.'
        );
    }

    parent::__construct();

    $this->formatOutput = true;
    $this->preserveWhiteSpace = true;

    // NOTE(review): the result of loadXml() is not inspected here; confirm
    // that it reports unparsable markup by throwing rather than by returning
    // false, otherwise a failed reload would go unnoticed.
    $this->loadXml($xml);
}
315348
}

tests/src/XmlSerializationTest.php

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/**
6+
* Derafu: XML - Library for XML manipulation.
7+
*
8+
* Copyright (c) 2026 Esteban De La Fuente Rubio / Derafu <https://www.derafu.dev>
9+
* Licensed under the MIT License.
10+
* See LICENSE file for more details.
11+
*/
12+
13+
namespace Derafu\TestsXml;
14+
15+
use Derafu\Xml\Service\XmlDecoder;
16+
use Derafu\Xml\XmlDocument;
17+
use Derafu\Xml\XmlHelper;
18+
use Derafu\Xml\XPathQuery;
19+
use PHPUnit\Framework\Attributes\CoversClass;
20+
use PHPUnit\Framework\TestCase;
21+
22+
#[CoversClass(XmlDocument::class)]
#[CoversClass(XmlDecoder::class)]
#[CoversClass(XmlHelper::class)]
#[CoversClass(XPathQuery::class)]
class XmlSerializationTest extends TestCase
{
    /**
     * Raw ISO-8859-1 XML bytes returned by the comprehensive encoding fixture.
     */
    private string $iso88591Xml;

    /**
     * Field values expected once the fixture has been decoded. They are
     * written here as UTF-8 PHP strings, whatever the encoding of the source
     * document is.
     *
     * @var array<string, string>
     */
    private array $expectedValues = [
        'vocales_min' => 'á é í ó ú à è ì ò ù',
        'vocales_may' => 'Á É Í Ó Ú À È Ì Ò Ù',
        'enie' => 'ñ Ñ',
        'dieresis' => 'ü Ü ö Ö',
        'puntuacion' => '¿Hola? ¡Mundo!',
        'simbolos' => '© ® ° ½ ¼ ¾',
        'frase' => 'Fabricación de Ñoños en Güemes',
    ];

    /**
     * Loads the ISO-8859-1 fixture before every test.
     */
    protected function setUp(): void
    {
        $fixtureFile = __DIR__ . '/../fixtures/encoding/iso88591-comprehensive.php';

        $this->iso88591Xml = require $fixtureFile;
    }

    // -------------------------------------------------------------------------
    // Case 1: UTF-8 document serialize/unserialize
    // -------------------------------------------------------------------------

    /**
     * A UTF-8 XmlDocument survives a serialize/unserialize round trip with
     * its encoding and content intact.
     */
    public function testUtf8DocumentPreservesContentAfterSerializeUnserialize(): void
    {
        $original = $this->loadDocument(
            '<?xml version="1.0" encoding="UTF-8"?><root><element>Valor</element></root>'
        );

        $restored = $this->roundTrip($original);

        self::assertInstanceOf(XmlDocument::class, $restored);
        self::assertSame('UTF-8', $restored->getEncoding());
        self::assertSame('root', $restored->getName());
        self::assertSame('Valor', $restored->query('//element'));
    }

    /**
     * The XML of a UTF-8 document with special characters is byte-for-byte
     * identical before and after a serialize/unserialize round trip.
     *
     * - ñ (U+00F1) must appear as the UTF-8 sequence 0xC3 0xB1, never as the
     *   ISO-8859-1 byte 0xF1.
     * - á (U+00E1) must appear as the UTF-8 sequence 0xC3 0xA1, never as the
     *   ISO-8859-1 byte 0xE1.
     */
    public function testUtf8DocumentWithSpecialCharactersPreservesBytes(): void
    {
        $original = $this->loadDocument(
            '<?xml version="1.0" encoding="UTF-8"?><root><nombre>áéíóú ñÑ Güemes</nombre></root>'
        );
        $xmlBefore = $original->saveXml();

        $xmlAfter = $this->roundTrip($original)->saveXml();

        self::assertSame($xmlBefore, $xmlAfter);

        // Each pair is [UTF-8 sequence that must be present, ISO-8859-1 byte
        // that must be absent]: first ñ, then á.
        $byteChecks = [
            ["\xC3\xB1", "\xF1"],
            ["\xC3\xA1", "\xE1"],
        ];
        foreach ($byteChecks as [$utf8Sequence, $latin1Byte]) {
            self::assertStringContainsString($utf8Sequence, $xmlAfter);
            self::assertStringNotContainsString($latin1Byte, $xmlAfter);
        }
    }

    // -------------------------------------------------------------------------
    // Case 2: ISO-8859-1 document serialize/unserialize
    // -------------------------------------------------------------------------

    /**
     * An XmlDocument loaded from real ISO-8859-1 bytes keeps its encoding
     * declaration after a serialize/unserialize round trip.
     *
     * - ñ must appear as the ISO-8859-1 byte 0xF1, never as the UTF-8
     *   sequence 0xC3 0xB1.
     * - á must appear as the ISO-8859-1 byte 0xE1, never as the UTF-8
     *   sequence 0xC3 0xA1.
     */
    public function testIso88591DocumentPreservesEncodingAfterSerializeUnserialize(): void
    {
        $restored = $this->roundTrip($this->loadDocument($this->iso88591Xml));

        // The encoding declaration is preserved.
        self::assertSame('ISO-8859-1', $restored->getEncoding());

        $restoredXml = $restored->saveXml();

        // The header declares ISO-8859-1.
        self::assertStringContainsString('encoding="ISO-8859-1"', $restoredXml);

        // Each pair is [ISO-8859-1 byte that must be present, UTF-8 sequence
        // that must be absent]: first ñ, then á.
        $byteChecks = [
            ["\xF1", "\xC3\xB1"],
            ["\xE1", "\xC3\xA1"],
        ];
        foreach ($byteChecks as [$latin1Byte, $utf8Sequence]) {
            self::assertStringContainsString($latin1Byte, $restoredXml);
            self::assertStringNotContainsString($utf8Sequence, $restoredXml);
        }
    }

    /**
     * The decoded content of an ISO-8859-1 document matches the expected
     * values after a serialize/unserialize round trip.
     *
     * DOMDocument keeps values internally as UTF-8, so the decoded values
     * must always be valid UTF-8 strings.
     */
    public function testIso88591DocumentPreservesDecodedContentAfterSerializeUnserialize(): void
    {
        $restored = $this->roundTrip($this->loadDocument($this->iso88591Xml));

        $decoded = (new XmlDecoder())->decode($restored);

        foreach ($this->expectedValues as $field => $expected) {
            $failureMessage = "El campo '{$field}' tiene un valor inesperado tras serialize/unserialize.";

            self::assertSame($expected, $decoded['documento'][$field], $failureMessage);
        }
    }

    // -------------------------------------------------------------------------
    // Case 3: Multiple serialize/unserialize cycles
    // -------------------------------------------------------------------------

    /**
     * Repeated serialize/unserialize cycles do not accumulate corruption in
     * an ISO-8859-1 document.
     *
     * A single cycle is already covered by
     * testIso88591DocumentPreservesEncoding*. This test runs three cycles to
     * confirm there is no drift in the encoding declaration, the bytes or
     * the decoded values.
     */
    public function testMultipleCyclesDoNotCorruptIso88591Document(): void
    {
        $doc = $this->roundTrip($this->loadDocument($this->iso88591Xml), 3);

        // The encoding declaration is still ISO-8859-1 after 3 cycles.
        self::assertSame('ISO-8859-1', $doc->getEncoding());

        $xml = $doc->saveXml();

        // ñ is still the ISO-8859-1 byte 0xF1, not the UTF-8 sequence 0xC3 0xB1.
        self::assertStringContainsString("\xF1", $xml);
        self::assertStringNotContainsString("\xC3\xB1", $xml);

        // The decoded values match the expected ones.
        $decoded = (new XmlDecoder())->decode($doc);
        foreach ($this->expectedValues as $field => $expected) {
            self::assertSame($expected, $decoded['documento'][$field]);
        }
    }

    /**
     * Repeated serialize/unserialize cycles on a UTF-8 document always yield
     * the same XML, byte for byte.
     */
    public function testMultipleCyclesProduceSameXmlForUtf8Document(): void
    {
        $doc = $this->loadDocument(
            '<?xml version="1.0" encoding="UTF-8"?><root><frase>Fabricación de Ñoños</frase></root>'
        );
        $xmlOriginal = $doc->saveXml();

        $doc = $this->roundTrip($doc, 3);

        self::assertSame($xmlOriginal, $doc->saveXml());
    }

    // -------------------------------------------------------------------------
    // Case 4: Methods work correctly after unserialize (XPath, namespace, schema)
    // -------------------------------------------------------------------------

    /**
     * The XmlDocument methods keep working after unserialization, including
     * XPath queries (which rely on the lazily created xPathQuery).
     */
    public function testAllMethodsWorkAfterUnserialize(): void
    {
        $xml = <<<XML
            <root xmlns="http://example.com"
            xsi:schemaLocation="http://example.com schema.xsd"
            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
            <child>Hola</child>
            <nested><value>42</value></nested>
            </root>
            XML;

        $restored = $this->roundTrip($this->loadDocument($xml));

        self::assertSame('root', $restored->getName());
        self::assertSame('http://example.com', $restored->getNamespace());
        self::assertSame('schema.xsd', $restored->getSchema());

        // XPath queries (these force the lazy initialization of xPathQuery).
        self::assertSame('Hola', $restored->query('//child'));
        self::assertSame('42', $restored->query('//value'));
    }

    // -------------------------------------------------------------------------
    // Case 5: Serialized form uses the 'xml' key
    // -------------------------------------------------------------------------

    /**
     * PHP's serialized form contains the 'xml' key defined by __serialize(),
     * which confirms that the intended mechanism is in use.
     */
    public function testSerializedStringContainsXmlKey(): void
    {
        $serialized = serialize($this->loadDocument('<root><element>Value</element></root>'));

        // The 'xml' key of the array returned by __serialize() must show up
        // in PHP's serialized representation.
        self::assertStringContainsString('s:3:"xml";', $serialized);
    }

    /**
     * Creates a fresh XmlDocument and loads the given markup into it.
     */
    private function loadDocument(string $xml): XmlDocument
    {
        $doc = new XmlDocument();
        $doc->loadXml($xml);

        return $doc;
    }

    /**
     * Passes a value through serialize()/unserialize() the requested number
     * of times.
     *
     * The return type is left as mixed on purpose: the tests themselves
     * decide what to assert about the restored value.
     *
     * @return mixed Whatever unserialize() produced on the last cycle; in
     *               these tests that is expected to be an XmlDocument.
     */
    private function roundTrip(mixed $value, int $cycles = 1): mixed
    {
        for ($cycle = 0; $cycle < $cycles; $cycle++) {
            $value = unserialize(serialize($value));
        }

        return $value;
    }
}

0 commit comments

Comments
 (0)