Skip to content

Commit e0d3e89

Browse files
committed
Se estandariza C14NEncoded() para soportar cualquier codificación del XmlDocument.
1 parent 0ac4f92 commit e0d3e89

7 files changed

Lines changed: 90 additions & 59 deletions

File tree

src/Contract/XmlDocumentInterface.php

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,13 @@
2222
*/
2323
interface XmlDocumentInterface extends DOMDocumentInterface, JsonSerializable
2424
{
25+
/**
26+
* Returns the encoding of the XML document.
27+
*
28+
* @return string The encoding of the XML document.
29+
*/
30+
public function getEncoding(): string;
31+
2532
/**
2633
* Sets the encoding of the XML document.
2734
*
@@ -102,33 +109,33 @@ public function saveXml(?DOMNode $node = null, int $options = 0): string;
102109
public function getXml(): string;
103110

104111
/**
105-
* Returns the canonicalized XML string with the correct encoding
106-
* (ISO-8859-1).
112+
* Returns the canonicalized XML string respecting the document's encoding.
107113
*
108114
* This basically uses C14N(), but C14N() always returns the XML in UTF-8
109-
* encoding. So this method allows getting it with the correct ISO-8859-1
110-
* encoding. Also, XML entities are corrected.
115+
* encoding. So this method converts the result to the encoding declared in
116+
* the XML document (e.g., ISO-8859-1). If no encoding is declared, UTF-8
117+
* is used. Also, XML entities are corrected.
111118
*
112119
* @param string|null $xpath The XPath to query the XML and extract only a
113120
* part, from a specific tag/node.
114121
* @return string The canonicalized XML string.
115122
* @throws XmlException If a XPath is passed and not found.
116123
*/
117-
public function C14NWithIso88591Encoding(?string $xpath = null): string;
124+
public function C14NEncoded(?string $xpath = null): string;
118125

119126
/**
120-
* Returns the canonicalized XML string with the correct encoding
121-
* (ISO-8859-1) and flattened.
127+
* Returns the canonicalized XML string respecting the document's encoding
128+
* and flattened.
122129
*
123-
* This is a wrapper of C14NWithIso88591Encoding() that flattens the
124-
* resulting XML.
130+
* This is a wrapper of C14NEncoded() that flattens the resulting XML by
131+
* removing whitespace between tags.
125132
*
126133
* @param string|null $xpath The XPath to query the XML and extract only a
127134
* part, from a specific tag/node.
128135
* @return string The canonicalized XML string and flattened.
129136
* @throws XmlException If a XPath is passed and not found.
130137
*/
131-
public function C14NWithIso88591EncodingFlattened(?string $xpath = null): string;
138+
public function C14NEncodedFlattened(?string $xpath = null): string;
132139

133140
/**
134141
* Gets the XML string of the electronic signature node.

src/XmlDocument.php

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,14 @@ public function __construct(
5353
$this->preserveWhiteSpace = true;
5454
}
5555

56+
/**
57+
* {@inheritDoc}
58+
*/
59+
public function getEncoding(): string
60+
{
61+
return strtoupper($this->encoding ?: 'UTF-8');
62+
}
63+
5664
/**
5765
* {@inheritDoc}
5866
*/
@@ -178,7 +186,7 @@ public function getXml(): string
178186
/**
179187
* {@inheritDoc}
180188
*/
181-
public function C14NWithIso88591Encoding(?string $xpath = null): string
189+
public function C14NEncoded(?string $xpath = null): string
182190
{
183191
// If an XPath is provided, filter the nodes.
184192
if ($xpath) {
@@ -199,9 +207,12 @@ public function C14NWithIso88591Encoding(?string $xpath = null): string
199207
// Fix XML entities.
200208
$xml = XmlHelper::fixEntities($xml);
201209

202-
// Convert the flattened XML from UTF-8 to ISO-8859-1.
203-
// Required because C14N() always delivers data in UTF-8.
204-
$xml = mb_convert_encoding($xml, 'ISO-8859-1', 'UTF-8');
210+
// C14N() always delivers data in UTF-8. Convert to the document's
211+
// declared encoding if it differs from UTF-8.
212+
$encoding = $this->getEncoding();
213+
if ($encoding !== 'UTF-8') {
214+
$xml = mb_convert_encoding($xml, $encoding, 'UTF-8');
215+
}
205216

206217
// Return the canonicalized XML.
207218
return $xml;
@@ -210,10 +221,10 @@ public function C14NWithIso88591Encoding(?string $xpath = null): string
210221
/**
211222
* {@inheritDoc}
212223
*/
213-
public function C14NWithIso88591EncodingFlattened(?string $xpath = null): string
224+
public function C14NEncodedFlattened(?string $xpath = null): string
214225
{
215-
// Get the canonicalized XML encoded in ISO8859-1.
216-
$xml = $this->C14NWithIso88591Encoding($xpath);
226+
// Get the canonicalized XML in the document's encoding.
227+
$xml = $this->C14NEncoded($xpath);
217228

218229
// Remove the spaces between tags.
219230
$xml = preg_replace("/>\s+</", '><', $xml);

tests/fixtures/encode_and_decode.php

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,8 @@
213213
],
214214
],
215215

216-
// Casos para testArrayToXmlC14NWithIso88591Encoding().
217-
'testArrayToXmlC14NWithIso88591Encoding' => [
216+
// Casos para testArrayToXmlC14NEncoded().
217+
'testArrayToXmlC14NEncoded' => [
218218
// Caso simple con un solo elemento.
219219
'simple_element' => [
220220
'data' => ['root' => ['element' => 'value']],
@@ -242,6 +242,7 @@
242242
// Arreglo con caracteres en UTF-8 que deben convertirse a ISO-8859-1.
243243
'utf8_to_iso' => [
244244
'data' => ['root' => ['element' => 'Árbol']],
245+
'encoding' => 'ISO-8859-1',
245246
'expected' => '<root><element>' . mb_convert_encoding('Árbol', 'ISO-8859-1', 'UTF-8') . '</element></root>',
246247
'expectedException' => null,
247248
],
@@ -415,8 +416,8 @@
415416
],
416417
],
417418

418-
// Casos para testXmlToC14NWithIso88591Encoding().
419-
'testXmlToC14NWithIso88591Encoding' => [
419+
// Casos para testXmlToC14NEncoded().
420+
'testXmlToC14NEncoded' => [
420421
// XML simple con un solo elemento (ISO-8859-1).
421422
'simple_element_iso' => [
422423
'xmlContent' => '<?xml version="1.0" encoding="ISO-8859-1"?><root><element>value</element></root>',
@@ -438,12 +439,14 @@
438439
// XML en UTF-8 con caracteres especiales (ej. tildes, ñ) debe convertirse a ISO-8859-1.
439440
'utf8_characters' => [
440441
'xmlContent' => '<?xml version="1.0" encoding="UTF-8"?><root><element>Árbol</element></root>',
442+
'encoding' => 'ISO-8859-1',
441443
'expected' => '<root><element>' . mb_convert_encoding('Árbol', 'ISO-8859-1', 'UTF-8') . '</element></root>',
442444
'expectedException' => null,
443445
],
444446
// XML en ISO-8859-1 con caracteres especiales (ej. tildes, ñ) debe mantenerse como ISO-8859-1.
445447
'iso_characters' => [
446448
'xmlContent' => '<?xml version="1.0" encoding="ISO-8859-1"?><root><element>' . mb_convert_encoding('Árbol', 'ISO-8859-1', 'UTF-8') . '</element></root>',
449+
'encoding' => 'ISO-8859-1',
447450
'expected' => '<root><element>' . mb_convert_encoding('Árbol', 'ISO-8859-1', 'UTF-8') . '</element></root>',
448451
'expectedException' => null,
449452
],

tests/fixtures/special_cases.php

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@
202202
// Cases for testC14NWithSpecialCharacters().
203203
'testC14NWithSpecialCharacters' => [
204204
// Canonicalization with special characters for XML-DSIG.
205-
// NOTE: C14NWithIso88591Encoding() converts to ISO-8859-1,
205+
// NOTE: C14NEncoded() converts to ISO-8859-1 if the encoding is ISO-8859-1,
206206
// so accented characters are emitted as single-byte ISO-8859-1 characters instead of UTF-8 sequences.
207207
'accented_chars_in_c14n' => [
208208
'data' => ['root' => [
@@ -211,6 +211,7 @@
211211
'@value' => 'Contenido con áéíóú ñ',
212212
],
213213
]],
214+
'encoding' => 'ISO-8859-1',
214215
'expected' => '<root><element id="F33T1" version="1.0">Contenido con ' . mb_convert_encoding('áéíóú ñ', 'ISO-8859-1', 'UTF-8') . '</element></root>',
215216
'expectedException' => null,
216217
],
@@ -231,7 +232,7 @@
231232
'@value' => 'Árbol con ñ y áéíóú & < > " \'',
232233
],
233234
]],
234-
'expected' => '<root><element fecha="2025-01-03" id="F33T1">' . mb_convert_encoding('Árbol con ñ y áéíóú', 'ISO-8859-1', 'UTF-8') . ' &amp; &lt; &gt; &quot; &apos;</element></root>',
235+
'expected' => '<root><element fecha="2025-01-03" id="F33T1">' . 'Árbol con ñ y áéíóú' . ' &amp; &lt; &gt; &quot; &apos;</element></root>',
235236
'expectedException' => null,
236237
],
237238
],
@@ -271,11 +272,11 @@
271272
// Cases for testC14NEncodingValidation().
272273
'testC14NEncodingValidation' => [
273274
// Encoding validation in canonicalization.
274-
// NOTE: C14NWithIso88591Encoding() converts to ISO-8859-1,
275+
// NOTE: C14NEncoded() converts to ISO-8859-1 if the encoding is ISO-8859-1,
275276
// so accented characters are emitted as single-byte ISO-8859-1 characters instead of UTF-8 sequences.
276277
'accented_chars_encoding' => [
277278
'data' => ['root' => ['element' => 'Árbol con ñ y áéíóú']],
278-
'expected' => '<root><element>' . mb_convert_encoding('Árbol con ñ y áéíóú', 'ISO-8859-1', 'UTF-8') . '</element></root>',
279+
'expected' => '<root><element>' . 'Árbol con ñ y áéíóú' . '</element></root>',
279280
'expectedException' => null,
280281
],
281282
'special_chars_encoding' => [
@@ -285,6 +286,7 @@
285286
],
286287
'mixed_chars_encoding' => [
287288
'data' => ['root' => ['element' => 'Árbol con ñ y & < > " \'']],
289+
'encoding' => 'ISO-8859-1',
288290
'expected' => '<root><element>' . mb_convert_encoding('Árbol con ñ y', 'ISO-8859-1', 'UTF-8') . ' &amp; &lt; &gt; &quot; &apos;</element></root>',
289291
'expectedException' => null,
290292
],

tests/src/XmlServiceTest.php

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,9 @@ public static function arrayToXmlC14NDataProvider(): array
9999
return self::dataProvider('testArrayToXmlC14N');
100100
}
101101

102-
public static function arrayToXmlC14NWithIso88591EncodingDataProvider(): array
102+
public static function arrayToXmlC14NEncodedDataProvider(): array
103103
{
104-
return self::dataProvider('testArrayToXmlC14NWithIso88591Encoding');
104+
return self::dataProvider('testArrayToXmlC14NEncoded');
105105
}
106106

107107
public static function xmlToArrayDataProvider(): array
@@ -119,9 +119,9 @@ public static function xmlToC14NDataProvider(): array
119119
return self::dataProvider('testXmlToC14N');
120120
}
121121

122-
public static function xmlToC14NWithIso88591EncodingDataProvider(): array
122+
public static function xmlToC14NEncodedDataProvider(): array
123123
{
124-
return self::dataProvider('testXmlToC14NWithIso88591Encoding');
124+
return self::dataProvider('testXmlToC14NEncoded');
125125
}
126126

127127
/**
@@ -201,21 +201,23 @@ public function testArrayToXmlC14N(
201201

202202
/**
203203
* Convierte un arreglo a un Xml y lo guarda como un string XML
204-
* con testArrayToXmlC14NWithIso88591Encoding(), asegurando que la codificación
204+
* con C14NEncoded(), asegurando que la codificación
205205
* y contenido son correctos.
206206
*/
207-
#[DataProvider('arrayToXmlC14NWithIso88591EncodingDataProvider')]
208-
public function testArrayToXmlC14NWithIso88591Encoding(
207+
#[DataProvider('arrayToXmlC14NEncodedDataProvider')]
208+
public function testArrayToXmlC14NEncoded(
209209
array $data,
210210
string $expected,
211-
?string $expectedException
211+
?string $expectedException,
212+
string $encoding = 'UTF-8'
212213
): void {
213214
if ($expectedException) {
214215
$this->expectException($expectedException);
215216
}
216217

217218
$xml = $this->xmlService->encode($data);
218-
$xmlString = $xml->C14NWithIso88591Encoding();
219+
$xml->setEncoding($encoding);
220+
$xmlString = $xml->C14NEncoded();
219221

220222
// Validar contenido.
221223
$this->assertSame($expected, $xmlString);
@@ -306,30 +308,32 @@ public function testXmlToC14N(
306308

307309
/**
308310
* Convierte un string XML a un Xml y lo guarda como un string XML
309-
* con C14NWithIso88591Encoding(), asegurando que la codificación y contenido
311+
* con C14NEncoded(), asegurando que la codificación y contenido
310312
* son correctos.
311313
*/
312-
#[DataProvider('xmlToC14NWithIso88591EncodingDataProvider')]
313-
public function testXmlToC14NWithIso88591Encoding(
314+
#[DataProvider('xmlToC14NEncodedDataProvider')]
315+
public function testXmlToC14NEncoded(
314316
string $xmlContent,
315317
string $expected,
316-
?string $expectedException
318+
?string $expectedException,
319+
string $encoding = 'UTF-8'
317320
): void {
318321
if ($expectedException) {
319322
$this->expectException($expectedException);
320323
}
321324

322325
$doc = new XmlDocument();
323326
$doc->loadXml($xmlContent);
324-
$xmlString = $doc->C14NWithIso88591Encoding();
327+
$doc->setEncoding($encoding);
328+
$xmlString = $doc->C14NEncoded();
325329

326330
// Validar contenido.
327331
$this->assertSame($expected, $xmlString);
328332

329333
// Validar codificación.
330334
$this->assertSame(
331-
'ISO-8859-1',
332-
mb_detect_encoding($xmlString, 'ISO-8859-1', true)
335+
$encoding,
336+
mb_detect_encoding($xmlString, $encoding, true)
333337
);
334338
}
335339

tests/src/XmlSpecialCasesTest.php

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -230,15 +230,17 @@ public function testSpecialCharactersInAttributes(
230230
public function testC14NWithSpecialCharacters(
231231
array $data,
232232
string $expected,
233-
?string $expectedException
233+
?string $expectedException,
234+
string $encoding = 'UTF-8'
234235
): void {
235236
if ($expectedException) {
236237
$this->expectException($expectedException);
237238
}
238239

239240
$xml = $this->xmlService->encode($data);
240-
$c14n = $xml->C14NWithIso88591Encoding();
241-
$c14n = XmlHelper::fixEntities($c14n);
241+
$xml->setEncoding($encoding);
242+
$c14n = $xml->C14NEncoded();
243+
$c14n = XmlHelper::fixEntities($c14n); // TODO: Necessary?
242244
243245
// Validate canonicalized content.
244246
$this->assertSame($expected, $c14n);
@@ -284,15 +286,17 @@ public function testSpecialWhitespaceCharacters(
284286
public function testC14NEncodingValidation(
285287
array $data,
286288
string $expected,
287-
?string $expectedException
289+
?string $expectedException,
290+
string $encoding = 'UTF-8'
288291
): void {
289292
if ($expectedException) {
290293
$this->expectException($expectedException);
291294
}
292295

293296
$xml = $this->xmlService->encode($data);
294-
$c14n = $xml->C14NWithIso88591Encoding();
295-
$c14n = XmlHelper::fixEntities($c14n);
297+
$xml->setEncoding($encoding);
298+
$c14n = $xml->C14NEncoded();
299+
$c14n = XmlHelper::fixEntities($c14n); // TODO: Necessary?
296300
297301
// Validate canonicalized content.
298302
$this->assertSame($expected, $c14n);
@@ -348,7 +352,7 @@ public function testExportedXmlCompatibilityWithIso88591Encoding(
348352
$this->assertStringContainsString('encoding="ISO-8859-1"', $xmlString);
349353

350354
// Verify canonicalization for DSIG.
351-
$c14n = $xml->C14NWithIso88591Encoding();
355+
$c14n = $xml->C14NEncoded();
352356
$this->assertSame(
353357
'ISO-8859-1',
354358
mb_detect_encoding($c14n, 'ISO-8859-1', true)
@@ -433,7 +437,7 @@ public function testC14NPreservesSpecialCharacters(): void
433437
]];
434438

435439
$xml = $this->xmlService->encode($data);
436-
$c14n = $xml->C14NWithIso88591Encoding();
440+
$c14n = $xml->C14NEncoded();
437441
$c14n = XmlHelper::fixEntities($c14n);
438442

439443
// Verify that it is compatible with XML-DSIG.
@@ -496,7 +500,7 @@ public function testEncodingValidationForSII(): void
496500
// This is the expected behavior for SII XML-DSIG compatibility.
497501

498502
// Verify canonicalization.
499-
$c14n = $xml->C14NWithIso88591Encoding();
503+
$c14n = $xml->C14NEncoded();
500504
$this->assertSame(
501505
'ISO-8859-1',
502506
mb_detect_encoding($c14n, 'ISO-8859-1', true)

0 commit comments

Comments
 (0)