Skip to content

Commit 6bc29ce

Browse files
author
Maruan Sahyoun
committed
PDFBOX-6178, PDFBOX-4076: handle #00 as valid; see testfile by Acrobat
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1932405 13f79535-47bb-0310-9956-ffa450edef68
1 parent 1213a60 commit 6bc29ce

4 files changed

Lines changed: 46 additions & 34 deletions

File tree

pdfbox/pom.xml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -933,6 +933,19 @@
933933
<sha512>d39486af0614bd099167a6adaab833aed41a0ebec7b85b13b382a2fdb6fddbcaaea9ab26ed0a81b72822258c8dd66dd535fa5c76afd1e5a8b1bff7d81e890274</sha512>
934934
</configuration>
935935
</execution>
936+
<execution>
937+
<id>PDFBOX-6178-1</id>
938+
<phase>generate-test-resources</phase>
939+
<goals>
940+
<goal>wget</goal>
941+
</goals>
942+
<configuration>
943+
<url>https://issues.apache.org/jira/secure/attachment/13081311/form_selected_ASCII_NUL_acrobat.pdf</url>
944+
<outputDirectory>${project.build.directory}/pdfs</outputDirectory>
945+
<outputFileName>PDFBOX-6178-1.pdf</outputFileName>
946+
<sha512>83bc557e6f7d3e98de6e81168b2e2fb3def5025cc5fab1ddb3ef658505351c615253587f09b93503bb73a6225f3d3898be894e9d05dba8d464f4dd9c54514bc3</sha512>
947+
</configuration>
948+
</execution>
936949
</executions>
937950
</plugin>
938951
</plugins>

pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -715,15 +715,6 @@ public static COSName getPDFName(String aName)
715715
*/
716716
public static COSName getPDFName(byte[] bytes)
717717
{
718-
for (byte b : bytes)
719-
{
720-
if (b == 0)
721-
{
722-
throw new IllegalArgumentException(
723-
"PDF name bytes must not contain null (0x00) characters");
724-
}
725-
}
726-
727718
// Wrap for lookup only to avoid unnecessary copying of the byte array for the key.
728719
ByteBuffer lookupKey = ByteBuffer.wrap(bytes);
729720

pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1852,19 +1852,7 @@ protected COSName parseCOSName() throws IOException
18521852
String hex = Character.toString((char) ch1) + (char) ch2;
18531853
try
18541854
{
1855-
// Beginning with PDF 1.2 a name object is an atomic symbol uniquely defined by a
1856-
// sequence of any characters (8-bit values) except null (character code 0)
1857-
// Although not explicitly stated in the PDF specification, it is generally accepted that
1858-
// the # escape sequence is used to represent characters that are not allowed in a name object,
1859-
// such as the null byte (0x00). Therefore, we will throw an IOException if we encounter a #00 sequence
1860-
// in a name object, as this would indicate an invalid name according to the PDF specification.
1861-
// ASCII NUL (0x00) is already handled in BaseParser#isEndOfName
1862-
int decoded = Integer.parseInt(hex, 16);
1863-
if (decoded == 0)
1864-
{
1865-
throw new IOException("PDF name must not contain null byte (0x00), found #00 at offset " + source.getPosition());
1866-
}
1867-
buffer.write(decoded);
1855+
buffer.write(Integer.parseInt(hex, 16));
18681856
}
18691857
catch (NumberFormatException e)
18701858
{

pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
package org.apache.pdfbox.cos;
1818

1919
import static org.junit.jupiter.api.Assertions.assertEquals;
20-
import static org.junit.jupiter.api.Assertions.assertThrows;
2120
import static org.junit.jupiter.api.Assertions.assertTrue;
2221

2322
import java.io.ByteArrayOutputStream;
@@ -34,16 +33,6 @@ class TestCOSName
3433
{
3534
private static final File TARGETPDFDIR = new File("target/pdfs");
3635

37-
@Test
38-
void testNullByteRejection()
39-
{
40-
// Null bytes should not be allowed in name bytes
41-
byte[] bytesWithNull = new byte[] { 'N', 'a', 'm', 'e', 0, 'X' };
42-
assertThrows(IllegalArgumentException.class, () -> {
43-
COSName.getPDFName(bytesWithNull);
44-
}, "getPDFName should reject bytes containing null (0x00)");
45-
}
46-
4736
/**
4837
* PDFBOX-4076: Check that characters outside of US_ASCII are not replaced with "?".
4938
*
@@ -72,7 +61,7 @@ void PDFBox4076() throws IOException
7261
}
7362

7463
/**
75-
* PDFBOX-4076: Check that characters outside of US_ASCII are not replaced with "?".
64+
* PDFBOX-6178: Ensure that names with escape sequences #xx are written as is.
7665
*
7766
* @throws IOException
7867
*/
@@ -103,4 +92,35 @@ void PDFBox6178() throws IOException
10392
System.out.println(writtenKeys);
10493
}
10594
}
95+
96+
/**
97+
* PDFBOX-6178: Ensure that a name key containing the escape sequence #00 is written as is.
* <p>
* Loads PDFBOX-6178-1.pdf from TARGETPDFDIR, writes every key of the COS object of the normal
* appearance of the first widget of the field "Geschlecht" into a buffer, and checks that the
* buffer contains /m#00nnlich.
98+
*
99+
* @throws IOException presumably when the test PDF cannot be loaded (confirm); exceptions raised
*                     while writing the individual keys are caught inside the lambda and not rethrown
100+
*/
101+
@Test
102+
void NameWithASCII_NUL() throws IOException
103+
{
104+
// collects the serialized form of every key written below
ByteArrayOutputStream baos = new ByteArrayOutputStream();
105+
106+
// try-with-resources: the document is closed when this block is left
try (PDDocument document = Loader.loadPDF(new File(TARGETPDFDIR,"PDFBOX-6178-1.pdf"))) {
107+
PDField field = document.getDocumentCatalog()
108+
// NOTE(review): null is passed as the argument; its meaning is not visible here - confirm
.getAcroForm(null)
109+
.getField("Geschlecht");
110+
111+
field.getWidgets()
112+
.get(0).getAppearance().getNormalAppearance().getCOSObject()
113+
.keySet().forEach(k -> {
114+
try {
115+
k.writePDF(baos);
116+
} catch (IOException e) {
117+
// ignored: a write failure is not rethrown, so the assertion below only
// sees whatever reached baos before the failure
118+
}
119+
});
120+
121+
String writtenKeys = new String(baos.toByteArray(), "UTF-8");
122+
// NOTE(review): the failure message mentions 0xE4, but the expected text contains the #00
// escape; the message was presumably copied from another test - confirm and correct
assertTrue(writtenKeys.contains("/m#00nnlich"), "Output should be /m#00nnlich (with 0xE4 as hex escape)");
123+
// prints the collected output to stdout
System.out.println(writtenKeys);
124+
}
125+
}
106126
}

0 commit comments

Comments
 (0)