Skip to content

Commit 261b1ca

Browse files
author
Maruan Sahyoun
committed
PDFBOX-6178, PDFBOX-4076: add tests; partially created with Claude Haiku 4.5
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1932403 13f79535-47bb-0310-9956-ffa450edef68
1 parent 48f761d commit 261b1ca

2 files changed

Lines changed: 294 additions & 0 deletions

File tree

pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package org.apache.pdfbox.cos;
1818

1919
import static org.junit.jupiter.api.Assertions.assertEquals;
20+
import static org.junit.jupiter.api.Assertions.assertThrows;
2021
import static org.junit.jupiter.api.Assertions.assertTrue;
2122

2223
import java.io.ByteArrayOutputStream;
@@ -33,6 +34,16 @@ class TestCOSName
3334
{
3435
private static final File TARGETPDFDIR = new File("target/pdfs");
3536

37+
@Test
38+
void testNullByteRejection()
39+
{
40+
// Null bytes should not be allowed in name bytes
41+
byte[] bytesWithNull = new byte[] { 'N', 'a', 'm', 'e', 0, 'X' };
42+
assertThrows(IllegalArgumentException.class, () -> {
43+
COSName.getPDFName(bytesWithNull);
44+
}, "getPDFName should reject bytes containing null (0x00)");
45+
}
46+
3647
/**
3748
* PDFBOX-4076: Check that characters outside of US_ASCII are not replaced with "?".
3849
*

pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestCOSParser.java

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020
import static org.junit.jupiter.api.Assertions.assertEquals;
2121

2222
import java.io.IOException;
23+
import java.nio.charset.StandardCharsets;
2324

25+
import org.apache.pdfbox.cos.COSName;
2426
import org.apache.pdfbox.cos.COSString;
2527
import org.apache.pdfbox.io.RandomAccessReadBuffer;
2628
import org.junit.jupiter.api.Test;
@@ -88,4 +90,285 @@ void testCheckForEndOfString() throws IOException
8890
assertEquals(output, cosString.getString());
8991
}
9092

93+
// COSName parsing tests based on examples from PDF 32000-1:2008, Table 4, Section 7.3.5
94+
95+
@Test
96+
void testTable4Example_Name1() throws IOException
97+
{
98+
// /Name1 → "Name1"
99+
byte[] inputBytes = "/Name1 ".getBytes(StandardCharsets.US_ASCII);
100+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
101+
COSParser cosParser = new COSParser(buffer);
102+
COSName name = cosParser.parseCOSName();
103+
assertEquals("Name1", name.getName());
104+
}
105+
106+
@Test
107+
void testTable4Example_ASomewhatLongerName() throws IOException
108+
{
109+
// /ASomewhatLongerName → "ASomewhatLongerName"
110+
byte[] inputBytes = "/ASomewhatLongerName ".getBytes(StandardCharsets.US_ASCII);
111+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
112+
COSParser cosParser = new COSParser(buffer);
113+
COSName name = cosParser.parseCOSName();
114+
assertEquals("ASomewhatLongerName", name.getName());
115+
}
116+
117+
@Test
118+
void testTable4Example_WithSpecialCharacters() throws IOException
119+
{
120+
// /A;Name_With-Various***Characters? → "A;Name_With-Various***Characters?"
121+
byte[] inputBytes = "/A;Name_With-Various***Characters? ".getBytes(StandardCharsets.US_ASCII);
122+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
123+
COSParser cosParser = new COSParser(buffer);
124+
COSName name = cosParser.parseCOSName();
125+
assertEquals("A;Name_With-Various***Characters?", name.getName());
126+
}
127+
128+
@Test
129+
void testTable4Example_Numeric() throws IOException
130+
{
131+
// /1.2 → "1.2"
132+
byte[] inputBytes = "/1.2 ".getBytes(StandardCharsets.US_ASCII);
133+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
134+
COSParser cosParser = new COSParser(buffer);
135+
COSName name = cosParser.parseCOSName();
136+
assertEquals("1.2", name.getName());
137+
}
138+
139+
@Test
140+
void testTable4Example_DollarSigns() throws IOException
141+
{
142+
// /$$ → "$$"
143+
byte[] inputBytes = "/$$ ".getBytes(StandardCharsets.US_ASCII);
144+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
145+
COSParser cosParser = new COSParser(buffer);
146+
COSName name = cosParser.parseCOSName();
147+
assertEquals("$$", name.getName());
148+
}
149+
150+
@Test
151+
void testTable4Example_AtPattern() throws IOException
152+
{
153+
// /@pattern → "@pattern"
154+
byte[] inputBytes = "/@pattern ".getBytes(StandardCharsets.US_ASCII);
155+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
156+
COSParser cosParser = new COSParser(buffer);
157+
COSName name = cosParser.parseCOSName();
158+
assertEquals("@pattern", name.getName());
159+
}
160+
161+
@Test
162+
void testTable4Example_DotNotdef() throws IOException
163+
{
164+
// /#2Enotdef → ".notdef" ('.' is 0x2E, hex-encoded as #2E)
165+
byte[] inputBytes = "/#2Enotdef ".getBytes(StandardCharsets.US_ASCII);
166+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
167+
COSParser cosParser = new COSParser(buffer);
168+
COSName name = cosParser.parseCOSName();
169+
assertEquals(".notdef", name.getName());
170+
}
171+
172+
@Test
173+
void testTable4Example_HexEncodedSpace() throws IOException
174+
{
175+
// /lime#20Green → "lime Green"
176+
byte[] inputBytes = "/lime#20Green ".getBytes(StandardCharsets.US_ASCII);
177+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
178+
COSParser cosParser = new COSParser(buffer);
179+
COSName name = cosParser.parseCOSName();
180+
assertEquals("lime Green", name.getName());
181+
}
182+
183+
@Test
184+
void testTable4Example_HexEncodedParentheses() throws IOException
185+
{
186+
// /paired#28#29parentheses → "paired()parentheses"
187+
// (#28 = '(', #29 = ')')
188+
byte[] inputBytes = "/paired#28#29parentheses ".getBytes(StandardCharsets.US_ASCII);
189+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
190+
COSParser cosParser = new COSParser(buffer);
191+
COSName name = cosParser.parseCOSName();
192+
assertEquals("paired()parentheses", name.getName());
193+
}
194+
195+
@Test
196+
void testTable4Example_HexEncodedNumberSign() throws IOException
197+
{
198+
// /The_Key_of_F#23_Minor → "The_Key_of_F#_Minor"
199+
// (#23 = '#')
200+
byte[] inputBytes = "/The_Key_of_F#23_Minor ".getBytes(StandardCharsets.US_ASCII);
201+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
202+
COSParser cosParser = new COSParser(buffer);
203+
COSName name = cosParser.parseCOSName();
204+
assertEquals("The_Key_of_F#_Minor", name.getName());
205+
}
206+
207+
@Test
208+
void testTable4Example_HexEncodedLetter() throws IOException
209+
{
210+
// /A#42 → "AB" (note #42 = 'B')
211+
byte[] inputBytes = "/A#42 ".getBytes(StandardCharsets.US_ASCII);
212+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
213+
COSParser cosParser = new COSParser(buffer);
214+
COSName name = cosParser.parseCOSName();
215+
assertEquals("AB", name.getName());
216+
}
217+
218+
@Test
219+
void testTable4Example_EmptyName() throws IOException
220+
{
221+
// / → "" (empty name is valid per spec)
222+
byte[] inputBytes = "/ ".getBytes(StandardCharsets.US_ASCII);
223+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
224+
COSParser cosParser = new COSParser(buffer);
225+
COSName name = cosParser.parseCOSName();
226+
assertEquals("", name.getName());
227+
}
228+
229+
@Test
230+
void testNullCharacterTermination() throws IOException
231+
{
232+
// /Name\0Extra should parse as "Name" and stop at null
233+
byte[] inputBytes = new byte[] { '/', 'N', 'a', 'm', 'e', 0, 'E', 'x', 't', 'r', 'a', ' ' };
234+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
235+
COSParser cosParser = new COSParser(buffer);
236+
COSName name = cosParser.parseCOSName();
237+
assertEquals("Name", name.getName());
238+
}
239+
240+
@Test
241+
void testInvalidHexSequence() throws IOException
242+
{
243+
// /Name#GG should keep "#GG" literally since G is not a valid hex digit
244+
byte[] inputBytes = "/Name#GG ".getBytes(StandardCharsets.US_ASCII);
245+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
246+
COSParser cosParser = new COSParser(buffer);
247+
COSName name = cosParser.parseCOSName();
248+
// When # is not followed by two hex digits, both chars are kept literally
249+
assertEquals("Name#GG", name.getName());
250+
}
251+
252+
@Test
253+
void testHexEscapeLowercase() throws IOException
254+
{
255+
// /Name#2fTest (lowercase hex #2f = '/')
256+
byte[] inputBytes = "/Name#2fTest ".getBytes(StandardCharsets.US_ASCII);
257+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
258+
COSParser cosParser = new COSParser(buffer);
259+
COSName name = cosParser.parseCOSName();
260+
assertEquals("Name/Test", name.getName());
261+
}
262+
263+
@Test
264+
void testHexEscapeUppercase() throws IOException
265+
{
266+
// /Name#2FTest (uppercase hex #2F = '/')
267+
byte[] inputBytes = "/Name#2FTest ".getBytes(StandardCharsets.US_ASCII);
268+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
269+
COSParser cosParser = new COSParser(buffer);
270+
COSName name = cosParser.parseCOSName();
271+
assertEquals("Name/Test", name.getName());
272+
}
273+
274+
@Test
275+
void testNameTerminationByDelimiters() throws IOException
276+
{
277+
// Test termination by '>'
278+
byte[] inputBytes = "/Name1>".getBytes(StandardCharsets.US_ASCII);
279+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
280+
COSParser cosParser = new COSParser(buffer);
281+
COSName name = cosParser.parseCOSName();
282+
assertEquals("Name1", name.getName());
283+
284+
// Test termination by '<'
285+
inputBytes = "/Name2<".getBytes(StandardCharsets.US_ASCII);
286+
buffer = new RandomAccessReadBuffer(inputBytes);
287+
cosParser = new COSParser(buffer);
288+
name = cosParser.parseCOSName();
289+
assertEquals("Name2", name.getName());
290+
291+
// Test termination by '['
292+
inputBytes = "/Name3[".getBytes(StandardCharsets.US_ASCII);
293+
buffer = new RandomAccessReadBuffer(inputBytes);
294+
cosParser = new COSParser(buffer);
295+
name = cosParser.parseCOSName();
296+
assertEquals("Name3", name.getName());
297+
298+
// Test termination by ']'
299+
inputBytes = "/Name4]".getBytes(StandardCharsets.US_ASCII);
300+
buffer = new RandomAccessReadBuffer(inputBytes);
301+
cosParser = new COSParser(buffer);
302+
name = cosParser.parseCOSName();
303+
assertEquals("Name4", name.getName());
304+
305+
// Test termination by '('
306+
inputBytes = "/Name5(".getBytes(StandardCharsets.US_ASCII);
307+
buffer = new RandomAccessReadBuffer(inputBytes);
308+
cosParser = new COSParser(buffer);
309+
name = cosParser.parseCOSName();
310+
assertEquals("Name5", name.getName());
311+
312+
// Test termination by ')'
313+
inputBytes = "/Name6)".getBytes(StandardCharsets.US_ASCII);
314+
buffer = new RandomAccessReadBuffer(inputBytes);
315+
cosParser = new COSParser(buffer);
316+
name = cosParser.parseCOSName();
317+
assertEquals("Name6", name.getName());
318+
319+
// Test termination by '/'
320+
inputBytes = "/Name7/".getBytes(StandardCharsets.US_ASCII);
321+
buffer = new RandomAccessReadBuffer(inputBytes);
322+
cosParser = new COSParser(buffer);
323+
name = cosParser.parseCOSName();
324+
assertEquals("Name7", name.getName());
325+
326+
// Test termination by '%'
327+
inputBytes = "/Name8%".getBytes(StandardCharsets.US_ASCII);
328+
buffer = new RandomAccessReadBuffer(inputBytes);
329+
cosParser = new COSParser(buffer);
330+
name = cosParser.parseCOSName();
331+
assertEquals("Name8", name.getName());
332+
}
333+
334+
@Test
335+
void testASCIIRegularCharacters() throws IOException
336+
{
337+
// Test a range of ASCII characters that are not delimiters
338+
// PDF delimiters that terminate name parsing: whitespace, <, >, [, ], {, }, /, %, (, )
339+
byte[] inputBytes = "/!\"$'*+-._:;=@~^`|\\".getBytes(StandardCharsets.US_ASCII);
340+
RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
341+
COSParser cosParser = new COSParser(buffer);
342+
COSName name = cosParser.parseCOSName();
343+
// All these non-delimiter characters should be preserved
344+
assertEquals("!\"$'*+-._:;=@~^`|\\", name.getName());
345+
}
346+
347+
@Test
348+
void testUTF8InNames()
349+
{
350+
// Create a name with UTF-8 encoded characters
351+
String nameStr = "Test中国";
352+
byte[] nameBytes = nameStr.getBytes(StandardCharsets.UTF_8);
353+
COSName name = COSName.getPDFName(nameBytes);
354+
355+
// The name should preserve the UTF-8 bytes
356+
byte[] retrievedBytes = name.getBytes();
357+
// Verify by recreating the string
358+
String retrievedStr = new String(retrievedBytes, StandardCharsets.UTF_8);
359+
assertEquals(nameStr, retrievedStr);
360+
}
361+
362+
@Test
363+
void testNameCanonicaliation()
364+
{
365+
byte[] bytes1 = "TestName".getBytes(StandardCharsets.US_ASCII);
366+
byte[] bytes2 = "TestName".getBytes(StandardCharsets.US_ASCII);
367+
368+
COSName name1 = COSName.getPDFName(bytes1);
369+
COSName name2 = COSName.getPDFName(bytes2);
370+
371+
// Same bytes should yield equal names; note that assertEquals checks equality, not reference identity
372+
assertEquals(name1, name2);
373+
}
91374
}

0 commit comments

Comments
 (0)