Skip to content

Commit 171101a

Browse files
committed
feat: add allowUnclosedQuote flag to reader builders (#173)
Allows callers to opt into rejecting input that ends inside a quoted field (EOF before a closing quote). Default in 4.x is `true` (current lenient behavior); the default will flip to `false` in 5.0. The check fires once at EOF: in StrictCsvParser.processBufferTail when STATUS_QUOTED_MODE is still set, and in RelaxedCsvParser.parseQuoted on the EOF exit of the OUTER loop. The closed-quote hot path is unaffected. Independent of this flag, exceeding maxBufferSize inside an unclosed quoted region continues to throw, as before.
1 parent be09d93 commit 171101a

10 files changed

Lines changed: 183 additions & 11 deletions

File tree

docs/src/content/docs/architecture/interpretation.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,24 @@ In this case, we observe:
208208
- For performance reasons, FastCSV switches to unquoted-field-parsing when the first character is not a quote
209209
character. It's not reasonable to slow down the parser in order to guess which broken format is used.
210210

211+
### Unclosed quoted fields
212+
213+
A quoted field may be unintentionally truncated, leaving an opening quote without a matching closing quote before end-of-file:
214+
215+
```
216+
"value 1,value 2CRLF
217+
value 3CRLF
218+
```
219+
220+
By default, FastCSV consumes the remaining characters (including any line breaks) as the content of the final field, in line with its tolerance of real-world malformed input.
221+
222+
This behavior can be made strict by setting `CsvReaderBuilder.allowUnclosedQuote(false)` (also available
223+
on `IndexedCsvReaderBuilder`). FastCSV then throws a `CsvParseException` that references the starting line of the
224+
offending record.
225+
226+
The default of this flag will change to `false` in version 5.0. To preserve the current lenient behavior across the
227+
upgrade, set it explicitly to `true`.
228+
211229
### Other field enclosures
212230

213231
The RFC does not mention any field enclosures other than double quotes.

docs/src/content/docs/guides/basic.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ CsvReader.builder()
5555
.extraFieldStrategy(FieldMismatchStrategy.STRICT)
5656
.missingFieldStrategy(FieldMismatchStrategy.STRICT)
5757
.allowExtraCharsAfterClosingQuote(false)
58+
.allowUnclosedQuote(true)
5859
.detectBomHeader(false)
5960
.maxBufferSize(16777216);
6061
```

lib/src/intTest/java/blackbox/reader/AbstractCsvReaderTest.java

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import java.io.IOException;
99
import java.io.StringReader;
1010
import java.io.UncheckedIOException;
11+
import java.util.ArrayList;
1112
import java.util.Arrays;
1213
import java.util.List;
1314
import java.util.NoSuchElementException;
@@ -29,6 +30,7 @@
2930
import de.siegmar.fastcsv.reader.CsvRecord;
3031
import de.siegmar.fastcsv.reader.CsvRecordHandler;
3132
import de.siegmar.fastcsv.reader.FieldMismatchStrategy;
33+
import de.siegmar.fastcsv.reader.FieldModifier;
3234
import de.siegmar.fastcsv.reader.FieldModifiers;
3335
import testutil.CsvRecordAssert;
3436

@@ -220,6 +222,61 @@ void allowExtraCharsAfterClosingQuoteNot() {
220222
.hasRootCauseMessage("Unexpected character after closing quote: 'b' (0x62)");
221223
}
222224

225+
// allow unclosed quote at end of input
226+
227+
@Test
228+
void allowUnclosedQuoteLenient() {
229+
assertThat(readAll("\"abc"))
230+
.singleElement(CsvRecordAssert.CSV_RECORD)
231+
.fields().containsExactly("abc");
232+
}
233+
234+
@Test
235+
void allowUnclosedQuoteStrict() {
236+
// The message must name the line on which the offending record starts. The leading "a,b" record
237+
// puts that start on line 2, which verifies cross-record line tracking (EOF is also on line 2 here).
238+
crb.allowUnclosedQuote(false);
239+
assertThatThrownBy(() -> readAll("a,b\n\"c"))
240+
.isInstanceOf(CsvParseException.class)
241+
.hasRootCauseInstanceOf(CsvParseException.class)
242+
.hasRootCauseMessage("Unclosed quoted field at end of input (record starting at line 2)");
243+
}
244+
245+
@Test
246+
void allowUnclosedQuoteStrictAcceptsClosedAtEof() {
247+
// Guard against false positives on the closing-quote-immediately-before-EOF path.
248+
crb.allowUnclosedQuote(false);
249+
assertThatCode(() -> readAll("\"closed\"")).doesNotThrowAnyException();
250+
}
251+
252+
@Test
253+
void allowUnclosedQuoteIgnoresComments() {
254+
// Comment paths are quote-agnostic — an unbalanced quote inside a comment must stay a comment.
255+
crb.allowUnclosedQuote(false).commentStrategy(CommentStrategy.READ);
256+
assertThat(readAll("#abc\"def"))
257+
.singleElement(CsvRecordAssert.CSV_RECORD)
258+
.isComment()
259+
.fields().containsExactly("abc\"def");
260+
}
261+
262+
@Test
263+
void allowUnclosedQuoteThrowsBeforeFieldModifier() {
264+
// The throw must happen before addField is called, so a registered FieldModifier
265+
// never sees a phantom field for the failing record.
266+
final List<String> seen = new ArrayList<>();
267+
final var handler = CsvRecordHandler.of(c -> c.fieldModifier(FieldModifier.modify(value -> {
268+
seen.add(value);
269+
return value;
270+
})));
271+
272+
crb.allowUnclosedQuote(false);
273+
274+
assertThatThrownBy(() -> crb.build(handler, "a,b\n\"c").stream().toList())
275+
.isInstanceOf(CsvParseException.class);
276+
277+
assertThat(seen).containsExactly("a", "b");
278+
}
279+
223280
// field by index
224281

225282
@Test

lib/src/intTest/java/blackbox/reader/CsvReaderBuilderTest.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@ void builderToString() {
8181
CsvReaderBuilder[fieldSeparator=,, quoteCharacter=", \
8282
commentStrategy=NONE, commentCharacter=#, skipEmptyLines=true, \
8383
extraFieldStrategy=STRICT, missingFieldStrategy=STRICT, allowExtraCharsAfterClosingQuote=false, \
84-
trimWhitespacesAroundQuotes=false, detectBomHeader=false, maxBufferSize=16777216]""");
84+
allowUnclosedQuote=true, trimWhitespacesAroundQuotes=false, detectBomHeader=false, \
85+
maxBufferSize=16777216]""");
8586
}
8687

8788
@Test
@@ -132,6 +133,7 @@ void chained() {
132133
.extraFieldStrategy(FieldMismatchStrategy.STRICT)
133134
.missingFieldStrategy(FieldMismatchStrategy.STRICT)
134135
.allowExtraCharsAfterClosingQuote(false)
136+
.allowUnclosedQuote(true)
135137
.ofCsvRecord("foo");
136138

137139
assertThat(reader).isNotNull();

lib/src/intTest/java/blackbox/reader/IndexedCsvReaderTest.java

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ void readerToString() throws IOException {
7878
.isEqualTo("""
7979
IndexedCsvReader[file=%s, charset=UTF-8, fieldSeparator=,, \
8080
quoteCharacter=", commentStrategy=NONE, commentCharacter=#, \
81-
allowExtraCharsAfterClosingQuote=false, pageSize=1, \
81+
allowExtraCharsAfterClosingQuote=false, allowUnclosedQuote=true, pageSize=1, \
8282
index=CsvIndex[bomHeaderLength=0, fileSize=3, fieldSeparator=44, quoteCharacter=34, \
8383
commentStrategy=NONE, commentCharacter=35, recordCount=1, pageCount=1]]""",
8484
file);
@@ -197,6 +197,26 @@ void allowExtraCharsAfterClosingQuoteNot() throws IOException {
197197
}
198198
}
199199

200+
// allow unclosed quote at end of input
201+
202+
@Test
203+
void allowUnclosedQuoteLenientByDefault() throws IOException {
204+
try (var csv = singlePageBuilder().ofCsvRecord(prepareTestFile("\"abc"), StandardCharsets.UTF_8)) {
205+
CsvRecordAssert.assertThat(csv.readPage(0).getFirst()).fields().containsExactly("abc");
206+
}
207+
}
208+
209+
@Test
210+
void allowUnclosedQuoteStrictThrows() throws IOException {
211+
final var bldr = singlePageBuilder().allowUnclosedQuote(false);
212+
try (var csv = bldr.ofCsvRecord(prepareTestFile("\"abc"), StandardCharsets.UTF_8)) {
213+
assertThatThrownBy(() -> csv.readPage(0))
214+
.isInstanceOf(CsvParseException.class)
215+
.hasRootCauseInstanceOf(CsvParseException.class)
216+
.hasRootCauseMessage("Unclosed quoted field at end of input (record starting at line 1)");
217+
}
218+
}
219+
200220
private IndexedCsvReader<CsvRecord> buildSinglePage(final String data) throws IOException {
201221
return singlePageBuilder().ofCsvRecord(prepareTestFile(data));
202222
}

lib/src/main/java/de/siegmar/fastcsv/reader/CsvReader.java

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,7 @@ public static final class CsvReaderBuilder {
440440
private FieldMismatchStrategy extraFieldStrategy = FieldMismatchStrategy.STRICT;
441441
private FieldMismatchStrategy missingFieldStrategy = FieldMismatchStrategy.STRICT;
442442
private boolean allowExtraCharsAfterClosingQuote;
443+
private boolean allowUnclosedQuote = true;
443444
private boolean trimWhitespacesAroundQuotes;
444445
private boolean detectBomHeader;
445446
private int maxBufferSize = DEFAULT_MAX_BUFFER_SIZE;
@@ -605,6 +606,25 @@ public CsvReaderBuilder allowExtraCharsAfterClosingQuote(final boolean allowExtr
605606
return this;
606607
}
607608

609+
/// Defines whether input that ends inside a quoted field (EOF before a closing quote) is tolerated.
610+
///
611+
/// Example: `"foo,bar`
612+
///
613+
/// If this is set to `true`, the value `foo,bar` will be returned as a single field; otherwise,
614+
/// a [CsvParseException] will be thrown.
615+
///
616+
/// Independent of this flag, a [CsvParseException] is thrown if the unclosed region exceeds
617+
/// [#maxBufferSize(int)].
618+
///
619+
/// **The default will change to `false` in version 5.0.**
620+
///
621+
/// @param allowUnclosedQuote allow input ending inside a quoted field (default: `true`).
622+
/// @return This updated object, allowing additional method calls to be chained together.
623+
public CsvReaderBuilder allowUnclosedQuote(final boolean allowUnclosedQuote) {
624+
this.allowUnclosedQuote = allowUnclosedQuote;
625+
return this;
626+
}
627+
608628
/// Defines whether whitespaces before an opening quote and after a closing quote should be allowed and trimmed.
609629
///
610630
/// RFC 4180 does not allow whitespaces between the quotation mark and the field separator or
@@ -981,12 +1001,13 @@ public <T> CsvReader<T> build(final CsvCallbackHandler<T> callbackHandler, final
9811001
final CsvParser csvParser;
9821002
if (isRelaxedConfiguration()) {
9831003
csvParser = new RelaxedCsvParser(fieldSeparator, quoteCharacter, commentStrategy,
984-
commentCharacter, trimWhitespacesAroundQuotes, callbackHandler,
1004+
commentCharacter, trimWhitespacesAroundQuotes, allowUnclosedQuote, callbackHandler,
9851005
maxBufferSize, reader
9861006
);
9871007
} else {
9881008
csvParser = new StrictCsvParser(fieldSeparator.charAt(0), quoteCharacter, commentStrategy,
989-
commentCharacter, allowExtraCharsAfterClosingQuote, callbackHandler, maxBufferSize, reader);
1009+
commentCharacter, allowExtraCharsAfterClosingQuote, allowUnclosedQuote,
1010+
callbackHandler, maxBufferSize, reader);
9901011
}
9911012

9921013
return newReader(callbackHandler, csvParser);
@@ -1009,12 +1030,13 @@ public <T> CsvReader<T> build(final CsvCallbackHandler<T> callbackHandler, final
10091030
final CsvParser csvParser;
10101031
if (isRelaxedConfiguration()) {
10111032
csvParser = new RelaxedCsvParser(fieldSeparator, quoteCharacter, commentStrategy,
1012-
commentCharacter, trimWhitespacesAroundQuotes, callbackHandler,
1033+
commentCharacter, trimWhitespacesAroundQuotes, allowUnclosedQuote, callbackHandler,
10131034
maxBufferSize, data
10141035
);
10151036
} else {
10161037
csvParser = new StrictCsvParser(fieldSeparator.charAt(0), quoteCharacter, commentStrategy,
1017-
commentCharacter, allowExtraCharsAfterClosingQuote, callbackHandler, data);
1038+
commentCharacter, allowExtraCharsAfterClosingQuote, allowUnclosedQuote,
1039+
callbackHandler, data);
10181040
}
10191041

10201042
return newReader(callbackHandler, csvParser);
@@ -1092,6 +1114,7 @@ public String toString() {
10921114
.add("extraFieldStrategy=" + extraFieldStrategy)
10931115
.add("missingFieldStrategy=" + missingFieldStrategy)
10941116
.add("allowExtraCharsAfterClosingQuote=" + allowExtraCharsAfterClosingQuote)
1117+
.add("allowUnclosedQuote=" + allowUnclosedQuote)
10951118
.add("trimWhitespacesAroundQuotes=" + trimWhitespacesAroundQuotes)
10961119
.add("detectBomHeader=" + detectBomHeader)
10971120
.add("maxBufferSize=" + maxBufferSize)

lib/src/main/java/de/siegmar/fastcsv/reader/IndexedCsvReader.java

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ public final class IndexedCsvReader<T> implements Closeable {
5151
private final CommentStrategy commentStrategy;
5252
private final char commentCharacter;
5353
private final boolean allowExtraCharsAfterClosingQuote;
54+
private final boolean allowUnclosedQuote;
5455
private final int pageSize;
5556
private final RandomAccessFile raf;
5657
private final Lock fileLock = new ReentrantLock();
@@ -63,6 +64,7 @@ public final class IndexedCsvReader<T> implements Closeable {
6364
final char fieldSeparator, final char quoteCharacter,
6465
final CommentStrategy commentStrategy, final char commentCharacter,
6566
final boolean allowExtraCharsAfterClosingQuote,
67+
final boolean allowUnclosedQuote,
6668
final int maxBufferSize,
6769
final int pageSize,
6870
final CsvCallbackHandler<T> csvRecordHandler,
@@ -78,6 +80,7 @@ public final class IndexedCsvReader<T> implements Closeable {
7880
this.commentStrategy = commentStrategy;
7981
this.commentCharacter = commentCharacter;
8082
this.allowExtraCharsAfterClosingQuote = allowExtraCharsAfterClosingQuote;
83+
this.allowUnclosedQuote = allowUnclosedQuote;
8184
this.pageSize = pageSize;
8285
this.csvRecordHandler = csvRecordHandler;
8386

@@ -103,7 +106,7 @@ public final class IndexedCsvReader<T> implements Closeable {
103106

104107
raf = new RandomAccessFile(file.toFile(), "r");
105108
csvParser = new StrictCsvParser(fieldSeparator, quoteCharacter, commentStrategy, commentCharacter,
106-
allowExtraCharsAfterClosingQuote, csvRecordHandler, maxBufferSize,
109+
allowExtraCharsAfterClosingQuote, allowUnclosedQuote, csvRecordHandler, maxBufferSize,
107110
new InputStreamReader(new RandomAccessFileInputStream(raf), charset));
108111
}
109112

@@ -260,6 +263,7 @@ public String toString() {
260263
.add("commentStrategy=" + commentStrategy)
261264
.add("commentCharacter=" + commentCharacter)
262265
.add("allowExtraCharsAfterClosingQuote=" + allowExtraCharsAfterClosingQuote)
266+
.add("allowUnclosedQuote=" + allowUnclosedQuote)
263267
.add("pageSize=" + pageSize)
264268
.add("index=" + csvIndex)
265269
.toString();
@@ -291,6 +295,7 @@ public static final class IndexedCsvReaderBuilder {
291295
private CommentStrategy commentStrategy = CommentStrategy.NONE;
292296
private char commentCharacter = '#';
293297
private boolean allowExtraCharsAfterClosingQuote;
298+
private boolean allowUnclosedQuote = true;
294299

295300
@Nullable
296301
private StatusListener statusListener;
@@ -368,6 +373,25 @@ public IndexedCsvReaderBuilder allowExtraCharsAfterClosingQuote(
368373
return this;
369374
}
370375

376+
/// Defines whether input that ends inside a quoted field (EOF before a closing quote) is tolerated.
377+
///
378+
/// Example: `"foo,bar`
379+
///
380+
/// If this is set to `true`, the value `foo,bar` will be returned as a single field; otherwise,
381+
/// a [CsvParseException] will be thrown.
382+
///
383+
/// Independent of this flag, a [CsvParseException] is thrown if the unclosed region exceeds
384+
/// [#maxBufferSize(int)].
385+
///
386+
/// **The default will change to `false` in version 5.0.**
387+
///
388+
/// @param allowUnclosedQuote allow input ending inside a quoted field (default: `true`).
389+
/// @return This updated object, allowing additional method calls to be chained together.
390+
public IndexedCsvReaderBuilder allowUnclosedQuote(final boolean allowUnclosedQuote) {
391+
this.allowUnclosedQuote = allowUnclosedQuote;
392+
return this;
393+
}
394+
371395
/// Sets the `statusListener` to listen for indexer status updates.
372396
///
373397
/// @param statusListener the status listener.
@@ -502,8 +526,8 @@ public <T> IndexedCsvReader<T> build(final CsvCallbackHandler<T> callbackHandler
502526
: new StatusListener() { };
503527

504528
return new IndexedCsvReader<>(file, charset, fieldSeparator, quoteCharacter, commentStrategy,
505-
commentCharacter, allowExtraCharsAfterClosingQuote, maxBufferSize, pageSize, callbackHandler,
506-
csvIndex, sl);
529+
commentCharacter, allowExtraCharsAfterClosingQuote, allowUnclosedQuote,
530+
maxBufferSize, pageSize, callbackHandler, csvIndex, sl);
507531
}
508532

509533
}

lib/src/main/java/de/siegmar/fastcsv/reader/RelaxedCsvParser.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ final class RelaxedCsvParser implements CsvParser {
3636
private final CommentStrategy cStrat;
3737
private final char cChar;
3838
private final boolean trimWhitespacesAroundQuotes;
39+
private final boolean allowUnclosedQuote;
3940
private final CsvCallbackHandler<?> callbackHandler;
4041
private final int maxBufferSize;
4142
private final LookaheadReader reader;
@@ -48,6 +49,7 @@ final class RelaxedCsvParser implements CsvParser {
4849
RelaxedCsvParser(final String fsep, final char qChar,
4950
final CommentStrategy cStrat, final char cChar,
5051
final boolean trimWhitespacesAroundQuotes,
52+
final boolean allowUnclosedQuote,
5153
final CsvCallbackHandler<?> callbackHandler,
5254
final int maxBufferSize,
5355
final Reader reader) {
@@ -59,6 +61,7 @@ final class RelaxedCsvParser implements CsvParser {
5961
this.cStrat = cStrat;
6062
this.cChar = cChar;
6163
this.trimWhitespacesAroundQuotes = trimWhitespacesAroundQuotes;
64+
this.allowUnclosedQuote = allowUnclosedQuote;
6265
this.callbackHandler = callbackHandler;
6366
this.maxBufferSize = maxBufferSize;
6467
final int initialBufferSize = Math.min(maxBufferSize, DEFAULT_BUFFER_SIZE);
@@ -70,6 +73,7 @@ final class RelaxedCsvParser implements CsvParser {
7073
RelaxedCsvParser(final String fsep, final char qChar,
7174
final CommentStrategy cStrat, final char cChar,
7275
final boolean trimWhitespacesAroundQuotes,
76+
final boolean allowUnclosedQuote,
7377
final CsvCallbackHandler<?> callbackHandler,
7478
final int maxBufferSize,
7579
final String data) {
@@ -81,6 +85,7 @@ final class RelaxedCsvParser implements CsvParser {
8185
this.cStrat = cStrat;
8286
this.cChar = cChar;
8387
this.trimWhitespacesAroundQuotes = trimWhitespacesAroundQuotes;
88+
this.allowUnclosedQuote = allowUnclosedQuote;
8489
this.callbackHandler = callbackHandler;
8590
this.maxBufferSize = maxBufferSize;
8691
final int dataSize = Math.max(data.length(), 1);
@@ -214,7 +219,17 @@ private boolean parseQuoted() throws IOException {
214219
boolean endOfRecord = true;
215220

216221
int ch;
217-
OUTER: while ((ch = reader.read()) != EOF) {
222+
OUTER: while (true) {
223+
ch = reader.read();
224+
if (ch == EOF) {
225+
if (!allowUnclosedQuote) {
226+
throw new CsvParseException(
227+
"Unclosed quoted field at end of input (record starting at line %d)"
228+
.formatted(startingLineNumber));
229+
}
230+
break;
231+
}
232+
218233
// fast-forward
219234
while (currentFieldIndex < currentField.length && reader.len > reader.start
220235
&& ch != CR && ch != LF && ch != qChar) {

0 commit comments

Comments
 (0)