Skip to content

Commit fb018d2

Browse files
committed
feat: lock skipLines once CSV parsing has started
skipLines(int) and skipLines(Predicate, int) operate on physical lines and are intended for skipping non-CSV preamble before the actual CSV data starts. Calling them after iteration has begun can split a quoted field at an embedded newline and corrupt the parser's view of subsequent records. Track whether parsing has started (set in fetchRecord before the first parse() call) and throw IllegalStateException from both skipLines overloads if invoked after that point. Document the throw contract on both methods. No existing test calls skipLines after iteration starts, so this is a tightening rather than a behaviour change for any documented use case. JMH: FastCsvReadBenchmark and FastCsvReadRelaxedBenchmark unchanged within noise.
1 parent 9c0d607 commit fb018d2

2 files changed

Lines changed: 41 additions & 0 deletions

File tree

lib/src/intTest/java/blackbox/reader/AbstractSkipLinesTest.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,24 @@ void predicateIoException() {
299299
.hasMessage("java.io.IOException: Cannot read");
300300
}
301301

302+
@Test
303+
void skipLinesCountAfterIterationStartedThrows() {
304+
final CsvReader<CsvRecord> csv = crb.ofCsvRecord("a\nb\nc");
305+
assertThat(csv.iterator()).hasNext();
306+
assertThatThrownBy(() -> csv.skipLines(1))
307+
.isInstanceOf(IllegalStateException.class)
308+
.hasMessage("skipLines must be called before any CSV records are read");
309+
}
310+
311+
@Test
312+
void skipLinesPredicateAfterIterationStartedThrows() {
313+
final CsvReader<CsvRecord> csv = crb.ofCsvRecord("a\nb\nc");
314+
assertThat(csv.iterator()).hasNext();
315+
assertThatThrownBy(() -> csv.skipLines(_ -> true, 1))
316+
.isInstanceOf(IllegalStateException.class)
317+
.hasMessage("skipLines must be called before any CSV records are read");
318+
}
319+
302320
@Test
303321
void predicateNotCalledForEmptyLine() {
304322
final Predicate<String> p = line -> {

lib/src/main/java/de/siegmar/fastcsv/reader/CsvReader.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ public final class CsvReader<T> implements Iterable<T>, Closeable {
6868
private final CloseableIterator<T> csvRecordIterator = new CsvRecordIterator();
6969

7070
private int firstRecordFieldCount = -1;
71+
private boolean parsingStarted;
7172

7273
@SuppressWarnings("checkstyle:ParameterNumber")
7374
CsvReader(final CsvParser csvParser, final CsvCallbackHandler<T> callbackHandler,
@@ -95,16 +96,26 @@ public static CsvReaderBuilder builder() {
9596

9697
/// Skips the specified number of lines.
9798
///
99+
/// **Note:** "lines" here means *physical* lines terminated by `CR`, `LF`, or `CRLF` —
100+
/// not CSV records. This method is intended for skipping non-CSV preamble lines that
101+
/// appear before the actual CSV data starts and must therefore be called before any
102+
/// records are read; once iteration has begun this method throws
103+
/// [IllegalStateException].
104+
///
98105
/// The setting [CsvReaderBuilder#skipEmptyLines(boolean)] has no effect on this method.
99106
///
100107
/// @param lineCount the number of lines to skip.
101108
/// @throws IllegalArgumentException if lineCount is negative.
109+
/// @throws IllegalStateException if any CSV records have already been read.
102110
/// @throws UncheckedIOException if an I/O error occurs.
103111
/// @throws CsvParseException unless enough lines are available to skip.
104112
public void skipLines(final int lineCount) {
105113
if (lineCount < 0) {
106114
throw new IllegalArgumentException("lineCount must be non-negative");
107115
}
116+
if (parsingStarted) {
117+
throw new IllegalStateException("skipLines must be called before any CSV records are read");
118+
}
108119

109120
int i = 0;
110121
try {
@@ -123,20 +134,31 @@ public void skipLines(final int lineCount) {
123134
///
124135
/// The method returns the number of lines actually skipped.
125136
///
137+
/// **Note:** "lines" here means *physical* lines terminated by `CR`, `LF`, or `CRLF` —
138+
/// not CSV records. The predicate sees each physical line as a plain string, with no
139+
/// awareness of quoting or escaping. This method is intended for skipping non-CSV
140+
/// preamble lines that appear before the actual CSV data starts and must therefore be
141+
/// called before any records are read; once iteration has begun this method throws
142+
/// [IllegalStateException].
143+
///
126144
/// The setting [CsvReaderBuilder#skipEmptyLines(boolean)] has no effect on this method.
127145
///
128146
/// @param predicate the predicate to match the lines.
129147
/// @param maxLines the maximum number of lines to skip.
130148
/// @return the number of lines actually skipped.
131149
/// @throws NullPointerException if predicate is `null`.
132150
/// @throws IllegalArgumentException if maxLines is negative.
151+
/// @throws IllegalStateException if any CSV records have already been read.
133152
/// @throws UncheckedIOException if an I/O error occurs.
134153
/// @throws CsvParseException if no matching line is found within the maximum limit of maxLines.
135154
public int skipLines(final Predicate<String> predicate, final int maxLines) {
136155
Objects.requireNonNull(predicate, "predicate must not be null");
137156
if (maxLines < 0) {
138157
throw new IllegalArgumentException("maxLines must be non-negative");
139158
}
159+
if (parsingStarted) {
160+
throw new IllegalStateException("skipLines must be called before any CSV records are read");
161+
}
140162

141163
if (maxLines == 0) {
142164
return 0;
@@ -222,6 +244,7 @@ public Stream<T> stream() {
222244

223245
@Nullable
224246
private T fetchRecord() throws IOException {
247+
parsingStarted = true;
225248
while (csvParser.parse()) {
226249
final T csvRecord = processRecord();
227250

0 commit comments

Comments
 (0)