Skip to content

Commit e51f9b5

Browse files
authored
fix: DH-21438: fix performance regression caused by recent changes (#310)
1 parent 1f6c654 commit e51f9b5

1 file changed

Lines changed: 40 additions & 17 deletions

File tree

src/main/java/io/deephaven/csv/reading/cells/DelimitedCellGrabber.java

Lines changed: 40 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -124,28 +124,42 @@ private void processQuotedMode(final ByteSlice dest, final MutableBoolean lastIn
124124
final MutableBoolean endOfInput) throws CsvReaderException {
125125
startOffset = offset;
126126
boolean prevCharWasCarriageReturn = false;
127-
while (true) {
128-
if (offset == size) {
129-
if (!tryEnsureMore()) {
130-
throw new CsvReaderException("Cell did not have closing quote character");
127+
outer: while (true) {
128+
if (!tryEnsureMore()) {
129+
throw new CsvReaderException("Cell did not have closing quote character");
130+
}
131+
132+
// Advance through buffer while the characters are not special.
133+
// This tighter loop makes for more optimizer-friendly code.
134+
byte ch = buffer[offset];
135+
while (ch != '\n' && ch != '\r' && ch != escapeChar && ch != quoteChar) {
136+
++offset;
137+
if (offset == size) {
138+
// Refill buffer or finish.
139+
continue outer;
131140
}
141+
ch = buffer[offset];
132142
}
133-
final byte ch = buffer[offset++];
143+
144+
++offset;
145+
134146
// Maintain a correct row number. This is somewhat tricky.
135147
if (ch == '\r') {
136148
++physicalRowNum;
137149
prevCharWasCarriageReturn = true;
138-
} else {
139-
if (ch == '\n' && !prevCharWasCarriageReturn) {
150+
continue;
151+
}
152+
153+
if (ch == '\n') {
154+
if (!prevCharWasCarriageReturn) {
140155
++physicalRowNum;
141156
}
142157
prevCharWasCarriageReturn = false;
143-
}
144-
if (ch != quoteChar && ch != escapeChar) {
145-
// Ordinary character. Note: in quoted mode we will gladly eat field and line separators.
146158
continue;
147159
}
148160

161+
prevCharWasCarriageReturn = false;
162+
149163
if (ch == escapeChar) {
150164
processEscapeChar();
151165
continue;
@@ -236,15 +250,27 @@ private void skipWhitespace() throws CsvReaderException {
236250
private void finishField(final ByteSlice dest, final MutableBoolean lastInRow,
237251
final MutableBoolean endOfInput)
238252
throws CsvReaderException {
239-
while (true) {
253+
outer: while (true) {
240254
if (!tryEnsureMore()) {
241255
finish(dest);
242256
// End of input sets both flags.
243257
lastInRow.setValue(true);
244258
endOfInput.setValue(true);
245259
return;
246260
}
247-
final byte ch = buffer[offset];
261+
262+
// Advance through buffer while the characters are not special.
263+
// This tighter loop makes for more optimizer-friendly code.
264+
byte ch = buffer[offset];
265+
while (ch != fieldDelimiter && ch != '\n' && ch != '\r' && ch != escapeChar) {
266+
++offset;
267+
if (offset == size) {
268+
// Refill buffer or finish.
269+
continue outer;
270+
}
271+
ch = buffer[offset];
272+
}
273+
248274
if (ch == fieldDelimiter) {
249275
finish(dest);
250276
++offset; // ... and skip over the field delimiter.
@@ -280,13 +306,10 @@ private void finishField(final ByteSlice dest, final MutableBoolean lastInRow,
280306
++physicalRowNum;
281307
return;
282308
}
283-
if (ch == escapeChar) {
284-
++offset;
285-
processEscapeChar();
286-
continue;
287-
}
288309

310+
// ch is escapeChar
289311
++offset;
312+
processEscapeChar();
290313
}
291314
}
292315

0 commit comments

Comments
 (0)