Skip to content

Commit c40ba82

Browse files
committed
fix: DH-21438: fix performance regression caused by recent changes
1 parent 1f6c654 commit c40ba82

1 file changed

Lines changed: 40 additions & 16 deletions

File tree

src/main/java/io/deephaven/csv/reading/cells/DelimitedCellGrabber.java

Lines changed: 40 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -124,28 +124,43 @@ private void processQuotedMode(final ByteSlice dest, final MutableBoolean lastIn
124124
final MutableBoolean endOfInput) throws CsvReaderException {
125125
startOffset = offset;
126126
boolean prevCharWasCarriageReturn = false;
127+
outer:
127128
while (true) {
128-
if (offset == size) {
129-
if (!tryEnsureMore()) {
130-
throw new CsvReaderException("Cell did not have closing quote character");
129+
if (!tryEnsureMore()) {
130+
throw new CsvReaderException("Cell did not have closing quote character");
131+
}
132+
133+
// Advance through buffer while the characters are not special.
134+
// This tighter loop makes for more optimizer-friendly code.
135+
byte ch = buffer[offset];
136+
while (ch != '\n' && ch != '\r' && ch != escapeChar && ch != quoteChar) {
137+
++offset;
138+
if (offset == size) {
139+
// Buffer exhausted: loop back to tryEnsureMore(), which either refills it or triggers the missing-closing-quote error.
140+
continue outer;
131141
}
142+
ch = buffer[offset];
132143
}
133-
final byte ch = buffer[offset++];
144+
145+
++offset;
146+
134147
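// Maintain a correct row number. This is somewhat tricky: "\r\n" must count as a single row,
// so a '\n' only bumps physicalRowNum when the previous character was not '\r'.
// NOTE(review): the skip loop above does not clear prevCharWasCarriageReturn for ordinary characters, so
// '\r' + text + '\n' now counts one row where the pre-change code counted two -- confirm this is intended.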
135148
if (ch == '\r') {
136149
++physicalRowNum;
137150
prevCharWasCarriageReturn = true;
138-
} else {
139-
if (ch == '\n' && !prevCharWasCarriageReturn) {
151+
continue;
152+
}
153+
154+
if (ch == '\n') {
155+
if (!prevCharWasCarriageReturn) {
140156
++physicalRowNum;
141157
}
142158
prevCharWasCarriageReturn = false;
143-
}
144-
if (ch != quoteChar && ch != escapeChar) {
145-
// Ordinary character. Note: in quoted mode we will gladly eat field and line separators.
146159
continue;
147160
}
148161

162+
prevCharWasCarriageReturn = false;
163+
149164
if (ch == escapeChar) {
150165
processEscapeChar();
151166
continue;
@@ -236,6 +251,7 @@ private void skipWhitespace() throws CsvReaderException {
236251
private void finishField(final ByteSlice dest, final MutableBoolean lastInRow,
237252
final MutableBoolean endOfInput)
238253
throws CsvReaderException {
254+
outer:
239255
while (true) {
240256
if (!tryEnsureMore()) {
241257
finish(dest);
@@ -244,7 +260,19 @@ private void finishField(final ByteSlice dest, final MutableBoolean lastInRow,
244260
endOfInput.setValue(true);
245261
return;
246262
}
247-
final byte ch = buffer[offset];
263+
264+
// Advance through buffer while the characters are not special.
265+
// This tighter loop makes for more optimizer-friendly code.
266+
byte ch = buffer[offset];
267+
while (ch != fieldDelimiter && ch != '\n' && ch != '\r' && ch != escapeChar) {
268+
++offset;
269+
if (offset == size) {
270+
// Refill buffer or finish.
271+
continue outer;
272+
}
273+
ch = buffer[offset];
274+
}
275+
248276
if (ch == fieldDelimiter) {
249277
finish(dest);
250278
++offset; // ... and skip over the field delimiter.
@@ -280,13 +308,9 @@ private void finishField(final ByteSlice dest, final MutableBoolean lastInRow,
280308
++physicalRowNum;
281309
return;
282310
}
283-
if (ch == escapeChar) {
284-
++offset;
285-
processEscapeChar();
286-
continue;
287-
}
288-
311+
assert ch == escapeChar;
289312
++offset;
313+
processEscapeChar();
290314
}
291315
}
292316

0 commit comments

Comments
 (0)