Skip to content

Commit 1ad8109

Browse files
authored
Fix issue where line starts with a text qualifier (#54)
In Issue 53, wvdvegt discovered a bug where a line could start with a text qualifier and the state machine would incorrectly begin a new field rather than resume the previous field.
1 parent 3db66fc commit 1ad8109

4 files changed

Lines changed: 91 additions & 24 deletions

File tree

CSVFile.nuspec

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<package >
33
<metadata>
44
<id>CSVFile</id>
5-
<version>3.1.0</version>
5+
<version>3.1.1</version>
66
<title>CSVFile</title>
77
<authors>Ted Spence</authors>
88
<owners>Ted Spence</owners>
@@ -13,16 +13,12 @@
1313
<description>Tiny and fast CSV and TSV parsing library (40KB) with zero dependencies. Compatible with most dot net versions.</description>
1414
<icon>docs/icons8-spreadsheet-96.png</icon>
1515
<releaseNotes>
16-
April 4, 2022
16+
March 7, 2023
1717

18-
* Improve handling for read-only properties with more errors and options
19-
* Added support for excluding columns from serialization
20-
* Added support for forcing columns to have text qualifiers
21-
* Support for fully async serialization
22-
* Unified all serialization code
18+
* Fix issue when reading a stream with a text-qualified field that ends with a newline
2319
</releaseNotes>
2420
<readme>docs/README.md</readme>
25-
<copyright>Copyright 2006 - 2022</copyright>
21+
<copyright>Copyright 2006 - 2023</copyright>
2622
<tags>fast csv parser serialization deserialization streaming async</tags>
2723
<repository type="git" url="https://github.com/tspence/csharp-csv-reader" />
2824
<dependencies>

src/CSVStateMachine.cs

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -157,33 +157,22 @@ public string[] ParseChunk(string chunk, bool reachedEnd)
157157
}
158158
var c = _line[_position];
159159

160-
// Are we currently processing a text block (which may optionally span multiple lines)?
161-
if (_inTextQualifier || (!_inTextQualifier && c == _settings.TextQualifier && _work.Length == 0))
160+
// If we are resuming after starting a text qualifier, can we find the end?
161+
if (_inTextQualifier)
162162
{
163-
if (_inTextQualifier)
164-
{
165-
_work.Append(c);
166-
}
167-
_inTextQualifier = true;
168-
169-
// Our next task is to find the end of this qualified-text field
170163
var p2 = -1;
171164
while (p2 < 0)
172165
{
173-
174-
// If we don't see an end in sight, read more from the stream
175166
p2 = _line.IndexOf(_settings.TextQualifier, _position + 1);
176167
if (p2 < 0)
177168
{
178-
179-
// No text qualifiers yet? Let's read more from the stream and continue
180-
_work.Append(_line.Substring(_position + 1));
181-
_line = string.Empty;
182-
_position = -1;
183169
if (reachedEnd)
184170
{
185171
State = CSVState.MissingTrailingQualifier;
186172
}
173+
174+
// Backtrack one character so we can move forward when the next chunk loads
175+
_position--;
187176
return null;
188177
}
189178

@@ -203,6 +192,12 @@ public string[] ParseChunk(string chunk, bool reachedEnd)
203192
// We're done parsing this text qualifier
204193
_inTextQualifier = false;
205194
}
195+
// Is this the start of a text qualified field?
196+
else if (c == _settings.TextQualifier && _work.Length == 0)
197+
{
198+
_inTextQualifier = true;
199+
_position--;
200+
}
206201
// Are we at a line separator? Let's do a quick test first
207202
else if (c == _settings.LineSeparator[0] && _position + _settings.LineSeparator.Length <= _line.Length)
208203
{

tests/BasicParseTests.cs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,5 +190,51 @@ public void ParseSepLineTest()
190190
CSV.ParseSepLine("sep= this is a test since separators can't be more than a single character");
191191
});
192192
}
193+
194+
[Test]
195+
public void TestIssue53()
196+
{
197+
// This use case was reported by wvdvegt as https://github.com/tspence/csharp-csv-reader/issues/53
198+
var line = CSV.ParseLine("\"test\",\"\n\",,,,\"Normal\",\"False\",,,\"Normal\",\"\"");
199+
Assert.AreEqual("test", line[0]);
200+
Assert.AreEqual("\n", line[1]);
201+
Assert.AreEqual("", line[2]);
202+
Assert.AreEqual("", line[3]);
203+
Assert.AreEqual("", line[4]);
204+
Assert.AreEqual("Normal", line[5]);
205+
Assert.AreEqual("False", line[6]);
206+
Assert.AreEqual("", line[7]);
207+
Assert.AreEqual("", line[8]);
208+
Assert.AreEqual("Normal", line[9]);
209+
Assert.AreEqual("", line[10]);
210+
211+
// Try same thing with MS-DOS newlines - CRLF
212+
var line2 = CSV.ParseLine("\"test\",\"\r\n\",,,,\"Normal\",\"False\",,,\"Normal\",\"\"");
213+
Assert.AreEqual("test", line2[0]);
214+
Assert.AreEqual("\r\n", line2[1]);
215+
Assert.AreEqual("", line2[2]);
216+
Assert.AreEqual("", line2[3]);
217+
Assert.AreEqual("", line2[4]);
218+
Assert.AreEqual("Normal", line2[5]);
219+
Assert.AreEqual("False", line2[6]);
220+
Assert.AreEqual("", line2[7]);
221+
Assert.AreEqual("", line2[8]);
222+
Assert.AreEqual("Normal", line2[9]);
223+
Assert.AreEqual("", line2[10]);
224+
225+
// Try same thing with just LF only
226+
var line3 = CSV.ParseLine("\"test\",\"\r\",,,,\"Normal\",\"False\",,,\"Normal\",\"\"");
227+
Assert.AreEqual("test", line3[0]);
228+
Assert.AreEqual("\r", line3[1]);
229+
Assert.AreEqual("", line3[2]);
230+
Assert.AreEqual("", line3[3]);
231+
Assert.AreEqual("", line3[4]);
232+
Assert.AreEqual("Normal", line3[5]);
233+
Assert.AreEqual("False", line3[6]);
234+
Assert.AreEqual("", line3[7]);
235+
Assert.AreEqual("", line3[8]);
236+
Assert.AreEqual("Normal", line3[9]);
237+
Assert.AreEqual("", line3[10]);
238+
}
193239
}
194240
}

tests/ReaderTest.cs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,36 @@ public void TestSepLineOverride()
251251
}
252252
}
253253

254+
255+
[Test]
256+
public void TestIssue53()
257+
{
258+
var settings = new CSVSettings()
259+
{
260+
HeaderRowIncluded = false
261+
};
262+
263+
// This use case was reported by wvdvegt as https://github.com/tspence/csharp-csv-reader/issues/53
264+
var source = "\"test\",\"" + Environment.NewLine + "\",,,,\"Normal\",\"False\",,,\"Normal\",\"\"";
265+
using (var cr = CSVReader.FromString(source, settings))
266+
{
267+
foreach (var line in cr.Lines())
268+
{
269+
Assert.AreEqual("test", line[0]);
270+
Assert.AreEqual(Environment.NewLine, line[1]);
271+
Assert.AreEqual("", line[2]);
272+
Assert.AreEqual("", line[3]);
273+
Assert.AreEqual("", line[4]);
274+
Assert.AreEqual("Normal", line[5]);
275+
Assert.AreEqual("False", line[6]);
276+
Assert.AreEqual("", line[7]);
277+
Assert.AreEqual("", line[8]);
278+
Assert.AreEqual("Normal", line[9]);
279+
Assert.AreEqual("", line[10]);
280+
}
281+
}
282+
}
283+
254284
#if HAS_ASYNC_IENUM
255285
[Test]
256286
public async Task TestAsyncReader()

0 commit comments

Comments
 (0)