Skip to content

Commit 4355ed8

Browse files
authored
Fix buffer read for Windows-style newlines (#67)
Implement Joel's test for complex large files. Correct the counting of positions when a chunk ends with a `\r` and the next chunk begins with a `\n`. Update to version 3.2.0.
1 parent 5af4695 commit 4355ed8

11 files changed

Lines changed: 1787 additions & 8 deletions

File tree

.github/workflows/dotnet.yml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,22 @@ jobs:
2727
- name: Build (Framework 2.0 Tests)
2828
run: msbuild ./tests/net20/tests.net20.csproj
2929
- name: Test (net20)
30-
run: ./NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests/net20/bin/Debug/tests.net20.dll
30+
working-directory: ./tests/net20/bin/Debug/
31+
run: ../../../../NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests.net20.dll
3132
- name: Build (Framework 4.0)
3233
run: msbuild ./src/net40/src.net40.csproj
3334
- name: Build (Framework 4.0 Tests)
3435
run: msbuild ./tests/net40/tests.net40.csproj
3536
- name: Test (net40)
36-
run: ./NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests/net40/bin/Debug/tests.net40.dll
37+
working-directory: ./tests/net40/bin/Debug
38+
run: ../../../../NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests.net40.dll
3739
- name: Build (Framework 4.5)
3840
run: msbuild ./src/net45/src.net45.csproj
3941
- name: Build (Framework 4.5 Tests)
4042
run: msbuild ./tests/net45/tests.net45.csproj
4143
- name: Test (net45)
42-
run: ./NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests/net45/bin/Debug/tests.net45.dll
44+
working-directory: ./tests/net45/bin/Debug/
45+
run: ../../../../NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests.net45.dll
4346
- name: Build (DotNet Core 5.0 and NetStandard 2.0)
4447
run: dotnet build ./csharp-csv-reader.sln
4548
- name: Test (net50)

CSVFile.nuspec

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<package >
33
<metadata>
44
<id>CSVFile</id>
5-
<version>3.1.4</version>
5+
<version>3.2.0</version>
66
<title>CSVFile</title>
77
<authors>Ted Spence</authors>
88
<owners>Ted Spence</owners>
@@ -15,8 +15,8 @@
1515
<releaseNotes>
1616
August 5, 2024
1717

18-
* Add serialization options for arrays and objects
19-
* Fix bad deploy of 3.1.3
18+
* Fix issue with Windows-style newlines crossing chunks found by @joelverhagen
19+
* Fix issue with endless loops reported by @wvvegt
2020
</releaseNotes>
2121
<readme>docs/README.md</readme>
2222
<copyright>Copyright 2006 - 2024</copyright>

src/CSV.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ public static IEnumerable<string[]> ParseStream(StreamReader inStream, CSVSettin
7373
{
7474
yield return row;
7575
}
76+
else if (inStream.EndOfStream)
77+
{
78+
break;
79+
}
7680
}
7781
}
7882

@@ -101,6 +105,10 @@ public static async IAsyncEnumerable<string[]> ParseStreamAsync(StreamReader inS
101105
{
102106
yield return row;
103107
}
108+
else if (inStream.EndOfStream)
109+
{
110+
break;
111+
}
104112
}
105113
}
106114
#endif

src/CSVStateMachine.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ public class CSVStateMachine
6565
/// <returns></returns>
6666
public bool NeedsMoreText()
6767
{
68-
return String.IsNullOrEmpty(_line) || _position >= _line.Length;
68+
return String.IsNullOrEmpty(_line) || _position + _settings.LineSeparator.Length >= _line.Length;
6969
}
7070

7171
/// <summary>
@@ -202,11 +202,13 @@ public string[] ParseChunk(string chunk, bool reachedEnd)
202202
var notEnoughChars = _position + _settings.LineSeparator.Length > _line.Length;
203203
if (notEnoughChars && !reachedEnd)
204204
{
205+
// The line separator is incomplete and more text is still to come: backtrack one character so the whole separator is re-examined on the next call
206+
_position--;
205207
return null;
206208
}
207209

208210
// If this isn't a complete line separator, we must have reached the end (the not-at-end case returned above), so it's just text
209-
if (notEnoughChars && reachedEnd)
211+
if (notEnoughChars)
210212
{
211213
_work.Append(c);
212214
}

tests/PackageAssets.csv

Lines changed: 1695 additions & 0 deletions
Large diffs are not rendered by default.

tests/ReaderTest.cs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
* Home page: https://github.com/tspence/csharp-csv-reader
55
*/
66
using System;
7+
using System.Diagnostics;
8+
using System.IO;
9+
using System.Linq;
10+
using System.Text;
711
using NUnit.Framework;
812
using CSVFile;
913
#if HAS_ASYNC
@@ -336,6 +340,41 @@ public void TestMultipleNewlines()
336340
}
337341
}
338342

343+
[Test]
344+
public void TestIssue62()
345+
{
346+
var inputLines = File.ReadAllLines("PackageAssets.csv");
347+
var desiredLines = 53_543;
348+
var linesToRead = Enumerable
349+
.Repeat(inputLines, desiredLines / inputLines.Length + 1)
350+
.SelectMany(x => x)
351+
.Take(desiredLines)
352+
.ToArray();
353+
354+
var config = new CSVSettings
355+
{
356+
HeaderRowIncluded = false,
357+
};
358+
359+
var outputLines = 0;
360+
var rawText = string.Join(Environment.NewLine, linesToRead);
361+
var rawBytes = Encoding.UTF8.GetBytes(rawText);
362+
using (var memoryStream = new MemoryStream(rawBytes))
363+
{
364+
using (var streamReader = new StreamReader(memoryStream))
365+
{
366+
using (var csvReader = new CSVReader(streamReader, config))
367+
{
368+
foreach (var row in csvReader)
369+
{
370+
outputLines++;
371+
}
372+
}
373+
}
374+
}
375+
Assert.AreEqual(desiredLines, outputLines);
376+
}
377+
339378
#if HAS_ASYNC_IENUM
340379
[Test]
341380
public async Task TestAsyncReader()

tests/net20/tests.net20.csproj

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,12 @@
7474
<Name>src.net20</Name>
7575
</ProjectReference>
7676
</ItemGroup>
77+
<ItemGroup>
78+
<Content Include="..\PackageAssets.csv">
79+
<Link>PackageAssets.csv</Link>
80+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
81+
</Content>
82+
</ItemGroup>
7783
<ItemGroup>
7884
<None Include="packages.config" />
7985
</ItemGroup>

tests/net40/tests.net40.csproj

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,12 @@
7171
<Name>src.net40</Name>
7272
</ProjectReference>
7373
</ItemGroup>
74+
<ItemGroup>
75+
<Content Include="..\PackageAssets.csv">
76+
<Link>PackageAssets.csv</Link>
77+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
78+
</Content>
79+
</ItemGroup>
7480
<ItemGroup>
7581
<None Include="packages.config" />
7682
</ItemGroup>

tests/net45/tests.net45.csproj

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,12 @@
7171
<Name>src.net45</Name>
7272
</ProjectReference>
7373
</ItemGroup>
74+
<ItemGroup>
75+
<Content Include="..\PackageAssets.csv">
76+
<Link>PackageAssets.csv</Link>
77+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
78+
</Content>
79+
</ItemGroup>
7480
<ItemGroup>
7581
<None Include="packages.config" />
7682
</ItemGroup>

tests/net50/tests.net50.csproj

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,11 @@
3030
<ProjectReference Include="..\..\src\net50\src.net50.csproj" />
3131
</ItemGroup>
3232

33+
<ItemGroup>
34+
<Content Include="..\PackageAssets.csv">
35+
<Link>PackageAssets.csv</Link>
36+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
37+
</Content>
38+
</ItemGroup>
39+
3340
</Project>

0 commit comments

Comments
 (0)