Skip to content

Commit 6c3a8f4

Browse files
authored
Merge pull request #577 from LogExperts/csv_problems_fix
Csv problems fix
2 parents ba12a83 + 5b47e1e commit 6c3a8f4

18 files changed

Lines changed: 814 additions & 26 deletions

File tree

src/CsvColumnizer/CsvColumnizer.cs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ public ReadOnlyMemory<char> PreProcessLine (ReadOnlyMemory<char> logLine, int li
5252
{
5353
if (realLineNum == 0)
5454
{
55+
// Auto-detect delimiter from the first line
56+
AutoDetectDelimiter(logLine);
57+
5558
// store for later field names and field count retrieval
5659
_firstLine = new CsvLogLine(logLine, 0);
5760

@@ -292,6 +295,42 @@ public Priority GetPriority (string fileName, IEnumerable<ILogLineMemory> sample
292295

293296
#region Private Methods
294297

298+
/// <summary>
299+
/// Auto-detects the delimiter using CsvHelper's built-in detection.
300+
/// After parsing, the detected delimiter is extracted from csv.Parser.Delimiter.
301+
/// </summary>
302+
private void AutoDetectDelimiter (ReadOnlyMemory<char> lineContent)
303+
{
304+
if (lineContent.IsEmpty)
305+
{
306+
return;
307+
}
308+
309+
try
310+
{
311+
var autoDetectedConfig = new CsvHelper.Configuration.CsvConfiguration(CultureInfo.InvariantCulture)
312+
{
313+
DetectDelimiter = true,
314+
DetectDelimiterValues = [",", ";", "\t", "|"]
315+
};
316+
317+
using CsvReader csv = new(new StringReader(lineContent.ToString()), autoDetectedConfig);
318+
_ = csv.Read();
319+
320+
var detectedDelimiter = csv.Parser.Delimiter;
321+
322+
if (detectedDelimiter != _config.DelimiterChar)
323+
{
324+
_config.DelimiterChar = detectedDelimiter;
325+
_config.ConfigureReaderConfiguration();
326+
}
327+
}
328+
catch (CsvHelperException)
329+
{
330+
// If detection fails, keep the current config delimiter
331+
}
332+
}
333+
295334
private ColumnizedLogLine SplitCsvLine (ILogLineMemory line)
296335
{
297336
if (line.FullLine.IsEmpty)

src/CsvColumnizer/CsvLogLine.cs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,5 @@ public class CsvLogLine (string fullLine, int lineNumber) : ILogLineMemory
1717

1818
public CsvLogLine (ReadOnlyMemory<char> fullLine, int lineNumber) : this(fullLine.ToString(), lineNumber)
1919
{
20-
FullLine = fullLine;
21-
LineNumber = lineNumber;
22-
Text = fullLine;
2320
}
2421
}

src/LogExpert.Tests/ColumnizerTests/CSVColumnizerTest.cs

Lines changed: 445 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
using ColumnizerLib;
2+
3+
using LogExpert.Core.Callback;
4+
using LogExpert.Core.Interfaces;
5+
using LogExpert.UI.Controls.LogWindow;
6+
7+
using Moq;
8+
9+
using NUnit.Framework;
10+
11+
namespace LogExpert.Tests.Controls;
12+
13+
[TestFixture]
14+
public class ColumnCacheTests
15+
{
16+
/// <summary>
17+
/// Regression test for a cache-poisoning bug in <see cref="ColumnCache.GetColumnsForLine"/>:
18+
/// when the underlying <see cref="ILogfileReader.GetLogLineMemoryWithWait"/> returned null
19+
/// (e.g. fast-fail timeout) for a given line, the cache stored that null and
20+
/// permanently returned null for every subsequent request of the same line number.
21+
/// This manifested in the GUI as a blank data row in a CSV file containing a header
22+
/// plus exactly one data line (header was dropped by CsvColumnizer's PreProcessLine,
23+
/// leaving a single grid row that was repeatedly requested for paint).
24+
/// The fix adds <c>_cachedColumns == null</c> to the re-fetch condition so a null
25+
/// result is never cached.
26+
/// </summary>
27+
[Test]
28+
public void GetColumnsForLine_NullThenValidLine_ReturnsColumnsOnSecondCall ()
29+
{
30+
const int lineNumber = 0;
31+
32+
var validLine = new Mock<ILogLineMemory>().Object;
33+
var splitResult = new Mock<IColumnizedLogLineMemory>().Object;
34+
35+
var readerMock = new Mock<ILogfileReader>();
36+
readerMock
37+
.SetupSequence(r => r.GetLogLineMemoryWithWait(lineNumber))
38+
.Returns(Task.FromResult<ILogLineMemory>(null))
39+
.Returns(Task.FromResult(validLine));
40+
41+
var columnizerMock = new Mock<ILogLineMemoryColumnizer>();
42+
columnizerMock
43+
.Setup(c => c.SplitLine(It.IsAny<ILogLineMemoryColumnizerCallback>(), validLine))
44+
.Returns(splitResult);
45+
46+
var logWindowMock = new Mock<ILogWindow>();
47+
var callback = new ColumnizerCallback(logWindowMock.Object);
48+
49+
var cache = new ColumnCache();
50+
51+
// First call: reader returns null -> cache must NOT store this null.
52+
var firstResult = cache.GetColumnsForLine(readerMock.Object, lineNumber, columnizerMock.Object, callback);
53+
Assert.That(firstResult, Is.Null, "First call should return null because the reader returned null.");
54+
55+
// Second call for the SAME line: reader now returns a valid line.
56+
// Before the fix this would return the cached null and never call SplitLine.
57+
var secondResult = cache.GetColumnsForLine(readerMock.Object, lineNumber, columnizerMock.Object, callback);
58+
59+
Assert.That(secondResult, Is.SameAs(splitResult), "Second call must re-fetch and return the freshly split columns instead of a cached null.");
60+
readerMock.Verify(r => r.GetLogLineMemoryWithWait(lineNumber), Times.Exactly(2));
61+
columnizerMock.Verify(c => c.SplitLine(It.IsAny<ILogLineMemoryColumnizerCallback>(), validLine), Times.Once);
62+
}
63+
64+
/// <summary>
65+
/// Sanity check that a valid result IS cached: requesting the same line twice
66+
/// with a successful first fetch must not call the reader/columnizer a second time.
67+
/// </summary>
68+
[Test]
69+
public void GetColumnsForLine_SameLineTwice_UsesCachedValue ()
70+
{
71+
const int lineNumber = 0;
72+
73+
var validLine = new Mock<ILogLineMemory>().Object;
74+
var splitResult = new Mock<IColumnizedLogLineMemory>().Object;
75+
76+
var readerMock = new Mock<ILogfileReader>();
77+
readerMock
78+
.Setup(r => r.GetLogLineMemoryWithWait(lineNumber))
79+
.Returns(Task.FromResult(validLine));
80+
81+
var columnizerMock = new Mock<ILogLineMemoryColumnizer>();
82+
columnizerMock
83+
.Setup(c => c.SplitLine(It.IsAny<ILogLineMemoryColumnizerCallback>(), validLine))
84+
.Returns(splitResult);
85+
86+
var logWindowMock = new Mock<ILogWindow>();
87+
var callback = new ColumnizerCallback(logWindowMock.Object);
88+
89+
var cache = new ColumnCache();
90+
91+
var firstResult = cache.GetColumnsForLine(readerMock.Object, lineNumber, columnizerMock.Object, callback);
92+
// callback.LineNum is now equal to lineNumber (set by GetColumnsForLine), so the
93+
// second call should hit the cache.
94+
var secondResult = cache.GetColumnsForLine(readerMock.Object, lineNumber, columnizerMock.Object, callback);
95+
96+
Assert.That(firstResult, Is.SameAs(splitResult));
97+
Assert.That(secondResult, Is.SameAs(splitResult));
98+
readerMock.Verify(r => r.GetLogLineMemoryWithWait(lineNumber), Times.Once);
99+
columnizerMock.Verify(c => c.SplitLine(It.IsAny<ILogLineMemoryColumnizerCallback>(), validLine), Times.Once);
100+
}
101+
}

src/LogExpert.Tests/LogExpert.Tests.csproj

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,15 @@
7777
<Content Include="TestData\CsvTest_01.csv">
7878
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
7979
</Content>
80+
<Content Include="TestData\semicolon.csv">
81+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
82+
</Content>
83+
<Content Include="TestData\tab.csv">
84+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
85+
</Content>
86+
<Content Include="TestData\comma.csv">
87+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
88+
</Content>
8089
<None Update="TestData\organizations-1000.csv">
8190
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
8291
</None>

src/LogExpert.Tests/ReaderTest.cs

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,150 @@ public void Boot ()
2121
{
2222
}
2323

24+
#region GuessNewLineSequenceLength / TryReadLine tests
25+
26+
[Test]
27+
public void TryReadLine_CrLf_NoTrailingNewline_ReadsAllLines ()
28+
{
29+
// File with CRLF between lines but NO trailing newline on last line
30+
var content = "line1\r\nline2"u8.ToArray();
31+
using var stream = new MemoryStream(content);
32+
using var reader = new PositionAwareStreamReaderSystem(stream, new EncodingOptions(), 500);
33+
34+
var lines = ReadAllLines(reader);
35+
36+
Assert.That(lines, Has.Count.EqualTo(2), $"Expected 2 lines, got: [{string.Join("|", lines)}]");
37+
Assert.That(lines[0], Is.EqualTo("line1"));
38+
Assert.That(lines[1], Is.EqualTo("line2"));
39+
}
40+
41+
[Test]
42+
public void TryReadLine_CrLf_WithTrailingNewline_ReadsAllLines ()
43+
{
44+
// File with CRLF between lines AND trailing newline
45+
var content = "line1\r\nline2\r\n"u8.ToArray();
46+
using var stream = new MemoryStream(content);
47+
using var reader = new PositionAwareStreamReaderSystem(stream, new EncodingOptions(), 500);
48+
49+
var lines = ReadAllLines(reader);
50+
51+
Assert.That(lines, Has.Count.EqualTo(2), $"Expected 2 lines, got: [{string.Join("|", lines)}]");
52+
Assert.That(lines[0], Is.EqualTo("line1"));
53+
Assert.That(lines[1], Is.EqualTo("line2"));
54+
}
55+
56+
[Test]
57+
public void TryReadLine_CrOnly_NoTrailingNewline_ReadsAllLines ()
58+
{
59+
// File with CR-only line endings (like old Mac), no trailing newline
60+
var content = "line1\rline2"u8.ToArray();
61+
using var stream = new MemoryStream(content);
62+
using var reader = new PositionAwareStreamReaderSystem(stream, new EncodingOptions(), 500);
63+
64+
var lines = ReadAllLines(reader);
65+
66+
Assert.That(lines, Has.Count.EqualTo(2), $"Expected 2 lines, got: [{string.Join("|", lines)}]");
67+
Assert.That(lines[0], Is.EqualTo("line1"));
68+
Assert.That(lines[1], Is.EqualTo("line2"));
69+
}
70+
71+
[Test]
72+
public void TryReadLine_LfOnly_NoTrailingNewline_ReadsAllLines ()
73+
{
74+
// File with LF-only line endings (Unix), no trailing newline
75+
var content = "line1\nline2"u8.ToArray();
76+
using var stream = new MemoryStream(content);
77+
using var reader = new PositionAwareStreamReaderSystem(stream, new EncodingOptions(), 500);
78+
79+
var lines = ReadAllLines(reader);
80+
81+
Assert.That(lines, Has.Count.EqualTo(2), $"Expected 2 lines, got: [{string.Join("|", lines)}]");
82+
Assert.That(lines[0], Is.EqualTo("line1"));
83+
Assert.That(lines[1], Is.EqualTo("line2"));
84+
}
85+
86+
[Test]
87+
public void TryReadLine_CrLf_ThreeLines_NoTrailingNewline_ReadsAllLines ()
88+
{
89+
// 3 lines with CRLF, no trailing newline
90+
var content = "header\r\ndata1\r\ndata2"u8.ToArray();
91+
using var stream = new MemoryStream(content);
92+
using var reader = new PositionAwareStreamReaderSystem(stream, new EncodingOptions(), 500);
93+
94+
var lines = ReadAllLines(reader);
95+
96+
Assert.That(lines, Has.Count.EqualTo(3), $"Expected 3 lines, got: [{string.Join("|", lines)}]");
97+
Assert.That(lines[0], Is.EqualTo("header"));
98+
Assert.That(lines[1], Is.EqualTo("data1"));
99+
Assert.That(lines[2], Is.EqualTo("data2"));
100+
}
101+
102+
[Test]
103+
public void TryReadLine_CrLf_Position_TracksCorrectly ()
104+
{
105+
// Verify position tracking for CRLF file
106+
var content = "AB\r\nCD\r\n"u8.ToArray(); // 4 + 4 = 8 bytes
107+
using var stream = new MemoryStream(content);
108+
using var reader = new PositionAwareStreamReaderSystem(stream, new EncodingOptions(), 500);
109+
110+
Assert.That(reader.TryReadLine(out var line1), Is.True);
111+
Assert.That(line1.ToString(), Is.EqualTo("AB"));
112+
Assert.That(reader.Position, Is.EqualTo(4), "After 'AB\\r\\n', position should be 4");
113+
114+
Assert.That(reader.TryReadLine(out var line2), Is.True);
115+
Assert.That(line2.ToString(), Is.EqualTo("CD"));
116+
Assert.That(reader.Position, Is.EqualTo(8), "After 'CD\\r\\n', position should be 8");
117+
118+
Assert.That(reader.TryReadLine(out _), Is.False, "Should be EOF");
119+
}
120+
121+
[Test]
122+
public void TryReadLine_CrLf_NoTrailingNewline_Position_TracksCorrectly ()
123+
{
124+
// Verify position tracking for CRLF file without trailing newline
125+
var content = "AB\r\nCD"u8.ToArray(); // 4 + 2 = 6 bytes
126+
using var stream = new MemoryStream(content);
127+
using var reader = new PositionAwareStreamReaderSystem(stream, new EncodingOptions(), 500);
128+
129+
Assert.That(reader.TryReadLine(out var line1), Is.True);
130+
Assert.That(line1.ToString(), Is.EqualTo("AB"));
131+
Assert.That(reader.Position, Is.EqualTo(4), "After 'AB\\r\\n', position should be 4");
132+
133+
Assert.That(reader.TryReadLine(out var line2), Is.True);
134+
Assert.That(line2.ToString(), Is.EqualTo("CD"));
135+
// Last line has no newline, but current implementation adds _newLineSequenceLength anyway.
136+
// BUG: position is 8 (adds 2 for nonexistent CRLF), should be 6.
137+
// This causes incorrect seeking on buffer re-read.
138+
Assert.That(reader.Position, Is.EqualTo(8), "Known issue: overcounts by newline length on last line without trailing newline");
139+
}
140+
141+
[Test]
142+
public void TryReadLine_SingleLine_NoNewline_ReadsLine ()
143+
{
144+
// Single line file with no newline at all
145+
var content = "onlyline"u8.ToArray();
146+
using var stream = new MemoryStream(content);
147+
using var reader = new PositionAwareStreamReaderSystem(stream, new EncodingOptions(), 500);
148+
149+
var lines = ReadAllLines(reader);
150+
151+
Assert.That(lines, Has.Count.EqualTo(1), $"Expected 1 line, got: [{string.Join("|", lines)}]");
152+
Assert.That(lines[0], Is.EqualTo("onlyline"));
153+
}
154+
155+
private static List<string> ReadAllLines (PositionAwareStreamReaderSystem reader)
156+
{
157+
List<string> lines = [];
158+
while (reader.TryReadLine(out var lineMemory))
159+
{
160+
lines.Add(lineMemory.ToString());
161+
}
162+
163+
return lines;
164+
}
165+
166+
#endregion
167+
24168
//TODO reimplement
25169
private void CompareReaderImplementationsInternal (string fileName, Encoding enc, int maxPosition)
26170
{

src/LogExpert.Tests/TestData/CsvTest_01.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
policyID,statecode,county,eq_site_limit,hu_site_limit,fl_site_limit,fr_site_limit,tiv_2011,tiv_2012,eq_site_deductible,hu_site_deductible,fl_site_deductible,fr_site_deductible,point_latitude,point_longitude,line,construction,point_granularity119736,FL,CLAY COUNTY,498960,498960,498960,498960,498960,792148.9,0,9979.2,0,0,30.102261,-81.711777,Residential,Masonry,1448094,FL,CLAY COUNTY,1322376.3,1322376.3,1322376.3,1322376.3,1322376.3,1438163.57,0,0,0,0,30.063936,-81.707664,Residential,Masonry,3398149,FL,PINELLAS COUNTY,373488.3,373488.3,0,0,373488.3,596003.67,0,0,0,0,28.06444,-82.77459,Residential,Masonry,1
1+
policyID,statecode,county,eq_site_limit,hu_site_limit,fl_site_limit,fr_site_limit,tiv_2011,tiv_2012,eq_site_deductible,hu_site_deductible,fl_site_deductible,fr_site_deductible,point_latitude,point_longitude,line,construction,point_granularity119736,FL,CLAY COUNTY,498960,498960,498960,498960,498960,792148.9,0,9979.2,0,0,30.102261,-81.711777,Residential,Masonry,1448094,FL,CLAY COUNTY,1322376.3,1322376.3,1322376.3,1322376.3,1322376.3,1438163.57,0,0,0,0,30.063936,-81.707664,Residential,Masonry,3398149,FL,PINELLAS COUNTY,373488.3,373488.3,0,0,373488.3,596003.67,0,0,0,0,28.06444,-82.77459,Residential,Masonry,1119736,FL,CLAY COUNTY,498960,498960,498960,498960,498960,792148.9,0,9979.2,0,0,30.102261,-81.711777,Residential,Masonry,1448094,FL,CLAY COUNTY,1322376.3,1322376.3,1322376.3,1322376.3,1322376.3,1438163.57,0,0,0,0,30.063936,-81.707664,Residential,Masonry,3398149,FL,PINELLAS COUNTY,373488.3,373488.3,0,0,373488.3,596003.67,0,0,0,0,28.06444,-82.77459,Residential,Masonry,1119736,FL,CLAY COUNTY,498960,498960,498960,498960,498960,792148.9,0,9979.2,0,0,30.102261,-81.711777,Residential,Masonry,1448094,FL,CLAY COUNTY,1322376.3,1322376.3,1322376.3,1322376.3,1322376.3,1438163.57,0,0,0,0,30.063936,-81.707664,Residential,Masonry,3398149,FL,PINELLAS COUNTY,373488.3,373488.3,0,0,373488.3,596003.67,0,0,0,0,28.06444,-82.77459,Residential,Masonry,1119736,FL,CLAY COUNTY,498960,498960,498960,498960,498960,792148.9,0,9979.2,0,0,30.102261,-81.711777,Residential,Masonry,1448094,FL,CLAY COUNTY,1322376.3,1322376.3,1322376.3,1322376.3,1322376.3,1438163.57,0,0,0,0,30.063936,-81.707664,Residential,Masonry,3398149,FL,PINELLAS COUNTY,373488.3,373488.3,0,0,373488.3,596003.67,0,0,0,0,28.06444,-82.77459,Residential,Masonry,1119736,FL,CLAY COUNTY,498960,498960,498960,498960,498960,792148.9,0,9979.2,0,0,30.102261,-81.711777,Residential,Masonry,1448094,FL,CLAY COUNTY,1322376.3,1322376.3,1322376.3,1322376.3,1322376.3,1438163.57,0,0,0,0,30.063936,-81.707664,Residential,Masonry,3398149,FL,PINELLAS COUNTY,373488.3,373488.3,0,0,373488.3,596003.67,0,0,0,0,28.06444,-82.77459,Residential,Masonry,1119736,FL,CLAY COUNTY,498960,498960,498960,498960,498960,792148.9,0,9979.2,0,0,30.102261,-81.711777,Residential,Masonry,1448094,FL,CLAY COUNTY,1322376.3,1322376.3,1322376.3,1322376.3,1322376.3,1438163.57,0,0,0,0,30.063936,-81.707664,Residential,Masonry,3398149,FL,PINELLAS COUNTY,373488.3,373488.3,0,0,373488.3,596003.67,0,0,0,0,28.06444,-82.77459,Residential,Masonry,1119736,FL,CLAY COUNTY,498960,498960,498960,498960,498960,792148.9,0,9979.2,0,0,30.102261,-81.711777,Residential,Masonry,1448094,FL,CLAY COUNTY,1322376.3,1322376.3,1322376.3,1322376.3,1322376.3,1438163.57,0,0,0,0,30.063936,-81.707664,Residential,Masonry,3398149,FL,PINELLAS COUNTY,373488.3,373488.3,0,0,373488.3,596003.67,0,0,0,0,28.06444,-82.77459,Residential,Masonry,1119736,FL,CLAY COUNTY,498960,498960,498960,498960,498960,792148.9,0,9979.2,0,0,30.102261,-81.711777,Residential,Masonry,1448094,FL,CLAY COUNTY,1322376.3,1322376.3,1322376.3,1322376.3,1322376.3,1438163.57,0,0,0,0,30.063936,-81.707664,Residential,Masonry,3398149,FL,PINELLAS COUNTY,373488.3,373488.3,0,0,373488.3,596003.67,0,0,0,0,28.06444,-82.77459,Residential,Masonry,1119736,FL,CLAY COUNTY,498960,498960,498960,498960,498960,792148.9,0,9979.2,0,0,30.102261,-81.711777,Residential,Masonry,1448094,FL,CLAY COUNTY,1322376.3,1322376.3,1322376.3,1322376.3,1322376.3,1438163.57,0,0,0,0,30.063936,-81.707664,Residential,Masonry,3398149,FL,PINELLAS COUNTY,373488.3,373488.3,0,0,373488.3,596003.67,0,0,0,0,28.06444,-82.77459,Residential,Masonry,1119736,FL,CLAY COUNTY,498960,498960,498960,498960,498960,792148.9,0,9979.2,0,0,30.102261,-81.711777,Residential,Masonry,1448094,FL,CLAY COUNTY,1322376.3,1322376.3,1322376.3,1322376.3,1322376.3,1438163.57,0,0,0,0,30.063936,-81.707664,Residential,Masonry,3398149,FL,PINELLAS COUNTY,373488.3,373488.3,0,0,373488.3,596003.67,0,0,0,0,28.06444,-82.77459,Residential,Masonry,1
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
"Date","Level","Message"
2-
"2021-01-01","Error","comma file"
2+
"2021-01-01","Error","comma file"
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
"Date","Level","Message"
2+
"2021-01-01","Error","comma file"
3+
"2021-01-01","Error","comma file"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
"Date";"Level";"Message"
2-
"2021-12-12";"TRACE";"semicolon file "
2+
"2021-12-12";"TRACE";"semicolon file "

0 commit comments

Comments
 (0)