Skip to content

Commit c85b4d8

Browse files
author
BRUNER Patrick
committed
csv bugfixes and benchmarks
1 parent 843a397 commit c85b4d8

6 files changed

Lines changed: 129 additions & 2 deletions

File tree

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
using System.Text;
2+
3+
using BenchmarkDotNet.Attributes;
4+
5+
using ColumnizerLib;
6+
7+
using CsvColumnizer;
8+
9+
using Moq;
10+
11+
namespace LogExpert.Benchmarks;
12+
13+
/// <summary>
/// BenchmarkDotNet suite for <c>CsvColumnizer</c>. It measures <c>SplitLine</c>,
/// <c>PreProcessLine</c> and <c>Selected</c> against synthetic, semicolon-separated
/// lines, parameterised by the number of lines and the number of columns per line.
/// </summary>
[MemoryDiagnoser]
[RankColumn]
public class CsvColumnizerBenchmarks
{
    // Separator used for both the header and the generated data lines.
    private const string FieldSeparator = ";";

    // Fixed seed so every run generates the same data set.
    private const int DataSeed = 42;

    private CsvColumnizer.CsvColumnizer _columnizer = null!;
    private ILogLineMemory[] _dataLines = null!;

    /// <summary>Number of data lines generated for each benchmark run.</summary>
    [Params(100, 1_000, 10_000)]
    public int LineCount { get; set; }

    /// <summary>Number of fields in the header and in every generated data line.</summary>
    [Params(5, 15)]
    public int ColumnCount { get; set; }

    /// <summary>
    /// Creates the columnizer, hands it a generated header line, and builds the
    /// data lines that the benchmark methods iterate over.
    /// </summary>
    [GlobalSetup]
    public void Setup ()
    {
        // Header fields are named Column0 .. Column{ColumnCount - 1}.
        var headerLine = JoinFields(ColumnCount, static index => $"Column{index}");

        // Hand the header to a fresh columnizer, then let it run its selection step.
        _columnizer = new CsvColumnizer.CsvColumnizer();
        _ = _columnizer.PreProcessLine(headerLine.AsMemory(), 0, 0);
        _columnizer.Selected(new Mock<ILogLineMemoryColumnizerCallback>().Object);

        // Fill the data set; one shared generator keeps the value sequence deterministic.
        _dataLines = new ILogLineMemory[LineCount];
        var generator = new Random(DataSeed);

        for (var lineIndex = 0; lineIndex < LineCount; lineIndex++)
        {
            var lineText = JoinFields(ColumnCount, column => GenerateFieldValue(generator, column));
            _dataLines[lineIndex] = new CsvLogLine(lineText, lineIndex + 1);
        }
    }

    /// <summary>
    /// Splits every generated line and sums the number of column values returned.
    /// </summary>
    /// <returns>Total number of column values across all lines.</returns>
    [Benchmark(Description = "SplitLine: parse all lines")]
    public int SplitAllLines ()
    {
        var columnTotal = 0;

        foreach (var line in _dataLines)
        {
            columnTotal += _columnizer.SplitLine(null, line).ColumnValues.Length;
        }

        return columnTotal;
    }

    /// <summary>
    /// Runs <c>PreProcessLine</c> over every generated line, passing 1-based line numbers.
    /// </summary>
    /// <returns>Number of lines for which the returned value was not empty.</returns>
    [Benchmark(Description = "PreProcessLine: preprocess all lines")]
    public int PreProcessAllLines ()
    {
        var nonEmptyCount = 0;
        var lineNumber = 0;

        foreach (var line in _dataLines)
        {
            lineNumber++;

            if (_columnizer.PreProcessLine(line.FullLine, lineNumber, lineNumber).IsEmpty)
            {
                continue;
            }

            nonEmptyCount++;
        }

        return nonEmptyCount;
    }

    /// <summary>
    /// Calls <c>Selected</c> again on the already initialised columnizer.
    /// </summary>
    /// <returns>The column count reported by the columnizer afterwards.</returns>
    [Benchmark(Description = "Selected: re-detect columns from header")]
    public int RedetectColumns ()
    {
        // NOTE(review): the mock is built inside the measured method, so its construction
        // cost and allocations are presumably part of the reported numbers - confirm
        // whether that is intended.
        var callback = new Mock<ILogLineMemoryColumnizerCallback>().Object;
        _columnizer.Selected(callback);

        return _columnizer.GetColumnCount();
    }

    /// <summary>
    /// Builds one separator-joined line from <paramref name="fieldCount"/> fields.
    /// </summary>
    /// <param name="fieldCount">Number of fields to create.</param>
    /// <param name="createField">Produces the field text for a zero-based field index; invoked in index order.</param>
    /// <returns>The fields joined with the field separator.</returns>
    private static string JoinFields (int fieldCount, Func<int, string> createField)
    {
        var fields = new string[fieldCount];

        for (var index = 0; index < fields.Length; index++)
        {
            fields[index] = createField(index);
        }

        return string.Join(FieldSeparator, fields);
    }

    /// <summary>
    /// Produces a field value whose shape depends on the column index modulo 4:
    /// a number, a short text token, a quoted text containing a comma, or a run of
    /// one repeated upper-case letter.
    /// </summary>
    /// <param name="random">Generator supplying the varying parts of the value.</param>
    /// <param name="columnIndex">Zero-based index of the column being filled.</param>
    /// <returns>The generated field text.</returns>
    private static string GenerateFieldValue (Random random, int columnIndex)
    {
        switch (columnIndex % 4)
        {
            case 0:
            {
                return random.Next(1, 100000).ToString();
            }

            case 1:
            {
                return $"text_{random.Next(1, 9999)}";
            }

            case 2:
            {
                return $"\"Value, with quotes {random.Next(1, 999)}\"";
            }

            default:
            {
                // Draw the letter first and the repeat count second so the
                // generator is consumed in a fixed order.
                var letter = (char)('A' + random.Next(0, 26));
                var repeatCount = random.Next(5, 20);
                return new string(letter, repeatCount);
            }
        }
    }
}

src/LogExpert.Benchmarks/LogExpert.Benchmarks.csproj

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
<PropertyGroup>
44
<OutputType>Exe</OutputType>
5-
<TargetFramework>net10.0</TargetFramework>
5+
<TargetFramework>net10.0-windows</TargetFramework>
6+
<EnableWindowsTargeting>true</EnableWindowsTargeting>
7+
<UseWindowsForms>true</UseWindowsForms>
68
<ImplicitUsings>enable</ImplicitUsings>
79
<Nullable>enable</Nullable>
810
<!-- Override Directory.Build.props settings for BenchmarkDotNet compatibility -->
@@ -12,11 +14,14 @@
1214

1315
<ItemGroup>
1416
<PackageReference Include="BenchmarkDotNet" />
17+
<PackageReference Include="Moq" />
1518
</ItemGroup>
1619

1720
<ItemGroup>
1821
<ProjectReference Include="..\LogExpert.Core\LogExpert.Core.csproj" />
1922
<ProjectReference Include="..\PluginRegistry\LogExpert.PluginRegistry.csproj" />
23+
<ProjectReference Include="..\CsvColumnizer\CsvColumnizer.csproj" />
24+
<ProjectReference Include="..\ColumnizerLib\ColumnizerLib.csproj" />
2025
</ItemGroup>
2126

2227
<!-- Exclude the shared AssemblyInfo.cs that Directory.Build.props tries to add -->

src/LogExpert.Benchmarks/Program.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ public static void Main (string[] args)
1616
_ = BenchmarkRunner.Run<BufferIndexBenchmarks>();
1717
_ = BenchmarkRunner.Run<ReadThroughputBenchmarks>();
1818
_ = BenchmarkRunner.Run<BufferIndexContentionBenchmarks>();
19+
_ = BenchmarkRunner.Run<CsvColumnizerBenchmarks>();
1920
}
2021
else
2122
{
@@ -28,6 +29,7 @@ public static void Main (string[] args)
2829
Console.WriteLine("ReadThroughputBenchmarks: Benchmarks for read throughput");
2930
Console.WriteLine("BufferIndexBenchmarks: Benchmarks for buffer index");
3031
Console.WriteLine("BufferIndexContentionBenchmarks: Benchmarks for buffer index contention");
32+
Console.WriteLine("CsvColumnizerBenchmarks: Benchmarks for CSV columnizer (SplitLine, PreProcess, Selected)");
3133
Console.WriteLine("Dry run:");
3234
Console.WriteLine("dotnet run -c Release -- --filter \"*<benchmarkname>*\" --job Dry --noOverwrite");
3335
Console.WriteLine("Short run:");
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
"Date","Level","Message"
2+
"2021-01-01","Error","comma file"
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
"Date";"Level";"Message"
2+
"2021-12-12";"TRACE";"semicolon file "

src/LogExpert.UI/Controls/LogWindow/LogWindow.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3430,7 +3430,10 @@ private void SetColumnizerInternal (ILogLineMemoryColumnizer columnizer)
34303430
_ = columnComboBox.Items.Add(columnName);
34313431
}
34323432

3433-
columnComboBox.SelectedIndex = 0;
3433+
if (columnComboBox.Items.Count > 0)
3434+
{
3435+
columnComboBox.SelectedIndex = 0;
3436+
}
34343437

34353438
OnColumnizerChanged(CurrentColumnizer);
34363439
}

0 commit comments

Comments
 (0)