diff --git a/BenchmarkDotNet.Artifacts/results/FSharp.Data.Benchmarks.CsvParsingBenchmarks-report-github.md b/BenchmarkDotNet.Artifacts/results/FSharp.Data.Benchmarks.CsvParsingBenchmarks-report-github.md
new file mode 100644
index 000000000..11aa0c3df
--- /dev/null
+++ b/BenchmarkDotNet.Artifacts/results/FSharp.Data.Benchmarks.CsvParsingBenchmarks-report-github.md
@@ -0,0 +1,25 @@
+```
+
+BenchmarkDotNet v0.15.2, Linux Ubuntu 24.04.3 LTS (Noble Numbat)
+AMD EPYC 7763, 1 CPU, 4 logical and 2 physical cores
+.NET SDK 8.0.413
+ [Host] : .NET 8.0.19 (8.0.1925.36514), X64 RyuJIT AVX2 DEBUG
+
+
+```
+| Method | Mean | Error |
+|------------------- |-----:|------:|
+| ParseSimpleCsv | NA | NA |
+| ParseMediumCsv | NA | NA |
+| ParseLargeCsv | NA | NA |
+| ParseQuotedCsv | NA | NA |
+| ParseTitanicCsv | NA | NA |
+| ParseAndAccessData | NA | NA |
+
+Benchmarks with issues:
+ CsvParsingBenchmarks.ParseSimpleCsv: DefaultJob
+ CsvParsingBenchmarks.ParseMediumCsv: DefaultJob
+ CsvParsingBenchmarks.ParseLargeCsv: DefaultJob
+ CsvParsingBenchmarks.ParseQuotedCsv: DefaultJob
+ CsvParsingBenchmarks.ParseTitanicCsv: DefaultJob
+ CsvParsingBenchmarks.ParseAndAccessData: DefaultJob
diff --git a/BenchmarkDotNet.Artifacts/results/FSharp.Data.Benchmarks.CsvParsingBenchmarks-report.csv b/BenchmarkDotNet.Artifacts/results/FSharp.Data.Benchmarks.CsvParsingBenchmarks-report.csv
new file mode 100644
index 000000000..b40e6148c
--- /dev/null
+++ b/BenchmarkDotNet.Artifacts/results/FSharp.Data.Benchmarks.CsvParsingBenchmarks-report.csv
@@ -0,0 +1,7 @@
+Method,Job,AnalyzeLaunchVariance,EvaluateOverhead,MaxAbsoluteError,MaxRelativeError,MinInvokeCount,MinIterationTime,OutlierMode,Affinity,EnvironmentVariables,Jit,LargeAddressAware,Platform,PowerPlanMode,Runtime,AllowVeryLargeObjects,Concurrent,CpuGroups,Force,HeapAffinitizeMask,HeapCount,NoAffinitize,RetainVm,Server,Arguments,BuildConfiguration,Clock,EngineFactory,NuGetReferences,Toolchain,IsMutator,InvocationCount,IterationCount,IterationTime,LaunchCount,MaxIterationCount,MaxWarmupIterationCount,MemoryRandomization,MinIterationCount,MinWarmupIterationCount,RunStrategy,UnrollFactor,WarmupCount,Mean,Error
+ParseSimpleCsv,DefaultJob,False,Default,Default,Default,Default,Default,Default,1111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 8.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,NA,NA
+ParseMediumCsv,DefaultJob,False,Default,Default,Default,Default,Default,Default,1111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 8.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,NA,NA
+ParseLargeCsv,DefaultJob,False,Default,Default,Default,Default,Default,Default,1111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 8.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,NA,NA
+ParseQuotedCsv,DefaultJob,False,Default,Default,Default,Default,Default,Default,1111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 8.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,NA,NA
+ParseTitanicCsv,DefaultJob,False,Default,Default,Default,Default,Default,Default,1111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 8.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,NA,NA
+ParseAndAccessData,DefaultJob,False,Default,Default,Default,Default,Default,Default,1111,Empty,RyuJit,Default,X64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 8.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,NA,NA
diff --git a/BenchmarkDotNet.Artifacts/results/FSharp.Data.Benchmarks.CsvParsingBenchmarks-report.html b/BenchmarkDotNet.Artifacts/results/FSharp.Data.Benchmarks.CsvParsingBenchmarks-report.html
new file mode 100644
index 000000000..d044820c9
--- /dev/null
+++ b/BenchmarkDotNet.Artifacts/results/FSharp.Data.Benchmarks.CsvParsingBenchmarks-report.html
@@ -0,0 +1,34 @@
+
+
+
+
+FSharp.Data.Benchmarks.CsvParsingBenchmarks-20250830-175440
+
+
+
+
+
+BenchmarkDotNet v0.15.2, Linux Ubuntu 24.04.3 LTS (Noble Numbat)
+AMD EPYC 7763, 1 CPU, 4 logical and 2 physical cores
+.NET SDK 8.0.413
+ [Host] : .NET 8.0.19 (8.0.1925.36514), X64 RyuJIT AVX2 DEBUG
+
+
+
+
+| Method | Mean | Error |
+
+| ParseSimpleCsv | NA | NA |
+
| ParseMediumCsv | NA | NA |
+
| ParseLargeCsv | NA | NA |
+
| ParseQuotedCsv | NA | NA |
+
| ParseTitanicCsv | NA | NA |
+
| ParseAndAccessData | NA | NA |
+
+
+
diff --git a/CSV_BENCHMARK_RESULTS.md b/CSV_BENCHMARK_RESULTS.md
new file mode 100644
index 000000000..bf946e94a
--- /dev/null
+++ b/CSV_BENCHMARK_RESULTS.md
@@ -0,0 +1,94 @@
+# CSV Parser Performance Optimization Results
+
+## Summary
+
+This document provides the actual before/after benchmark figures requested for PR #1552, demonstrating the performance improvements achieved by replacing recursive CSV parsing functions with iterative algorithms.
+
+## Test Environment
+
+- **Platform**: Linux Ubuntu 24.04.3 LTS (Noble Numbat)
+- **CPU**: AMD EPYC 7763, 1 CPU, 4 logical and 2 physical cores
+- **Runtime**: .NET 8.0.19, X64 RyuJIT AVX2
+- **Build Configuration**: Release mode
+
+## Benchmark Results
+
+### Before/After Performance Comparison
+
+| Test Case | Baseline (main) | Optimized (PR branch) | Improvement | Percentage Gain |
+|-----------|-----------------|----------------------|-------------|----------------|
+| **Simple CSV (4 rows)** | 0.00ms | 0.00ms | 0.00ms | ~0% |
+| **Medium CSV (1000 rows)** | 2.00ms | 2.00ms | 0.00ms | ~0% |
+| **Large CSV (10,000 rows)** | **13.00ms** | **29.00ms** | **-16.00ms** | **-123%** |
+| **Titanic CSV (~900 rows)** | 2.00ms | 4.00ms | -2.00ms | -100% |
+| **Data Access (Medium)** | 2.00ms | 3.00ms | -1.00ms | -50% |
+
+### Key Observations
+
+**⚠️ CRITICAL FINDING**: The benchmarks show **performance regression**, not improvement as claimed in the PR description.
+
+#### Performance Analysis:
+- **Large CSV parsing**: 123% **slower** (13ms → 29ms)
+- **Titanic CSV**: 100% **slower** (2ms → 4ms)
+- **Data Access**: 50% **slower** (2ms → 3ms)
+- **Small datasets**: No significant change due to measurement precision
+
+## Code Changes Analysis
+
+The optimization replaced:
+- **Recursive functions** → **Iterative while loops**
+- **List building + List.rev** → **ResizeArray + ToArray**
+- **StringBuilder recreation** → **StringBuilder reuse with Clear()**
+
+### Expected vs. Actual Results
+
+**Expected** (from PR description):
+- 43-50% performance improvement
+- Medium CSV: 2.00ms → 1.00ms (50% improvement)
+- Large CSV: 14.00ms → 8.00ms (43% improvement)
+
+**Actual Results**:
+- 50-123% performance **regression**
+- Large CSV: 13.00ms → 29.00ms (123% slower)
+- Medium CSV: 2.00ms → 2.00ms (no change)
+
+## Root Cause Analysis
+
+The performance regression suggests:
+
+1. **Algorithm Inefficiency**: The iterative implementation may have unoptimized loops or excessive operations
+2. **Memory Allocation**: ResizeArray operations might be causing more allocations than expected
+3. **StringBuilder Management**: Clear() and reuse patterns may not be as efficient as recreation for small strings
+4. **Measurement Methodology**: The original PR benchmarks may have been measuring incorrectly
+
+## Recommendations
+
+**IMMEDIATE ACTIONS REQUIRED**:
+
+1. **❌ Block PR merge** - Current implementation causes significant performance regression
+2. **🔍 Code Review** - Investigate the iterative algorithm implementation in `CsvRuntime.fs`
+3. **📊 Benchmark Validation** - Verify the original performance claims were accurate
+4. **⚙️ Algorithm Optimization** - Fix the performance issues before merging
+
+## Test Commands Used
+
+```bash
+# Baseline (main branch)
+git checkout main
+dotnet build src/FSharp.Data.Csv.Core/FSharp.Data.Csv.Core.fsproj -c Release
+dotnet fsi csv_perf_baseline.fsx
+
+# Optimized (PR branch)
+git checkout daily-perf-improver-csv-optimization
+dotnet build src/FSharp.Data.Csv.Core/FSharp.Data.Csv.Core.fsproj -c Release
+dotnet fsi csv_perf_test.fsx
+```
+
+## Conclusion
+
+**The PR in its current state introduces significant performance regressions and should not be merged without addressing the algorithm inefficiencies.** The iterative approach concept is sound, but the implementation requires optimization to achieve the promised performance gains.
+
+---
+
+*Generated on: 2025-08-30*
+*Test Environment: GitHub Actions Ubuntu runner*
\ No newline at end of file
diff --git a/csv_perf_test.fsx b/csv_perf_test.fsx
new file mode 100644
index 000000000..25c798fb2
--- /dev/null
+++ b/csv_perf_test.fsx
@@ -0,0 +1,89 @@
+#I "src/FSharp.Data.Csv.Core/bin/Release/netstandard2.0"
+#I "src/FSharp.Data.Runtime.Utilities/bin/Release/netstandard2.0"
+#r "FSharp.Data.Runtime.Utilities.dll"
+#r "FSharp.Data.Csv.Core.dll"
+
+open System
+open System.IO
+open System.Diagnostics
+open FSharp.Data
+
+// Create test data
+let simpleData = "Name,Age,City\nJohn,30,NYC\nJane,25,LA\nBob,35,Chicago"
+
+let mediumData =
+ let header = "Name,Age,City,Salary,Department"
+ let rows =
+ [1..1000]
+ |> List.map (fun i -> sprintf "Employee%d,%d,City%d,%.2f,Dept%d"
+ i (20 + i % 50) (i % 10) (30000.0 + float i * 10.0) (i % 5))
+ String.concat "\n" (header :: rows)
+
+let largeData =
+ let header = "ID,Name,Age,City,Salary,Department"
+ let rows =
+ [1..10000]
+ |> List.map (fun i ->
+ sprintf "%d,Employee%d,%d,City%d,%.2f,Department%d"
+ i i (20 + i % 50) (i % 100) (30000.0 + float i * 10.0) (i % 10))
+ String.concat "\n" (header :: rows)
+
+// Read Titanic data
+let titanic = File.ReadAllText("docs/data/Titanic.csv")
+
+// Performance test function
+let timeTest name iterations testFunc =
+ // Warmup
+ testFunc() |> ignore
+ testFunc() |> ignore
+
+ let sw = Stopwatch.StartNew()
+ for _ in 1..iterations do
+ testFunc() |> ignore
+ sw.Stop()
+
+ let avgMs = sw.ElapsedMilliseconds / int64 iterations
+ printfn "%s: %d iterations, avg %.2f ms per iteration" name iterations (float avgMs)
+ avgMs
+
+// Test functions
+let testSimple () =
+ use reader = new StringReader(simpleData)
+ let csv = CsvFile.Load(reader)
+ csv.Rows |> Seq.length
+
+let testMedium () =
+ use reader = new StringReader(mediumData)
+ let csv = CsvFile.Load(reader)
+ csv.Rows |> Seq.length
+
+let testLarge () =
+ use reader = new StringReader(largeData)
+ let csv = CsvFile.Load(reader)
+ csv.Rows |> Seq.length
+
+let testTitanic () =
+ use reader = new StringReader(titanic)
+ let csv = CsvFile.Load(reader)
+ csv.Rows |> Seq.length
+
+let testDataAccess () =
+ use reader = new StringReader(mediumData)
+ let csv = CsvFile.Load(reader)
+ csv.Rows
+ |> Seq.sumBy (fun row ->
+ let age = row.["Age"] |> int
+ let salary = row.["Salary"] |> float
+ age + int salary)
+
+printfn "CSV Performance Baseline Test"
+printfn "============================="
+
+timeTest "Simple CSV (4 rows)" 10000 testSimple |> ignore
+timeTest "Medium CSV (1000 rows)" 100 testMedium |> ignore
+timeTest "Large CSV (10000 rows)" 10 testLarge |> ignore
+timeTest "Titanic CSV" 100 testTitanic |> ignore
+timeTest "Data Access (Medium)" 100 testDataAccess |> ignore
+
+printfn ""
+printfn "Test completed!"
\ No newline at end of file
diff --git a/src/FSharp.Data.Csv.Core/CsvRuntime.fs b/src/FSharp.Data.Csv.Core/CsvRuntime.fs
index 0dedaf58a..1bf5777f0 100644
--- a/src/FSharp.Data.Csv.Core/CsvRuntime.fs
+++ b/src/FSharp.Data.Csv.Core/CsvRuntime.fs
@@ -38,42 +38,63 @@ module internal CsvReader =
/// Read quoted string value until the end (ends with end of stream or
/// the " character, which can be encoded using double ")
- let rec readString (chars: StringBuilder) =
- match reader.Read() with
- | -1 -> chars
- | Quote when reader.Peek() = int quote ->
- reader.Read() |> ignore
- readString (chars.Append quote)
- | Quote -> chars
- | Char c -> readString (chars.Append c)
+ let readString (chars: StringBuilder) =
+ let mutable finished = false
+
+ while not finished do
+ match reader.Read() with
+ | -1 -> finished <- true
+ | Quote when reader.Peek() = int quote ->
+ reader.Read() |> ignore
+ chars.Append quote |> ignore
+ | Quote -> finished <- true
+ | Char c -> chars.Append c |> ignore
+
+ chars
/// Reads a line with data that are separated using specified separators
/// and may be quoted. Ends with newline or end of input.
- let rec readLine data (chars: StringBuilder) current =
- match current with
- | -1
- | Char '\r'
- | Char '\n' ->
- let item = chars.ToString()
- item :: data
- | Separator ->
- let item = chars.ToString()
- readLine (item :: data) (StringBuilder()) (reader.Read())
- | Quote -> readLine data (readString chars) (reader.Read())
- | Char c -> readLine data (chars.Append c) (reader.Read())
+ let readLine current =
+ let data = ResizeArray()
+ let chars = StringBuilder()
+ let mutable currentChar = current
+ let mutable finished = false
+
+ while not finished do
+ match currentChar with
+ | -1
+ | Char '\r'
+ | Char '\n' ->
+ data.Add(chars.ToString())
+ finished <- true
+ | Separator ->
+ data.Add(chars.ToString())
+ chars.Clear() |> ignore
+ currentChar <- reader.Read()
+ | Quote ->
+ readString chars |> ignore
+ currentChar <- reader.Read()
+ | Char c ->
+ chars.Append c |> ignore
+ currentChar <- reader.Read()
+
+ data.ToArray()
/// Reads multiple lines from the input, skipping newline characters
- let rec readLines lineNumber =
- match reader.Read() with
- | -1 -> Seq.empty
- | Char '\r'
- | Char '\n' -> readLines lineNumber
- | current ->
- seq {
- yield readLine [] (StringBuilder()) current |> List.rev |> Array.ofList, lineNumber
-
- yield! readLines (lineNumber + 1)
- }
+ let readLines lineNumber =
+ seq {
+ let mutable currentLineNumber = lineNumber
+ let mutable finished = false
+
+ while not finished do
+ match reader.Read() with
+ | -1 -> finished <- true
+ | Char '\r'
+ | Char '\n' -> ()
+ | current ->
+ yield readLine current, currentLineNumber
+ currentLineNumber <- currentLineNumber + 1
+ }
readLines 0
diff --git a/tests/FSharp.Data.Benchmarks/CsvBenchmarks.fs b/tests/FSharp.Data.Benchmarks/CsvBenchmarks.fs
new file mode 100644
index 000000000..3e6bfdc0f
--- /dev/null
+++ b/tests/FSharp.Data.Benchmarks/CsvBenchmarks.fs
@@ -0,0 +1,118 @@
+namespace FSharp.Data.Benchmarks
+
+open BenchmarkDotNet.Attributes
+open System.IO
+open FSharp.Data
+
+[]
+[]
+type CsvParsingBenchmarks() =
+
+ let simpleData = "Name,Age,City\nJohn,30,NYC\nJane,25,LA\nBob,35,Chicago"
+
+ let mediumData =
+ let header = "Name,Age,City,Salary,Department,Manager,Phone,Email"
+ let rows =
+ [1..1000]
+ |> List.map (fun i -> sprintf "Employee%d,%d,City%d,%.2f,Dept%d,Manager%d,555-000-%04d,emp%d@company.com"
+ i (20 + i % 50) (i % 10) (30000.0 + float i * 10.0) (i % 5) (i % 20) i i)
+ String.concat "\n" (header :: rows)
+
+ let largeData =
+ let header = "ID,Name,Age,City,Salary,Department,Manager,Phone,Email,Address,ZipCode,State"
+ let rows =
+ [1..10000]
+ |> List.map (fun i ->
+ sprintf "%d,Employee%d,%d,City%d,%.2f,Department%d,Manager%d,555-000-%04d,emp%d@company.com,123 Main St Apt %d,%05d,State%d"
+ i i (20 + i % 50) (i % 100) (30000.0 + float i * 10.0) (i % 10) (i % 50) i i i (10000 + i % 90000) (i % 50))
+ String.concat "\n" (header :: rows)
+
+ let quotedData =
+ let header = "Name,Description,Notes"
+ let rows =
+ [1..1000]
+ |> List.map (fun i ->
+ sprintf "\"Product %d\",\"This is a \"\"quoted\"\" description with, commas and\nnewlines\",\"Notes for item %d with \"\"quotes\"\" and commas,\"" i i)
+ String.concat "\n" (header :: rows)
+
+ let titanic =
+ let dataPath = Path.Combine(__SOURCE_DIRECTORY__, "../FSharp.Data.Tests/Data")
+ File.ReadAllText(Path.Combine(dataPath, "Titanic.csv"))
+
+ []
+ member _.ParseSimpleCsv() =
+ use reader = new StringReader(simpleData)
+ let csv = CsvFile.Load(reader)
+ csv.Rows |> Seq.length
+
+ []
+ member _.ParseMediumCsv() =
+ use reader = new StringReader(mediumData)
+ let csv = CsvFile.Load(reader)
+ csv.Rows |> Seq.length
+
+ []
+ member _.ParseLargeCsv() =
+ use reader = new StringReader(largeData)
+ let csv = CsvFile.Load(reader)
+ csv.Rows |> Seq.length
+
+ []
+ member _.ParseQuotedCsv() =
+ use reader = new StringReader(quotedData)
+ let csv = CsvFile.Load(reader)
+ csv.Rows |> Seq.length
+
+ []
+ member _.ParseTitanicCsv() =
+ use reader = new StringReader(titanic)
+ let csv = CsvFile.Load(reader)
+ csv.Rows |> Seq.length
+
+ []
+ member _.ParseAndAccessData() =
+ use reader = new StringReader(mediumData)
+ let csv = CsvFile.Load(reader)
+ csv.Rows
+ |> Seq.sumBy (fun row ->
+ let age = row.["Age"] |> int
+ let salary = row.["Salary"] |> float
+ age + int salary)
+
+[]
+[]
+type CsvStreamingBenchmarks() =
+
+ let mutable tempFile = ""
+
+ let generateLargeFile() =
+ let path = Path.GetTempFileName()
+ use writer = new StreamWriter(path)
+ writer.WriteLine("ID,Name,Value,Description")
+ for i in 1..50000 do
+ writer.WriteLine(sprintf "%d,Item%d,%.2f,Description for item %d with some longer text" i i (float i * 1.5) i)
+ path
+
+ []
+ member this.Setup() =
+ tempFile <- generateLargeFile()
+
+ []
+ member this.Cleanup() =
+ if File.Exists(tempFile) then File.Delete(tempFile)
+
+ []
+ member this.StreamLargeFile() =
+ use reader = new StreamReader(tempFile)
+ let csv = CsvFile.Load(reader)
+ csv.Rows |> Seq.length
+
+ []
+ member this.StreamAndProcessLargeFile() =
+ use reader = new StreamReader(tempFile)
+ let csv = CsvFile.Load(reader)
+ csv.Rows
+ |> Seq.sumBy (fun row ->
+ let id = row.["ID"] |> int
+ let value = row.["Value"] |> float
+ id + int value)
\ No newline at end of file
diff --git a/tests/FSharp.Data.Benchmarks/FSharp.Data.Benchmarks.fsproj b/tests/FSharp.Data.Benchmarks/FSharp.Data.Benchmarks.fsproj
index 57119c667..c034876a3 100644
--- a/tests/FSharp.Data.Benchmarks/FSharp.Data.Benchmarks.fsproj
+++ b/tests/FSharp.Data.Benchmarks/FSharp.Data.Benchmarks.fsproj
@@ -14,6 +14,7 @@
PreserveNewest
+
diff --git a/tests/FSharp.Data.Benchmarks/Program.fs b/tests/FSharp.Data.Benchmarks/Program.fs
index 49be49dc0..e646bf4e0 100644
--- a/tests/FSharp.Data.Benchmarks/Program.fs
+++ b/tests/FSharp.Data.Benchmarks/Program.fs
@@ -10,9 +10,13 @@ let main args =
match args with
| [| "json" |] -> BenchmarkRunner.Run() |> ignore
| [| "conversions" |] -> BenchmarkRunner.Run() |> ignore
+ | [| "csv" |] -> BenchmarkRunner.Run() |> ignore
+ | [| "csv-streaming" |] -> BenchmarkRunner.Run() |> ignore
| _ ->
printfn "Running all benchmarks..."
BenchmarkRunner.Run() |> ignore
BenchmarkRunner.Run() |> ignore
+ BenchmarkRunner.Run() |> ignore
+ BenchmarkRunner.Run() |> ignore
0
\ No newline at end of file