Skip to content

Commit cadaa09

Browse files
authored
Merge pull request #1604 from fsprojects/repo-assist/fix-csv-schema-paren-946-6367eea86dd341c8
[Repo Assist] Fix CSV schema parsing: column names containing parentheses corrupt type annotation
2 parents 03f33be + a509fb5 commit cadaa09

2 files changed

Lines changed: 18 additions & 1 deletion

File tree

src/FSharp.Data.Csv.Core/CsvInference.fs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@ let private nameToTypeForCsv =
3636
|> dict
3737

3838
let private nameAndTypeRegex =
39-
lazy Regex(@"^(?<name>.+)\((?<type>.+)\)$", RegexOptions.Compiled ||| RegexOptions.RightToLeft)
39+
// Note: do NOT use RightToLeft here — it causes incorrect splits when the column name
40+
// itself contains parentheses (e.g. "Na( )me (type)" would give name="Na", type=" )me (type").
41+
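// Without RightToLeft, the greedy "name" group backtracks from the end of the string, so the split lands on the *last* "(" and only the trailing "(type)" is treated as the type annotation.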
42+
lazy Regex(@"^(?<name>.+)\((?<type>.+)\)$", RegexOptions.Compiled)
4043

4144
let private overrideByNameRegex =
4245
lazy

tests/FSharp.Data.DesignTime.Tests/InferenceTests.fs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,20 @@ let ``Inference schema override by parameter``() =
359359

360360
actual |> should equal expected
361361

362+
[<Test>]
363+
let ``Column name with parentheses is parsed correctly in schema`` () =
364+
// Regression test for https://github.com/fsprojects/FSharp.Data/issues/946
365+
// A column name like "Na( )me" must not be split at the first '(' when the
366+
// schema type annotation "(int)" appears at the end.
367+
let source = CsvFile.Parse("Na( )me,other\n1,2")
368+
let actual =
369+
inferType source Int32.MaxValue [||] culture "Na( )me (int),other (string)" false false
370+
||> CsvInference.getFields false
371+
|> List.map (fun field -> field.Name, field.Value.RuntimeType)
372+
373+
let expected = [ "Na( )me", typeof<int>; "other", typeof<string> ]
374+
actual |> should equal expected
375+
362376
[<Test>]
363377
let ``Doesn't infer 12-002 as a date``() =
364378
// a previous version inferred a IntOrStringOrDateTime

0 commit comments

Comments
 (0)