Refocus perf follow-up on targeted decode diagnostics

adz · adz · commit 7c99daf167dc · 2026-04-02T22:51:03.000+10:30
diff --git a/TASKS.md b/TASKS.md
@@ -2,20 +2,23 @@
 
 Active work only. Historical completed work lives in [notes/AGENT_NOTES.md](notes/AGENT_NOTES.md) and [AGENTS.md](AGENTS.md).
 
-- [x] **Task 42: Finalize the core boxed contract runtime**
-  - Remove `ISchema`, `SchemaDefinition`, `SchemaField`, and `obj[] -> obj` record construction from the active core implementation.
-  - Move the public authored DSL to `Schema.*` over `Schema<'T>`.
-  - Delete the old schema DSL instead of keeping compatibility shims.
-  - Retarget `Json`, `Xml`, `Yaml`, `KeyValue`, and `JsonSchema` to compile from the new boxed contract IR directly.
-  - Remove any `Lowering.lower` or legacy boxed-schema bridge from the runtime path.
-  - Update tests, benchmarks, and docs to the new surface.
-  - Explicitly exclude bridge internals from this task. Bridge cleanup can follow once the core runtime shape is stable.
-  - Completion bar: there is no old boxed DSL left in the core runtime path or public authored surface.
+- Recently completed: **Task 42** landed the boxed contract runtime and `Schema<'T>` surface. Keep the historical rationale, benchmark notes, and follow-up findings in [notes/AGENT_NOTES.md](notes/AGENT_NOTES.md) and [docs/PROFILE-REPORT-ANALYSIS-INSTRUCTIONAL.md](docs/PROFILE-REPORT-ANALYSIS-INSTRUCTIONAL.md).
+
+- [ ] **Task 50: Run a narrow post-Task-42 performance follow-up**
+  - Treat the current numbers as two separate problems: typed record decode overhead and handwritten parser hot loops.
+  - Stay disciplined: no broad runtime rewrite until a narrower experiment wins clearly on the published scenario set.
+  - First pass:
+    - generalize the typed JSON record-decode lane beyond the benchmark-only hand-written shapes
+    - profile string-heavy decode and serialize hotspots again after each step
+    - identify whether the next worthwhile work is parser scanning, string handling, unknown-field skipping, or record assembly
+  - Output:
+    - refresh the benchmark notes with before/after numbers
+    - either promote one proven optimization direction into production work or explicitly close the line of investigation
 
 - [ ] **Task 49: Review and improve the new DSL for DX**
-  - After Task 42, review the new `Schema.*` surface for compactness, clarity, and maintainability.
+  - Review the new `Schema.*` surface for compactness, clarity, and maintainability after the immediate perf follow-up is settled.
   - Capture improvements in `PLAN-TO-IMPROVE-DSL`.
-  - Do not implement that review directly in the same pass unless it is required to complete Task 42.
+  - Do not fold speculative API cleanup into performance work unless it materially simplifies a measured hot path.
 
 - [ ] **Task 37: Add structured decode error outputs for app boundaries**
   - Provide a structured error model that callers can use for REST responses, startup config failures, and message rejection logs.
diff --git a/benchmarks/BenchmarkScenarios.fs b/benchmarks/BenchmarkScenarios.fs
@@ -54,6 +54,15 @@ type TelemetryPoint = {
     Healthy: bool
 }
 
+type FlatProbe = {
+    Id: int
+    Name: string
+    Code: string
+    Enabled: bool
+    Score: float
+    Trace: string
+}
+
 module ParserScanExperiment =
     let private mixHash state value = (state * 16777619) ^^^ value
 
@@ -279,11 +288,7 @@ module TypedJsonExperiment =
 
         current
 
-    let private record2
-        (field1: Field<'A>)
-        (field2: Field<'B>)
-        (ctor: 'A -> 'B -> 'T)
-        : Decoder<'T> =
+    let private record2 (field1: Field<'A>) (field2: Field<'B>) (ctor: 'A -> 'B -> 'T) : Decoder<'T> =
         fun src ->
             let mutable value1 = Unchecked.defaultof<'A>
             let mutable value2 = Unchecked.defaultof<'B>
@@ -652,7 +657,9 @@ module TypedJsonExperiment =
     let private articlesDecoder = list articleDecoder
     let private telemetryDecoder = list telemetryPointDecoder
 
-    let deserializeSmallMessageBytes (bytes: byte[]) = deserializeBytes smallMessageDecoder bytes
+    let deserializeSmallMessageBytes (bytes: byte[]) =
+        deserializeBytes smallMessageDecoder bytes
+
     let deserializePeopleBytes (bytes: byte[]) = deserializeBytes peopleDecoder bytes
     let deserializeArticlesBytes (bytes: byte[]) = deserializeBytes articlesDecoder bytes
     let deserializeTelemetryBytes (bytes: byte[]) = deserializeBytes telemetryDecoder bytes
@@ -722,9 +729,28 @@ module Schemas =
         |> Schema.field "Healthy" (fun (point: TelemetryPoint) -> point.Healthy)
         |> Schema.build
 
+    let flatProbe =
+        Schema.record (fun id name code enabled score trace -> {
+            Id = id
+            Name = name
+            Code = code
+            Enabled = enabled
+            Score = score
+            Trace = trace
+        })
+        |> Schema.field "Id" (fun (probe: FlatProbe) -> probe.Id)
+        |> Schema.field "Name" (fun (probe: FlatProbe) -> probe.Name)
+        |> Schema.field "Code" (fun (probe: FlatProbe) -> probe.Code)
+        |> Schema.field "Enabled" (fun (probe: FlatProbe) -> probe.Enabled)
+        |> Schema.field "Score" (fun (probe: FlatProbe) -> probe.Score)
+        |> Schema.field "Trace" (fun (probe: FlatProbe) -> probe.Trace)
+        |> Schema.build
+
     let personList = Schema.list person
     let articleList = Schema.list article
     let telemetryList = Schema.list telemetryPoint
+    let flatProbeList = Schema.list flatProbe
+    let stringList = Schema.list Schema.string
 
 module Data =
     let private stjOptions = JsonSerializerOptions()
@@ -820,29 +846,48 @@ module Data =
     let serializeJsonNewtonsoft value = JsonConvert.SerializeObject(value)
     let utf8Bytes (json: string) = Encoding.UTF8.GetBytes(json)
 
-    let createParserStringArray count =
+    let createStringSamples count =
         [ 1..count ]
         |> List.map (fun index ->
             String.replicate 2 $"entry-{index}-with-escapes-\"quoted\"-and-\\\\slashes\\\\-plus-newlines\n")
-        |> serializeJson
+
+    /// Builds `count` FlatProbe records with Ids 1..count; every field is derived from the
+    /// index, so repeated calls produce identical data. Yields an empty list when count < 1.
+    let createFlatProbes count =
+        [ for index in 1..count ->
+              { Id = index
+                Name = $"record-{index}"
+                Code = $"X{index}"
+                Enabled = index % 2 = 0
+                Score = 18.25 + float index / 10.0
+                Trace = $"01HV{index:D4}ABCDEF" } ]
+
+    let createParserStringArray count =
+        createStringSamples count |> serializeJson
 
     let createParserNumberArray count =
         [ 1..count ]
         |> List.map (fun index -> 1_700_000_000_000L + int64 (index * 37))
         |> serializeJson
 
-    let createParserFlatObjectArray count =
+    let createParserFlatObjectArray count = createFlatProbes count |> serializeJson
+
+    /// Builds a JSON array of flat probe objects that each carry one additional "Extra" string field.
+    /// The unknown-field variant keeps the core record shape unchanged so the
+    /// profile can isolate skip-path cost without nested-record noise.
+    let createParserFlatObjectArrayWithUnknownFields count =
         let items =
-            [ 1..count ]
-            |> List.map (fun index ->
+            createFlatProbes count
+            |> List.map (fun probe ->
                 sprintf
-                    """{"Id":%d,"Name":"record-%d","Code":"X%d","Enabled":%s,"Score":%s,"Trace":"01HV%04dABCDEF"}"""
-                    index
-                    index
-                    index
-                    (if index % 2 = 0 then "true" else "false")
-                    ((18.25 + float index / 10.0).ToString(System.Globalization.CultureInfo.InvariantCulture))
-                    index)
+                    """{"Id":%d,"Name":"%s","Code":"%s","Enabled":%s,"Score":%s,"Trace":"%s","Extra":"ignored-%d"}"""
+                    probe.Id
+                    probe.Name
+                    probe.Code
+                    (if probe.Enabled then "true" else "false")
+                    (probe.Score.ToString(System.Globalization.CultureInfo.InvariantCulture))
+                    probe.Trace
+                    probe.Id)
 
         "[" + String.concat "," items + "]"
 
@@ -904,6 +949,22 @@ module Workloads =
                 ^^^ System.Decimal.ToInt32(System.Decimal.Truncate(value.Voltage * 100M)))
             0
 
+    let private hashStrings (values: string list) = // XOR of all string lengths; 0 for []
+        (0, values) ||> List.fold (fun checksum text -> checksum ^^^ text.Length)
+
+    /// XOR-folds each probe's Id, string-field lengths, Enabled bit, and truncated Score * 100 into one int.
+    let private hashFlatProbes (values: FlatProbe list) =
+        let mixProbe (state: int) (probe: FlatProbe) =
+            state
+            ^^^ probe.Id
+            ^^^ probe.Name.Length
+            ^^^ probe.Code.Length
+            ^^^ (if probe.Enabled then 1 else 0)
+            ^^^ int (probe.Score * 100.0)
+            ^^^ probe.Trace.Length
+
+        List.fold mixProbe 0 values
+
     let private hashJsonValue (value: JsonValue) =
         let rec loop state current =
             match current with
@@ -983,6 +1044,43 @@ module Workloads =
             HashValue = (fun boxed -> hashJsonValue (unbox boxed))
         }
 
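+    /// Builds a workload whose three serialize lanes throw when invoked; only the scan and decode lanes are wired up.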
+    /// Decode-only diagnostics keep serialize out of the picture when the
+    /// question is whether string handling, flat-record assembly, or
+    /// unknown-field skipping is the next worthwhile decode target.
+    let private makeDecodeDiagnosticWorkload<'T>
+        (name: string)
+        (description: string)
+        (deserializeIterations: int)
+        (decodeJson: string)
+        (codec: Json.Codec<'T>)
+        (stjDeserialize: string -> 'T)
+        (hashValue: 'T -> int)
+        =
+        let decodeBytes = Data.utf8Bytes decodeJson
+
+        let diagnosticOnly () =
+            failwith "This diagnostic workload is intended for decode and parser operations only."
+
+        {
+            Name = name
+            Description = description
+            SerializeIterations = 1
+            DeserializeIterations = deserializeIterations
+            JsonSizeBytes = decodeBytes.Length
+            CodecMapperSerialize = (fun () -> diagnosticOnly ())
+            StjSerialize = (fun () -> diagnosticOnly ())
+            NewtonsoftSerialize = (fun () -> diagnosticOnly ())
+            OurParserScanBytes = (fun () -> ParserScanExperiment.scanWithOurParser decodeBytes)
+            Utf8JsonReaderScanBytes = (fun () -> ParserScanExperiment.scanWithUtf8JsonReader decodeBytes)
+            CodecMapperDeserializeBytes = (fun () -> box (Json.deserializeBytes codec decodeBytes))
+            TypedExperimentDeserializeBytes = None
+            StjDeserialize = (fun () -> box (stjDeserialize decodeJson))
+            NewtonsoftDeserialize = (fun () -> box (JsonConvert.DeserializeObject<'T>(decodeJson)))
+            HashSerialized = String.length
+            HashValue = (fun boxed -> hashValue (unbox boxed))
+        }
+
     let createLegacyPersonBatch recordCount =
         let value = Data.createPeople recordCount
         let decodeJson = Data.serializeJson value
@@ -1120,6 +1218,7 @@ module Workloads =
         let stringArray = Data.createParserStringArray 1000
         let numberArray = Data.createParserNumberArray 4000
         let flatObjects = Data.createParserFlatObjectArray 400
+        let flatObjectsWithUnknowns = Data.createParserFlatObjectArrayWithUnknownFields 400
 
         [|
             makeParserDiagnosticWorkload
@@ -1139,6 +1238,33 @@ module Workloads =
                 "Parser-only diagnostic: flat object traversal with repeated property names."
                 1500
                 flatObjects
+
+            makeDecodeDiagnosticWorkload
+                "decode-strings-1000"
+                "Decode diagnostic: escaped string array to isolate string unescaping and list construction."
+                3000
+                stringArray
+                (Json.compile Schemas.stringList)
+                (fun json -> System.Text.Json.JsonSerializer.Deserialize<string list>(json, stjOptions))
+                hashStrings
+
+            makeDecodeDiagnosticWorkload
+                "decode-flat-objects-400"
+                "Decode diagnostic: flat records to isolate field dispatch and record assembly."
+                1500
+                flatObjects
+                (Json.compile Schemas.flatProbeList)
+                (fun json -> System.Text.Json.JsonSerializer.Deserialize<FlatProbe list>(json, stjOptions))
+                hashFlatProbes
+
+            makeDecodeDiagnosticWorkload
+                "decode-flat-objects-400-unknown-fields"
+                "Decode diagnostic: flat records with ignored fields to isolate unknown-field skipping."
+                1500
+                flatObjectsWithUnknowns
+                (Json.compile Schemas.flatProbeList)
+                (fun json -> System.Text.Json.JsonSerializer.Deserialize<FlatProbe list>(json, stjOptions))
+                hashFlatProbes
         |]
 
     let names =
diff --git a/docs/HOW_TO_PROFILE_BENCHMARK_HOT_PATHS.md b/docs/HOW_TO_PROFILE_BENCHMARK_HOT_PATHS.md
@@ -28,6 +28,9 @@ Artifacts land under `.artifacts/profiling/<operation>-<scenario-or-records>-<it
 - `stj-deserialize`
 - `newtonsoft-serialize`
 - `newtonsoft-deserialize`
+- `our-parser-scan-bytes`
+- `utf8jsonreader-scan-bytes`
+- `typed-experiment-deserialize-bytes`
 
 ## Typical workflow
 
@@ -51,6 +54,7 @@ Read `perf.stat.txt` for high-level counters and `perf.report.txt` for the hotte
 
 - The profile wrapper now defaults to the `person-batch-25` scenario from the shared benchmark matrix.
 - Pass a scenario name such as `telemetry-500` or `escaped-articles-20` to profile one of the standard workloads.
+- The diagnostics matrix now also includes decode-focused scenarios such as `decode-strings-1000`, `decode-flat-objects-400`, and `decode-flat-objects-400-unknown-fields` when you need to isolate string handling, flat record assembly, or unknown-field skipping.
 - Passing a plain integer as the third argument still uses the legacy nested-record batch with `--records <n>`.
 - The wrapper sets `DOTNET_PerfMapEnabled=3` and `COMPlus_PerfMapEnabled=3` so `perf inject --jit` has the metadata it needs for managed symbol names.
 - If `perf record` is blocked by local kernel permissions, the script will fail before writing `perf.report.txt`. In that case, fix local `perf` permissions first and rerun the same command.
diff --git a/notes/AGENT_NOTES.md b/notes/AGENT_NOTES.md
@@ -150,7 +150,10 @@ This keeps compilation cost visible and avoids hidden recompilation or implicit
 - `benchmarks/CodecMapper.Benchmarks/Program.fs` now forces BenchmarkDotNet onto `InProcessEmitToolchain` to avoid child-project generation entirely.
 - The remaining warning during local runs is just Linux process-priority elevation failure (`Permission denied`), which does not stop benchmarks from executing.
 - A manual Release runner was added in `benchmarks/CodecMapper.Benchmarks.Runner` to keep benchmark reporting moving while that tooling issue remains unresolved.
-- `BenchmarkScenarios.fs` now expresses the benchmark-only typed JSON decode lane through reusable `recordN` and `list` combinators instead of one bespoke decoder loop per workload shape. Keep future `Task 42` work in that benchmark-only lane unless the production runtime decision has been made explicitly.
+- `Json.fs` now ships a production typed-record decode fast path for supported JSON record shapes, backed by `JsonTypedRecordDecode.fs`.
+- Focused reruns on April 2, 2026 showed the old benchmark-only typed lane no longer beating production on the checked scenarios (`small-message` and `telemetry-500`), so treat it as a comparison artifact rather than as the main forward path.
+- `BenchmarkScenarios.fs` now includes decode diagnostics for escaped string arrays, flat records, and flat records with unknown fields so profiling can separate parser scanning, string handling, record assembly, and skip-path cost more cleanly.
+- A first `.NET`-only escaped-string decoder experiment that replaced `StringBuilder` with a pooled `char[]` path produced mixed results and was reverted the same day: it improved the synthetic long escaped-string array diagnostic, but it did not hold up cleanly on the more realistic `escaped-articles-20` workload. Treat string decode as still open, but prefer narrower experiments over broad decoder rewrites.
 - Keep the manual Release runner for fast local snapshots and README updates; use BenchmarkDotNet when you specifically want richer statistical output.
 
 ## Formatting
diff --git a/tests/CodecMapper.Tests/JsonParserTests.fs b/tests/CodecMapper.Tests/JsonParserTests.fs
@@ -31,6 +31,15 @@ let ``Decode unicode escape string JSON`` () =
     let decoded = Json.deserialize codec (quoted """Hello, Wor\u006c\u0064!""")
     test <@ decoded = "Hello, World!" @>
 
+[<Fact>]
+let ``Decode escaped strings with mixed unicode content JSON`` () =
+    let codec = Json.compileSchema Schema.string
+
+    let decoded =
+        Json.deserialize codec (quoted """Ol\u00e1,\n\t\"mundo\" \\ snowman: \u2603""")
+
+    test <@ decoded = "Olá,\n\t\"mundo\" \\ snowman: ☃" @>
+
 [<Fact>]
 let ``Round-trip bool JSON`` () =
     let codec = Json.compileSchema Schema.bool