managedcode
diff --git a/README.md b/README.md
Lines changed: 89 additions & 32 deletions
diff --git a/benchmarks/MarkdownLd.Kb.Benchmarks/AGENTS.md b/benchmarks/MarkdownLd.Kb.Benchmarks/AGENTS.md
Lines changed: 2 additions & 1 deletion
diff --git a/benchmarks/MarkdownLd.Kb.Benchmarks/BenchmarkCategories.cs b/benchmarks/MarkdownLd.Kb.Benchmarks/BenchmarkCategories.cs
Lines changed: 6 additions & 0 deletions
diff --git a/benchmarks/MarkdownLd.Kb.Benchmarks/BenchmarkCorpusFactory.cs b/benchmarks/MarkdownLd.Kb.Benchmarks/BenchmarkCorpusFactory.cs
Lines changed: 23 additions & 53 deletions
diff --git a/benchmarks/MarkdownLd.Kb.Benchmarks/BenchmarkCorpusProfile.cs b/benchmarks/MarkdownLd.Kb.Benchmarks/BenchmarkCorpusProfile.cs
Lines changed: 10 additions & 0 deletions
@@ -1312,45 +1312,102 @@ dotnet run --project benchmarks/MarkdownLd.Kb.Benchmarks -c Release -- --filter
 dotnet run --project benchmarks/MarkdownLd.Kb.Benchmarks -c Release -- --filter "*GraphBuildBenchmarks*"
 dotnet run --project benchmarks/MarkdownLd.Kb.Benchmarks -c Release -- --filter "*GraphSearchBenchmarks*"
 dotnet run --project benchmarks/MarkdownLd.Kb.Benchmarks -c Release -- --filter "*TiktokenSearchBenchmarks*"
+dotnet run --project benchmarks/MarkdownLd.Kb.Benchmarks -c Release -- --filter "*GraphPersistenceBenchmarks*"
+dotnet run --project benchmarks/MarkdownLd.Kb.Benchmarks -c Release -- --filter "*GraphLifecycleSmokeBenchmarks*" --job Dry
 MARKDOWN_LD_KB_BENCHMARK_PROFILE=cpu dotnet run --project benchmarks/MarkdownLd.Kb.Benchmarks -c Release -- --filter "*FuzzyEditDistanceBenchmarks*"
 ```
 
-Benchmark reports are written to `artifacts/benchmarks/results` as Markdown, CSV, and JSON. The reports are intentionally ignored by git because they depend on the local machine and current system load. PR validation runs `FuzzyEditDistanceBenchmarks` as a mandatory smoke benchmark and uploads the reports as the `benchmark-smoke` artifact. The full benchmark workflow in `.github/workflows/benchmarks.yml` runs manually or on the weekly schedule and uploads the complete `benchmarkdotnet-results` artifact.
+Benchmark reports are written to `artifacts/benchmarks/results` as Markdown, CSV, and full JSON. The reports are intentionally ignored by git because they depend on the local machine and current system load. PR validation runs `FuzzyEditDistanceBenchmarks` as a mandatory smoke benchmark and uploads the reports as the `benchmark-smoke` artifact. The full benchmark workflow in `.github/workflows/benchmarks.yml` runs manually or on the weekly schedule and uploads the complete `benchmarkdotnet-results` artifact. The benchmark config adds one default `ShortRun` job only when the command does not already pass `--job`, `--job=...`, or `-j`.
 
-Latest local benchmark run, executed on May 3, 2026 with BenchmarkDotNet 0.15.8, .NET 10.0.5, ShortRun, Apple M2 Pro, exported these reports:
+The exported BenchmarkDotNet reports include the diagnostic columns that matter for this library:
 
-| Suite | Benchmarks executed | Export prefix |
-| --- | ---: | --- |
-| Fuzzy edit distance | 8 | `ManagedCode.MarkdownLd.Kb.Benchmarks.FuzzyEditDistanceBenchmarks-report` |
-| Graph build | 3 | `ManagedCode.MarkdownLd.Kb.Benchmarks.GraphBuildBenchmarks-report` |
-| Graph search | 54 | `ManagedCode.MarkdownLd.Kb.Benchmarks.GraphSearchBenchmarks-report` |
-| Tiktoken search | 18 | `ManagedCode.MarkdownLd.Kb.Benchmarks.TiktokenSearchBenchmarks-report` |
-
-Graph build scales over generated Markdown corpora like this:
-
-| Documents | Mean | Allocated |
-| ---: | ---: | ---: |
-| 25 | 1.169 ms | 1.81 MB |
-| 250 | 9.873 ms | 14.65 MB |
-| 1000 | 70.672 ms | 57.94 MB |
+| Area | Report data | Used for |
+| --- | --- | --- |
+| Latency | `Mean`, `Error`, `StdDev`, `Ratio`, `RatioSD`; full JSON also keeps min, quartiles, max, percentiles, and raw measurements | compare retrieval paths under the same generated workload |
+| Allocation and GC | `Allocated`, `Alloc Ratio`, `Gen0`, `Gen1`, `Gen2` | find APIs that allocate enough to hurt repeated search calls |
+| Threading | `Completed Work Items`, `Lock Contentions` | identify SPARQL and federation paths that schedule work or contend on locks |
+| Repro metadata | runtime, JIT, platform, job, iteration counts, corpus profile, query scenario | keep local runs comparable without pretending they are machine-independent |
+| Optional profiles | EventPipe `cpu`, `gc`, or `jit` artifacts when `MARKDOWN_LD_KB_BENCHMARK_PROFILE` is set | inspect hot methods after a suspicious benchmark result |
+
+Benchmark workload profiles are named by shape instead of using unexplained document-count params:
+
+| Profile | Shape |
+| --- | --- |
+| `ShortDocuments` | 250 compact runbook-like Markdown documents |
+| `LongDocuments` | 80 long recovery playbooks with repeated sections |
+| `LargeCorpus` | 1000 compact documents for scale, persistence, and build pressure |
+| `TokenizedMultilingual` | 250 multilingual/CJK/token-heavy documents |
+| `FederatedRunbooks` | 250 SPARQL/service/runbook documents for local federation paths |
+
+Latest local benchmark run, executed on May 3, 2026 with BenchmarkDotNet 0.15.8, .NET 10.0.5, Apple M2 Pro, exported these reports:
+
+| Suite | Job | Benchmarks executed | Export prefix |
+| --- | --- | ---: | --- |
+| Fuzzy edit distance | ShortRun | 8 | `ManagedCode.MarkdownLd.Kb.Benchmarks.FuzzyEditDistanceBenchmarks-report` |
+| Graph build | ShortRun | 4 | `ManagedCode.MarkdownLd.Kb.Benchmarks.GraphBuildBenchmarks-report` |
+| Graph search | ShortRun | 54 | `ManagedCode.MarkdownLd.Kb.Benchmarks.GraphSearchBenchmarks-report` |
+| Tiktoken search | ShortRun | 12 | `ManagedCode.MarkdownLd.Kb.Benchmarks.TiktokenSearchBenchmarks-report` |
+| Graph persistence | ShortRun | 39 | `ManagedCode.MarkdownLd.Kb.Benchmarks.GraphPersistenceBenchmarks-report` |
+| Graph lifecycle smoke | Dry | 1 | `ManagedCode.MarkdownLd.Kb.Benchmarks.GraphLifecycleSmokeBenchmarks-report` |
+
+Graph build:
+
+| Profile | Mean | StdDev | Allocated |
+| --- | ---: | ---: | ---: |
+| `ShortDocuments` | 9.548 ms | 0.0298 ms | 14.70 MB |
+| `LongDocuments` | 7.544 ms | 0.0149 ms | 14.35 MB |
+| `LargeCorpus` | 59.453 ms | 12.7272 ms | 58.08 MB |
+| `TokenizedMultilingual` | 12.433 ms | 0.0508 ms | 17.77 MB |
 
 Graph search exact-query mean time:
 
-| Documents | Ranked graph | BM25 | BM25 fuzzy | Focused | Schema SPARQL | Local federated |
-| ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-| 25 | 0.111 ms | 0.197 ms | 0.234 ms | 0.191 ms | 5.799 ms | 8.769 ms |
-| 250 | 1.225 ms | 2.251 ms | 2.663 ms | 2.103 ms | 59.994 ms | 65.124 ms |
-| 1000 | 8.907 ms | 16.187 ms | 17.939 ms | 13.258 ms | 282.885 ms | 293.240 ms |
-
-Typo-query search at 1000 generated documents measured 7.551 ms for ranked graph search, 12.044 ms for BM25, 20.340 ms for BM25 fuzzy, 12.700 ms for focused search, 258.953 ms for schema SPARQL, and 306.332 ms for local federated schema search. The fuzzy paths are opt-in and are expected to spend extra time to recover typo-heavy queries; they are not intended to beat exact lexical matching on raw speed.
-
-Tiktoken token-distance search mean time:
-
-| Documents | Exact query | Fuzzy-corrected exact | Typo query | Fuzzy-corrected typo | No-match query | Fuzzy-corrected no-match |
-| ---: | ---: | ---: | ---: | ---: | ---: | ---: |
-| 25 | 15.04 us | 15.94 us | 17.15 us | 24.99 us | 15.39 us | 18.60 us |
-| 100 | 61.65 us | 57.72 us | 64.87 us | 75.74 us | 57.71 us | 64.71 us |
-| 250 | 146.74 us | 146.27 us | 161.66 us | 184.64 us | 145.79 us | 153.14 us |
+| Profile | Ranked graph | BM25 | BM25 fuzzy | Focused | Schema SPARQL | Local federated |
+| --- | ---: | ---: | ---: | ---: | ---: | ---: |
+| `ShortDocuments` | 1.200 ms | 2.018 ms | 2.627 ms | 2.053 ms | 46.034 ms | 49.615 ms |
+| `LongDocuments` | 0.480 ms | 3.577 ms | 3.574 ms | 0.642 ms | 12.819 ms | 14.561 ms |
+| `FederatedRunbooks` | 1.334 ms | 2.723 ms | 2.720 ms | 2.271 ms | 45.981 ms | 55.269 ms |
+
+Graph search exact-query allocated memory per operation:
+
+| Profile | Ranked graph | BM25 | BM25 fuzzy | Focused | Schema SPARQL | Local federated |
+| --- | ---: | ---: | ---: | ---: | ---: | ---: |
+| `ShortDocuments` | 2.37 MB | 4.83 MB | 7.22 MB | 3.27 MB | 60.34 MB | 62.33 MB |
+| `LongDocuments` | 1.91 MB | 10.67 MB | 10.67 MB | 1.21 MB | 20.22 MB | 22.21 MB |
+| `FederatedRunbooks` | 2.54 MB | 6.80 MB | 6.80 MB | 3.48 MB | 60.75 MB | 62.61 MB |
+
+The `ShortDocuments` exact-query diagnostic slice shows the current hot paths:
+
+| Method | Mean | Allocated | Alloc ratio | Gen0 | Gen1 | Gen2 | Work items | Lock contentions |
+| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
+| Ranked graph | 1.200 ms | 2.37 MB | 1.00x | 296.8750 | 107.4219 | 0 | 0 | 0 |
+| BM25 | 2.018 ms | 4.83 MB | 2.04x | 601.5625 | 210.9375 | 0 | 0 | 0 |
+| BM25 fuzzy | 2.627 ms | 7.22 MB | 3.04x | 902.3438 | 230.4688 | 0 | 0 | 0 |
+| Focused | 2.053 ms | 3.27 MB | 1.38x | 406.2500 | 179.6875 | 0 | 0 | 0 |
+| Schema SPARQL | 46.034 ms | 60.34 MB | 25.44x | 8500.0000 | 1833.3333 | 500.0000 | 551 | 325 |
+| Local federated | 49.615 ms | 62.33 MB | 26.27x | 8666.6667 | 2166.6667 | 500.0000 | 552 | 315.1667 |
+
+Allocation and GC columns come directly from BenchmarkDotNet diagnosers. Treat the ratios and relative pressure inside the same run as the useful signal; ShortRun is a fast diagnostic pass, not a release-grade SLA measurement.
+
+Persistence and export on the `LargeCorpus` profile:
+
+| Method | Mean | StdDev | Allocated |
+| --- | ---: | ---: | ---: |
+| `CreateSnapshot` | 4.527 ms | 0.008 ms | 5.31 MB |
+| `SerializeTurtle` | 9.203 ms | 0.088 ms | 18.07 MB |
+| `SerializeJsonLd` | 13.157 ms | 0.086 ms | 20.31 MB |
+| `SaveTurtleToFile` | 29.853 ms | 0.122 ms | 34.74 MB |
+| `SaveJsonLdToFile` | 38.144 ms | 1.436 ms | 37.02 MB |
+| `LoadTurtleFromFile` | 35.983 ms | 0.373 ms | 28.10 MB |
+| `LoadJsonLdFromFile` | 99.980 ms | 2.262 ms | 75.32 MB |
+
+Tiktoken token-distance search:
+
+| Profile | Query | Exact | Fuzzy-corrected | Exact allocated | Fuzzy allocated |
+| --- | --- | ---: | ---: | ---: | ---: |
+| `LongDocuments` | Exact | 955.1 us | 952.7 us | 2.38 MB | 2.38 MB |
+| `LongDocuments` | Typo | 1.112 ms | 1.291 ms | 2.78 MB | 3.73 MB |
+| `TokenizedMultilingual` | Exact | 680.8 us | 690.5 us | 1.81 MB | 1.81 MB |
+| `TokenizedMultilingual` | Typo | 811.3 us | 861.2 us | 1.81 MB | 1.82 MB |
 
 Fuzzy edit-distance mean time:
 
@@ -1361,4 +1418,4 @@ Fuzzy edit-distance mean time:
 | Long insertion | 21.980 ns | 7,990.146 ns | 363.53x | 0 B | 640 B |
 | Long no-match | 70.283 ns | 8,990.700 ns | 127.92x | 328 B | 672 B |
 
-These numbers are local measurements, not a cross-machine performance contract. The full Markdown, CSV, and JSON BenchmarkDotNet reports remain the source for detailed diagnostics.
+These numbers are local measurements, not a cross-machine performance contract. The README keeps compact slices only; [Performance Benchmarks](docs/Features/PerformanceBenchmarks.md) and the full Markdown, CSV, and JSON BenchmarkDotNet reports remain the source for detailed diagnostics.
@@ -21,8 +21,9 @@ Purpose: BenchmarkDotNet performance suite for Markdown-LD Knowledge Bank.
 ## Commands
 
 - build: `(cd ../.. && dotnet build MarkdownLd.Kb.slnx --no-restore)`
-- smoke: `(cd ../.. && dotnet run --project benchmarks/MarkdownLd.Kb.Benchmarks -c Release -- --filter "*FuzzyEditDistanceBenchmarks*" --dry)`
+- smoke: `(cd ../.. && dotnet run --project benchmarks/MarkdownLd.Kb.Benchmarks -c Release -- --filter "*GraphLifecycleSmokeBenchmarks*" --job Dry)`
 - quick: `(cd ../.. && dotnet run --project benchmarks/MarkdownLd.Kb.Benchmarks -c Release -- --filter "*GraphSearchBenchmarks*")`
+- persistence: `(cd ../.. && dotnet run --project benchmarks/MarkdownLd.Kb.Benchmarks -c Release -- --filter "*GraphPersistenceBenchmarks*")`
 - profile: `(cd ../.. && MARKDOWN_LD_KB_BENCHMARK_PROFILE=cpu dotnet run --project benchmarks/MarkdownLd.Kb.Benchmarks -c Release -- --filter "*FuzzyEditDistanceBenchmarks*")`
 
 ## Applicable Skills
 
@@ -4,9 +4,15 @@ internal static class BenchmarkCategories
 {
     internal const string Algorithm = "algorithm";
     internal const string Build = "build";
+    internal const string Export = "export";
     internal const string Federation = "federation";
     internal const string Fuzzy = "fuzzy";
     internal const string Graph = "graph";
+    internal const string Load = "load";
+    internal const string Persistence = "persistence";
+    internal const string Save = "save";
     internal const string Search = "search";
+    internal const string Serialization = "serialization";
+    internal const string Smoke = "smoke";
     internal const string Tiktoken = "tiktoken";
 }
@@ -6,22 +6,21 @@ internal static class BenchmarkCorpusFactory
 {
     private const string BaseUriText = "https://bench.example/";
     private const string LocalFederatedEndpointText = "https://bench.example/sparql/local";
-    private const string CacheTitlePrefix = "Cache restore runbook";
-    private const string BillingTitlePrefix = "Billing export guide";
-    private const string ReleaseTitlePrefix = "Release evidence checklist";
     private const string CacheQuery = "cache restore manifest evidence";
+    private const string LongQuery = "incident escalation recovery dependency timeline checkpoint";
+    private const string TokenizedQuery = "cache restore manifest token 実行 evidence";
+    private const string FederatedQuery = "federated sparql service binding runbook evidence";
     private const string TypoQuery = "cach restre manifst evidnce";
+    private const string LongTypoQuery = "incidnt escalaton recovry depndency chekpoint";
+    private const string TokenizedTypoQuery = "cach restore manifst tokne 実行 evidnce";
+    private const string FederatedTypoQuery = "federatd sparq servce bindng evidnce";
     private const string NoMatchQuery = "satellite coffee roasting";
     private static readonly Uri BaseUri = new(BaseUriText);
     private static readonly Uri LocalFederatedEndpoint = new(LocalFederatedEndpointText);
 
-    public static MarkdownSourceDocument[] CreateSources(int documentCount)
+    public static MarkdownSourceDocument[] CreateSources(BenchmarkCorpusProfile profile)
     {
-        return Enumerable.Range(0, documentCount)
-            .Select(index => new MarkdownSourceDocument(
-                $"content/bench/doc-{index:D5}.md",
-                CreateMarkdown(index)))
-            .ToArray();
+        return BenchmarkMarkdownCorpus.CreateSources(profile);
     }
 
     public static MarkdownKnowledgeBuildResult BuildNone(
@@ -46,13 +45,13 @@ public static MarkdownKnowledgeBuildResult BuildTiktoken(
         return pipeline.BuildAsync(sources).GetAwaiter().GetResult();
     }
 
-    public static string GetQuery(BenchmarkQueryScenario scenario)
+    public static string GetQuery(BenchmarkCorpusProfile profile, BenchmarkQueryScenario scenario)
     {
         return scenario switch
         {
-            BenchmarkQueryScenario.Typo => TypoQuery,
+            BenchmarkQueryScenario.Typo => GetTypoQuery(profile),
             BenchmarkQueryScenario.NoMatch => NoMatchQuery,
-            _ => CacheQuery,
+            _ => GetExactQuery(profile),
         };
     }
 
@@ -95,54 +94,25 @@ public static FederatedSparqlExecutionOptions CreateFederatedOptions(KnowledgeGr
         };
     }
 
-    private static string CreateMarkdown(int index)
-    {
-        var family = index % 3;
-        var title = CreateTitle(family, index);
-        var topic = CreateTopic(family);
-        var body = CreateBody(family, index);
-        return $$"""
-            ---
-            title: {{title}}
-            summary: {{topic}} summary for benchmark document {{index}}.
-            tags:
-              - benchmark
-              - {{topic}}
-            ---
-            # {{title}}
-
-            {{body}}
-            """;
-    }
-
-    private static string CreateTitle(int family, int index)
+    private static string GetExactQuery(BenchmarkCorpusProfile profile)
     {
-        return family switch
+        return profile switch
         {
-            1 => $"{BillingTitlePrefix} {index:D5}",
-            2 => $"{ReleaseTitlePrefix} {index:D5}",
-            _ => $"{CacheTitlePrefix} {index:D5}",
-        };
-    }
-
-    private static string CreateTopic(int family)
-    {
-        return family switch
-        {
-            1 => "billing invoice export payment checkpoint",
-            2 => "release gate approval evidence checklist",
-            _ => "cache restore manifest rollback evidence",
+            BenchmarkCorpusProfile.LongDocuments => LongQuery,
+            BenchmarkCorpusProfile.TokenizedMultilingual => TokenizedQuery,
+            BenchmarkCorpusProfile.FederatedRunbooks => FederatedQuery,
+            _ => CacheQuery,
         };
     }
 
-    private static string CreateBody(int family, int index)
+    private static string GetTypoQuery(BenchmarkCorpusProfile profile)
     {
-        var identifier = $"validationfingerprintcheckpointtoken{index:D5}manifestwindowrollbackevidence";
-        return family switch
+        return profile switch
         {
-            1 => $"Billing export verifies invoice payment checkpoint evidence with marker {identifier}.",
-            2 => $"Release evidence checklist confirms approval gates and deployment notes with marker {identifier}.",
-            _ => $"Cache restore validates manifest rollback evidence and runbook recovery with marker {identifier}.",
+            BenchmarkCorpusProfile.LongDocuments => LongTypoQuery,
+            BenchmarkCorpusProfile.TokenizedMultilingual => TokenizedTypoQuery,
+            BenchmarkCorpusProfile.FederatedRunbooks => FederatedTypoQuery,
+            _ => TypoQuery,
         };
     }
 }
@@ -0,0 +1,10 @@
+namespace ManagedCode.MarkdownLd.Kb.Benchmarks;
+
+public enum BenchmarkCorpusProfile
+{
+    ShortDocuments,
+    LongDocuments,
+    LargeCorpus,
+    TokenizedMultilingual,
+    FederatedRunbooks,
+}