Skip to content

Commit 53d6f8e

Browse files
committed
Switch benchmarks to warmup config; re-derive thresholds
Monitoring strategy with WarmupCount=1, IterationCount=4 — same total work as before, but iteration 1's JIT/EF-model-build/cold-cache cost is discarded instead of folded into the measurement, so StdDev tightens and thresholds can come down to genuine 3σ above the with-index mean. Both comment lines (baseline + with-index) are re-measured under this config so the speedup comparison is apples-to-apples.
1 parent da3929a commit 53d6f8e

3 files changed

Lines changed: 27 additions & 20 deletions

File tree

backend/FwLite/FwLiteProjectSync.Tests/BenchmarkSupport.cs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@ namespace FwLiteProjectSync.Tests;
1313
internal static class BenchmarkSupport
1414
{
1515
/// <summary>
16-
/// Standard config for sync benchmarks: in-process toolchain, cold iterations, and a logger
17-
/// that pipes the BDN summary table into xUnit's test output.
16+
/// Standard config for sync benchmarks: in-process toolchain, one warmup + four measurement
17+
/// iterations (Monitoring strategy), and a logger that pipes the BDN summary table into
18+
/// xUnit's test output.
1819
/// </summary>
1920
/// <remarks>
2021
/// The in-process toolchain is what lets a benchmark class read a static field set by its
@@ -25,14 +26,20 @@ internal static class BenchmarkSupport
2526
/// for multi-iteration sync benchmarks (e.g. delete-heavy alone is ~80s/iter and full-import
2627
/// setup adds ~50s/iter, so 5 iterations need ~11 min). 30 min covers the worst case with
2728
/// headroom.
29+
///
30+
/// Monitoring + WarmupCount=1 means iteration 1 absorbs JIT + EF model build + first-touch
31+
/// file cache and is discarded; iterations 2-5 are measured. ColdStart would skip warmup
32+
/// entirely; for these slow ops the JIT/model-build noise in iteration 1 is large enough
33+
/// that one discarded warmup tightens StdDev meaningfully without changing total CI time.
2834
/// </remarks>
2935
public static IConfig ConfigFor(ITestOutputHelper output)
3036
{
3137
var toolchain = new InProcessNoEmitToolchain(TimeSpan.FromMinutes(30), logOutput: false);
3238
return ManualConfig.CreateEmpty()
3339
.AddJob(Job.Default
34-
.WithStrategy(RunStrategy.ColdStart)
35-
.WithIterationCount(5)
40+
.WithStrategy(RunStrategy.Monitoring)
41+
.WithWarmupCount(1)
42+
.WithIterationCount(4)
3643
.WithToolchain(toolchain))
3744
.AddExporter(JsonExporter.FullCompressed)
3845
.AddColumnProvider(DefaultColumnProviders.Instance)

backend/FwLite/FwLiteProjectSync.Tests/SyncBenchmark.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,9 @@ public void First_Sync_Sena3()
5252

5353
public class FirstSyncBench
5454
{
55-
// CI initial result: mean 49.5s, StdDev 2.4s (medium variance) => 57s (~3σ above mean)
56-
// CI with commits order index: mean 44.5s, StdDev 3.5s (medium variance) => 55s (~3σ above mean)
57-
public const double ThresholdSeconds = 55.0;
55+
// CI baseline (no index): mean 52.34s, StdDev 0.27s (low variance) => 53s (~3σ above mean)
56+
// CI with commits order index: mean 44.35s, StdDev 1.10s (low variance) => 50s (~5σ above mean, generous for run-to-run drift)
57+
public const double ThresholdSeconds = 50.0;
5858

5959
internal static Sena3Fixture Fixture = null!;
6060

backend/FwLite/FwLiteProjectSync.Tests/SyncMutationBenchmark.cs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -69,20 +69,20 @@ public class MutationSyncBench
6969
{
7070
public static readonly IReadOnlyDictionary<string, double> ThresholdSecondsByProfile = new Dictionary<string, double>
7171
{
72-
// CI 2026-05-06: mean 58.3s, StdDev 4.1s (high variance) => 72s (~3σ above mean)
73-
// CI with commits order index: mean 46.9s, StdDev 2.2s (medium variance) => 53s (~3σ above mean)
74-
["component-heavy"] = 53.0,
75-
// CI 2026-05-06: mean 94.3s, StdDev 5.7s (high variance) => 115s (~3.6σ above mean)
76-
// CI with commits order index: mean 91.6s, StdDev 4.3s (high variance) => 105s (~3σ above mean)
77-
["delete-heavy"] = 105.0,
78-
// CI 2026-05-06: mean 36.2s, StdDev 3.3s (medium variance) => 45s (~3σ above mean)
79-
// CI with commits order index: mean 33.4s, StdDev 0.8s (low variance) => 36s (~3σ above mean)
80-
["mixed-realistic"] = 36.0,
81-
// CI 2026-05-06: mean 5.05s, StdDev 0.4s (low variance) => 7s (generous margin since it's already pretty fast and we want to avoid false positives from noise).
82-
// CI with commits order index: mean 3.7s, StdDev 0.1s (low variance) => 5s (pretty fast, so meh)
72+
// CI baseline (no index): mean 52.40s, StdDev 1.51s (low variance) => 61s (~6σ above mean)
73+
// CI with commits order index: mean 50.49s, StdDev 0.87s (low variance) => 61s (~12σ above mean — kept generous, run-to-run drift can be ~3s here)
74+
["component-heavy"] = 61.0,
75+
// CI baseline (no index): mean 87.02s, StdDev 3.85s (medium variance) => 97s (~2.6σ above mean)
76+
// CI with commits order index: mean 87.92s, StdDev 1.42s (low variance) => 97s (~6.4σ above mean — kept generous, run-to-run drift can be ~2s here)
77+
["delete-heavy"] = 97.0,
78+
// CI baseline (no index): mean 32.99s, StdDev 0.77s (low variance) => 38s (~6.5σ above mean)
79+
// CI with commits order index: mean 32.95s, StdDev 1.08s (low variance) => 38s (~5σ above mean)
80+
["mixed-realistic"] = 38.0,
81+
// CI baseline (no index): mean 4.52s, StdDev 0.08s (low variance) => 5s (generous margin since it's already pretty fast and we want to avoid false positives from noise)
82+
// CI with commits order index: mean 3.53s, StdDev 0.10s (low variance) => 5s (pretty fast, so meh — same margin works)
8383
["patch-heavy"] = 5.0,
84-
// CI 2026-05-06: mean 0.77s, StdDev 0.2s (low variance) => 3s (super fast, so meh)
85-
// CI with commits order index: mean 0.58s, StdDev 0.02s (low variance) => 2s (super fast, so meh)
84+
// CI baseline (no index): mean 0.69s, StdDev 0.08s (low variance) => 2s (super fast, so meh)
85+
// CI with commits order index: mean 0.57s, StdDev 0.05s (low variance) => 2s (super fast, so meh)
8686
["reorder-heavy"] = 2.0,
8787
};
8888

0 commit comments

Comments
 (0)