ci: comprehensive benchmark with multi-run stats (mean, std, RMS)

HanSur94 · claude · HanSur94 · commit cc7ebe2abe6b · 2026-03-18T17:52:59.000+01:00
New benchmark script (scripts/run_ci_benchmark.m) replaces inline YAML:
- Tests 3 dataset sizes: 1M, 5M, 10M points
- 4 metrics per size: instantiation, render, zoom cycle, downsample
- Multiple iterations (10 for zoom/downsample, 5 for init/render)
- Computes mean, std, RMS — reports mean + std to benchmark tracker
- Warmup phase before zoom measurements
- 24 total metrics tracked for regression detection

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -59,63 +59,15 @@ jobs:
 
       - name: Run benchmark
         run: |
-          xvfb-run octave --eval "
-            addpath(pwd); setup();
-            addpath(fullfile(pwd, 'libs', 'FastSense', 'private'));
-
-            n = 1e6;
-            x = linspace(0, 100, n);
-            y = sin(x * 2*pi / 10) + 0.5 * randn(1, n);
-
-            tic;
-            for i = 1:100
-              minmax_downsample(x, y, 2000);
-            end
-            t_ds = toc / 100;
-
-            tic;
-            for i = 1:1000
-              binary_search(x, 20, 'left');
-            end
-            t_bs = toc / 1000;
-
-            % Measure full render cycle
-            fp = FastSense();
-            fp.addLine(x, y, 'DisplayName', 'bench');
-            fp.addThreshold(1.5, 'Direction', 'upper', 'ShowViolations', true);
-            fp.render();
-
-            tic;
-            for i = 1:20
-              center = 10 + 80 * rand();
-              width = 1 + 20 * rand();
-              set(fp.hAxes, 'XLim', [center-width/2, center+width/2]);
-              drawnow;
-            end
-            t_zoom = toc / 20;
-            close all force;
-
-            fid = fopen('benchmark-results.json', 'w');
-            fprintf(fid, '[\n');
-            fprintf(fid, '  {\"name\": \"Downsample (1M pts)\", \"unit\": \"ms\", \"value\": %.2f},\n', t_ds * 1000);
-            fprintf(fid, '  {\"name\": \"Binary Search\", \"unit\": \"us\", \"value\": %.2f},\n', t_bs * 1e6);
-            fprintf(fid, '  {\"name\": \"Zoom Cycle (1M pts)\", \"unit\": \"ms\", \"value\": %.2f}\n', t_zoom * 1000);
-            fprintf(fid, ']\n');
-            fclose(fid);
-
-            fprintf('\n=== Benchmark Results (1M points) ===\n');
-            fprintf('Downsample:    %.2f ms\n', t_ds * 1000);
-            fprintf('Binary search: %.2f us\n', t_bs * 1e6);
-            fprintf('Zoom cycle:    %.2f ms\n', t_zoom * 1000);
-          "
+          xvfb-run octave --eval "addpath(pwd); setup(); addpath('scripts'); run_ci_benchmark();"
 
       - name: Fix git ownership
         run: git config --global --add safe.directory /__w/FastSense/FastSense
 
       - name: Store benchmark results
         uses: benchmark-action/github-action-benchmark@v1
         with:
-          name: FastSense Performance
+          name: FastPlot Performance
           tool: customSmallerIsBetter
           output-file-path: benchmark-results.json
           github-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/scripts/run_ci_benchmark.m b/scripts/run_ci_benchmark.m
@@ -0,0 +1,135 @@
+function run_ci_benchmark()
+%RUN_CI_BENCHMARK Performance benchmark for CI with statistical analysis.
+%   Runs multiple iterations across multiple dataset sizes to produce
+%   mean, std, and RMS for each metric. Outputs results as JSON for
+%   github-action-benchmark (customSmallerIsBetter format).
+%
+%   Metrics measured:
+%     - Instantiation: FastPlot() + addLine + two addThreshold calls
+%     - Render: render() + drawnow
+%     - Zoom cycle: set XLim + drawnow (interactive responsiveness)
+%     - Downsample: minmax_downsample kernel
+%
+%   Dataset sizes: 1M, 5M, 10M points
+%   Samples: 10 per size for downsample and zoom (each sample is the
+%   average over 20 back-to-back calls/cycles), 5 per size for
+%   instantiation and render (heavier).
+%
+%   Output: benchmark-results.json in the current directory, holding one
+%   mean entry and one std entry per metric and size (3 sizes x 4 metrics
+%   x 2 = 24 entries). Raises an error if that file cannot be opened.
+
+    % Makes the private helpers callable from this script (presumably where
+    % minmax_downsample lives -- confirm against the library layout).
+    % Resolved against pwd, so this must run from the repository root.
+    addpath(fullfile(pwd, 'libs', 'FastPlot', 'private'));
+
+    sizes  = [1e6, 5e6, 10e6];
+    labels = {'1M', '5M', '10M'};
+
+    N_DS   = 20;   % downsample iterations
+    N_ZOOM = 20;   % zoom cycles per run
+    N_RUNS = 10;   % runs for zoom/downsample stats
+    N_INIT = 5;    % runs for instantiation/render (heavier)
+
+    % Cell array of structs (name, unit, value); add_result appends two
+    % entries (mean and std) per call.
+    results = {};
+
+    for s = 1:numel(sizes)
+        n = sizes(s);
+        lbl = labels{s};
+        fprintf('\n========== %s points ==========\n', lbl);
+
+        % Synthetic signal: sine plus Gaussian noise over x in [0, 100].
+        x = linspace(0, 100, n);
+        y = sin(x * 2*pi / 10) + 0.5 * randn(1, n);
+
+        % --- Downsample benchmark ---
+        % Each sample is the per-call average over N_DS calls.
+        t_ds = zeros(1, N_RUNS);
+        for r = 1:N_RUNS
+            tic;
+            for k = 1:N_DS
+                minmax_downsample(x, y, 2000);
+            end
+            t_ds(r) = toc / N_DS;
+        end
+        results = add_result(results, sprintf('Downsample mean (%s)', lbl), 'ms', t_ds * 1000);
+
+        % --- Instantiation benchmark ---
+        % Times object construction plus data/threshold registration only;
+        % nothing is rendered inside the timed region.
+        t_init = zeros(1, N_INIT);
+        for r = 1:N_INIT
+            tic;
+            fp = FastPlot();
+            fp.addLine(x, y, 'DisplayName', 'Sensor');
+            fp.addThreshold(1.5, 'Direction', 'upper', 'ShowViolations', true);
+            fp.addThreshold(-1.5, 'Direction', 'lower', 'ShowViolations', true);
+            t_init(r) = toc;
+            close all force;
+        end
+        results = add_result(results, sprintf('Instantiation mean (%s)', lbl), 'ms', t_init * 1000);
+
+        % --- Render benchmark ---
+        % Setup happens before tic so only render() + drawnow is timed.
+        t_render = zeros(1, N_INIT);
+        for r = 1:N_INIT
+            fp = FastPlot();
+            fp.addLine(x, y, 'DisplayName', 'Sensor');
+            fp.addThreshold(1.5, 'Direction', 'upper', 'ShowViolations', true);
+            fp.addThreshold(-1.5, 'Direction', 'lower', 'ShowViolations', true);
+            tic;
+            fp.render();
+            drawnow;
+            t_render(r) = toc;
+            close all force;
+        end
+        results = add_result(results, sprintf('Render mean (%s)', lbl), 'ms', t_render * 1000);
+
+        % --- Zoom cycle benchmark ---
+        % One figure is rendered once and reused for every zoom sample.
+        fp = FastPlot();
+        fp.addLine(x, y, 'DisplayName', 'Sensor');
+        fp.addThreshold(1.5, 'Direction', 'upper', 'ShowViolations', true);
+        fp.addThreshold(-1.5, 'Direction', 'lower', 'ShowViolations', true);
+        fp.render();
+        drawnow;
+
+        % Warmup: untimed zooms so one-off first-redraw costs (presumably
+        % lazy setup/caching) stay out of the measured cycles.
+        for k = 1:5
+            set(fp.hAxes, 'XLim', [20 80]);
+            drawnow;
+        end
+
+        t_zoom = zeros(1, N_RUNS);
+        for r = 1:N_RUNS
+            % Random windows are drawn before tic so RNG cost is not timed.
+            centers = 10 + 80 * rand(1, N_ZOOM);
+            widths  = 1 + 20 * rand(1, N_ZOOM);
+            tic;
+            for k = 1:N_ZOOM
+                set(fp.hAxes, 'XLim', [centers(k)-widths(k)/2, centers(k)+widths(k)/2]);
+                drawnow;
+            end
+            t_zoom(r) = toc / N_ZOOM;
+        end
+        close all force;
+
+        results = add_result(results, sprintf('Zoom cycle mean (%s)', lbl), 'ms', t_zoom * 1000);
+    end
+
+    % --- Write JSON ---
+    % Fail loudly when the file cannot be created; otherwise fid is -1 and
+    % the first fprintf reports an unhelpful invalid-stream error.
+    [fid, msg] = fopen('benchmark-results.json', 'w');
+    if fid < 0
+        error('run_ci_benchmark:fileOpen', ...
+            'Cannot open benchmark-results.json for writing: %s', msg);
+    end
+    fprintf(fid, '[\n');
+    for i = 1:numel(results)
+        r = results{i};
+        % JSON forbids a trailing comma after the last array element.
+        comma = ',';
+        if i == numel(results), comma = ''; end
+        fprintf(fid, '  {"name": "%s", "unit": "%s", "value": %.3f}%s\n', ...
+            r.name, r.unit, r.value, comma);
+    end
+    fprintf(fid, ']\n');
+    fclose(fid);
+
+    fprintf('\n=== Benchmark complete — %d metrics written to benchmark-results.json ===\n', numel(results));
+end
+
+function results = add_result(results, name, unit, samples)
+%ADD_RESULT Compute stats and add to results list.
+%   RESULTS = ADD_RESULT(RESULTS, NAME, UNIT, SAMPLES) prints the mean,
+%   standard deviation and RMS of the numeric vector SAMPLES, then appends
+%   two structs with fields name/unit/value to the cell array RESULTS:
+%     1. NAME, carrying mean(SAMPLES)
+%     2. the derived std name, carrying std(SAMPLES)
+%
+%   The std name is NAME with ' mean ' replaced by ' std ', e.g.
+%   'Zoom cycle mean (10M)' -> 'Zoom cycle std (10M)'. When NAME has no
+%   ' mean ' token, ' std' is inserted before a trailing '(label)', or
+%   appended if there is no such label.
+%
+%   RMS is printed for information only; it is not added to RESULTS.
+    m       = mean(samples);
+    s       = std(samples);
+    rms_val = sqrt(mean(samples.^2));   % not named "rms": avoids shadowing any rms() on the path
+
+    fprintf('  %-30s  mean=%.2f %s  std=%.2f %s  rms=%.2f %s  (n=%d)\n', ...
+        name, m, unit, s, unit, rms_val, unit, numel(samples));
+
+    % Build the std name by token substitution rather than fixed-offset
+    % slicing: slicing assumed a 4-character '(1M)' label and turned
+    % 'Downsample mean (10M)' into 'Downsample mean  std10M)'.
+    std_name = strrep(name, ' mean ', ' std ');
+    if strcmp(std_name, name)
+        std_name = regexprep(name, '\s*(\([^()]*\))\s*$', ' std $1');
+        if strcmp(std_name, name)
+            std_name = [name ' std'];
+        end
+    end
+
+    % Report mean as the tracked value (for regression detection)
+    results{end+1} = struct('name', name, 'unit', unit, 'value', m);
+    % Also report std as a separate metric
+    results{end+1} = struct('name', std_name, 'unit', unit, 'value', s);
+end