diff --git a/Cargo.lock b/Cargo.lock index c4f8665e90..6076dc378a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1311,6 +1311,7 @@ dependencies = [ "ringbuf", "sentry", "serde", + "serde_json", "specta", "tokio", "tokio-util", diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs index 00dc6903b1..795fbe7710 100644 --- a/apps/desktop/src-tauri/src/lib.rs +++ b/apps/desktop/src-tauri/src/lib.rs @@ -1915,11 +1915,29 @@ async fn set_playhead_position( editor_instance: WindowEditorInstance, frame_number: u32, ) -> Result<(), String> { - editor_instance - .modify_and_emit_state(|state| { - state.playhead_position = frame_number; - }) - .await; + let state_changed = { + let state = editor_instance.state.lock().await; + state.playhead_position != frame_number + }; + + if state_changed { + editor_instance + .modify_and_emit_state(|state| { + state.playhead_position = frame_number; + }) + .await; + } + + let playback_handle = if state_changed { + let state = editor_instance.state.lock().await; + state.playback_task.clone() + } else { + None + }; + + if let Some(handle) = playback_handle { + handle.seek(frame_number); + } Ok(()) } @@ -2539,11 +2557,29 @@ async fn is_camera_window_open(app: AppHandle) -> bool { #[specta::specta] #[instrument(skip(editor_instance))] async fn seek_to(editor_instance: WindowEditorInstance, frame_number: u32) -> Result<(), String> { - editor_instance - .modify_and_emit_state(|state| { - state.playhead_position = frame_number; - }) - .await; + let state_changed = { + let state = editor_instance.state.lock().await; + state.playhead_position != frame_number + }; + + if state_changed { + editor_instance + .modify_and_emit_state(|state| { + state.playhead_position = frame_number; + }) + .await; + } + + let playback_handle = if state_changed { + let state = editor_instance.state.lock().await; + state.playback_task.clone() + } else { + None + }; + + if let Some(handle) = playback_handle { + handle.seek(frame_number); + } 
Ok(()) } diff --git a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx index 8aacea955f..a1e32c6c04 100644 --- a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -7,6 +7,7 @@ import { createSignal, Index, type JSX, + onCleanup, onMount, Show, } from "solid-js"; @@ -89,7 +90,6 @@ export function Timeline() { editorState, projectActions, meta, - previewResolutionBase, } = useEditorContext(); const duration = () => editorInstance.recordingDuration; @@ -221,6 +221,17 @@ export function Timeline() { let pendingScrollDelta = 0; let scrollRafId: number | null = null; + let pendingSeekFrame: number | null = null; + let seekRafId: number | null = null; + let seekInFlight = false; + let inFlightSeekFrame: number | null = null; + let lastCompletedSeekFrame: number | null = null; + + onCleanup(() => { + if (zoomRafId !== null) cancelAnimationFrame(zoomRafId); + if (scrollRafId !== null) cancelAnimationFrame(scrollRafId); + if (seekRafId !== null) cancelAnimationFrame(seekRafId); + }); function flushPendingZoom() { if (pendingZoomDelta === 0 || pendingZoomOrigin === null) { @@ -266,7 +277,50 @@ export function Timeline() { } } - async function handleUpdatePlayhead(e: MouseEvent) { + function scheduleSeek(frameNumber: number) { + if ( + frameNumber === pendingSeekFrame || + frameNumber === inFlightSeekFrame || + frameNumber === lastCompletedSeekFrame + ) { + return; + } + pendingSeekFrame = frameNumber; + if (seekRafId === null) { + seekRafId = requestAnimationFrame(flushPendingSeek); + } + } + + async function flushPendingSeek() { + seekRafId = null; + + if (seekInFlight || pendingSeekFrame === null) { + if (pendingSeekFrame !== null && seekRafId === null) { + seekRafId = requestAnimationFrame(flushPendingSeek); + } + return; + } + + const frameNumber = pendingSeekFrame; + pendingSeekFrame = null; + seekInFlight = true; + inFlightSeekFrame = frameNumber; + + try { + 
await commands.seekTo(frameNumber); + lastCompletedSeekFrame = frameNumber; + } catch (err) { + console.error("Failed to seek timeline playhead:", err); + } finally { + seekInFlight = false; + inFlightSeekFrame = null; + if (pendingSeekFrame !== null && seekRafId === null) { + seekRafId = requestAnimationFrame(flushPendingSeek); + } + } + } + + function handleUpdatePlayhead(e: MouseEvent) { const { left } = timelineBounds; if ( zoomSegmentDragState.type !== "moving" && @@ -274,33 +328,14 @@ export function Timeline() { maskSegmentDragState.type !== "moving" && textSegmentDragState.type !== "moving" ) { - // Guard against missing bounds and clamp computed time to [0, totalDuration()] if (left == null) return; const rawTime = secsPerPixel() * (e.clientX - left) + transform().position; const newTime = Math.min(Math.max(0, rawTime), totalDuration()); - - // If playing, some backends require restart to seek reliably - if (editorState.playing) { - try { - await commands.stopPlayback(); - - // Round to nearest frame to prevent off-by-one drift - const targetFrame = Math.round(newTime * FPS); - await commands.seekTo(targetFrame); - - // If the user paused during these async ops, bail out without restarting - if (!editorState.playing) { - setEditorState("playbackTime", newTime); - return; - } - - await commands.startPlayback(FPS, previewResolutionBase()); - setEditorState("playing", true); - } catch (err) { - console.error("Failed to seek during playback:", err); - } - } + const total = totalDuration(); + const maxFrame = Math.max(0, Math.ceil(total * FPS) - 1); + const targetFrame = Math.min(Math.round(newTime * FPS), maxFrame); + scheduleSeek(targetFrame); setEditorState("playbackTime", newTime); } diff --git a/crates/editor/Cargo.toml b/crates/editor/Cargo.toml index c612d1e33f..0cc16b4111 100644 --- a/crates/editor/Cargo.toml +++ b/crates/editor/Cargo.toml @@ -23,6 +23,7 @@ axum = { version = "0.7.5", features = ["ws"] } ffmpeg.workspace = true specta.workspace = true 
serde = { workspace = true } +serde_json = "1" sentry.workspace = true futures = { workspace = true } tracing.workspace = true diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index a2253d580f..c06f822c5d 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -10,8 +10,10 @@ This document tracks performance benchmarks for Cap's playback and decoding syst |--------|--------|-----------| | Decoder Init | <200ms | - | | Decode Latency (p95) | <50ms | - | +| Startup to First Frame | <250ms | configurable | | Effective FPS | ≥30 fps | ±2 fps | | Decode Jitter | <10ms | - | +| Scrub Seek Latency (p95) | <40ms | - | | A/V Sync (mic↔video) | <100ms | - | | A/V Sync (system↔video) | <100ms | - | | Camera-Display Drift | <100ms | - | @@ -20,6 +22,7 @@ This document tracks performance benchmarks for Cap's playback and decoding syst - **Decoder Tests**: Init time, hardware acceleration detection, fallback handling - **Playback Tests**: Sequential decode, frame retrieval, latency percentiles +- **Scrub Tests**: Random access seek decode latency and seek failure rate - **Audio Sync Tests**: Mic-video sync, system audio-video sync - **Camera Sync Tests**: Camera-display drift, frame count alignment - **Decode Benchmark**: Creation, sequential, seek, and random access performance @@ -40,10 +43,12 @@ This document tracks performance benchmarks for Cap's playback and decoding syst ```bash # Run full playback validation on recordings from real-device-test-runner cargo run -p cap-recording --example playback-test-runner -- full +cargo run -p cap-recording --example playback-test-runner -- full --startup-threshold-ms 250 # Run specific test categories cargo run -p cap-recording --example playback-test-runner -- decoder cargo run -p cap-recording --example playback-test-runner -- playback +cargo run -p cap-recording --example playback-test-runner -- scrub cargo run -p cap-recording --example 
playback-test-runner -- audio-sync cargo run -p cap-recording --example playback-test-runner -- camera-sync @@ -52,6 +57,69 @@ cargo run -p cap-recording --example playback-test-runner -- --recording-path /p # List available recordings cargo run -p cap-recording --example playback-test-runner -- list + +# Emit machine-readable JSON report +cargo run -p cap-recording --example playback-test-runner -- full --json-output /tmp/playback-benchmark.json +``` + +Aggregate JSON outputs from multiple machines: + +```bash +node scripts/aggregate-playback-benchmarks.js --input /path/to/json-results --output /tmp/playback-benchmark-aggregate.md +node scripts/build-playback-matrix-report.js --input /path/to/json-results --output /tmp/playback-matrix-status.md +``` + +Validate matrix coverage and required formats: + +```bash +node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented +node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented --output-json /tmp/playback-matrix-validation.json + +# Finalize aggregate + status + validation artifacts +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results 
--output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-zero-compared +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-skipped-files +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md + +# Finalize summary JSON includes generated timestamp, artifact paths, settings, pass/fail flags, and git branch/commit metadata when available. +# Finalize summary JSON also carries comparison failure reasons and gate outcomes when comparison is enabled. +# Finalize summary JSON includes comparison file stats (including parse error counts) when comparison is enabled. +# Finalize summary JSON includes comparison summary counts (compared rows, regressions, missing/candidate-only/insufficient-sample counts) when comparison is enabled. 
+ +# Publish matrix artifacts into this benchmark history +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md /tmp/playback-matrix-final/playback-comparison.md --comparison-json /tmp/playback-matrix-final/playback-comparison.json +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --finalize-summary-json /tmp/playback-matrix-final/playback-finalize-summary.json + +# Analyze bottlenecks from matrix results +node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --output-json /tmp/playback-bottlenecks.json --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 + +# Compare candidate against baseline and flag regressions +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline 
/path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-zero-compared +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-skipped-files +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json + +# Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. +# Comparison table also reports baseline/candidate run counts per row when multiple JSON inputs contribute to the same key. +# Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for automation-friendly gate diagnostics. +# Minimum sample gating is applied against metrics that are actually comparable for the row (for example, scrub samples are not required for rows with no scrub metric comparison). +# Comparison JSON also includes baseline/candidate file parsing stats and optional parse-error gating support. +# Comparison can optionally gate on skipped input files via `--fail-on-skipped-files`. 
+# Comparison file stats now include skipped-file breakdown for `no_reports` and `no_usable_metrics`. ``` #### Decode Performance Benchmark @@ -62,6 +130,12 @@ cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 # With custom FPS and iterations cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --iterations 50 + +# Emit machine-readable JSON with startup/scrub metrics +cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --sequential-frames 180 --random-samples 120 --output-json /tmp/decode-benchmark.json + +# Fragmented segment input is supported by passing the display directory +cargo run -p cap-editor --example decode-benchmark -- --video /path/to/segment/display --fps 60 --output-json /tmp/decode-benchmark-fragmented.json ``` #### Combined Workflow (Recording → Playback) @@ -74,6 +148,30 @@ cargo run -p cap-recording --example real-device-test-runner -- full --keep-outp cargo run -p cap-recording --example playback-test-runner -- full ``` +### Cross-Platform Validation Matrix + +Run these scenarios on each required hardware class and append outputs via `--benchmark-output`. 
+ +```bash +cargo run -p cap-recording --example playback-test-runner -- full --fps 60 --benchmark-output --notes "platform= gpu= scenario=full" +cargo run -p cap-recording --example playback-test-runner -- scrub --fps 60 --benchmark-output --notes "platform= gpu= scenario=scrub" +``` + +Automated helper for machine runs: + +```bash +node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --startup-threshold-ms 250 --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir /tmp/playback-matrix-windows-nvidia --fps 60 --startup-threshold-ms 250 --require-formats mp4,fragmented --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu integrated --output-dir /tmp/playback-matrix-windows-integrated --fps 60 --startup-threshold-ms 250 --scenarios scrub --input-dir /tmp/cap-real-device-tests +``` + +| Platform | GPU Class | MP4 Full | Fragmented Full | MP4 Scrub | Fragmented Scrub | Notes | +|----------|-----------|----------|-----------------|-----------|------------------|-------| +| macOS 13+ | Apple Silicon | ☐ | ☐ | ☐ | ☐ | | +| Windows 11 | NVIDIA discrete | ☐ | ☐ | ☐ | ☐ | | +| Windows 11 | AMD discrete | ☐ | ☐ | ☐ | ☐ | | +| Windows 11 | Integrated baseline | ☐ | ☐ | ☐ | ☐ | | + --- ## Benchmark History @@ -106,6 +204,19 @@ cargo run -p cap-recording --example playback-test-runner -- full | **P50/P95/P99** | Latency percentiles | Sorted distribution | | **Effective FPS** | Actual decode throughput | frames / elapsed_time | | **Jitter** | Decode time variance (std dev) | sqrt(variance) | +| **First Decode** | Decode latency for first successful frame | elapsed from first frame request | +| **Startup to First** | Time from playback test start to first decoded frame | elapsed since playback test start | + +### Scrub Metrics + +| Metric | Description | How 
Measured | +|--------|-------------|--------------| +| **Seek Operations** | Total random seek attempts | Fixed operation count per segment | +| **Successful Seeks** | Seeks returning a decoded frame | Count of non-None seek decodes | +| **Failed Seeks** | Seeks returning no frame | Count of None seek decodes | +| **Avg Seek Time** | Mean random seek decode latency | Avg of seek decode times | +| **P50/P95/P99 Seek** | Seek latency percentiles | Sorted seek time distribution | +| **Max Seek Time** | Worst seek decode latency | Max of seek decode times | ### Audio Sync Metrics @@ -188,5 +299,6 @@ When analyzing benchmark results, focus on: ## Related Documentation - [Recording Benchmarks](../recording/BENCHMARKS.md) - Recording performance tracking +- [Playback Matrix Runbook](./PLAYBACK-MATRIX-RUNBOOK.md) - Cross-platform evidence collection workflow - [cap-rendering/decoder](../rendering/src/decoder.rs) - Decoder implementation - [cap-video-decode](../video-decode/) - Platform-specific decoders diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index d30b940f21..62ccbb4f18 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -35,7 +35,7 @@ ## Current Status -**Last Updated**: 2026-01-30 +**Last Updated**: 2026-02-13 ### Performance Summary @@ -60,10 +60,12 @@ - ✅ Multi-position decoder pool for smooth scrubbing - ✅ Mic audio sync within tolerance - ✅ Camera-display sync perfect (0ms drift) +- ✅ Editor playback now keeps a live seek channel during playback instead of stop/start restart loops +- ✅ Audio playback defaults to low-latency streaming buffer path with bounded prefill ### Known Issues (Lower Priority) 1. **System audio timing**: ~162ms difference inherited from recording-side timing issue -2. **Display decoder init time**: 337ms due to multi-position pool (creates 3 decoders) +2. 
**Display decoder init time**: baseline was 337ms from eager multi-decoder setup; now reduced by lazy decoder warmup but needs benchmark confirmation --- @@ -73,12 +75,17 @@ *(Update this section as you work)* - [ ] **Test fragmented mode** - Run playback tests on fragmented recordings -- [ ] **Investigate display decoder init time** - 337ms may be optimizable +- [ ] **Collect cross-platform benchmark evidence** - macOS 13+ and Windows GPU matrix for FPS, scrub settle, audio start latency, and A/V drift +- [ ] **Validate lazy decoder warmup impact** - measure display decoder init and scrub settle before/after on real recordings +- [ ] **Validate streaming audio startup/sync** - benchmark low-latency path vs legacy pre-render path across long timelines ### Completed - [x] **Run initial baseline** - Established current playback performance metrics (2026-01-28) - [x] **Profile decoder init time** - Hardware acceleration confirmed (AVAssetReader) (2026-01-28) - [x] **Identify latency hotspots** - No issues found, p95=3.1ms (2026-01-28) +- [x] **Remove seek restart churn in timeline path** - in-playback seeks now route through live playback handle (2026-02-13) +- [x] **Switch default audio mode to low-latency streaming** - full prerender now opt-in by env flag (2026-02-13) +- [x] **Reduce eager AVAssetReader decoder warmup** - pool now initializes lazily beyond first warm decoders (2026-02-13) --- @@ -91,6 +98,7 @@ cargo run -p cap-recording --example playback-test-runner -- full # Test specific categories cargo run -p cap-recording --example playback-test-runner -- decoder cargo run -p cap-recording --example playback-test-runner -- playback +cargo run -p cap-recording --example playback-test-runner -- scrub cargo run -p cap-recording --example playback-test-runner -- audio-sync cargo run -p cap-recording --example playback-test-runner -- camera-sync @@ -127,13 +135,368 @@ cargo run -p cap-recording --example playback-test-runner -- full ## Completed Fixes -*(Document 
fixes here as they are implemented)* +1. **Low-latency audio startup enabled by default (2026-02-13)** + - `AudioPlayback::spawn()` now selects streaming `create_stream()` path by default. + - Legacy full-timeline prerender path is still available via `CAP_AUDIO_PRERENDER_PLAYBACK=1`. + - `AudioPlaybackBuffer` is available on all platforms so Windows can use streaming sync logic. + +2. **In-playback seek path without stop/start (2026-02-13)** + - Added seek channel to `PlaybackHandle` and playback loop. + - `seek_to` and `set_playhead_position` commands now forward seek requests to active playback. + - Timeline seek no longer tears down and recreates playback while playing. + - Seek signaling now uses watch semantics so only latest frame target is consumed under heavy scrub load. + - Tauri playhead/seek commands now skip no-op same-frame state emission to reduce state/event churn. + +3. **Lazy decoder pool warmup on macOS AVAssetReader (2026-02-13)** + - Initial warmup now creates only a small subset of decoder instances. + - Additional decoder instances are initialized lazily when scrub patterns request them. + - Failed lazy init falls back safely to currently available decoders. + +4. **Playback benchmark runner now captures scrub and startup metrics (2026-02-13)** + - Added `scrub` benchmark mode to `playback-test-runner`. + - Playback result now includes first-frame decode and startup-to-first-frame latency. + - Scrub result now reports seek p50/p95/p99 and seek failure counts. + +5. **Playback runtime emits startup latency signals (2026-02-13)** + - Playback loop now logs first rendered frame latency. + - Audio stream setup now logs startup preparation time and first callback latency. + - Playback loop now logs seek settle latency (`seek_target_frame` to rendered frame). + +6. **Decode benchmark now supports machine-readable evidence output (2026-02-13)** + - `decode-benchmark` supports `--output-json` for structured metric capture. 
+ - Added sequential frame and random sample count controls to scale benchmark depth per hardware class. + - Supports fragmented segment directories for duration-aware benchmarking. + +7. **Timeline seek dispatch now coalesces during drag (2026-02-13)** + - Frontend seek calls are requestAnimationFrame-batched. + - Only the latest pending seek frame is sent while an async seek is in-flight. + - Duplicate same-frame seeks are dropped in both frontend dispatch and playback seek signaling. + +8. **Playback frame wait timeout now scales with target FPS (2026-02-13)** + - Replaced fixed 200ms frame fetch waits with FPS-derived bounded timeout. + - Reduces long stall windows on 60fps playback and improves real-time catch-up behavior. + - In-flight polling interval now scales with frame budget instead of fixed 5ms. + - Catch-up skip threshold now adapts with late streak depth and logs skip event telemetry. + - Warmup target and warmup timeout now scale with FPS, reducing startup buffering overhead. + - Prefetch ahead/behind windows now scale with FPS to reduce unnecessary decode pressure at lower targets. + - Prefetch parallelism now scales with FPS target to increase decode throughput under 60fps workloads. + +8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** + - `playback-test-runner` supports `--json-output` for structured report emission. + - JSON output includes command metadata, system info, summary, and per-recording test detail. + - Command metadata now includes input scope and output flags for reproducibility. + - Startup-to-first-frame threshold is configurable with `--startup-threshold-ms` and tracked as pass/fail signal. + +9. **Added JSON aggregate utility for cross-platform benchmark collation (2026-02-13)** + - `scripts/aggregate-playback-benchmarks.js` builds a markdown table from multiple JSON outputs. + - Aggregates platform/gpu/scenario-tagged runs for matrix reporting. + +10. 
**Added matrix run helper for platform/GPU benchmark execution (2026-02-13)** + - `scripts/run-playback-benchmark-matrix.js` runs `full` and `scrub` scenarios with tagged notes and JSON output. + - Automatically generates aggregate markdown for each machine run directory. + - Performs per-machine post-run validation for required scenarios and optional format requirements. + - Supports scenario subset reruns via `--scenarios` for faster targeted validation. + - Supports startup threshold tuning via `--startup-threshold-ms`. + +11. **Added matrix completeness validator (2026-02-13)** + - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. + - Supports required format checks per cell (mp4 + fragmented). + - Root `package.json` now exposes `bench:playback:*` script aliases for matrix, aggregate, and validate flows. + - Can emit structured validation JSON for artifact upload and automation. + +12. **Added matrix status report generator (2026-02-13)** + - `scripts/build-playback-matrix-report.js` generates concise matrix markdown from JSON results. + - Highlights missing cells, scenario pass/fail, and format coverage per platform/GPU row. + +13. **Added matrix finalization helper (2026-02-13)** + - `scripts/finalize-playback-matrix.js` generates aggregate markdown, status markdown, and validation JSON in one command. + - Supports optional required format enforcement during finalization. + - Also emits bottleneck analysis markdown using configurable FPS/scrub/startup thresholds. + - Can optionally publish finalized artifacts directly into benchmark history target. + +14. **Added matrix summary publisher (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. + - Keeps matrix evidence updates consistent and repeatable. + - Supports optional bottleneck analysis attachment in published summary. + +15. 
**Added bottleneck analyzer for continuous FPS optimization (2026-02-13)** + - `scripts/analyze-playback-matrix-bottlenecks.js` ranks matrix cells by FPS, startup, and scrub threshold breaches. + - Produces prioritized optimization backlog from real matrix evidence. + - Supports structured JSON output for automation and regression tracking. + +16. **Added baseline-vs-candidate comparator for regression gating (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` compares candidate matrix outputs against baseline outputs. + - Flags regressions when FPS drops or startup/scrub latency increase beyond configured tolerance. + - Exits non-zero on regressions so matrix-driven optimization loops can be gated automatically. + +17. **Added prefetch generation gating for live seek correctness and latency (2026-02-13)** + - Prefetch outputs are tagged with seek-generation IDs and stale generation frames are dropped. + - Seek events now advance generation and flush prefetch consumption to prevent old in-flight decode outputs from polluting post-seek playback. + - Reduces redundant decode/render work during aggressive scrub and improves settle reliability. + +18. **Flushed prefetched-frame buffer on seek generation changes (2026-02-13)** + - Live seek handling now clears prefetch buffer immediately on seek events. + - Prevents stale buffered frames from prior playback position from being reused after seek jumps. + - Reduces unnecessary post-seek frame scans and improves settle determinism. + +19. **Tightened in-flight prefetch buffering to current playhead (2026-02-13)** + - In-flight wait path now buffers only frames at or ahead of current frame. + - Avoids re-queueing older frames from initial start position baseline. + - Reduces avoidable prefetch buffer churn during late playback and aggressive seek scenarios. + +20. 
**Expanded comparison gating for multi-run matrix diffs (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports multiple baseline and candidate inputs. + - Enables aggregate regression gating across batched machine runs instead of one directory at a time. + - Improves reliability of continuous optimization loops when matrix outputs are split across multiple sources. + +21. **Added finalization-integrated regression gate support (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now supports `--compare-baseline` and threshold args. + - Finalization can now produce aggregate/status/validation/bottleneck artifacts and run baseline-vs-candidate gating in one command. + - Keeps optimization loops strict by failing finalize runs when regression tolerances are exceeded. + +22. **Made in-flight tracking generation-aware to avoid seek races (2026-02-13)** + - Shared in-flight frame tracking now keys entries by `(seek_generation, frame_number)`. + - Prevents old-generation decode completions from removing new-generation in-flight markers for the same frame number. + - Improves seek correctness under rapid repeated seeks to nearby frame ranges. + +23. **Added comparison artifact publishing in finalize workflows (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` now accepts optional `--comparison-md`. + - `scripts/finalize-playback-matrix.js` now forwards generated comparison artifact to publishing when both compare and publish options are enabled. + - Keeps benchmark history entries self-contained with regression gate evidence. + +24. **Separated prefetch/direct decode in-flight tracking (2026-02-13)** + - Playback now tracks prefetch in-flight frames and direct decode in-flight frames in separate generation-aware sets. + - Prevents prefetch-side clear/reset paths from clearing direct decode in-flight markers. 
+ - In-flight wait logic now checks both sets and direct decode outputs are dropped when a pending seek is detected before frame use. + +25. **Added comparison coverage gating for missing candidate rows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now reports baseline rows that are missing in candidate runs. + - Comparison now fails by default when candidate coverage is missing baseline rows. + - Optional `--allow-missing-candidate` flag keeps metric regression checks while allowing partial candidate matrices. + +26. **Fixed finalize publish ordering for comparison artifacts (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now executes baseline comparison before publish when both options are enabled. + - Prevents publish step from referencing missing comparison artifact files. + - Added finalize passthrough support for `--allow-missing-candidate`. + +27. **Added structured JSON output for comparison gating (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--output-json`. + - Emits comparison summary/regression/missing-coverage details for automation. + - `scripts/finalize-playback-matrix.js` now writes comparison markdown and JSON artifacts during baseline comparison runs. + +28. **Switched playback prefetch buffer to keyed map storage (2026-02-13)** + - Playback prefetch buffer now uses `BTreeMap` keyed by frame number. + - Removes repeated linear scans over deque entries for target frame lookup in hot playback path. + - Retains bounded buffer behavior with deterministic far-ahead/oldest eviction. + +29. **Added sorted prefetch stale-frame pruning (2026-02-13)** + - Playback loop now prunes prefetched frames older than current playhead from the keyed buffer. + - Uses ordered map operations to remove outdated frames efficiently. + - Reduces stale-buffer buildup during frame skips and sustained catch-up scenarios. + +30. 
**Published comparison gate status in matrix summaries (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` now accepts optional `--comparison-json`. + - Published matrix summary now includes comparison gate pass/fail, regression count, and missing-candidate-row count when comparison JSON is provided. + - `scripts/finalize-playback-matrix.js` now forwards both comparison markdown and comparison JSON to publish flow. + +31. **Tightened prefetch warmup/skip maintenance with keyed buffer helpers (2026-02-13)** + - Warmup first-frame timer now starts only after at least one eligible prefetched frame is present in the keyed buffer. + - Skip catch-up path now uses ordered stale-frame pruning helper instead of full-map retain filtering. + - Reduces avoidable warmup timing noise and stale-buffer maintenance overhead in high-skip playback paths. + +32. **Expanded comparison outputs with candidate-only coverage visibility (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now reports candidate-only rows that do not exist in baseline. + - Markdown and JSON comparison outputs now include both missing-candidate and candidate-only coverage summaries. + - Improves matrix diff diagnostics when test coverage differs between baseline and candidate runs. + +33. **Extended published comparison summary fields (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` now includes candidate-only row count from comparison JSON in published matrix summary bullets. + - Keeps published matrix evidence aligned with expanded comparison coverage diagnostics. + +34. **Published comparison policy mode in summary output (2026-02-13)** + - Published matrix summary now includes comparison policy modes for missing-candidate and candidate-only coverage handling. + - Keeps published evidence explicit about whether coverage gaps were allowed or gated in the comparison run. + +35. 
**Added strict candidate-only gating option for comparison workflows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-candidate-only`. + - When enabled, comparison exits non-zero if candidate contains rows not present in baseline. + - `scripts/finalize-playback-matrix.js` now forwards the same strict option in integrated compare flows. + +36. **Required contiguous prefetched frames for warmup readiness (2026-02-13)** + - Playback warmup readiness now checks contiguous prefetched frame coverage from current frame. + - Avoids treating sparse/non-contiguous prefetched entries as equivalent to contiguous startup readiness. + - Reduces early playback start jitter risk when warmup buffer is fragmented. + +37. **Added finalize summary JSON artifact output (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now supports optional `--output-json`. + - Finalize now emits `playback-finalize-summary.json` by default in output directory. + - Summary JSON includes artifact paths, settings, and validation/comparison pass flags for automation. + +38. **Optimized contiguous warmup coverage scan on keyed buffer (2026-02-13)** + - Contiguous prefetched-frame counting now walks ordered keys via map range iteration. + - Reduces repeated keyed lookups during warmup readiness checks. + - Preserves contiguous coverage semantics while lowering per-loop lookup overhead. + +39. **Added git metadata to finalize summary artifacts (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now records git branch and commit SHA in finalize summary JSON output. + - Improves traceability of benchmark artifacts to exact source revision. + +40. **Wired finalize summary artifact into publish flow (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now generates finalize summary JSON before publish step. + - Finalize now forwards `--finalize-summary-json` to `publish-playback-matrix-summary.js`. 
+ - Published matrix summaries can now include finalize artifact metadata in one-shot finalize runs. + +41. **Improved comparison aggregation across multi-input runs (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now aggregates metrics per comparison key across all contributing input reports instead of last-write-wins replacement. + - Comparison output now includes baseline/candidate run counts per row to surface aggregation depth. + - Fixed comparison regression evaluation to use explicit options parameter wiring inside compare function. + +42. **Skipped contiguous warmup scans before first eligible frame (2026-02-13)** + - Warmup loop now defers contiguous-prefetch counting until first warmup frame arrival is observed. + - Reduces avoidable buffer scan work during pre-frame warmup wait. + +43. **Added minimum sample-count gating for matrix comparisons (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--min-samples-per-row`. + - Comparison now flags rows with insufficient effective sample counts and reports them in markdown/JSON outputs. + - `scripts/finalize-playback-matrix.js` forwards minimum sample gating settings into compare stage, and publish summary now surfaces sample gating status fields. + +44. **Fixed sample gating semantics for non-comparable metrics (2026-02-13)** + - Minimum sample checks now only consider metrics that are actually comparable for the row. + - Prevents scrub sample requirements from incorrectly failing non-scrub comparison rows. + - Comparison output now includes compared metric count and effective sample count per row. + +45. **Extended finalize summary comparison diagnostics (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now includes comparison failure reasons and gate outcomes in summary results. + - `scripts/publish-playback-matrix-summary.js` now surfaces finalize comparison failure reasons when present. + +46. 
**Cached warmup contiguous coverage counts during warmup (2026-02-13)** + - Warmup loop now recomputes contiguous prefetched coverage only when warmup buffer content changes. + - Avoids repeated contiguous scans on idle warmup iterations. + +47. **Added explicit comparison gate diagnostics in JSON and published summaries (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now emits `failureReasons` and `gateOutcomes` in summary JSON. + - `scripts/publish-playback-matrix-summary.js` now surfaces comparison failure reasons when present. + +48. **Added parse-error gating and parse stats to comparison flows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-parse-errors`. + - Comparison JSON now includes baseline/candidate file parsing stats and parse error entries. + - `scripts/finalize-playback-matrix.js` now forwards parse-error gating option to compare stage; published summary surfaces parse policy and parse error counts. + +49. **Made keyed prefetch insert helper report structural changes (2026-02-13)** + - `insert_prefetched_frame` now returns whether keyed prefetch buffer changed (insert and/or trim). + - Warmup loop now uses this direct signal instead of length-only delta checks for contiguous coverage cache invalidation. + - Improves warmup cache correctness when inserts and trims occur with stable overall buffer length. + +50. **Extended finalize summary with comparison file stats (2026-02-13)** + - Finalize summary JSON now includes comparison file stats payload when comparison is enabled. + - Publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. + +51. **Stabilized comparison report ordering for reproducibility (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now deterministically sorts comparison, missing, candidate-only, and insufficient-sample rows. + - Keeps markdown/JSON outputs stable across repeated runs with identical inputs. + +52. 
**Extended finalize summary with comparison count rollups (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now captures comparison count rollups in summary results (compared rows, regressions, missing/candidate-only/insufficient-sample counts). + - `scripts/publish-playback-matrix-summary.js` now surfaces these finalize comparison counts in published summaries. + +53. **Added optional zero-comparison gating (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-zero-compared`. + - Enables strict failure when comparison processing yields zero comparable rows. + - `scripts/finalize-playback-matrix.js` forwards zero-comparison gating option in integrated compare flows. + +54. **Added warmup-stage seek handling before playback loop entry (2026-02-13)** + - Warmup loop now consumes seek updates immediately instead of waiting for playback loop start. + - Seek during warmup now resets warmup timers/buffer state and updates frame/audio playhead targets immediately. + - Improves responsiveness when users seek while playback is still warming up. + +55. **Added optional skipped-file gating for comparison workflows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-skipped-files`. + - Enables strict failure when baseline/candidate inputs include skipped JSON files without usable benchmark payloads. + - `scripts/finalize-playback-matrix.js` forwards skipped-file gating option in integrated compare flows. + +56. **Added skipped-file reason breakdown in comparison file stats (2026-02-13)** + - Comparison file stats now report skipped-file reasons as `skippedNoReports` and `skippedNoUsableMetrics`. + - Published summary now surfaces skipped-file breakdown for baseline and candidate inputs. + +57. **Scaled warmup idle poll interval by frame budget (2026-02-13)** + - Warmup loop fallback poll now scales with frame duration and stays in bounded low-latency range. 
+ - Reduces fixed 100ms idle poll delay during warmup while avoiding high-frequency busy polling. + +58. **Retained in-flight prefetch markers for small frame-request shifts (2026-02-13)** + - Frame-request rebases now clear in-flight marker sets only for backward or large-distance seeks that also reset in-flight futures. + - Prevents duplicate decode scheduling caused by clearing marker sets while earlier in-flight futures are still active. + +59. **Added local in-flight frame tracking inside prefetch scheduler (2026-02-13)** + - Prefetch scheduler now tracks active frame numbers locally and uses this set for duplicate scheduling checks. + - Reduces repeated shared lock reads in prefetch hot-path while preserving cross-thread in-flight visibility. + +60. **Batched warmup prefetch queue consumption (2026-02-13)** + - Warmup stage now drains all immediately available prefetched frames on each receive wake-up. + - Improves warmup readiness convergence by reducing one-frame-per-iteration queue handling overhead. + +61. **Scaled prefetch idle polling by frame budget (2026-02-13)** + - Prefetch scheduler idle-yield interval now scales with target frame duration in a bounded low-latency range. + - Reduces fixed 1ms wakeup churn in empty in-flight periods while keeping prefetch request responsiveness high. + +62. **Bounded behind-prefetch dedupe memory growth (2026-02-13)** + - Behind-prefetch dedupe tracking now keeps a bounded eviction-ordered window instead of unbounded growth over long playback sessions. + - Prevents long-session hash-set expansion from degrading behind-prefetch lookup efficiency. + +63. **Cached clip-offset lookups for decode scheduling paths (2026-02-13)** + - Prefetch and direct-decode paths now use cached clip-index-to-offset maps instead of repeated linear clip scans. + - Reduces per-frame scheduling overhead in playback and prefetch loops when projects contain many clips. + +64. 
**Deduplicated frame-request watch updates (2026-02-13)** + - Playback loop now uses change-aware frame-request signaling instead of unconditional watch broadcasts. + - Reduces redundant prefetch wakeups and channel churn when requested frame value does not change. + +65. **Removed duplicate keyed-buffer lookups during prefetch insert (2026-02-13)** + - Prefetch buffer insertion now uses a single `BTreeMap::entry` match to detect insertion and store new frames. + - Eliminates the prior contains-check plus entry-insert double lookup in prefetch hot path. + +66. **Centralized change-aware frame request signaling (2026-02-13)** + - Playback now uses a shared helper for change-aware frame-request watch updates across warmup/seek/skip paths. + - Reduces duplicated watch-update closure logic and keeps no-op request dedupe behavior consistent in all frame-request call sites. + +67. **Short-circuited frame waits when seek updates are pending (2026-02-13)** + - In-flight frame wait polling now exits early when a seek change is observed, instead of waiting through full wait budgets. + - Startup prefetch wait path now also bails out immediately when seek state changes during wait. + +68. **Added pre-wait seek guards before startup and direct decode waits (2026-02-13)** + - Startup prefetch wait path now checks for pending seeks before entering timeout waits and before skip fallback on timeout. + - Direct decode fallback path now checks for pending seek updates before scheduling synchronous decode work. + +69. **Batched keyed-buffer trims during queue-drain insertion (2026-02-13)** + - Warmup and playback queue-drain paths now insert prefetched frames without per-item trim checks and apply one bounded trim pass after the batch. + - Reduces repeated trim work when multiple prefetched frames are drained in the same loop iteration. + +70. 
**Limited prefetch state resets to major/backward rebases (2026-02-13)** + - Frame-request rebases now only reset decoded-ramp and behind-prefetch tracking on backward seeks or large seek-distance jumps. + - Preserves prefetch ramp state on small forward rebases to reduce unnecessary throughput drops. + +71. **Gated behind-prefetch scans to one pass per playback frame (2026-02-13)** + - Behind-prefetch scheduling now scans at most once for each observed playback frame value. + - Avoids repeated behind-window scan work in tight scheduler loops when playback position has not advanced. + +72. **Batched in-flight wait-path prefetch trims (2026-02-13)** + - In-flight wait buffering now inserts prefetched frames without per-frame trim checks while waiting. + - Applies one bounded trim pass after wait-loop buffering to reduce repeated trim overhead under burst receive windows. --- ## Root Cause Analysis Archive -*(Document investigated issues here)* +1. **Audio start delay from full-track prerender** + - Root cause: playback startup used `create_stream_prerendered()` for all sample formats, forcing full timeline audio render before output stream started. + - Fix direction: switch default to incremental `AudioPlaybackBuffer` path with bounded prefill and live playhead correction. + +2. **Scrub lag from playback restart loop** + - Root cause: timeline seek while playing called stop → seek → start, rebuilding playback/audio state on every interactive seek. + - Fix direction: add live seek channel into running playback loop and route frontend seeks to it. + +3. **Display decoder init inflation on macOS** + - Root cause: AVAssetReader decoder pool eagerly initialized multiple decoders during startup. + - Fix direction: reduce eager warmup and lazily instantiate additional pool decoders when scrub behavior actually needs them. 
--- @@ -199,6 +562,203 @@ Decoder Pipeline: --- +### Session 2026-02-13 (Audio Startup + Live Seek + Lazy Decoder Warmup) + +**Goal**: Remove major editor playback bottlenecks affecting startup latency, scrub responsiveness, and decoder init overhead. + +**What was done**: +1. Switched playback audio startup default to streaming buffer path. +2. Kept prerender audio path behind `CAP_AUDIO_PRERENDER_PLAYBACK` as explicit fallback. +3. Enabled `AudioPlaybackBuffer` for all platforms so Windows uses live buffering/sync path. +4. Added a seek channel to `PlaybackHandle` and integrated seek handling into the main playback loop. +5. Updated Tauri seek/playhead commands to forward seeks into active playback handle. +6. Removed frontend timeline stop/start cycle when seeking while playing. +7. Reduced AVAssetReader eager pool warmup and added lazy decoder instantiation for additional pool slots. +8. Extended playback benchmark tooling with scrub mode and startup latency metrics. +9. Added playback runtime startup telemetry logs for first frame and audio callback bring-up. +10. Enhanced decode benchmark example with structured JSON output and configurable sample depth. +11. Added timeline seek dispatch coalescing to reduce seek command storms during drag. +12. Added JSON report output support to playback-test-runner for benchmark evidence collection. +13. Added cross-platform benchmark JSON aggregation utility script. +14. Added matrix execution helper script for full + scrub benchmark runs per platform/GPU. +15. Added matrix validation script for required cell and format coverage checks. +16. Added matrix status report generator for concise artifact summaries. +17. Added one-shot finalization script for aggregate + status + validation outputs. +18. Added benchmark history publisher script for finalized matrix artifacts. +19. Added matrix bottleneck analysis script for prioritized FPS optimization follow-up. +20. 
Added baseline-vs-candidate comparison script to gate regressions in optimization loops. +21. Added seek-generation prefetch gating to drop stale decode outputs after live seek updates. +22. Cleared prefetched-frame buffer on live seek handling to avoid stale buffered frame reuse. +23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. +24. Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. +25. Added optional baseline comparison gating inside matrix finalization workflow. +26. Made in-flight frame tracking generation-aware to prevent cross-seek marker collisions. +27. Split prefetch and direct decode in-flight tracking and guarded direct decode frame usage when seek updates are pending. +28. Added missing-candidate-row coverage gating in baseline-vs-candidate comparison script with optional override flag. +29. Fixed finalize compare/publish ordering so comparison artifacts exist before publish attachment and added finalize support for missing-candidate override. +30. Added structured JSON output for baseline-vs-candidate comparison script and wired finalize comparison runs to emit comparison JSON artifacts. +31. Added comparison artifact attachment support in publish/finalize matrix summary workflows. +32. Replaced playback prefetch deque scans with keyed `BTreeMap` buffering for lower lookup overhead in frame acquisition path. +33. 
Added ordered stale-frame pruning in keyed prefetch buffer to keep playback buffer aligned with current playhead. +34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. +35. Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. +36. Expanded baseline-vs-candidate comparison outputs with candidate-only row reporting. +37. Added strict `fail-on-candidate-only` gating option for compare/finalize matrix comparison workflows. +38. Added candidate-only row count reporting in published matrix summary comparison status bullets. +39. Updated playback warmup start condition to require contiguous prefetched frame coverage from current frame. +40. Added comparison policy mode reporting (allow/fail) for missing-candidate and candidate-only coverage in published matrix summaries. +41. Added finalize summary JSON artifact output with artifact/settings/result metadata for automation workflows. +42. Optimized contiguous prefetched-frame warmup scan using ordered map range iteration instead of repeated key lookups. +43. Added git branch/commit metadata into finalize summary JSON artifacts for source traceability. +44. Wired finalize summary JSON into publish flow so one-shot finalize runs can publish summary metadata alongside matrix artifacts. +45. Improved multi-input comparison aggregation by merging per-key metrics across runs and surfacing baseline/candidate run counts per comparison row. +46. Skipped contiguous warmup coverage scans until first warmup frame observation to reduce pre-frame warmup loop scan overhead. +47. Added minimum-sample comparison gating with `--min-samples-per-row`, insufficient-sample reporting, and finalize passthrough support. +48. Cached warmup contiguous coverage values and only recomputed contiguous scan when warmup buffer changed. +49. 
Added comparison JSON gate diagnostics (`failureReasons`, `gateOutcomes`) and surfaced failure reasons in published summary output. +50. Corrected minimum sample gating semantics to only count comparable metrics and added compared-metric/effective-sample columns in comparison output. +51. Extended finalize summary and publish output with comparison failure reasons and gate outcome metadata. +52. Added comparison parse-error gating (`--fail-on-parse-errors`) with parse stats surfaced in comparison JSON, finalize settings, and published summaries. +53. Updated keyed prefetch insert helper to emit structural-change signals for warmup contiguous coverage cache invalidation. +54. Extended finalize summary and publish output with comparison file stats (including parse error counts). +55. Stabilized comparison output ordering with deterministic sorting for comparison rows and coverage-delta sections. +56. Extended finalize and publish summaries with comparison count rollups (compared rows, regressions, missing/candidate-only/insufficient-sample counts). +57. Added optional zero-comparison gating (`--fail-on-zero-compared`) for compare/finalize flows and surfaced zero-compare policy in comparison/published summaries. +58. Added warmup-stage seek handling to apply seeks immediately while playback warmup is in progress. +59. Added optional skipped-file gating (`--fail-on-skipped-files`) for compare/finalize flows and surfaced skipped-file policy in comparison/published summaries. +60. Added skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`) into comparison file stats and published summaries. +61. Scaled warmup idle poll interval with frame budget to reduce warmup fallback latency under sparse frame arrival. +62. Retained in-flight prefetch markers for small frame-request shifts to avoid duplicate decode scheduling during active prefetch execution. +63. 
Added local in-flight frame tracking in prefetch scheduler to reduce lock-heavy duplicate-check lookups on scheduling hot-path. +64. Batched warmup prefetch queue consumption to reduce warmup staging overhead and improve contiguous warmup-fill responsiveness. +65. Scaled prefetch idle polling with frame budget to reduce scheduler wakeup churn during empty in-flight periods. +66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. +67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. +68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. +69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. +70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. +71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. +72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. +73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. +74. Limited prefetch state resets to backward/major frame-request rebases so small forward rebases preserve decode-ramp and behind-prefetch tracking state. +75. Gated behind-prefetch scheduling scans to one pass per playback frame to avoid repeated behind-window scan churn while playback position is unchanged. +76. Batched in-flight wait-path prefetch trimming so buffered wait inserts trim once per wait pass instead of per buffered frame. 
+ +**Changes Made**: +- `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. +- `crates/editor/src/audio.rs`: cross-platform `AudioPlaybackBuffer`, windows-only smooth seek helper. +- `apps/desktop/src-tauri/src/lib.rs`: forward `seek_to` and `set_playhead_position` into active playback handle. +- `apps/desktop/src/routes/editor/Timeline/index.tsx`: seek while playing now sends direct `seekTo` without playback restart. +- `crates/rendering/src/decoder/avassetreader.rs`: lower eager decoder warmup and lazy pool growth. +- `crates/recording/examples/playback-test-runner.rs`: added scrub command and startup/scrub latency metrics. +- `crates/editor/PLAYBACK-BENCHMARKS.md`: updated benchmark reference and metric definitions. +- `crates/editor/src/playback.rs`: added first-render and audio-callback startup latency logging. +- `crates/editor/examples/decode-benchmark.rs`: added `--output-json`, startup metrics, and configurable sequential/random sampling. +- `apps/desktop/src/routes/editor/Timeline/index.tsx`: added requestAnimationFrame-based seek coalescing with in-flight protection. +- `crates/recording/examples/playback-test-runner.rs`: added `--json-output` to emit structured benchmark reports. +- `scripts/aggregate-playback-benchmarks.js`: added markdown aggregation for multiple playback benchmark JSON artifacts. +- `scripts/run-playback-benchmark-matrix.js`: added orchestrated full/scrub benchmark runner with per-machine aggregate generation. +- `scripts/validate-playback-matrix.js`: added required matrix cell/format validation for aggregated evidence. +- `scripts/build-playback-matrix-report.js`: added concise matrix status report generation from JSON benchmark outputs. +- `scripts/finalize-playback-matrix.js`: added one-shot matrix artifact finalization workflow. +- `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. 
+- `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. +- `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. +- `scripts/compare-playback-benchmark-runs.js`: fixed options wiring inside comparison regression checks and now aggregates per-key metrics across multi-input runs with run-count reporting. +- `scripts/compare-playback-benchmark-runs.js`: comparison row sets are now deterministically sorted for stable markdown/json artifact diffs. +- `scripts/compare-playback-benchmark-runs.js`: added optional `--fail-on-zero-compared` and zero-compare gate diagnostics in markdown/json outputs. +- `scripts/finalize-playback-matrix.js`: forwards `--fail-on-zero-compared` into compare stage and records policy in finalize summary settings. +- `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison count rollup fields for compared rows, regressions, and coverage deltas. +- `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize comparison count rollups when finalize summary metadata is attached. +- `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. +- `crates/editor/src/playback.rs`: warmup loop now handles seek updates immediately, resetting warmup state and updating frame/audio targets before playback loop entry. +- `crates/editor/src/playback.rs`: warmup loop fallback polling now scales with frame budget instead of fixed 100ms sleep to improve responsiveness without busy waiting. +- `crates/editor/src/playback.rs`: frame-request rebases now preserve in-flight marker sets unless in-flight futures are explicitly reset for backward/large seek changes. 
+- `crates/editor/src/playback.rs`: prefetch scheduler now uses a local in-flight frame set for duplicate scheduling checks and mirrors it into shared generation-keyed in-flight markers for playback coordination. +- `crates/editor/src/playback.rs`: warmup prefetch receive path now drains immediately queued prefetched frames in batches to accelerate warmup buffer population. +- `crates/editor/src/playback.rs`: prefetch scheduler idle polling now scales with frame budget (bounded) instead of fixed 1ms delay, reducing idle wakeup overhead. +- `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. +- `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. +- `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. +- `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. +- `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. +- `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. +- `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. +- `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. 
+- `crates/editor/src/playback.rs`: frame-request rebases now only clear prefetch ramp/behind-tracking state on backward or large-distance jumps, preserving throughput state on small forward rebases. +- `crates/editor/src/playback.rs`: behind-prefetch scheduling now scans at most once per playback frame value, reducing repeated behind-window scan overhead in tight scheduler loops. +- `crates/editor/src/playback.rs`: in-flight wait buffering now uses untrimmed inserts plus a single post-wait trim pass, reducing repeated keyed-buffer trim operations during wait-path burst buffering. +- `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. +- `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. +- `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. +- `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. +- `scripts/compare-playback-benchmark-runs.js`: comparison outputs now include candidate-only rows in addition to missing-candidate coverage deltas. +- `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. +- `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. +- `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-skipped-files` gate and parse/skip policy reporting in comparison markdown/json outputs. +- `scripts/compare-playback-benchmark-runs.js`: comparison file stats now include skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`). 
+- `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. +- `scripts/finalize-playback-matrix.js`: forwards `--fail-on-skipped-files` into compare stage and records skipped-file policy in finalize summary settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes missing-candidate and candidate-only coverage policy modes from comparison JSON tolerance settings. +- `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. +- `crates/editor/src/playback.rs`: contiguous warmup coverage scan now uses ordered map range iteration to reduce repeated key lookup overhead. +- `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. +- `scripts/finalize-playback-matrix.js`: added optional `--output-json` and default finalize summary JSON emission with artifact path and pass/fail metadata. +- `scripts/finalize-playback-matrix.js`: finalize summary JSON now includes git branch and commit metadata when available. +- `scripts/finalize-playback-matrix.js`: finalize now writes summary JSON before publish and passes `--finalize-summary-json` into publish flow. +- `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional finalize summary JSON input and surfaces finalize source/validation metadata. +- `scripts/compare-playback-benchmark-runs.js`: added `--min-samples-per-row`, insufficient-sample row reporting, and sample gate fields in markdown/JSON outputs. 
+- `scripts/compare-playback-benchmark-runs.js`: minimum sample checks now apply only to metrics that are comparable for each row; output now includes compared metric count and effective sample count columns. +- `scripts/finalize-playback-matrix.js`: forwards `--min-samples-per-row` into compare stage and captures it in finalize summary settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. +- `scripts/compare-playback-benchmark-runs.js`: comparison JSON summary now includes explicit `failureReasons` and `gateOutcomes` fields. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes comparison failure reasons when present. +- `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison failure reasons and gate outcome fields in results metadata. +- `scripts/compare-playback-benchmark-runs.js`: added parse-error gating and baseline/candidate file parse stats/parse error entries in JSON output. +- `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes skipped-file policy mode from comparison tolerance settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes skipped-file breakdown counts for no-reports and no-usable-metrics cases. +- `crates/editor/src/playback.rs`: `insert_prefetched_frame` now returns structural-change signals and warmup cache invalidation uses this signal to avoid stale contiguous counts when insert+trim keeps buffer length unchanged. 
+- `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison file stats payload when comparison runs are enabled. +- `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. +- `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. +- `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. +- `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. +- `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. +- `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. +- `scripts/finalize-playback-matrix.js`: finalize publish pass now forwards both comparison markdown and comparison JSON artifacts. +- `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. +- `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. +- `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. +- `scripts/compare-playback-benchmark-runs.js`: comparison gating now accepts multiple baseline and candidate inputs for aggregated matrix regression checks. +- `scripts/finalize-playback-matrix.js`: finalization now supports optional baseline comparison gating and threshold controls in the same pass. 
+- `crates/editor/src/playback.rs`: in-flight frame markers now include seek generation to prevent old decode paths from clearing current-generation markers. +- `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional comparison artifact attachment. +- `scripts/finalize-playback-matrix.js`: finalize flow now includes comparison artifact when publishing and baseline comparison are both requested. +- `crates/editor/src/playback.rs`: prefetch and direct decode now use separate generation-aware in-flight sets, with combined checks in frame wait path. +- `scripts/compare-playback-benchmark-runs.js`: comparison now reports and gates missing candidate rows relative to baseline coverage. +- `scripts/finalize-playback-matrix.js`: comparison now runs before publish in combined workflows and forwards missing-candidate override to compare step. +- `scripts/compare-playback-benchmark-runs.js`: comparison now supports optional structured JSON output for downstream automation. +- `scripts/finalize-playback-matrix.js`: baseline comparison in finalize now writes both markdown and JSON comparison artifacts. + +**Results**: +- ✅ `cargo +stable check -p cap-editor` passes after changes. +- ✅ `cargo +stable check -p cap-rendering` passes after changes. +- ✅ `pnpm --dir apps/desktop exec tsc --noEmit` passes after frontend seek changes. +- ⚠️ `cargo +stable check -p cap-desktop` and `cargo +stable run -p cap-recording --example playback-test-runner -- list` fail in this Linux environment because `scap-targets` does not currently compile on this target (`DisplayIdImpl`/`WindowImpl` unresolved), preventing local benchmark execution here. +- ⚠️ Cross-platform FPS/scrub/A-V benchmark evidence still pending on macOS and Windows devices with real recordings. + +**Stopping point**: Core playback code-path optimizations are implemented and compiling in touched crates; next step is benchmark execution on macOS 13+ and Windows GPU matrix to quantify gains. 
+ +--- + ### Session 2026-01-28 (Initial Baseline - MP4) **Goal**: Establish initial playback performance baseline @@ -327,6 +887,7 @@ The CPU RGBA→NV12 conversion was taking 15-25ms per frame for 3024x1964 resolu ## References - `PLAYBACK-BENCHMARKS.md` - Raw performance test data (auto-updated by test runner) +- `PLAYBACK-MATRIX-RUNBOOK.md` - Cross-platform playback evidence collection process - `../recording/FINDINGS.md` - Recording performance findings (source of test files) - `../recording/BENCHMARKS.md` - Recording benchmark data - `examples/playback-test-runner.rs` - Playback test implementation diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md new file mode 100644 index 0000000000..b4302f8808 --- /dev/null +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -0,0 +1,221 @@ +# Playback Matrix Runbook + +This runbook defines how to collect benchmark evidence for desktop playback performance and sync validation across required hardware classes. + +## Target matrix + +| Platform | GPU class | Required scenarios | +|---|---|---| +| macOS 13+ | Apple Silicon | full, scrub | +| Windows 11 | NVIDIA discrete | full, scrub | +| Windows 11 | AMD discrete | full, scrub | +| Windows 11 | Integrated baseline | full, scrub | + +## Preconditions + +1. Build can run on target machine. +2. Real-device recording outputs are available. +3. Recordings include both MP4 and fragmented samples. +4. Node and Rust toolchains are installed. 
+ +## Inputs and output directories + +Set these per machine: + +- `INPUT_DIR`: recording root (default `/tmp/cap-real-device-tests`) +- `OUT_DIR`: machine-local output folder for JSON and aggregate markdown + +Example: + +```bash +export INPUT_DIR="/tmp/cap-real-device-tests" +export OUT_DIR="/tmp/cap-playback-matrix/macos-apple-silicon" +mkdir -p "$OUT_DIR" +``` + +## Machine run command + +Run this once per platform/GPU class: + +```bash +node scripts/run-playback-benchmark-matrix.js \ + --platform "<platform>" \ + --gpu "<gpu>" \ + --output-dir "$OUT_DIR" \ + --fps 60 \ + --startup-threshold-ms 250 \ + --require-formats mp4,fragmented \ + --scenarios full,scrub \ + --input-dir "$INPUT_DIR" +``` + +Equivalent shortcut: + +```bash +pnpm bench:playback:matrix -- --platform "<platform>" --gpu "<gpu>" --output-dir "$OUT_DIR" --fps 60 --startup-threshold-ms 250 --require-formats mp4,fragmented --scenarios full,scrub --input-dir "$INPUT_DIR" +``` + +Rerun only scrub scenario for a machine: + +```bash +pnpm bench:playback:matrix -- --platform "<platform>" --gpu "<gpu>" --output-dir "$OUT_DIR" --fps 60 --scenarios scrub --input-dir "$INPUT_DIR" +``` + +Examples: + +```bash +node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/cap-playback-matrix/macos-apple-silicon --fps 60 --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir C:\temp\cap-playback-matrix\windows-nvidia --fps 60 --input-dir C:\temp\cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu amd-discrete --output-dir C:\temp\cap-playback-matrix\windows-amd --fps 60 --input-dir C:\temp\cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu integrated --output-dir C:\temp\cap-playback-matrix\windows-integrated --fps 60 --input-dir C:\temp\cap-real-device-tests +``` + +## Outputs produced per machine + +Each run directory contains: + +- 
timestamped `full` scenario JSON +- timestamped `scrub` scenario JSON +- `<platform>-<gpu>-aggregate.md` summary table +- `<platform>-<gpu>-validation.json` matrix validation result + +## Cross-machine aggregation + +After collecting all machine folders into a shared root: + +```bash +node scripts/aggregate-playback-benchmarks.js --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md +node scripts/build-playback-matrix-report.js --input /path/to/all-machine-results --output /tmp/playback-matrix-status.md +``` + +Equivalent shortcut: + +```bash +pnpm bench:playback:aggregate -- --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md +pnpm bench:playback:report -- --input /path/to/all-machine-results --output /tmp/playback-matrix-status.md +``` + +Validate matrix completeness: + +```bash +node scripts/validate-playback-matrix.js --input /path/to/all-machine-results --require-formats mp4,fragmented +``` + +Equivalent shortcut: + +```bash +pnpm bench:playback:validate -- --input /path/to/all-machine-results --require-formats mp4,fragmented +``` + +One-shot finalize command: + +```bash +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json +``` + +Finalize summary JSON includes artifact paths, gate outcomes, settings, and git branch/commit metadata when available. +When comparison is enabled, finalize summary JSON also includes comparison failure reasons and gate outcome details. +When comparison is enabled, finalize summary JSON also includes comparison file stats such as baseline/candidate parse error counts. +When comparison is enabled, finalize summary JSON also includes comparison summary counts for compared rows, regressions, and coverage deltas. 
+ +Include optimization thresholds when finalizing: + +```bash +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 + +# include baseline comparison gate during finalization +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 + +# optional: allow missing candidate rows during compare gate +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate + +# optional: fail finalize compare gate when candidate includes rows absent in baseline +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only + +# optional: require minimum sample count per compared row +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 + +# optional: fail comparison gate when any input JSON fails to parse +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors + +# optional: fail comparison gate when no comparable rows remain +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-zero-compared + +# optional: fail comparison gate when any input JSON is skipped +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline 
/path/to/baseline-results --fail-on-skipped-files +``` + +Finalize and publish to benchmark history in one command: + +```bash +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md +``` + +Publish finalized artifacts into benchmark history: + +```bash +pnpm bench:playback:publish -- \ + --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md \ + --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ + --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ + --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md + +pnpm bench:playback:publish -- \ + --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md \ + --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ + --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ + --comparison-md /tmp/playback-matrix-final/playback-comparison.md \ + --comparison-json /tmp/playback-matrix-final/playback-comparison.json \ + --finalize-summary-json /tmp/playback-matrix-final/playback-finalize-summary.json +``` + +Generate bottleneck analysis for optimization backlog: + +```bash +pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tmp/playback-matrix-final/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tmp/playback-matrix-final/playback-bottlenecks.md --output-json /tmp/playback-matrix-final/playback-bottlenecks.json --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +``` + +Compare candidate run against baseline and fail on regressions: + +```bash +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results 
--output /tmp/playback-matrix-final/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 + +# multiple baseline/candidate directories can be provided +pnpm bench:playback:compare -- --baseline /path/to/baseline-a --baseline /path/to/baseline-b --candidate /path/to/candidate-a --candidate /path/to/candidate-b --output /tmp/playback-matrix-final/playback-comparison.md + +# optional: allow missing candidate rows while still checking metric regressions +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate + +# emit structured JSON alongside markdown for automation +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --output-json /tmp/playback-matrix-final/playback-comparison.json + +# compare output now includes both missing-candidate rows and candidate-only rows +# optional: fail compare gate when candidate includes rows absent in baseline +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only + +# when multiple inputs are provided, comparison output includes baseline/candidate run counts per row +# optional: require minimum sample count per compared row +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 + +# comparison JSON includes failureReasons and gateOutcomes for automation +# minimum sample gating uses metrics that are actually comparable for each row +# optional: fail comparison gate when any input JSON fails to parse +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors + +# optional: fail comparison gate when no comparable rows remain after filtering +pnpm bench:playback:compare -- 
--baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-zero-compared + +# optional: fail comparison gate when any input JSON is skipped (no reports/usable metrics) +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-skipped-files + +# comparison file stats include skipped-file breakdown (no_reports / no_usable_metrics) +``` + +## Evidence checklist + +1. Confirm all matrix rows exist. +2. Confirm each row has both `full` and `scrub` scenarios. +3. Capture aggregate markdown and raw JSON artifacts. +4. Attach outputs to playback findings update. diff --git a/crates/editor/examples/decode-benchmark.rs b/crates/editor/examples/decode-benchmark.rs index d29ab2dda0..b7e74ce4c3 100644 --- a/crates/editor/examples/decode-benchmark.rs +++ b/crates/editor/examples/decode-benchmark.rs @@ -1,12 +1,19 @@ use cap_rendering::decoder::{AsyncVideoDecoderHandle, spawn_decoder}; +use serde::Serialize; +use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; use std::time::Instant; +use std::time::{SystemTime, UNIX_EPOCH}; use tokio::runtime::Runtime; const DEFAULT_DURATION_SECS: f32 = 60.0; fn get_video_duration(path: &Path) -> f32 { + if path.is_dir() { + return get_fragmented_video_duration(path); + } + let output = Command::new("ffprobe") .args([ "-v", @@ -33,28 +40,137 @@ fn get_video_duration(path: &Path) -> f32 { } } +fn get_fragmented_video_duration(path: &Path) -> f32 { + let init_segment = path.join("init.mp4"); + if !init_segment.exists() { + eprintln!( + "Warning: Fragmented input {} missing init.mp4", + path.display() + ); + return DEFAULT_DURATION_SECS; + } + + let mut fragments: Vec = match fs::read_dir(path) { + Ok(entries) => entries + .filter_map(|entry| entry.ok()) + .map(|entry| entry.path()) + .filter(|entry| entry.extension().is_some_and(|ext| ext == "m4s")) + .collect(), + Err(error) => { + eprintln!( + "Warning: Failed to read fragmented directory 
{}: {}", + path.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + }; + fragments.sort(); + + if fragments.is_empty() { + eprintln!( + "Warning: Fragmented input {} has no .m4s segments", + path.display() + ); + return DEFAULT_DURATION_SECS; + } + + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|value| value.as_nanos()) + .unwrap_or(0); + let combined_path = + std::env::temp_dir().join(format!("cap-decode-benchmark-combined-{timestamp}.mp4")); + + let mut combined_data = match fs::read(&init_segment) { + Ok(data) => data, + Err(error) => { + eprintln!( + "Warning: Failed to read init segment {}: {}", + init_segment.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + }; + + for fragment in fragments { + match fs::read(&fragment) { + Ok(data) => combined_data.extend(data), + Err(error) => { + eprintln!( + "Warning: Failed to read segment {}: {}", + fragment.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + } + } + + if let Err(error) = fs::write(&combined_path, &combined_data) { + eprintln!( + "Warning: Failed to write combined fragmented video {}: {}", + combined_path.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + + let duration = get_video_duration(&combined_path); + if let Err(error) = fs::remove_file(&combined_path) { + eprintln!( + "Warning: Failed to remove temporary combined file {}: {}", + combined_path.display(), + error + ); + } + duration +} + #[derive(Debug, Clone)] struct BenchmarkConfig { video_path: PathBuf, fps: u32, iterations: usize, + sequential_frames: usize, + random_samples: usize, + output_json: Option<PathBuf>, } -#[derive(Debug, Default)] +#[derive(Debug, Default, Serialize)] struct BenchmarkResults { decoder_creation_ms: f64, sequential_decode_times_ms: Vec<f64>, + first_frame_decode_ms: f64, + startup_to_first_frame_ms: f64, + sequential_p50_ms: f64, + sequential_p95_ms: f64, + sequential_p99_ms: f64, sequential_fps: f64, sequential_failures: usize, seek_times_by_distance: Vec<(f32, 
f64)>, seek_failures: usize, random_access_times_ms: Vec<f64>, random_access_avg_ms: f64, + random_access_p50_ms: f64, + random_access_p95_ms: f64, + random_access_p99_ms: f64, random_access_failures: usize, cache_hits: usize, cache_misses: usize, } +#[derive(Debug, Serialize)] +struct BenchmarkOutput { + video_path: PathBuf, + fps: u32, + iterations: usize, + sequential_frames: usize, + random_samples: usize, + results: BenchmarkResults, +} + impl BenchmarkResults { fn print_report(&self) { println!("\n{}", "=".repeat(60)); @@ -96,6 +212,14 @@ impl BenchmarkResults { println!(" Avg decode time: {avg:.2}ms"); println!(" Min decode time: {min:.2}ms"); println!(" Max decode time: {max:.2}ms"); + println!(" P50 decode time: {:.2}ms", self.sequential_p50_ms); + println!(" P95 decode time: {:.2}ms", self.sequential_p95_ms); + println!(" P99 decode time: {:.2}ms", self.sequential_p99_ms); + println!(" First frame decode: {:.2}ms", self.first_frame_decode_ms); + println!( + " Startup to first frame: {:.2}ms", + self.startup_to_first_frame_ms + ); println!(" Effective FPS: {:.1}", self.sequential_fps); } println!(); @@ -138,18 +262,9 @@ impl BenchmarkResults { println!(" Avg access time: {avg:.2}ms"); println!(" Min access time: {min:.2}ms"); println!(" Max access time: {max:.2}ms"); - println!( - " P50: {:.2}ms", - percentile(&self.random_access_times_ms, 50.0) - ); - println!( - " P95: {:.2}ms", - percentile(&self.random_access_times_ms, 95.0) - ); - println!( - " P99: {:.2}ms", - percentile(&self.random_access_times_ms, 99.0) - ); + println!(" P50: {:.2}ms", self.random_access_p50_ms); + println!(" P95: {:.2}ms", self.random_access_p95_ms); + println!(" P99: {:.2}ms", self.random_access_p99_ms); } println!(); @@ -215,10 +330,13 @@ async fn benchmark_sequential_decode( fps: u32, frame_count: usize, start_time: f32, -) -> (Vec<f64>, f64, usize) { +) -> (Vec<f64>, f64, usize, f64, f64) { let mut times = Vec::with_capacity(frame_count); let mut failures = 0; let overall_start = 
Instant::now(); + let mut first_frame_decode_ms = 0.0; + let mut startup_to_first_frame_ms = 0.0; + let mut first_frame_captured = false; for i in 0..frame_count { let time = start_time + (i as f32 / fps as f32); @@ -227,6 +345,11 @@ async fn benchmark_sequential_decode( Some(_frame) => { let elapsed = start.elapsed(); times.push(elapsed.as_secs_f64() * 1000.0); + if !first_frame_captured { + first_frame_captured = true; + first_frame_decode_ms = elapsed.as_secs_f64() * 1000.0; + startup_to_first_frame_ms = overall_start.elapsed().as_secs_f64() * 1000.0; + } } None => { failures += 1; @@ -243,7 +366,13 @@ async fn benchmark_sequential_decode( 0.0 }; - (times, effective_fps, failures) + ( + times, + effective_fps, + failures, + first_frame_decode_ms, + startup_to_first_frame_ms, + ) } async fn benchmark_seek( @@ -308,6 +437,10 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { config.video_path.display() ); println!("FPS: {}, Iterations: {}", config.fps, config.iterations); + println!( + "Sequential frames: {}, Random samples: {}", + config.sequential_frames, config.random_samples + ); println!(); println!("[1/5] Benchmarking decoder creation..."); @@ -341,12 +474,20 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { println!("Detected video duration: {video_duration:.2}s"); println!(); - println!("[3/5] Benchmarking sequential decode (100 frames from start)..."); - let (seq_times, seq_fps, seq_failures) = - benchmark_sequential_decode(&decoder, config.fps, 100, 0.0).await; + println!( + "[3/5] Benchmarking sequential decode ({} frames from start)...", + config.sequential_frames + ); + let (seq_times, seq_fps, seq_failures, first_frame_decode_ms, startup_to_first_frame_ms) = + benchmark_sequential_decode(&decoder, config.fps, config.sequential_frames, 0.0).await; results.sequential_decode_times_ms = seq_times; results.sequential_fps = seq_fps; results.sequential_failures = seq_failures; + 
results.first_frame_decode_ms = first_frame_decode_ms; + results.startup_to_first_frame_ms = startup_to_first_frame_ms; + results.sequential_p50_ms = percentile(&results.sequential_decode_times_ms, 50.0); + results.sequential_p95_ms = percentile(&results.sequential_decode_times_ms, 95.0); + results.sequential_p99_ms = percentile(&results.sequential_decode_times_ms, 99.0); println!(" Done: {seq_fps:.1} effective FPS"); if seq_failures > 0 { println!(" Warning: {seq_failures} frames failed to decode"); @@ -370,9 +511,12 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { } } - println!("[5/5] Benchmarking random access (50 samples)..."); + println!( + "[5/5] Benchmarking random access ({} samples)...", + config.random_samples + ); let (random_times, random_failures) = - benchmark_random_access(&decoder, config.fps, video_duration, 50).await; + benchmark_random_access(&decoder, config.fps, video_duration, config.random_samples).await; results.random_access_times_ms = random_times; results.random_access_failures = random_failures; results.random_access_avg_ms = if results.random_access_times_ms.is_empty() { @@ -381,6 +525,9 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { results.random_access_times_ms.iter().sum::<f64>() / results.random_access_times_ms.len() as f64 }; + results.random_access_p50_ms = percentile(&results.random_access_times_ms, 50.0); + results.random_access_p95_ms = percentile(&results.random_access_times_ms, 95.0); + results.random_access_p99_ms = percentile(&results.random_access_times_ms, 99.0); println!(" Done: {:.2}ms avg", results.random_access_avg_ms); if random_failures > 0 { println!(" Warning: {random_failures} random accesses failed"); @@ -389,6 +536,53 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { results } +fn write_json_output(config: &BenchmarkConfig, results: &BenchmarkResults) { + let Some(output_path) = &config.output_json else { + return; + }; + + let 
output = BenchmarkOutput { + video_path: config.video_path.clone(), + fps: config.fps, + iterations: config.iterations, + sequential_frames: config.sequential_frames, + random_samples: config.random_samples, + results: BenchmarkResults { + decoder_creation_ms: results.decoder_creation_ms, + sequential_decode_times_ms: results.sequential_decode_times_ms.clone(), + first_frame_decode_ms: results.first_frame_decode_ms, + startup_to_first_frame_ms: results.startup_to_first_frame_ms, + sequential_p50_ms: results.sequential_p50_ms, + sequential_p95_ms: results.sequential_p95_ms, + sequential_p99_ms: results.sequential_p99_ms, + sequential_fps: results.sequential_fps, + sequential_failures: results.sequential_failures, + seek_times_by_distance: results.seek_times_by_distance.clone(), + seek_failures: results.seek_failures, + random_access_times_ms: results.random_access_times_ms.clone(), + random_access_avg_ms: results.random_access_avg_ms, + random_access_p50_ms: results.random_access_p50_ms, + random_access_p95_ms: results.random_access_p95_ms, + random_access_p99_ms: results.random_access_p99_ms, + random_access_failures: results.random_access_failures, + cache_hits: results.cache_hits, + cache_misses: results.cache_misses, + }, + }; + + match serde_json::to_string_pretty(&output) { + Ok(json) => match fs::write(output_path, json) { + Ok(()) => println!("Wrote benchmark JSON to {}", output_path.display()), + Err(error) => eprintln!( + "Failed to write benchmark JSON to {}: {}", + output_path.display(), + error + ), + }, + Err(error) => eprintln!("Failed to serialize benchmark JSON output: {}", error), + } +} + fn main() { let args: Vec<String> = std::env::args().collect(); @@ -397,7 +591,7 @@ fn main() { .position(|a| a == "--video") .and_then(|i| args.get(i + 1)) .map(PathBuf::from) - .expect("Usage: decode-benchmark --video <path> [--fps <fps>] [--iterations <count>]"); + .expect("Usage: decode-benchmark --video <path> [--fps <fps>] [--iterations <count>] [--sequential-frames <count>] [--random-samples <count>] [--output-json <path>
]"); let fps = args .iter() @@ -413,14 +607,38 @@ fn main() { .and_then(|s| s.parse().ok()) .unwrap_or(100); + let sequential_frames = args + .iter() + .position(|a| a == "--sequential-frames") + .and_then(|i| args.get(i + 1)) + .and_then(|s| s.parse().ok()) + .unwrap_or(100); + + let random_samples = args + .iter() + .position(|a| a == "--random-samples") + .and_then(|i| args.get(i + 1)) + .and_then(|s| s.parse().ok()) + .unwrap_or(50); + + let output_json = args + .iter() + .position(|a| a == "--output-json") + .and_then(|i| args.get(i + 1)) + .map(PathBuf::from); + let config = BenchmarkConfig { video_path, fps, iterations, + sequential_frames, + random_samples, + output_json, }; let rt = Runtime::new().expect("Failed to create Tokio runtime"); - let results = rt.block_on(run_full_benchmark(config)); + let results = rt.block_on(run_full_benchmark(config.clone())); results.print_report(); + write_json_output(&config, &results); } diff --git a/crates/editor/src/audio.rs b/crates/editor/src/audio.rs index d2cdbf8df4..d631b0f9a1 100644 --- a/crates/editor/src/audio.rs +++ b/crates/editor/src/audio.rs @@ -7,7 +7,6 @@ use cap_project::{AudioConfiguration, ClipOffsets, ProjectConfiguration, Timelin use ffmpeg::{ ChannelLayout, Dictionary, format as avformat, frame::Audio as FFAudio, software::resampling, }; -#[cfg(not(target_os = "windows"))] use ringbuf::{ HeapRb, traits::{Consumer, Observer, Producer}, @@ -248,14 +247,12 @@ impl AudioRenderer { } } -#[cfg(not(target_os = "windows"))] pub struct AudioPlaybackBuffer { frame_buffer: AudioRenderer, resampler: AudioResampler, resampled_buffer: HeapRb, } -#[cfg(not(target_os = "windows"))] impl AudioPlaybackBuffer { pub const PLAYBACK_SAMPLES_COUNT: u32 = 512; @@ -296,6 +293,19 @@ impl AudioPlaybackBuffer { self.frame_buffer.set_playhead(playhead, project); } + #[cfg(target_os = "windows")] + pub fn set_playhead_smooth(&mut self, playhead: f64, project: &ProjectConfiguration) { + let current_playhead = 
self.frame_buffer.elapsed_samples_to_playhead(); + let drift = (playhead - current_playhead).abs(); + + if drift > 0.2 { + self.set_playhead(playhead, project); + return; + } + + self.frame_buffer.set_playhead(playhead, project); + } + #[allow(dead_code)] pub fn current_playhead(&self) -> f64 { self.frame_buffer.elapsed_samples_to_playhead() @@ -426,9 +436,10 @@ impl AudioResampler { }) } - #[cfg(not(target_os = "windows"))] pub fn reset(&mut self) { - *self = Self::new(self.output).unwrap(); + if let Ok(resampler) = Self::new(self.output) { + *self = resampler; + } } fn current_frame_data(&self) -> &[u8] { diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 000f209c6b..4b63f7b7f3 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -1,14 +1,13 @@ -use cap_audio::FromSampleBytes; -#[cfg(not(target_os = "windows"))] -use cap_audio::{LatencyCorrectionConfig, LatencyCorrector, default_output_latency_hint}; +use cap_audio::{ + FromSampleBytes, LatencyCorrectionConfig, LatencyCorrector, default_output_latency_hint, +}; use cap_media::MediaError; use cap_media_info::AudioInfo; -use cap_project::{ProjectConfiguration, XY}; +use cap_project::{ClipOffsets, ProjectConfiguration, XY}; use cap_rendering::{ DecodedSegmentFrames, ProjectUniforms, RenderVideoConstants, ZoomFocusInterpolator, spring_mass_damper::SpringMassDamperSimulationConfig, }; -#[cfg(not(target_os = "windows"))] use cpal::{BufferSize, SupportedBufferSize}; use cpal::{ SampleFormat, @@ -17,9 +16,12 @@ use cpal::{ use futures::stream::{FuturesUnordered, StreamExt}; use lru::LruCache; use std::{ - collections::{HashSet, VecDeque}, + collections::{BTreeMap, HashMap, HashSet, VecDeque}, num::NonZeroUsize, - sync::{Arc, RwLock}, + sync::{ + Arc, RwLock, + atomic::{AtomicBool, Ordering}, + }, time::Duration, }; use tokio::{ @@ -28,7 +30,6 @@ use tokio::{ }; use tracing::{error, info, warn}; -#[cfg(not(target_os = "windows"))] use 
crate::audio::AudioPlaybackBuffer; use crate::{ audio::AudioSegment, editor, editor_instance::SegmentMedia, segments::get_audio_segments, @@ -36,8 +37,6 @@ use crate::{ const PREFETCH_BUFFER_SIZE: usize = 60; const PARALLEL_DECODE_TASKS: usize = 4; -const MAX_PREFETCH_AHEAD: u32 = 60; -const PREFETCH_BEHIND: u32 = 15; const FRAME_CACHE_SIZE: usize = 60; #[derive(Debug)] @@ -64,12 +63,14 @@ pub enum PlaybackEvent { pub struct PlaybackHandle { stop_tx: watch::Sender, event_rx: watch::Receiver, + seek_tx: watch::Sender, } struct PrefetchedFrame { frame_number: u32, segment_frames: DecodedSegmentFrames, segment_index: u32, + generation: u64, } struct FrameCache { @@ -98,6 +99,115 @@ impl FrameCache { self.cache .put(frame_number, (segment_frames, segment_index)); } + + fn clear(&mut self) { + self.cache.clear(); + } +} + +fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_frame: u32) -> bool { + let mut changed = false; + while buffer.len() > PREFETCH_BUFFER_SIZE { + let far_ahead_frame = buffer + .iter() + .rev() + .find(|(frame, _)| **frame > current_frame.saturating_add(PREFETCH_BUFFER_SIZE as u32)) + .map(|(frame, _)| *frame); + + if let Some(frame) = far_ahead_frame { + buffer.remove(&frame); + changed = true; + continue; + } + + let Some(oldest_frame) = buffer.keys().next().copied() else { + break; + }; + buffer.remove(&oldest_frame); + changed = true; + } + changed +} + +fn insert_prefetched_frame( + buffer: &mut BTreeMap, + prefetched: PrefetchedFrame, + current_frame: u32, +) -> bool { + let inserted_new = insert_prefetched_frame_untrimmed(buffer, prefetched, current_frame); + let trimmed = trim_prefetch_buffer(buffer, current_frame); + inserted_new || trimmed +} + +fn insert_prefetched_frame_untrimmed( + buffer: &mut BTreeMap, + prefetched: PrefetchedFrame, + current_frame: u32, +) -> bool { + if prefetched.frame_number < current_frame { + return false; + } + + let frame_number = prefetched.frame_number; + let inserted_new = match 
buffer.entry(frame_number) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(prefetched); + true + } + std::collections::btree_map::Entry::Occupied(_) => false, + }; + inserted_new +} + +fn prune_prefetch_buffer_before_frame( + buffer: &mut BTreeMap, + current_frame: u32, +) { + while let Some((frame, _)) = buffer.first_key_value() { + if *frame >= current_frame { + break; + } + buffer.pop_first(); + } +} + +fn count_contiguous_prefetched_frames( + buffer: &BTreeMap, + start_frame: u32, + limit: usize, +) -> usize { + let mut contiguous = 0usize; + let mut expected_frame = start_frame; + for (frame, _) in buffer.range(start_frame..) { + if *frame != expected_frame { + break; + } + contiguous += 1; + if contiguous >= limit { + break; + } + expected_frame = expected_frame.saturating_add(1); + } + contiguous +} + +fn build_clip_offsets_lookup(project: &ProjectConfiguration) -> HashMap { + project + .clips + .iter() + .map(|clip| (clip.index, clip.offsets)) + .collect() +} + +fn send_watch_u32_if_changed(tx: &watch::Sender, value: u32) { + let _ = tx.send_if_modified(|current| { + if *current == value { + false + } else { + *current = value; + true + } + }); } impl Playback { @@ -118,23 +228,32 @@ impl Playback { let (event_tx, mut event_rx) = watch::channel(PlaybackEvent::Start); event_rx.borrow_and_update(); + let (seek_tx, mut seek_rx) = watch::channel(self.start_frame_number); + seek_rx.borrow_and_update(); let handle = PlaybackHandle { stop_tx: stop_tx.clone(), event_rx, + seek_tx, }; let (prefetch_tx, mut prefetch_rx) = tokio_mpsc::channel::(PREFETCH_BUFFER_SIZE * 2); let (frame_request_tx, mut frame_request_rx) = watch::channel(self.start_frame_number); let (playback_position_tx, playback_position_rx) = watch::channel(self.start_frame_number); + let (seek_generation_tx, mut seek_generation_rx) = watch::channel(0u64); + seek_generation_rx.borrow_and_update(); - let in_flight_frames: Arc>> = Arc::new(RwLock::new(HashSet::new())); - let 
prefetch_in_flight = in_flight_frames.clone(); - let main_in_flight = in_flight_frames; + let prefetch_in_flight_frames: Arc>> = + Arc::new(RwLock::new(HashSet::new())); + let prefetch_in_flight = prefetch_in_flight_frames.clone(); + let playback_prefetch_in_flight = prefetch_in_flight_frames; + let playback_decode_in_flight: Arc>> = + Arc::new(RwLock::new(HashSet::new())); let prefetch_stop_rx = stop_rx.clone(); let mut prefetch_project = self.project.clone(); + let mut prefetch_seek_generation = seek_generation_rx.clone(); let prefetch_segment_medias = self.segment_medias.clone(); let (prefetch_duration, has_timeline) = if let Some(timeline) = &self.project.borrow().timeline { @@ -153,7 +272,7 @@ impl Playback { } type PrefetchFuture = std::pin::Pin< Box< - dyn std::future::Future)> + dyn std::future::Future)> + Send, >, >; @@ -161,10 +280,36 @@ impl Playback { let mut in_flight: FuturesUnordered = FuturesUnordered::new(); let mut frames_decoded: u32 = 0; let mut prefetched_behind: HashSet = HashSet::new(); - const INITIAL_PARALLEL_TASKS: usize = 4; + let mut prefetched_behind_order: VecDeque = VecDeque::new(); + let mut scheduled_in_flight_frames: HashSet = HashSet::new(); + let mut last_behind_scan_frame: Option = None; const RAMP_UP_AFTER_FRAMES: u32 = 5; + let dynamic_prefetch_ahead = fps.clamp(30, 90).min(PREFETCH_BUFFER_SIZE as u32); + let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); + let dynamic_parallel_tasks = if fps >= 60 { + 6 + } else if fps >= 45 { + 5 + } else { + PARALLEL_DECODE_TASKS + }; + let initial_parallel_tasks = dynamic_parallel_tasks.min(4); + let prefetch_idle_poll_interval = Duration::from_secs_f64(1.0 / fps_f64) + .mul_f64(0.25) + .max(Duration::from_millis(2)) + .min(Duration::from_millis(8)); + let prefetched_behind_capacity = (dynamic_prefetch_behind as usize).saturating_mul(8); + let mut active_generation = *prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); + let mut 
prefetch_clip_offsets = build_clip_offsets_lookup(&cached_project); + info!( + dynamic_prefetch_ahead, + dynamic_prefetch_behind, + dynamic_parallel_tasks, + prefetch_idle_poll_interval_ms = prefetch_idle_poll_interval.as_secs_f64() * 1000.0, + "Prefetch window configuration" + ); loop { if *prefetch_stop_rx.borrow() { @@ -173,6 +318,26 @@ impl Playback { if prefetch_project.has_changed().unwrap_or(false) { cached_project = prefetch_project.borrow_and_update().clone(); + prefetch_clip_offsets = build_clip_offsets_lookup(&cached_project); + } + + if prefetch_seek_generation.has_changed().unwrap_or(false) { + let generation = *prefetch_seek_generation.borrow_and_update(); + if generation != active_generation { + active_generation = generation; + next_prefetch_frame = *frame_request_rx.borrow(); + frames_decoded = 0; + prefetched_behind.clear(); + prefetched_behind_order.clear(); + last_behind_scan_frame = None; + + if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { + in_flight_guard.clear(); + } + scheduled_in_flight_frames.clear(); + + in_flight = FuturesUnordered::new(); + } } if let Ok(true) = frame_request_rx.has_changed() { @@ -187,26 +352,28 @@ impl Playback { }; next_prefetch_frame = requested; - frames_decoded = 0; - prefetched_behind.clear(); - - if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.clear(); - } - if is_backward_seek || seek_distance > MAX_PREFETCH_AHEAD / 2 { + if is_backward_seek || seek_distance > dynamic_prefetch_ahead / 2 { + frames_decoded = 0; + prefetched_behind.clear(); + prefetched_behind_order.clear(); + last_behind_scan_frame = None; + if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { + in_flight_guard.clear(); + } + scheduled_in_flight_frames.clear(); in_flight = FuturesUnordered::new(); } } } let current_playback_frame = *playback_position_rx.borrow(); - let max_prefetch_frame = current_playback_frame + MAX_PREFETCH_AHEAD; + let max_prefetch_frame = current_playback_frame + 
dynamic_prefetch_ahead; let effective_parallel = if frames_decoded < RAMP_UP_AFTER_FRAMES { - INITIAL_PARALLEL_TASKS + initial_parallel_tasks } else { - PARALLEL_DECODE_TASKS + dynamic_parallel_tasks }; while in_flight.len() < effective_parallel { @@ -222,11 +389,7 @@ impl Playback { break; } - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&frame_num)) - .unwrap_or(false); - if already_in_flight { + if scheduled_in_flight_frames.contains(&frame_num) { next_prefetch_frame += 1; continue; } @@ -236,20 +399,20 @@ impl Playback { && let Some(segment_media) = prefetch_segment_medias.get(segment.recording_clip as usize) { - let clip_offsets = cached_project - .clips - .iter() - .find(|v| v.index == segment.recording_clip) - .map(|v| v.offsets) + let clip_offsets = prefetch_clip_offsets + .get(&segment.recording_clip) + .copied() .unwrap_or_default(); let decoders = segment_media.decoders.clone(); let hide_camera = cached_project.camera.hide; let segment_index = segment.recording_clip; let is_initial = frames_decoded < 10; + let generation = active_generation; + scheduled_in_flight_frames.insert(frame_num); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.insert(frame_num); + in_flight_guard.insert((generation, frame_num)); } in_flight.push(Box::pin(async move { @@ -266,15 +429,18 @@ impl Playback { .get_frames(segment_time as f32, !hide_camera, clip_offsets) .await }; - (frame_num, segment_index, result) + (frame_num, segment_index, generation, result) })); } next_prefetch_frame += 1; } - if in_flight.len() < effective_parallel { - for behind_offset in 1..=PREFETCH_BEHIND { + if in_flight.len() < effective_parallel + && last_behind_scan_frame != Some(current_playback_frame) + { + last_behind_scan_frame = Some(current_playback_frame); + for behind_offset in 1..=dynamic_prefetch_behind { if in_flight.len() >= effective_parallel { break; } @@ -288,11 +454,7 @@ impl Playback { continue; } - let 
already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&behind_frame)) - .unwrap_or(false); - if already_in_flight { + if scheduled_in_flight_frames.contains(&behind_frame) { continue; } @@ -301,27 +463,34 @@ impl Playback { && let Some(segment_media) = prefetch_segment_medias.get(segment.recording_clip as usize) { - let clip_offsets = cached_project - .clips - .iter() - .find(|v| v.index == segment.recording_clip) - .map(|v| v.offsets) + let clip_offsets = prefetch_clip_offsets + .get(&segment.recording_clip) + .copied() .unwrap_or_default(); let decoders = segment_media.decoders.clone(); let hide_camera = cached_project.camera.hide; let segment_index = segment.recording_clip; + let generation = active_generation; + scheduled_in_flight_frames.insert(behind_frame); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.insert(behind_frame); + in_flight_guard.insert((generation, behind_frame)); } - prefetched_behind.insert(behind_frame); + if prefetched_behind.insert(behind_frame) { + prefetched_behind_order.push_back(behind_frame); + while prefetched_behind_order.len() > prefetched_behind_capacity { + if let Some(evicted) = prefetched_behind_order.pop_front() { + prefetched_behind.remove(&evicted); + } + } + } in_flight.push(Box::pin(async move { let result = decoders .get_frames(segment_time as f32, !hide_camera, clip_offsets) .await; - (behind_frame, segment_index, result) + (behind_frame, segment_index, generation, result) })); } } @@ -330,10 +499,16 @@ impl Playback { tokio::select! 
{ biased; - Some((frame_num, segment_index, result)) = in_flight.next() => { + Some((frame_num, segment_index, generation, result)) = in_flight.next() => { + scheduled_in_flight_frames.remove(&frame_num); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.remove(&frame_num); + in_flight_guard.remove(&(generation, frame_num)); + } + + if generation != active_generation { + continue; } + frames_decoded = frames_decoded.saturating_add(1); if let Some(segment_frames) = result { @@ -341,6 +516,7 @@ impl Playback { frame_number: frame_num, segment_frames, segment_index, + generation, }).await; } else if frames_decoded <= 5 { warn!( @@ -351,12 +527,13 @@ impl Playback { } } - _ = tokio::time::sleep(Duration::from_millis(1)), if in_flight.is_empty() => {} + _ = tokio::time::sleep(prefetch_idle_poll_interval), if in_flight.is_empty() => {} } } }); tokio::spawn(async move { + let playback_task_start = Instant::now(); let duration = if let Some(timeline) = &self.project.borrow().timeline { timeline.duration() } else { @@ -378,24 +555,64 @@ impl Playback { .spawn(); let frame_duration = Duration::from_secs_f64(1.0 / fps_f64); + let frame_fetch_timeout = frame_duration + .mul_f64(4.0) + .max(Duration::from_millis(20)) + .min(Duration::from_millis(80)); + let in_flight_poll_interval = frame_duration + .mul_f64(0.25) + .max(Duration::from_millis(1)) + .min(Duration::from_millis(4)); let mut frame_number = self.start_frame_number; - let mut prefetch_buffer: VecDeque = - VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); + let mut prefetch_buffer: BTreeMap = BTreeMap::new(); let mut frame_cache = FrameCache::new(FRAME_CACHE_SIZE); - let aggressive_skip_threshold = 10u32; + let mut seek_generation = 0u64; + let base_skip_threshold = (fps / 6).clamp(6, 16); + let mut late_streak = 0u32; + let mut skip_events = 0u64; let mut total_frames_rendered = 0u64; - let mut _total_frames_skipped = 0u64; - - let warmup_target_frames = 20usize; - let 
warmup_after_first_timeout = Duration::from_millis(1000); + let mut total_frames_skipped = 0u64; + let mut first_render_logged = false; + let mut pending_seek_observation: Option<(u32, Instant)> = None; + + let warmup_target_frames = (fps.saturating_div(4)).clamp(8, 16) as usize; + let warmup_after_first_timeout = frame_duration + .mul_f64((warmup_target_frames as f64) * 2.0) + .max(Duration::from_millis(200)) + .min(Duration::from_millis(700)); let warmup_no_frames_timeout = Duration::from_secs(5); - let warmup_start = Instant::now(); + let warmup_idle_poll_interval = frame_duration + .mul_f64(0.5) + .max(Duration::from_millis(8)) + .min(Duration::from_millis(25)); + let mut warmup_start = Instant::now(); let mut first_frame_time: Option = None; + let mut warmup_contiguous_prefetched = 0usize; + let mut warmup_buffer_changed = false; + info!( + warmup_target_frames, + warmup_after_first_timeout_ms = warmup_after_first_timeout.as_secs_f64() * 1000.0, + warmup_idle_poll_interval_ms = warmup_idle_poll_interval.as_secs_f64() * 1000.0, + "Playback warmup configuration" + ); while !*stop_rx.borrow() { + if first_frame_time.is_some() && warmup_buffer_changed { + warmup_contiguous_prefetched = count_contiguous_prefetched_frames( + &prefetch_buffer, + frame_number, + warmup_target_frames, + ); + warmup_buffer_changed = false; + } + let contiguous_prefetched = if first_frame_time.is_some() { + warmup_contiguous_prefetched + } else { + 0 + }; let should_start = if let Some(first_time) = first_frame_time { - prefetch_buffer.len() >= warmup_target_frames + contiguous_prefetched >= warmup_target_frames || first_time.elapsed() > warmup_after_first_timeout } else { false @@ -416,11 +633,58 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { - if prefetched.frame_number >= frame_number { - prefetch_buffer.push_back(prefetched); - if first_frame_time.is_none() { - first_frame_time = Some(Instant::now()); + let mut next_prefetched = Some(prefetched); + let mut prefetched_batch_changed = false; + + loop { + let Some(prefetched) = next_prefetched.take() else { + break; + }; + + if prefetched.generation == seek_generation + && insert_prefetched_frame_untrimmed( + &mut prefetch_buffer, + prefetched, + frame_number, + ) + { + prefetched_batch_changed = true; } + + next_prefetched = prefetch_rx.try_recv().ok(); + } + + if trim_prefetch_buffer(&mut prefetch_buffer, frame_number) { + prefetched_batch_changed = true; + } + + if prefetched_batch_changed { + warmup_buffer_changed = true; + } + + if first_frame_time.is_none() && !prefetch_buffer.is_empty() { + first_frame_time = Some(Instant::now()); + } + } + _ = seek_rx.changed() => { + let seek_frame = *seek_rx.borrow_and_update(); + seek_generation = seek_generation.saturating_add(1); + frame_number = seek_frame; + prefetch_buffer.clear(); + frame_cache.clear(); + warmup_contiguous_prefetched = 0; + warmup_buffer_changed = false; + first_frame_time = None; + warmup_start = Instant::now(); + let _ = seek_generation_tx.send(seek_generation); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); + if has_audio + && audio_playhead_tx + .send(frame_number as f64 / fps_f64) + .is_err() + { + break; } } _ = stop_rx.changed() => { @@ -428,48 +692,85 @@ impl Playback { break; } } - _ = tokio::time::sleep(Duration::from_millis(100)) => { + _ = tokio::time::sleep(warmup_idle_poll_interval) => { } } } - prefetch_buffer - .make_contiguous() - .sort_by_key(|p| p.frame_number); - - let start = Instant::now(); + let mut playback_anchor_start = Instant::now(); + let mut playback_anchor_frame = frame_number; let mut cached_project = self.project.borrow().clone(); 
+ let mut playback_clip_offsets = build_clip_offsets_lookup(&cached_project); 'playback: loop { + if seek_rx.has_changed().unwrap_or(false) { + let seek_frame = *seek_rx.borrow_and_update(); + seek_generation = seek_generation.saturating_add(1); + frame_number = seek_frame; + playback_anchor_start = Instant::now(); + playback_anchor_frame = seek_frame; + pending_seek_observation = Some((seek_frame, Instant::now())); + prefetch_buffer.clear(); + frame_cache.clear(); + let _ = seek_generation_tx.send(seek_generation); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); + if has_audio + && audio_playhead_tx + .send(frame_number as f64 / fps_f64) + .is_err() + { + break 'playback; + } + } + if self.project.has_changed().unwrap_or(false) { cached_project = self.project.borrow_and_update().clone(); + playback_clip_offsets = build_clip_offsets_lookup(&cached_project); } + let mut drained_prefetch_changed = false; while let Ok(prefetched) = prefetch_rx.try_recv() { - if prefetched.frame_number >= frame_number { - prefetch_buffer.push_back(prefetched); - while prefetch_buffer.len() > PREFETCH_BUFFER_SIZE { - if let Some(idx) = prefetch_buffer - .iter() - .enumerate() - .filter(|(_, p)| { - p.frame_number > frame_number + PREFETCH_BUFFER_SIZE as u32 - }) - .max_by_key(|(_, p)| p.frame_number) - .map(|(i, _)| i) - { - prefetch_buffer.remove(idx); - } else { - prefetch_buffer.pop_front(); - } + if prefetched.generation == seek_generation { + if insert_prefetched_frame_untrimmed( + &mut prefetch_buffer, + prefetched, + frame_number, + ) { + drained_prefetch_changed = true; } } } + if drained_prefetch_changed { + let _ = trim_prefetch_buffer(&mut prefetch_buffer, frame_number); + } + prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); - let frame_offset = frame_number.saturating_sub(self.start_frame_number) as f64; - let next_deadline = start + frame_duration.mul_f64(frame_offset); + 
let frame_offset = frame_number.saturating_sub(playback_anchor_frame) as f64; + let next_deadline = playback_anchor_start + frame_duration.mul_f64(frame_offset); tokio::select! { _ = stop_rx.changed() => break 'playback, + _ = seek_rx.changed() => { + let seek_frame = *seek_rx.borrow_and_update(); + seek_generation = seek_generation.saturating_add(1); + frame_number = seek_frame; + playback_anchor_start = Instant::now(); + playback_anchor_frame = seek_frame; + pending_seek_observation = Some((seek_frame, Instant::now())); + prefetch_buffer.clear(); + frame_cache.clear(); + let _ = seek_generation_tx.send(seek_generation); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); + if has_audio + && audio_playhead_tx + .send(frame_number as f64 / fps_f64) + .is_err() + { + break 'playback; + } + continue; + } _ = tokio::time::sleep_until(next_deadline) => {} } @@ -488,43 +789,60 @@ impl Playback { was_cached = true; Some(cached) } else { - let prefetched_idx = prefetch_buffer - .iter() - .position(|p| p.frame_number == frame_number); - - if let Some(idx) = prefetched_idx { - let prefetched = prefetch_buffer.remove(idx).unwrap(); + if let Some(prefetched) = prefetch_buffer.remove(&frame_number) { Some(( Arc::new(prefetched.segment_frames), prefetched.segment_index, )) } else { - let is_in_flight = main_in_flight + let in_flight_key = (seek_generation, frame_number); + let is_in_flight = playback_prefetch_in_flight .read() - .map(|guard| guard.contains(&frame_number)) - .unwrap_or(false); + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false) + || playback_decode_in_flight + .read() + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false); if is_in_flight { let wait_start = Instant::now(); - let max_wait = Duration::from_millis(200); + let max_wait = frame_fetch_timeout; let mut found_frame = None; + let mut buffered_wait_prefetch_changed = false; while wait_start.elapsed() < 
max_wait { tokio::select! { _ = stop_rx.changed() => break 'playback, Some(prefetched) = prefetch_rx.recv() => { + if prefetched.generation != seek_generation { + continue; + } if prefetched.frame_number == frame_number { found_frame = Some(prefetched); break; - } else if prefetched.frame_number >= self.start_frame_number { - prefetch_buffer.push_back(prefetched); + } else if prefetched.frame_number >= frame_number { + if insert_prefetched_frame_untrimmed( + &mut prefetch_buffer, + prefetched, + frame_number, + ) { + buffered_wait_prefetch_changed = true; + } } } - _ = tokio::time::sleep(Duration::from_millis(5)) => { - let still_in_flight = main_in_flight + _ = tokio::time::sleep(in_flight_poll_interval) => { + if seek_rx.has_changed().unwrap_or(false) { + break; + } + let still_in_flight = playback_prefetch_in_flight .read() - .map(|guard| guard.contains(&frame_number)) - .unwrap_or(false); + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false) + || playback_decode_in_flight + .read() + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false); if !still_in_flight { break; } @@ -532,54 +850,79 @@ impl Playback { } } + if buffered_wait_prefetch_changed { + let _ = trim_prefetch_buffer(&mut prefetch_buffer, frame_number); + } + + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + if let Some(prefetched) = found_frame { Some(( Arc::new(prefetched.segment_frames), prefetched.segment_index, )) } else { - let prefetched_idx = prefetch_buffer - .iter() - .position(|p| p.frame_number == frame_number); - if let Some(idx) = prefetched_idx { - let prefetched = prefetch_buffer.remove(idx).unwrap(); + if let Some(prefetched) = prefetch_buffer.remove(&frame_number) { Some(( Arc::new(prefetched.segment_frames), prefetched.segment_index, )) } else { frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { - let _ = 
frame_request_tx.send(frame_number); + if seek_rx.has_changed().unwrap_or(false) { + continue; + } - let wait_result = tokio::time::timeout( - Duration::from_millis(200), - prefetch_rx.recv(), - ) - .await; + send_watch_u32_if_changed(&frame_request_tx, frame_number); + + let wait_result = + tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; + + if seek_rx.has_changed().unwrap_or(false) { + continue; + } if let Ok(Some(prefetched)) = wait_result { + if prefetched.generation != seek_generation { + frame_number = frame_number.saturating_add(1); + total_frames_skipped += 1; + continue; + } if prefetched.frame_number == frame_number { Some(( Arc::new(prefetched.segment_frames), prefetched.segment_index, )) } else { - prefetch_buffer.push_back(prefetched); + let _ = insert_prefetched_frame( + &mut prefetch_buffer, + prefetched, + frame_number, + ); frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; } } else { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; } } else { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + let Some((segment_time, segment)) = cached_project.get_segment_time(playback_time) else { @@ -593,43 +936,45 @@ impl Playback { continue; }; - let clip_offsets = cached_project - .clips - .iter() - .find(|v| v.index == segment.recording_clip) - .map(|v| v.offsets) + let clip_offsets = playback_clip_offsets + .get(&segment.recording_clip) + .copied() .unwrap_or_default(); - if let Ok(mut guard) = main_in_flight.write() { - guard.insert(frame_number); + if let Ok(mut guard) = playback_decode_in_flight.write() { + guard.insert(in_flight_key); } - let max_wait = Duration::from_millis(200); + let max_wait = frame_fetch_timeout; let data = tokio::select! 
{ _ = stop_rx.changed() => { - if let Ok(mut guard) = main_in_flight.write() { - guard.remove(&frame_number); + if let Ok(mut guard) = playback_decode_in_flight.write() { + guard.remove(&in_flight_key); } break 'playback }, _ = tokio::time::sleep(max_wait) => { - if let Ok(mut guard) = main_in_flight.write() { - guard.remove(&frame_number); + if let Ok(mut guard) = playback_decode_in_flight.write() { + guard.remove(&in_flight_key); } frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; }, data = segment_media .decoders .get_frames(segment_time as f32, !cached_project.camera.hide, clip_offsets) => { - if let Ok(mut guard) = main_in_flight.write() { - guard.remove(&frame_number); + if let Ok(mut guard) = playback_decode_in_flight.write() { + guard.remove(&in_flight_key); } data }, }; + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + data.map(|frames| (Arc::new(frames), segment.recording_clip)) } } @@ -685,6 +1030,25 @@ impl Playback { .await; total_frames_rendered += 1; + if !first_render_logged { + first_render_logged = true; + info!( + first_render_latency_ms = + playback_task_start.elapsed().as_secs_f64() * 1000.0, + "Playback rendered first frame" + ); + } + if let Some((seek_target_frame, seek_started_at)) = pending_seek_observation + && frame_number >= seek_target_frame + { + info!( + seek_target_frame, + rendered_frame = frame_number, + seek_settle_ms = seek_started_at.elapsed().as_secs_f64() * 1000.0, + "Playback seek settled" + ); + pending_seek_observation = None; + } } event_tx.send(PlaybackEvent::Frame(frame_number)).ok(); @@ -699,23 +1063,28 @@ impl Playback { break 'playback; } - let expected_frame = self.start_frame_number - + (start.elapsed().as_secs_f64() * fps_f64).floor() as u32; + let expected_frame = playback_anchor_frame + + (playback_anchor_start.elapsed().as_secs_f64() * fps_f64).floor() as u32; if frame_number < expected_frame { let frames_behind = expected_frame - 
frame_number; + late_streak = late_streak.saturating_add(1); + let threshold_reduction = (late_streak / 12).min(base_skip_threshold); + let dynamic_skip_threshold = + base_skip_threshold.saturating_sub(threshold_reduction); - if frames_behind <= aggressive_skip_threshold { + if frames_behind <= dynamic_skip_threshold { continue; } let skipped = frames_behind.saturating_sub(1); if skipped > 0 { frame_number += skipped; - _total_frames_skipped += skipped as u64; + total_frames_skipped += skipped as u64; + skip_events = skip_events.saturating_add(1); - prefetch_buffer.retain(|p| p.frame_number >= frame_number); - let _ = frame_request_tx.send(frame_number); + prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -724,10 +1093,29 @@ impl Playback { { break 'playback; } + + if skipped >= fps.saturating_div(2) || skip_events % 120 == 0 { + info!( + skipped_frames = skipped, + frames_behind, + dynamic_skip_threshold, + late_streak, + total_frames_skipped, + skip_events, + "Playback applied frame skip catch-up" + ); + } } + } else { + late_streak = 0; } } + info!( + total_frames_rendered, + total_frames_skipped, skip_events, "Playback loop completed" + ); + stop_tx.send(true).ok(); event_tx.send(PlaybackEvent::Stop).ok(); @@ -742,6 +1130,17 @@ impl PlaybackHandle { self.stop_tx.send(true).ok(); } + pub fn seek(&self, frame_number: u32) { + let _ = self.seek_tx.send_if_modified(|current_frame| { + if *current_frame == frame_number { + false + } else { + *current_frame = frame_number; + true + } + }); + } + pub async fn receive_event(&mut self) -> watch::Ref<'_, PlaybackEvent> { self.event_rx.changed().await.ok(); self.event_rx.borrow_and_update() @@ -759,6 +1158,12 @@ struct AudioPlayback { } impl AudioPlayback { + fn use_prerendered_audio() -> bool { + std::env::var("CAP_AUDIO_PRERENDER_PLAYBACK") + 
.map(|value| value == "1" || value.eq_ignore_ascii_case("true")) + .unwrap_or(false) + } + fn spawn(self) -> bool { let handle = tokio::runtime::Handle::current(); @@ -768,6 +1173,7 @@ impl AudioPlayback { } std::thread::spawn(move || { + let audio_thread_start = Instant::now(); let host = cpal::default_host(); let device = match host.default_output_device() { Some(d) => d, @@ -787,26 +1193,80 @@ impl AudioPlayback { } }; + let use_prerendered_audio = Self::use_prerendered_audio(); let duration_secs = self.duration_secs; + if use_prerendered_audio { + info!("Using pre-rendered audio playback mode"); + } else { + info!("Using low-latency streaming audio playback mode"); + } let result = match supported_config.sample_format() { SampleFormat::I16 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::I32 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::F32 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::I64 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::U8 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + 
self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::F64 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } format => { error!( @@ -828,6 +1288,10 @@ impl AudioPlayback { } }; + info!( + startup_prepare_ms = audio_thread_start.elapsed().as_secs_f64() * 1000.0, + "Audio stream prepared, starting playback stream" + ); if let Err(e) = stream.play() { error!( "Failed to play audio stream: {}. Skipping audio playback.", @@ -843,7 +1307,6 @@ impl AudioPlayback { true } - #[cfg(not(target_os = "windows"))] #[allow(dead_code)] fn create_stream( self, @@ -999,6 +1462,8 @@ impl AudioPlayback { let mut latency_corrector = LatencyCorrector::new(static_latency_hint, latency_config); let initial_compensation_secs = latency_corrector.initial_compensation_secs(); let device_sample_rate = sample_rate; + let stream_build_start = Instant::now(); + let callback_started = Arc::new(AtomicBool::new(false)); { let project_snapshot = project.borrow(); @@ -1036,6 +1501,7 @@ impl AudioPlayback { let headroom_for_stream = headroom_samples; let mut playhead_rx_for_stream = playhead_rx.clone(); let mut last_video_playhead = playhead; + let callback_started_for_stream = callback_started.clone(); #[cfg(target_os = "windows")] const FIXED_LATENCY_SECS: f64 = 0.08; @@ -1055,6 +1521,13 @@ impl AudioPlayback { let stream_result = device.build_output_stream( &config, move |buffer: &mut [T], info| { + if !callback_started_for_stream.swap(true, Ordering::Relaxed) { + info!( + startup_to_callback_ms = + stream_build_start.elapsed().as_secs_f64() * 1000.0, + "Audio output callback started" + ); + } #[cfg(not(target_os = "windows"))] let latency_secs = 
latency_corrector.update_from_callback(info); #[cfg(target_os = "windows")] diff --git a/crates/recording/examples/playback-test-runner.rs b/crates/recording/examples/playback-test-runner.rs index 437b3844a3..16865ae654 100644 --- a/crates/recording/examples/playback-test-runner.rs +++ b/crates/recording/examples/playback-test-runner.rs @@ -4,6 +4,7 @@ use cap_project::{RecordingMeta, RecordingMetaInner, StudioRecordingMeta}; use cap_rendering::decoder::spawn_decoder; use chrono::{Local, Utc}; use clap::{Parser, Subcommand}; +use serde::Serialize; use std::{ fs, path::{Path, PathBuf}, @@ -38,12 +39,18 @@ struct Cli { #[arg(long, global = true, default_value = "30")] fps: u32, + #[arg(long, global = true, default_value_t = STARTUP_TO_FIRST_FRAME_WARNING_MS)] + startup_threshold_ms: f64, + #[arg(long, global = true)] verbose: bool, #[arg(long, global = true)] benchmark_output: bool, + #[arg(long, global = true)] + json_output: Option, + #[arg(long, global = true)] notes: Option, } @@ -53,6 +60,7 @@ enum Commands { Full, Decoder, Playback, + Scrub, AudioSync, CameraSync, List, @@ -60,10 +68,12 @@ enum Commands { const FPS_TOLERANCE: f64 = 2.0; const DECODE_LATENCY_WARNING_MS: f64 = 50.0; +const SCRUB_SEEK_WARNING_MS: f64 = 40.0; +const STARTUP_TO_FIRST_FRAME_WARNING_MS: f64 = 250.0; const AUDIO_VIDEO_SYNC_TOLERANCE_MS: f64 = 100.0; const CAMERA_SYNC_TOLERANCE_MS: f64 = 100.0; -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct DecoderTestResult { passed: bool, decoder_type: String, @@ -75,13 +85,15 @@ struct DecoderTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct PlaybackTestResult { passed: bool, segment_index: usize, total_frames: usize, decoded_frames: usize, failed_frames: usize, + first_frame_decode_time_ms: f64, + startup_to_first_frame_ms: f64, avg_decode_time_ms: f64, min_decode_time_ms: f64, max_decode_time_ms: f64, @@ -93,10 +105,28 @@ struct PlaybackTestResult 
{ fps_ok: bool, jitter_ms: f64, decode_latency_ok: bool, + startup_latency_ok: bool, + startup_threshold_ms: f64, + errors: Vec, +} + +#[derive(Debug, Clone, Default, Serialize)] +struct ScrubTestResult { + passed: bool, + segment_index: usize, + seek_operations: usize, + successful_seeks: usize, + failed_seeks: usize, + avg_seek_time_ms: f64, + p50_seek_time_ms: f64, + p95_seek_time_ms: f64, + p99_seek_time_ms: f64, + max_seek_time_ms: f64, + seek_latency_ok: bool, errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct AudioSyncTestResult { passed: bool, segment_index: usize, @@ -114,7 +144,7 @@ struct AudioSyncTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct CameraSyncTestResult { passed: bool, segment_index: usize, @@ -130,7 +160,7 @@ struct CameraSyncTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct RecordingTestReport { recording_path: PathBuf, recording_name: String, @@ -141,6 +171,7 @@ struct RecordingTestReport { has_system_audio: bool, decoder_results: Vec, playback_results: Vec, + scrub_results: Vec, audio_sync_results: Vec, camera_sync_results: Vec, overall_passed: bool, @@ -208,17 +239,49 @@ impl RecordingTestReport { result.p95_decode_time_ms, result.p99_decode_time_ms ); + println!( + " Startup: first_decode={:.1}ms startup_to_first={:.1}ms", + result.first_frame_decode_time_ms, result.startup_to_first_frame_ms + ); if !result.fps_ok { println!(" WARN: FPS outside tolerance!"); } if !result.decode_latency_ok { println!(" WARN: Decode latency exceeds {DECODE_LATENCY_WARNING_MS}ms!"); } + if !result.startup_latency_ok { + println!( + " WARN: Startup-to-first-frame exceeds {:.1}ms!", + result.startup_threshold_ms + ); + } for err in &result.errors { println!(" ERROR: {err}"); } } + if !self.scrub_results.is_empty() { + println!("\n SCRUB TESTS:"); + for result in &self.scrub_results 
{ + let status = if result.passed { "OK" } else { "FAIL" }; + println!( + " Segment {}: [{}] seeks={}/{} avg={:.1}ms p95={:.1}ms", + result.segment_index, + status, + result.successful_seeks, + result.seek_operations, + result.avg_seek_time_ms, + result.p95_seek_time_ms + ); + if !result.seek_latency_ok { + println!(" WARN: Scrub seek latency exceeds {SCRUB_SEEK_WARNING_MS}ms!"); + } + for err in &result.errors { + println!(" ERROR: {err}"); + } + } + } + if !self.audio_sync_results.is_empty() { println!("\n AUDIO SYNC TESTS:"); for result in &self.audio_sync_results { @@ -339,11 +402,14 @@ async fn test_playback( meta: &StudioRecordingMeta, segment_index: usize, fps: u32, + startup_threshold_ms: f64, verbose: bool, ) -> PlaybackTestResult { + let playback_start = Instant::now(); let mut result = PlaybackTestResult { segment_index, expected_fps: fps as f64, + startup_threshold_ms, ..Default::default() }; @@ -384,6 +450,11 @@ async fn test_playback( let decode_time_ms = start.elapsed().as_secs_f64() * 1000.0; decode_times.push(decode_time_ms); decoded_count += 1; + if decoded_count == 1 { + result.first_frame_decode_time_ms = decode_time_ms; + result.startup_to_first_frame_ms = + playback_start.elapsed().as_secs_f64() * 1000.0; + } if frame.width() == 0 || frame.height() == 0 { result @@ -439,15 +510,107 @@ async fn test_playback( result.fps_ok = (result.effective_fps - result.expected_fps).abs() <= FPS_TOLERANCE || result.effective_fps >= result.expected_fps; result.decode_latency_ok = result.p95_decode_time_ms <= DECODE_LATENCY_WARNING_MS; + result.startup_latency_ok = result.startup_to_first_frame_ms <= startup_threshold_ms; result.passed = result.fps_ok && result.decode_latency_ok + && result.startup_latency_ok && result.failed_frames == 0 && result.decoded_frames > 0; result } +async fn test_scrub( + recording_meta: &RecordingMeta, + meta: &StudioRecordingMeta, + segment_index: usize, + fps: u32, + verbose: bool, +) -> ScrubTestResult { + let mut result = 
ScrubTestResult { + segment_index, + seek_operations: 120, + ..Default::default() + }; + + let display_path = match meta { + StudioRecordingMeta::SingleSegment { segment } => { + recording_meta.path(&segment.display.path) + } + StudioRecordingMeta::MultipleSegments { inner } => { + recording_meta.path(&inner.segments[segment_index].display.path) + } + }; + + let decoder = match spawn_decoder("display", display_path.clone(), fps, 0.0, false).await { + Ok(d) => d, + Err(e) => { + result.errors.push(format!("Failed to create decoder: {e}")); + return result; + } + }; + + let duration_secs = get_video_duration(&display_path); + let total_frames = (duration_secs * fps as f64).ceil() as usize; + if total_frames < 2 { + result + .errors + .push("Video duration too short for scrub benchmark".to_string()); + return result; + } + + let mut seek_times = Vec::with_capacity(result.seek_operations); + + for operation in 0..result.seek_operations { + let target_frame = ((operation * 7919) % total_frames).max(1); + let target_time = target_frame as f32 / fps as f32; + let seek_start = Instant::now(); + match decoder.get_frame(target_time).await { + Some(_) => { + let seek_time_ms = seek_start.elapsed().as_secs_f64() * 1000.0; + seek_times.push(seek_time_ms); + result.successful_seeks += 1; + if verbose && operation % 20 == 0 { + println!( + " Scrub {} / {}: frame={} time={:.3}s seek={:.1}ms", + operation + 1, + result.seek_operations, + target_frame, + target_time, + seek_time_ms + ); + } + } + None => { + result.failed_seeks += 1; + if verbose { + println!( + " Scrub {} / {}: frame={} FAILED", + operation + 1, + result.seek_operations, + target_frame + ); + } + } + } + } + + if !seek_times.is_empty() { + result.avg_seek_time_ms = seek_times.iter().sum::() / seek_times.len() as f64; + result.p50_seek_time_ms = percentile(&seek_times, 50.0); + result.p95_seek_time_ms = percentile(&seek_times, 95.0); + result.p99_seek_time_ms = percentile(&seek_times, 99.0); + 
result.max_seek_time_ms = seek_times.iter().copied().fold(f64::NEG_INFINITY, f64::max); + } + + result.seek_latency_ok = result.p95_seek_time_ms <= SCRUB_SEEK_WARNING_MS; + result.passed = + result.seek_latency_ok && result.failed_seeks == 0 && result.successful_seeks > 0; + + result +} + async fn test_audio_sync( recording_meta: &RecordingMeta, meta: &StudioRecordingMeta, @@ -733,8 +896,10 @@ fn discover_recordings(input_dir: &Path) -> Vec { async fn run_tests_on_recording( recording_path: &Path, fps: u32, + startup_threshold_ms: f64, run_decoder: bool, run_playback: bool, + run_scrub: bool, run_audio_sync: bool, run_camera_sync: bool, verbose: bool, @@ -821,11 +986,27 @@ async fn run_tests_on_recording( if verbose { println!(" Testing playback for segment {segment_idx}..."); } - let playback_result = - test_playback(&meta, studio_meta.as_ref(), segment_idx, fps, verbose).await; + let playback_result = test_playback( + &meta, + studio_meta.as_ref(), + segment_idx, + fps, + startup_threshold_ms, + verbose, + ) + .await; report.playback_results.push(playback_result); } + if run_scrub { + if verbose { + println!(" Testing scrub performance for segment {segment_idx}..."); + } + let scrub_result = + test_scrub(&meta, studio_meta.as_ref(), segment_idx, fps, verbose).await; + report.scrub_results.push(scrub_result); + } + if run_audio_sync { if verbose { println!(" Testing audio sync for segment {segment_idx}..."); @@ -848,21 +1029,40 @@ async fn run_tests_on_recording( let decoder_ok = report.decoder_results.iter().all(|r| r.passed); let playback_ok = report.playback_results.iter().all(|r| r.passed); + let scrub_ok = report.scrub_results.iter().all(|r| r.passed); let audio_ok = report.audio_sync_results.iter().all(|r| r.passed); let camera_ok = report.camera_sync_results.iter().all(|r| r.passed); - report.overall_passed = decoder_ok && playback_ok && audio_ok && camera_ok; + report.overall_passed = decoder_ok && playback_ok && scrub_ok && audio_ok && camera_ok; 
Ok(report) } -#[derive(Debug)] +#[derive(Debug, Clone, Serialize)] struct SystemInfo { os: String, arch: String, cpu: String, } +#[derive(Debug, Serialize)] +struct JsonBenchmarkSummary { + total_recordings: usize, + passed_recordings: usize, + failed_recordings: usize, +} + +#[derive(Debug, Serialize)] +struct JsonBenchmarkOutput { + generated_at_utc: String, + local_time: String, + command: String, + notes: Option, + system: SystemInfo, + summary: JsonBenchmarkSummary, + reports: Vec, +} + impl SystemInfo { fn collect() -> Self { let mut sys = System::new_all(); @@ -906,6 +1106,19 @@ fn get_failure_tags(report: &RecordingTestReport) -> Vec { if report.playback_results.iter().any(|r| !r.decode_latency_ok) { tags.push("LATENCY".to_string()); } + if report + .playback_results + .iter() + .any(|r| !r.startup_latency_ok) + { + tags.push("STARTUP".to_string()); + } + if report.scrub_results.iter().any(|r| !r.seek_latency_ok) { + tags.push("SCRUB_LATENCY".to_string()); + } + if report.scrub_results.iter().any(|r| r.failed_seeks > 0) { + tags.push("SCRUB_ERRORS".to_string()); + } if report.playback_results.iter().any(|r| r.failed_frames > 0) { tags.push("DECODE_ERRORS".to_string()); } @@ -1007,6 +1220,16 @@ fn report_to_markdown(report: &RecordingTestReport) -> String { result.p99_decode_time_ms, result.max_decode_time_ms )); + md.push_str(&format!( + "| ↳ Startup | {} | first_decode={:.1}ms startup_to_first={:.1}ms |\n", + if result.startup_latency_ok { + "✅" + } else { + "❌" + }, + result.first_frame_decode_time_ms, + result.startup_to_first_frame_ms + )); if result.failed_frames > 0 { md.push_str(&format!( "| ↳ Failed Frames | ⚠️ | {} |\n", @@ -1015,6 +1238,31 @@ fn report_to_markdown(report: &RecordingTestReport) -> String { } } + for result in &report.scrub_results { + md.push_str(&format!( + "| Scrub Seg {} | {} | seeks={}/{} avg={:.1}ms p95={:.1}ms p99={:.1}ms |\n", + result.segment_index, + if result.passed { "✅" } else { "❌" }, + result.successful_seeks, + 
result.seek_operations, + result.avg_seek_time_ms, + result.p95_seek_time_ms, + result.p99_seek_time_ms + )); + md.push_str(&format!( + "| ↳ Scrub Latency | {} | max={:.1}ms threshold={:.1}ms |\n", + if result.seek_latency_ok { "✅" } else { "❌" }, + result.max_seek_time_ms, + SCRUB_SEEK_WARNING_MS + )); + if result.failed_seeks > 0 { + md.push_str(&format!( + "| ↳ Scrub Failures | ⚠️ | {} |\n", + result.failed_seeks + )); + } + } + for result in &report.audio_sync_results { if result.has_mic_audio { let status = if result.mic_sync_ok { "✅" } else { "❌" }; @@ -1185,6 +1433,41 @@ fn write_benchmark_to_file(benchmark_md: &str) -> anyhow::Result<()> { Ok(()) } +fn write_json_output_to_file( + output_path: &Path, + reports: &[RecordingTestReport], + notes: Option<&str>, + command: &str, +) -> anyhow::Result<()> { + let passed = reports.iter().filter(|r| r.overall_passed).count(); + let total = reports.len(); + let failed = total.saturating_sub(passed); + + let output = JsonBenchmarkOutput { + generated_at_utc: Utc::now().format("%Y-%m-%d %H:%M:%S UTC").to_string(), + local_time: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(), + command: command.to_string(), + notes: notes.map(ToString::to_string), + system: SystemInfo::collect(), + summary: JsonBenchmarkSummary { + total_recordings: total, + passed_recordings: passed, + failed_recordings: failed, + }, + reports: reports.to_vec(), + }; + + let json = serde_json::to_string_pretty(&output)?; + fs::write(output_path, json)?; + + println!( + "\n✅ JSON benchmark results written to {}", + output_path.display() + ); + + Ok(()) +} + fn print_summary(reports: &[RecordingTestReport]) { println!("\n{}", "=".repeat(70)); println!("PLAYBACK TEST SUMMARY"); @@ -1202,6 +1485,7 @@ fn print_summary(reports: &[RecordingTestReport]) { let decoder_failed = report.decoder_results.iter().any(|r| !r.passed); let playback_failed = report.playback_results.iter().any(|r| !r.passed); + let scrub_failed = report.scrub_results.iter().any(|r| 
!r.passed); let audio_failed = report.audio_sync_results.iter().any(|r| !r.passed); let camera_failed = report.camera_sync_results.iter().any(|r| !r.passed); @@ -1211,6 +1495,9 @@ fn print_summary(reports: &[RecordingTestReport]) { if playback_failed { print!(" [PLAYBACK]"); } + if scrub_failed { + print!(" [SCRUB]"); + } if audio_failed { print!(" [AUDIO SYNC]"); } @@ -1224,6 +1511,66 @@ fn print_summary(reports: &[RecordingTestReport]) { println!(); } +fn command_name(command: Option<&Commands>) -> &'static str { + match command { + Some(Commands::Decoder) => "decoder", + Some(Commands::Playback) => "playback", + Some(Commands::Scrub) => "scrub", + Some(Commands::AudioSync) => "audio-sync", + Some(Commands::CameraSync) => "camera-sync", + Some(Commands::Full) | None => "full", + Some(Commands::List) => "list", + } +} + +fn shell_quote(value: &str) -> String { + let is_safe = value + .chars() + .all(|char| char.is_ascii_alphanumeric() || "-_./:=,".contains(char)); + if is_safe { + value.to_string() + } else { + format!("'{}'", value.replace('\'', "'\"'\"'")) + } +} + +fn build_command_string(cli: &Cli) -> String { + let mut command = format!( + "cargo run -p cap-recording --example playback-test-runner -- {} --fps {} --startup-threshold-ms {:.1}", + command_name(cli.command.as_ref()), + cli.fps, + cli.startup_threshold_ms + ); + + if let Some(path) = &cli.recording_path { + command.push_str(" --recording-path "); + command.push_str(&shell_quote(path.to_string_lossy().as_ref())); + } else { + command.push_str(" --input-dir "); + command.push_str(&shell_quote(cli.input_dir.to_string_lossy().as_ref())); + } + + if cli.verbose { + command.push_str(" --verbose"); + } + + if cli.benchmark_output { + command.push_str(" --benchmark-output"); + } + + if let Some(path) = &cli.json_output { + command.push_str(" --json-output "); + command.push_str(&shell_quote(path.to_string_lossy().as_ref())); + } + + if let Some(notes) = &cli.notes { + command.push_str(" --notes "); + 
command.push_str(&shell_quote(notes)); + } + + command +} + #[tokio::main] async fn main() -> anyhow::Result<()> { tracing_subscriber::registry() @@ -1235,7 +1582,7 @@ async fn main() -> anyhow::Result<()> { let cli = Cli::parse(); - if let Some(Commands::List) = cli.command { + if matches!(cli.command.as_ref(), Some(Commands::List)) { let recordings = discover_recordings(&cli.input_dir); if recordings.is_empty() { println!("No recordings found in {}", cli.input_dir.display()); @@ -1269,21 +1616,24 @@ async fn main() -> anyhow::Result<()> { return Ok(()); } - let (run_decoder, run_playback, run_audio_sync, run_camera_sync) = match cli.command { - Some(Commands::Decoder) => (true, false, false, false), - Some(Commands::Playback) => (false, true, false, false), - Some(Commands::AudioSync) => (false, false, true, false), - Some(Commands::CameraSync) => (false, false, false, true), - Some(Commands::Full) | None => (true, true, true, true), - Some(Commands::List) => unreachable!(), - }; + let (run_decoder, run_playback, run_scrub, run_audio_sync, run_camera_sync) = + match cli.command.as_ref() { + Some(Commands::Decoder) => (true, false, false, false, false), + Some(Commands::Playback) => (false, true, false, false, false), + Some(Commands::Scrub) => (false, false, true, false, false), + Some(Commands::AudioSync) => (false, false, false, true, false), + Some(Commands::CameraSync) => (false, false, false, false, true), + Some(Commands::Full) | None => (true, true, true, true, true), + Some(Commands::List) => unreachable!(), + }; println!("\nCap Playback Test Runner"); println!("{}", "=".repeat(40)); println!( - "Testing {} recording(s) at {} FPS", + "Testing {} recording(s) at {} FPS (startup threshold: {:.1}ms)", recordings.len(), - cli.fps + cli.fps, + cli.startup_threshold_ms ); println!(); @@ -1295,8 +1645,10 @@ async fn main() -> anyhow::Result<()> { match run_tests_on_recording( recording_path, cli.fps, + cli.startup_threshold_ms, run_decoder, run_playback, + 
run_scrub, run_audio_sync, run_camera_sync, cli.verbose, @@ -1315,24 +1667,9 @@ async fn main() -> anyhow::Result<()> { print_summary(&reports); - if cli.benchmark_output { - let command = format!( - "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}{}", - match cli.command { - Some(Commands::Decoder) => "decoder", - Some(Commands::Playback) => "playback", - Some(Commands::AudioSync) => "audio-sync", - Some(Commands::CameraSync) => "camera-sync", - Some(Commands::Full) | None => "full", - Some(Commands::List) => "list", - }, - cli.fps, - cli.recording_path - .as_ref() - .map(|p| format!(" --recording-path {}", p.display())) - .unwrap_or_default(), - ); + let command = build_command_string(&cli); + if cli.benchmark_output { let benchmark_md = generate_benchmark_markdown(&reports, cli.notes.as_deref(), command.trim()); @@ -1341,6 +1678,14 @@ async fn main() -> anyhow::Result<()> { } } + if let Some(output_path) = &cli.json_output { + if let Err(e) = + write_json_output_to_file(output_path, &reports, cli.notes.as_deref(), command.trim()) + { + tracing::error!("Failed to write JSON benchmark results: {}", e); + } + } + let failed = reports.iter().filter(|r| !r.overall_passed).count(); std::process::exit(if failed > 0 { 1 } else { 0 }); } diff --git a/crates/rendering/src/decoder/avassetreader.rs b/crates/rendering/src/decoder/avassetreader.rs index 73f9c62a4a..62582efe67 100644 --- a/crates/rendering/src/decoder/avassetreader.rs +++ b/crates/rendering/src/decoder/avassetreader.rs @@ -417,6 +417,8 @@ pub struct AVAssetReaderDecoder { } impl AVAssetReaderDecoder { + const INITIAL_WARM_DECODER_COUNT: usize = 2; + fn new(path: PathBuf, tokio_handle: TokioHandle) -> Result { let keyframe_index = cap_video_decode::avassetreader::KeyframeIndex::build(&path).ok(); let fps = keyframe_index @@ -449,7 +451,10 @@ impl AVAssetReaderDecoder { let mut decoders = vec![primary_instance]; let initial_positions = pool_manager.positions(); - for pos in 
initial_positions.iter().skip(1) { + let warm_decoder_count = Self::INITIAL_WARM_DECODER_COUNT + .max(1) + .min(initial_positions.len()); + for pos in initial_positions.iter().take(warm_decoder_count).skip(1) { let start_time = pos.position_secs; match DecoderInstance::new( path.clone(), @@ -496,11 +501,65 @@ impl AVAssetReaderDecoder { }) } + fn ensure_decoder_available(&mut self, decoder_id: usize) -> usize { + if decoder_id < self.decoders.len() { + return decoder_id; + } + + let Some(template) = self.decoders.first() else { + return 0; + }; + let template_path = template.path.clone(); + let template_tokio_handle = template.tokio_handle.clone(); + let template_keyframe_index = template.keyframe_index.clone(); + + while self.decoders.len() <= decoder_id { + let next_id = self.decoders.len(); + let Some(position) = self + .pool_manager + .positions() + .iter() + .find(|p| p.id == next_id) + .map(|p| p.position_secs) + else { + break; + }; + + match DecoderInstance::new( + template_path.clone(), + template_tokio_handle.clone(), + position, + template_keyframe_index.clone(), + ) { + Ok(instance) => { + self.decoders.push(instance); + tracing::info!( + decoder_id = next_id, + position_secs = position, + total_decoders = self.decoders.len(), + "Lazily initialized decoder instance" + ); + } + Err(e) => { + tracing::warn!( + decoder_id = next_id, + position_secs = position, + error = %e, + "Failed to lazily initialize decoder instance" + ); + break; + } + } + } + + decoder_id.min(self.decoders.len().saturating_sub(1)) + } + fn select_best_decoder(&mut self, requested_time: f32) -> (usize, bool) { let (best_id, _distance, needs_reset) = self.pool_manager.find_best_decoder_for_time(requested_time); - let decoder_idx = best_id.min(self.decoders.len().saturating_sub(1)); + let decoder_idx = self.ensure_decoder_available(best_id); if needs_reset && decoder_idx < self.decoders.len() { self.decoders[decoder_idx].reset(requested_time); diff --git a/package.json b/package.json 
index 47f69790ef..4f81b478cc 100644 --- a/package.json +++ b/package.json @@ -41,6 +41,14 @@ "web": "pnpm --filter=@cap/web", "env-setup": "node scripts/env-cli.js", "check-tauri-versions": "node scripts/check-tauri-plugin-versions.js", + "bench:playback:matrix": "node scripts/run-playback-benchmark-matrix.js", + "bench:playback:aggregate": "node scripts/aggregate-playback-benchmarks.js", + "bench:playback:validate": "node scripts/validate-playback-matrix.js", + "bench:playback:report": "node scripts/build-playback-matrix-report.js", + "bench:playback:finalize": "node scripts/finalize-playback-matrix.js", + "bench:playback:publish": "node scripts/publish-playback-matrix-summary.js", + "bench:playback:analyze": "node scripts/analyze-playback-matrix-bottlenecks.js", + "bench:playback:compare": "node scripts/compare-playback-benchmark-runs.js", "clean": "find . -name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/aggregate-playback-benchmarks.js b/scripts/aggregate-playback-benchmarks.js new file mode 100644 index 0000000000..b1e89d0e10 --- /dev/null +++ b/scripts/aggregate-playback-benchmarks.js @@ -0,0 +1,248 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const inputs = []; + let output = null; + let help = false; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") { + continue; + } + if (arg === "--help" || arg === "-h") { + help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[i + 1]; + if (!value) { + throw new Error("Missing value for --input"); + } + inputs.push(path.resolve(value)); + i += 1; + continue; + } + if (arg === "--output" || arg === "-o") { + const value = argv[i + 1]; + if (!value) { + throw new 
Error("Missing value for --output"); + } + output = path.resolve(value); + i += 1; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return { inputs, output, help }; +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + + const files = []; + const entries = fs.readdirSync(targetPath, { withFileTypes: true }); + for (const entry of entries) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) { + return {}; + } + const parsed = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) { + continue; + } + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) { + continue; + } + parsed[key.trim()] = value.trim(); + } + return parsed; +} + +function numberOrNull(value) { + if (typeof value !== "number" || Number.isNaN(value)) { + return null; + } + return value; +} + +function maxOrNull(values) { + const numeric = values.map(numberOrNull).filter((value) => value !== null); + if (numeric.length === 0) { + return null; + } + return Math.max(...numeric); +} + +function avgOrNull(values) { + const numeric = values.map(numberOrNull).filter((value) => value !== null); + if (numeric.length === 0) { + return null; + } + return numeric.reduce((acc, value) => acc + value, 0) / numeric.length; +} + +function formatMetric(value, digits = 1) { + return value === null ? 
"n/a" : value.toFixed(digits); +} + +function extractRows(jsonPath, data) { + if (!Array.isArray(data.reports)) { + return []; + } + + const notes = parseNotes(data.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const runTime = data.generated_at_utc ?? "unknown"; + + const rows = []; + for (const report of data.reports) { + const playbackResults = Array.isArray(report.playback_results) + ? report.playback_results + : []; + const scrubResults = Array.isArray(report.scrub_results) + ? report.scrub_results + : []; + const audioResults = Array.isArray(report.audio_sync_results) + ? report.audio_sync_results + : []; + + const effectiveFpsMin = playbackResults.length + ? Math.min( + ...playbackResults + .map((result) => numberOrNull(result.effective_fps)) + .filter((value) => value !== null), + ) + : null; + const scrubP95Max = maxOrNull( + scrubResults.map((result) => result.p95_seek_time_ms), + ); + const startupAvg = avgOrNull( + playbackResults.map((result) => result.startup_to_first_frame_ms), + ); + const micDiffMax = maxOrNull( + audioResults + .filter((result) => result.has_mic_audio) + .map((result) => result.mic_video_diff_ms), + ); + const sysDiffMax = maxOrNull( + audioResults + .filter((result) => result.has_system_audio) + .map((result) => result.system_audio_video_diff_ms), + ); + + rows.push({ + runTime, + platform, + gpu, + scenario, + recording: report.recording_name ?? path.basename(jsonPath), + format: report.is_fragmented ? "fragmented" : "mp4", + status: report.overall_passed ? "PASS" : "FAIL", + effectiveFpsMin, + scrubP95Max, + startupAvg, + micDiffMax, + sysDiffMax, + command: data.command ?? "unknown", + source: jsonPath, + }); + } + + return rows; +} + +function buildMarkdown(rows) { + const sorted = [...rows].sort((a, b) => (a.runTime < b.runTime ? 
1 : -1)); + const passed = sorted.filter((row) => row.status === "PASS").length; + const failed = sorted.length - passed; + + let md = ""; + md += `# Playback Benchmark Aggregate\n\n`; + md += `Generated: ${new Date().toISOString()}\n\n`; + md += `Total rows: ${sorted.length}, Passed: ${passed}, Failed: ${failed}\n\n`; + md += + "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; + md += "|---|---|---|---|---|---|---|---:|---:|---:|---:|---:|\n"; + for (const row of sorted) { + md += `| ${row.runTime} | ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.status} | ${formatMetric(row.effectiveFpsMin)} | ${formatMetric(row.scrubP95Max)} | ${formatMetric(row.startupAvg)} | ${formatMetric(row.micDiffMax)} | ${formatMetric(row.sysDiffMax)} |\n`; + } + md += "\n"; + return md; +} + +function printUsage() { + console.log(`Usage: node scripts/aggregate-playback-benchmarks.js --input [--input ...] 
[--output ] + +Aggregates playback-test-runner JSON outputs into a markdown summary table.`); +} + +function main() { + const args = parseArgs(process.argv); + if (args.help) { + printUsage(); + return; + } + if (args.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const inputPath of args.inputs) { + for (const filePath of collectJsonFiles(inputPath)) { + files.add(filePath); + } + } + + if (files.size === 0) { + throw new Error("No JSON files found for aggregation"); + } + + const rows = []; + for (const filePath of files) { + const raw = fs.readFileSync(filePath, "utf8"); + const parsed = JSON.parse(raw); + rows.push(...extractRows(filePath, parsed)); + } + + const markdown = buildMarkdown(rows); + if (args.output) { + fs.writeFileSync(args.output, markdown, "utf8"); + console.log(`Wrote aggregate markdown to ${args.output}`); + } else { + process.stdout.write(markdown); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} diff --git a/scripts/analyze-playback-matrix-bottlenecks.js b/scripts/analyze-playback-matrix-bottlenecks.js new file mode 100644 index 0000000000..8bf7c4317a --- /dev/null +++ b/scripts/analyze-playback-matrix-bottlenecks.js @@ -0,0 +1,286 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + inputs: [], + output: null, + outputJson: null, + targetFps: 60, + maxScrubP95Ms: 40, + maxStartupMs: 250, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--output" || arg === "-o") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output"); + options.output = path.resolve(value); + continue; + } + if (arg === "--output-json") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-json"); + options.outputJson = path.resolve(value); + continue; + } + if (arg === "--target-fps") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --target-fps value"); + } + options.targetFps = value; + continue; + } + if (arg === "--max-scrub-p95-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-scrub-p95-ms value"); + } + options.maxScrubP95Ms = value; + continue; + } + if (arg === "--max-startup-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-startup-ms value"); + } + options.maxStartupMs = value; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/analyze-playback-matrix-bottlenecks.js --input [--input ...] [--output ] [--output-json ] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] + +Analyzes playback matrix JSON outputs and highlights prioritized bottlenecks.`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const result = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + result[key.trim()] = value.trim(); + } + return result; +} + +function average(values) { + if (values.length === 0) return null; + return values.reduce((acc, value) => acc + value, 0) / values.length; +} + +function max(values) { + if (values.length === 0) return null; + return Math.max(...values); +} + +function scoreIssue(issue, options) { + let score = 0; + if (issue.fpsMin !== null && issue.fpsMin < options.targetFps) { + score += (options.targetFps - issue.fpsMin) * 5; + } + if (issue.scrubP95 !== null && issue.scrubP95 > options.maxScrubP95Ms) { + score += issue.scrubP95 - 
options.maxScrubP95Ms; + } + if (issue.startupAvg !== null && issue.startupAvg > options.maxStartupMs) { + score += (issue.startupAvg - options.maxStartupMs) / 2; + } + return score; +} + +function formatValue(value, digits = 1) { + return value === null ? "n/a" : value.toFixed(digits); +} + +function collectIssues(files, options) { + const issues = []; + + for (const filePath of files) { + const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + + for (const report of reports) { + const playback = Array.isArray(report.playback_results) + ? report.playback_results + : []; + const scrub = Array.isArray(report.scrub_results) + ? report.scrub_results + : []; + + const fpsValues = playback + .map((entry) => entry.effective_fps) + .filter((entry) => typeof entry === "number"); + const startupValues = playback + .map((entry) => entry.startup_to_first_frame_ms) + .filter((entry) => typeof entry === "number"); + const scrubP95Values = scrub + .map((entry) => entry.p95_seek_time_ms) + .filter((entry) => typeof entry === "number"); + + const issue = { + platform, + gpu, + scenario, + recording: report.recording_name ?? path.basename(filePath), + format: report.is_fragmented ? "fragmented" : "mp4", + fpsMin: fpsValues.length ? 
Math.min(...fpsValues) : null, + startupAvg: average(startupValues), + scrubP95: max(scrubP95Values), + filePath, + }; + issue.score = scoreIssue(issue, options); + if (issue.score > 0) { + issues.push(issue); + } + } + } + + issues.sort((a, b) => b.score - a.score); + return issues; +} + +function recommendation(issue, options) { + const recommendations = []; + if (issue.fpsMin !== null && issue.fpsMin < options.targetFps) { + recommendations.push("inspect decode/render path and frame wait behavior"); + } + if (issue.scrubP95 !== null && issue.scrubP95 > options.maxScrubP95Ms) { + recommendations.push( + "investigate seek dispatch pressure and decoder reposition cost", + ); + } + if (issue.startupAvg !== null && issue.startupAvg > options.maxStartupMs) { + recommendations.push("optimize startup warmup and first-frame path"); + } + return recommendations.join("; "); +} + +function buildMarkdown(issues, options) { + let md = ""; + md += "# Playback Matrix Bottleneck Analysis\n\n"; + md += `Generated: ${new Date().toISOString()}\n\n`; + md += `Thresholds: target_fps=${options.targetFps}, max_scrub_p95_ms=${options.maxScrubP95Ms}, max_startup_ms=${options.maxStartupMs}\n\n`; + + if (issues.length === 0) { + md += "No bottlenecks detected for configured thresholds.\n"; + return md; + } + + md += + "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; + md += "|---:|---|---|---|---|---|---:|---:|---:|---:|---|\n"; + issues.forEach((issue, index) => { + md += `| ${index + 1} | ${issue.platform} | ${issue.gpu} | ${issue.scenario} | ${issue.recording} | ${issue.format} | ${formatValue(issue.fpsMin)} | ${formatValue(issue.startupAvg)} | ${formatValue(issue.scrubP95)} | ${formatValue(issue.score, 2)} | ${recommendation(issue, options)} |\n`; + }); + md += "\n"; + return md; +} + +function buildJson(issues, options) { + return { + generatedAt: new Date().toISOString(), + thresholds: { + targetFps: 
options.targetFps, + maxScrubP95Ms: options.maxScrubP95Ms, + maxStartupMs: options.maxStartupMs, + }, + issueCount: issues.length, + issues: issues.map((issue, index) => ({ + rank: index + 1, + ...issue, + recommendation: recommendation(issue, options), + })), + }; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const input of options.inputs) { + for (const filePath of collectJsonFiles(input)) { + files.add(filePath); + } + } + if (files.size === 0) { + throw new Error("No JSON files found"); + } + + const issues = collectIssues([...files], options); + const markdown = buildMarkdown(issues, options); + if (options.output) { + fs.writeFileSync(options.output, markdown, "utf8"); + console.log(`Wrote bottleneck analysis to ${options.output}`); + } else { + process.stdout.write(markdown); + } + if (options.outputJson) { + fs.writeFileSync( + options.outputJson, + JSON.stringify(buildJson(issues, options), null, 2), + "utf8", + ); + console.log(`Wrote bottleneck analysis JSON to ${options.outputJson}`); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} diff --git a/scripts/build-playback-matrix-report.js b/scripts/build-playback-matrix-report.js new file mode 100644 index 0000000000..e29c6b4621 --- /dev/null +++ b/scripts/build-playback-matrix-report.js @@ -0,0 +1,294 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +const DEFAULT_REQUIRED_CELLS = [ + { platform: "macos-13", gpu: "apple-silicon", scenario: "full" }, + { platform: "macos-13", gpu: "apple-silicon", scenario: "scrub" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "integrated", scenario: "full" }, + { platform: "windows-11", gpu: "integrated", scenario: "scrub" }, +]; + +function parseArgs(argv) { + const options = { + inputs: [], + output: null, + useDefaultMatrix: true, + requiredCells: [], + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--output" || arg === "-o") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output"); + options.output = path.resolve(value); + continue; + } + if (arg === "--no-default-matrix") { + options.useDefaultMatrix = false; + continue; + } + if (arg === "--require-cell") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --require-cell"); + options.requiredCells.push(parseCell(value)); + options.useDefaultMatrix = false; + continue; + } + throw new Error(`Unknown argument: 
${arg}`); + } + + return options; +} + +function parseCell(value) { + const [platform, gpu, scenario] = value.split(":"); + if (!platform || !gpu || !scenario) { + throw new Error( + `Invalid --require-cell format: ${value}. Expected platform:gpu:scenario`, + ); + } + return { platform, gpu, scenario }; +} + +function usage() { + console.log(`Usage: node scripts/build-playback-matrix-report.js --input [--input ...] [--output ] + +Builds a concise playback matrix markdown report from playback benchmark JSON outputs.`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const result = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + result[key.trim()] = value.trim(); + } + return result; +} + +function cellKey(platform, gpu, scenario) { + return `${platform}|${gpu}|${scenario}`; +} + +function platformGpuKey(platform, gpu) { + return `${platform}|${gpu}`; +} + +function timestampOrEpoch(value) { + const parsed = Date.parse(value ?? ""); + return Number.isNaN(parsed) ? 
0 : parsed; +} + +function upsertLatestCell(cells, candidate) { + const key = cellKey(candidate.platform, candidate.gpu, candidate.scenario); + const existing = cells.get(key); + if ( + !existing || + timestampOrEpoch(candidate.generatedAt) >= + timestampOrEpoch(existing.generatedAt) + ) { + cells.set(key, candidate); + } +} + +function collectData(files) { + const latestCells = new Map(); + const formatCoverage = new Map(); + + for (const filePath of files) { + const raw = fs.readFileSync(filePath, "utf8"); + const parsed = JSON.parse(raw); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + const pass = reports.every((report) => report.overall_passed === true); + const generatedAt = parsed.generated_at_utc ?? ""; + + const formats = new Set(); + for (const report of reports) { + formats.add(report.is_fragmented ? "fragmented" : "mp4"); + } + + upsertLatestCell(latestCells, { + platform, + gpu, + scenario, + pass, + generatedAt, + filePath, + formats, + }); + + const pgKey = platformGpuKey(platform, gpu); + if (!formatCoverage.has(pgKey)) { + formatCoverage.set(pgKey, new Set()); + } + for (const format of formats) { + formatCoverage.get(pgKey).add(format); + } + } + + return { latestCells, formatCoverage }; +} + +function formatStatus(entry) { + if (!entry) return "MISSING"; + return entry.pass ? 
"PASS" : "FAIL"; +} + +function formatCoverageStatus(formats, target) { + if (!formats || !formats.has(target)) return "NO"; + return "YES"; +} + +function buildReport(requiredCells, latestCells, formatCoverage) { + const platformGpuPairs = new Map(); + for (const cell of requiredCells) { + const key = platformGpuKey(cell.platform, cell.gpu); + if (!platformGpuPairs.has(key)) { + platformGpuPairs.set(key, { platform: cell.platform, gpu: cell.gpu }); + } + } + + const rows = []; + let missingCount = 0; + let failCount = 0; + for (const { platform, gpu } of platformGpuPairs.values()) { + const full = latestCells.get(cellKey(platform, gpu, "full")); + const scrub = latestCells.get(cellKey(platform, gpu, "scrub")); + const formats = formatCoverage.get(platformGpuKey(platform, gpu)); + const fullStatus = formatStatus(full); + const scrubStatus = formatStatus(scrub); + if (fullStatus === "MISSING" || scrubStatus === "MISSING") { + missingCount += 1; + } + if (fullStatus === "FAIL" || scrubStatus === "FAIL") { + failCount += 1; + } + rows.push({ + platform, + gpu, + fullStatus, + scrubStatus, + mp4: formatCoverageStatus(formats, "mp4"), + fragmented: formatCoverageStatus(formats, "fragmented"), + fullTime: full?.generatedAt ?? "n/a", + scrubTime: scrub?.generatedAt ?? 
"n/a", + }); + } + + let markdown = ""; + markdown += "# Playback Matrix Status Report\n\n"; + markdown += `Generated: ${new Date().toISOString()}\n\n`; + markdown += `Rows: ${rows.length}, Missing rows: ${missingCount}, Rows with failures: ${failCount}\n\n`; + markdown += + "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; + markdown += "|---|---|---|---|---|---|---|---|\n"; + for (const row of rows) { + markdown += `| ${row.platform} | ${row.gpu} | ${row.fullStatus} | ${row.scrubStatus} | ${row.mp4} | ${row.fragmented} | ${row.fullTime} | ${row.scrubTime} |\n`; + } + markdown += "\n"; + + const missingCells = requiredCells.filter((cell) => { + return !latestCells.has(cellKey(cell.platform, cell.gpu, cell.scenario)); + }); + if (missingCells.length > 0) { + markdown += "## Missing Cells\n\n"; + for (const cell of missingCells) { + markdown += `- ${cell.platform}:${cell.gpu}:${cell.scenario}\n`; + } + markdown += "\n"; + } + + return markdown; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const inputPath of options.inputs) { + for (const filePath of collectJsonFiles(inputPath)) { + files.add(filePath); + } + } + if (files.size === 0) { + throw new Error("No JSON files found"); + } + + const requiredCells = options.useDefaultMatrix + ? 
[...DEFAULT_REQUIRED_CELLS, ...options.requiredCells] + : options.requiredCells; + if (requiredCells.length === 0) { + throw new Error("No required cells configured"); + } + + const { latestCells, formatCoverage } = collectData([...files]); + const report = buildReport(requiredCells, latestCells, formatCoverage); + + if (options.output) { + fs.writeFileSync(options.output, report, "utf8"); + console.log(`Wrote matrix report to ${options.output}`); + } else { + process.stdout.write(report); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js new file mode 100644 index 0000000000..e34c386eb6 --- /dev/null +++ b/scripts/compare-playback-benchmark-runs.js @@ -0,0 +1,683 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + baselineInputs: [], + candidateInputs: [], + output: null, + outputJson: null, + allowFpsDrop: 2, + allowStartupIncreaseMs: 25, + allowScrubP95IncreaseMs: 5, + allowMissingCandidate: false, + failOnCandidateOnly: false, + minSamplesPerRow: 1, + failOnParseErrors: false, + failOnZeroCompared: false, + failOnSkippedFiles: false, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--baseline") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --baseline"); + options.baselineInputs.push(path.resolve(value)); + continue; + } + if (arg === "--candidate") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --candidate"); + options.candidateInputs.push(path.resolve(value)); + continue; + } + if (arg === "--output" || arg === "-o") { + options.output = path.resolve(argv[++i] ?? 
""); + continue; + } + if (arg === "--output-json") { + options.outputJson = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--allow-fps-drop") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-fps-drop value"); + } + options.allowFpsDrop = value; + continue; + } + if (arg === "--allow-startup-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-startup-increase-ms value"); + } + options.allowStartupIncreaseMs = value; + continue; + } + if (arg === "--allow-scrub-p95-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-scrub-p95-increase-ms value"); + } + options.allowScrubP95IncreaseMs = value; + continue; + } + if (arg === "--allow-missing-candidate") { + options.allowMissingCandidate = true; + continue; + } + if (arg === "--fail-on-candidate-only") { + options.failOnCandidateOnly = true; + continue; + } + if (arg === "--min-samples-per-row") { + const value = Number.parseInt(argv[++i] ?? "", 10); + if (!Number.isFinite(value) || value < 1) { + throw new Error("Invalid --min-samples-per-row value"); + } + options.minSamplesPerRow = value; + continue; + } + if (arg === "--fail-on-parse-errors") { + options.failOnParseErrors = true; + continue; + } + if (arg === "--fail-on-zero-compared") { + options.failOnZeroCompared = true; + continue; + } + if (arg === "--fail-on-skipped-files") { + options.failOnSkippedFiles = true; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] 
[--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] [--fail-on-skipped-files] + +Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Path does not exist: ${targetPath}`); + } + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const parsed = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + parsed[key.trim()] = value.trim(); + } + return parsed; +} + +function average(values) { + if (values.length === 0) return null; + return values.reduce((acc, value) => acc + value, 0) / values.length; +} + +function maximum(values) { + if (values.length === 0) return null; + return Math.max(...values); +} + +function collectMetrics(files) { + const accumulators = new Map(); + const stats = { + totalFiles: files.length, + parsedFiles: 0, + usableFiles: 0, + skippedFiles: 0, + skippedNoReports: 0, + skippedNoUsableMetrics: 0, + parseErrors: [], + }; + + for (const filePath of files) { + let parsed; + try { + parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + stats.parsedFiles += 1; + 
} catch (error) { + stats.parseErrors.push({ + file: filePath, + error: error instanceof Error ? error.message : String(error), + }); + continue; + } + + if (!Array.isArray(parsed.reports) || parsed.reports.length === 0) { + stats.skippedFiles += 1; + stats.skippedNoReports += 1; + continue; + } + + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + let fileContributedRows = false; + + for (const report of reports) { + const key = `${platform}|${gpu}|${scenario}|${report.recording_name ?? "unknown"}|${report.is_fragmented ? "fragmented" : "mp4"}`; + + const playback = Array.isArray(report.playback_results) + ? report.playback_results + : []; + const scrub = Array.isArray(report.scrub_results) + ? report.scrub_results + : []; + + const fpsValues = playback + .map((entry) => entry.effective_fps) + .filter((entry) => typeof entry === "number"); + const startupValues = playback + .map((entry) => entry.startup_to_first_frame_ms) + .filter((entry) => typeof entry === "number"); + const scrubP95Values = scrub + .map((entry) => entry.p95_seek_time_ms) + .filter((entry) => typeof entry === "number"); + const hasUsableMetrics = + fpsValues.length > 0 || + startupValues.length > 0 || + scrubP95Values.length > 0; + if (!hasUsableMetrics) { + continue; + } + + const existing = accumulators.get(key) ?? { + key, + platform, + gpu, + scenario, + recording: report.recording_name ?? "unknown", + format: report.is_fragmented ? 
"fragmented" : "mp4", + reportCount: 0, + fpsSamples: [], + startupSamples: [], + scrubP95Samples: [], + }; + existing.reportCount += 1; + existing.fpsSamples.push(...fpsValues); + existing.startupSamples.push(...startupValues); + existing.scrubP95Samples.push(...scrubP95Values); + accumulators.set(key, existing); + fileContributedRows = true; + } + + if (fileContributedRows) { + stats.usableFiles += 1; + } else { + stats.skippedFiles += 1; + stats.skippedNoUsableMetrics += 1; + } + } + + const rows = new Map(); + for (const [key, row] of accumulators) { + rows.set(key, { + key, + platform: row.platform, + gpu: row.gpu, + scenario: row.scenario, + recording: row.recording, + format: row.format, + reportCount: row.reportCount, + fpsSampleCount: row.fpsSamples.length, + startupSampleCount: row.startupSamples.length, + scrubSampleCount: row.scrubP95Samples.length, + fpsMin: row.fpsSamples.length ? Math.min(...row.fpsSamples) : null, + startupAvg: average(row.startupSamples), + scrubP95Max: maximum(row.scrubP95Samples), + }); + } + + return { rows, stats }; +} + +function delta(candidate, baseline) { + if (candidate === null || baseline === null) return null; + return candidate - baseline; +} + +function formatNumber(value, digits = 2) { + return value === null ? 
"n/a" : value.toFixed(digits); +} + +function compareCoverageRows(a, b) { + return ( + a.platform.localeCompare(b.platform) || + a.gpu.localeCompare(b.gpu) || + a.scenario.localeCompare(b.scenario) || + a.recording.localeCompare(b.recording) || + a.format.localeCompare(b.format) + ); +} + +function compareMetrics(baselineRows, candidateRows, options) { + const comparisons = []; + const missingCandidateRows = []; + const candidateOnlyRows = []; + const insufficientSampleRows = []; + + for (const [key, baseline] of baselineRows) { + const candidate = candidateRows.get(key); + if (!candidate) { + missingCandidateRows.push({ + platform: baseline.platform, + gpu: baseline.gpu, + scenario: baseline.scenario, + recording: baseline.recording, + format: baseline.format, + }); + } + } + + for (const [key, candidate] of candidateRows) { + const baseline = baselineRows.get(key); + if (!baseline) { + candidateOnlyRows.push({ + platform: candidate.platform, + gpu: candidate.gpu, + scenario: candidate.scenario, + recording: candidate.recording, + format: candidate.format, + }); + continue; + } + + const fpsDelta = delta(candidate.fpsMin, baseline.fpsMin); + const startupDelta = delta(candidate.startupAvg, baseline.startupAvg); + const scrubDelta = delta(candidate.scrubP95Max, baseline.scrubP95Max); + + const regressions = []; + const fpsMinSamples = Math.min( + baseline.fpsSampleCount, + candidate.fpsSampleCount, + ); + const startupMinSamples = Math.min( + baseline.startupSampleCount, + candidate.startupSampleCount, + ); + const scrubMinSamples = Math.min( + baseline.scrubSampleCount, + candidate.scrubSampleCount, + ); + const comparableSampleCounts = []; + if (fpsDelta !== null) { + comparableSampleCounts.push(fpsMinSamples); + } + if (startupDelta !== null) { + comparableSampleCounts.push(startupMinSamples); + } + if (scrubDelta !== null) { + comparableSampleCounts.push(scrubMinSamples); + } + const effectiveSampleCount = + comparableSampleCounts.length > 0 + ? 
Math.min(...comparableSampleCounts) + : 0; + if (effectiveSampleCount < options.minSamplesPerRow) { + insufficientSampleRows.push({ + platform: candidate.platform, + gpu: candidate.gpu, + scenario: candidate.scenario, + recording: candidate.recording, + format: candidate.format, + effectiveSampleCount, + requiredSampleCount: options.minSamplesPerRow, + }); + regressions.push( + `insufficient_samples=${effectiveSampleCount}/${options.minSamplesPerRow}`, + ); + } + + if (fpsDelta !== null && fpsDelta < -options.allowFpsDrop) { + regressions.push(`fps_drop=${formatNumber(fpsDelta)}`); + } + if ( + startupDelta !== null && + startupDelta > options.allowStartupIncreaseMs + ) { + regressions.push(`startup_increase=${formatNumber(startupDelta)}`); + } + if (scrubDelta !== null && scrubDelta > options.allowScrubP95IncreaseMs) { + regressions.push(`scrub_p95_increase=${formatNumber(scrubDelta)}`); + } + + comparisons.push({ + platform: candidate.platform, + gpu: candidate.gpu, + scenario: candidate.scenario, + recording: candidate.recording, + format: candidate.format, + baselineReportCount: baseline.reportCount, + candidateReportCount: candidate.reportCount, + fpsMinSamples, + startupMinSamples, + scrubMinSamples, + comparedMetricCount: comparableSampleCounts.length, + effectiveSampleCount, + fpsDelta, + startupDelta, + scrubDelta, + regressions, + }); + } + + comparisons.sort( + (a, b) => + b.regressions.length - a.regressions.length || compareCoverageRows(a, b), + ); + missingCandidateRows.sort(compareCoverageRows); + candidateOnlyRows.sort(compareCoverageRows); + insufficientSampleRows.sort(compareCoverageRows); + return { + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + }; +} + +function escapeTableCell(value) { + return String(value).replace(/\\/g, "\\\\").replace(/\|/g, "\\|"); +} + +function toMarkdown( + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + baselineStats, + candidateStats, + 
options, +) { + const regressions = comparisons.filter( + (entry) => entry.regressions.length > 0, + ); + let md = ""; + md += "# Playback Benchmark Comparison\n\n"; + md += `Generated: ${new Date().toISOString()}\n\n`; + md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; + md += `Coverage gate: missing_candidate=${options.allowMissingCandidate ? "allow" : "fail"}, candidate_only=${options.failOnCandidateOnly ? "fail" : "allow"}\n\n`; + md += `Sample gate: min_samples_per_row>=${options.minSamplesPerRow}\n\n`; + md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; + md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? "fail_if_zero" : "allow"}\n\n`; + md += `Skipped-file gate: skipped_files=${options.failOnSkippedFiles ? "fail" : "allow"}\n\n`; + md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_usable_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; + md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_usable_metrics=${candidateStats.skippedNoUsableMetrics}, parse_errors=${candidateStats.parseErrors.length}\n\n`; + md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; + if ( + baselineStats.parseErrors.length > 0 || + candidateStats.parseErrors.length > 0 + ) { + md += "## Parse 
Errors\n\n"; + md += "| Side | File | Error |\n"; + md += "|---|---|---|\n"; + for (const entry of baselineStats.parseErrors.slice(0, 20)) { + md += `| baseline | ${escapeTableCell(entry.file)} | ${escapeTableCell(entry.error)} |\n`; + } + for (const entry of candidateStats.parseErrors.slice(0, 20)) { + md += `| candidate | ${escapeTableCell(entry.file)} | ${escapeTableCell(entry.error)} |\n`; + } + md += "\n"; + } + if (missingCandidateRows.length > 0) { + md += "## Missing Candidate Rows\n\n"; + md += "| Platform | GPU | Scenario | Recording | Format |\n"; + md += "|---|---|---|---|---|\n"; + for (const row of missingCandidateRows) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} |\n`; + } + md += "\n"; + } + if (candidateOnlyRows.length > 0) { + md += "## Candidate-Only Rows\n\n"; + md += "| Platform | GPU | Scenario | Recording | Format |\n"; + md += "|---|---|---|---|---|\n"; + for (const row of candidateOnlyRows) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} |\n`; + } + md += "\n"; + } + if (insufficientSampleRows.length > 0) { + md += "## Insufficient Sample Rows\n\n"; + md += + "| Platform | GPU | Scenario | Recording | Format | Effective Samples | Required Samples |\n"; + md += "|---|---|---|---|---|---:|---:|\n"; + for (const row of insufficientSampleRows) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.effectiveSampleCount} | ${row.requiredSampleCount} |\n`; + } + md += "\n"; + } + md += + "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | F Samples | S Samples | Q Samples | Metrics | Effective Samples | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; + md += + "|---|---|---|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---|\n"; + for (const row of comparisons) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | 
${row.baselineReportCount} | ${row.candidateReportCount} | ${row.fpsMinSamples} | ${row.startupMinSamples} | ${row.scrubMinSamples} | ${row.comparedMetricCount} | ${row.effectiveSampleCount} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? row.regressions.join(", ") : "none"} |\n`; + } + md += "\n"; + return md; +} + +function buildJsonOutput( + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + baselineStats, + candidateStats, + options, +) { + const regressions = comparisons.filter( + (entry) => entry.regressions.length > 0, + ); + const hasMissingCandidateRows = missingCandidateRows.length > 0; + const hasCandidateOnlyRows = candidateOnlyRows.length > 0; + const hasInsufficientSamples = insufficientSampleRows.length > 0; + const hasMetricRegressions = regressions.some((entry) => + entry.regressions.some( + (issue) => + issue.startsWith("fps_drop=") || + issue.startsWith("startup_increase=") || + issue.startsWith("scrub_p95_increase="), + ), + ); + const failureReasons = []; + if (hasMetricRegressions) { + failureReasons.push("metric_regression"); + } + if (hasInsufficientSamples) { + failureReasons.push("insufficient_samples"); + } + if (!options.allowMissingCandidate && hasMissingCandidateRows) { + failureReasons.push("missing_candidate_rows"); + } + if (options.failOnCandidateOnly && hasCandidateOnlyRows) { + failureReasons.push("candidate_only_rows"); + } + if ( + options.failOnParseErrors && + (baselineStats.parseErrors.length > 0 || + candidateStats.parseErrors.length > 0) + ) { + failureReasons.push("parse_errors"); + } + if (options.failOnZeroCompared && comparisons.length === 0) { + failureReasons.push("zero_compared_rows"); + } + if ( + options.failOnSkippedFiles && + (baselineStats.skippedFiles > 0 || candidateStats.skippedFiles > 0) + ) { + failureReasons.push("skipped_files"); + } + const passed = failureReasons.length === 0; + 
return { + generatedAt: new Date().toISOString(), + tolerance: { + allowFpsDrop: options.allowFpsDrop, + allowStartupIncreaseMs: options.allowStartupIncreaseMs, + allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, + allowMissingCandidate: options.allowMissingCandidate, + failOnCandidateOnly: options.failOnCandidateOnly, + minSamplesPerRow: options.minSamplesPerRow, + failOnParseErrors: options.failOnParseErrors, + failOnZeroCompared: options.failOnZeroCompared, + failOnSkippedFiles: options.failOnSkippedFiles, + }, + fileStats: { + baseline: baselineStats, + candidate: candidateStats, + }, + summary: { + comparedRows: comparisons.length, + regressions: regressions.length, + missingCandidateRows: missingCandidateRows.length, + candidateOnlyRows: candidateOnlyRows.length, + insufficientSampleRows: insufficientSampleRows.length, + passed, + failureReasons, + gateOutcomes: { + metricRegressions: !hasMetricRegressions, + insufficientSamples: !hasInsufficientSamples, + missingCandidateRows: + options.allowMissingCandidate || !hasMissingCandidateRows, + candidateOnlyRows: + !options.failOnCandidateOnly || !hasCandidateOnlyRows, + parseErrors: + !options.failOnParseErrors || + (baselineStats.parseErrors.length === 0 && + candidateStats.parseErrors.length === 0), + zeroComparedRows: !options.failOnZeroCompared || comparisons.length > 0, + skippedFiles: + !options.failOnSkippedFiles || + (baselineStats.skippedFiles === 0 && + candidateStats.skippedFiles === 0), + }, + }, + regressions, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + comparisons, + }; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if ( + options.baselineInputs.length === 0 || + options.candidateInputs.length === 0 + ) { + throw new Error("At least one --baseline and one --candidate are required"); + } + + const baselineFiles = [ + ...new Set(options.baselineInputs.flatMap(collectJsonFiles)), + ]; + const 
candidateFiles = [ + ...new Set(options.candidateInputs.flatMap(collectJsonFiles)), + ]; + if (baselineFiles.length === 0) { + throw new Error("No baseline JSON files found"); + } + if (candidateFiles.length === 0) { + throw new Error("No candidate JSON files found"); + } + + const baselineCollected = collectMetrics(baselineFiles); + const candidateCollected = collectMetrics(candidateFiles); + const baselineRows = baselineCollected.rows; + const candidateRows = candidateCollected.rows; + const { + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + } = compareMetrics(baselineRows, candidateRows, options); + const markdown = toMarkdown( + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + baselineCollected.stats, + candidateCollected.stats, + options, + ); + const outputJson = buildJsonOutput( + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + baselineCollected.stats, + candidateCollected.stats, + options, + ); + + if (options.output) { + fs.writeFileSync(options.output, markdown, "utf8"); + console.log(`Wrote comparison report to ${options.output}`); + } else { + process.stdout.write(markdown); + } + if (options.outputJson) { + fs.writeFileSync( + options.outputJson, + JSON.stringify(outputJson, null, 2), + "utf8", + ); + console.log(`Wrote comparison JSON to ${options.outputJson}`); + } + + if (!outputJson.summary.passed) { + process.exit(1); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js new file mode 100644 index 0000000000..3c5dc0eb0b --- /dev/null +++ b/scripts/finalize-playback-matrix.js @@ -0,0 +1,418 @@ +#!/usr/bin/env node + +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + inputs: [], + outputDir: null, + requireFormats: [], + targetFps: 60, + maxScrubP95Ms: 40, + maxStartupMs: 250, + analyze: true, + publishTarget: null, + compareBaselineInputs: [], + outputJson: null, + allowFpsDrop: 2, + allowStartupIncreaseMs: 25, + allowScrubP95IncreaseMs: 5, + allowMissingCandidate: false, + failOnCandidateOnly: false, + minSamplesPerRow: 1, + failOnParseErrors: false, + failOnZeroCompared: false, + failOnSkippedFiles: false, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--output-dir" || arg === "-o") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-dir"); + options.outputDir = path.resolve(value); + continue; + } + if (arg === "--require-formats") { + const value = argv[++i] ?? ""; + options.requireFormats = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + continue; + } + if (arg === "--target-fps") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --target-fps value"); + } + options.targetFps = value; + continue; + } + if (arg === "--max-scrub-p95-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-scrub-p95-ms value"); + } + options.maxScrubP95Ms = value; + continue; + } + if (arg === "--max-startup-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-startup-ms value"); + } + options.maxStartupMs = value; + continue; + } + if (arg === "--skip-analyze") { + options.analyze = false; + continue; + } + if (arg === "--publish-target") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --publish-target"); + options.publishTarget = path.resolve(value); + continue; + } + if (arg === "--output-json") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-json"); + options.outputJson = path.resolve(value); + continue; + } + if (arg === "--compare-baseline") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --compare-baseline"); + options.compareBaselineInputs.push(path.resolve(value)); + continue; + } + if (arg === "--allow-fps-drop") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-fps-drop value"); + } + options.allowFpsDrop = value; + continue; + } + if (arg === "--allow-startup-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-startup-increase-ms value"); + } + options.allowStartupIncreaseMs = value; + continue; + } + if (arg === "--allow-scrub-p95-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-scrub-p95-increase-ms value"); + } + options.allowScrubP95IncreaseMs = value; + continue; + } + if (arg === "--allow-missing-candidate") { + options.allowMissingCandidate = true; + continue; + } + if (arg === "--fail-on-candidate-only") { + options.failOnCandidateOnly = true; + continue; + } + if (arg === "--min-samples-per-row") { + const value = Number.parseInt(argv[++i] ?? "", 10); + if (!Number.isFinite(value) || value < 1) { + throw new Error("Invalid --min-samples-per-row value"); + } + options.minSamplesPerRow = value; + continue; + } + if (arg === "--fail-on-parse-errors") { + options.failOnParseErrors = true; + continue; + } + if (arg === "--fail-on-zero-compared") { + options.failOnZeroCompared = true; + continue; + } + if (arg === "--fail-on-skipped-files") { + options.failOnSkippedFiles = true; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] [--fail-on-skipped-files] [--publish-target ] + +Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); +} + +function run(command, args) { + const result = spawnSync(command, args, { stdio: "inherit" }); + if (result.status !== 0) { + throw new Error(`Command failed: ${command} ${args.join(" ")}`); + } +} + +function readCommandOutput(command, args) { + const result = spawnSync(command, args, { encoding: "utf8" }); + if (result.status !== 0) { + return null; + } + return result.stdout.trim() || null; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + if (!options.outputDir) { + throw new Error("--output-dir is required"); + } + if (!fs.existsSync(options.outputDir)) { + fs.mkdirSync(options.outputDir, { recursive: true }); + } + + const aggregatePath = path.join( + options.outputDir, + "playback-benchmark-aggregate.md", + ); + const statusPath = path.join(options.outputDir, "playback-matrix-status.md"); + const validationPath = path.join( + options.outputDir, + "playback-matrix-validation.json", + ); + const bottleneckPath = path.join( + options.outputDir, + "playback-bottlenecks.md", + ); + const bottleneckJsonPath = path.join( + options.outputDir, + "playback-bottlenecks.json", + ); + const comparisonPath = path.join(options.outputDir, "playback-comparison.md"); + const comparisonJsonPath = path.join( + options.outputDir, + "playback-comparison.json", + ); + const summaryJsonPath = + options.outputJson ?? 
+ path.join(options.outputDir, "playback-finalize-summary.json"); + + const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; + const statusArgs = ["scripts/build-playback-matrix-report.js"]; + const validateArgs = [ + "scripts/validate-playback-matrix.js", + "--output-json", + validationPath, + ]; + + for (const input of options.inputs) { + aggregateArgs.push("--input", input); + statusArgs.push("--input", input); + validateArgs.push("--input", input); + } + + aggregateArgs.push("--output", aggregatePath); + statusArgs.push("--output", statusPath); + if (options.requireFormats.length > 0) { + validateArgs.push("--require-formats", options.requireFormats.join(",")); + } + + run("node", aggregateArgs); + run("node", statusArgs); + run("node", validateArgs); + if (options.analyze) { + const analyzeArgs = ["scripts/analyze-playback-matrix-bottlenecks.js"]; + for (const input of options.inputs) { + analyzeArgs.push("--input", input); + } + analyzeArgs.push( + "--output", + bottleneckPath, + "--output-json", + bottleneckJsonPath, + "--target-fps", + String(options.targetFps), + "--max-scrub-p95-ms", + String(options.maxScrubP95Ms), + "--max-startup-ms", + String(options.maxStartupMs), + ); + run("node", analyzeArgs); + } + if (options.compareBaselineInputs.length > 0) { + const compareArgs = ["scripts/compare-playback-benchmark-runs.js"]; + for (const baselineInput of options.compareBaselineInputs) { + compareArgs.push("--baseline", baselineInput); + } + for (const candidateInput of options.inputs) { + compareArgs.push("--candidate", candidateInput); + } + compareArgs.push( + "--output", + comparisonPath, + "--output-json", + comparisonJsonPath, + "--allow-fps-drop", + String(options.allowFpsDrop), + "--allow-startup-increase-ms", + String(options.allowStartupIncreaseMs), + "--allow-scrub-p95-increase-ms", + String(options.allowScrubP95IncreaseMs), + ); + if (options.allowMissingCandidate) { + compareArgs.push("--allow-missing-candidate"); + } + if 
(options.failOnCandidateOnly) { + compareArgs.push("--fail-on-candidate-only"); + } + if (options.failOnParseErrors) { + compareArgs.push("--fail-on-parse-errors"); + } + if (options.failOnZeroCompared) { + compareArgs.push("--fail-on-zero-compared"); + } + if (options.failOnSkippedFiles) { + compareArgs.push("--fail-on-skipped-files"); + } + compareArgs.push("--min-samples-per-row", String(options.minSamplesPerRow)); + run("node", compareArgs); + } + const validation = JSON.parse(fs.readFileSync(validationPath, "utf8")); + const comparison = + options.compareBaselineInputs.length > 0 + ? JSON.parse(fs.readFileSync(comparisonJsonPath, "utf8")) + : null; + const gitBranch = readCommandOutput("git", [ + "rev-parse", + "--abbrev-ref", + "HEAD", + ]); + const gitCommit = readCommandOutput("git", ["rev-parse", "HEAD"]); + const summary = { + generatedAt: new Date().toISOString(), + inputs: options.inputs, + outputDir: options.outputDir, + git: { + branch: gitBranch, + commit: gitCommit, + }, + artifacts: { + aggregatePath, + statusPath, + validationPath, + bottleneckPath: options.analyze ? bottleneckPath : null, + bottleneckJsonPath: options.analyze ? bottleneckJsonPath : null, + comparisonPath: + options.compareBaselineInputs.length > 0 ? comparisonPath : null, + comparisonJsonPath: + options.compareBaselineInputs.length > 0 ? 
comparisonJsonPath : null, + }, + settings: { + requireFormats: options.requireFormats, + targetFps: options.targetFps, + maxScrubP95Ms: options.maxScrubP95Ms, + maxStartupMs: options.maxStartupMs, + analyze: options.analyze, + publishTarget: options.publishTarget, + compareBaselineInputs: options.compareBaselineInputs, + allowFpsDrop: options.allowFpsDrop, + allowStartupIncreaseMs: options.allowStartupIncreaseMs, + allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, + allowMissingCandidate: options.allowMissingCandidate, + failOnCandidateOnly: options.failOnCandidateOnly, + minSamplesPerRow: options.minSamplesPerRow, + failOnParseErrors: options.failOnParseErrors, + failOnZeroCompared: options.failOnZeroCompared, + failOnSkippedFiles: options.failOnSkippedFiles, + }, + results: { + validationPassed: validation.passed === true, + comparisonPassed: comparison ? comparison.summary?.passed === true : null, + comparisonComparedRows: comparison?.summary?.comparedRows ?? null, + comparisonRegressions: comparison?.summary?.regressions ?? null, + comparisonMissingCandidateRows: + comparison?.summary?.missingCandidateRows ?? null, + comparisonCandidateOnlyRows: + comparison?.summary?.candidateOnlyRows ?? null, + comparisonInsufficientSampleRows: + comparison?.summary?.insufficientSampleRows ?? null, + comparisonFailureReasons: comparison?.summary?.failureReasons ?? null, + comparisonGateOutcomes: comparison?.summary?.gateOutcomes ?? null, + comparisonFileStats: comparison?.fileStats ?? 
null, + }, + }; + fs.writeFileSync(summaryJsonPath, JSON.stringify(summary, null, 2), "utf8"); + if (options.publishTarget) { + const publishArgs = [ + "scripts/publish-playback-matrix-summary.js", + "--aggregate-md", + aggregatePath, + "--status-md", + statusPath, + "--validation-json", + validationPath, + "--target", + options.publishTarget, + ]; + if (options.analyze) { + publishArgs.push("--bottlenecks-md", bottleneckPath); + } + if (options.compareBaselineInputs.length > 0) { + publishArgs.push( + "--comparison-md", + comparisonPath, + "--comparison-json", + comparisonJsonPath, + ); + } + publishArgs.push("--finalize-summary-json", summaryJsonPath); + run("node", publishArgs); + } + + console.log(`Aggregate markdown: ${aggregatePath}`); + console.log(`Status markdown: ${statusPath}`); + console.log(`Validation JSON: ${validationPath}`); + if (options.analyze) { + console.log(`Bottleneck analysis: ${bottleneckPath}`); + console.log(`Bottleneck analysis JSON: ${bottleneckJsonPath}`); + } + if (options.publishTarget) { + console.log(`Published target: ${options.publishTarget}`); + } + if (options.compareBaselineInputs.length > 0) { + console.log(`Comparison report: ${comparisonPath}`); + console.log(`Comparison JSON: ${comparisonJsonPath}`); + } + console.log(`Finalize summary JSON: ${summaryJsonPath}`); +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js new file mode 100644 index 0000000000..55f6b5b4cc --- /dev/null +++ b/scripts/publish-playback-matrix-summary.js @@ -0,0 +1,267 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + aggregateMd: null, + statusMd: null, + bottlenecksMd: null, + comparisonMd: null, + comparisonJson: null, + finalizeSummaryJson: null, + validationJson: null, + target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--aggregate-md") { + options.aggregateMd = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--status-md") { + options.statusMd = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--bottlenecks-md") { + options.bottlenecksMd = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--comparison-md") { + options.comparisonMd = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--comparison-json") { + options.comparisonJson = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--finalize-summary-json") { + options.finalizeSummaryJson = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--validation-json") { + options.validationJson = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--target") { + options.target = path.resolve(argv[++i] ?? 
""); + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--comparison-json ] [--finalize-summary-json ] [--target ] + +Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); +} + +function ensureFile(filePath, label) { + if (!filePath || !fs.existsSync(filePath)) { + throw new Error(`${label} file not found: ${filePath ?? "undefined"}`); + } +} + +function buildSummarySection( + aggregateMd, + statusMd, + validationJson, + bottlenecksMd, + comparisonMd, + comparisonJson, + finalizeSummaryJson, +) { + const now = new Date().toISOString(); + const validation = JSON.parse(validationJson); + const status = validation.passed ? "✅ MATRIX PASS" : "❌ MATRIX FAIL"; + + let markdown = ""; + markdown += `### Matrix Summary Run: ${now}\n\n`; + markdown += `**Validation:** ${status}\n\n`; + markdown += `- Validated cells: ${validation.validatedCells}\n`; + markdown += `- Observed cells: ${validation.observedCells}\n`; + markdown += `- Missing cells: ${validation.missingCells?.length ?? 0}\n`; + markdown += `- Format failures: ${validation.formatFailures?.length ?? 0}\n\n`; + if (comparisonJson) { + const comparison = JSON.parse(comparisonJson); + const comparisonPassed = comparison.summary?.passed === true; + markdown += `- Comparison gate: ${comparisonPassed ? "✅ PASS" : "❌ FAIL"}\n`; + markdown += `- Comparison regressions: ${comparison.summary?.regressions ?? "n/a"}\n`; + markdown += `- Missing candidate rows: ${comparison.summary?.missingCandidateRows ?? "n/a"}\n\n`; + markdown += `- Candidate-only rows: ${comparison.summary?.candidateOnlyRows ?? "n/a"}\n\n`; + markdown += `- Insufficient sample rows: ${comparison.summary?.insufficientSampleRows ?? 
"n/a"}\n`; + markdown += `- Minimum samples per row: ${comparison.tolerance?.minSamplesPerRow ?? "n/a"}\n\n`; + markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; + markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; + markdown += `- Parse error policy: ${comparison.tolerance?.failOnParseErrors ? "fail" : "allow"}\n`; + markdown += `- Zero-compare policy: ${comparison.tolerance?.failOnZeroCompared ? "fail" : "allow"}\n`; + markdown += `- Skipped-file policy: ${comparison.tolerance?.failOnSkippedFiles ? "fail" : "allow"}\n`; + markdown += `- Baseline parse errors: ${comparison.fileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; + markdown += `- Candidate parse errors: ${comparison.fileStats?.candidate?.parseErrors?.length ?? "n/a"}\n\n`; + markdown += `- Baseline skipped files: ${comparison.fileStats?.baseline?.skippedFiles ?? "n/a"}\n`; + markdown += `- Candidate skipped files: ${comparison.fileStats?.candidate?.skippedFiles ?? "n/a"}\n`; + markdown += `- Baseline skipped (no reports): ${comparison.fileStats?.baseline?.skippedNoReports ?? "n/a"}\n`; + markdown += `- Candidate skipped (no reports): ${comparison.fileStats?.candidate?.skippedNoReports ?? "n/a"}\n`; + markdown += `- Baseline skipped (no usable metrics): ${comparison.fileStats?.baseline?.skippedNoUsableMetrics ?? "n/a"}\n`; + markdown += `- Candidate skipped (no usable metrics): ${comparison.fileStats?.candidate?.skippedNoUsableMetrics ?? "n/a"}\n\n`; + const failureReasons = Array.isArray(comparison.summary?.failureReasons) + ? comparison.summary.failureReasons + : []; + if (failureReasons.length > 0) { + markdown += `- Comparison failure reasons: ${failureReasons.join(", ")}\n\n`; + } + } + if (finalizeSummaryJson) { + const finalizeSummary = JSON.parse(finalizeSummaryJson); + markdown += `- Finalize source branch: ${finalizeSummary.git?.branch ?? 
"n/a"}\n`; + markdown += `- Finalize source commit: ${finalizeSummary.git?.commit ?? "n/a"}\n`; + markdown += `- Finalize validation passed: ${finalizeSummary.results?.validationPassed === true ? "true" : "false"}\n`; + if (finalizeSummary.results?.comparisonPassed !== null) { + markdown += `- Finalize comparison passed: ${finalizeSummary.results?.comparisonPassed === true ? "true" : "false"}\n`; + } + if (finalizeSummary.results?.comparisonComparedRows !== null) { + markdown += `- Finalize compared rows: ${finalizeSummary.results?.comparisonComparedRows}\n`; + markdown += `- Finalize comparison regressions: ${finalizeSummary.results?.comparisonRegressions ?? "n/a"}\n`; + markdown += `- Finalize missing candidate rows: ${finalizeSummary.results?.comparisonMissingCandidateRows ?? "n/a"}\n`; + markdown += `- Finalize candidate-only rows: ${finalizeSummary.results?.comparisonCandidateOnlyRows ?? "n/a"}\n`; + markdown += `- Finalize insufficient sample rows: ${finalizeSummary.results?.comparisonInsufficientSampleRows ?? "n/a"}\n`; + } + const finalizeFileStats = finalizeSummary.results?.comparisonFileStats; + if (finalizeFileStats?.baseline || finalizeFileStats?.candidate) { + markdown += `- Finalize baseline parse errors: ${finalizeFileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; + markdown += `- Finalize candidate parse errors: ${finalizeFileStats?.candidate?.parseErrors?.length ?? "n/a"}\n`; + } + const finalizeFailureReasons = Array.isArray( + finalizeSummary.results?.comparisonFailureReasons, + ) + ? finalizeSummary.results.comparisonFailureReasons + : []; + if (finalizeFailureReasons.length > 0) { + markdown += `- Finalize comparison failure reasons: ${finalizeFailureReasons.join(", ")}\n`; + } + markdown += "\n"; + } + + if ((validation.missingCells?.length ?? 
0) > 0) { + markdown += "**Missing Cells**\n"; + for (const cell of validation.missingCells) { + markdown += `- ${cell.platform}:${cell.gpu}:${cell.scenario}\n`; + } + markdown += "\n"; + } + + if ((validation.formatFailures?.length ?? 0) > 0) { + markdown += "**Format Failures**\n"; + for (const failure of validation.formatFailures) { + markdown += `- ${failure.platform}:${failure.gpu}:${failure.scenario} missing ${failure.requiredFormat}\n`; + } + markdown += "\n"; + } + + markdown += "
<details>\n<summary>Matrix Status Report</summary>\n\n";
+  markdown += `${statusMd.trim()}\n\n`;
+  markdown += "</details>
\n\n"; + + markdown += "
<details>\n<summary>Aggregate Benchmark Report</summary>\n\n";
+  markdown += `${aggregateMd.trim()}\n\n`;
+  markdown += "</details>
\n\n"; + + if (bottlenecksMd) { + markdown += "
<details>\n<summary>Bottleneck Analysis</summary>\n\n";
+    markdown += `${bottlenecksMd.trim()}\n\n`;
+    markdown += "</details>
\n\n"; + } + if (comparisonMd) { + markdown += + "
<details>\n<summary>Baseline vs Candidate Comparison</summary>\n\n";
+    markdown += `${comparisonMd.trim()}\n\n`;
+    markdown += "</details>
\n\n";
+  }
+
+  return markdown;
+}
+
+function writeToBenchmarkHistory(targetFile, summaryMd) {
+  // NOTE(review): the marker strings below arrived emptied by HTML-stripping during
+  // extraction — "" can never delimit a region (indexOf("") is 0 for both, so the
+  // start >= end guard always throws). Reconstructed as HTML comment markers;
+  // confirm the exact token text against the target PLAYBACK-BENCHMARKS.md file.
+  const markerStart = "<!-- BENCHMARK RESULTS START -->";
+  const markerEnd = "<!-- BENCHMARK RESULTS END -->";
+  const current = fs.readFileSync(targetFile, "utf8");
+  const start = current.indexOf(markerStart);
+  const end = current.indexOf(markerEnd);
+  if (start === -1 || end === -1 || start >= end) {
+    throw new Error(`Could not find benchmark result markers in ${targetFile}`);
+  }
+
+  const insertPos = start + markerStart.length;
+  const updated =
+    current.slice(0, insertPos) + "\n\n" + summaryMd + current.slice(end);
+  fs.writeFileSync(targetFile, updated, "utf8");
+}
+
+function main() {
+  const options = parseArgs(process.argv);
+  if (options.help) {
+    usage();
+    return;
+  }
+
+  ensureFile(options.aggregateMd, "Aggregate markdown");
+  ensureFile(options.statusMd, "Status markdown");
+  ensureFile(options.validationJson, "Validation JSON");
+  if (options.bottlenecksMd) {
+    ensureFile(options.bottlenecksMd, "Bottlenecks markdown");
+  }
+  if (options.comparisonMd) {
+    ensureFile(options.comparisonMd, "Comparison markdown");
+  }
+  if (options.comparisonJson) {
+    ensureFile(options.comparisonJson, "Comparison JSON");
+  }
+  if (options.finalizeSummaryJson) {
+    ensureFile(options.finalizeSummaryJson, "Finalize summary JSON");
+  }
+  ensureFile(options.target, "Target");
+
+  const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8");
+  const statusMd = fs.readFileSync(options.statusMd, "utf8");
+  const validationJson = fs.readFileSync(options.validationJson, "utf8");
+  const bottlenecksMd = options.bottlenecksMd
+    ? fs.readFileSync(options.bottlenecksMd, "utf8")
+    : null;
+  const comparisonMd = options.comparisonMd
+    ? fs.readFileSync(options.comparisonMd, "utf8")
+    : null;
+  const comparisonJson = options.comparisonJson
+    ? fs.readFileSync(options.comparisonJson, "utf8")
+    : null;
+  const finalizeSummaryJson = options.finalizeSummaryJson
+    ?
fs.readFileSync(options.finalizeSummaryJson, "utf8") + : null; + const summaryMd = buildSummarySection( + aggregateMd, + statusMd, + validationJson, + bottlenecksMd, + comparisonMd, + comparisonJson, + finalizeSummaryJson, + ); + writeToBenchmarkHistory(options.target, summaryMd); + console.log(`Published matrix summary into ${options.target}`); +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js new file mode 100644 index 0000000000..626c19a7ba --- /dev/null +++ b/scripts/run-playback-benchmark-matrix.js @@ -0,0 +1,255 @@ +#!/usr/bin/env node + +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + platform: null, + gpu: null, + outputDir: null, + fps: 60, + startupThresholdMs: 250, + recordingPath: null, + inputDir: null, + validate: true, + requireFormats: [], + scenarios: ["full", "scrub"], + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") { + continue; + } + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--platform") { + options.platform = argv[++i] ?? null; + continue; + } + if (arg === "--gpu") { + options.gpu = argv[++i] ?? null; + continue; + } + if (arg === "--output-dir") { + options.outputDir = argv[++i] ?? null; + continue; + } + if (arg === "--fps") { + const value = Number.parseInt(argv[++i] ?? "", 10); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --fps value"); + } + options.fps = value; + continue; + } + if (arg === "--startup-threshold-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --startup-threshold-ms value"); + } + options.startupThresholdMs = value; + continue; + } + if (arg === "--recording-path") { + options.recordingPath = argv[++i] ?? null; + continue; + } + if (arg === "--input-dir") { + options.inputDir = argv[++i] ?? null; + continue; + } + if (arg === "--skip-validate") { + options.validate = false; + continue; + } + if (arg === "--require-formats") { + const value = argv[++i] ?? ""; + options.requireFormats = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + continue; + } + if (arg === "--scenarios") { + const value = argv[++i] ?? ""; + const scenarios = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + if (scenarios.length === 0) { + throw new Error("Invalid --scenarios value"); + } + options.scenarios = scenarios; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--startup-threshold-ms 250] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] [--scenarios full,scrub] + +Runs playback benchmark matrix scenarios and writes JSON outputs. 
+ +Required: + --platform Platform label (for notes metadata) + --gpu GPU label (for notes metadata) + --output-dir Directory for benchmark JSON outputs + +Optional: + --fps FPS for benchmark runs (default: 60) + --startup-threshold-ms Startup-to-first-frame threshold in ms (default: 250) + --recording-path Specific recording path + --input-dir Recording discovery directory + --require-formats Required formats for local validation (comma-separated) + --scenarios Scenarios to run (comma-separated; default: full,scrub) + --skip-validate Skip post-run validation`); +} + +function run(command, args) { + const result = spawnSync(command, args, { stdio: "inherit" }); + if (result.status !== 0) { + throw new Error(`Command failed: ${command} ${args.join(" ")}`); + } +} + +function scenarioOutputPath(outputDir, platform, gpu, scenario) { + const stamp = new Date().toISOString().replace(/[:.]/g, "-"); + return path.join(outputDir, `${stamp}-${platform}-${gpu}-${scenario}.json`); +} + +function scenarioArgs(options, scenario) { + const jsonOutput = scenarioOutputPath( + options.outputDir, + options.platform, + options.gpu, + scenario, + ); + const notes = `platform=${options.platform} gpu=${options.gpu} scenario=${scenario}`; + + const args = [ + "run", + "-p", + "cap-recording", + "--example", + "playback-test-runner", + "--", + scenario, + "--fps", + String(options.fps), + "--startup-threshold-ms", + String(options.startupThresholdMs), + "--json-output", + jsonOutput, + "--notes", + notes, + ]; + + if (options.recordingPath) { + args.push("--recording-path", options.recordingPath); + } else if (options.inputDir) { + args.push("--input-dir", options.inputDir); + } + + return args; +} + +function validateOptions(options) { + if (!options.platform || !options.gpu || !options.outputDir) { + throw new Error( + "Missing required options: --platform, --gpu, --output-dir", + ); + } + const validScenarios = new Set([ + "full", + "scrub", + "decoder", + "playback", + "audio-sync", + 
"camera-sync", + ]); + for (const scenario of options.scenarios) { + if (!validScenarios.has(scenario)) { + throw new Error(`Unsupported scenario: ${scenario}`); + } + } + + const absoluteOutputDir = path.resolve(options.outputDir); + options.outputDir = absoluteOutputDir; + if (!fs.existsSync(absoluteOutputDir)) { + fs.mkdirSync(absoluteOutputDir, { recursive: true }); + } +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + + validateOptions(options); + + console.log( + `Running matrix for platform=${options.platform} gpu=${options.gpu}`, + ); + for (const scenario of options.scenarios) { + run("cargo", scenarioArgs(options, scenario)); + } + + const aggregatePath = path.join( + options.outputDir, + `${options.platform}-${options.gpu}-aggregate.md`, + ); + run("node", [ + "scripts/aggregate-playback-benchmarks.js", + "--input", + options.outputDir, + "--output", + aggregatePath, + ]); + console.log(`Aggregate markdown: ${aggregatePath}`); + + if (options.validate) { + const validationJsonPath = path.join( + options.outputDir, + `${options.platform}-${options.gpu}-validation.json`, + ); + const validateArgs = [ + "scripts/validate-playback-matrix.js", + "--input", + options.outputDir, + "--no-default-matrix", + "--output-json", + validationJsonPath, + ]; + for (const scenario of options.scenarios) { + validateArgs.push( + "--require-cell", + `${options.platform}:${options.gpu}:${scenario}`, + ); + } + + if (options.requireFormats.length > 0) { + validateArgs.push("--require-formats", options.requireFormats.join(",")); + } + + run("node", validateArgs); + console.log("Matrix run validation passed"); + console.log(`Validation JSON: ${validationJsonPath}`); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js new file mode 100644 index 0000000000..b06f01e006 --- /dev/null +++ b/scripts/validate-playback-matrix.js @@ -0,0 +1,259 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +const DEFAULT_REQUIRED_CELLS = [ + { platform: "macos-13", gpu: "apple-silicon", scenario: "full" }, + { platform: "macos-13", gpu: "apple-silicon", scenario: "scrub" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "integrated", scenario: "full" }, + { platform: "windows-11", gpu: "integrated", scenario: "scrub" }, +]; + +function parseArgs(argv) { + const options = { + inputs: [], + requiredCells: [], + requiredFormats: [], + useDefaultMatrix: true, + outputJson: null, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") { + continue; + } + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--require-cell") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --require-cell"); + options.requiredCells.push(parseCell(value)); + options.useDefaultMatrix = false; + continue; + } + if (arg === "--require-formats") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --require-formats"); + options.requiredFormats = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + continue; + } + if (arg === 
"--no-default-matrix") { + options.useDefaultMatrix = false; + continue; + } + if (arg === "--output-json") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-json"); + options.outputJson = path.resolve(value); + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function parseCell(value) { + const [platform, gpu, scenario] = value.split(":"); + if (!platform || !gpu || !scenario) { + throw new Error( + `Invalid --require-cell format: ${value}. Expected platform:gpu:scenario`, + ); + } + return { platform, gpu, scenario }; +} + +function printUsage() { + console.log(`Usage: node scripts/validate-playback-matrix.js --input [--input ...] [--require-cell platform:gpu:scenario ...] [--require-formats mp4,fragmented] [--output-json ] + +Validates that required benchmark matrix cells are present in playback benchmark JSON results. + +Options: + --input, -i JSON file or directory containing JSON files (repeatable) + --require-cell Required cell as platform:gpu:scenario (repeatable) + --require-formats Comma-separated required formats per cell + --no-default-matrix Disable built-in required matrix + --output-json Write validation result JSON file + --help, -h Show help`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? 
[targetPath] : []; + } + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const result = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + result[key.trim()] = value.trim(); + } + return result; +} + +function keyForCell(cell) { + return `${cell.platform}|${cell.gpu}|${cell.scenario}`; +} + +function collectObservedCells(files) { + const observed = new Map(); + for (const filePath of files) { + const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const key = keyForCell({ platform, gpu, scenario }); + if (!observed.has(key)) { + observed.set(key, { + platform, + gpu, + scenario, + formats: new Set(), + files: new Set(), + }); + } + const entry = observed.get(key); + entry.files.add(filePath); + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + for (const report of reports) { + entry.formats.add(report.is_fragmented ? 
"fragmented" : "mp4"); + } + } + return observed; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + printUsage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const input of options.inputs) { + for (const filePath of collectJsonFiles(input)) { + files.add(filePath); + } + } + if (files.size === 0) { + throw new Error("No JSON files found"); + } + + const requiredCells = options.useDefaultMatrix + ? [...DEFAULT_REQUIRED_CELLS, ...options.requiredCells] + : options.requiredCells; + if (requiredCells.length === 0) { + throw new Error("No required matrix cells configured"); + } + + const observed = collectObservedCells([...files]); + const missingCells = []; + const formatFailures = []; + + for (const cell of requiredCells) { + const key = keyForCell(cell); + const observedCell = observed.get(key); + if (!observedCell) { + missingCells.push(cell); + continue; + } + for (const requiredFormat of options.requiredFormats) { + if (!observedCell.formats.has(requiredFormat)) { + formatFailures.push({ + ...cell, + requiredFormat, + observedFormats: [...observedCell.formats], + }); + } + } + } + + const validationResult = { + validatedCells: requiredCells.length, + observedCells: observed.size, + requiredFormats: options.requiredFormats, + missingCells, + formatFailures, + passed: missingCells.length === 0 && formatFailures.length === 0, + }; + + if (options.outputJson) { + fs.writeFileSync( + options.outputJson, + JSON.stringify(validationResult, null, 2), + ); + console.log(`Validation JSON: ${options.outputJson}`); + } + + console.log(`Validated ${validationResult.validatedCells} required cells`); + console.log(`Observed ${validationResult.observedCells} unique cells`); + + if (missingCells.length > 0) { + console.log("Missing required cells:"); + for (const cell of missingCells) { + console.log(` - 
${cell.platform}:${cell.gpu}:${cell.scenario}`); + } + } + + if (formatFailures.length > 0) { + console.log("Missing required formats:"); + for (const failure of formatFailures) { + console.log( + ` - ${failure.platform}:${failure.gpu}:${failure.scenario} missing ${failure.requiredFormat} (observed: ${failure.observedFormats.join(", ") || "none"})`, + ); + } + } + + if (!validationResult.passed) { + process.exit(1); + } + + console.log("Matrix validation passed"); +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +}