diff --git a/Cargo.lock b/Cargo.lock index c4f8665e90..6076dc378a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1311,6 +1311,7 @@ dependencies = [ "ringbuf", "sentry", "serde", + "serde_json", "specta", "tokio", "tokio-util", diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs index 00dc6903b1..795fbe7710 100644 --- a/apps/desktop/src-tauri/src/lib.rs +++ b/apps/desktop/src-tauri/src/lib.rs @@ -1915,11 +1915,29 @@ async fn set_playhead_position( editor_instance: WindowEditorInstance, frame_number: u32, ) -> Result<(), String> { - editor_instance - .modify_and_emit_state(|state| { - state.playhead_position = frame_number; - }) - .await; + let state_changed = { + let state = editor_instance.state.lock().await; + state.playhead_position != frame_number + }; + + if state_changed { + editor_instance + .modify_and_emit_state(|state| { + state.playhead_position = frame_number; + }) + .await; + } + + let playback_handle = if state_changed { + let state = editor_instance.state.lock().await; + state.playback_task.clone() + } else { + None + }; + + if let Some(handle) = playback_handle { + handle.seek(frame_number); + } Ok(()) } @@ -2539,11 +2557,29 @@ async fn is_camera_window_open(app: AppHandle) -> bool { #[specta::specta] #[instrument(skip(editor_instance))] async fn seek_to(editor_instance: WindowEditorInstance, frame_number: u32) -> Result<(), String> { - editor_instance - .modify_and_emit_state(|state| { - state.playhead_position = frame_number; - }) - .await; + let state_changed = { + let state = editor_instance.state.lock().await; + state.playhead_position != frame_number + }; + + if state_changed { + editor_instance + .modify_and_emit_state(|state| { + state.playhead_position = frame_number; + }) + .await; + } + + let playback_handle = if state_changed { + let state = editor_instance.state.lock().await; + state.playback_task.clone() + } else { + None + }; + + if let Some(handle) = playback_handle { + handle.seek(frame_number); + } 
Ok(()) } diff --git a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx index 8aacea955f..a1e32c6c04 100644 --- a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -7,6 +7,7 @@ import { createSignal, Index, type JSX, + onCleanup, onMount, Show, } from "solid-js"; @@ -89,7 +90,6 @@ export function Timeline() { editorState, projectActions, meta, - previewResolutionBase, } = useEditorContext(); const duration = () => editorInstance.recordingDuration; @@ -221,6 +221,17 @@ export function Timeline() { let pendingScrollDelta = 0; let scrollRafId: number | null = null; + let pendingSeekFrame: number | null = null; + let seekRafId: number | null = null; + let seekInFlight = false; + let inFlightSeekFrame: number | null = null; + let lastCompletedSeekFrame: number | null = null; + + onCleanup(() => { + if (zoomRafId !== null) cancelAnimationFrame(zoomRafId); + if (scrollRafId !== null) cancelAnimationFrame(scrollRafId); + if (seekRafId !== null) cancelAnimationFrame(seekRafId); + }); function flushPendingZoom() { if (pendingZoomDelta === 0 || pendingZoomOrigin === null) { @@ -266,7 +277,50 @@ export function Timeline() { } } - async function handleUpdatePlayhead(e: MouseEvent) { + function scheduleSeek(frameNumber: number) { + if ( + frameNumber === pendingSeekFrame || + frameNumber === inFlightSeekFrame || + frameNumber === lastCompletedSeekFrame + ) { + return; + } + pendingSeekFrame = frameNumber; + if (seekRafId === null) { + seekRafId = requestAnimationFrame(flushPendingSeek); + } + } + + async function flushPendingSeek() { + seekRafId = null; + + if (seekInFlight || pendingSeekFrame === null) { + if (pendingSeekFrame !== null && seekRafId === null) { + seekRafId = requestAnimationFrame(flushPendingSeek); + } + return; + } + + const frameNumber = pendingSeekFrame; + pendingSeekFrame = null; + seekInFlight = true; + inFlightSeekFrame = frameNumber; + + try { + 
await commands.seekTo(frameNumber); + lastCompletedSeekFrame = frameNumber; + } catch (err) { + console.error("Failed to seek timeline playhead:", err); + } finally { + seekInFlight = false; + inFlightSeekFrame = null; + if (pendingSeekFrame !== null && seekRafId === null) { + seekRafId = requestAnimationFrame(flushPendingSeek); + } + } + } + + function handleUpdatePlayhead(e: MouseEvent) { const { left } = timelineBounds; if ( zoomSegmentDragState.type !== "moving" && @@ -274,33 +328,14 @@ export function Timeline() { maskSegmentDragState.type !== "moving" && textSegmentDragState.type !== "moving" ) { - // Guard against missing bounds and clamp computed time to [0, totalDuration()] if (left == null) return; const rawTime = secsPerPixel() * (e.clientX - left) + transform().position; const newTime = Math.min(Math.max(0, rawTime), totalDuration()); - - // If playing, some backends require restart to seek reliably - if (editorState.playing) { - try { - await commands.stopPlayback(); - - // Round to nearest frame to prevent off-by-one drift - const targetFrame = Math.round(newTime * FPS); - await commands.seekTo(targetFrame); - - // If the user paused during these async ops, bail out without restarting - if (!editorState.playing) { - setEditorState("playbackTime", newTime); - return; - } - - await commands.startPlayback(FPS, previewResolutionBase()); - setEditorState("playing", true); - } catch (err) { - console.error("Failed to seek during playback:", err); - } - } + const total = totalDuration(); + const maxFrame = Math.max(0, Math.ceil(total * FPS) - 1); + const targetFrame = Math.min(Math.round(newTime * FPS), maxFrame); + scheduleSeek(targetFrame); setEditorState("playbackTime", newTime); } diff --git a/crates/editor/Cargo.toml b/crates/editor/Cargo.toml index c612d1e33f..0cc16b4111 100644 --- a/crates/editor/Cargo.toml +++ b/crates/editor/Cargo.toml @@ -23,6 +23,7 @@ axum = { version = "0.7.5", features = ["ws"] } ffmpeg.workspace = true specta.workspace = true 
serde = { workspace = true } +serde_json = "1" sentry.workspace = true futures = { workspace = true } tracing.workspace = true diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index a2253d580f..c06f822c5d 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -10,8 +10,10 @@ This document tracks performance benchmarks for Cap's playback and decoding syst |--------|--------|-----------| | Decoder Init | <200ms | - | | Decode Latency (p95) | <50ms | - | +| Startup to First Frame | <250ms | configurable | | Effective FPS | ≥30 fps | ±2 fps | | Decode Jitter | <10ms | - | +| Scrub Seek Latency (p95) | <40ms | - | | A/V Sync (mic↔video) | <100ms | - | | A/V Sync (system↔video) | <100ms | - | | Camera-Display Drift | <100ms | - | @@ -20,6 +22,7 @@ This document tracks performance benchmarks for Cap's playback and decoding syst - **Decoder Tests**: Init time, hardware acceleration detection, fallback handling - **Playback Tests**: Sequential decode, frame retrieval, latency percentiles +- **Scrub Tests**: Random access seek decode latency and seek failure rate - **Audio Sync Tests**: Mic-video sync, system audio-video sync - **Camera Sync Tests**: Camera-display drift, frame count alignment - **Decode Benchmark**: Creation, sequential, seek, and random access performance @@ -40,10 +43,12 @@ This document tracks performance benchmarks for Cap's playback and decoding syst ```bash # Run full playback validation on recordings from real-device-test-runner cargo run -p cap-recording --example playback-test-runner -- full +cargo run -p cap-recording --example playback-test-runner -- full --startup-threshold-ms 250 # Run specific test categories cargo run -p cap-recording --example playback-test-runner -- decoder cargo run -p cap-recording --example playback-test-runner -- playback +cargo run -p cap-recording --example playback-test-runner -- scrub cargo run -p cap-recording --example 
playback-test-runner -- audio-sync cargo run -p cap-recording --example playback-test-runner -- camera-sync @@ -52,6 +57,69 @@ cargo run -p cap-recording --example playback-test-runner -- --recording-path /p # List available recordings cargo run -p cap-recording --example playback-test-runner -- list + +# Emit machine-readable JSON report +cargo run -p cap-recording --example playback-test-runner -- full --json-output /tmp/playback-benchmark.json +``` + +Aggregate JSON outputs from multiple machines: + +```bash +node scripts/aggregate-playback-benchmarks.js --input /path/to/json-results --output /tmp/playback-benchmark-aggregate.md +node scripts/build-playback-matrix-report.js --input /path/to/json-results --output /tmp/playback-matrix-status.md +``` + +Validate matrix coverage and required formats: + +```bash +node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented +node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented --output-json /tmp/playback-matrix-validation.json + +# Finalize aggregate + status + validation artifacts +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results 
--output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-zero-compared +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-skipped-files +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md + +# Finalize summary JSON includes generated timestamp, artifact paths, settings, pass/fail flags, and git branch/commit metadata when available. +# Finalize summary JSON also carries comparison failure reasons and gate outcomes when comparison is enabled. +# Finalize summary JSON includes comparison file stats (including parse error counts) when comparison is enabled. +# Finalize summary JSON includes comparison summary counts (compared rows, regressions, missing/candidate-only/insufficient-sample counts) when comparison is enabled. 
+ +# Publish matrix artifacts into this benchmark history +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md /tmp/playback-matrix-final/playback-comparison.md --comparison-json /tmp/playback-matrix-final/playback-comparison.json +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --finalize-summary-json /tmp/playback-matrix-final/playback-finalize-summary.json + +# Analyze bottlenecks from matrix results +node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --output-json /tmp/playback-bottlenecks.json --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 + +# Compare candidate against baseline and flag regressions +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline 
/path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-zero-compared +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-skipped-files +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json + +# Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. +# Comparison table also reports baseline/candidate run counts per row when multiple JSON inputs contribute to the same key. +# Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for automation-friendly gate diagnostics. +# Minimum sample gating is applied against metrics that are actually comparable for the row (for example, scrub samples are not required for rows with no scrub metric comparison). +# Comparison JSON also includes baseline/candidate file parsing stats and optional parse-error gating support. +# Comparison can optionally gate on skipped input files via `--fail-on-skipped-files`. 
+# Comparison file stats now include skipped-file breakdown for `no_reports` and `no_usable_metrics`. ``` #### Decode Performance Benchmark @@ -62,6 +130,12 @@ cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 # With custom FPS and iterations cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --iterations 50 + +# Emit machine-readable JSON with startup/scrub metrics +cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --sequential-frames 180 --random-samples 120 --output-json /tmp/decode-benchmark.json + +# Fragmented segment input is supported by passing the display directory +cargo run -p cap-editor --example decode-benchmark -- --video /path/to/segment/display --fps 60 --output-json /tmp/decode-benchmark-fragmented.json ``` #### Combined Workflow (Recording → Playback) @@ -74,6 +148,30 @@ cargo run -p cap-recording --example real-device-test-runner -- full --keep-outp cargo run -p cap-recording --example playback-test-runner -- full ``` +### Cross-Platform Validation Matrix + +Run these scenarios on each required hardware class and append outputs via `--benchmark-output`. 
+ +```bash +cargo run -p cap-recording --example playback-test-runner -- full --fps 60 --benchmark-output --notes "platform= gpu= scenario=full" +cargo run -p cap-recording --example playback-test-runner -- scrub --fps 60 --benchmark-output --notes "platform= gpu= scenario=scrub" +``` + +Automated helper for machine runs: + +```bash +node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --startup-threshold-ms 250 --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir /tmp/playback-matrix-windows-nvidia --fps 60 --startup-threshold-ms 250 --require-formats mp4,fragmented --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu integrated --output-dir /tmp/playback-matrix-windows-integrated --fps 60 --startup-threshold-ms 250 --scenarios scrub --input-dir /tmp/cap-real-device-tests +``` + +| Platform | GPU Class | MP4 Full | Fragmented Full | MP4 Scrub | Fragmented Scrub | Notes | +|----------|-----------|----------|-----------------|-----------|------------------|-------| +| macOS 13+ | Apple Silicon | ☐ | ☐ | ☐ | ☐ | | +| Windows 11 | NVIDIA discrete | ☐ | ☐ | ☐ | ☐ | | +| Windows 11 | AMD discrete | ☐ | ☐ | ☐ | ☐ | | +| Windows 11 | Integrated baseline | ☐ | ☐ | ☐ | ☐ | | + --- ## Benchmark History @@ -106,6 +204,19 @@ cargo run -p cap-recording --example playback-test-runner -- full | **P50/P95/P99** | Latency percentiles | Sorted distribution | | **Effective FPS** | Actual decode throughput | frames / elapsed_time | | **Jitter** | Decode time variance (std dev) | sqrt(variance) | +| **First Decode** | Decode latency for first successful frame | elapsed from first frame request | +| **Startup to First** | Time from playback test start to first decoded frame | elapsed since playback test start | + +### Scrub Metrics + +| Metric | Description | How 
Measured | +|--------|-------------|--------------| +| **Seek Operations** | Total random seek attempts | Fixed operation count per segment | +| **Successful Seeks** | Seeks returning a decoded frame | Count of non-None seek decodes | +| **Failed Seeks** | Seeks returning no frame | Count of None seek decodes | +| **Avg Seek Time** | Mean random seek decode latency | Avg of seek decode times | +| **P50/P95/P99 Seek** | Seek latency percentiles | Sorted seek time distribution | +| **Max Seek Time** | Worst seek decode latency | Max of seek decode times | ### Audio Sync Metrics @@ -188,5 +299,6 @@ When analyzing benchmark results, focus on: ## Related Documentation - [Recording Benchmarks](../recording/BENCHMARKS.md) - Recording performance tracking +- [Playback Matrix Runbook](./PLAYBACK-MATRIX-RUNBOOK.md) - Cross-platform evidence collection workflow - [cap-rendering/decoder](../rendering/src/decoder.rs) - Decoder implementation - [cap-video-decode](../video-decode/) - Platform-specific decoders diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index d30b940f21..62ccbb4f18 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -35,7 +35,7 @@ ## Current Status -**Last Updated**: 2026-01-30 +**Last Updated**: 2026-02-13 ### Performance Summary @@ -60,10 +60,12 @@ - ✅ Multi-position decoder pool for smooth scrubbing - ✅ Mic audio sync within tolerance - ✅ Camera-display sync perfect (0ms drift) +- ✅ Editor playback now keeps a live seek channel during playback instead of stop/start restart loops +- ✅ Audio playback defaults to low-latency streaming buffer path with bounded prefill ### Known Issues (Lower Priority) 1. **System audio timing**: ~162ms difference inherited from recording-side timing issue -2. **Display decoder init time**: 337ms due to multi-position pool (creates 3 decoders) +2. 
**Display decoder init time**: baseline was 337ms from eager multi-decoder setup; now reduced by lazy decoder warmup but needs benchmark confirmation --- @@ -73,12 +75,17 @@ *(Update this section as you work)* - [ ] **Test fragmented mode** - Run playback tests on fragmented recordings -- [ ] **Investigate display decoder init time** - 337ms may be optimizable +- [ ] **Collect cross-platform benchmark evidence** - macOS 13+ and Windows GPU matrix for FPS, scrub settle, audio start latency, and A/V drift +- [ ] **Validate lazy decoder warmup impact** - measure display decoder init and scrub settle before/after on real recordings +- [ ] **Validate streaming audio startup/sync** - benchmark low-latency path vs legacy pre-render path across long timelines ### Completed - [x] **Run initial baseline** - Established current playback performance metrics (2026-01-28) - [x] **Profile decoder init time** - Hardware acceleration confirmed (AVAssetReader) (2026-01-28) - [x] **Identify latency hotspots** - No issues found, p95=3.1ms (2026-01-28) +- [x] **Remove seek restart churn in timeline path** - in-playback seeks now route through live playback handle (2026-02-13) +- [x] **Switch default audio mode to low-latency streaming** - full prerender now opt-in by env flag (2026-02-13) +- [x] **Reduce eager AVAssetReader decoder warmup** - pool now initializes lazily beyond first warm decoders (2026-02-13) --- @@ -91,6 +98,7 @@ cargo run -p cap-recording --example playback-test-runner -- full # Test specific categories cargo run -p cap-recording --example playback-test-runner -- decoder cargo run -p cap-recording --example playback-test-runner -- playback +cargo run -p cap-recording --example playback-test-runner -- scrub cargo run -p cap-recording --example playback-test-runner -- audio-sync cargo run -p cap-recording --example playback-test-runner -- camera-sync @@ -127,13 +135,368 @@ cargo run -p cap-recording --example playback-test-runner -- full ## Completed Fixes -*(Document 
fixes here as they are implemented)* +1. **Low-latency audio startup enabled by default (2026-02-13)** + - `AudioPlayback::spawn()` now selects streaming `create_stream()` path by default. + - Legacy full-timeline prerender path is still available via `CAP_AUDIO_PRERENDER_PLAYBACK=1`. + - `AudioPlaybackBuffer` is available on all platforms so Windows can use streaming sync logic. + +2. **In-playback seek path without stop/start (2026-02-13)** + - Added seek channel to `PlaybackHandle` and playback loop. + - `seek_to` and `set_playhead_position` commands now forward seek requests to active playback. + - Timeline seek no longer tears down and recreates playback while playing. + - Seek signaling now uses watch semantics so only latest frame target is consumed under heavy scrub load. + - Tauri playhead/seek commands now skip no-op same-frame state emission to reduce state/event churn. + +3. **Lazy decoder pool warmup on macOS AVAssetReader (2026-02-13)** + - Initial warmup now creates only a small subset of decoder instances. + - Additional decoder instances are initialized lazily when scrub patterns request them. + - Failed lazy init falls back safely to currently available decoders. + +4. **Playback benchmark runner now captures scrub and startup metrics (2026-02-13)** + - Added `scrub` benchmark mode to `playback-test-runner`. + - Playback result now includes first-frame decode and startup-to-first-frame latency. + - Scrub result now reports seek p50/p95/p99 and seek failure counts. + +5. **Playback runtime emits startup latency signals (2026-02-13)** + - Playback loop now logs first rendered frame latency. + - Audio stream setup now logs startup preparation time and first callback latency. + - Playback loop now logs seek settle latency (`seek_target_frame` to rendered frame). + +6. **Decode benchmark now supports machine-readable evidence output (2026-02-13)** + - `decode-benchmark` supports `--output-json` for structured metric capture. 
+ - Added sequential frame and random sample count controls to scale benchmark depth per hardware class. + - Supports fragmented segment directories for duration-aware benchmarking. + +7. **Timeline seek dispatch now coalesces during drag (2026-02-13)** + - Frontend seek calls are requestAnimationFrame-batched. + - Only the latest pending seek frame is sent while an async seek is in-flight. + - Duplicate same-frame seeks are dropped in both frontend dispatch and playback seek signaling. + +8. **Playback frame wait timeout now scales with target FPS (2026-02-13)** + - Replaced fixed 200ms frame fetch waits with FPS-derived bounded timeout. + - Reduces long stall windows on 60fps playback and improves real-time catch-up behavior. + - In-flight polling interval now scales with frame budget instead of fixed 5ms. + - Catch-up skip threshold now adapts with late streak depth and logs skip event telemetry. + - Warmup target and warmup timeout now scale with FPS, reducing startup buffering overhead. + - Prefetch ahead/behind windows now scale with FPS to reduce unnecessary decode pressure at lower targets. + - Prefetch parallelism now scales with FPS target to increase decode throughput under 60fps workloads. + +8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** + - `playback-test-runner` supports `--json-output` for structured report emission. + - JSON output includes command metadata, system info, summary, and per-recording test detail. + - Command metadata now includes input scope and output flags for reproducibility. + - Startup-to-first-frame threshold is configurable with `--startup-threshold-ms` and tracked as pass/fail signal. + +9. **Added JSON aggregate utility for cross-platform benchmark collation (2026-02-13)** + - `scripts/aggregate-playback-benchmarks.js` builds a markdown table from multiple JSON outputs. + - Aggregates platform/gpu/scenario-tagged runs for matrix reporting. + +10. 
**Added matrix run helper for platform/GPU benchmark execution (2026-02-13)** + - `scripts/run-playback-benchmark-matrix.js` runs `full` and `scrub` scenarios with tagged notes and JSON output. + - Automatically generates aggregate markdown for each machine run directory. + - Performs per-machine post-run validation for required scenarios and optional format requirements. + - Supports scenario subset reruns via `--scenarios` for faster targeted validation. + - Supports startup threshold tuning via `--startup-threshold-ms`. + +11. **Added matrix completeness validator (2026-02-13)** + - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. + - Supports required format checks per cell (mp4 + fragmented). + - Root `package.json` now exposes `bench:playback:*` script aliases for matrix, aggregate, and validate flows. + - Can emit structured validation JSON for artifact upload and automation. + +12. **Added matrix status report generator (2026-02-13)** + - `scripts/build-playback-matrix-report.js` generates concise matrix markdown from JSON results. + - Highlights missing cells, scenario pass/fail, and format coverage per platform/GPU row. + +13. **Added matrix finalization helper (2026-02-13)** + - `scripts/finalize-playback-matrix.js` generates aggregate markdown, status markdown, and validation JSON in one command. + - Supports optional required format enforcement during finalization. + - Also emits bottleneck analysis markdown using configurable FPS/scrub/startup thresholds. + - Can optionally publish finalized artifacts directly into benchmark history target. + +14. **Added matrix summary publisher (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. + - Keeps matrix evidence updates consistent and repeatable. + - Supports optional bottleneck analysis attachment in published summary. + +15. 
**Added bottleneck analyzer for continuous FPS optimization (2026-02-13)** + - `scripts/analyze-playback-matrix-bottlenecks.js` ranks matrix cells by FPS, startup, and scrub threshold breaches. + - Produces prioritized optimization backlog from real matrix evidence. + - Supports structured JSON output for automation and regression tracking. + +16. **Added baseline-vs-candidate comparator for regression gating (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` compares candidate matrix outputs against baseline outputs. + - Flags regressions when FPS drops or startup/scrub latency increase beyond configured tolerance. + - Exits non-zero on regressions so matrix-driven optimization loops can be gated automatically. + +17. **Added prefetch generation gating for live seek correctness and latency (2026-02-13)** + - Prefetch outputs are tagged with seek-generation IDs and stale generation frames are dropped. + - Seek events now advance generation and flush prefetch consumption to prevent old in-flight decode outputs from polluting post-seek playback. + - Reduces redundant decode/render work during aggressive scrub and improves settle reliability. + +18. **Flushed prefetched-frame buffer on seek generation changes (2026-02-13)** + - Live seek handling now clears prefetch buffer immediately on seek events. + - Prevents stale buffered frames from prior playback position from being reused after seek jumps. + - Reduces unnecessary post-seek frame scans and improves settle determinism. + +19. **Tightened in-flight prefetch buffering to current playhead (2026-02-13)** + - In-flight wait path now buffers only frames at or ahead of current frame. + - Avoids re-queueing older frames from initial start position baseline. + - Reduces avoidable prefetch buffer churn during late playback and aggressive seek scenarios. + +20. 
**Expanded comparison gating for multi-run matrix diffs (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports multiple baseline and candidate inputs. + - Enables aggregate regression gating across batched machine runs instead of one directory at a time. + - Improves reliability of continuous optimization loops when matrix outputs are split across multiple sources. + +21. **Added finalization-integrated regression gate support (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now supports `--compare-baseline` and threshold args. + - Finalization can now produce aggregate/status/validation/bottleneck artifacts and run baseline-vs-candidate gating in one command. + - Keeps optimization loops strict by failing finalize runs when regression tolerances are exceeded. + +22. **Made in-flight tracking generation-aware to avoid seek races (2026-02-13)** + - Shared in-flight frame tracking now keys entries by `(seek_generation, frame_number)`. + - Prevents old-generation decode completions from removing new-generation in-flight markers for the same frame number. + - Improves seek correctness under rapid repeated seeks to nearby frame ranges. + +23. **Added comparison artifact publishing in finalize workflows (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` now accepts optional `--comparison-md`. + - `scripts/finalize-playback-matrix.js` now forwards generated comparison artifact to publishing when both compare and publish options are enabled. + - Keeps benchmark history entries self-contained with regression gate evidence. + +24. **Separated prefetch/direct decode in-flight tracking (2026-02-13)** + - Playback now tracks prefetch in-flight frames and direct decode in-flight frames in separate generation-aware sets. + - Prevents prefetch-side clear/reset paths from clearing direct decode in-flight markers. 
+ - In-flight wait logic now checks both sets and direct decode outputs are dropped when a pending seek is detected before frame use. + +25. **Added comparison coverage gating for missing candidate rows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now reports baseline rows that are missing in candidate runs. + - Comparison now fails by default when candidate coverage is missing baseline rows. + - Optional `--allow-missing-candidate` flag keeps metric regression checks while allowing partial candidate matrices. + +26. **Fixed finalize publish ordering for comparison artifacts (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now executes baseline comparison before publish when both options are enabled. + - Prevents publish step from referencing missing comparison artifact files. + - Added finalize passthrough support for `--allow-missing-candidate`. + +27. **Added structured JSON output for comparison gating (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--output-json`. + - Emits comparison summary/regression/missing-coverage details for automation. + - `scripts/finalize-playback-matrix.js` now writes comparison markdown and JSON artifacts during baseline comparison runs. + +28. **Switched playback prefetch buffer to keyed map storage (2026-02-13)** + - Playback prefetch buffer now uses `BTreeMap` keyed by frame number. + - Removes repeated linear scans over deque entries for target frame lookup in hot playback path. + - Retains bounded buffer behavior with deterministic far-ahead/oldest eviction. + +29. **Added sorted prefetch stale-frame pruning (2026-02-13)** + - Playback loop now prunes prefetched frames older than current playhead from the keyed buffer. + - Uses ordered map operations to remove outdated frames efficiently. + - Reduces stale-buffer buildup during frame skips and sustained catch-up scenarios. + +30. 
**Published comparison gate status in matrix summaries (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` now accepts optional `--comparison-json`. + - Published matrix summary now includes comparison gate pass/fail, regression count, and missing-candidate-row count when comparison JSON is provided. + - `scripts/finalize-playback-matrix.js` now forwards both comparison markdown and comparison JSON to publish flow. + +31. **Tightened prefetch warmup/skip maintenance with keyed buffer helpers (2026-02-13)** + - Warmup first-frame timer now starts only after at least one eligible prefetched frame is present in the keyed buffer. + - Skip catch-up path now uses ordered stale-frame pruning helper instead of full-map retain filtering. + - Reduces avoidable warmup timing noise and stale-buffer maintenance overhead in high-skip playback paths. + +32. **Expanded comparison outputs with candidate-only coverage visibility (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now reports candidate-only rows that do not exist in baseline. + - Markdown and JSON comparison outputs now include both missing-candidate and candidate-only coverage summaries. + - Improves matrix diff diagnostics when test coverage differs between baseline and candidate runs. + +33. **Extended published comparison summary fields (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` now includes candidate-only row count from comparison JSON in published matrix summary bullets. + - Keeps published matrix evidence aligned with expanded comparison coverage diagnostics. + +34. **Published comparison policy mode in summary output (2026-02-13)** + - Published matrix summary now includes comparison policy modes for missing-candidate and candidate-only coverage handling. + - Keeps published evidence explicit about whether coverage gaps were allowed or gated in the comparison run. + +35. 
**Added strict candidate-only gating option for comparison workflows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-candidate-only`. + - When enabled, comparison exits non-zero if candidate contains rows not present in baseline. + - `scripts/finalize-playback-matrix.js` now forwards the same strict option in integrated compare flows. + +36. **Required contiguous prefetched frames for warmup readiness (2026-02-13)** + - Playback warmup readiness now checks contiguous prefetched frame coverage from current frame. + - Avoids treating sparse/non-contiguous prefetched entries as equivalent to contiguous startup readiness. + - Reduces early playback start jitter risk when warmup buffer is fragmented. + +37. **Added finalize summary JSON artifact output (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now supports optional `--output-json`. + - Finalize now emits `playback-finalize-summary.json` by default in output directory. + - Summary JSON includes artifact paths, settings, and validation/comparison pass flags for automation. + +38. **Optimized contiguous warmup coverage scan on keyed buffer (2026-02-13)** + - Contiguous prefetched-frame counting now walks ordered keys via map range iteration. + - Reduces repeated keyed lookups during warmup readiness checks. + - Preserves contiguous coverage semantics while lowering per-loop lookup overhead. + +39. **Added git metadata to finalize summary artifacts (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now records git branch and commit SHA in finalize summary JSON output. + - Improves traceability of benchmark artifacts to exact source revision. + +40. **Wired finalize summary artifact into publish flow (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now generates finalize summary JSON before publish step. + - Finalize now forwards `--finalize-summary-json` to `publish-playback-matrix-summary.js`. 
+ - Published matrix summaries can now include finalize artifact metadata in one-shot finalize runs. + +41. **Improved comparison aggregation across multi-input runs (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now aggregates metrics per comparison key across all contributing input reports instead of last-write-wins replacement. + - Comparison output now includes baseline/candidate run counts per row to surface aggregation depth. + - Fixed comparison regression evaluation to use explicit options parameter wiring inside compare function. + +42. **Skipped contiguous warmup scans before first eligible frame (2026-02-13)** + - Warmup loop now defers contiguous-prefetch counting until first warmup frame arrival is observed. + - Reduces avoidable buffer scan work during pre-frame warmup wait. + +43. **Added minimum sample-count gating for matrix comparisons (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--min-samples-per-row`. + - Comparison now flags rows with insufficient effective sample counts and reports them in markdown/JSON outputs. + - `scripts/finalize-playback-matrix.js` forwards minimum sample gating settings into compare stage, and publish summary now surfaces sample gating status fields. + +44. **Fixed sample gating semantics for non-comparable metrics (2026-02-13)** + - Minimum sample checks now only consider metrics that are actually comparable for the row. + - Prevents scrub sample requirements from incorrectly failing non-scrub comparison rows. + - Comparison output now includes compared metric count and effective sample count per row. + +45. **Extended finalize summary comparison diagnostics (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now includes comparison failure reasons and gate outcomes in summary results. + - `scripts/publish-playback-matrix-summary.js` now surfaces finalize comparison failure reasons when present. + +46. 
**Cached warmup contiguous coverage counts during warmup (2026-02-13)** + - Warmup loop now recomputes contiguous prefetched coverage only when warmup buffer content changes. + - Avoids repeated contiguous scans on idle warmup iterations. + +47. **Added explicit comparison gate diagnostics in JSON and published summaries (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now emits `failureReasons` and `gateOutcomes` in summary JSON. + - `scripts/publish-playback-matrix-summary.js` now surfaces comparison failure reasons when present. + +48. **Added parse-error gating and parse stats to comparison flows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-parse-errors`. + - Comparison JSON now includes baseline/candidate file parsing stats and parse error entries. + - `scripts/finalize-playback-matrix.js` now forwards parse-error gating option to compare stage; published summary surfaces parse policy and parse error counts. + +49. **Made keyed prefetch insert helper report structural changes (2026-02-13)** + - `insert_prefetched_frame` now returns whether keyed prefetch buffer changed (insert and/or trim). + - Warmup loop now uses this direct signal instead of length-only delta checks for contiguous coverage cache invalidation. + - Improves warmup cache correctness when inserts and trims occur with stable overall buffer length. + +50. **Extended finalize summary with comparison file stats (2026-02-13)** + - Finalize summary JSON now includes comparison file stats payload when comparison is enabled. + - Publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. + +51. **Stabilized comparison report ordering for reproducibility (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now deterministically sorts comparison, missing, candidate-only, and insufficient-sample rows. + - Keeps markdown/JSON outputs stable across repeated runs with identical inputs. + +52. 
**Extended finalize summary with comparison count rollups (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now captures comparison count rollups in summary results (compared rows, regressions, missing/candidate-only/insufficient-sample counts). + - `scripts/publish-playback-matrix-summary.js` now surfaces these finalize comparison counts in published summaries. + +53. **Added optional zero-comparison gating (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-zero-compared`. + - Enables strict failure when comparison processing yields zero comparable rows. + - `scripts/finalize-playback-matrix.js` forwards zero-comparison gating option in integrated compare flows. + +54. **Added warmup-stage seek handling before playback loop entry (2026-02-13)** + - Warmup loop now consumes seek updates immediately instead of waiting for playback loop start. + - Seek during warmup now resets warmup timers/buffer state and updates frame/audio playhead targets immediately. + - Improves responsiveness when users seek while playback is still warming up. + +55. **Added optional skipped-file gating for comparison workflows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-skipped-files`. + - Enables strict failure when baseline/candidate inputs include skipped JSON files without usable benchmark payloads. + - `scripts/finalize-playback-matrix.js` forwards skipped-file gating option in integrated compare flows. + +56. **Added skipped-file reason breakdown in comparison file stats (2026-02-13)** + - Comparison file stats now report skipped-file reasons as `skippedNoReports` and `skippedNoUsableMetrics`. + - Published summary now surfaces skipped-file breakdown for baseline and candidate inputs. + +57. **Scaled warmup idle poll interval by frame budget (2026-02-13)** + - Warmup loop fallback poll now scales with frame duration and stays in bounded low-latency range. 
+ - Reduces fixed 100ms idle poll delay during warmup while avoiding high-frequency busy polling. + +58. **Retained in-flight prefetch markers for small frame-request shifts (2026-02-13)** + - Frame-request rebases now clear in-flight marker sets only for backward or large-distance seeks that also reset in-flight futures. + - Prevents duplicate decode scheduling caused by clearing marker sets while earlier in-flight futures are still active. + +59. **Added local in-flight frame tracking inside prefetch scheduler (2026-02-13)** + - Prefetch scheduler now tracks active frame numbers locally and uses this set for duplicate scheduling checks. + - Reduces repeated shared lock reads in prefetch hot-path while preserving cross-thread in-flight visibility. + +60. **Batched warmup prefetch queue consumption (2026-02-13)** + - Warmup stage now drains all immediately available prefetched frames on each receive wake-up. + - Improves warmup readiness convergence by reducing one-frame-per-iteration queue handling overhead. + +61. **Scaled prefetch idle polling by frame budget (2026-02-13)** + - Prefetch scheduler idle-yield interval now scales with target frame duration in a bounded low-latency range. + - Reduces fixed 1ms wakeup churn in empty in-flight periods while keeping prefetch request responsiveness high. + +62. **Bounded behind-prefetch dedupe memory growth (2026-02-13)** + - Behind-prefetch dedupe tracking now keeps a bounded eviction-ordered window instead of unbounded growth over long playback sessions. + - Prevents long-session hash-set expansion from degrading behind-prefetch lookup efficiency. + +63. **Cached clip-offset lookups for decode scheduling paths (2026-02-13)** + - Prefetch and direct-decode paths now use cached clip-index-to-offset maps instead of repeated linear clip scans. + - Reduces per-frame scheduling overhead in playback and prefetch loops when projects contain many clips. + +64. 
**Deduplicated frame-request watch updates (2026-02-13)** + - Playback loop now uses change-aware frame-request signaling instead of unconditional watch broadcasts. + - Reduces redundant prefetch wakeups and channel churn when requested frame value does not change. + +65. **Removed duplicate keyed-buffer lookups during prefetch insert (2026-02-13)** + - Prefetch buffer insertion now uses a single `BTreeMap::entry` match to detect insertion and store new frames. + - Eliminates the prior contains-check plus entry-insert double lookup in prefetch hot path. + +66. **Centralized change-aware frame request signaling (2026-02-13)** + - Playback now uses a shared helper for change-aware frame-request watch updates across warmup/seek/skip paths. + - Reduces duplicated watch-update closure logic and keeps no-op request dedupe behavior consistent in all frame-request call sites. + +67. **Short-circuited frame waits when seek updates are pending (2026-02-13)** + - In-flight frame wait polling now exits early when a seek change is observed, instead of waiting through full wait budgets. + - Startup prefetch wait path now also bails out immediately when seek state changes during wait. + +68. **Added pre-wait seek guards before startup and direct decode waits (2026-02-13)** + - Startup prefetch wait path now checks for pending seeks before entering timeout waits and before skip fallback on timeout. + - Direct decode fallback path now checks for pending seek updates before scheduling synchronous decode work. + +69. **Batched keyed-buffer trims during queue-drain insertion (2026-02-13)** + - Warmup and playback queue-drain paths now insert prefetched frames without per-item trim checks and apply one bounded trim pass after the batch. + - Reduces repeated trim work when multiple prefetched frames are drained in the same loop iteration. + +70. 
**Limited prefetch state resets to major/backward rebases (2026-02-13)** + - Frame-request rebases now only reset decoded-ramp and behind-prefetch tracking on backward seeks or large seek-distance jumps. + - Preserves prefetch ramp state on small forward rebases to reduce unnecessary throughput drops. + +71. **Gated behind-prefetch scans to one pass per playback frame (2026-02-13)** + - Behind-prefetch scheduling now scans at most once for each observed playback frame value. + - Avoids repeated behind-window scan work in tight scheduler loops when playback position has not advanced. + +72. **Batched in-flight wait-path prefetch trims (2026-02-13)** + - In-flight wait buffering now inserts prefetched frames without per-frame trim checks while waiting. + - Applies one bounded trim pass after wait-loop buffering to reduce repeated trim overhead under burst receive windows. --- ## Root Cause Analysis Archive -*(Document investigated issues here)* +1. **Audio start delay from full-track prerender** + - Root cause: playback startup used `create_stream_prerendered()` for all sample formats, forcing full timeline audio render before output stream started. + - Fix direction: switch default to incremental `AudioPlaybackBuffer` path with bounded prefill and live playhead correction. + +2. **Scrub lag from playback restart loop** + - Root cause: timeline seek while playing called stop → seek → start, rebuilding playback/audio state on every interactive seek. + - Fix direction: add live seek channel into running playback loop and route frontend seeks to it. + +3. **Display decoder init inflation on macOS** + - Root cause: AVAssetReader decoder pool eagerly initialized multiple decoders during startup. + - Fix direction: reduce eager warmup and lazily instantiate additional pool decoders when scrub behavior actually needs them. 
--- @@ -199,6 +562,203 @@ Decoder Pipeline: --- +### Session 2026-02-13 (Audio Startup + Live Seek + Lazy Decoder Warmup) + +**Goal**: Remove major editor playback bottlenecks affecting startup latency, scrub responsiveness, and decoder init overhead. + +**What was done**: +1. Switched playback audio startup default to streaming buffer path. +2. Kept prerender audio path behind `CAP_AUDIO_PRERENDER_PLAYBACK` as explicit fallback. +3. Enabled `AudioPlaybackBuffer` for all platforms so Windows uses live buffering/sync path. +4. Added a seek channel to `PlaybackHandle` and integrated seek handling into the main playback loop. +5. Updated Tauri seek/playhead commands to forward seeks into active playback handle. +6. Removed frontend timeline stop/start cycle when seeking while playing. +7. Reduced AVAssetReader eager pool warmup and added lazy decoder instantiation for additional pool slots. +8. Extended playback benchmark tooling with scrub mode and startup latency metrics. +9. Added playback runtime startup telemetry logs for first frame and audio callback bring-up. +10. Enhanced decode benchmark example with structured JSON output and configurable sample depth. +11. Added timeline seek dispatch coalescing to reduce seek command storms during drag. +12. Added JSON report output support to playback-test-runner for benchmark evidence collection. +13. Added cross-platform benchmark JSON aggregation utility script. +14. Added matrix execution helper script for full + scrub benchmark runs per platform/GPU. +15. Added matrix validation script for required cell and format coverage checks. +16. Added matrix status report generator for concise artifact summaries. +17. Added one-shot finalization script for aggregate + status + validation outputs. +18. Added benchmark history publisher script for finalized matrix artifacts. +19. Added matrix bottleneck analysis script for prioritized FPS optimization follow-up. +20. 
Added baseline-vs-candidate comparison script to gate regressions in optimization loops. +21. Added seek-generation prefetch gating to drop stale decode outputs after live seek updates. +22. Cleared prefetched-frame buffer on live seek handling to avoid stale buffered frame reuse. +23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. +24. Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. +25. Added optional baseline comparison gating inside matrix finalization workflow. +26. Made in-flight frame tracking generation-aware to prevent cross-seek marker collisions. +27. Split prefetch and direct decode in-flight tracking and guarded direct decode frame usage when seek updates are pending. +28. Added missing-candidate-row coverage gating in baseline-vs-candidate comparison script with optional override flag. +29. Fixed finalize compare/publish ordering so comparison artifacts exist before publish attachment and added finalize support for missing-candidate override. +30. Added structured JSON output for baseline-vs-candidate comparison script and wired finalize comparison runs to emit comparison JSON artifacts. +31. Added comparison artifact attachment support in publish/finalize matrix summary workflows. +32. Replaced playback prefetch deque scans with keyed `BTreeMap` buffering for lower lookup overhead in frame acquisition path. +33. 
Added ordered stale-frame pruning in keyed prefetch buffer to keep playback buffer aligned with current playhead. +34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. +35. Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. +36. Expanded baseline-vs-candidate comparison outputs with candidate-only row reporting. +37. Added strict `fail-on-candidate-only` gating option for compare/finalize matrix comparison workflows. +38. Added candidate-only row count reporting in published matrix summary comparison status bullets. +39. Updated playback warmup start condition to require contiguous prefetched frame coverage from current frame. +40. Added comparison policy mode reporting (allow/fail) for missing-candidate and candidate-only coverage in published matrix summaries. +41. Added finalize summary JSON artifact output with artifact/settings/result metadata for automation workflows. +42. Optimized contiguous prefetched-frame warmup scan using ordered map range iteration instead of repeated key lookups. +43. Added git branch/commit metadata into finalize summary JSON artifacts for source traceability. +44. Wired finalize summary JSON into publish flow so one-shot finalize runs can publish summary metadata alongside matrix artifacts. +45. Improved multi-input comparison aggregation by merging per-key metrics across runs and surfacing baseline/candidate run counts per comparison row. +46. Skipped contiguous warmup coverage scans until first warmup frame observation to reduce pre-frame warmup loop scan overhead. +47. Added minimum-sample comparison gating with `--min-samples-per-row`, insufficient-sample reporting, and finalize passthrough support. +48. Cached warmup contiguous coverage values and only recomputed contiguous scan when warmup buffer changed. +49. 
Added comparison JSON gate diagnostics (`failureReasons`, `gateOutcomes`) and surfaced failure reasons in published summary output. +50. Corrected minimum sample gating semantics to only count comparable metrics and added compared-metric/effective-sample columns in comparison output. +51. Extended finalize summary and publish output with comparison failure reasons and gate outcome metadata. +52. Added comparison parse-error gating (`--fail-on-parse-errors`) with parse stats surfaced in comparison JSON, finalize settings, and published summaries. +53. Updated keyed prefetch insert helper to emit structural-change signals for warmup contiguous coverage cache invalidation. +54. Extended finalize summary and publish output with comparison file stats (including parse error counts). +55. Stabilized comparison output ordering with deterministic sorting for comparison rows and coverage-delta sections. +56. Extended finalize and publish summaries with comparison count rollups (compared rows, regressions, missing/candidate-only/insufficient-sample counts). +57. Added optional zero-comparison gating (`--fail-on-zero-compared`) for compare/finalize flows and surfaced zero-compare policy in comparison/published summaries. +58. Added warmup-stage seek handling to apply seeks immediately while playback warmup is in progress. +59. Added optional skipped-file gating (`--fail-on-skipped-files`) for compare/finalize flows and surfaced skipped-file policy in comparison/published summaries. +60. Added skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`) into comparison file stats and published summaries. +61. Scaled warmup idle poll interval with frame budget to reduce warmup fallback latency under sparse frame arrival. +62. Retained in-flight prefetch markers for small frame-request shifts to avoid duplicate decode scheduling during active prefetch execution. +63. 
Added local in-flight frame tracking in prefetch scheduler to reduce lock-heavy duplicate-check lookups on scheduling hot-path. +64. Batched warmup prefetch queue consumption to reduce warmup staging overhead and improve contiguous warmup-fill responsiveness. +65. Scaled prefetch idle polling with frame budget to reduce scheduler wakeup churn during empty in-flight periods. +66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. +67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. +68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. +69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. +70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. +71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. +72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. +73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. +74. Limited prefetch state resets to backward/major frame-request rebases so small forward rebases preserve decode-ramp and behind-prefetch tracking state. +75. Gated behind-prefetch scheduling scans to one pass per playback frame to avoid repeated behind-window scan churn while playback position is unchanged. +76. Batched in-flight wait-path prefetch trimming so buffered wait inserts trim once per wait pass instead of per buffered frame. 
+ +**Changes Made**: +- `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. +- `crates/editor/src/audio.rs`: cross-platform `AudioPlaybackBuffer`, windows-only smooth seek helper. +- `apps/desktop/src-tauri/src/lib.rs`: forward `seek_to` and `set_playhead_position` into active playback handle. +- `apps/desktop/src/routes/editor/Timeline/index.tsx`: seek while playing now sends direct `seekTo` without playback restart. +- `crates/rendering/src/decoder/avassetreader.rs`: lower eager decoder warmup and lazy pool growth. +- `crates/recording/examples/playback-test-runner.rs`: added scrub command and startup/scrub latency metrics. +- `crates/editor/PLAYBACK-BENCHMARKS.md`: updated benchmark reference and metric definitions. +- `crates/editor/src/playback.rs`: added first-render and audio-callback startup latency logging. +- `crates/editor/examples/decode-benchmark.rs`: added `--output-json`, startup metrics, and configurable sequential/random sampling. +- `apps/desktop/src/routes/editor/Timeline/index.tsx`: added requestAnimationFrame-based seek coalescing with in-flight protection. +- `crates/recording/examples/playback-test-runner.rs`: added `--json-output` to emit structured benchmark reports. +- `scripts/aggregate-playback-benchmarks.js`: added markdown aggregation for multiple playback benchmark JSON artifacts. +- `scripts/run-playback-benchmark-matrix.js`: added orchestrated full/scrub benchmark runner with per-machine aggregate generation. +- `scripts/validate-playback-matrix.js`: added required matrix cell/format validation for aggregated evidence. +- `scripts/build-playback-matrix-report.js`: added concise matrix status report generation from JSON benchmark outputs. +- `scripts/finalize-playback-matrix.js`: added one-shot matrix artifact finalization workflow. +- `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. 
+- `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. +- `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. +- `scripts/compare-playback-benchmark-runs.js`: fixed options wiring inside comparison regression checks and now aggregates per-key metrics across multi-input runs with run-count reporting. +- `scripts/compare-playback-benchmark-runs.js`: comparison row sets are now deterministically sorted for stable markdown/json artifact diffs. +- `scripts/compare-playback-benchmark-runs.js`: added optional `--fail-on-zero-compared` and zero-compare gate diagnostics in markdown/json outputs. +- `scripts/finalize-playback-matrix.js`: forwards `--fail-on-zero-compared` into compare stage and records policy in finalize summary settings. +- `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison count rollup fields for compared rows, regressions, and coverage deltas. +- `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize comparison count rollups when finalize summary metadata is attached. +- `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. +- `crates/editor/src/playback.rs`: warmup loop now handles seek updates immediately, resetting warmup state and updating frame/audio targets before playback loop entry. +- `crates/editor/src/playback.rs`: warmup loop fallback polling now scales with frame budget instead of fixed 100ms sleep to improve responsiveness without busy waiting. +- `crates/editor/src/playback.rs`: frame-request rebases now preserve in-flight marker sets unless in-flight futures are explicitly reset for backward/large seek changes. 
+- `crates/editor/src/playback.rs`: prefetch scheduler now uses a local in-flight frame set for duplicate scheduling checks and mirrors it into shared generation-keyed in-flight markers for playback coordination. +- `crates/editor/src/playback.rs`: warmup prefetch receive path now drains immediately queued prefetched frames in batches to accelerate warmup buffer population. +- `crates/editor/src/playback.rs`: prefetch scheduler idle polling now scales with frame budget (bounded) instead of fixed 1ms delay, reducing idle wakeup overhead. +- `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. +- `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. +- `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. +- `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. +- `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. +- `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. +- `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. +- `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. 
+- `crates/editor/src/playback.rs`: frame-request rebases now only clear prefetch ramp/behind-tracking state on backward or large-distance jumps, preserving throughput state on small forward rebases. +- `crates/editor/src/playback.rs`: behind-prefetch scheduling now scans at most once per playback frame value, reducing repeated behind-window scan overhead in tight scheduler loops. +- `crates/editor/src/playback.rs`: in-flight wait buffering now uses untrimmed inserts plus a single post-wait trim pass, reducing repeated keyed-buffer trim operations during wait-path burst buffering. +- `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. +- `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. +- `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. +- `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. +- `scripts/compare-playback-benchmark-runs.js`: comparison outputs now include candidate-only rows in addition to missing-candidate coverage deltas. +- `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. +- `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. +- `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-skipped-files` gate and parse/skip policy reporting in comparison markdown/json outputs. +- `scripts/compare-playback-benchmark-runs.js`: comparison file stats now include skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`). 
+- `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. +- `scripts/finalize-playback-matrix.js`: forwards `--fail-on-skipped-files` into compare stage and records skipped-file policy in finalize summary settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes missing-candidate and candidate-only coverage policy modes from comparison JSON tolerance settings. +- `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. +- `crates/editor/src/playback.rs`: contiguous warmup coverage scan now uses ordered map range iteration to reduce repeated key lookup overhead. +- `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. +- `scripts/finalize-playback-matrix.js`: added optional `--output-json` and default finalize summary JSON emission with artifact path and pass/fail metadata. +- `scripts/finalize-playback-matrix.js`: finalize summary JSON now includes git branch and commit metadata when available. +- `scripts/finalize-playback-matrix.js`: finalize now writes summary JSON before publish and passes `--finalize-summary-json` into publish flow. +- `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional finalize summary JSON input and surfaces finalize source/validation metadata. +- `scripts/compare-playback-benchmark-runs.js`: added `--min-samples-per-row`, insufficient-sample row reporting, and sample gate fields in markdown/JSON outputs. 
+- `scripts/compare-playback-benchmark-runs.js`: minimum sample checks now apply only to metrics that are comparable for each row; output now includes compared metric count and effective sample count columns. +- `scripts/finalize-playback-matrix.js`: forwards `--min-samples-per-row` into compare stage and captures it in finalize summary settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. +- `scripts/compare-playback-benchmark-runs.js`: comparison JSON summary now includes explicit `failureReasons` and `gateOutcomes` fields. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes comparison failure reasons when present. +- `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison failure reasons and gate outcome fields in results metadata. +- `scripts/compare-playback-benchmark-runs.js`: added parse-error gating and baseline/candidate file parse stats/parse error entries in JSON output. +- `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes skipped-file policy mode from comparison tolerance settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes skipped-file breakdown counts for no-reports and no-usable-metrics cases. +- `crates/editor/src/playback.rs`: `insert_prefetched_frame` now returns structural-change signals and warmup cache invalidation uses this signal to avoid stale contiguous counts when insert+trim keeps buffer length unchanged. 
+- `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison file stats payload when comparison runs are enabled. +- `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. +- `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. +- `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. +- `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. +- `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. +- `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. +- `scripts/finalize-playback-matrix.js`: finalize publish pass now forwards both comparison markdown and comparison JSON artifacts. +- `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. +- `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. +- `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. +- `scripts/compare-playback-benchmark-runs.js`: comparison gating now accepts multiple baseline and candidate inputs for aggregated matrix regression checks. +- `scripts/finalize-playback-matrix.js`: finalization now supports optional baseline comparison gating and threshold controls in the same pass. 
+- `crates/editor/src/playback.rs`: in-flight frame markers now include seek generation to prevent old decode paths from clearing current-generation markers. +- `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional comparison artifact attachment. +- `scripts/finalize-playback-matrix.js`: finalize flow now includes comparison artifact when publishing and baseline comparison are both requested. +- `crates/editor/src/playback.rs`: prefetch and direct decode now use separate generation-aware in-flight sets, with combined checks in frame wait path. +- `scripts/compare-playback-benchmark-runs.js`: comparison now reports and gates missing candidate rows relative to baseline coverage. +- `scripts/finalize-playback-matrix.js`: comparison now runs before publish in combined workflows and forwards missing-candidate override to compare step. +- `scripts/compare-playback-benchmark-runs.js`: comparison now supports optional structured JSON output for downstream automation. +- `scripts/finalize-playback-matrix.js`: baseline comparison in finalize now writes both markdown and JSON comparison artifacts. + +**Results**: +- ✅ `cargo +stable check -p cap-editor` passes after changes. +- ✅ `cargo +stable check -p cap-rendering` passes after changes. +- ✅ `pnpm --dir apps/desktop exec tsc --noEmit` passes after frontend seek changes. +- ⚠️ `cargo +stable check -p cap-desktop` and `cargo +stable run -p cap-recording --example playback-test-runner -- list` fail in this Linux environment because `scap-targets` does not currently compile on this target (`DisplayIdImpl`/`WindowImpl` unresolved), preventing local benchmark execution here. +- ⚠️ Cross-platform FPS/scrub/A-V benchmark evidence still pending on macOS and Windows devices with real recordings. + +**Stopping point**: Core playback code-path optimizations are implemented and compiling in touched crates; next step is benchmark execution on macOS 13+ and Windows GPU matrix to quantify gains. 
+ +--- + ### Session 2026-01-28 (Initial Baseline - MP4) **Goal**: Establish initial playback performance baseline @@ -327,6 +887,7 @@ The CPU RGBA→NV12 conversion was taking 15-25ms per frame for 3024x1964 resolu ## References - `PLAYBACK-BENCHMARKS.md` - Raw performance test data (auto-updated by test runner) +- `PLAYBACK-MATRIX-RUNBOOK.md` - Cross-platform playback evidence collection process - `../recording/FINDINGS.md` - Recording performance findings (source of test files) - `../recording/BENCHMARKS.md` - Recording benchmark data - `examples/playback-test-runner.rs` - Playback test implementation diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md new file mode 100644 index 0000000000..b4302f8808 --- /dev/null +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -0,0 +1,221 @@ +# Playback Matrix Runbook + +This runbook defines how to collect benchmark evidence for desktop playback performance and sync validation across required hardware classes. + +## Target matrix + +| Platform | GPU class | Required scenarios | +|---|---|---| +| macOS 13+ | Apple Silicon | full, scrub | +| Windows 11 | NVIDIA discrete | full, scrub | +| Windows 11 | AMD discrete | full, scrub | +| Windows 11 | Integrated baseline | full, scrub | + +## Preconditions + +1. Build can run on target machine. +2. Real-device recording outputs are available. +3. Recordings include both MP4 and fragmented samples. +4. Node and Rust toolchains are installed. 
+ +## Inputs and output directories + +Set these per machine: + +- `INPUT_DIR`: recording root (default `/tmp/cap-real-device-tests`) +- `OUT_DIR`: machine-local output folder for JSON and aggregate markdown + +Example: + +```bash +export INPUT_DIR="/tmp/cap-real-device-tests" +export OUT_DIR="/tmp/cap-playback-matrix/macos-apple-silicon" +mkdir -p "$OUT_DIR" +``` + +## Machine run command + +Run this once per platform/GPU class: + +```bash +node scripts/run-playback-benchmark-matrix.js \ + --platform "<platform>" \ + --gpu "<gpu>" \ + --output-dir "$OUT_DIR" \ + --fps 60 \ + --startup-threshold-ms 250 \ + --require-formats mp4,fragmented \ + --scenarios full,scrub \ + --input-dir "$INPUT_DIR" +``` + +Equivalent shortcut: + +```bash +pnpm bench:playback:matrix -- --platform "<platform>" --gpu "<gpu>" --output-dir "$OUT_DIR" --fps 60 --startup-threshold-ms 250 --require-formats mp4,fragmented --scenarios full,scrub --input-dir "$INPUT_DIR" +``` + +Rerun only scrub scenario for a machine: + +```bash +pnpm bench:playback:matrix -- --platform "<platform>" --gpu "<gpu>" --output-dir "$OUT_DIR" --fps 60 --scenarios scrub --input-dir "$INPUT_DIR" +``` + +Examples: + +```bash +node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/cap-playback-matrix/macos-apple-silicon --fps 60 --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir C:\temp\cap-playback-matrix\windows-nvidia --fps 60 --input-dir C:\temp\cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu amd-discrete --output-dir C:\temp\cap-playback-matrix\windows-amd --fps 60 --input-dir C:\temp\cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu integrated --output-dir C:\temp\cap-playback-matrix\windows-integrated --fps 60 --input-dir C:\temp\cap-real-device-tests +``` + +## Outputs produced per machine + +Each run directory contains: + +- 
timestamped `full` scenario JSON +- timestamped `scrub` scenario JSON +- `<platform>-<gpu>-aggregate.md` summary table +- `<platform>-<gpu>-validation.json` matrix validation result + +## Cross-machine aggregation + +After collecting all machine folders into a shared root: + +```bash +node scripts/aggregate-playback-benchmarks.js --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md +node scripts/build-playback-matrix-report.js --input /path/to/all-machine-results --output /tmp/playback-matrix-status.md +``` + +Equivalent shortcut: + +```bash +pnpm bench:playback:aggregate -- --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md +pnpm bench:playback:report -- --input /path/to/all-machine-results --output /tmp/playback-matrix-status.md +``` + +Validate matrix completeness: + +```bash +node scripts/validate-playback-matrix.js --input /path/to/all-machine-results --require-formats mp4,fragmented +``` + +Equivalent shortcut: + +```bash +pnpm bench:playback:validate -- --input /path/to/all-machine-results --require-formats mp4,fragmented +``` + +One-shot finalize command: + +```bash +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json +``` + +Finalize summary JSON includes artifact paths, gate outcomes, settings, and git branch/commit metadata when available. +When comparison is enabled, finalize summary JSON also includes comparison failure reasons and gate outcome details. +When comparison is enabled, finalize summary JSON also includes comparison file stats such as baseline/candidate parse error counts. +When comparison is enabled, finalize summary JSON also includes comparison summary counts for compared rows, regressions, and coverage deltas. 
+ +Include optimization thresholds when finalizing: + +```bash +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 + +# include baseline comparison gate during finalization +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 + +# optional: allow missing candidate rows during compare gate +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate + +# optional: fail finalize compare gate when candidate includes rows absent in baseline +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only + +# optional: require minimum sample count per compared row +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 + +# optional: fail comparison gate when any input JSON fails to parse +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors + +# optional: fail comparison gate when no comparable rows remain +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-zero-compared + +# optional: fail comparison gate when any input JSON is skipped +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline 
/path/to/baseline-results --fail-on-skipped-files +``` + +Finalize and publish to benchmark history in one command: + +```bash +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md +``` + +Publish finalized artifacts into benchmark history: + +```bash +pnpm bench:playback:publish -- \ + --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md \ + --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ + --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ + --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md + +pnpm bench:playback:publish -- \ + --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md \ + --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ + --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ + --comparison-md /tmp/playback-matrix-final/playback-comparison.md \ + --comparison-json /tmp/playback-matrix-final/playback-comparison.json \ + --finalize-summary-json /tmp/playback-matrix-final/playback-finalize-summary.json +``` + +Generate bottleneck analysis for optimization backlog: + +```bash +pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tmp/playback-matrix-final/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tmp/playback-matrix-final/playback-bottlenecks.md --output-json /tmp/playback-matrix-final/playback-bottlenecks.json --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +``` + +Compare candidate run against baseline and fail on regressions: + +```bash +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results 
--output /tmp/playback-matrix-final/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 + +# multiple baseline/candidate directories can be provided +pnpm bench:playback:compare -- --baseline /path/to/baseline-a --baseline /path/to/baseline-b --candidate /path/to/candidate-a --candidate /path/to/candidate-b --output /tmp/playback-matrix-final/playback-comparison.md + +# optional: allow missing candidate rows while still checking metric regressions +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate + +# emit structured JSON alongside markdown for automation +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --output-json /tmp/playback-matrix-final/playback-comparison.json + +# compare output now includes both missing-candidate rows and candidate-only rows +# optional: fail compare gate when candidate includes rows absent in baseline +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only + +# when multiple inputs are provided, comparison output includes baseline/candidate run counts per row +# optional: require minimum sample count per compared row +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 + +# comparison JSON includes failureReasons and gateOutcomes for automation +# minimum sample gating uses metrics that are actually comparable for each row +# optional: fail comparison gate when any input JSON fails to parse +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors + +# optional: fail comparison gate when no comparable rows remain after filtering +pnpm bench:playback:compare -- 
--baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-zero-compared + +# optional: fail comparison gate when any input JSON is skipped (no reports/usable metrics) +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-skipped-files + +# comparison file stats include skipped-file breakdown (no_reports / no_usable_metrics) +``` + +## Evidence checklist + +1. Confirm all matrix rows exist. +2. Confirm each row has both `full` and `scrub` scenarios. +3. Capture aggregate markdown and raw JSON artifacts. +4. Attach outputs to playback findings update. diff --git a/crates/editor/examples/decode-benchmark.rs b/crates/editor/examples/decode-benchmark.rs index d29ab2dda0..b7e74ce4c3 100644 --- a/crates/editor/examples/decode-benchmark.rs +++ b/crates/editor/examples/decode-benchmark.rs @@ -1,12 +1,19 @@ use cap_rendering::decoder::{AsyncVideoDecoderHandle, spawn_decoder}; +use serde::Serialize; +use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; use std::time::Instant; +use std::time::{SystemTime, UNIX_EPOCH}; use tokio::runtime::Runtime; const DEFAULT_DURATION_SECS: f32 = 60.0; fn get_video_duration(path: &Path) -> f32 { + if path.is_dir() { + return get_fragmented_video_duration(path); + } + let output = Command::new("ffprobe") .args([ "-v", @@ -33,28 +40,137 @@ fn get_video_duration(path: &Path) -> f32 { } } +fn get_fragmented_video_duration(path: &Path) -> f32 { + let init_segment = path.join("init.mp4"); + if !init_segment.exists() { + eprintln!( + "Warning: Fragmented input {} missing init.mp4", + path.display() + ); + return DEFAULT_DURATION_SECS; + } + + let mut fragments: Vec = match fs::read_dir(path) { + Ok(entries) => entries + .filter_map(|entry| entry.ok()) + .map(|entry| entry.path()) + .filter(|entry| entry.extension().is_some_and(|ext| ext == "m4s")) + .collect(), + Err(error) => { + eprintln!( + "Warning: Failed to read fragmented directory 
{}: {}", + path.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + }; + fragments.sort(); + + if fragments.is_empty() { + eprintln!( + "Warning: Fragmented input {} has no .m4s segments", + path.display() + ); + return DEFAULT_DURATION_SECS; + } + + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|value| value.as_nanos()) + .unwrap_or(0); + let combined_path = + std::env::temp_dir().join(format!("cap-decode-benchmark-combined-{timestamp}.mp4")); + + let mut combined_data = match fs::read(&init_segment) { + Ok(data) => data, + Err(error) => { + eprintln!( + "Warning: Failed to read init segment {}: {}", + init_segment.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + }; + + for fragment in fragments { + match fs::read(&fragment) { + Ok(data) => combined_data.extend(data), + Err(error) => { + eprintln!( + "Warning: Failed to read segment {}: {}", + fragment.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + } + } + + if let Err(error) = fs::write(&combined_path, &combined_data) { + eprintln!( + "Warning: Failed to write combined fragmented video {}: {}", + combined_path.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + + let duration = get_video_duration(&combined_path); + if let Err(error) = fs::remove_file(&combined_path) { + eprintln!( + "Warning: Failed to remove temporary combined file {}: {}", + combined_path.display(), + error + ); + } + duration +} + #[derive(Debug, Clone)] struct BenchmarkConfig { video_path: PathBuf, fps: u32, iterations: usize, + sequential_frames: usize, + random_samples: usize, + output_json: Option<PathBuf>, } -#[derive(Debug, Default)] +#[derive(Debug, Default, Serialize)] struct BenchmarkResults { decoder_creation_ms: f64, sequential_decode_times_ms: Vec<f64>, + first_frame_decode_ms: f64, + startup_to_first_frame_ms: f64, + sequential_p50_ms: f64, + sequential_p95_ms: f64, + sequential_p99_ms: f64, sequential_fps: f64, sequential_failures: usize, seek_times_by_distance: Vec<(f32, 
f64)>, seek_failures: usize, random_access_times_ms: Vec<f64>, random_access_avg_ms: f64, + random_access_p50_ms: f64, + random_access_p95_ms: f64, + random_access_p99_ms: f64, random_access_failures: usize, cache_hits: usize, cache_misses: usize, } +#[derive(Debug, Serialize)] +struct BenchmarkOutput { + video_path: PathBuf, + fps: u32, + iterations: usize, + sequential_frames: usize, + random_samples: usize, + results: BenchmarkResults, +} + impl BenchmarkResults { fn print_report(&self) { println!("\n{}", "=".repeat(60)); @@ -96,6 +212,14 @@ impl BenchmarkResults { println!(" Avg decode time: {avg:.2}ms"); println!(" Min decode time: {min:.2}ms"); println!(" Max decode time: {max:.2}ms"); + println!(" P50 decode time: {:.2}ms", self.sequential_p50_ms); + println!(" P95 decode time: {:.2}ms", self.sequential_p95_ms); + println!(" P99 decode time: {:.2}ms", self.sequential_p99_ms); + println!(" First frame decode: {:.2}ms", self.first_frame_decode_ms); + println!( + " Startup to first frame: {:.2}ms", + self.startup_to_first_frame_ms + ); println!(" Effective FPS: {:.1}", self.sequential_fps); } println!(); @@ -138,18 +262,9 @@ impl BenchmarkResults { println!(" Avg access time: {avg:.2}ms"); println!(" Min access time: {min:.2}ms"); println!(" Max access time: {max:.2}ms"); - println!( - " P50: {:.2}ms", - percentile(&self.random_access_times_ms, 50.0) - ); - println!( - " P95: {:.2}ms", - percentile(&self.random_access_times_ms, 95.0) - ); - println!( - " P99: {:.2}ms", - percentile(&self.random_access_times_ms, 99.0) - ); + println!(" P50: {:.2}ms", self.random_access_p50_ms); + println!(" P95: {:.2}ms", self.random_access_p95_ms); + println!(" P99: {:.2}ms", self.random_access_p99_ms); } println!(); @@ -215,10 +330,13 @@ async fn benchmark_sequential_decode( fps: u32, frame_count: usize, start_time: f32, -) -> (Vec<f64>, f64, usize) { +) -> (Vec<f64>, f64, usize, f64, f64) { let mut times = Vec::with_capacity(frame_count); let mut failures = 0; let overall_start = 
Instant::now(); + let mut first_frame_decode_ms = 0.0; + let mut startup_to_first_frame_ms = 0.0; + let mut first_frame_captured = false; for i in 0..frame_count { let time = start_time + (i as f32 / fps as f32); @@ -227,6 +345,11 @@ async fn benchmark_sequential_decode( Some(_frame) => { let elapsed = start.elapsed(); times.push(elapsed.as_secs_f64() * 1000.0); + if !first_frame_captured { + first_frame_captured = true; + first_frame_decode_ms = elapsed.as_secs_f64() * 1000.0; + startup_to_first_frame_ms = overall_start.elapsed().as_secs_f64() * 1000.0; + } } None => { failures += 1; @@ -243,7 +366,13 @@ async fn benchmark_sequential_decode( 0.0 }; - (times, effective_fps, failures) + ( + times, + effective_fps, + failures, + first_frame_decode_ms, + startup_to_first_frame_ms, + ) } async fn benchmark_seek( @@ -308,6 +437,10 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { config.video_path.display() ); println!("FPS: {}, Iterations: {}", config.fps, config.iterations); + println!( + "Sequential frames: {}, Random samples: {}", + config.sequential_frames, config.random_samples + ); println!(); println!("[1/5] Benchmarking decoder creation..."); @@ -341,12 +474,20 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { println!("Detected video duration: {video_duration:.2}s"); println!(); - println!("[3/5] Benchmarking sequential decode (100 frames from start)..."); - let (seq_times, seq_fps, seq_failures) = - benchmark_sequential_decode(&decoder, config.fps, 100, 0.0).await; + println!( + "[3/5] Benchmarking sequential decode ({} frames from start)...", + config.sequential_frames + ); + let (seq_times, seq_fps, seq_failures, first_frame_decode_ms, startup_to_first_frame_ms) = + benchmark_sequential_decode(&decoder, config.fps, config.sequential_frames, 0.0).await; results.sequential_decode_times_ms = seq_times; results.sequential_fps = seq_fps; results.sequential_failures = seq_failures; + 
results.first_frame_decode_ms = first_frame_decode_ms; + results.startup_to_first_frame_ms = startup_to_first_frame_ms; + results.sequential_p50_ms = percentile(&results.sequential_decode_times_ms, 50.0); + results.sequential_p95_ms = percentile(&results.sequential_decode_times_ms, 95.0); + results.sequential_p99_ms = percentile(&results.sequential_decode_times_ms, 99.0); println!(" Done: {seq_fps:.1} effective FPS"); if seq_failures > 0 { println!(" Warning: {seq_failures} frames failed to decode"); @@ -370,9 +511,12 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { } } - println!("[5/5] Benchmarking random access (50 samples)..."); + println!( + "[5/5] Benchmarking random access ({} samples)...", + config.random_samples + ); let (random_times, random_failures) = - benchmark_random_access(&decoder, config.fps, video_duration, 50).await; + benchmark_random_access(&decoder, config.fps, video_duration, config.random_samples).await; results.random_access_times_ms = random_times; results.random_access_failures = random_failures; results.random_access_avg_ms = if results.random_access_times_ms.is_empty() { @@ -381,6 +525,9 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { results.random_access_times_ms.iter().sum::<f64>() / results.random_access_times_ms.len() as f64 }; + results.random_access_p50_ms = percentile(&results.random_access_times_ms, 50.0); + results.random_access_p95_ms = percentile(&results.random_access_times_ms, 95.0); + results.random_access_p99_ms = percentile(&results.random_access_times_ms, 99.0); println!(" Done: {:.2}ms avg", results.random_access_avg_ms); if random_failures > 0 { println!(" Warning: {random_failures} random accesses failed"); @@ -389,6 +536,53 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { results } +fn write_json_output(config: &BenchmarkConfig, results: &BenchmarkResults) { + let Some(output_path) = &config.output_json else { + return; + }; + + let 
output = BenchmarkOutput { + video_path: config.video_path.clone(), + fps: config.fps, + iterations: config.iterations, + sequential_frames: config.sequential_frames, + random_samples: config.random_samples, + results: BenchmarkResults { + decoder_creation_ms: results.decoder_creation_ms, + sequential_decode_times_ms: results.sequential_decode_times_ms.clone(), + first_frame_decode_ms: results.first_frame_decode_ms, + startup_to_first_frame_ms: results.startup_to_first_frame_ms, + sequential_p50_ms: results.sequential_p50_ms, + sequential_p95_ms: results.sequential_p95_ms, + sequential_p99_ms: results.sequential_p99_ms, + sequential_fps: results.sequential_fps, + sequential_failures: results.sequential_failures, + seek_times_by_distance: results.seek_times_by_distance.clone(), + seek_failures: results.seek_failures, + random_access_times_ms: results.random_access_times_ms.clone(), + random_access_avg_ms: results.random_access_avg_ms, + random_access_p50_ms: results.random_access_p50_ms, + random_access_p95_ms: results.random_access_p95_ms, + random_access_p99_ms: results.random_access_p99_ms, + random_access_failures: results.random_access_failures, + cache_hits: results.cache_hits, + cache_misses: results.cache_misses, + }, + }; + + match serde_json::to_string_pretty(&output) { + Ok(json) => match fs::write(output_path, json) { + Ok(()) => println!("Wrote benchmark JSON to {}", output_path.display()), + Err(error) => eprintln!( + "Failed to write benchmark JSON to {}: {}", + output_path.display(), + error + ), + }, + Err(error) => eprintln!("Failed to serialize benchmark JSON output: {}", error), + } +} + fn main() { let args: Vec<String> = std::env::args().collect(); @@ -397,7 +591,7 @@ fn main() { .position(|a| a == "--video") .and_then(|i| args.get(i + 1)) .map(PathBuf::from) - .expect("Usage: decode-benchmark --video <path> [--fps <fps>] [--iterations <count>]"); + .expect("Usage: decode-benchmark --video <path> [--fps <fps>] [--iterations <count>] [--sequential-frames <count>] [--random-samples <count>] [--output-json <path>
]"); let fps = args .iter() @@ -413,14 +607,38 @@ fn main() { .and_then(|s| s.parse().ok()) .unwrap_or(100); + let sequential_frames = args + .iter() + .position(|a| a == "--sequential-frames") + .and_then(|i| args.get(i + 1)) + .and_then(|s| s.parse().ok()) + .unwrap_or(100); + + let random_samples = args + .iter() + .position(|a| a == "--random-samples") + .and_then(|i| args.get(i + 1)) + .and_then(|s| s.parse().ok()) + .unwrap_or(50); + + let output_json = args + .iter() + .position(|a| a == "--output-json") + .and_then(|i| args.get(i + 1)) + .map(PathBuf::from); + let config = BenchmarkConfig { video_path, fps, iterations, + sequential_frames, + random_samples, + output_json, }; let rt = Runtime::new().expect("Failed to create Tokio runtime"); - let results = rt.block_on(run_full_benchmark(config)); + let results = rt.block_on(run_full_benchmark(config.clone())); results.print_report(); + write_json_output(&config, &results); } diff --git a/crates/editor/src/audio.rs b/crates/editor/src/audio.rs index d2cdbf8df4..d631b0f9a1 100644 --- a/crates/editor/src/audio.rs +++ b/crates/editor/src/audio.rs @@ -7,7 +7,6 @@ use cap_project::{AudioConfiguration, ClipOffsets, ProjectConfiguration, Timelin use ffmpeg::{ ChannelLayout, Dictionary, format as avformat, frame::Audio as FFAudio, software::resampling, }; -#[cfg(not(target_os = "windows"))] use ringbuf::{ HeapRb, traits::{Consumer, Observer, Producer}, @@ -248,14 +247,12 @@ impl AudioRenderer { } } -#[cfg(not(target_os = "windows"))] pub struct AudioPlaybackBuffer { frame_buffer: AudioRenderer, resampler: AudioResampler, resampled_buffer: HeapRb, } -#[cfg(not(target_os = "windows"))] impl AudioPlaybackBuffer { pub const PLAYBACK_SAMPLES_COUNT: u32 = 512; @@ -296,6 +293,19 @@ impl AudioPlaybackBuffer { self.frame_buffer.set_playhead(playhead, project); } + #[cfg(target_os = "windows")] + pub fn set_playhead_smooth(&mut self, playhead: f64, project: &ProjectConfiguration) { + let current_playhead = 
self.frame_buffer.elapsed_samples_to_playhead(); + let drift = (playhead - current_playhead).abs(); + + if drift > 0.2 { + self.set_playhead(playhead, project); + return; + } + + self.frame_buffer.set_playhead(playhead, project); + } + #[allow(dead_code)] pub fn current_playhead(&self) -> f64 { self.frame_buffer.elapsed_samples_to_playhead() @@ -426,9 +436,10 @@ impl AudioResampler { }) } - #[cfg(not(target_os = "windows"))] pub fn reset(&mut self) { - *self = Self::new(self.output).unwrap(); + if let Ok(resampler) = Self::new(self.output) { + *self = resampler; + } } fn current_frame_data(&self) -> &[u8] { diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 000f209c6b..4b63f7b7f3 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -1,14 +1,13 @@ -use cap_audio::FromSampleBytes; -#[cfg(not(target_os = "windows"))] -use cap_audio::{LatencyCorrectionConfig, LatencyCorrector, default_output_latency_hint}; +use cap_audio::{ + FromSampleBytes, LatencyCorrectionConfig, LatencyCorrector, default_output_latency_hint, +}; use cap_media::MediaError; use cap_media_info::AudioInfo; -use cap_project::{ProjectConfiguration, XY}; +use cap_project::{ClipOffsets, ProjectConfiguration, XY}; use cap_rendering::{ DecodedSegmentFrames, ProjectUniforms, RenderVideoConstants, ZoomFocusInterpolator, spring_mass_damper::SpringMassDamperSimulationConfig, }; -#[cfg(not(target_os = "windows"))] use cpal::{BufferSize, SupportedBufferSize}; use cpal::{ SampleFormat, @@ -17,9 +16,12 @@ use cpal::{ use futures::stream::{FuturesUnordered, StreamExt}; use lru::LruCache; use std::{ - collections::{HashSet, VecDeque}, + collections::{BTreeMap, HashMap, HashSet, VecDeque}, num::NonZeroUsize, - sync::{Arc, RwLock}, + sync::{ + Arc, RwLock, + atomic::{AtomicBool, Ordering}, + }, time::Duration, }; use tokio::{ @@ -28,7 +30,6 @@ use tokio::{ }; use tracing::{error, info, warn}; -#[cfg(not(target_os = "windows"))] use 
crate::audio::AudioPlaybackBuffer; use crate::{ audio::AudioSegment, editor, editor_instance::SegmentMedia, segments::get_audio_segments, @@ -36,8 +37,6 @@ use crate::{ const PREFETCH_BUFFER_SIZE: usize = 60; const PARALLEL_DECODE_TASKS: usize = 4; -const MAX_PREFETCH_AHEAD: u32 = 60; -const PREFETCH_BEHIND: u32 = 15; const FRAME_CACHE_SIZE: usize = 60; #[derive(Debug)] @@ -64,12 +63,14 @@ pub enum PlaybackEvent { pub struct PlaybackHandle { stop_tx: watch::Sender, event_rx: watch::Receiver, + seek_tx: watch::Sender, } struct PrefetchedFrame { frame_number: u32, segment_frames: DecodedSegmentFrames, segment_index: u32, + generation: u64, } struct FrameCache { @@ -98,6 +99,115 @@ impl FrameCache { self.cache .put(frame_number, (segment_frames, segment_index)); } + + fn clear(&mut self) { + self.cache.clear(); + } +} + +fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_frame: u32) -> bool { + let mut changed = false; + while buffer.len() > PREFETCH_BUFFER_SIZE { + let far_ahead_frame = buffer + .iter() + .rev() + .find(|(frame, _)| **frame > current_frame.saturating_add(PREFETCH_BUFFER_SIZE as u32)) + .map(|(frame, _)| *frame); + + if let Some(frame) = far_ahead_frame { + buffer.remove(&frame); + changed = true; + continue; + } + + let Some(oldest_frame) = buffer.keys().next().copied() else { + break; + }; + buffer.remove(&oldest_frame); + changed = true; + } + changed +} + +fn insert_prefetched_frame( + buffer: &mut BTreeMap, + prefetched: PrefetchedFrame, + current_frame: u32, +) -> bool { + let inserted_new = insert_prefetched_frame_untrimmed(buffer, prefetched, current_frame); + let trimmed = trim_prefetch_buffer(buffer, current_frame); + inserted_new || trimmed +} + +fn insert_prefetched_frame_untrimmed( + buffer: &mut BTreeMap, + prefetched: PrefetchedFrame, + current_frame: u32, +) -> bool { + if prefetched.frame_number < current_frame { + return false; + } + + let frame_number = prefetched.frame_number; + let inserted_new = match 
buffer.entry(frame_number) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(prefetched); + true + } + std::collections::btree_map::Entry::Occupied(_) => false, + }; + inserted_new +} + +fn prune_prefetch_buffer_before_frame( + buffer: &mut BTreeMap, + current_frame: u32, +) { + while let Some((frame, _)) = buffer.first_key_value() { + if *frame >= current_frame { + break; + } + buffer.pop_first(); + } +} + +fn count_contiguous_prefetched_frames( + buffer: &BTreeMap, + start_frame: u32, + limit: usize, +) -> usize { + let mut contiguous = 0usize; + let mut expected_frame = start_frame; + for (frame, _) in buffer.range(start_frame..) { + if *frame != expected_frame { + break; + } + contiguous += 1; + if contiguous >= limit { + break; + } + expected_frame = expected_frame.saturating_add(1); + } + contiguous +} + +fn build_clip_offsets_lookup(project: &ProjectConfiguration) -> HashMap { + project + .clips + .iter() + .map(|clip| (clip.index, clip.offsets)) + .collect() +} + +fn send_watch_u32_if_changed(tx: &watch::Sender, value: u32) { + let _ = tx.send_if_modified(|current| { + if *current == value { + false + } else { + *current = value; + true + } + }); } impl Playback { @@ -118,23 +228,32 @@ impl Playback { let (event_tx, mut event_rx) = watch::channel(PlaybackEvent::Start); event_rx.borrow_and_update(); + let (seek_tx, mut seek_rx) = watch::channel(self.start_frame_number); + seek_rx.borrow_and_update(); let handle = PlaybackHandle { stop_tx: stop_tx.clone(), event_rx, + seek_tx, }; let (prefetch_tx, mut prefetch_rx) = tokio_mpsc::channel::(PREFETCH_BUFFER_SIZE * 2); let (frame_request_tx, mut frame_request_rx) = watch::channel(self.start_frame_number); let (playback_position_tx, playback_position_rx) = watch::channel(self.start_frame_number); + let (seek_generation_tx, mut seek_generation_rx) = watch::channel(0u64); + seek_generation_rx.borrow_and_update(); - let in_flight_frames: Arc>> = Arc::new(RwLock::new(HashSet::new())); - let 
prefetch_in_flight = in_flight_frames.clone(); - let main_in_flight = in_flight_frames; + let prefetch_in_flight_frames: Arc>> = + Arc::new(RwLock::new(HashSet::new())); + let prefetch_in_flight = prefetch_in_flight_frames.clone(); + let playback_prefetch_in_flight = prefetch_in_flight_frames; + let playback_decode_in_flight: Arc>> = + Arc::new(RwLock::new(HashSet::new())); let prefetch_stop_rx = stop_rx.clone(); let mut prefetch_project = self.project.clone(); + let mut prefetch_seek_generation = seek_generation_rx.clone(); let prefetch_segment_medias = self.segment_medias.clone(); let (prefetch_duration, has_timeline) = if let Some(timeline) = &self.project.borrow().timeline { @@ -153,7 +272,7 @@ impl Playback { } type PrefetchFuture = std::pin::Pin< Box< - dyn std::future::Future)> + dyn std::future::Future)> + Send, >, >; @@ -161,10 +280,36 @@ impl Playback { let mut in_flight: FuturesUnordered = FuturesUnordered::new(); let mut frames_decoded: u32 = 0; let mut prefetched_behind: HashSet = HashSet::new(); - const INITIAL_PARALLEL_TASKS: usize = 4; + let mut prefetched_behind_order: VecDeque = VecDeque::new(); + let mut scheduled_in_flight_frames: HashSet = HashSet::new(); + let mut last_behind_scan_frame: Option = None; const RAMP_UP_AFTER_FRAMES: u32 = 5; + let dynamic_prefetch_ahead = fps.clamp(30, 90).min(PREFETCH_BUFFER_SIZE as u32); + let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); + let dynamic_parallel_tasks = if fps >= 60 { + 6 + } else if fps >= 45 { + 5 + } else { + PARALLEL_DECODE_TASKS + }; + let initial_parallel_tasks = dynamic_parallel_tasks.min(4); + let prefetch_idle_poll_interval = Duration::from_secs_f64(1.0 / fps_f64) + .mul_f64(0.25) + .max(Duration::from_millis(2)) + .min(Duration::from_millis(8)); + let prefetched_behind_capacity = (dynamic_prefetch_behind as usize).saturating_mul(8); + let mut active_generation = *prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); + let mut 
prefetch_clip_offsets = build_clip_offsets_lookup(&cached_project); + info!( + dynamic_prefetch_ahead, + dynamic_prefetch_behind, + dynamic_parallel_tasks, + prefetch_idle_poll_interval_ms = prefetch_idle_poll_interval.as_secs_f64() * 1000.0, + "Prefetch window configuration" + ); loop { if *prefetch_stop_rx.borrow() { @@ -173,6 +318,26 @@ impl Playback { if prefetch_project.has_changed().unwrap_or(false) { cached_project = prefetch_project.borrow_and_update().clone(); + prefetch_clip_offsets = build_clip_offsets_lookup(&cached_project); + } + + if prefetch_seek_generation.has_changed().unwrap_or(false) { + let generation = *prefetch_seek_generation.borrow_and_update(); + if generation != active_generation { + active_generation = generation; + next_prefetch_frame = *frame_request_rx.borrow(); + frames_decoded = 0; + prefetched_behind.clear(); + prefetched_behind_order.clear(); + last_behind_scan_frame = None; + + if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { + in_flight_guard.clear(); + } + scheduled_in_flight_frames.clear(); + + in_flight = FuturesUnordered::new(); + } } if let Ok(true) = frame_request_rx.has_changed() { @@ -187,26 +352,28 @@ impl Playback { }; next_prefetch_frame = requested; - frames_decoded = 0; - prefetched_behind.clear(); - - if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.clear(); - } - if is_backward_seek || seek_distance > MAX_PREFETCH_AHEAD / 2 { + if is_backward_seek || seek_distance > dynamic_prefetch_ahead / 2 { + frames_decoded = 0; + prefetched_behind.clear(); + prefetched_behind_order.clear(); + last_behind_scan_frame = None; + if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { + in_flight_guard.clear(); + } + scheduled_in_flight_frames.clear(); in_flight = FuturesUnordered::new(); } } } let current_playback_frame = *playback_position_rx.borrow(); - let max_prefetch_frame = current_playback_frame + MAX_PREFETCH_AHEAD; + let max_prefetch_frame = current_playback_frame + 
dynamic_prefetch_ahead; let effective_parallel = if frames_decoded < RAMP_UP_AFTER_FRAMES { - INITIAL_PARALLEL_TASKS + initial_parallel_tasks } else { - PARALLEL_DECODE_TASKS + dynamic_parallel_tasks }; while in_flight.len() < effective_parallel { @@ -222,11 +389,7 @@ impl Playback { break; } - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&frame_num)) - .unwrap_or(false); - if already_in_flight { + if scheduled_in_flight_frames.contains(&frame_num) { next_prefetch_frame += 1; continue; } @@ -236,20 +399,20 @@ impl Playback { && let Some(segment_media) = prefetch_segment_medias.get(segment.recording_clip as usize) { - let clip_offsets = cached_project - .clips - .iter() - .find(|v| v.index == segment.recording_clip) - .map(|v| v.offsets) + let clip_offsets = prefetch_clip_offsets + .get(&segment.recording_clip) + .copied() .unwrap_or_default(); let decoders = segment_media.decoders.clone(); let hide_camera = cached_project.camera.hide; let segment_index = segment.recording_clip; let is_initial = frames_decoded < 10; + let generation = active_generation; + scheduled_in_flight_frames.insert(frame_num); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.insert(frame_num); + in_flight_guard.insert((generation, frame_num)); } in_flight.push(Box::pin(async move { @@ -266,15 +429,18 @@ impl Playback { .get_frames(segment_time as f32, !hide_camera, clip_offsets) .await }; - (frame_num, segment_index, result) + (frame_num, segment_index, generation, result) })); } next_prefetch_frame += 1; } - if in_flight.len() < effective_parallel { - for behind_offset in 1..=PREFETCH_BEHIND { + if in_flight.len() < effective_parallel + && last_behind_scan_frame != Some(current_playback_frame) + { + last_behind_scan_frame = Some(current_playback_frame); + for behind_offset in 1..=dynamic_prefetch_behind { if in_flight.len() >= effective_parallel { break; } @@ -288,11 +454,7 @@ impl Playback { continue; } - let 
already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&behind_frame)) - .unwrap_or(false); - if already_in_flight { + if scheduled_in_flight_frames.contains(&behind_frame) { continue; } @@ -301,27 +463,34 @@ impl Playback { && let Some(segment_media) = prefetch_segment_medias.get(segment.recording_clip as usize) { - let clip_offsets = cached_project - .clips - .iter() - .find(|v| v.index == segment.recording_clip) - .map(|v| v.offsets) + let clip_offsets = prefetch_clip_offsets + .get(&segment.recording_clip) + .copied() .unwrap_or_default(); let decoders = segment_media.decoders.clone(); let hide_camera = cached_project.camera.hide; let segment_index = segment.recording_clip; + let generation = active_generation; + scheduled_in_flight_frames.insert(behind_frame); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.insert(behind_frame); + in_flight_guard.insert((generation, behind_frame)); } - prefetched_behind.insert(behind_frame); + if prefetched_behind.insert(behind_frame) { + prefetched_behind_order.push_back(behind_frame); + while prefetched_behind_order.len() > prefetched_behind_capacity { + if let Some(evicted) = prefetched_behind_order.pop_front() { + prefetched_behind.remove(&evicted); + } + } + } in_flight.push(Box::pin(async move { let result = decoders .get_frames(segment_time as f32, !hide_camera, clip_offsets) .await; - (behind_frame, segment_index, result) + (behind_frame, segment_index, generation, result) })); } } @@ -330,10 +499,16 @@ impl Playback { tokio::select! 
{ biased; - Some((frame_num, segment_index, result)) = in_flight.next() => { + Some((frame_num, segment_index, generation, result)) = in_flight.next() => { + scheduled_in_flight_frames.remove(&frame_num); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.remove(&frame_num); + in_flight_guard.remove(&(generation, frame_num)); + } + + if generation != active_generation { + continue; } + frames_decoded = frames_decoded.saturating_add(1); if let Some(segment_frames) = result { @@ -341,6 +516,7 @@ impl Playback { frame_number: frame_num, segment_frames, segment_index, + generation, }).await; } else if frames_decoded <= 5 { warn!( @@ -351,12 +527,13 @@ impl Playback { } } - _ = tokio::time::sleep(Duration::from_millis(1)), if in_flight.is_empty() => {} + _ = tokio::time::sleep(prefetch_idle_poll_interval), if in_flight.is_empty() => {} } } }); tokio::spawn(async move { + let playback_task_start = Instant::now(); let duration = if let Some(timeline) = &self.project.borrow().timeline { timeline.duration() } else { @@ -378,24 +555,64 @@ impl Playback { .spawn(); let frame_duration = Duration::from_secs_f64(1.0 / fps_f64); + let frame_fetch_timeout = frame_duration + .mul_f64(4.0) + .max(Duration::from_millis(20)) + .min(Duration::from_millis(80)); + let in_flight_poll_interval = frame_duration + .mul_f64(0.25) + .max(Duration::from_millis(1)) + .min(Duration::from_millis(4)); let mut frame_number = self.start_frame_number; - let mut prefetch_buffer: VecDeque = - VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); + let mut prefetch_buffer: BTreeMap = BTreeMap::new(); let mut frame_cache = FrameCache::new(FRAME_CACHE_SIZE); - let aggressive_skip_threshold = 10u32; + let mut seek_generation = 0u64; + let base_skip_threshold = (fps / 6).clamp(6, 16); + let mut late_streak = 0u32; + let mut skip_events = 0u64; let mut total_frames_rendered = 0u64; - let mut _total_frames_skipped = 0u64; - - let warmup_target_frames = 20usize; - let 
warmup_after_first_timeout = Duration::from_millis(1000); + let mut total_frames_skipped = 0u64; + let mut first_render_logged = false; + let mut pending_seek_observation: Option<(u32, Instant)> = None; + + let warmup_target_frames = (fps.saturating_div(4)).clamp(8, 16) as usize; + let warmup_after_first_timeout = frame_duration + .mul_f64((warmup_target_frames as f64) * 2.0) + .max(Duration::from_millis(200)) + .min(Duration::from_millis(700)); let warmup_no_frames_timeout = Duration::from_secs(5); - let warmup_start = Instant::now(); + let warmup_idle_poll_interval = frame_duration + .mul_f64(0.5) + .max(Duration::from_millis(8)) + .min(Duration::from_millis(25)); + let mut warmup_start = Instant::now(); let mut first_frame_time: Option = None; + let mut warmup_contiguous_prefetched = 0usize; + let mut warmup_buffer_changed = false; + info!( + warmup_target_frames, + warmup_after_first_timeout_ms = warmup_after_first_timeout.as_secs_f64() * 1000.0, + warmup_idle_poll_interval_ms = warmup_idle_poll_interval.as_secs_f64() * 1000.0, + "Playback warmup configuration" + ); while !*stop_rx.borrow() { + if first_frame_time.is_some() && warmup_buffer_changed { + warmup_contiguous_prefetched = count_contiguous_prefetched_frames( + &prefetch_buffer, + frame_number, + warmup_target_frames, + ); + warmup_buffer_changed = false; + } + let contiguous_prefetched = if first_frame_time.is_some() { + warmup_contiguous_prefetched + } else { + 0 + }; let should_start = if let Some(first_time) = first_frame_time { - prefetch_buffer.len() >= warmup_target_frames + contiguous_prefetched >= warmup_target_frames || first_time.elapsed() > warmup_after_first_timeout } else { false @@ -416,11 +633,58 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { - if prefetched.frame_number >= frame_number { - prefetch_buffer.push_back(prefetched); - if first_frame_time.is_none() { - first_frame_time = Some(Instant::now()); + let mut next_prefetched = Some(prefetched); + let mut prefetched_batch_changed = false; + + loop { + let Some(prefetched) = next_prefetched.take() else { + break; + }; + + if prefetched.generation == seek_generation + && insert_prefetched_frame_untrimmed( + &mut prefetch_buffer, + prefetched, + frame_number, + ) + { + prefetched_batch_changed = true; } + + next_prefetched = prefetch_rx.try_recv().ok(); + } + + if trim_prefetch_buffer(&mut prefetch_buffer, frame_number) { + prefetched_batch_changed = true; + } + + if prefetched_batch_changed { + warmup_buffer_changed = true; + } + + if first_frame_time.is_none() && !prefetch_buffer.is_empty() { + first_frame_time = Some(Instant::now()); + } + } + _ = seek_rx.changed() => { + let seek_frame = *seek_rx.borrow_and_update(); + seek_generation = seek_generation.saturating_add(1); + frame_number = seek_frame; + prefetch_buffer.clear(); + frame_cache.clear(); + warmup_contiguous_prefetched = 0; + warmup_buffer_changed = false; + first_frame_time = None; + warmup_start = Instant::now(); + let _ = seek_generation_tx.send(seek_generation); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); + if has_audio + && audio_playhead_tx + .send(frame_number as f64 / fps_f64) + .is_err() + { + break; } } _ = stop_rx.changed() => { @@ -428,48 +692,85 @@ impl Playback { break; } } - _ = tokio::time::sleep(Duration::from_millis(100)) => { + _ = tokio::time::sleep(warmup_idle_poll_interval) => { } } } - prefetch_buffer - .make_contiguous() - .sort_by_key(|p| p.frame_number); - - let start = Instant::now(); + let mut playback_anchor_start = Instant::now(); + let mut playback_anchor_frame = frame_number; let mut cached_project = self.project.borrow().clone(); 
+ let mut playback_clip_offsets = build_clip_offsets_lookup(&cached_project); 'playback: loop { + if seek_rx.has_changed().unwrap_or(false) { + let seek_frame = *seek_rx.borrow_and_update(); + seek_generation = seek_generation.saturating_add(1); + frame_number = seek_frame; + playback_anchor_start = Instant::now(); + playback_anchor_frame = seek_frame; + pending_seek_observation = Some((seek_frame, Instant::now())); + prefetch_buffer.clear(); + frame_cache.clear(); + let _ = seek_generation_tx.send(seek_generation); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); + if has_audio + && audio_playhead_tx + .send(frame_number as f64 / fps_f64) + .is_err() + { + break 'playback; + } + } + if self.project.has_changed().unwrap_or(false) { cached_project = self.project.borrow_and_update().clone(); + playback_clip_offsets = build_clip_offsets_lookup(&cached_project); } + let mut drained_prefetch_changed = false; while let Ok(prefetched) = prefetch_rx.try_recv() { - if prefetched.frame_number >= frame_number { - prefetch_buffer.push_back(prefetched); - while prefetch_buffer.len() > PREFETCH_BUFFER_SIZE { - if let Some(idx) = prefetch_buffer - .iter() - .enumerate() - .filter(|(_, p)| { - p.frame_number > frame_number + PREFETCH_BUFFER_SIZE as u32 - }) - .max_by_key(|(_, p)| p.frame_number) - .map(|(i, _)| i) - { - prefetch_buffer.remove(idx); - } else { - prefetch_buffer.pop_front(); - } + if prefetched.generation == seek_generation { + if insert_prefetched_frame_untrimmed( + &mut prefetch_buffer, + prefetched, + frame_number, + ) { + drained_prefetch_changed = true; } } } + if drained_prefetch_changed { + let _ = trim_prefetch_buffer(&mut prefetch_buffer, frame_number); + } + prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); - let frame_offset = frame_number.saturating_sub(self.start_frame_number) as f64; - let next_deadline = start + frame_duration.mul_f64(frame_offset); + 
let frame_offset = frame_number.saturating_sub(playback_anchor_frame) as f64; + let next_deadline = playback_anchor_start + frame_duration.mul_f64(frame_offset); tokio::select! { _ = stop_rx.changed() => break 'playback, + _ = seek_rx.changed() => { + let seek_frame = *seek_rx.borrow_and_update(); + seek_generation = seek_generation.saturating_add(1); + frame_number = seek_frame; + playback_anchor_start = Instant::now(); + playback_anchor_frame = seek_frame; + pending_seek_observation = Some((seek_frame, Instant::now())); + prefetch_buffer.clear(); + frame_cache.clear(); + let _ = seek_generation_tx.send(seek_generation); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); + if has_audio + && audio_playhead_tx + .send(frame_number as f64 / fps_f64) + .is_err() + { + break 'playback; + } + continue; + } _ = tokio::time::sleep_until(next_deadline) => {} } @@ -488,43 +789,60 @@ impl Playback { was_cached = true; Some(cached) } else { - let prefetched_idx = prefetch_buffer - .iter() - .position(|p| p.frame_number == frame_number); - - if let Some(idx) = prefetched_idx { - let prefetched = prefetch_buffer.remove(idx).unwrap(); + if let Some(prefetched) = prefetch_buffer.remove(&frame_number) { Some(( Arc::new(prefetched.segment_frames), prefetched.segment_index, )) } else { - let is_in_flight = main_in_flight + let in_flight_key = (seek_generation, frame_number); + let is_in_flight = playback_prefetch_in_flight .read() - .map(|guard| guard.contains(&frame_number)) - .unwrap_or(false); + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false) + || playback_decode_in_flight + .read() + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false); if is_in_flight { let wait_start = Instant::now(); - let max_wait = Duration::from_millis(200); + let max_wait = frame_fetch_timeout; let mut found_frame = None; + let mut buffered_wait_prefetch_changed = false; while wait_start.elapsed() < 
max_wait { tokio::select! { _ = stop_rx.changed() => break 'playback, Some(prefetched) = prefetch_rx.recv() => { + if prefetched.generation != seek_generation { + continue; + } if prefetched.frame_number == frame_number { found_frame = Some(prefetched); break; - } else if prefetched.frame_number >= self.start_frame_number { - prefetch_buffer.push_back(prefetched); + } else if prefetched.frame_number >= frame_number { + if insert_prefetched_frame_untrimmed( + &mut prefetch_buffer, + prefetched, + frame_number, + ) { + buffered_wait_prefetch_changed = true; + } } } - _ = tokio::time::sleep(Duration::from_millis(5)) => { - let still_in_flight = main_in_flight + _ = tokio::time::sleep(in_flight_poll_interval) => { + if seek_rx.has_changed().unwrap_or(false) { + break; + } + let still_in_flight = playback_prefetch_in_flight .read() - .map(|guard| guard.contains(&frame_number)) - .unwrap_or(false); + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false) + || playback_decode_in_flight + .read() + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false); if !still_in_flight { break; } @@ -532,54 +850,79 @@ impl Playback { } } + if buffered_wait_prefetch_changed { + let _ = trim_prefetch_buffer(&mut prefetch_buffer, frame_number); + } + + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + if let Some(prefetched) = found_frame { Some(( Arc::new(prefetched.segment_frames), prefetched.segment_index, )) } else { - let prefetched_idx = prefetch_buffer - .iter() - .position(|p| p.frame_number == frame_number); - if let Some(idx) = prefetched_idx { - let prefetched = prefetch_buffer.remove(idx).unwrap(); + if let Some(prefetched) = prefetch_buffer.remove(&frame_number) { Some(( Arc::new(prefetched.segment_frames), prefetched.segment_index, )) } else { frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { - let _ = 
frame_request_tx.send(frame_number); + if seek_rx.has_changed().unwrap_or(false) { + continue; + } - let wait_result = tokio::time::timeout( - Duration::from_millis(200), - prefetch_rx.recv(), - ) - .await; + send_watch_u32_if_changed(&frame_request_tx, frame_number); + + let wait_result = + tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; + + if seek_rx.has_changed().unwrap_or(false) { + continue; + } if let Ok(Some(prefetched)) = wait_result { + if prefetched.generation != seek_generation { + frame_number = frame_number.saturating_add(1); + total_frames_skipped += 1; + continue; + } if prefetched.frame_number == frame_number { Some(( Arc::new(prefetched.segment_frames), prefetched.segment_index, )) } else { - prefetch_buffer.push_back(prefetched); + let _ = insert_prefetched_frame( + &mut prefetch_buffer, + prefetched, + frame_number, + ); frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; } } else { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; } } else { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + let Some((segment_time, segment)) = cached_project.get_segment_time(playback_time) else { @@ -593,43 +936,45 @@ impl Playback { continue; }; - let clip_offsets = cached_project - .clips - .iter() - .find(|v| v.index == segment.recording_clip) - .map(|v| v.offsets) + let clip_offsets = playback_clip_offsets + .get(&segment.recording_clip) + .copied() .unwrap_or_default(); - if let Ok(mut guard) = main_in_flight.write() { - guard.insert(frame_number); + if let Ok(mut guard) = playback_decode_in_flight.write() { + guard.insert(in_flight_key); } - let max_wait = Duration::from_millis(200); + let max_wait = frame_fetch_timeout; let data = tokio::select! 
{ _ = stop_rx.changed() => { - if let Ok(mut guard) = main_in_flight.write() { - guard.remove(&frame_number); + if let Ok(mut guard) = playback_decode_in_flight.write() { + guard.remove(&in_flight_key); } break 'playback }, _ = tokio::time::sleep(max_wait) => { - if let Ok(mut guard) = main_in_flight.write() { - guard.remove(&frame_number); + if let Ok(mut guard) = playback_decode_in_flight.write() { + guard.remove(&in_flight_key); } frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; }, data = segment_media .decoders .get_frames(segment_time as f32, !cached_project.camera.hide, clip_offsets) => { - if let Ok(mut guard) = main_in_flight.write() { - guard.remove(&frame_number); + if let Ok(mut guard) = playback_decode_in_flight.write() { + guard.remove(&in_flight_key); } data }, }; + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + data.map(|frames| (Arc::new(frames), segment.recording_clip)) } } @@ -685,6 +1030,25 @@ impl Playback { .await; total_frames_rendered += 1; + if !first_render_logged { + first_render_logged = true; + info!( + first_render_latency_ms = + playback_task_start.elapsed().as_secs_f64() * 1000.0, + "Playback rendered first frame" + ); + } + if let Some((seek_target_frame, seek_started_at)) = pending_seek_observation + && frame_number >= seek_target_frame + { + info!( + seek_target_frame, + rendered_frame = frame_number, + seek_settle_ms = seek_started_at.elapsed().as_secs_f64() * 1000.0, + "Playback seek settled" + ); + pending_seek_observation = None; + } } event_tx.send(PlaybackEvent::Frame(frame_number)).ok(); @@ -699,23 +1063,28 @@ impl Playback { break 'playback; } - let expected_frame = self.start_frame_number - + (start.elapsed().as_secs_f64() * fps_f64).floor() as u32; + let expected_frame = playback_anchor_frame + + (playback_anchor_start.elapsed().as_secs_f64() * fps_f64).floor() as u32; if frame_number < expected_frame { let frames_behind = expected_frame - 
frame_number; + late_streak = late_streak.saturating_add(1); + let threshold_reduction = (late_streak / 12).min(base_skip_threshold); + let dynamic_skip_threshold = + base_skip_threshold.saturating_sub(threshold_reduction); - if frames_behind <= aggressive_skip_threshold { + if frames_behind <= dynamic_skip_threshold { continue; } let skipped = frames_behind.saturating_sub(1); if skipped > 0 { frame_number += skipped; - _total_frames_skipped += skipped as u64; + total_frames_skipped += skipped as u64; + skip_events = skip_events.saturating_add(1); - prefetch_buffer.retain(|p| p.frame_number >= frame_number); - let _ = frame_request_tx.send(frame_number); + prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -724,10 +1093,29 @@ impl Playback { { break 'playback; } + + if skipped >= fps.saturating_div(2) || skip_events % 120 == 0 { + info!( + skipped_frames = skipped, + frames_behind, + dynamic_skip_threshold, + late_streak, + total_frames_skipped, + skip_events, + "Playback applied frame skip catch-up" + ); + } } + } else { + late_streak = 0; } } + info!( + total_frames_rendered, + total_frames_skipped, skip_events, "Playback loop completed" + ); + stop_tx.send(true).ok(); event_tx.send(PlaybackEvent::Stop).ok(); @@ -742,6 +1130,17 @@ impl PlaybackHandle { self.stop_tx.send(true).ok(); } + pub fn seek(&self, frame_number: u32) { + let _ = self.seek_tx.send_if_modified(|current_frame| { + if *current_frame == frame_number { + false + } else { + *current_frame = frame_number; + true + } + }); + } + pub async fn receive_event(&mut self) -> watch::Ref<'_, PlaybackEvent> { self.event_rx.changed().await.ok(); self.event_rx.borrow_and_update() @@ -759,6 +1158,12 @@ struct AudioPlayback { } impl AudioPlayback { + fn use_prerendered_audio() -> bool { + std::env::var("CAP_AUDIO_PRERENDER_PLAYBACK") + 
.map(|value| value == "1" || value.eq_ignore_ascii_case("true")) + .unwrap_or(false) + } + fn spawn(self) -> bool { let handle = tokio::runtime::Handle::current(); @@ -768,6 +1173,7 @@ impl AudioPlayback { } std::thread::spawn(move || { + let audio_thread_start = Instant::now(); let host = cpal::default_host(); let device = match host.default_output_device() { Some(d) => d, @@ -787,26 +1193,80 @@ impl AudioPlayback { } }; + let use_prerendered_audio = Self::use_prerendered_audio(); let duration_secs = self.duration_secs; + if use_prerendered_audio { + info!("Using pre-rendered audio playback mode"); + } else { + info!("Using low-latency streaming audio playback mode"); + } let result = match supported_config.sample_format() { SampleFormat::I16 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::I32 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::F32 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::I64 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::U8 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + 
self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::F64 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } format => { error!( @@ -828,6 +1288,10 @@ impl AudioPlayback { } }; + info!( + startup_prepare_ms = audio_thread_start.elapsed().as_secs_f64() * 1000.0, + "Audio stream prepared, starting playback stream" + ); if let Err(e) = stream.play() { error!( "Failed to play audio stream: {}. Skipping audio playback.", @@ -843,7 +1307,6 @@ impl AudioPlayback { true } - #[cfg(not(target_os = "windows"))] #[allow(dead_code)] fn create_stream( self, @@ -999,6 +1462,8 @@ impl AudioPlayback { let mut latency_corrector = LatencyCorrector::new(static_latency_hint, latency_config); let initial_compensation_secs = latency_corrector.initial_compensation_secs(); let device_sample_rate = sample_rate; + let stream_build_start = Instant::now(); + let callback_started = Arc::new(AtomicBool::new(false)); { let project_snapshot = project.borrow(); @@ -1036,6 +1501,7 @@ impl AudioPlayback { let headroom_for_stream = headroom_samples; let mut playhead_rx_for_stream = playhead_rx.clone(); let mut last_video_playhead = playhead; + let callback_started_for_stream = callback_started.clone(); #[cfg(target_os = "windows")] const FIXED_LATENCY_SECS: f64 = 0.08; @@ -1055,6 +1521,13 @@ impl AudioPlayback { let stream_result = device.build_output_stream( &config, move |buffer: &mut [T], info| { + if !callback_started_for_stream.swap(true, Ordering::Relaxed) { + info!( + startup_to_callback_ms = + stream_build_start.elapsed().as_secs_f64() * 1000.0, + "Audio output callback started" + ); + } #[cfg(not(target_os = "windows"))] let latency_secs = 
latency_corrector.update_from_callback(info); #[cfg(target_os = "windows")] diff --git a/crates/recording/examples/playback-test-runner.rs b/crates/recording/examples/playback-test-runner.rs index 437b3844a3..16865ae654 100644 --- a/crates/recording/examples/playback-test-runner.rs +++ b/crates/recording/examples/playback-test-runner.rs @@ -4,6 +4,7 @@ use cap_project::{RecordingMeta, RecordingMetaInner, StudioRecordingMeta}; use cap_rendering::decoder::spawn_decoder; use chrono::{Local, Utc}; use clap::{Parser, Subcommand}; +use serde::Serialize; use std::{ fs, path::{Path, PathBuf}, @@ -38,12 +39,18 @@ struct Cli { #[arg(long, global = true, default_value = "30")] fps: u32, + #[arg(long, global = true, default_value_t = STARTUP_TO_FIRST_FRAME_WARNING_MS)] + startup_threshold_ms: f64, + #[arg(long, global = true)] verbose: bool, #[arg(long, global = true)] benchmark_output: bool, + #[arg(long, global = true)] + json_output: Option, + #[arg(long, global = true)] notes: Option, } @@ -53,6 +60,7 @@ enum Commands { Full, Decoder, Playback, + Scrub, AudioSync, CameraSync, List, @@ -60,10 +68,12 @@ enum Commands { const FPS_TOLERANCE: f64 = 2.0; const DECODE_LATENCY_WARNING_MS: f64 = 50.0; +const SCRUB_SEEK_WARNING_MS: f64 = 40.0; +const STARTUP_TO_FIRST_FRAME_WARNING_MS: f64 = 250.0; const AUDIO_VIDEO_SYNC_TOLERANCE_MS: f64 = 100.0; const CAMERA_SYNC_TOLERANCE_MS: f64 = 100.0; -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct DecoderTestResult { passed: bool, decoder_type: String, @@ -75,13 +85,15 @@ struct DecoderTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct PlaybackTestResult { passed: bool, segment_index: usize, total_frames: usize, decoded_frames: usize, failed_frames: usize, + first_frame_decode_time_ms: f64, + startup_to_first_frame_ms: f64, avg_decode_time_ms: f64, min_decode_time_ms: f64, max_decode_time_ms: f64, @@ -93,10 +105,28 @@ struct PlaybackTestResult 
{ fps_ok: bool, jitter_ms: f64, decode_latency_ok: bool, + startup_latency_ok: bool, + startup_threshold_ms: f64, + errors: Vec, +} + +#[derive(Debug, Clone, Default, Serialize)] +struct ScrubTestResult { + passed: bool, + segment_index: usize, + seek_operations: usize, + successful_seeks: usize, + failed_seeks: usize, + avg_seek_time_ms: f64, + p50_seek_time_ms: f64, + p95_seek_time_ms: f64, + p99_seek_time_ms: f64, + max_seek_time_ms: f64, + seek_latency_ok: bool, errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct AudioSyncTestResult { passed: bool, segment_index: usize, @@ -114,7 +144,7 @@ struct AudioSyncTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct CameraSyncTestResult { passed: bool, segment_index: usize, @@ -130,7 +160,7 @@ struct CameraSyncTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct RecordingTestReport { recording_path: PathBuf, recording_name: String, @@ -141,6 +171,7 @@ struct RecordingTestReport { has_system_audio: bool, decoder_results: Vec, playback_results: Vec, + scrub_results: Vec, audio_sync_results: Vec, camera_sync_results: Vec, overall_passed: bool, @@ -208,17 +239,49 @@ impl RecordingTestReport { result.p95_decode_time_ms, result.p99_decode_time_ms ); + println!( + " Startup: first_decode={:.1}ms startup_to_first={:.1}ms", + result.first_frame_decode_time_ms, result.startup_to_first_frame_ms + ); if !result.fps_ok { println!(" WARN: FPS outside tolerance!"); } if !result.decode_latency_ok { println!(" WARN: Decode latency exceeds {DECODE_LATENCY_WARNING_MS}ms!"); } + if !result.startup_latency_ok { + println!( + " WARN: Startup-to-first-frame exceeds {:.1}ms!", + result.startup_threshold_ms + ); + } for err in &result.errors { println!(" ERROR: {err}"); } } + if !self.scrub_results.is_empty() { + println!("\n SCRUB TESTS:"); + for result in &self.scrub_results 
{ + let status = if result.passed { "OK" } else { "FAIL" }; + println!( + " Segment {}: [{}] seeks={}/{} avg={:.1}ms p95={:.1}ms", + result.segment_index, + status, + result.successful_seeks, + result.seek_operations, + result.avg_seek_time_ms, + result.p95_seek_time_ms + ); + if !result.seek_latency_ok { + println!(" WARN: Scrub seek latency exceeds {SCRUB_SEEK_WARNING_MS}ms!"); + } + for err in &result.errors { + println!(" ERROR: {err}"); + } + } + } + if !self.audio_sync_results.is_empty() { println!("\n AUDIO SYNC TESTS:"); for result in &self.audio_sync_results { @@ -339,11 +402,14 @@ async fn test_playback( meta: &StudioRecordingMeta, segment_index: usize, fps: u32, + startup_threshold_ms: f64, verbose: bool, ) -> PlaybackTestResult { + let playback_start = Instant::now(); let mut result = PlaybackTestResult { segment_index, expected_fps: fps as f64, + startup_threshold_ms, ..Default::default() }; @@ -384,6 +450,11 @@ async fn test_playback( let decode_time_ms = start.elapsed().as_secs_f64() * 1000.0; decode_times.push(decode_time_ms); decoded_count += 1; + if decoded_count == 1 { + result.first_frame_decode_time_ms = decode_time_ms; + result.startup_to_first_frame_ms = + playback_start.elapsed().as_secs_f64() * 1000.0; + } if frame.width() == 0 || frame.height() == 0 { result @@ -439,15 +510,107 @@ async fn test_playback( result.fps_ok = (result.effective_fps - result.expected_fps).abs() <= FPS_TOLERANCE || result.effective_fps >= result.expected_fps; result.decode_latency_ok = result.p95_decode_time_ms <= DECODE_LATENCY_WARNING_MS; + result.startup_latency_ok = result.startup_to_first_frame_ms <= startup_threshold_ms; result.passed = result.fps_ok && result.decode_latency_ok + && result.startup_latency_ok && result.failed_frames == 0 && result.decoded_frames > 0; result } +async fn test_scrub( + recording_meta: &RecordingMeta, + meta: &StudioRecordingMeta, + segment_index: usize, + fps: u32, + verbose: bool, +) -> ScrubTestResult { + let mut result = 
ScrubTestResult { + segment_index, + seek_operations: 120, + ..Default::default() + }; + + let display_path = match meta { + StudioRecordingMeta::SingleSegment { segment } => { + recording_meta.path(&segment.display.path) + } + StudioRecordingMeta::MultipleSegments { inner } => { + recording_meta.path(&inner.segments[segment_index].display.path) + } + }; + + let decoder = match spawn_decoder("display", display_path.clone(), fps, 0.0, false).await { + Ok(d) => d, + Err(e) => { + result.errors.push(format!("Failed to create decoder: {e}")); + return result; + } + }; + + let duration_secs = get_video_duration(&display_path); + let total_frames = (duration_secs * fps as f64).ceil() as usize; + if total_frames < 2 { + result + .errors + .push("Video duration too short for scrub benchmark".to_string()); + return result; + } + + let mut seek_times = Vec::with_capacity(result.seek_operations); + + for operation in 0..result.seek_operations { + let target_frame = ((operation * 7919) % total_frames).max(1); + let target_time = target_frame as f32 / fps as f32; + let seek_start = Instant::now(); + match decoder.get_frame(target_time).await { + Some(_) => { + let seek_time_ms = seek_start.elapsed().as_secs_f64() * 1000.0; + seek_times.push(seek_time_ms); + result.successful_seeks += 1; + if verbose && operation % 20 == 0 { + println!( + " Scrub {} / {}: frame={} time={:.3}s seek={:.1}ms", + operation + 1, + result.seek_operations, + target_frame, + target_time, + seek_time_ms + ); + } + } + None => { + result.failed_seeks += 1; + if verbose { + println!( + " Scrub {} / {}: frame={} FAILED", + operation + 1, + result.seek_operations, + target_frame + ); + } + } + } + } + + if !seek_times.is_empty() { + result.avg_seek_time_ms = seek_times.iter().sum::() / seek_times.len() as f64; + result.p50_seek_time_ms = percentile(&seek_times, 50.0); + result.p95_seek_time_ms = percentile(&seek_times, 95.0); + result.p99_seek_time_ms = percentile(&seek_times, 99.0); + 
result.max_seek_time_ms = seek_times.iter().copied().fold(f64::NEG_INFINITY, f64::max); + } + + result.seek_latency_ok = result.p95_seek_time_ms <= SCRUB_SEEK_WARNING_MS; + result.passed = + result.seek_latency_ok && result.failed_seeks == 0 && result.successful_seeks > 0; + + result +} + async fn test_audio_sync( recording_meta: &RecordingMeta, meta: &StudioRecordingMeta, @@ -733,8 +896,10 @@ fn discover_recordings(input_dir: &Path) -> Vec { async fn run_tests_on_recording( recording_path: &Path, fps: u32, + startup_threshold_ms: f64, run_decoder: bool, run_playback: bool, + run_scrub: bool, run_audio_sync: bool, run_camera_sync: bool, verbose: bool, @@ -821,11 +986,27 @@ async fn run_tests_on_recording( if verbose { println!(" Testing playback for segment {segment_idx}..."); } - let playback_result = - test_playback(&meta, studio_meta.as_ref(), segment_idx, fps, verbose).await; + let playback_result = test_playback( + &meta, + studio_meta.as_ref(), + segment_idx, + fps, + startup_threshold_ms, + verbose, + ) + .await; report.playback_results.push(playback_result); } + if run_scrub { + if verbose { + println!(" Testing scrub performance for segment {segment_idx}..."); + } + let scrub_result = + test_scrub(&meta, studio_meta.as_ref(), segment_idx, fps, verbose).await; + report.scrub_results.push(scrub_result); + } + if run_audio_sync { if verbose { println!(" Testing audio sync for segment {segment_idx}..."); @@ -848,21 +1029,40 @@ async fn run_tests_on_recording( let decoder_ok = report.decoder_results.iter().all(|r| r.passed); let playback_ok = report.playback_results.iter().all(|r| r.passed); + let scrub_ok = report.scrub_results.iter().all(|r| r.passed); let audio_ok = report.audio_sync_results.iter().all(|r| r.passed); let camera_ok = report.camera_sync_results.iter().all(|r| r.passed); - report.overall_passed = decoder_ok && playback_ok && audio_ok && camera_ok; + report.overall_passed = decoder_ok && playback_ok && scrub_ok && audio_ok && camera_ok; 
Ok(report) } -#[derive(Debug)] +#[derive(Debug, Clone, Serialize)] struct SystemInfo { os: String, arch: String, cpu: String, } +#[derive(Debug, Serialize)] +struct JsonBenchmarkSummary { + total_recordings: usize, + passed_recordings: usize, + failed_recordings: usize, +} + +#[derive(Debug, Serialize)] +struct JsonBenchmarkOutput { + generated_at_utc: String, + local_time: String, + command: String, + notes: Option, + system: SystemInfo, + summary: JsonBenchmarkSummary, + reports: Vec, +} + impl SystemInfo { fn collect() -> Self { let mut sys = System::new_all(); @@ -906,6 +1106,19 @@ fn get_failure_tags(report: &RecordingTestReport) -> Vec { if report.playback_results.iter().any(|r| !r.decode_latency_ok) { tags.push("LATENCY".to_string()); } + if report + .playback_results + .iter() + .any(|r| !r.startup_latency_ok) + { + tags.push("STARTUP".to_string()); + } + if report.scrub_results.iter().any(|r| !r.seek_latency_ok) { + tags.push("SCRUB_LATENCY".to_string()); + } + if report.scrub_results.iter().any(|r| r.failed_seeks > 0) { + tags.push("SCRUB_ERRORS".to_string()); + } if report.playback_results.iter().any(|r| r.failed_frames > 0) { tags.push("DECODE_ERRORS".to_string()); } @@ -1007,6 +1220,16 @@ fn report_to_markdown(report: &RecordingTestReport) -> String { result.p99_decode_time_ms, result.max_decode_time_ms )); + md.push_str(&format!( + "| ↳ Startup | {} | first_decode={:.1}ms startup_to_first={:.1}ms |\n", + if result.startup_latency_ok { + "✅" + } else { + "❌" + }, + result.first_frame_decode_time_ms, + result.startup_to_first_frame_ms + )); if result.failed_frames > 0 { md.push_str(&format!( "| ↳ Failed Frames | ⚠️ | {} |\n", @@ -1015,6 +1238,31 @@ fn report_to_markdown(report: &RecordingTestReport) -> String { } } + for result in &report.scrub_results { + md.push_str(&format!( + "| Scrub Seg {} | {} | seeks={}/{} avg={:.1}ms p95={:.1}ms p99={:.1}ms |\n", + result.segment_index, + if result.passed { "✅" } else { "❌" }, + result.successful_seeks, + 
result.seek_operations, + result.avg_seek_time_ms, + result.p95_seek_time_ms, + result.p99_seek_time_ms + )); + md.push_str(&format!( + "| ↳ Scrub Latency | {} | max={:.1}ms threshold={:.1}ms |\n", + if result.seek_latency_ok { "✅" } else { "❌" }, + result.max_seek_time_ms, + SCRUB_SEEK_WARNING_MS + )); + if result.failed_seeks > 0 { + md.push_str(&format!( + "| ↳ Scrub Failures | ⚠️ | {} |\n", + result.failed_seeks + )); + } + } + for result in &report.audio_sync_results { if result.has_mic_audio { let status = if result.mic_sync_ok { "✅" } else { "❌" }; @@ -1185,6 +1433,41 @@ fn write_benchmark_to_file(benchmark_md: &str) -> anyhow::Result<()> { Ok(()) } +fn write_json_output_to_file( + output_path: &Path, + reports: &[RecordingTestReport], + notes: Option<&str>, + command: &str, +) -> anyhow::Result<()> { + let passed = reports.iter().filter(|r| r.overall_passed).count(); + let total = reports.len(); + let failed = total.saturating_sub(passed); + + let output = JsonBenchmarkOutput { + generated_at_utc: Utc::now().format("%Y-%m-%d %H:%M:%S UTC").to_string(), + local_time: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(), + command: command.to_string(), + notes: notes.map(ToString::to_string), + system: SystemInfo::collect(), + summary: JsonBenchmarkSummary { + total_recordings: total, + passed_recordings: passed, + failed_recordings: failed, + }, + reports: reports.to_vec(), + }; + + let json = serde_json::to_string_pretty(&output)?; + fs::write(output_path, json)?; + + println!( + "\n✅ JSON benchmark results written to {}", + output_path.display() + ); + + Ok(()) +} + fn print_summary(reports: &[RecordingTestReport]) { println!("\n{}", "=".repeat(70)); println!("PLAYBACK TEST SUMMARY"); @@ -1202,6 +1485,7 @@ fn print_summary(reports: &[RecordingTestReport]) { let decoder_failed = report.decoder_results.iter().any(|r| !r.passed); let playback_failed = report.playback_results.iter().any(|r| !r.passed); + let scrub_failed = report.scrub_results.iter().any(|r| 
!r.passed); let audio_failed = report.audio_sync_results.iter().any(|r| !r.passed); let camera_failed = report.camera_sync_results.iter().any(|r| !r.passed); @@ -1211,6 +1495,9 @@ fn print_summary(reports: &[RecordingTestReport]) { if playback_failed { print!(" [PLAYBACK]"); } + if scrub_failed { + print!(" [SCRUB]"); + } if audio_failed { print!(" [AUDIO SYNC]"); } @@ -1224,6 +1511,66 @@ fn print_summary(reports: &[RecordingTestReport]) { println!(); } +fn command_name(command: Option<&Commands>) -> &'static str { + match command { + Some(Commands::Decoder) => "decoder", + Some(Commands::Playback) => "playback", + Some(Commands::Scrub) => "scrub", + Some(Commands::AudioSync) => "audio-sync", + Some(Commands::CameraSync) => "camera-sync", + Some(Commands::Full) | None => "full", + Some(Commands::List) => "list", + } +} + +fn shell_quote(value: &str) -> String { + let is_safe = value + .chars() + .all(|char| char.is_ascii_alphanumeric() || "-_./:=,".contains(char)); + if is_safe { + value.to_string() + } else { + format!("'{}'", value.replace('\'', "'\"'\"'")) + } +} + +fn build_command_string(cli: &Cli) -> String { + let mut command = format!( + "cargo run -p cap-recording --example playback-test-runner -- {} --fps {} --startup-threshold-ms {:.1}", + command_name(cli.command.as_ref()), + cli.fps, + cli.startup_threshold_ms + ); + + if let Some(path) = &cli.recording_path { + command.push_str(" --recording-path "); + command.push_str(&shell_quote(path.to_string_lossy().as_ref())); + } else { + command.push_str(" --input-dir "); + command.push_str(&shell_quote(cli.input_dir.to_string_lossy().as_ref())); + } + + if cli.verbose { + command.push_str(" --verbose"); + } + + if cli.benchmark_output { + command.push_str(" --benchmark-output"); + } + + if let Some(path) = &cli.json_output { + command.push_str(" --json-output "); + command.push_str(&shell_quote(path.to_string_lossy().as_ref())); + } + + if let Some(notes) = &cli.notes { + command.push_str(" --notes "); + 
command.push_str(&shell_quote(notes)); + } + + command +} + #[tokio::main] async fn main() -> anyhow::Result<()> { tracing_subscriber::registry() @@ -1235,7 +1582,7 @@ async fn main() -> anyhow::Result<()> { let cli = Cli::parse(); - if let Some(Commands::List) = cli.command { + if matches!(cli.command.as_ref(), Some(Commands::List)) { let recordings = discover_recordings(&cli.input_dir); if recordings.is_empty() { println!("No recordings found in {}", cli.input_dir.display()); @@ -1269,21 +1616,24 @@ async fn main() -> anyhow::Result<()> { return Ok(()); } - let (run_decoder, run_playback, run_audio_sync, run_camera_sync) = match cli.command { - Some(Commands::Decoder) => (true, false, false, false), - Some(Commands::Playback) => (false, true, false, false), - Some(Commands::AudioSync) => (false, false, true, false), - Some(Commands::CameraSync) => (false, false, false, true), - Some(Commands::Full) | None => (true, true, true, true), - Some(Commands::List) => unreachable!(), - }; + let (run_decoder, run_playback, run_scrub, run_audio_sync, run_camera_sync) = + match cli.command.as_ref() { + Some(Commands::Decoder) => (true, false, false, false, false), + Some(Commands::Playback) => (false, true, false, false, false), + Some(Commands::Scrub) => (false, false, true, false, false), + Some(Commands::AudioSync) => (false, false, false, true, false), + Some(Commands::CameraSync) => (false, false, false, false, true), + Some(Commands::Full) | None => (true, true, true, true, true), + Some(Commands::List) => unreachable!(), + }; println!("\nCap Playback Test Runner"); println!("{}", "=".repeat(40)); println!( - "Testing {} recording(s) at {} FPS", + "Testing {} recording(s) at {} FPS (startup threshold: {:.1}ms)", recordings.len(), - cli.fps + cli.fps, + cli.startup_threshold_ms ); println!(); @@ -1295,8 +1645,10 @@ async fn main() -> anyhow::Result<()> { match run_tests_on_recording( recording_path, cli.fps, + cli.startup_threshold_ms, run_decoder, run_playback, + 
run_scrub, run_audio_sync, run_camera_sync, cli.verbose, @@ -1315,24 +1667,9 @@ async fn main() -> anyhow::Result<()> { print_summary(&reports); - if cli.benchmark_output { - let command = format!( - "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}{}", - match cli.command { - Some(Commands::Decoder) => "decoder", - Some(Commands::Playback) => "playback", - Some(Commands::AudioSync) => "audio-sync", - Some(Commands::CameraSync) => "camera-sync", - Some(Commands::Full) | None => "full", - Some(Commands::List) => "list", - }, - cli.fps, - cli.recording_path - .as_ref() - .map(|p| format!(" --recording-path {}", p.display())) - .unwrap_or_default(), - ); + let command = build_command_string(&cli); + if cli.benchmark_output { let benchmark_md = generate_benchmark_markdown(&reports, cli.notes.as_deref(), command.trim()); @@ -1341,6 +1678,14 @@ async fn main() -> anyhow::Result<()> { } } + if let Some(output_path) = &cli.json_output { + if let Err(e) = + write_json_output_to_file(output_path, &reports, cli.notes.as_deref(), command.trim()) + { + tracing::error!("Failed to write JSON benchmark results: {}", e); + } + } + let failed = reports.iter().filter(|r| !r.overall_passed).count(); std::process::exit(if failed > 0 { 1 } else { 0 }); } diff --git a/crates/rendering/src/decoder/avassetreader.rs b/crates/rendering/src/decoder/avassetreader.rs index 73f9c62a4a..62582efe67 100644 --- a/crates/rendering/src/decoder/avassetreader.rs +++ b/crates/rendering/src/decoder/avassetreader.rs @@ -417,6 +417,8 @@ pub struct AVAssetReaderDecoder { } impl AVAssetReaderDecoder { + const INITIAL_WARM_DECODER_COUNT: usize = 2; + fn new(path: PathBuf, tokio_handle: TokioHandle) -> Result { let keyframe_index = cap_video_decode::avassetreader::KeyframeIndex::build(&path).ok(); let fps = keyframe_index @@ -449,7 +451,10 @@ impl AVAssetReaderDecoder { let mut decoders = vec![primary_instance]; let initial_positions = pool_manager.positions(); - for pos in 
initial_positions.iter().skip(1) { + let warm_decoder_count = Self::INITIAL_WARM_DECODER_COUNT + .max(1) + .min(initial_positions.len()); + for pos in initial_positions.iter().take(warm_decoder_count).skip(1) { let start_time = pos.position_secs; match DecoderInstance::new( path.clone(), @@ -496,11 +501,65 @@ impl AVAssetReaderDecoder { }) } + fn ensure_decoder_available(&mut self, decoder_id: usize) -> usize { + if decoder_id < self.decoders.len() { + return decoder_id; + } + + let Some(template) = self.decoders.first() else { + return 0; + }; + let template_path = template.path.clone(); + let template_tokio_handle = template.tokio_handle.clone(); + let template_keyframe_index = template.keyframe_index.clone(); + + while self.decoders.len() <= decoder_id { + let next_id = self.decoders.len(); + let Some(position) = self + .pool_manager + .positions() + .iter() + .find(|p| p.id == next_id) + .map(|p| p.position_secs) + else { + break; + }; + + match DecoderInstance::new( + template_path.clone(), + template_tokio_handle.clone(), + position, + template_keyframe_index.clone(), + ) { + Ok(instance) => { + self.decoders.push(instance); + tracing::info!( + decoder_id = next_id, + position_secs = position, + total_decoders = self.decoders.len(), + "Lazily initialized decoder instance" + ); + } + Err(e) => { + tracing::warn!( + decoder_id = next_id, + position_secs = position, + error = %e, + "Failed to lazily initialize decoder instance" + ); + break; + } + } + } + + decoder_id.min(self.decoders.len().saturating_sub(1)) + } + fn select_best_decoder(&mut self, requested_time: f32) -> (usize, bool) { let (best_id, _distance, needs_reset) = self.pool_manager.find_best_decoder_for_time(requested_time); - let decoder_idx = best_id.min(self.decoders.len().saturating_sub(1)); + let decoder_idx = self.ensure_decoder_available(best_id); if needs_reset && decoder_idx < self.decoders.len() { self.decoders[decoder_idx].reset(requested_time); diff --git a/package.json b/package.json 
index 47f69790ef..4f81b478cc 100644 --- a/package.json +++ b/package.json @@ -41,6 +41,14 @@ "web": "pnpm --filter=@cap/web", "env-setup": "node scripts/env-cli.js", "check-tauri-versions": "node scripts/check-tauri-plugin-versions.js", + "bench:playback:matrix": "node scripts/run-playback-benchmark-matrix.js", + "bench:playback:aggregate": "node scripts/aggregate-playback-benchmarks.js", + "bench:playback:validate": "node scripts/validate-playback-matrix.js", + "bench:playback:report": "node scripts/build-playback-matrix-report.js", + "bench:playback:finalize": "node scripts/finalize-playback-matrix.js", + "bench:playback:publish": "node scripts/publish-playback-matrix-summary.js", + "bench:playback:analyze": "node scripts/analyze-playback-matrix-bottlenecks.js", + "bench:playback:compare": "node scripts/compare-playback-benchmark-runs.js", "clean": "find . -name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/aggregate-playback-benchmarks.js b/scripts/aggregate-playback-benchmarks.js new file mode 100644 index 0000000000..b1e89d0e10 --- /dev/null +++ b/scripts/aggregate-playback-benchmarks.js @@ -0,0 +1,248 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const inputs = []; + let output = null; + let help = false; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") { + continue; + } + if (arg === "--help" || arg === "-h") { + help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[i + 1]; + if (!value) { + throw new Error("Missing value for --input"); + } + inputs.push(path.resolve(value)); + i += 1; + continue; + } + if (arg === "--output" || arg === "-o") { + const value = argv[i + 1]; + if (!value) { + throw new 
Error("Missing value for --output"); + } + output = path.resolve(value); + i += 1; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return { inputs, output, help }; +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + + const files = []; + const entries = fs.readdirSync(targetPath, { withFileTypes: true }); + for (const entry of entries) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) { + return {}; + } + const parsed = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) { + continue; + } + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) { + continue; + } + parsed[key.trim()] = value.trim(); + } + return parsed; +} + +function numberOrNull(value) { + if (typeof value !== "number" || Number.isNaN(value)) { + return null; + } + return value; +} + +function maxOrNull(values) { + const numeric = values.map(numberOrNull).filter((value) => value !== null); + if (numeric.length === 0) { + return null; + } + return Math.max(...numeric); +} + +function avgOrNull(values) { + const numeric = values.map(numberOrNull).filter((value) => value !== null); + if (numeric.length === 0) { + return null; + } + return numeric.reduce((acc, value) => acc + value, 0) / numeric.length; +} + +function formatMetric(value, digits = 1) { + return value === null ? 
"n/a" : value.toFixed(digits); +} + +function extractRows(jsonPath, data) { + if (!Array.isArray(data.reports)) { + return []; + } + + const notes = parseNotes(data.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const runTime = data.generated_at_utc ?? "unknown"; + + const rows = []; + for (const report of data.reports) { + const playbackResults = Array.isArray(report.playback_results) + ? report.playback_results + : []; + const scrubResults = Array.isArray(report.scrub_results) + ? report.scrub_results + : []; + const audioResults = Array.isArray(report.audio_sync_results) + ? report.audio_sync_results + : []; + + const effectiveFpsMin = playbackResults.length + ? Math.min( + ...playbackResults + .map((result) => numberOrNull(result.effective_fps)) + .filter((value) => value !== null), + ) + : null; + const scrubP95Max = maxOrNull( + scrubResults.map((result) => result.p95_seek_time_ms), + ); + const startupAvg = avgOrNull( + playbackResults.map((result) => result.startup_to_first_frame_ms), + ); + const micDiffMax = maxOrNull( + audioResults + .filter((result) => result.has_mic_audio) + .map((result) => result.mic_video_diff_ms), + ); + const sysDiffMax = maxOrNull( + audioResults + .filter((result) => result.has_system_audio) + .map((result) => result.system_audio_video_diff_ms), + ); + + rows.push({ + runTime, + platform, + gpu, + scenario, + recording: report.recording_name ?? path.basename(jsonPath), + format: report.is_fragmented ? "fragmented" : "mp4", + status: report.overall_passed ? "PASS" : "FAIL", + effectiveFpsMin, + scrubP95Max, + startupAvg, + micDiffMax, + sysDiffMax, + command: data.command ?? "unknown", + source: jsonPath, + }); + } + + return rows; +} + +function buildMarkdown(rows) { + const sorted = [...rows].sort((a, b) => (a.runTime < b.runTime ? 
1 : -1)); + const passed = sorted.filter((row) => row.status === "PASS").length; + const failed = sorted.length - passed; + + let md = ""; + md += `# Playback Benchmark Aggregate\n\n`; + md += `Generated: ${new Date().toISOString()}\n\n`; + md += `Total rows: ${sorted.length}, Passed: ${passed}, Failed: ${failed}\n\n`; + md += + "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; + md += "|---|---|---|---|---|---|---|---:|---:|---:|---:|---:|\n"; + for (const row of sorted) { + md += `| ${row.runTime} | ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.status} | ${formatMetric(row.effectiveFpsMin)} | ${formatMetric(row.scrubP95Max)} | ${formatMetric(row.startupAvg)} | ${formatMetric(row.micDiffMax)} | ${formatMetric(row.sysDiffMax)} |\n`; + } + md += "\n"; + return md; +} + +function printUsage() { + console.log(`Usage: node scripts/aggregate-playback-benchmarks.js --input [--input ...] 
[--output ] + +Aggregates playback-test-runner JSON outputs into a markdown summary table.`); +} + +function main() { + const args = parseArgs(process.argv); + if (args.help) { + printUsage(); + return; + } + if (args.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const inputPath of args.inputs) { + for (const filePath of collectJsonFiles(inputPath)) { + files.add(filePath); + } + } + + if (files.size === 0) { + throw new Error("No JSON files found for aggregation"); + } + + const rows = []; + for (const filePath of files) { + const raw = fs.readFileSync(filePath, "utf8"); + const parsed = JSON.parse(raw); + rows.push(...extractRows(filePath, parsed)); + } + + const markdown = buildMarkdown(rows); + if (args.output) { + fs.writeFileSync(args.output, markdown, "utf8"); + console.log(`Wrote aggregate markdown to ${args.output}`); + } else { + process.stdout.write(markdown); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} diff --git a/scripts/analyze-playback-matrix-bottlenecks.js b/scripts/analyze-playback-matrix-bottlenecks.js new file mode 100644 index 0000000000..8bf7c4317a --- /dev/null +++ b/scripts/analyze-playback-matrix-bottlenecks.js @@ -0,0 +1,286 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + inputs: [], + output: null, + outputJson: null, + targetFps: 60, + maxScrubP95Ms: 40, + maxStartupMs: 250, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--output" || arg === "-o") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output"); + options.output = path.resolve(value); + continue; + } + if (arg === "--output-json") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-json"); + options.outputJson = path.resolve(value); + continue; + } + if (arg === "--target-fps") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --target-fps value"); + } + options.targetFps = value; + continue; + } + if (arg === "--max-scrub-p95-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-scrub-p95-ms value"); + } + options.maxScrubP95Ms = value; + continue; + } + if (arg === "--max-startup-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-startup-ms value"); + } + options.maxStartupMs = value; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/analyze-playback-matrix-bottlenecks.js --input [--input ...] [--output ] [--output-json ] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] + +Analyzes playback matrix JSON outputs and highlights prioritized bottlenecks.`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const result = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + result[key.trim()] = value.trim(); + } + return result; +} + +function average(values) { + if (values.length === 0) return null; + return values.reduce((acc, value) => acc + value, 0) / values.length; +} + +function max(values) { + if (values.length === 0) return null; + return Math.max(...values); +} + +function scoreIssue(issue, options) { + let score = 0; + if (issue.fpsMin !== null && issue.fpsMin < options.targetFps) { + score += (options.targetFps - issue.fpsMin) * 5; + } + if (issue.scrubP95 !== null && issue.scrubP95 > options.maxScrubP95Ms) { + score += issue.scrubP95 - 
options.maxScrubP95Ms; + } + if (issue.startupAvg !== null && issue.startupAvg > options.maxStartupMs) { + score += (issue.startupAvg - options.maxStartupMs) / 2; + } + return score; +} + +function formatValue(value, digits = 1) { + return value === null ? "n/a" : value.toFixed(digits); +} + +function collectIssues(files, options) { + const issues = []; + + for (const filePath of files) { + const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + + for (const report of reports) { + const playback = Array.isArray(report.playback_results) + ? report.playback_results + : []; + const scrub = Array.isArray(report.scrub_results) + ? report.scrub_results + : []; + + const fpsValues = playback + .map((entry) => entry.effective_fps) + .filter((entry) => typeof entry === "number"); + const startupValues = playback + .map((entry) => entry.startup_to_first_frame_ms) + .filter((entry) => typeof entry === "number"); + const scrubP95Values = scrub + .map((entry) => entry.p95_seek_time_ms) + .filter((entry) => typeof entry === "number"); + + const issue = { + platform, + gpu, + scenario, + recording: report.recording_name ?? path.basename(filePath), + format: report.is_fragmented ? "fragmented" : "mp4", + fpsMin: fpsValues.length ? 
Math.min(...fpsValues) : null, + startupAvg: average(startupValues), + scrubP95: max(scrubP95Values), + filePath, + }; + issue.score = scoreIssue(issue, options); + if (issue.score > 0) { + issues.push(issue); + } + } + } + + issues.sort((a, b) => b.score - a.score); + return issues; +} + +function recommendation(issue, options) { + const recommendations = []; + if (issue.fpsMin !== null && issue.fpsMin < options.targetFps) { + recommendations.push("inspect decode/render path and frame wait behavior"); + } + if (issue.scrubP95 !== null && issue.scrubP95 > options.maxScrubP95Ms) { + recommendations.push( + "investigate seek dispatch pressure and decoder reposition cost", + ); + } + if (issue.startupAvg !== null && issue.startupAvg > options.maxStartupMs) { + recommendations.push("optimize startup warmup and first-frame path"); + } + return recommendations.join("; "); +} + +function buildMarkdown(issues, options) { + let md = ""; + md += "# Playback Matrix Bottleneck Analysis\n\n"; + md += `Generated: ${new Date().toISOString()}\n\n`; + md += `Thresholds: target_fps=${options.targetFps}, max_scrub_p95_ms=${options.maxScrubP95Ms}, max_startup_ms=${options.maxStartupMs}\n\n`; + + if (issues.length === 0) { + md += "No bottlenecks detected for configured thresholds.\n"; + return md; + } + + md += + "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; + md += "|---:|---|---|---|---|---|---:|---:|---:|---:|---|\n"; + issues.forEach((issue, index) => { + md += `| ${index + 1} | ${issue.platform} | ${issue.gpu} | ${issue.scenario} | ${issue.recording} | ${issue.format} | ${formatValue(issue.fpsMin)} | ${formatValue(issue.startupAvg)} | ${formatValue(issue.scrubP95)} | ${formatValue(issue.score, 2)} | ${recommendation(issue, options)} |\n`; + }); + md += "\n"; + return md; +} + +function buildJson(issues, options) { + return { + generatedAt: new Date().toISOString(), + thresholds: { + targetFps: 
options.targetFps, + maxScrubP95Ms: options.maxScrubP95Ms, + maxStartupMs: options.maxStartupMs, + }, + issueCount: issues.length, + issues: issues.map((issue, index) => ({ + rank: index + 1, + ...issue, + recommendation: recommendation(issue, options), + })), + }; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const input of options.inputs) { + for (const filePath of collectJsonFiles(input)) { + files.add(filePath); + } + } + if (files.size === 0) { + throw new Error("No JSON files found"); + } + + const issues = collectIssues([...files], options); + const markdown = buildMarkdown(issues, options); + if (options.output) { + fs.writeFileSync(options.output, markdown, "utf8"); + console.log(`Wrote bottleneck analysis to ${options.output}`); + } else { + process.stdout.write(markdown); + } + if (options.outputJson) { + fs.writeFileSync( + options.outputJson, + JSON.stringify(buildJson(issues, options), null, 2), + "utf8", + ); + console.log(`Wrote bottleneck analysis JSON to ${options.outputJson}`); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} diff --git a/scripts/build-playback-matrix-report.js b/scripts/build-playback-matrix-report.js new file mode 100644 index 0000000000..e29c6b4621 --- /dev/null +++ b/scripts/build-playback-matrix-report.js @@ -0,0 +1,294 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +const DEFAULT_REQUIRED_CELLS = [ + { platform: "macos-13", gpu: "apple-silicon", scenario: "full" }, + { platform: "macos-13", gpu: "apple-silicon", scenario: "scrub" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "integrated", scenario: "full" }, + { platform: "windows-11", gpu: "integrated", scenario: "scrub" }, +]; + +function parseArgs(argv) { + const options = { + inputs: [], + output: null, + useDefaultMatrix: true, + requiredCells: [], + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--output" || arg === "-o") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output"); + options.output = path.resolve(value); + continue; + } + if (arg === "--no-default-matrix") { + options.useDefaultMatrix = false; + continue; + } + if (arg === "--require-cell") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --require-cell"); + options.requiredCells.push(parseCell(value)); + options.useDefaultMatrix = false; + continue; + } + throw new Error(`Unknown argument: 
${arg}`); + } + + return options; +} + +function parseCell(value) { + const [platform, gpu, scenario] = value.split(":"); + if (!platform || !gpu || !scenario) { + throw new Error( + `Invalid --require-cell format: ${value}. Expected platform:gpu:scenario`, + ); + } + return { platform, gpu, scenario }; +} + +function usage() { + console.log(`Usage: node scripts/build-playback-matrix-report.js --input [--input ...] [--output ] + +Builds a concise playback matrix markdown report from playback benchmark JSON outputs.`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const result = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + result[key.trim()] = value.trim(); + } + return result; +} + +function cellKey(platform, gpu, scenario) { + return `${platform}|${gpu}|${scenario}`; +} + +function platformGpuKey(platform, gpu) { + return `${platform}|${gpu}`; +} + +function timestampOrEpoch(value) { + const parsed = Date.parse(value ?? ""); + return Number.isNaN(parsed) ? 
0 : parsed; +} + +function upsertLatestCell(cells, candidate) { + const key = cellKey(candidate.platform, candidate.gpu, candidate.scenario); + const existing = cells.get(key); + if ( + !existing || + timestampOrEpoch(candidate.generatedAt) >= + timestampOrEpoch(existing.generatedAt) + ) { + cells.set(key, candidate); + } +} + +function collectData(files) { + const latestCells = new Map(); + const formatCoverage = new Map(); + + for (const filePath of files) { + const raw = fs.readFileSync(filePath, "utf8"); + const parsed = JSON.parse(raw); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + const pass = reports.every((report) => report.overall_passed === true); + const generatedAt = parsed.generated_at_utc ?? ""; + + const formats = new Set(); + for (const report of reports) { + formats.add(report.is_fragmented ? "fragmented" : "mp4"); + } + + upsertLatestCell(latestCells, { + platform, + gpu, + scenario, + pass, + generatedAt, + filePath, + formats, + }); + + const pgKey = platformGpuKey(platform, gpu); + if (!formatCoverage.has(pgKey)) { + formatCoverage.set(pgKey, new Set()); + } + for (const format of formats) { + formatCoverage.get(pgKey).add(format); + } + } + + return { latestCells, formatCoverage }; +} + +function formatStatus(entry) { + if (!entry) return "MISSING"; + return entry.pass ? 
"PASS" : "FAIL"; +} + +function formatCoverageStatus(formats, target) { + if (!formats || !formats.has(target)) return "NO"; + return "YES"; +} + +function buildReport(requiredCells, latestCells, formatCoverage) { + const platformGpuPairs = new Map(); + for (const cell of requiredCells) { + const key = platformGpuKey(cell.platform, cell.gpu); + if (!platformGpuPairs.has(key)) { + platformGpuPairs.set(key, { platform: cell.platform, gpu: cell.gpu }); + } + } + + const rows = []; + let missingCount = 0; + let failCount = 0; + for (const { platform, gpu } of platformGpuPairs.values()) { + const full = latestCells.get(cellKey(platform, gpu, "full")); + const scrub = latestCells.get(cellKey(platform, gpu, "scrub")); + const formats = formatCoverage.get(platformGpuKey(platform, gpu)); + const fullStatus = formatStatus(full); + const scrubStatus = formatStatus(scrub); + if (fullStatus === "MISSING" || scrubStatus === "MISSING") { + missingCount += 1; + } + if (fullStatus === "FAIL" || scrubStatus === "FAIL") { + failCount += 1; + } + rows.push({ + platform, + gpu, + fullStatus, + scrubStatus, + mp4: formatCoverageStatus(formats, "mp4"), + fragmented: formatCoverageStatus(formats, "fragmented"), + fullTime: full?.generatedAt ?? "n/a", + scrubTime: scrub?.generatedAt ?? 
"n/a", + }); + } + + let markdown = ""; + markdown += "# Playback Matrix Status Report\n\n"; + markdown += `Generated: ${new Date().toISOString()}\n\n`; + markdown += `Rows: ${rows.length}, Missing rows: ${missingCount}, Rows with failures: ${failCount}\n\n`; + markdown += + "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; + markdown += "|---|---|---|---|---|---|---|---|\n"; + for (const row of rows) { + markdown += `| ${row.platform} | ${row.gpu} | ${row.fullStatus} | ${row.scrubStatus} | ${row.mp4} | ${row.fragmented} | ${row.fullTime} | ${row.scrubTime} |\n`; + } + markdown += "\n"; + + const missingCells = requiredCells.filter((cell) => { + return !latestCells.has(cellKey(cell.platform, cell.gpu, cell.scenario)); + }); + if (missingCells.length > 0) { + markdown += "## Missing Cells\n\n"; + for (const cell of missingCells) { + markdown += `- ${cell.platform}:${cell.gpu}:${cell.scenario}\n`; + } + markdown += "\n"; + } + + return markdown; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const inputPath of options.inputs) { + for (const filePath of collectJsonFiles(inputPath)) { + files.add(filePath); + } + } + if (files.size === 0) { + throw new Error("No JSON files found"); + } + + const requiredCells = options.useDefaultMatrix + ? 
[...DEFAULT_REQUIRED_CELLS, ...options.requiredCells] + : options.requiredCells; + if (requiredCells.length === 0) { + throw new Error("No required cells configured"); + } + + const { latestCells, formatCoverage } = collectData([...files]); + const report = buildReport(requiredCells, latestCells, formatCoverage); + + if (options.output) { + fs.writeFileSync(options.output, report, "utf8"); + console.log(`Wrote matrix report to ${options.output}`); + } else { + process.stdout.write(report); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js new file mode 100644 index 0000000000..e34c386eb6 --- /dev/null +++ b/scripts/compare-playback-benchmark-runs.js @@ -0,0 +1,683 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + baselineInputs: [], + candidateInputs: [], + output: null, + outputJson: null, + allowFpsDrop: 2, + allowStartupIncreaseMs: 25, + allowScrubP95IncreaseMs: 5, + allowMissingCandidate: false, + failOnCandidateOnly: false, + minSamplesPerRow: 1, + failOnParseErrors: false, + failOnZeroCompared: false, + failOnSkippedFiles: false, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--baseline") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --baseline"); + options.baselineInputs.push(path.resolve(value)); + continue; + } + if (arg === "--candidate") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --candidate"); + options.candidateInputs.push(path.resolve(value)); + continue; + } + if (arg === "--output" || arg === "-o") { + options.output = path.resolve(argv[++i] ?? 
""); + continue; + } + if (arg === "--output-json") { + options.outputJson = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--allow-fps-drop") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-fps-drop value"); + } + options.allowFpsDrop = value; + continue; + } + if (arg === "--allow-startup-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-startup-increase-ms value"); + } + options.allowStartupIncreaseMs = value; + continue; + } + if (arg === "--allow-scrub-p95-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-scrub-p95-increase-ms value"); + } + options.allowScrubP95IncreaseMs = value; + continue; + } + if (arg === "--allow-missing-candidate") { + options.allowMissingCandidate = true; + continue; + } + if (arg === "--fail-on-candidate-only") { + options.failOnCandidateOnly = true; + continue; + } + if (arg === "--min-samples-per-row") { + const value = Number.parseInt(argv[++i] ?? "", 10); + if (!Number.isFinite(value) || value < 1) { + throw new Error("Invalid --min-samples-per-row value"); + } + options.minSamplesPerRow = value; + continue; + } + if (arg === "--fail-on-parse-errors") { + options.failOnParseErrors = true; + continue; + } + if (arg === "--fail-on-zero-compared") { + options.failOnZeroCompared = true; + continue; + } + if (arg === "--fail-on-skipped-files") { + options.failOnSkippedFiles = true; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] 
[--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] [--fail-on-skipped-files] + +Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Path does not exist: ${targetPath}`); + } + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const parsed = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + parsed[key.trim()] = value.trim(); + } + return parsed; +} + +function average(values) { + if (values.length === 0) return null; + return values.reduce((acc, value) => acc + value, 0) / values.length; +} + +function maximum(values) { + if (values.length === 0) return null; + return Math.max(...values); +} + +function collectMetrics(files) { + const accumulators = new Map(); + const stats = { + totalFiles: files.length, + parsedFiles: 0, + usableFiles: 0, + skippedFiles: 0, + skippedNoReports: 0, + skippedNoUsableMetrics: 0, + parseErrors: [], + }; + + for (const filePath of files) { + let parsed; + try { + parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + stats.parsedFiles += 1; + 
} catch (error) { + stats.parseErrors.push({ + file: filePath, + error: error instanceof Error ? error.message : String(error), + }); + continue; + } + + if (!Array.isArray(parsed.reports) || parsed.reports.length === 0) { + stats.skippedFiles += 1; + stats.skippedNoReports += 1; + continue; + } + + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + let fileContributedRows = false; + + for (const report of reports) { + const key = `${platform}|${gpu}|${scenario}|${report.recording_name ?? "unknown"}|${report.is_fragmented ? "fragmented" : "mp4"}`; + + const playback = Array.isArray(report.playback_results) + ? report.playback_results + : []; + const scrub = Array.isArray(report.scrub_results) + ? report.scrub_results + : []; + + const fpsValues = playback + .map((entry) => entry.effective_fps) + .filter((entry) => typeof entry === "number"); + const startupValues = playback + .map((entry) => entry.startup_to_first_frame_ms) + .filter((entry) => typeof entry === "number"); + const scrubP95Values = scrub + .map((entry) => entry.p95_seek_time_ms) + .filter((entry) => typeof entry === "number"); + const hasUsableMetrics = + fpsValues.length > 0 || + startupValues.length > 0 || + scrubP95Values.length > 0; + if (!hasUsableMetrics) { + continue; + } + + const existing = accumulators.get(key) ?? { + key, + platform, + gpu, + scenario, + recording: report.recording_name ?? "unknown", + format: report.is_fragmented ? 
"fragmented" : "mp4", + reportCount: 0, + fpsSamples: [], + startupSamples: [], + scrubP95Samples: [], + }; + existing.reportCount += 1; + existing.fpsSamples.push(...fpsValues); + existing.startupSamples.push(...startupValues); + existing.scrubP95Samples.push(...scrubP95Values); + accumulators.set(key, existing); + fileContributedRows = true; + } + + if (fileContributedRows) { + stats.usableFiles += 1; + } else { + stats.skippedFiles += 1; + stats.skippedNoUsableMetrics += 1; + } + } + + const rows = new Map(); + for (const [key, row] of accumulators) { + rows.set(key, { + key, + platform: row.platform, + gpu: row.gpu, + scenario: row.scenario, + recording: row.recording, + format: row.format, + reportCount: row.reportCount, + fpsSampleCount: row.fpsSamples.length, + startupSampleCount: row.startupSamples.length, + scrubSampleCount: row.scrubP95Samples.length, + fpsMin: row.fpsSamples.length ? Math.min(...row.fpsSamples) : null, + startupAvg: average(row.startupSamples), + scrubP95Max: maximum(row.scrubP95Samples), + }); + } + + return { rows, stats }; +} + +function delta(candidate, baseline) { + if (candidate === null || baseline === null) return null; + return candidate - baseline; +} + +function formatNumber(value, digits = 2) { + return value === null ? 
"n/a" : value.toFixed(digits); +} + +function compareCoverageRows(a, b) { + return ( + a.platform.localeCompare(b.platform) || + a.gpu.localeCompare(b.gpu) || + a.scenario.localeCompare(b.scenario) || + a.recording.localeCompare(b.recording) || + a.format.localeCompare(b.format) + ); +} + +function compareMetrics(baselineRows, candidateRows, options) { + const comparisons = []; + const missingCandidateRows = []; + const candidateOnlyRows = []; + const insufficientSampleRows = []; + + for (const [key, baseline] of baselineRows) { + const candidate = candidateRows.get(key); + if (!candidate) { + missingCandidateRows.push({ + platform: baseline.platform, + gpu: baseline.gpu, + scenario: baseline.scenario, + recording: baseline.recording, + format: baseline.format, + }); + } + } + + for (const [key, candidate] of candidateRows) { + const baseline = baselineRows.get(key); + if (!baseline) { + candidateOnlyRows.push({ + platform: candidate.platform, + gpu: candidate.gpu, + scenario: candidate.scenario, + recording: candidate.recording, + format: candidate.format, + }); + continue; + } + + const fpsDelta = delta(candidate.fpsMin, baseline.fpsMin); + const startupDelta = delta(candidate.startupAvg, baseline.startupAvg); + const scrubDelta = delta(candidate.scrubP95Max, baseline.scrubP95Max); + + const regressions = []; + const fpsMinSamples = Math.min( + baseline.fpsSampleCount, + candidate.fpsSampleCount, + ); + const startupMinSamples = Math.min( + baseline.startupSampleCount, + candidate.startupSampleCount, + ); + const scrubMinSamples = Math.min( + baseline.scrubSampleCount, + candidate.scrubSampleCount, + ); + const comparableSampleCounts = []; + if (fpsDelta !== null) { + comparableSampleCounts.push(fpsMinSamples); + } + if (startupDelta !== null) { + comparableSampleCounts.push(startupMinSamples); + } + if (scrubDelta !== null) { + comparableSampleCounts.push(scrubMinSamples); + } + const effectiveSampleCount = + comparableSampleCounts.length > 0 + ? 
Math.min(...comparableSampleCounts) + : 0; + if (effectiveSampleCount < options.minSamplesPerRow) { + insufficientSampleRows.push({ + platform: candidate.platform, + gpu: candidate.gpu, + scenario: candidate.scenario, + recording: candidate.recording, + format: candidate.format, + effectiveSampleCount, + requiredSampleCount: options.minSamplesPerRow, + }); + regressions.push( + `insufficient_samples=${effectiveSampleCount}/${options.minSamplesPerRow}`, + ); + } + + if (fpsDelta !== null && fpsDelta < -options.allowFpsDrop) { + regressions.push(`fps_drop=${formatNumber(fpsDelta)}`); + } + if ( + startupDelta !== null && + startupDelta > options.allowStartupIncreaseMs + ) { + regressions.push(`startup_increase=${formatNumber(startupDelta)}`); + } + if (scrubDelta !== null && scrubDelta > options.allowScrubP95IncreaseMs) { + regressions.push(`scrub_p95_increase=${formatNumber(scrubDelta)}`); + } + + comparisons.push({ + platform: candidate.platform, + gpu: candidate.gpu, + scenario: candidate.scenario, + recording: candidate.recording, + format: candidate.format, + baselineReportCount: baseline.reportCount, + candidateReportCount: candidate.reportCount, + fpsMinSamples, + startupMinSamples, + scrubMinSamples, + comparedMetricCount: comparableSampleCounts.length, + effectiveSampleCount, + fpsDelta, + startupDelta, + scrubDelta, + regressions, + }); + } + + comparisons.sort( + (a, b) => + b.regressions.length - a.regressions.length || compareCoverageRows(a, b), + ); + missingCandidateRows.sort(compareCoverageRows); + candidateOnlyRows.sort(compareCoverageRows); + insufficientSampleRows.sort(compareCoverageRows); + return { + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + }; +} + +function escapeTableCell(value) { + return String(value).replace(/\\/g, "\\\\").replace(/\|/g, "\\|"); +} + +function toMarkdown( + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + baselineStats, + candidateStats, + 
options, +) { + const regressions = comparisons.filter( + (entry) => entry.regressions.length > 0, + ); + let md = ""; + md += "# Playback Benchmark Comparison\n\n"; + md += `Generated: ${new Date().toISOString()}\n\n`; + md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; + md += `Coverage gate: missing_candidate=${options.allowMissingCandidate ? "allow" : "fail"}, candidate_only=${options.failOnCandidateOnly ? "fail" : "allow"}\n\n`; + md += `Sample gate: min_samples_per_row>=${options.minSamplesPerRow}\n\n`; + md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; + md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? "fail_if_zero" : "allow"}\n\n`; + md += `Skipped-file gate: skipped_files=${options.failOnSkippedFiles ? "fail" : "allow"}\n\n`; + md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_usable_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; + md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_usable_metrics=${candidateStats.skippedNoUsableMetrics}, parse_errors=${candidateStats.parseErrors.length}\n\n`; + md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; + if ( + baselineStats.parseErrors.length > 0 || + candidateStats.parseErrors.length > 0 + ) { + md += "## Parse 
Errors\n\n"; + md += "| Side | File | Error |\n"; + md += "|---|---|---|\n"; + for (const entry of baselineStats.parseErrors.slice(0, 20)) { + md += `| baseline | ${escapeTableCell(entry.file)} | ${escapeTableCell(entry.error)} |\n`; + } + for (const entry of candidateStats.parseErrors.slice(0, 20)) { + md += `| candidate | ${escapeTableCell(entry.file)} | ${escapeTableCell(entry.error)} |\n`; + } + md += "\n"; + } + if (missingCandidateRows.length > 0) { + md += "## Missing Candidate Rows\n\n"; + md += "| Platform | GPU | Scenario | Recording | Format |\n"; + md += "|---|---|---|---|---|\n"; + for (const row of missingCandidateRows) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} |\n`; + } + md += "\n"; + } + if (candidateOnlyRows.length > 0) { + md += "## Candidate-Only Rows\n\n"; + md += "| Platform | GPU | Scenario | Recording | Format |\n"; + md += "|---|---|---|---|---|\n"; + for (const row of candidateOnlyRows) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} |\n`; + } + md += "\n"; + } + if (insufficientSampleRows.length > 0) { + md += "## Insufficient Sample Rows\n\n"; + md += + "| Platform | GPU | Scenario | Recording | Format | Effective Samples | Required Samples |\n"; + md += "|---|---|---|---|---|---:|---:|\n"; + for (const row of insufficientSampleRows) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.effectiveSampleCount} | ${row.requiredSampleCount} |\n`; + } + md += "\n"; + } + md += + "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | F Samples | S Samples | Q Samples | Metrics | Effective Samples | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; + md += + "|---|---|---|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---|\n"; + for (const row of comparisons) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | 
${row.baselineReportCount} | ${row.candidateReportCount} | ${row.fpsMinSamples} | ${row.startupMinSamples} | ${row.scrubMinSamples} | ${row.comparedMetricCount} | ${row.effectiveSampleCount} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? row.regressions.join(", ") : "none"} |\n`; + } + md += "\n"; + return md; +} + +function buildJsonOutput( + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + baselineStats, + candidateStats, + options, +) { + const regressions = comparisons.filter( + (entry) => entry.regressions.length > 0, + ); + const hasMissingCandidateRows = missingCandidateRows.length > 0; + const hasCandidateOnlyRows = candidateOnlyRows.length > 0; + const hasInsufficientSamples = insufficientSampleRows.length > 0; + const hasMetricRegressions = regressions.some((entry) => + entry.regressions.some( + (issue) => + issue.startsWith("fps_drop=") || + issue.startsWith("startup_increase=") || + issue.startsWith("scrub_p95_increase="), + ), + ); + const failureReasons = []; + if (hasMetricRegressions) { + failureReasons.push("metric_regression"); + } + if (hasInsufficientSamples) { + failureReasons.push("insufficient_samples"); + } + if (!options.allowMissingCandidate && hasMissingCandidateRows) { + failureReasons.push("missing_candidate_rows"); + } + if (options.failOnCandidateOnly && hasCandidateOnlyRows) { + failureReasons.push("candidate_only_rows"); + } + if ( + options.failOnParseErrors && + (baselineStats.parseErrors.length > 0 || + candidateStats.parseErrors.length > 0) + ) { + failureReasons.push("parse_errors"); + } + if (options.failOnZeroCompared && comparisons.length === 0) { + failureReasons.push("zero_compared_rows"); + } + if ( + options.failOnSkippedFiles && + (baselineStats.skippedFiles > 0 || candidateStats.skippedFiles > 0) + ) { + failureReasons.push("skipped_files"); + } + const passed = failureReasons.length === 0; + 
return { + generatedAt: new Date().toISOString(), + tolerance: { + allowFpsDrop: options.allowFpsDrop, + allowStartupIncreaseMs: options.allowStartupIncreaseMs, + allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, + allowMissingCandidate: options.allowMissingCandidate, + failOnCandidateOnly: options.failOnCandidateOnly, + minSamplesPerRow: options.minSamplesPerRow, + failOnParseErrors: options.failOnParseErrors, + failOnZeroCompared: options.failOnZeroCompared, + failOnSkippedFiles: options.failOnSkippedFiles, + }, + fileStats: { + baseline: baselineStats, + candidate: candidateStats, + }, + summary: { + comparedRows: comparisons.length, + regressions: regressions.length, + missingCandidateRows: missingCandidateRows.length, + candidateOnlyRows: candidateOnlyRows.length, + insufficientSampleRows: insufficientSampleRows.length, + passed, + failureReasons, + gateOutcomes: { + metricRegressions: !hasMetricRegressions, + insufficientSamples: !hasInsufficientSamples, + missingCandidateRows: + options.allowMissingCandidate || !hasMissingCandidateRows, + candidateOnlyRows: + !options.failOnCandidateOnly || !hasCandidateOnlyRows, + parseErrors: + !options.failOnParseErrors || + (baselineStats.parseErrors.length === 0 && + candidateStats.parseErrors.length === 0), + zeroComparedRows: !options.failOnZeroCompared || comparisons.length > 0, + skippedFiles: + !options.failOnSkippedFiles || + (baselineStats.skippedFiles === 0 && + candidateStats.skippedFiles === 0), + }, + }, + regressions, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + comparisons, + }; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if ( + options.baselineInputs.length === 0 || + options.candidateInputs.length === 0 + ) { + throw new Error("At least one --baseline and one --candidate are required"); + } + + const baselineFiles = [ + ...new Set(options.baselineInputs.flatMap(collectJsonFiles)), + ]; + const 
candidateFiles = [ + ...new Set(options.candidateInputs.flatMap(collectJsonFiles)), + ]; + if (baselineFiles.length === 0) { + throw new Error("No baseline JSON files found"); + } + if (candidateFiles.length === 0) { + throw new Error("No candidate JSON files found"); + } + + const baselineCollected = collectMetrics(baselineFiles); + const candidateCollected = collectMetrics(candidateFiles); + const baselineRows = baselineCollected.rows; + const candidateRows = candidateCollected.rows; + const { + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + } = compareMetrics(baselineRows, candidateRows, options); + const markdown = toMarkdown( + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + baselineCollected.stats, + candidateCollected.stats, + options, + ); + const outputJson = buildJsonOutput( + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + baselineCollected.stats, + candidateCollected.stats, + options, + ); + + if (options.output) { + fs.writeFileSync(options.output, markdown, "utf8"); + console.log(`Wrote comparison report to ${options.output}`); + } else { + process.stdout.write(markdown); + } + if (options.outputJson) { + fs.writeFileSync( + options.outputJson, + JSON.stringify(outputJson, null, 2), + "utf8", + ); + console.log(`Wrote comparison JSON to ${options.outputJson}`); + } + + if (!outputJson.summary.passed) { + process.exit(1); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js new file mode 100644 index 0000000000..3c5dc0eb0b --- /dev/null +++ b/scripts/finalize-playback-matrix.js @@ -0,0 +1,418 @@ +#!/usr/bin/env node + +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + inputs: [], + outputDir: null, + requireFormats: [], + targetFps: 60, + maxScrubP95Ms: 40, + maxStartupMs: 250, + analyze: true, + publishTarget: null, + compareBaselineInputs: [], + outputJson: null, + allowFpsDrop: 2, + allowStartupIncreaseMs: 25, + allowScrubP95IncreaseMs: 5, + allowMissingCandidate: false, + failOnCandidateOnly: false, + minSamplesPerRow: 1, + failOnParseErrors: false, + failOnZeroCompared: false, + failOnSkippedFiles: false, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--output-dir" || arg === "-o") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-dir"); + options.outputDir = path.resolve(value); + continue; + } + if (arg === "--require-formats") { + const value = argv[++i] ?? ""; + options.requireFormats = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + continue; + } + if (arg === "--target-fps") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --target-fps value"); + } + options.targetFps = value; + continue; + } + if (arg === "--max-scrub-p95-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-scrub-p95-ms value"); + } + options.maxScrubP95Ms = value; + continue; + } + if (arg === "--max-startup-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-startup-ms value"); + } + options.maxStartupMs = value; + continue; + } + if (arg === "--skip-analyze") { + options.analyze = false; + continue; + } + if (arg === "--publish-target") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --publish-target"); + options.publishTarget = path.resolve(value); + continue; + } + if (arg === "--output-json") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-json"); + options.outputJson = path.resolve(value); + continue; + } + if (arg === "--compare-baseline") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --compare-baseline"); + options.compareBaselineInputs.push(path.resolve(value)); + continue; + } + if (arg === "--allow-fps-drop") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-fps-drop value"); + } + options.allowFpsDrop = value; + continue; + } + if (arg === "--allow-startup-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-startup-increase-ms value"); + } + options.allowStartupIncreaseMs = value; + continue; + } + if (arg === "--allow-scrub-p95-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-scrub-p95-increase-ms value"); + } + options.allowScrubP95IncreaseMs = value; + continue; + } + if (arg === "--allow-missing-candidate") { + options.allowMissingCandidate = true; + continue; + } + if (arg === "--fail-on-candidate-only") { + options.failOnCandidateOnly = true; + continue; + } + if (arg === "--min-samples-per-row") { + const value = Number.parseInt(argv[++i] ?? "", 10); + if (!Number.isFinite(value) || value < 1) { + throw new Error("Invalid --min-samples-per-row value"); + } + options.minSamplesPerRow = value; + continue; + } + if (arg === "--fail-on-parse-errors") { + options.failOnParseErrors = true; + continue; + } + if (arg === "--fail-on-zero-compared") { + options.failOnZeroCompared = true; + continue; + } + if (arg === "--fail-on-skipped-files") { + options.failOnSkippedFiles = true; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] [--fail-on-skipped-files] [--publish-target ] + +Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); +} + +function run(command, args) { + const result = spawnSync(command, args, { stdio: "inherit" }); + if (result.status !== 0) { + throw new Error(`Command failed: ${command} ${args.join(" ")}`); + } +} + +function readCommandOutput(command, args) { + const result = spawnSync(command, args, { encoding: "utf8" }); + if (result.status !== 0) { + return null; + } + return result.stdout.trim() || null; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + if (!options.outputDir) { + throw new Error("--output-dir is required"); + } + if (!fs.existsSync(options.outputDir)) { + fs.mkdirSync(options.outputDir, { recursive: true }); + } + + const aggregatePath = path.join( + options.outputDir, + "playback-benchmark-aggregate.md", + ); + const statusPath = path.join(options.outputDir, "playback-matrix-status.md"); + const validationPath = path.join( + options.outputDir, + "playback-matrix-validation.json", + ); + const bottleneckPath = path.join( + options.outputDir, + "playback-bottlenecks.md", + ); + const bottleneckJsonPath = path.join( + options.outputDir, + "playback-bottlenecks.json", + ); + const comparisonPath = path.join(options.outputDir, "playback-comparison.md"); + const comparisonJsonPath = path.join( + options.outputDir, + "playback-comparison.json", + ); + const summaryJsonPath = + options.outputJson ?? 
+ path.join(options.outputDir, "playback-finalize-summary.json"); + + const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; + const statusArgs = ["scripts/build-playback-matrix-report.js"]; + const validateArgs = [ + "scripts/validate-playback-matrix.js", + "--output-json", + validationPath, + ]; + + for (const input of options.inputs) { + aggregateArgs.push("--input", input); + statusArgs.push("--input", input); + validateArgs.push("--input", input); + } + + aggregateArgs.push("--output", aggregatePath); + statusArgs.push("--output", statusPath); + if (options.requireFormats.length > 0) { + validateArgs.push("--require-formats", options.requireFormats.join(",")); + } + + run("node", aggregateArgs); + run("node", statusArgs); + run("node", validateArgs); + if (options.analyze) { + const analyzeArgs = ["scripts/analyze-playback-matrix-bottlenecks.js"]; + for (const input of options.inputs) { + analyzeArgs.push("--input", input); + } + analyzeArgs.push( + "--output", + bottleneckPath, + "--output-json", + bottleneckJsonPath, + "--target-fps", + String(options.targetFps), + "--max-scrub-p95-ms", + String(options.maxScrubP95Ms), + "--max-startup-ms", + String(options.maxStartupMs), + ); + run("node", analyzeArgs); + } + if (options.compareBaselineInputs.length > 0) { + const compareArgs = ["scripts/compare-playback-benchmark-runs.js"]; + for (const baselineInput of options.compareBaselineInputs) { + compareArgs.push("--baseline", baselineInput); + } + for (const candidateInput of options.inputs) { + compareArgs.push("--candidate", candidateInput); + } + compareArgs.push( + "--output", + comparisonPath, + "--output-json", + comparisonJsonPath, + "--allow-fps-drop", + String(options.allowFpsDrop), + "--allow-startup-increase-ms", + String(options.allowStartupIncreaseMs), + "--allow-scrub-p95-increase-ms", + String(options.allowScrubP95IncreaseMs), + ); + if (options.allowMissingCandidate) { + compareArgs.push("--allow-missing-candidate"); + } + if 
(options.failOnCandidateOnly) { + compareArgs.push("--fail-on-candidate-only"); + } + if (options.failOnParseErrors) { + compareArgs.push("--fail-on-parse-errors"); + } + if (options.failOnZeroCompared) { + compareArgs.push("--fail-on-zero-compared"); + } + if (options.failOnSkippedFiles) { + compareArgs.push("--fail-on-skipped-files"); + } + compareArgs.push("--min-samples-per-row", String(options.minSamplesPerRow)); + run("node", compareArgs); + } + const validation = JSON.parse(fs.readFileSync(validationPath, "utf8")); + const comparison = + options.compareBaselineInputs.length > 0 + ? JSON.parse(fs.readFileSync(comparisonJsonPath, "utf8")) + : null; + const gitBranch = readCommandOutput("git", [ + "rev-parse", + "--abbrev-ref", + "HEAD", + ]); + const gitCommit = readCommandOutput("git", ["rev-parse", "HEAD"]); + const summary = { + generatedAt: new Date().toISOString(), + inputs: options.inputs, + outputDir: options.outputDir, + git: { + branch: gitBranch, + commit: gitCommit, + }, + artifacts: { + aggregatePath, + statusPath, + validationPath, + bottleneckPath: options.analyze ? bottleneckPath : null, + bottleneckJsonPath: options.analyze ? bottleneckJsonPath : null, + comparisonPath: + options.compareBaselineInputs.length > 0 ? comparisonPath : null, + comparisonJsonPath: + options.compareBaselineInputs.length > 0 ? 
comparisonJsonPath : null, + }, + settings: { + requireFormats: options.requireFormats, + targetFps: options.targetFps, + maxScrubP95Ms: options.maxScrubP95Ms, + maxStartupMs: options.maxStartupMs, + analyze: options.analyze, + publishTarget: options.publishTarget, + compareBaselineInputs: options.compareBaselineInputs, + allowFpsDrop: options.allowFpsDrop, + allowStartupIncreaseMs: options.allowStartupIncreaseMs, + allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, + allowMissingCandidate: options.allowMissingCandidate, + failOnCandidateOnly: options.failOnCandidateOnly, + minSamplesPerRow: options.minSamplesPerRow, + failOnParseErrors: options.failOnParseErrors, + failOnZeroCompared: options.failOnZeroCompared, + failOnSkippedFiles: options.failOnSkippedFiles, + }, + results: { + validationPassed: validation.passed === true, + comparisonPassed: comparison ? comparison.summary?.passed === true : null, + comparisonComparedRows: comparison?.summary?.comparedRows ?? null, + comparisonRegressions: comparison?.summary?.regressions ?? null, + comparisonMissingCandidateRows: + comparison?.summary?.missingCandidateRows ?? null, + comparisonCandidateOnlyRows: + comparison?.summary?.candidateOnlyRows ?? null, + comparisonInsufficientSampleRows: + comparison?.summary?.insufficientSampleRows ?? null, + comparisonFailureReasons: comparison?.summary?.failureReasons ?? null, + comparisonGateOutcomes: comparison?.summary?.gateOutcomes ?? null, + comparisonFileStats: comparison?.fileStats ?? 
null, + }, + }; + fs.writeFileSync(summaryJsonPath, JSON.stringify(summary, null, 2), "utf8"); + if (options.publishTarget) { + const publishArgs = [ + "scripts/publish-playback-matrix-summary.js", + "--aggregate-md", + aggregatePath, + "--status-md", + statusPath, + "--validation-json", + validationPath, + "--target", + options.publishTarget, + ]; + if (options.analyze) { + publishArgs.push("--bottlenecks-md", bottleneckPath); + } + if (options.compareBaselineInputs.length > 0) { + publishArgs.push( + "--comparison-md", + comparisonPath, + "--comparison-json", + comparisonJsonPath, + ); + } + publishArgs.push("--finalize-summary-json", summaryJsonPath); + run("node", publishArgs); + } + + console.log(`Aggregate markdown: ${aggregatePath}`); + console.log(`Status markdown: ${statusPath}`); + console.log(`Validation JSON: ${validationPath}`); + if (options.analyze) { + console.log(`Bottleneck analysis: ${bottleneckPath}`); + console.log(`Bottleneck analysis JSON: ${bottleneckJsonPath}`); + } + if (options.publishTarget) { + console.log(`Published target: ${options.publishTarget}`); + } + if (options.compareBaselineInputs.length > 0) { + console.log(`Comparison report: ${comparisonPath}`); + console.log(`Comparison JSON: ${comparisonJsonPath}`); + } + console.log(`Finalize summary JSON: ${summaryJsonPath}`); +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js new file mode 100644 index 0000000000..55f6b5b4cc --- /dev/null +++ b/scripts/publish-playback-matrix-summary.js @@ -0,0 +1,267 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + aggregateMd: null, + statusMd: null, + bottlenecksMd: null, + comparisonMd: null, + comparisonJson: null, + finalizeSummaryJson: null, + validationJson: null, + target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--aggregate-md") { + options.aggregateMd = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--status-md") { + options.statusMd = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--bottlenecks-md") { + options.bottlenecksMd = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--comparison-md") { + options.comparisonMd = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--comparison-json") { + options.comparisonJson = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--finalize-summary-json") { + options.finalizeSummaryJson = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--validation-json") { + options.validationJson = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--target") { + options.target = path.resolve(argv[++i] ?? 
""); + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--comparison-json ] [--finalize-summary-json ] [--target ] + +Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); +} + +function ensureFile(filePath, label) { + if (!filePath || !fs.existsSync(filePath)) { + throw new Error(`${label} file not found: ${filePath ?? "undefined"}`); + } +} + +function buildSummarySection( + aggregateMd, + statusMd, + validationJson, + bottlenecksMd, + comparisonMd, + comparisonJson, + finalizeSummaryJson, +) { + const now = new Date().toISOString(); + const validation = JSON.parse(validationJson); + const status = validation.passed ? "✅ MATRIX PASS" : "❌ MATRIX FAIL"; + + let markdown = ""; + markdown += `### Matrix Summary Run: ${now}\n\n`; + markdown += `**Validation:** ${status}\n\n`; + markdown += `- Validated cells: ${validation.validatedCells}\n`; + markdown += `- Observed cells: ${validation.observedCells}\n`; + markdown += `- Missing cells: ${validation.missingCells?.length ?? 0}\n`; + markdown += `- Format failures: ${validation.formatFailures?.length ?? 0}\n\n`; + if (comparisonJson) { + const comparison = JSON.parse(comparisonJson); + const comparisonPassed = comparison.summary?.passed === true; + markdown += `- Comparison gate: ${comparisonPassed ? "✅ PASS" : "❌ FAIL"}\n`; + markdown += `- Comparison regressions: ${comparison.summary?.regressions ?? "n/a"}\n`; + markdown += `- Missing candidate rows: ${comparison.summary?.missingCandidateRows ?? "n/a"}\n\n`; + markdown += `- Candidate-only rows: ${comparison.summary?.candidateOnlyRows ?? "n/a"}\n\n`; + markdown += `- Insufficient sample rows: ${comparison.summary?.insufficientSampleRows ?? 
"n/a"}\n`; + markdown += `- Minimum samples per row: ${comparison.tolerance?.minSamplesPerRow ?? "n/a"}\n\n`; + markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; + markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; + markdown += `- Parse error policy: ${comparison.tolerance?.failOnParseErrors ? "fail" : "allow"}\n`; + markdown += `- Zero-compare policy: ${comparison.tolerance?.failOnZeroCompared ? "fail" : "allow"}\n`; + markdown += `- Skipped-file policy: ${comparison.tolerance?.failOnSkippedFiles ? "fail" : "allow"}\n`; + markdown += `- Baseline parse errors: ${comparison.fileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; + markdown += `- Candidate parse errors: ${comparison.fileStats?.candidate?.parseErrors?.length ?? "n/a"}\n\n`; + markdown += `- Baseline skipped files: ${comparison.fileStats?.baseline?.skippedFiles ?? "n/a"}\n`; + markdown += `- Candidate skipped files: ${comparison.fileStats?.candidate?.skippedFiles ?? "n/a"}\n`; + markdown += `- Baseline skipped (no reports): ${comparison.fileStats?.baseline?.skippedNoReports ?? "n/a"}\n`; + markdown += `- Candidate skipped (no reports): ${comparison.fileStats?.candidate?.skippedNoReports ?? "n/a"}\n`; + markdown += `- Baseline skipped (no usable metrics): ${comparison.fileStats?.baseline?.skippedNoUsableMetrics ?? "n/a"}\n`; + markdown += `- Candidate skipped (no usable metrics): ${comparison.fileStats?.candidate?.skippedNoUsableMetrics ?? "n/a"}\n\n`; + const failureReasons = Array.isArray(comparison.summary?.failureReasons) + ? comparison.summary.failureReasons + : []; + if (failureReasons.length > 0) { + markdown += `- Comparison failure reasons: ${failureReasons.join(", ")}\n\n`; + } + } + if (finalizeSummaryJson) { + const finalizeSummary = JSON.parse(finalizeSummaryJson); + markdown += `- Finalize source branch: ${finalizeSummary.git?.branch ?? 
"n/a"}\n`; + markdown += `- Finalize source commit: ${finalizeSummary.git?.commit ?? "n/a"}\n`; + markdown += `- Finalize validation passed: ${finalizeSummary.results?.validationPassed === true ? "true" : "false"}\n`; + if (finalizeSummary.results?.comparisonPassed !== null) { + markdown += `- Finalize comparison passed: ${finalizeSummary.results?.comparisonPassed === true ? "true" : "false"}\n`; + } + if (finalizeSummary.results?.comparisonComparedRows !== null) { + markdown += `- Finalize compared rows: ${finalizeSummary.results?.comparisonComparedRows}\n`; + markdown += `- Finalize comparison regressions: ${finalizeSummary.results?.comparisonRegressions ?? "n/a"}\n`; + markdown += `- Finalize missing candidate rows: ${finalizeSummary.results?.comparisonMissingCandidateRows ?? "n/a"}\n`; + markdown += `- Finalize candidate-only rows: ${finalizeSummary.results?.comparisonCandidateOnlyRows ?? "n/a"}\n`; + markdown += `- Finalize insufficient sample rows: ${finalizeSummary.results?.comparisonInsufficientSampleRows ?? "n/a"}\n`; + } + const finalizeFileStats = finalizeSummary.results?.comparisonFileStats; + if (finalizeFileStats?.baseline || finalizeFileStats?.candidate) { + markdown += `- Finalize baseline parse errors: ${finalizeFileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; + markdown += `- Finalize candidate parse errors: ${finalizeFileStats?.candidate?.parseErrors?.length ?? "n/a"}\n`; + } + const finalizeFailureReasons = Array.isArray( + finalizeSummary.results?.comparisonFailureReasons, + ) + ? finalizeSummary.results.comparisonFailureReasons + : []; + if (finalizeFailureReasons.length > 0) { + markdown += `- Finalize comparison failure reasons: ${finalizeFailureReasons.join(", ")}\n`; + } + markdown += "\n"; + } + + if ((validation.missingCells?.length ?? 
0) > 0) { + markdown += "**Missing Cells**\n"; + for (const cell of validation.missingCells) { + markdown += `- ${cell.platform}:${cell.gpu}:${cell.scenario}\n`; + } + markdown += "\n"; + } + + if ((validation.formatFailures?.length ?? 0) > 0) { + markdown += "**Format Failures**\n"; + for (const failure of validation.formatFailures) { + markdown += `- ${failure.platform}:${failure.gpu}:${failure.scenario} missing ${failure.requiredFormat}\n`; + } + markdown += "\n"; + } + + markdown += "
<details>\n<summary>Matrix Status Report</summary>\n\n";
+  markdown += `${statusMd.trim()}\n\n`;
+  markdown += "</details>
\n\n"; + + markdown += "
<details>\n<summary>Aggregate Benchmark Report</summary>\n\n";
+  markdown += `${aggregateMd.trim()}\n\n`;
+  markdown += "</details>
\n\n"; + + if (bottlenecksMd) { + markdown += "
<details>\n<summary>Bottleneck Analysis</summary>\n\n";
+    markdown += `${bottlenecksMd.trim()}\n\n`;
+    markdown += "</details>
\n\n"; + } + if (comparisonMd) { + markdown += + "
<details>\n<summary>Baseline vs Candidate Comparison</summary>\n\n";
+    markdown += `${comparisonMd.trim()}\n\n`;
+    markdown += "</details>
\n\n";
+  }
+
+  return markdown;
+}
+
+function writeToBenchmarkHistory(targetFile, summaryMd) {
+  // NOTE(review): the marker strings below arrived emptied by HTML-stripping during
+  // extraction — "" can never delimit a region (indexOf("") is 0 for both, so the
+  // start >= end guard always throws). Reconstructed as HTML comment markers;
+  // confirm the exact token text against the target PLAYBACK-BENCHMARKS.md file.
+  const markerStart = "<!-- BENCHMARK RESULTS START -->";
+  const markerEnd = "<!-- BENCHMARK RESULTS END -->";
+  const current = fs.readFileSync(targetFile, "utf8");
+  const start = current.indexOf(markerStart);
+  const end = current.indexOf(markerEnd);
+  if (start === -1 || end === -1 || start >= end) {
+    throw new Error(`Could not find benchmark result markers in ${targetFile}`);
+  }
+
+  const insertPos = start + markerStart.length;
+  const updated =
+    current.slice(0, insertPos) + "\n\n" + summaryMd + current.slice(end);
+  fs.writeFileSync(targetFile, updated, "utf8");
+}
+
+function main() {
+  const options = parseArgs(process.argv);
+  if (options.help) {
+    usage();
+    return;
+  }
+
+  ensureFile(options.aggregateMd, "Aggregate markdown");
+  ensureFile(options.statusMd, "Status markdown");
+  ensureFile(options.validationJson, "Validation JSON");
+  if (options.bottlenecksMd) {
+    ensureFile(options.bottlenecksMd, "Bottlenecks markdown");
+  }
+  if (options.comparisonMd) {
+    ensureFile(options.comparisonMd, "Comparison markdown");
+  }
+  if (options.comparisonJson) {
+    ensureFile(options.comparisonJson, "Comparison JSON");
+  }
+  if (options.finalizeSummaryJson) {
+    ensureFile(options.finalizeSummaryJson, "Finalize summary JSON");
+  }
+  ensureFile(options.target, "Target");
+
+  const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8");
+  const statusMd = fs.readFileSync(options.statusMd, "utf8");
+  const validationJson = fs.readFileSync(options.validationJson, "utf8");
+  const bottlenecksMd = options.bottlenecksMd
+    ? fs.readFileSync(options.bottlenecksMd, "utf8")
+    : null;
+  const comparisonMd = options.comparisonMd
+    ? fs.readFileSync(options.comparisonMd, "utf8")
+    : null;
+  const comparisonJson = options.comparisonJson
+    ? fs.readFileSync(options.comparisonJson, "utf8")
+    : null;
+  const finalizeSummaryJson = options.finalizeSummaryJson
+    ?
fs.readFileSync(options.finalizeSummaryJson, "utf8") + : null; + const summaryMd = buildSummarySection( + aggregateMd, + statusMd, + validationJson, + bottlenecksMd, + comparisonMd, + comparisonJson, + finalizeSummaryJson, + ); + writeToBenchmarkHistory(options.target, summaryMd); + console.log(`Published matrix summary into ${options.target}`); +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js new file mode 100644 index 0000000000..626c19a7ba --- /dev/null +++ b/scripts/run-playback-benchmark-matrix.js @@ -0,0 +1,255 @@ +#!/usr/bin/env node + +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + platform: null, + gpu: null, + outputDir: null, + fps: 60, + startupThresholdMs: 250, + recordingPath: null, + inputDir: null, + validate: true, + requireFormats: [], + scenarios: ["full", "scrub"], + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") { + continue; + } + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--platform") { + options.platform = argv[++i] ?? null; + continue; + } + if (arg === "--gpu") { + options.gpu = argv[++i] ?? null; + continue; + } + if (arg === "--output-dir") { + options.outputDir = argv[++i] ?? null; + continue; + } + if (arg === "--fps") { + const value = Number.parseInt(argv[++i] ?? "", 10); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --fps value"); + } + options.fps = value; + continue; + } + if (arg === "--startup-threshold-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --startup-threshold-ms value"); + } + options.startupThresholdMs = value; + continue; + } + if (arg === "--recording-path") { + options.recordingPath = argv[++i] ?? null; + continue; + } + if (arg === "--input-dir") { + options.inputDir = argv[++i] ?? null; + continue; + } + if (arg === "--skip-validate") { + options.validate = false; + continue; + } + if (arg === "--require-formats") { + const value = argv[++i] ?? ""; + options.requireFormats = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + continue; + } + if (arg === "--scenarios") { + const value = argv[++i] ?? ""; + const scenarios = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + if (scenarios.length === 0) { + throw new Error("Invalid --scenarios value"); + } + options.scenarios = scenarios; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--startup-threshold-ms 250] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] [--scenarios full,scrub] + +Runs playback benchmark matrix scenarios and writes JSON outputs. 
+ +Required: + --platform Platform label (for notes metadata) + --gpu GPU label (for notes metadata) + --output-dir Directory for benchmark JSON outputs + +Optional: + --fps FPS for benchmark runs (default: 60) + --startup-threshold-ms Startup-to-first-frame threshold in ms (default: 250) + --recording-path Specific recording path + --input-dir Recording discovery directory + --require-formats Required formats for local validation (comma-separated) + --scenarios Scenarios to run (comma-separated; default: full,scrub) + --skip-validate Skip post-run validation`); +} + +function run(command, args) { + const result = spawnSync(command, args, { stdio: "inherit" }); + if (result.status !== 0) { + throw new Error(`Command failed: ${command} ${args.join(" ")}`); + } +} + +function scenarioOutputPath(outputDir, platform, gpu, scenario) { + const stamp = new Date().toISOString().replace(/[:.]/g, "-"); + return path.join(outputDir, `${stamp}-${platform}-${gpu}-${scenario}.json`); +} + +function scenarioArgs(options, scenario) { + const jsonOutput = scenarioOutputPath( + options.outputDir, + options.platform, + options.gpu, + scenario, + ); + const notes = `platform=${options.platform} gpu=${options.gpu} scenario=${scenario}`; + + const args = [ + "run", + "-p", + "cap-recording", + "--example", + "playback-test-runner", + "--", + scenario, + "--fps", + String(options.fps), + "--startup-threshold-ms", + String(options.startupThresholdMs), + "--json-output", + jsonOutput, + "--notes", + notes, + ]; + + if (options.recordingPath) { + args.push("--recording-path", options.recordingPath); + } else if (options.inputDir) { + args.push("--input-dir", options.inputDir); + } + + return args; +} + +function validateOptions(options) { + if (!options.platform || !options.gpu || !options.outputDir) { + throw new Error( + "Missing required options: --platform, --gpu, --output-dir", + ); + } + const validScenarios = new Set([ + "full", + "scrub", + "decoder", + "playback", + "audio-sync", + 
"camera-sync", + ]); + for (const scenario of options.scenarios) { + if (!validScenarios.has(scenario)) { + throw new Error(`Unsupported scenario: ${scenario}`); + } + } + + const absoluteOutputDir = path.resolve(options.outputDir); + options.outputDir = absoluteOutputDir; + if (!fs.existsSync(absoluteOutputDir)) { + fs.mkdirSync(absoluteOutputDir, { recursive: true }); + } +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + + validateOptions(options); + + console.log( + `Running matrix for platform=${options.platform} gpu=${options.gpu}`, + ); + for (const scenario of options.scenarios) { + run("cargo", scenarioArgs(options, scenario)); + } + + const aggregatePath = path.join( + options.outputDir, + `${options.platform}-${options.gpu}-aggregate.md`, + ); + run("node", [ + "scripts/aggregate-playback-benchmarks.js", + "--input", + options.outputDir, + "--output", + aggregatePath, + ]); + console.log(`Aggregate markdown: ${aggregatePath}`); + + if (options.validate) { + const validationJsonPath = path.join( + options.outputDir, + `${options.platform}-${options.gpu}-validation.json`, + ); + const validateArgs = [ + "scripts/validate-playback-matrix.js", + "--input", + options.outputDir, + "--no-default-matrix", + "--output-json", + validationJsonPath, + ]; + for (const scenario of options.scenarios) { + validateArgs.push( + "--require-cell", + `${options.platform}:${options.gpu}:${scenario}`, + ); + } + + if (options.requireFormats.length > 0) { + validateArgs.push("--require-formats", options.requireFormats.join(",")); + } + + run("node", validateArgs); + console.log("Matrix run validation passed"); + console.log(`Validation JSON: ${validationJsonPath}`); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js new file mode 100644 index 0000000000..b06f01e006 --- /dev/null +++ b/scripts/validate-playback-matrix.js @@ -0,0 +1,259 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +const DEFAULT_REQUIRED_CELLS = [ + { platform: "macos-13", gpu: "apple-silicon", scenario: "full" }, + { platform: "macos-13", gpu: "apple-silicon", scenario: "scrub" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "integrated", scenario: "full" }, + { platform: "windows-11", gpu: "integrated", scenario: "scrub" }, +]; + +function parseArgs(argv) { + const options = { + inputs: [], + requiredCells: [], + requiredFormats: [], + useDefaultMatrix: true, + outputJson: null, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") { + continue; + } + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--require-cell") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --require-cell"); + options.requiredCells.push(parseCell(value)); + options.useDefaultMatrix = false; + continue; + } + if (arg === "--require-formats") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --require-formats"); + options.requiredFormats = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + continue; + } + if (arg === 
"--no-default-matrix") { + options.useDefaultMatrix = false; + continue; + } + if (arg === "--output-json") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-json"); + options.outputJson = path.resolve(value); + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function parseCell(value) { + const [platform, gpu, scenario] = value.split(":"); + if (!platform || !gpu || !scenario) { + throw new Error( + `Invalid --require-cell format: ${value}. Expected platform:gpu:scenario`, + ); + } + return { platform, gpu, scenario }; +} + +function printUsage() { + console.log(`Usage: node scripts/validate-playback-matrix.js --input [--input ...] [--require-cell platform:gpu:scenario ...] [--require-formats mp4,fragmented] [--output-json ] + +Validates that required benchmark matrix cells are present in playback benchmark JSON results. + +Options: + --input, -i JSON file or directory containing JSON files (repeatable) + --require-cell Required cell as platform:gpu:scenario (repeatable) + --require-formats Comma-separated required formats per cell + --no-default-matrix Disable built-in required matrix + --output-json Write validation result JSON file + --help, -h Show help`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? 
[targetPath] : []; + } + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const result = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + result[key.trim()] = value.trim(); + } + return result; +} + +function keyForCell(cell) { + return `${cell.platform}|${cell.gpu}|${cell.scenario}`; +} + +function collectObservedCells(files) { + const observed = new Map(); + for (const filePath of files) { + const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const key = keyForCell({ platform, gpu, scenario }); + if (!observed.has(key)) { + observed.set(key, { + platform, + gpu, + scenario, + formats: new Set(), + files: new Set(), + }); + } + const entry = observed.get(key); + entry.files.add(filePath); + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + for (const report of reports) { + entry.formats.add(report.is_fragmented ? 
"fragmented" : "mp4"); + } + } + return observed; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + printUsage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const input of options.inputs) { + for (const filePath of collectJsonFiles(input)) { + files.add(filePath); + } + } + if (files.size === 0) { + throw new Error("No JSON files found"); + } + + const requiredCells = options.useDefaultMatrix + ? [...DEFAULT_REQUIRED_CELLS, ...options.requiredCells] + : options.requiredCells; + if (requiredCells.length === 0) { + throw new Error("No required matrix cells configured"); + } + + const observed = collectObservedCells([...files]); + const missingCells = []; + const formatFailures = []; + + for (const cell of requiredCells) { + const key = keyForCell(cell); + const observedCell = observed.get(key); + if (!observedCell) { + missingCells.push(cell); + continue; + } + for (const requiredFormat of options.requiredFormats) { + if (!observedCell.formats.has(requiredFormat)) { + formatFailures.push({ + ...cell, + requiredFormat, + observedFormats: [...observedCell.formats], + }); + } + } + } + + const validationResult = { + validatedCells: requiredCells.length, + observedCells: observed.size, + requiredFormats: options.requiredFormats, + missingCells, + formatFailures, + passed: missingCells.length === 0 && formatFailures.length === 0, + }; + + if (options.outputJson) { + fs.writeFileSync( + options.outputJson, + JSON.stringify(validationResult, null, 2), + ); + console.log(`Validation JSON: ${options.outputJson}`); + } + + console.log(`Validated ${validationResult.validatedCells} required cells`); + console.log(`Observed ${validationResult.observedCells} unique cells`); + + if (missingCells.length > 0) { + console.log("Missing required cells:"); + for (const cell of missingCells) { + console.log(` - 
${cell.platform}:${cell.gpu}:${cell.scenario}`); + } + } + + if (formatFailures.length > 0) { + console.log("Missing required formats:"); + for (const failure of formatFailures) { + console.log( + ` - ${failure.platform}:${failure.gpu}:${failure.scenario} missing ${failure.requiredFormat} (observed: ${failure.observedFormats.join(", ") || "none"})`, + ); + } + } + + if (!validationResult.passed) { + process.exit(1); + } + + console.log("Matrix validation passed"); +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +}