From c4491c40f209720cbe1173c044328365ef28a0e7 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 20:22:05 +0000 Subject: [PATCH 001/333] improve: switch editor playback to streaming audio and live seek Co-authored-by: Richie McIlroy --- apps/desktop/src-tauri/src/lib.rs | 18 +++ .../src/routes/editor/Timeline/index.tsx | 27 +--- crates/editor/src/audio.rs | 17 ++- crates/editor/src/playback.rs | 138 +++++++++++++++--- 4 files changed, 157 insertions(+), 43 deletions(-) diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs index 00dc6903b1..c00d465ced 100644 --- a/apps/desktop/src-tauri/src/lib.rs +++ b/apps/desktop/src-tauri/src/lib.rs @@ -1921,6 +1921,15 @@ async fn set_playhead_position( }) .await; + let playback_handle = { + let state = editor_instance.state.lock().await; + state.playback_task.clone() + }; + + if let Some(handle) = playback_handle { + handle.seek(frame_number); + } + Ok(()) } @@ -2545,6 +2554,15 @@ async fn seek_to(editor_instance: WindowEditorInstance, frame_number: u32) -> Re }) .await; + let playback_handle = { + let state = editor_instance.state.lock().await; + state.playback_task.clone() + }; + + if let Some(handle) = playback_handle { + handle.seek(frame_number); + } + Ok(()) } diff --git a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx index 8aacea955f..da839931ad 100644 --- a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -89,7 +89,6 @@ export function Timeline() { editorState, projectActions, meta, - previewResolutionBase, } = useEditorContext(); const duration = () => editorInstance.recordingDuration; @@ -274,32 +273,16 @@ export function Timeline() { maskSegmentDragState.type !== "moving" && textSegmentDragState.type !== "moving" ) { - // Guard against missing bounds and clamp computed time to [0, totalDuration()] if (left == null) return; const rawTime = secsPerPixel() * 
(e.clientX - left) + transform().position; const newTime = Math.min(Math.max(0, rawTime), totalDuration()); + const targetFrame = Math.round(newTime * FPS); - // If playing, some backends require restart to seek reliably - if (editorState.playing) { - try { - await commands.stopPlayback(); - - // Round to nearest frame to prevent off-by-one drift - const targetFrame = Math.round(newTime * FPS); - await commands.seekTo(targetFrame); - - // If the user paused during these async ops, bail out without restarting - if (!editorState.playing) { - setEditorState("playbackTime", newTime); - return; - } - - await commands.startPlayback(FPS, previewResolutionBase()); - setEditorState("playing", true); - } catch (err) { - console.error("Failed to seek during playback:", err); - } + try { + await commands.seekTo(targetFrame); + } catch (err) { + console.error("Failed to seek timeline playhead:", err); } setEditorState("playbackTime", newTime); diff --git a/crates/editor/src/audio.rs b/crates/editor/src/audio.rs index d2cdbf8df4..7051ce5549 100644 --- a/crates/editor/src/audio.rs +++ b/crates/editor/src/audio.rs @@ -7,7 +7,6 @@ use cap_project::{AudioConfiguration, ClipOffsets, ProjectConfiguration, Timelin use ffmpeg::{ ChannelLayout, Dictionary, format as avformat, frame::Audio as FFAudio, software::resampling, }; -#[cfg(not(target_os = "windows"))] use ringbuf::{ HeapRb, traits::{Consumer, Observer, Producer}, @@ -248,14 +247,12 @@ impl AudioRenderer { } } -#[cfg(not(target_os = "windows"))] pub struct AudioPlaybackBuffer { frame_buffer: AudioRenderer, resampler: AudioResampler, resampled_buffer: HeapRb, } -#[cfg(not(target_os = "windows"))] impl AudioPlaybackBuffer { pub const PLAYBACK_SAMPLES_COUNT: u32 = 512; @@ -296,6 +293,19 @@ impl AudioPlaybackBuffer { self.frame_buffer.set_playhead(playhead, project); } + #[cfg(target_os = "windows")] + pub fn set_playhead_smooth(&mut self, playhead: f64, project: &ProjectConfiguration) { + let current_playhead = 
self.frame_buffer.elapsed_samples_to_playhead(); + let drift = (playhead - current_playhead).abs(); + + if drift > 0.2 { + self.set_playhead(playhead, project); + return; + } + + self.frame_buffer.set_playhead(playhead, project); + } + #[allow(dead_code)] pub fn current_playhead(&self) -> f64 { self.frame_buffer.elapsed_samples_to_playhead() @@ -426,7 +436,6 @@ impl AudioResampler { }) } - #[cfg(not(target_os = "windows"))] pub fn reset(&mut self) { *self = Self::new(self.output).unwrap(); } diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 000f209c6b..3672590369 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -1,6 +1,6 @@ -use cap_audio::FromSampleBytes; -#[cfg(not(target_os = "windows"))] -use cap_audio::{LatencyCorrectionConfig, LatencyCorrector, default_output_latency_hint}; +use cap_audio::{ + FromSampleBytes, LatencyCorrectionConfig, LatencyCorrector, default_output_latency_hint, +}; use cap_media::MediaError; use cap_media_info::AudioInfo; use cap_project::{ProjectConfiguration, XY}; @@ -8,7 +8,6 @@ use cap_rendering::{ DecodedSegmentFrames, ProjectUniforms, RenderVideoConstants, ZoomFocusInterpolator, spring_mass_damper::SpringMassDamperSimulationConfig, }; -#[cfg(not(target_os = "windows"))] use cpal::{BufferSize, SupportedBufferSize}; use cpal::{ SampleFormat, @@ -28,7 +27,6 @@ use tokio::{ }; use tracing::{error, info, warn}; -#[cfg(not(target_os = "windows"))] use crate::audio::AudioPlaybackBuffer; use crate::{ audio::AudioSegment, editor, editor_instance::SegmentMedia, segments::get_audio_segments, @@ -64,6 +62,7 @@ pub enum PlaybackEvent { pub struct PlaybackHandle { stop_tx: watch::Sender, event_rx: watch::Receiver, + seek_tx: tokio_mpsc::UnboundedSender, } struct PrefetchedFrame { @@ -118,10 +117,12 @@ impl Playback { let (event_tx, mut event_rx) = watch::channel(PlaybackEvent::Start); event_rx.borrow_and_update(); + let (seek_tx, mut seek_rx) = tokio_mpsc::unbounded_channel::(); let 
handle = PlaybackHandle { stop_tx: stop_tx.clone(), event_rx, + seek_tx, }; let (prefetch_tx, mut prefetch_rx) = @@ -437,10 +438,33 @@ impl Playback { .make_contiguous() .sort_by_key(|p| p.frame_number); - let start = Instant::now(); + let mut playback_anchor_start = Instant::now(); + let mut playback_anchor_frame = frame_number; let mut cached_project = self.project.borrow().clone(); 'playback: loop { + let mut pending_seek = None; + while let Ok(next_seek_frame) = seek_rx.try_recv() { + pending_seek = Some(next_seek_frame); + } + + if let Some(seek_frame) = pending_seek { + frame_number = seek_frame; + playback_anchor_start = Instant::now(); + playback_anchor_frame = seek_frame; + prefetch_buffer.retain(|p| p.frame_number >= frame_number); + frame_cache.cache.clear(); + let _ = frame_request_tx.send(frame_number); + let _ = playback_position_tx.send(frame_number); + if has_audio + && audio_playhead_tx + .send(frame_number as f64 / fps_f64) + .is_err() + { + break 'playback; + } + } + if self.project.has_changed().unwrap_or(false) { cached_project = self.project.borrow_and_update().clone(); } @@ -465,11 +489,28 @@ impl Playback { } } - let frame_offset = frame_number.saturating_sub(self.start_frame_number) as f64; - let next_deadline = start + frame_duration.mul_f64(frame_offset); + let frame_offset = frame_number.saturating_sub(playback_anchor_frame) as f64; + let next_deadline = playback_anchor_start + frame_duration.mul_f64(frame_offset); tokio::select! 
{ _ = stop_rx.changed() => break 'playback, + Some(seek_frame) = seek_rx.recv() => { + frame_number = seek_frame; + playback_anchor_start = Instant::now(); + playback_anchor_frame = seek_frame; + prefetch_buffer.retain(|p| p.frame_number >= frame_number); + frame_cache.cache.clear(); + let _ = frame_request_tx.send(frame_number); + let _ = playback_position_tx.send(frame_number); + if has_audio + && audio_playhead_tx + .send(frame_number as f64 / fps_f64) + .is_err() + { + break 'playback; + } + continue; + } _ = tokio::time::sleep_until(next_deadline) => {} } @@ -699,8 +740,8 @@ impl Playback { break 'playback; } - let expected_frame = self.start_frame_number - + (start.elapsed().as_secs_f64() * fps_f64).floor() as u32; + let expected_frame = playback_anchor_frame + + (playback_anchor_start.elapsed().as_secs_f64() * fps_f64).floor() as u32; if frame_number < expected_frame { let frames_behind = expected_frame - frame_number; @@ -742,6 +783,10 @@ impl PlaybackHandle { self.stop_tx.send(true).ok(); } + pub fn seek(&self, frame_number: u32) { + let _ = self.seek_tx.send(frame_number); + } + pub async fn receive_event(&mut self) -> watch::Ref<'_, PlaybackEvent> { self.event_rx.changed().await.ok(); self.event_rx.borrow_and_update() @@ -759,6 +804,12 @@ struct AudioPlayback { } impl AudioPlayback { + fn use_prerendered_audio() -> bool { + std::env::var("CAP_AUDIO_PRERENDER_PLAYBACK") + .map(|value| value == "1" || value.eq_ignore_ascii_case("true")) + .unwrap_or(false) + } + fn spawn(self) -> bool { let handle = tokio::runtime::Handle::current(); @@ -787,26 +838,80 @@ impl AudioPlayback { } }; + let use_prerendered_audio = Self::use_prerendered_audio(); let duration_secs = self.duration_secs; + if use_prerendered_audio { + info!("Using pre-rendered audio playback mode"); + } else { + info!("Using low-latency streaming audio playback mode"); + } let result = match supported_config.sample_format() { SampleFormat::I16 => { - self.create_stream_prerendered::(device, 
supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::I32 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::F32 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::I64 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::U8 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::F64 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } format => { error!( @@ -843,7 +948,6 @@ impl AudioPlayback { true } - #[cfg(not(target_os = "windows"))] #[allow(dead_code)] fn create_stream( self, From 99f5995713846513e414c46e5f4b39e01806391e Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 20:22:05 +0000 Subject: [PATCH 002/333] improve: switch editor playback to streaming audio and live seek 
Co-authored-by: Richie McIlroy --- apps/desktop/src-tauri/src/lib.rs | 18 +++ .../src/routes/editor/Timeline/index.tsx | 27 +--- crates/editor/src/audio.rs | 17 ++- crates/editor/src/playback.rs | 138 +++++++++++++++--- 4 files changed, 157 insertions(+), 43 deletions(-) diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs index 00dc6903b1..c00d465ced 100644 --- a/apps/desktop/src-tauri/src/lib.rs +++ b/apps/desktop/src-tauri/src/lib.rs @@ -1921,6 +1921,15 @@ async fn set_playhead_position( }) .await; + let playback_handle = { + let state = editor_instance.state.lock().await; + state.playback_task.clone() + }; + + if let Some(handle) = playback_handle { + handle.seek(frame_number); + } + Ok(()) } @@ -2545,6 +2554,15 @@ async fn seek_to(editor_instance: WindowEditorInstance, frame_number: u32) -> Re }) .await; + let playback_handle = { + let state = editor_instance.state.lock().await; + state.playback_task.clone() + }; + + if let Some(handle) = playback_handle { + handle.seek(frame_number); + } + Ok(()) } diff --git a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx index 8aacea955f..da839931ad 100644 --- a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -89,7 +89,6 @@ export function Timeline() { editorState, projectActions, meta, - previewResolutionBase, } = useEditorContext(); const duration = () => editorInstance.recordingDuration; @@ -274,32 +273,16 @@ export function Timeline() { maskSegmentDragState.type !== "moving" && textSegmentDragState.type !== "moving" ) { - // Guard against missing bounds and clamp computed time to [0, totalDuration()] if (left == null) return; const rawTime = secsPerPixel() * (e.clientX - left) + transform().position; const newTime = Math.min(Math.max(0, rawTime), totalDuration()); + const targetFrame = Math.round(newTime * FPS); - // If playing, some backends require restart to seek reliably 
- if (editorState.playing) { - try { - await commands.stopPlayback(); - - // Round to nearest frame to prevent off-by-one drift - const targetFrame = Math.round(newTime * FPS); - await commands.seekTo(targetFrame); - - // If the user paused during these async ops, bail out without restarting - if (!editorState.playing) { - setEditorState("playbackTime", newTime); - return; - } - - await commands.startPlayback(FPS, previewResolutionBase()); - setEditorState("playing", true); - } catch (err) { - console.error("Failed to seek during playback:", err); - } + try { + await commands.seekTo(targetFrame); + } catch (err) { + console.error("Failed to seek timeline playhead:", err); } setEditorState("playbackTime", newTime); diff --git a/crates/editor/src/audio.rs b/crates/editor/src/audio.rs index d2cdbf8df4..7051ce5549 100644 --- a/crates/editor/src/audio.rs +++ b/crates/editor/src/audio.rs @@ -7,7 +7,6 @@ use cap_project::{AudioConfiguration, ClipOffsets, ProjectConfiguration, Timelin use ffmpeg::{ ChannelLayout, Dictionary, format as avformat, frame::Audio as FFAudio, software::resampling, }; -#[cfg(not(target_os = "windows"))] use ringbuf::{ HeapRb, traits::{Consumer, Observer, Producer}, @@ -248,14 +247,12 @@ impl AudioRenderer { } } -#[cfg(not(target_os = "windows"))] pub struct AudioPlaybackBuffer { frame_buffer: AudioRenderer, resampler: AudioResampler, resampled_buffer: HeapRb, } -#[cfg(not(target_os = "windows"))] impl AudioPlaybackBuffer { pub const PLAYBACK_SAMPLES_COUNT: u32 = 512; @@ -296,6 +293,19 @@ impl AudioPlaybackBuffer { self.frame_buffer.set_playhead(playhead, project); } + #[cfg(target_os = "windows")] + pub fn set_playhead_smooth(&mut self, playhead: f64, project: &ProjectConfiguration) { + let current_playhead = self.frame_buffer.elapsed_samples_to_playhead(); + let drift = (playhead - current_playhead).abs(); + + if drift > 0.2 { + self.set_playhead(playhead, project); + return; + } + + self.frame_buffer.set_playhead(playhead, project); + } + 
#[allow(dead_code)] pub fn current_playhead(&self) -> f64 { self.frame_buffer.elapsed_samples_to_playhead() @@ -426,7 +436,6 @@ impl AudioResampler { }) } - #[cfg(not(target_os = "windows"))] pub fn reset(&mut self) { *self = Self::new(self.output).unwrap(); } diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 000f209c6b..3672590369 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -1,6 +1,6 @@ -use cap_audio::FromSampleBytes; -#[cfg(not(target_os = "windows"))] -use cap_audio::{LatencyCorrectionConfig, LatencyCorrector, default_output_latency_hint}; +use cap_audio::{ + FromSampleBytes, LatencyCorrectionConfig, LatencyCorrector, default_output_latency_hint, +}; use cap_media::MediaError; use cap_media_info::AudioInfo; use cap_project::{ProjectConfiguration, XY}; @@ -8,7 +8,6 @@ use cap_rendering::{ DecodedSegmentFrames, ProjectUniforms, RenderVideoConstants, ZoomFocusInterpolator, spring_mass_damper::SpringMassDamperSimulationConfig, }; -#[cfg(not(target_os = "windows"))] use cpal::{BufferSize, SupportedBufferSize}; use cpal::{ SampleFormat, @@ -28,7 +27,6 @@ use tokio::{ }; use tracing::{error, info, warn}; -#[cfg(not(target_os = "windows"))] use crate::audio::AudioPlaybackBuffer; use crate::{ audio::AudioSegment, editor, editor_instance::SegmentMedia, segments::get_audio_segments, @@ -64,6 +62,7 @@ pub enum PlaybackEvent { pub struct PlaybackHandle { stop_tx: watch::Sender, event_rx: watch::Receiver, + seek_tx: tokio_mpsc::UnboundedSender, } struct PrefetchedFrame { @@ -118,10 +117,12 @@ impl Playback { let (event_tx, mut event_rx) = watch::channel(PlaybackEvent::Start); event_rx.borrow_and_update(); + let (seek_tx, mut seek_rx) = tokio_mpsc::unbounded_channel::(); let handle = PlaybackHandle { stop_tx: stop_tx.clone(), event_rx, + seek_tx, }; let (prefetch_tx, mut prefetch_rx) = @@ -437,10 +438,33 @@ impl Playback { .make_contiguous() .sort_by_key(|p| p.frame_number); - let start = Instant::now(); + 
let mut playback_anchor_start = Instant::now(); + let mut playback_anchor_frame = frame_number; let mut cached_project = self.project.borrow().clone(); 'playback: loop { + let mut pending_seek = None; + while let Ok(next_seek_frame) = seek_rx.try_recv() { + pending_seek = Some(next_seek_frame); + } + + if let Some(seek_frame) = pending_seek { + frame_number = seek_frame; + playback_anchor_start = Instant::now(); + playback_anchor_frame = seek_frame; + prefetch_buffer.retain(|p| p.frame_number >= frame_number); + frame_cache.cache.clear(); + let _ = frame_request_tx.send(frame_number); + let _ = playback_position_tx.send(frame_number); + if has_audio + && audio_playhead_tx + .send(frame_number as f64 / fps_f64) + .is_err() + { + break 'playback; + } + } + if self.project.has_changed().unwrap_or(false) { cached_project = self.project.borrow_and_update().clone(); } @@ -465,11 +489,28 @@ impl Playback { } } - let frame_offset = frame_number.saturating_sub(self.start_frame_number) as f64; - let next_deadline = start + frame_duration.mul_f64(frame_offset); + let frame_offset = frame_number.saturating_sub(playback_anchor_frame) as f64; + let next_deadline = playback_anchor_start + frame_duration.mul_f64(frame_offset); tokio::select! 
{ _ = stop_rx.changed() => break 'playback, + Some(seek_frame) = seek_rx.recv() => { + frame_number = seek_frame; + playback_anchor_start = Instant::now(); + playback_anchor_frame = seek_frame; + prefetch_buffer.retain(|p| p.frame_number >= frame_number); + frame_cache.cache.clear(); + let _ = frame_request_tx.send(frame_number); + let _ = playback_position_tx.send(frame_number); + if has_audio + && audio_playhead_tx + .send(frame_number as f64 / fps_f64) + .is_err() + { + break 'playback; + } + continue; + } _ = tokio::time::sleep_until(next_deadline) => {} } @@ -699,8 +740,8 @@ impl Playback { break 'playback; } - let expected_frame = self.start_frame_number - + (start.elapsed().as_secs_f64() * fps_f64).floor() as u32; + let expected_frame = playback_anchor_frame + + (playback_anchor_start.elapsed().as_secs_f64() * fps_f64).floor() as u32; if frame_number < expected_frame { let frames_behind = expected_frame - frame_number; @@ -742,6 +783,10 @@ impl PlaybackHandle { self.stop_tx.send(true).ok(); } + pub fn seek(&self, frame_number: u32) { + let _ = self.seek_tx.send(frame_number); + } + pub async fn receive_event(&mut self) -> watch::Ref<'_, PlaybackEvent> { self.event_rx.changed().await.ok(); self.event_rx.borrow_and_update() @@ -759,6 +804,12 @@ struct AudioPlayback { } impl AudioPlayback { + fn use_prerendered_audio() -> bool { + std::env::var("CAP_AUDIO_PRERENDER_PLAYBACK") + .map(|value| value == "1" || value.eq_ignore_ascii_case("true")) + .unwrap_or(false) + } + fn spawn(self) -> bool { let handle = tokio::runtime::Handle::current(); @@ -787,26 +838,80 @@ impl AudioPlayback { } }; + let use_prerendered_audio = Self::use_prerendered_audio(); let duration_secs = self.duration_secs; + if use_prerendered_audio { + info!("Using pre-rendered audio playback mode"); + } else { + info!("Using low-latency streaming audio playback mode"); + } let result = match supported_config.sample_format() { SampleFormat::I16 => { - self.create_stream_prerendered::(device, 
supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::I32 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::F32 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::I64 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::U8 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::F64 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } format => { error!( @@ -843,7 +948,6 @@ impl AudioPlayback { true } - #[cfg(not(target_os = "windows"))] #[allow(dead_code)] fn create_stream( self, From a47c0296cd724b9750be81826a5eaed4d772904e Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 20:22:14 +0000 Subject: [PATCH 003/333] improve: lazy-initialize avassetreader pool and refresh findings 
Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 71 +++++++++++++++++-- crates/rendering/src/decoder/avassetreader.rs | 63 +++++++++++++++- 2 files changed, 127 insertions(+), 7 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index d30b940f21..e97796063a 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -35,7 +35,7 @@ ## Current Status -**Last Updated**: 2026-01-30 +**Last Updated**: 2026-02-13 ### Performance Summary @@ -60,10 +60,12 @@ - ✅ Multi-position decoder pool for smooth scrubbing - ✅ Mic audio sync within tolerance - ✅ Camera-display sync perfect (0ms drift) +- ✅ Editor playback now keeps a live seek channel during playback instead of stop/start restart loops +- ✅ Audio playback defaults to low-latency streaming buffer path with bounded prefill ### Known Issues (Lower Priority) 1. **System audio timing**: ~162ms difference inherited from recording-side timing issue -2. **Display decoder init time**: 337ms due to multi-position pool (creates 3 decoders) +2. 
**Display decoder init time**: baseline was 337ms from eager multi-decoder setup; now reduced by lazy decoder warmup but needs benchmark confirmation --- @@ -73,12 +75,17 @@ *(Update this section as you work)* - [ ] **Test fragmented mode** - Run playback tests on fragmented recordings -- [ ] **Investigate display decoder init time** - 337ms may be optimizable +- [ ] **Collect cross-platform benchmark evidence** - macOS 13+ and Windows GPU matrix for FPS, scrub settle, audio start latency, and A/V drift +- [ ] **Validate lazy decoder warmup impact** - measure display decoder init and scrub settle before/after on real recordings +- [ ] **Validate streaming audio startup/sync** - benchmark low-latency path vs legacy pre-render path across long timelines ### Completed - [x] **Run initial baseline** - Established current playback performance metrics (2026-01-28) - [x] **Profile decoder init time** - Hardware acceleration confirmed (AVAssetReader) (2026-01-28) - [x] **Identify latency hotspots** - No issues found, p95=3.1ms (2026-01-28) +- [x] **Remove seek restart churn in timeline path** - in-playback seeks now route through live playback handle (2026-02-13) +- [x] **Switch default audio mode to low-latency streaming** - full prerender now opt-in by env flag (2026-02-13) +- [x] **Reduce eager AVAssetReader decoder warmup** - pool now initializes lazily beyond first warm decoders (2026-02-13) --- @@ -127,13 +134,36 @@ cargo run -p cap-recording --example playback-test-runner -- full ## Completed Fixes -*(Document fixes here as they are implemented)* +1. **Low-latency audio startup enabled by default (2026-02-13)** + - `AudioPlayback::spawn()` now selects streaming `create_stream()` path by default. + - Legacy full-timeline prerender path is still available via `CAP_AUDIO_PRERENDER_PLAYBACK=1`. + - `AudioPlaybackBuffer` is available on all platforms so Windows can use streaming sync logic. + +2. 
**In-playback seek path without stop/start (2026-02-13)** + - Added seek channel to `PlaybackHandle` and playback loop. + - `seek_to` and `set_playhead_position` commands now forward seek requests to active playback. + - Timeline seek no longer tears down and recreates playback while playing. + +3. **Lazy decoder pool warmup on macOS AVAssetReader (2026-02-13)** + - Initial warmup now creates only a small subset of decoder instances. + - Additional decoder instances are initialized lazily when scrub patterns request them. + - Failed lazy init falls back safely to currently available decoders. --- ## Root Cause Analysis Archive -*(Document investigated issues here)* +1. **Audio start delay from full-track prerender** + - Root cause: playback startup used `create_stream_prerendered()` for all sample formats, forcing full timeline audio render before output stream started. + - Fix direction: switch default to incremental `AudioPlaybackBuffer` path with bounded prefill and live playhead correction. + +2. **Scrub lag from playback restart loop** + - Root cause: timeline seek while playing called stop → seek → start, rebuilding playback/audio state on every interactive seek. + - Fix direction: add live seek channel into running playback loop and route frontend seeks to it. + +3. **Display decoder init inflation on macOS** + - Root cause: AVAssetReader decoder pool eagerly initialized multiple decoders during startup. + - Fix direction: reduce eager warmup and lazily instantiate additional pool decoders when scrub behavior actually needs them. --- @@ -199,6 +229,37 @@ Decoder Pipeline: --- +### Session 2026-02-13 (Audio Startup + Live Seek + Lazy Decoder Warmup) + +**Goal**: Remove major editor playback bottlenecks affecting startup latency, scrub responsiveness, and decoder init overhead. + +**What was done**: +1. Switched playback audio startup default to streaming buffer path. +2. Kept prerender audio path behind `CAP_AUDIO_PRERENDER_PLAYBACK` as explicit fallback. +3. 
Enabled `AudioPlaybackBuffer` for all platforms so Windows uses live buffering/sync path. +4. Added a seek channel to `PlaybackHandle` and integrated seek handling into the main playback loop. +5. Updated Tauri seek/playhead commands to forward seeks into active playback handle. +6. Removed frontend timeline stop/start cycle when seeking while playing. +7. Reduced AVAssetReader eager pool warmup and added lazy decoder instantiation for additional pool slots. + +**Changes Made**: +- `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. +- `crates/editor/src/audio.rs`: cross-platform `AudioPlaybackBuffer`, windows-only smooth seek helper. +- `apps/desktop/src-tauri/src/lib.rs`: forward `seek_to` and `set_playhead_position` into active playback handle. +- `apps/desktop/src/routes/editor/Timeline/index.tsx`: seek while playing now sends direct `seekTo` without playback restart. +- `crates/rendering/src/decoder/avassetreader.rs`: lower eager decoder warmup and lazy pool growth. + +**Results**: +- ✅ `cargo +stable check -p cap-editor` passes after changes. +- ✅ `cargo +stable check -p cap-rendering` passes after changes. +- ✅ `pnpm --dir apps/desktop exec tsc --noEmit` passes after frontend seek changes. +- ⚠️ `cargo +stable check -p cap-desktop` and `cargo +stable run -p cap-recording --example playback-test-runner -- list` fail in this Linux environment because `scap-targets` does not currently compile on this target (`DisplayIdImpl`/`WindowImpl` unresolved), preventing local benchmark execution here. +- ⚠️ Cross-platform FPS/scrub/A-V benchmark evidence still pending on macOS and Windows devices with real recordings. + +**Stopping point**: Core playback code-path optimizations are implemented and compiling in touched crates; next step is benchmark execution on macOS 13+ and Windows GPU matrix to quantify gains. 
+ +--- + ### Session 2026-01-28 (Initial Baseline - MP4) **Goal**: Establish initial playback performance baseline diff --git a/crates/rendering/src/decoder/avassetreader.rs b/crates/rendering/src/decoder/avassetreader.rs index 73f9c62a4a..62582efe67 100644 --- a/crates/rendering/src/decoder/avassetreader.rs +++ b/crates/rendering/src/decoder/avassetreader.rs @@ -417,6 +417,8 @@ pub struct AVAssetReaderDecoder { } impl AVAssetReaderDecoder { + const INITIAL_WARM_DECODER_COUNT: usize = 2; + fn new(path: PathBuf, tokio_handle: TokioHandle) -> Result { let keyframe_index = cap_video_decode::avassetreader::KeyframeIndex::build(&path).ok(); let fps = keyframe_index @@ -449,7 +451,10 @@ impl AVAssetReaderDecoder { let mut decoders = vec![primary_instance]; let initial_positions = pool_manager.positions(); - for pos in initial_positions.iter().skip(1) { + let warm_decoder_count = Self::INITIAL_WARM_DECODER_COUNT + .max(1) + .min(initial_positions.len()); + for pos in initial_positions.iter().take(warm_decoder_count).skip(1) { let start_time = pos.position_secs; match DecoderInstance::new( path.clone(), @@ -496,11 +501,65 @@ impl AVAssetReaderDecoder { }) } + fn ensure_decoder_available(&mut self, decoder_id: usize) -> usize { + if decoder_id < self.decoders.len() { + return decoder_id; + } + + let Some(template) = self.decoders.first() else { + return 0; + }; + let template_path = template.path.clone(); + let template_tokio_handle = template.tokio_handle.clone(); + let template_keyframe_index = template.keyframe_index.clone(); + + while self.decoders.len() <= decoder_id { + let next_id = self.decoders.len(); + let Some(position) = self + .pool_manager + .positions() + .iter() + .find(|p| p.id == next_id) + .map(|p| p.position_secs) + else { + break; + }; + + match DecoderInstance::new( + template_path.clone(), + template_tokio_handle.clone(), + position, + template_keyframe_index.clone(), + ) { + Ok(instance) => { + self.decoders.push(instance); + tracing::info!( + 
decoder_id = next_id, + position_secs = position, + total_decoders = self.decoders.len(), + "Lazily initialized decoder instance" + ); + } + Err(e) => { + tracing::warn!( + decoder_id = next_id, + position_secs = position, + error = %e, + "Failed to lazily initialize decoder instance" + ); + break; + } + } + } + + decoder_id.min(self.decoders.len().saturating_sub(1)) + } + fn select_best_decoder(&mut self, requested_time: f32) -> (usize, bool) { let (best_id, _distance, needs_reset) = self.pool_manager.find_best_decoder_for_time(requested_time); - let decoder_idx = best_id.min(self.decoders.len().saturating_sub(1)); + let decoder_idx = self.ensure_decoder_available(best_id); if needs_reset && decoder_idx < self.decoders.len() { self.decoders[decoder_idx].reset(requested_time); From bc4a17b82794c8c8aff0990727dd34f82f5cf01c Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 20:22:14 +0000 Subject: [PATCH 004/333] improve: lazy-initialize avassetreader pool and refresh findings Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 71 +++++++++++++++++-- crates/rendering/src/decoder/avassetreader.rs | 63 +++++++++++++++- 2 files changed, 127 insertions(+), 7 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index d30b940f21..e97796063a 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -35,7 +35,7 @@ ## Current Status -**Last Updated**: 2026-01-30 +**Last Updated**: 2026-02-13 ### Performance Summary @@ -60,10 +60,12 @@ - ✅ Multi-position decoder pool for smooth scrubbing - ✅ Mic audio sync within tolerance - ✅ Camera-display sync perfect (0ms drift) +- ✅ Editor playback now keeps a live seek channel during playback instead of stop/start restart loops +- ✅ Audio playback defaults to low-latency streaming buffer path with bounded prefill ### Known Issues (Lower Priority) 1. 
**System audio timing**: ~162ms difference inherited from recording-side timing issue -2. **Display decoder init time**: 337ms due to multi-position pool (creates 3 decoders) +2. **Display decoder init time**: baseline was 337ms from eager multi-decoder setup; now reduced by lazy decoder warmup but needs benchmark confirmation --- @@ -73,12 +75,17 @@ *(Update this section as you work)* - [ ] **Test fragmented mode** - Run playback tests on fragmented recordings -- [ ] **Investigate display decoder init time** - 337ms may be optimizable +- [ ] **Collect cross-platform benchmark evidence** - macOS 13+ and Windows GPU matrix for FPS, scrub settle, audio start latency, and A/V drift +- [ ] **Validate lazy decoder warmup impact** - measure display decoder init and scrub settle before/after on real recordings +- [ ] **Validate streaming audio startup/sync** - benchmark low-latency path vs legacy pre-render path across long timelines ### Completed - [x] **Run initial baseline** - Established current playback performance metrics (2026-01-28) - [x] **Profile decoder init time** - Hardware acceleration confirmed (AVAssetReader) (2026-01-28) - [x] **Identify latency hotspots** - No issues found, p95=3.1ms (2026-01-28) +- [x] **Remove seek restart churn in timeline path** - in-playback seeks now route through live playback handle (2026-02-13) +- [x] **Switch default audio mode to low-latency streaming** - full prerender now opt-in by env flag (2026-02-13) +- [x] **Reduce eager AVAssetReader decoder warmup** - pool now initializes lazily beyond first warm decoders (2026-02-13) --- @@ -127,13 +134,36 @@ cargo run -p cap-recording --example playback-test-runner -- full ## Completed Fixes -*(Document fixes here as they are implemented)* +1. **Low-latency audio startup enabled by default (2026-02-13)** + - `AudioPlayback::spawn()` now selects streaming `create_stream()` path by default. + - Legacy full-timeline prerender path is still available via `CAP_AUDIO_PRERENDER_PLAYBACK=1`. 
+ - `AudioPlaybackBuffer` is available on all platforms so Windows can use streaming sync logic. + +2. **In-playback seek path without stop/start (2026-02-13)** + - Added seek channel to `PlaybackHandle` and playback loop. + - `seek_to` and `set_playhead_position` commands now forward seek requests to active playback. + - Timeline seek no longer tears down and recreates playback while playing. + +3. **Lazy decoder pool warmup on macOS AVAssetReader (2026-02-13)** + - Initial warmup now creates only a small subset of decoder instances. + - Additional decoder instances are initialized lazily when scrub patterns request them. + - Failed lazy init falls back safely to currently available decoders. --- ## Root Cause Analysis Archive -*(Document investigated issues here)* +1. **Audio start delay from full-track prerender** + - Root cause: playback startup used `create_stream_prerendered()` for all sample formats, forcing full timeline audio render before output stream started. + - Fix direction: switch default to incremental `AudioPlaybackBuffer` path with bounded prefill and live playhead correction. + +2. **Scrub lag from playback restart loop** + - Root cause: timeline seek while playing called stop → seek → start, rebuilding playback/audio state on every interactive seek. + - Fix direction: add live seek channel into running playback loop and route frontend seeks to it. + +3. **Display decoder init inflation on macOS** + - Root cause: AVAssetReader decoder pool eagerly initialized multiple decoders during startup. + - Fix direction: reduce eager warmup and lazily instantiate additional pool decoders when scrub behavior actually needs them. --- @@ -199,6 +229,37 @@ Decoder Pipeline: --- +### Session 2026-02-13 (Audio Startup + Live Seek + Lazy Decoder Warmup) + +**Goal**: Remove major editor playback bottlenecks affecting startup latency, scrub responsiveness, and decoder init overhead. + +**What was done**: +1. 
Switched playback audio startup default to streaming buffer path. +2. Kept prerender audio path behind `CAP_AUDIO_PRERENDER_PLAYBACK` as explicit fallback. +3. Enabled `AudioPlaybackBuffer` for all platforms so Windows uses live buffering/sync path. +4. Added a seek channel to `PlaybackHandle` and integrated seek handling into the main playback loop. +5. Updated Tauri seek/playhead commands to forward seeks into active playback handle. +6. Removed frontend timeline stop/start cycle when seeking while playing. +7. Reduced AVAssetReader eager pool warmup and added lazy decoder instantiation for additional pool slots. + +**Changes Made**: +- `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. +- `crates/editor/src/audio.rs`: cross-platform `AudioPlaybackBuffer`, windows-only smooth seek helper. +- `apps/desktop/src-tauri/src/lib.rs`: forward `seek_to` and `set_playhead_position` into active playback handle. +- `apps/desktop/src/routes/editor/Timeline/index.tsx`: seek while playing now sends direct `seekTo` without playback restart. +- `crates/rendering/src/decoder/avassetreader.rs`: lower eager decoder warmup and lazy pool growth. + +**Results**: +- ✅ `cargo +stable check -p cap-editor` passes after changes. +- ✅ `cargo +stable check -p cap-rendering` passes after changes. +- ✅ `pnpm --dir apps/desktop exec tsc --noEmit` passes after frontend seek changes. +- ⚠️ `cargo +stable check -p cap-desktop` and `cargo +stable run -p cap-recording --example playback-test-runner -- list` fail in this Linux environment because `scap-targets` does not currently compile on this target (`DisplayIdImpl`/`WindowImpl` unresolved), preventing local benchmark execution here. +- ⚠️ Cross-platform FPS/scrub/A-V benchmark evidence still pending on macOS and Windows devices with real recordings. 
+ +**Stopping point**: Core playback code-path optimizations are implemented and compiling in touched crates; next step is benchmark execution on macOS 13+ and Windows GPU matrix to quantify gains. + +--- + ### Session 2026-01-28 (Initial Baseline - MP4) **Goal**: Establish initial playback performance baseline diff --git a/crates/rendering/src/decoder/avassetreader.rs b/crates/rendering/src/decoder/avassetreader.rs index 73f9c62a4a..62582efe67 100644 --- a/crates/rendering/src/decoder/avassetreader.rs +++ b/crates/rendering/src/decoder/avassetreader.rs @@ -417,6 +417,8 @@ pub struct AVAssetReaderDecoder { } impl AVAssetReaderDecoder { + const INITIAL_WARM_DECODER_COUNT: usize = 2; + fn new(path: PathBuf, tokio_handle: TokioHandle) -> Result { let keyframe_index = cap_video_decode::avassetreader::KeyframeIndex::build(&path).ok(); let fps = keyframe_index @@ -449,7 +451,10 @@ impl AVAssetReaderDecoder { let mut decoders = vec![primary_instance]; let initial_positions = pool_manager.positions(); - for pos in initial_positions.iter().skip(1) { + let warm_decoder_count = Self::INITIAL_WARM_DECODER_COUNT + .max(1) + .min(initial_positions.len()); + for pos in initial_positions.iter().take(warm_decoder_count).skip(1) { let start_time = pos.position_secs; match DecoderInstance::new( path.clone(), @@ -496,11 +501,65 @@ impl AVAssetReaderDecoder { }) } + fn ensure_decoder_available(&mut self, decoder_id: usize) -> usize { + if decoder_id < self.decoders.len() { + return decoder_id; + } + + let Some(template) = self.decoders.first() else { + return 0; + }; + let template_path = template.path.clone(); + let template_tokio_handle = template.tokio_handle.clone(); + let template_keyframe_index = template.keyframe_index.clone(); + + while self.decoders.len() <= decoder_id { + let next_id = self.decoders.len(); + let Some(position) = self + .pool_manager + .positions() + .iter() + .find(|p| p.id == next_id) + .map(|p| p.position_secs) + else { + break; + }; + + match 
DecoderInstance::new( + template_path.clone(), + template_tokio_handle.clone(), + position, + template_keyframe_index.clone(), + ) { + Ok(instance) => { + self.decoders.push(instance); + tracing::info!( + decoder_id = next_id, + position_secs = position, + total_decoders = self.decoders.len(), + "Lazily initialized decoder instance" + ); + } + Err(e) => { + tracing::warn!( + decoder_id = next_id, + position_secs = position, + error = %e, + "Failed to lazily initialize decoder instance" + ); + break; + } + } + } + + decoder_id.min(self.decoders.len().saturating_sub(1)) + } + fn select_best_decoder(&mut self, requested_time: f32) -> (usize, bool) { let (best_id, _distance, needs_reset) = self.pool_manager.find_best_decoder_for_time(requested_time); - let decoder_idx = best_id.min(self.decoders.len().saturating_sub(1)); + let decoder_idx = self.ensure_decoder_available(best_id); if needs_reset && decoder_idx < self.decoders.len() { self.decoders[decoder_idx].reset(requested_time); From 9ed478b116a2f02c767ac7e7479321e7b57911e5 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 20:26:12 +0000 Subject: [PATCH 005/333] improve: add scrub and startup metrics to playback benchmark Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 16 ++ crates/editor/PLAYBACK-FINDINGS.md | 9 + .../examples/playback-test-runner.rs | 217 +++++++++++++++++- 3 files changed, 235 insertions(+), 7 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index a2253d580f..dd2f3fe853 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -12,6 +12,7 @@ This document tracks performance benchmarks for Cap's playback and decoding syst | Decode Latency (p95) | <50ms | - | | Effective FPS | ≥30 fps | ±2 fps | | Decode Jitter | <10ms | - | +| Scrub Seek Latency (p95) | <40ms | - | | A/V Sync (mic↔video) | <100ms | - | | A/V Sync (system↔video) | <100ms | - | | Camera-Display Drift 
| <100ms | - | @@ -20,6 +21,7 @@ This document tracks performance benchmarks for Cap's playback and decoding syst - **Decoder Tests**: Init time, hardware acceleration detection, fallback handling - **Playback Tests**: Sequential decode, frame retrieval, latency percentiles +- **Scrub Tests**: Random access seek decode latency and seek failure rate - **Audio Sync Tests**: Mic-video sync, system audio-video sync - **Camera Sync Tests**: Camera-display drift, frame count alignment - **Decode Benchmark**: Creation, sequential, seek, and random access performance @@ -44,6 +46,7 @@ cargo run -p cap-recording --example playback-test-runner -- full # Run specific test categories cargo run -p cap-recording --example playback-test-runner -- decoder cargo run -p cap-recording --example playback-test-runner -- playback +cargo run -p cap-recording --example playback-test-runner -- scrub cargo run -p cap-recording --example playback-test-runner -- audio-sync cargo run -p cap-recording --example playback-test-runner -- camera-sync @@ -106,6 +109,19 @@ cargo run -p cap-recording --example playback-test-runner -- full | **P50/P95/P99** | Latency percentiles | Sorted distribution | | **Effective FPS** | Actual decode throughput | frames / elapsed_time | | **Jitter** | Decode time variance (std dev) | sqrt(variance) | +| **First Decode** | Decode latency for first successful frame | elapsed from first frame request | +| **Startup to First** | Time from playback test start to first decoded frame | elapsed since playback test start | + +### Scrub Metrics + +| Metric | Description | How Measured | +|--------|-------------|--------------| +| **Seek Operations** | Total random seek attempts | Fixed operation count per segment | +| **Successful Seeks** | Seeks returning a decoded frame | Count of non-None seek decodes | +| **Failed Seeks** | Seeks returning no frame | Count of None seek decodes | +| **Avg Seek Time** | Mean random seek decode latency | Avg of seek decode times | +| 
**P50/P95/P99 Seek** | Seek latency percentiles | Sorted seek time distribution | +| **Max Seek Time** | Worst seek decode latency | Max of seek decode times | ### Audio Sync Metrics diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index e97796063a..0b693b65f9 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -98,6 +98,7 @@ cargo run -p cap-recording --example playback-test-runner -- full # Test specific categories cargo run -p cap-recording --example playback-test-runner -- decoder cargo run -p cap-recording --example playback-test-runner -- playback +cargo run -p cap-recording --example playback-test-runner -- scrub cargo run -p cap-recording --example playback-test-runner -- audio-sync cargo run -p cap-recording --example playback-test-runner -- camera-sync @@ -149,6 +150,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Additional decoder instances are initialized lazily when scrub patterns request them. - Failed lazy init falls back safely to currently available decoders. +4. **Playback benchmark runner now captures scrub and startup metrics (2026-02-13)** + - Added `scrub` benchmark mode to `playback-test-runner`. + - Playback result now includes first-frame decode and startup-to-first-frame latency. + - Scrub result now reports seek p50/p95/p99 and seek failure counts. + --- ## Root Cause Analysis Archive @@ -241,6 +247,7 @@ Decoder Pipeline: 5. Updated Tauri seek/playhead commands to forward seeks into active playback handle. 6. Removed frontend timeline stop/start cycle when seeking while playing. 7. Reduced AVAssetReader eager pool warmup and added lazy decoder instantiation for additional pool slots. +8. Extended playback benchmark tooling with scrub mode and startup latency metrics. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. 
@@ -248,6 +255,8 @@ Decoder Pipeline: - `apps/desktop/src-tauri/src/lib.rs`: forward `seek_to` and `set_playhead_position` into active playback handle. - `apps/desktop/src/routes/editor/Timeline/index.tsx`: seek while playing now sends direct `seekTo` without playback restart. - `crates/rendering/src/decoder/avassetreader.rs`: lower eager decoder warmup and lazy pool growth. +- `crates/recording/examples/playback-test-runner.rs`: added scrub command and startup/scrub latency metrics. +- `crates/editor/PLAYBACK-BENCHMARKS.md`: updated benchmark reference and metric definitions. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/recording/examples/playback-test-runner.rs b/crates/recording/examples/playback-test-runner.rs index 437b3844a3..9c7bff6110 100644 --- a/crates/recording/examples/playback-test-runner.rs +++ b/crates/recording/examples/playback-test-runner.rs @@ -53,6 +53,7 @@ enum Commands { Full, Decoder, Playback, + Scrub, AudioSync, CameraSync, List, @@ -60,6 +61,7 @@ enum Commands { const FPS_TOLERANCE: f64 = 2.0; const DECODE_LATENCY_WARNING_MS: f64 = 50.0; +const SCRUB_SEEK_WARNING_MS: f64 = 40.0; const AUDIO_VIDEO_SYNC_TOLERANCE_MS: f64 = 100.0; const CAMERA_SYNC_TOLERANCE_MS: f64 = 100.0; @@ -82,6 +84,8 @@ struct PlaybackTestResult { total_frames: usize, decoded_frames: usize, failed_frames: usize, + first_frame_decode_time_ms: f64, + startup_to_first_frame_ms: f64, avg_decode_time_ms: f64, min_decode_time_ms: f64, max_decode_time_ms: f64, @@ -96,6 +100,22 @@ struct PlaybackTestResult { errors: Vec, } +#[derive(Debug, Clone, Default)] +struct ScrubTestResult { + passed: bool, + segment_index: usize, + seek_operations: usize, + successful_seeks: usize, + failed_seeks: usize, + avg_seek_time_ms: f64, + p50_seek_time_ms: f64, + p95_seek_time_ms: f64, + p99_seek_time_ms: f64, + max_seek_time_ms: f64, + seek_latency_ok: bool, + errors: Vec, +} + #[derive(Debug, Clone, Default)] struct AudioSyncTestResult { 
passed: bool, @@ -141,6 +161,7 @@ struct RecordingTestReport { has_system_audio: bool, decoder_results: Vec, playback_results: Vec, + scrub_results: Vec, audio_sync_results: Vec, camera_sync_results: Vec, overall_passed: bool, @@ -208,6 +229,10 @@ impl RecordingTestReport { result.p95_decode_time_ms, result.p99_decode_time_ms ); + println!( + " Startup: first_decode={:.1}ms startup_to_first={:.1}ms", + result.first_frame_decode_time_ms, result.startup_to_first_frame_ms + ); if !result.fps_ok { println!(" WARN: FPS outside tolerance!"); } @@ -219,6 +244,28 @@ impl RecordingTestReport { } } + if !self.scrub_results.is_empty() { + println!("\n SCRUB TESTS:"); + for result in &self.scrub_results { + let status = if result.passed { "OK" } else { "FAIL" }; + println!( + " Segment {}: [{}] seeks={}/{} avg={:.1}ms p95={:.1}ms", + result.segment_index, + status, + result.successful_seeks, + result.seek_operations, + result.avg_seek_time_ms, + result.p95_seek_time_ms + ); + if !result.seek_latency_ok { + println!(" WARN: Scrub seek latency exceeds {SCRUB_SEEK_WARNING_MS}ms!"); + } + for err in &result.errors { + println!(" ERROR: {err}"); + } + } + } + if !self.audio_sync_results.is_empty() { println!("\n AUDIO SYNC TESTS:"); for result in &self.audio_sync_results { @@ -341,6 +388,7 @@ async fn test_playback( fps: u32, verbose: bool, ) -> PlaybackTestResult { + let playback_start = Instant::now(); let mut result = PlaybackTestResult { segment_index, expected_fps: fps as f64, @@ -384,6 +432,11 @@ async fn test_playback( let decode_time_ms = start.elapsed().as_secs_f64() * 1000.0; decode_times.push(decode_time_ms); decoded_count += 1; + if decoded_count == 1 { + result.first_frame_decode_time_ms = decode_time_ms; + result.startup_to_first_frame_ms = + playback_start.elapsed().as_secs_f64() * 1000.0; + } if frame.width() == 0 || frame.height() == 0 { result @@ -448,6 +501,96 @@ async fn test_playback( result } +async fn test_scrub( + recording_meta: &RecordingMeta, + meta: 
&StudioRecordingMeta, + segment_index: usize, + fps: u32, + verbose: bool, +) -> ScrubTestResult { + let mut result = ScrubTestResult { + segment_index, + seek_operations: 120, + ..Default::default() + }; + + let display_path = match meta { + StudioRecordingMeta::SingleSegment { segment } => { + recording_meta.path(&segment.display.path) + } + StudioRecordingMeta::MultipleSegments { inner } => { + recording_meta.path(&inner.segments[segment_index].display.path) + } + }; + + let decoder = match spawn_decoder("display", display_path.clone(), fps, 0.0, false).await { + Ok(d) => d, + Err(e) => { + result.errors.push(format!("Failed to create decoder: {e}")); + return result; + } + }; + + let duration_secs = get_video_duration(&display_path); + let total_frames = (duration_secs * fps as f64).ceil() as usize; + if total_frames < 2 { + result + .errors + .push("Video duration too short for scrub benchmark".to_string()); + return result; + } + + let mut seek_times = Vec::with_capacity(result.seek_operations); + + for operation in 0..result.seek_operations { + let target_frame = ((operation * 7919) % total_frames).max(1); + let target_time = target_frame as f32 / fps as f32; + let seek_start = Instant::now(); + match decoder.get_frame(target_time).await { + Some(_) => { + let seek_time_ms = seek_start.elapsed().as_secs_f64() * 1000.0; + seek_times.push(seek_time_ms); + result.successful_seeks += 1; + if verbose && operation % 20 == 0 { + println!( + " Scrub {} / {}: frame={} time={:.3}s seek={:.1}ms", + operation + 1, + result.seek_operations, + target_frame, + target_time, + seek_time_ms + ); + } + } + None => { + result.failed_seeks += 1; + if verbose { + println!( + " Scrub {} / {}: frame={} FAILED", + operation + 1, + result.seek_operations, + target_frame + ); + } + } + } + } + + if !seek_times.is_empty() { + result.avg_seek_time_ms = seek_times.iter().sum::<f64>() / seek_times.len() as f64; + result.p50_seek_time_ms = percentile(&seek_times, 50.0); +
result.p95_seek_time_ms = percentile(&seek_times, 95.0); + result.p99_seek_time_ms = percentile(&seek_times, 99.0); + result.max_seek_time_ms = seek_times.iter().copied().fold(f64::NEG_INFINITY, f64::max); + } + + result.seek_latency_ok = result.p95_seek_time_ms <= SCRUB_SEEK_WARNING_MS; + result.passed = + result.seek_latency_ok && result.failed_seeks == 0 && result.successful_seeks > 0; + + result +} + async fn test_audio_sync( recording_meta: &RecordingMeta, meta: &StudioRecordingMeta, @@ -735,6 +878,7 @@ async fn run_tests_on_recording( fps: u32, run_decoder: bool, run_playback: bool, + run_scrub: bool, run_audio_sync: bool, run_camera_sync: bool, verbose: bool, @@ -826,6 +970,15 @@ async fn run_tests_on_recording( report.playback_results.push(playback_result); } + if run_scrub { + if verbose { + println!(" Testing scrub performance for segment {segment_idx}..."); + } + let scrub_result = + test_scrub(&meta, studio_meta.as_ref(), segment_idx, fps, verbose).await; + report.scrub_results.push(scrub_result); + } + if run_audio_sync { if verbose { println!(" Testing audio sync for segment {segment_idx}..."); @@ -848,10 +1001,11 @@ async fn run_tests_on_recording( let decoder_ok = report.decoder_results.iter().all(|r| r.passed); let playback_ok = report.playback_results.iter().all(|r| r.passed); + let scrub_ok = report.scrub_results.iter().all(|r| r.passed); let audio_ok = report.audio_sync_results.iter().all(|r| r.passed); let camera_ok = report.camera_sync_results.iter().all(|r| r.passed); - report.overall_passed = decoder_ok && playback_ok && audio_ok && camera_ok; + report.overall_passed = decoder_ok && playback_ok && scrub_ok && audio_ok && camera_ok; Ok(report) } @@ -906,6 +1060,12 @@ fn get_failure_tags(report: &RecordingTestReport) -> Vec<String> { if report.playback_results.iter().any(|r| !r.decode_latency_ok) { tags.push("LATENCY".to_string()); } + if report.scrub_results.iter().any(|r| !r.seek_latency_ok) { + tags.push("SCRUB_LATENCY".to_string()); + } + if
report.scrub_results.iter().any(|r| r.failed_seeks > 0) { + tags.push("SCRUB_ERRORS".to_string()); + } if report.playback_results.iter().any(|r| r.failed_frames > 0) { tags.push("DECODE_ERRORS".to_string()); } @@ -1007,6 +1167,16 @@ fn report_to_markdown(report: &RecordingTestReport) -> String { result.p99_decode_time_ms, result.max_decode_time_ms )); + md.push_str(&format!( + "| ↳ Startup | {} | first_decode={:.1}ms startup_to_first={:.1}ms |\n", + if result.startup_to_first_frame_ms > 0.0 { + "✅" + } else { + "❌" + }, + result.first_frame_decode_time_ms, + result.startup_to_first_frame_ms + )); if result.failed_frames > 0 { md.push_str(&format!( "| ↳ Failed Frames | ⚠️ | {} |\n", @@ -1015,6 +1185,31 @@ fn report_to_markdown(report: &RecordingTestReport) -> String { } } + for result in &report.scrub_results { + md.push_str(&format!( + "| Scrub Seg {} | {} | seeks={}/{} avg={:.1}ms p95={:.1}ms p99={:.1}ms |\n", + result.segment_index, + if result.passed { "✅" } else { "❌" }, + result.successful_seeks, + result.seek_operations, + result.avg_seek_time_ms, + result.p95_seek_time_ms, + result.p99_seek_time_ms + )); + md.push_str(&format!( + "| ↳ Scrub Latency | {} | max={:.1}ms threshold={:.1}ms |\n", + if result.seek_latency_ok { "✅" } else { "❌" }, + result.max_seek_time_ms, + SCRUB_SEEK_WARNING_MS + )); + if result.failed_seeks > 0 { + md.push_str(&format!( + "| ↳ Scrub Failures | ⚠️ | {} |\n", + result.failed_seeks + )); + } + } + for result in &report.audio_sync_results { if result.has_mic_audio { let status = if result.mic_sync_ok { "✅" } else { "❌" }; @@ -1202,6 +1397,7 @@ fn print_summary(reports: &[RecordingTestReport]) { let decoder_failed = report.decoder_results.iter().any(|r| !r.passed); let playback_failed = report.playback_results.iter().any(|r| !r.passed); + let scrub_failed = report.scrub_results.iter().any(|r| !r.passed); let audio_failed = report.audio_sync_results.iter().any(|r| !r.passed); let camera_failed = 
report.camera_sync_results.iter().any(|r| !r.passed); @@ -1211,6 +1407,9 @@ fn print_summary(reports: &[RecordingTestReport]) { if playback_failed { print!(" [PLAYBACK]"); } + if scrub_failed { + print!(" [SCRUB]"); + } if audio_failed { print!(" [AUDIO SYNC]"); } @@ -1269,12 +1468,14 @@ async fn main() -> anyhow::Result<()> { return Ok(()); } - let (run_decoder, run_playback, run_audio_sync, run_camera_sync) = match cli.command { - Some(Commands::Decoder) => (true, false, false, false), - Some(Commands::Playback) => (false, true, false, false), - Some(Commands::AudioSync) => (false, false, true, false), - Some(Commands::CameraSync) => (false, false, false, true), - Some(Commands::Full) | None => (true, true, true, true), + let (run_decoder, run_playback, run_scrub, run_audio_sync, run_camera_sync) = match cli.command + { + Some(Commands::Decoder) => (true, false, false, false, false), + Some(Commands::Playback) => (false, true, false, false, false), + Some(Commands::Scrub) => (false, false, true, false, false), + Some(Commands::AudioSync) => (false, false, false, true, false), + Some(Commands::CameraSync) => (false, false, false, false, true), + Some(Commands::Full) | None => (true, true, true, true, true), Some(Commands::List) => unreachable!(), }; @@ -1297,6 +1498,7 @@ async fn main() -> anyhow::Result<()> { cli.fps, run_decoder, run_playback, + run_scrub, run_audio_sync, run_camera_sync, cli.verbose, @@ -1321,6 +1523,7 @@ async fn main() -> anyhow::Result<()> { match cli.command { Some(Commands::Decoder) => "decoder", Some(Commands::Playback) => "playback", + Some(Commands::Scrub) => "scrub", Some(Commands::AudioSync) => "audio-sync", Some(Commands::CameraSync) => "camera-sync", Some(Commands::Full) | None => "full", From 402da322802ba89b11fc49a4c11fb4f49e43e7e7 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 20:26:12 +0000 Subject: [PATCH 006/333] improve: add scrub and startup metrics to playback benchmark Co-authored-by: Richie McIlroy 
--- crates/editor/PLAYBACK-BENCHMARKS.md | 16 ++ crates/editor/PLAYBACK-FINDINGS.md | 9 + .../examples/playback-test-runner.rs | 217 +++++++++++++++++- 3 files changed, 235 insertions(+), 7 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index a2253d580f..dd2f3fe853 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -12,6 +12,7 @@ This document tracks performance benchmarks for Cap's playback and decoding syst | Decode Latency (p95) | <50ms | - | | Effective FPS | ≥30 fps | ±2 fps | | Decode Jitter | <10ms | - | +| Scrub Seek Latency (p95) | <40ms | - | | A/V Sync (mic↔video) | <100ms | - | | A/V Sync (system↔video) | <100ms | - | | Camera-Display Drift | <100ms | - | @@ -20,6 +21,7 @@ This document tracks performance benchmarks for Cap's playback and decoding syst - **Decoder Tests**: Init time, hardware acceleration detection, fallback handling - **Playback Tests**: Sequential decode, frame retrieval, latency percentiles +- **Scrub Tests**: Random access seek decode latency and seek failure rate - **Audio Sync Tests**: Mic-video sync, system audio-video sync - **Camera Sync Tests**: Camera-display drift, frame count alignment - **Decode Benchmark**: Creation, sequential, seek, and random access performance @@ -44,6 +46,7 @@ cargo run -p cap-recording --example playback-test-runner -- full # Run specific test categories cargo run -p cap-recording --example playback-test-runner -- decoder cargo run -p cap-recording --example playback-test-runner -- playback +cargo run -p cap-recording --example playback-test-runner -- scrub cargo run -p cap-recording --example playback-test-runner -- audio-sync cargo run -p cap-recording --example playback-test-runner -- camera-sync @@ -106,6 +109,19 @@ cargo run -p cap-recording --example playback-test-runner -- full | **P50/P95/P99** | Latency percentiles | Sorted distribution | | **Effective FPS** | Actual decode throughput | 
frames / elapsed_time | | **Jitter** | Decode time variance (std dev) | sqrt(variance) | +| **First Decode** | Decode latency for first successful frame | elapsed from first frame request | +| **Startup to First** | Time from playback test start to first decoded frame | elapsed since playback test start | + +### Scrub Metrics + +| Metric | Description | How Measured | +|--------|-------------|--------------| +| **Seek Operations** | Total random seek attempts | Fixed operation count per segment | +| **Successful Seeks** | Seeks returning a decoded frame | Count of non-None seek decodes | +| **Failed Seeks** | Seeks returning no frame | Count of None seek decodes | +| **Avg Seek Time** | Mean random seek decode latency | Avg of seek decode times | +| **P50/P95/P99 Seek** | Seek latency percentiles | Sorted seek time distribution | +| **Max Seek Time** | Worst seek decode latency | Max of seek decode times | ### Audio Sync Metrics diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index e97796063a..0b693b65f9 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -98,6 +98,7 @@ cargo run -p cap-recording --example playback-test-runner -- full # Test specific categories cargo run -p cap-recording --example playback-test-runner -- decoder cargo run -p cap-recording --example playback-test-runner -- playback +cargo run -p cap-recording --example playback-test-runner -- scrub cargo run -p cap-recording --example playback-test-runner -- audio-sync cargo run -p cap-recording --example playback-test-runner -- camera-sync @@ -149,6 +150,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Additional decoder instances are initialized lazily when scrub patterns request them. - Failed lazy init falls back safely to currently available decoders. +4. **Playback benchmark runner now captures scrub and startup metrics (2026-02-13)** + - Added `scrub` benchmark mode to `playback-test-runner`. 
+ - Playback result now includes first-frame decode and startup-to-first-frame latency. + - Scrub result now reports seek p50/p95/p99 and seek failure counts. + --- ## Root Cause Analysis Archive @@ -241,6 +247,7 @@ Decoder Pipeline: 5. Updated Tauri seek/playhead commands to forward seeks into active playback handle. 6. Removed frontend timeline stop/start cycle when seeking while playing. 7. Reduced AVAssetReader eager pool warmup and added lazy decoder instantiation for additional pool slots. +8. Extended playback benchmark tooling with scrub mode and startup latency metrics. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -248,6 +255,8 @@ Decoder Pipeline: - `apps/desktop/src-tauri/src/lib.rs`: forward `seek_to` and `set_playhead_position` into active playback handle. - `apps/desktop/src/routes/editor/Timeline/index.tsx`: seek while playing now sends direct `seekTo` without playback restart. - `crates/rendering/src/decoder/avassetreader.rs`: lower eager decoder warmup and lazy pool growth. +- `crates/recording/examples/playback-test-runner.rs`: added scrub command and startup/scrub latency metrics. +- `crates/editor/PLAYBACK-BENCHMARKS.md`: updated benchmark reference and metric definitions. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/recording/examples/playback-test-runner.rs b/crates/recording/examples/playback-test-runner.rs index 437b3844a3..9c7bff6110 100644 --- a/crates/recording/examples/playback-test-runner.rs +++ b/crates/recording/examples/playback-test-runner.rs @@ -53,6 +53,7 @@ enum Commands { Full, Decoder, Playback, + Scrub, AudioSync, CameraSync, List, @@ -60,6 +61,7 @@ enum Commands { const FPS_TOLERANCE: f64 = 2.0; const DECODE_LATENCY_WARNING_MS: f64 = 50.0; +const SCRUB_SEEK_WARNING_MS: f64 = 40.0; const AUDIO_VIDEO_SYNC_TOLERANCE_MS: f64 = 100.0; const CAMERA_SYNC_TOLERANCE_MS: f64 = 100.0; @@ -82,6 +84,8 @@ struct PlaybackTestResult { total_frames: usize, decoded_frames: usize, failed_frames: usize, + first_frame_decode_time_ms: f64, + startup_to_first_frame_ms: f64, avg_decode_time_ms: f64, min_decode_time_ms: f64, max_decode_time_ms: f64, @@ -96,6 +100,22 @@ struct PlaybackTestResult { errors: Vec, } +#[derive(Debug, Clone, Default)] +struct ScrubTestResult { + passed: bool, + segment_index: usize, + seek_operations: usize, + successful_seeks: usize, + failed_seeks: usize, + avg_seek_time_ms: f64, + p50_seek_time_ms: f64, + p95_seek_time_ms: f64, + p99_seek_time_ms: f64, + max_seek_time_ms: f64, + seek_latency_ok: bool, + errors: Vec, +} + #[derive(Debug, Clone, Default)] struct AudioSyncTestResult { passed: bool, @@ -141,6 +161,7 @@ struct RecordingTestReport { has_system_audio: bool, decoder_results: Vec, playback_results: Vec, + scrub_results: Vec, audio_sync_results: Vec, camera_sync_results: Vec, overall_passed: bool, @@ -208,6 +229,10 @@ impl RecordingTestReport { result.p95_decode_time_ms, result.p99_decode_time_ms ); + println!( + " Startup: first_decode={:.1}ms startup_to_first={:.1}ms", + result.first_frame_decode_time_ms, result.startup_to_first_frame_ms + ); if !result.fps_ok { println!(" WARN: FPS outside tolerance!"); } @@ -219,6 +244,28 @@ impl RecordingTestReport { } } + if !self.scrub_results.is_empty() { + println!("\n SCRUB TESTS:"); 
+ for result in &self.scrub_results { + let status = if result.passed { "OK" } else { "FAIL" }; + println!( + " Segment {}: [{}] seeks={}/{} avg={:.1}ms p95={:.1}ms", + result.segment_index, + status, + result.successful_seeks, + result.seek_operations, + result.avg_seek_time_ms, + result.p95_seek_time_ms + ); + if !result.seek_latency_ok { + println!(" WARN: Scrub seek latency exceeds {SCRUB_SEEK_WARNING_MS}ms!"); + } + for err in &result.errors { + println!(" ERROR: {err}"); + } + } + } + if !self.audio_sync_results.is_empty() { println!("\n AUDIO SYNC TESTS:"); for result in &self.audio_sync_results { @@ -341,6 +388,7 @@ async fn test_playback( fps: u32, verbose: bool, ) -> PlaybackTestResult { + let playback_start = Instant::now(); let mut result = PlaybackTestResult { segment_index, expected_fps: fps as f64, @@ -384,6 +432,11 @@ async fn test_playback( let decode_time_ms = start.elapsed().as_secs_f64() * 1000.0; decode_times.push(decode_time_ms); decoded_count += 1; + if decoded_count == 1 { + result.first_frame_decode_time_ms = decode_time_ms; + result.startup_to_first_frame_ms = + playback_start.elapsed().as_secs_f64() * 1000.0; + } if frame.width() == 0 || frame.height() == 0 { result @@ -448,6 +501,96 @@ async fn test_playback( result } +async fn test_scrub( + recording_meta: &RecordingMeta, + meta: &StudioRecordingMeta, + segment_index: usize, + fps: u32, + verbose: bool, +) -> ScrubTestResult { + let mut result = ScrubTestResult { + segment_index, + seek_operations: 120, + ..Default::default() + }; + + let display_path = match meta { + StudioRecordingMeta::SingleSegment { segment } => { + recording_meta.path(&segment.display.path) + } + StudioRecordingMeta::MultipleSegments { inner } => { + recording_meta.path(&inner.segments[segment_index].display.path) + } + }; + + let decoder = match spawn_decoder("display", display_path.clone(), fps, 0.0, false).await { + Ok(d) => d, + Err(e) => { + result.errors.push(format!("Failed to create decoder: {e}")); + 
return result; + } + }; + + let duration_secs = get_video_duration(&display_path); + let total_frames = (duration_secs * fps as f64).ceil() as usize; + if total_frames < 2 { + result + .errors + .push("Video duration too short for scrub benchmark".to_string()); + return result; + } + + let mut seek_times = Vec::with_capacity(result.seek_operations); + + for operation in 0..result.seek_operations { + let target_frame = ((operation * 7919) % total_frames).max(1); + let target_time = target_frame as f32 / fps as f32; + let seek_start = Instant::now(); + match decoder.get_frame(target_time).await { + Some(_) => { + let seek_time_ms = seek_start.elapsed().as_secs_f64() * 1000.0; + seek_times.push(seek_time_ms); + result.successful_seeks += 1; + if verbose && operation % 20 == 0 { + println!( + " Scrub {} / {}: frame={} time={:.3}s seek={:.1}ms", + operation + 1, + result.seek_operations, + target_frame, + target_time, + seek_time_ms + ); + } + } + None => { + result.failed_seeks += 1; + if verbose { + println!( + " Scrub {} / {}: frame={} FAILED", + operation + 1, + result.seek_operations, + target_frame + ); + } + } + } + } + + if !seek_times.is_empty() { + result.avg_seek_time_ms = seek_times.iter().sum::() / seek_times.len() as f64; + result.p50_seek_time_ms = percentile(&seek_times, 50.0); + result.p95_seek_time_ms = percentile(&seek_times, 95.0); + result.p99_seek_time_ms = percentile(&seek_times, 99.0); + result.max_seek_time_ms = seek_times.iter().copied().fold(f64::NEG_INFINITY, f64::max); + } + + result.seek_latency_ok = result.p95_seek_time_ms <= SCRUB_SEEK_WARNING_MS; + result.passed = + result.seek_latency_ok && result.failed_seeks == 0 && result.successful_seeks > 0; + + result +} + async fn test_audio_sync( recording_meta: &RecordingMeta, meta: &StudioRecordingMeta, @@ -735,6 +878,7 @@ async fn run_tests_on_recording( fps: u32, run_decoder: bool, run_playback: bool, + run_scrub: bool, run_audio_sync: bool, run_camera_sync: bool, verbose: bool, @@ -826,6 
+970,15 @@ async fn run_tests_on_recording( report.playback_results.push(playback_result); } + if run_scrub { + if verbose { + println!(" Testing scrub performance for segment {segment_idx}..."); + } + let scrub_result = + test_scrub(&meta, studio_meta.as_ref(), segment_idx, fps, verbose).await; + report.scrub_results.push(scrub_result); + } + if run_audio_sync { if verbose { println!(" Testing audio sync for segment {segment_idx}..."); @@ -848,10 +1001,11 @@ async fn run_tests_on_recording( let decoder_ok = report.decoder_results.iter().all(|r| r.passed); let playback_ok = report.playback_results.iter().all(|r| r.passed); + let scrub_ok = report.scrub_results.iter().all(|r| r.passed); let audio_ok = report.audio_sync_results.iter().all(|r| r.passed); let camera_ok = report.camera_sync_results.iter().all(|r| r.passed); - report.overall_passed = decoder_ok && playback_ok && audio_ok && camera_ok; + report.overall_passed = decoder_ok && playback_ok && scrub_ok && audio_ok && camera_ok; Ok(report) } @@ -906,6 +1060,12 @@ fn get_failure_tags(report: &RecordingTestReport) -> Vec { if report.playback_results.iter().any(|r| !r.decode_latency_ok) { tags.push("LATENCY".to_string()); } + if report.scrub_results.iter().any(|r| !r.seek_latency_ok) { + tags.push("SCRUB_LATENCY".to_string()); + } + if report.scrub_results.iter().any(|r| r.failed_seeks > 0) { + tags.push("SCRUB_ERRORS".to_string()); + } if report.playback_results.iter().any(|r| r.failed_frames > 0) { tags.push("DECODE_ERRORS".to_string()); } @@ -1007,6 +1167,16 @@ fn report_to_markdown(report: &RecordingTestReport) -> String { result.p99_decode_time_ms, result.max_decode_time_ms )); + md.push_str(&format!( + "| ↳ Startup | {} | first_decode={:.1}ms startup_to_first={:.1}ms |\n", + if result.startup_to_first_frame_ms > 0.0 { + "✅" + } else { + "❌" + }, + result.first_frame_decode_time_ms, + result.startup_to_first_frame_ms + )); if result.failed_frames > 0 { md.push_str(&format!( "| ↳ Failed Frames | ⚠️ | {} |\n", 
@@ -1015,6 +1185,31 @@ fn report_to_markdown(report: &RecordingTestReport) -> String { } } + for result in &report.scrub_results { + md.push_str(&format!( + "| Scrub Seg {} | {} | seeks={}/{} avg={:.1}ms p95={:.1}ms p99={:.1}ms |\n", + result.segment_index, + if result.passed { "✅" } else { "❌" }, + result.successful_seeks, + result.seek_operations, + result.avg_seek_time_ms, + result.p95_seek_time_ms, + result.p99_seek_time_ms + )); + md.push_str(&format!( + "| ↳ Scrub Latency | {} | max={:.1}ms threshold={:.1}ms |\n", + if result.seek_latency_ok { "✅" } else { "❌" }, + result.max_seek_time_ms, + SCRUB_SEEK_WARNING_MS + )); + if result.failed_seeks > 0 { + md.push_str(&format!( + "| ↳ Scrub Failures | ⚠️ | {} |\n", + result.failed_seeks + )); + } + } + for result in &report.audio_sync_results { if result.has_mic_audio { let status = if result.mic_sync_ok { "✅" } else { "❌" }; @@ -1202,6 +1397,7 @@ fn print_summary(reports: &[RecordingTestReport]) { let decoder_failed = report.decoder_results.iter().any(|r| !r.passed); let playback_failed = report.playback_results.iter().any(|r| !r.passed); + let scrub_failed = report.scrub_results.iter().any(|r| !r.passed); let audio_failed = report.audio_sync_results.iter().any(|r| !r.passed); let camera_failed = report.camera_sync_results.iter().any(|r| !r.passed); @@ -1211,6 +1407,9 @@ fn print_summary(reports: &[RecordingTestReport]) { if playback_failed { print!(" [PLAYBACK]"); } + if scrub_failed { + print!(" [SCRUB]"); + } if audio_failed { print!(" [AUDIO SYNC]"); } @@ -1269,12 +1468,14 @@ async fn main() -> anyhow::Result<()> { return Ok(()); } - let (run_decoder, run_playback, run_audio_sync, run_camera_sync) = match cli.command { - Some(Commands::Decoder) => (true, false, false, false), - Some(Commands::Playback) => (false, true, false, false), - Some(Commands::AudioSync) => (false, false, true, false), - Some(Commands::CameraSync) => (false, false, false, true), - Some(Commands::Full) | None => (true, true, true, 
true), + let (run_decoder, run_playback, run_scrub, run_audio_sync, run_camera_sync) = match cli.command + { + Some(Commands::Decoder) => (true, false, false, false, false), + Some(Commands::Playback) => (false, true, false, false, false), + Some(Commands::Scrub) => (false, false, true, false, false), + Some(Commands::AudioSync) => (false, false, false, true, false), + Some(Commands::CameraSync) => (false, false, false, false, true), + Some(Commands::Full) | None => (true, true, true, true, true), Some(Commands::List) => unreachable!(), }; @@ -1297,6 +1498,7 @@ async fn main() -> anyhow::Result<()> { cli.fps, run_decoder, run_playback, + run_scrub, run_audio_sync, run_camera_sync, cli.verbose, @@ -1321,6 +1523,7 @@ async fn main() -> anyhow::Result<()> { match cli.command { Some(Commands::Decoder) => "decoder", Some(Commands::Playback) => "playback", + Some(Commands::Scrub) => "scrub", Some(Commands::AudioSync) => "audio-sync", Some(Commands::CameraSync) => "camera-sync", Some(Commands::Full) | None => "full", From 5e9b2f33a5c8259e170f7876f2daf92282fee3b1 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 20:26:44 +0000 Subject: [PATCH 007/333] docs: add cross-platform playback benchmark matrix Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index dd2f3fe853..8b23767a92 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -77,6 +77,22 @@ cargo run -p cap-recording --example real-device-test-runner -- full --keep-outp cargo run -p cap-recording --example playback-test-runner -- full ``` +### Cross-Platform Validation Matrix + +Run these scenarios on each required hardware class and append outputs via `--benchmark-output`. 
+ +```bash +cargo run -p cap-recording --example playback-test-runner -- full --fps 60 --benchmark-output --notes "platform= gpu= scenario=full" +cargo run -p cap-recording --example playback-test-runner -- scrub --fps 60 --benchmark-output --notes "platform= gpu= scenario=scrub" +``` + +| Platform | GPU Class | MP4 Full | Fragmented Full | MP4 Scrub | Fragmented Scrub | Notes | +|----------|-----------|----------|-----------------|-----------|------------------|-------| +| macOS 13+ | Apple Silicon | ☐ | ☐ | ☐ | ☐ | | +| Windows 11 | NVIDIA discrete | ☐ | ☐ | ☐ | ☐ | | +| Windows 11 | AMD discrete | ☐ | ☐ | ☐ | ☐ | | +| Windows 11 | Integrated baseline | ☐ | ☐ | ☐ | ☐ | | + --- ## Benchmark History From 0c3852e3cce386e141644048a9e9337f10f1b0d8 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 20:26:44 +0000 Subject: [PATCH 008/333] docs: add cross-platform playback benchmark matrix Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index dd2f3fe853..8b23767a92 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -77,6 +77,22 @@ cargo run -p cap-recording --example real-device-test-runner -- full --keep-outp cargo run -p cap-recording --example playback-test-runner -- full ``` +### Cross-Platform Validation Matrix + +Run these scenarios on each required hardware class and append outputs via `--benchmark-output`. 
+ +```bash +cargo run -p cap-recording --example playback-test-runner -- full --fps 60 --benchmark-output --notes "platform= gpu= scenario=full" +cargo run -p cap-recording --example playback-test-runner -- scrub --fps 60 --benchmark-output --notes "platform= gpu= scenario=scrub" +``` + +| Platform | GPU Class | MP4 Full | Fragmented Full | MP4 Scrub | Fragmented Scrub | Notes | +|----------|-----------|----------|-----------------|-----------|------------------|-------| +| macOS 13+ | Apple Silicon | ☐ | ☐ | ☐ | ☐ | | +| Windows 11 | NVIDIA discrete | ☐ | ☐ | ☐ | ☐ | | +| Windows 11 | AMD discrete | ☐ | ☐ | ☐ | ☐ | | +| Windows 11 | Integrated baseline | ☐ | ☐ | ☐ | ☐ | | + --- ## Benchmark History From 4ed958841d38bcab2e14c8e940840cd24da0c377 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 20:28:37 +0000 Subject: [PATCH 009/333] improve: add playback startup latency telemetry logs Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 30 +++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 0b693b65f9..b4d6bf57c1 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -155,6 +155,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Playback result now includes first-frame decode and startup-to-first-frame latency. - Scrub result now reports seek p50/p95/p99 and seek failure counts. +5. **Playback runtime emits startup latency signals (2026-02-13)** + - Playback loop now logs first rendered frame latency. + - Audio stream setup now logs startup preparation time and first callback latency. + --- ## Root Cause Analysis Archive @@ -248,6 +252,7 @@ Decoder Pipeline: 6. Removed frontend timeline stop/start cycle when seeking while playing. 7. 
Reduced AVAssetReader eager pool warmup and added lazy decoder instantiation for additional pool slots. 8. Extended playback benchmark tooling with scrub mode and startup latency metrics. +9. Added playback runtime startup telemetry logs for first frame and audio callback bring-up. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -257,6 +262,7 @@ Decoder Pipeline: - `crates/rendering/src/decoder/avassetreader.rs`: lower eager decoder warmup and lazy pool growth. - `crates/recording/examples/playback-test-runner.rs`: added scrub command and startup/scrub latency metrics. - `crates/editor/PLAYBACK-BENCHMARKS.md`: updated benchmark reference and metric definitions. +- `crates/editor/src/playback.rs`: added first-render and audio-callback startup latency logging. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 3672590369..70e019ef5d 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -18,7 +18,10 @@ use lru::LruCache; use std::{ collections::{HashSet, VecDeque}, num::NonZeroUsize, - sync::{Arc, RwLock}, + sync::{ + Arc, RwLock, + atomic::{AtomicBool, Ordering}, + }, time::Duration, }; use tokio::{ @@ -358,6 +361,7 @@ impl Playback { }); tokio::spawn(async move { + let playback_task_start = Instant::now(); let duration = if let Some(timeline) = &self.project.borrow().timeline { timeline.duration() } else { @@ -387,6 +391,7 @@ impl Playback { let mut total_frames_rendered = 0u64; let mut _total_frames_skipped = 0u64; + let mut first_render_logged = false; let warmup_target_frames = 20usize; let warmup_after_first_timeout = Duration::from_millis(1000); @@ -726,6 +731,14 @@ impl Playback { .await; total_frames_rendered += 1; + if !first_render_logged { + first_render_logged = true; + info!( + first_render_latency_ms = + 
playback_task_start.elapsed().as_secs_f64() * 1000.0, + "Playback rendered first frame" + ); + } } event_tx.send(PlaybackEvent::Frame(frame_number)).ok(); @@ -819,6 +832,7 @@ impl AudioPlayback { } std::thread::spawn(move || { + let audio_thread_start = Instant::now(); let host = cpal::default_host(); let device = match host.default_output_device() { Some(d) => d, @@ -933,6 +947,10 @@ impl AudioPlayback { } }; + info!( + startup_prepare_ms = audio_thread_start.elapsed().as_secs_f64() * 1000.0, + "Audio stream prepared, starting playback stream" + ); if let Err(e) = stream.play() { error!( "Failed to play audio stream: {}. Skipping audio playback.", @@ -1103,6 +1121,8 @@ impl AudioPlayback { let mut latency_corrector = LatencyCorrector::new(static_latency_hint, latency_config); let initial_compensation_secs = latency_corrector.initial_compensation_secs(); let device_sample_rate = sample_rate; + let stream_build_start = Instant::now(); + let callback_started = Arc::new(AtomicBool::new(false)); { let project_snapshot = project.borrow(); @@ -1140,6 +1160,7 @@ impl AudioPlayback { let headroom_for_stream = headroom_samples; let mut playhead_rx_for_stream = playhead_rx.clone(); let mut last_video_playhead = playhead; + let callback_started_for_stream = callback_started.clone(); #[cfg(target_os = "windows")] const FIXED_LATENCY_SECS: f64 = 0.08; @@ -1159,6 +1180,13 @@ impl AudioPlayback { let stream_result = device.build_output_stream( &config, move |buffer: &mut [T], info| { + if !callback_started_for_stream.swap(true, Ordering::Relaxed) { + info!( + startup_to_callback_ms = + stream_build_start.elapsed().as_secs_f64() * 1000.0, + "Audio output callback started" + ); + } #[cfg(not(target_os = "windows"))] let latency_secs = latency_corrector.update_from_callback(info); #[cfg(target_os = "windows")] From 5c9df4ad015d6f6b7a66e5c1fd237280cfa5320b Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 20:28:37 +0000 Subject: [PATCH 010/333] improve: add 
playback startup latency telemetry logs Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 30 +++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 0b693b65f9..b4d6bf57c1 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -155,6 +155,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Playback result now includes first-frame decode and startup-to-first-frame latency. - Scrub result now reports seek p50/p95/p99 and seek failure counts. +5. **Playback runtime emits startup latency signals (2026-02-13)** + - Playback loop now logs first rendered frame latency. + - Audio stream setup now logs startup preparation time and first callback latency. + --- ## Root Cause Analysis Archive @@ -248,6 +252,7 @@ Decoder Pipeline: 6. Removed frontend timeline stop/start cycle when seeking while playing. 7. Reduced AVAssetReader eager pool warmup and added lazy decoder instantiation for additional pool slots. 8. Extended playback benchmark tooling with scrub mode and startup latency metrics. +9. Added playback runtime startup telemetry logs for first frame and audio callback bring-up. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -257,6 +262,7 @@ Decoder Pipeline: - `crates/rendering/src/decoder/avassetreader.rs`: lower eager decoder warmup and lazy pool growth. - `crates/recording/examples/playback-test-runner.rs`: added scrub command and startup/scrub latency metrics. - `crates/editor/PLAYBACK-BENCHMARKS.md`: updated benchmark reference and metric definitions. +- `crates/editor/src/playback.rs`: added first-render and audio-callback startup latency logging. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 3672590369..70e019ef5d 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -18,7 +18,10 @@ use lru::LruCache; use std::{ collections::{HashSet, VecDeque}, num::NonZeroUsize, - sync::{Arc, RwLock}, + sync::{ + Arc, RwLock, + atomic::{AtomicBool, Ordering}, + }, time::Duration, }; use tokio::{ @@ -358,6 +361,7 @@ impl Playback { }); tokio::spawn(async move { + let playback_task_start = Instant::now(); let duration = if let Some(timeline) = &self.project.borrow().timeline { timeline.duration() } else { @@ -387,6 +391,7 @@ impl Playback { let mut total_frames_rendered = 0u64; let mut _total_frames_skipped = 0u64; + let mut first_render_logged = false; let warmup_target_frames = 20usize; let warmup_after_first_timeout = Duration::from_millis(1000); @@ -726,6 +731,14 @@ impl Playback { .await; total_frames_rendered += 1; + if !first_render_logged { + first_render_logged = true; + info!( + first_render_latency_ms = + playback_task_start.elapsed().as_secs_f64() * 1000.0, + "Playback rendered first frame" + ); + } } event_tx.send(PlaybackEvent::Frame(frame_number)).ok(); @@ -819,6 +832,7 @@ impl AudioPlayback { } std::thread::spawn(move || { + let audio_thread_start = Instant::now(); let host = cpal::default_host(); let device = match host.default_output_device() { Some(d) => d, @@ -933,6 +947,10 @@ impl AudioPlayback { } }; + info!( + startup_prepare_ms = audio_thread_start.elapsed().as_secs_f64() * 1000.0, + "Audio stream prepared, starting playback stream" + ); if let Err(e) = stream.play() { error!( "Failed to play audio stream: {}. 
Skipping audio playback.", @@ -1103,6 +1121,8 @@ impl AudioPlayback { let mut latency_corrector = LatencyCorrector::new(static_latency_hint, latency_config); let initial_compensation_secs = latency_corrector.initial_compensation_secs(); let device_sample_rate = sample_rate; + let stream_build_start = Instant::now(); + let callback_started = Arc::new(AtomicBool::new(false)); { let project_snapshot = project.borrow(); @@ -1140,6 +1160,7 @@ impl AudioPlayback { let headroom_for_stream = headroom_samples; let mut playhead_rx_for_stream = playhead_rx.clone(); let mut last_video_playhead = playhead; + let callback_started_for_stream = callback_started.clone(); #[cfg(target_os = "windows")] const FIXED_LATENCY_SECS: f64 = 0.08; @@ -1159,6 +1180,13 @@ impl AudioPlayback { let stream_result = device.build_output_stream( &config, move |buffer: &mut [T], info| { + if !callback_started_for_stream.swap(true, Ordering::Relaxed) { + info!( + startup_to_callback_ms = + stream_build_start.elapsed().as_secs_f64() * 1000.0, + "Audio output callback started" + ); + } #[cfg(not(target_os = "windows"))] let latency_secs = latency_corrector.update_from_callback(info); #[cfg(target_os = "windows")] From 78c1b937848527d220063e2ac053c764b1cede8c Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 20:42:59 +0000 Subject: [PATCH 011/333] improve: extend decode benchmark with json startup metrics Co-authored-by: Richie McIlroy --- Cargo.lock | 1 + crates/editor/Cargo.toml | 1 + crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 6 + crates/editor/examples/decode-benchmark.rs | 169 ++++++++++++++++++--- 5 files changed, 158 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c4f8665e90..6076dc378a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1311,6 +1311,7 @@ dependencies = [ "ringbuf", "sentry", "serde", + "serde_json", "specta", "tokio", "tokio-util", diff --git a/crates/editor/Cargo.toml b/crates/editor/Cargo.toml index 
c612d1e33f..0cc16b4111 100644 --- a/crates/editor/Cargo.toml +++ b/crates/editor/Cargo.toml @@ -23,6 +23,7 @@ axum = { version = "0.7.5", features = ["ws"] } ffmpeg.workspace = true specta.workspace = true serde = { workspace = true } +serde_json = "1" sentry.workspace = true futures = { workspace = true } tracing.workspace = true diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 8b23767a92..da84ed7b3a 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -65,6 +65,9 @@ cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 # With custom FPS and iterations cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --iterations 50 + +# Emit machine-readable JSON with startup/scrub metrics +cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --sequential-frames 180 --random-samples 120 --output-json /tmp/decode-benchmark.json ``` #### Combined Workflow (Recording → Playback) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b4d6bf57c1..fa28454366 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -159,6 +159,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Playback loop now logs first rendered frame latency. - Audio stream setup now logs startup preparation time and first callback latency. +6. **Decode benchmark now supports machine-readable evidence output (2026-02-13)** + - `decode-benchmark` supports `--output-json` for structured metric capture. + - Added sequential frame and random sample count controls to scale benchmark depth per hardware class. + --- ## Root Cause Analysis Archive @@ -253,6 +257,7 @@ Decoder Pipeline: 7. Reduced AVAssetReader eager pool warmup and added lazy decoder instantiation for additional pool slots. 8. 
Extended playback benchmark tooling with scrub mode and startup latency metrics. 9. Added playback runtime startup telemetry logs for first frame and audio callback bring-up. +10. Enhanced decode benchmark example with structured JSON output and configurable sample depth. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -263,6 +268,7 @@ Decoder Pipeline: - `crates/recording/examples/playback-test-runner.rs`: added scrub command and startup/scrub latency metrics. - `crates/editor/PLAYBACK-BENCHMARKS.md`: updated benchmark reference and metric definitions. - `crates/editor/src/playback.rs`: added first-render and audio-callback startup latency logging. +- `crates/editor/examples/decode-benchmark.rs`: added `--output-json`, startup metrics, and configurable sequential/random sampling. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/examples/decode-benchmark.rs b/crates/editor/examples/decode-benchmark.rs index d29ab2dda0..2ae11f207b 100644 --- a/crates/editor/examples/decode-benchmark.rs +++ b/crates/editor/examples/decode-benchmark.rs @@ -1,4 +1,6 @@ use cap_rendering::decoder::{AsyncVideoDecoderHandle, spawn_decoder}; +use serde::Serialize; +use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; use std::time::Instant; @@ -38,23 +40,44 @@ struct BenchmarkConfig { video_path: PathBuf, fps: u32, iterations: usize, + sequential_frames: usize, + random_samples: usize, + output_json: Option, } -#[derive(Debug, Default)] +#[derive(Debug, Default, Serialize)] struct BenchmarkResults { decoder_creation_ms: f64, sequential_decode_times_ms: Vec, + first_frame_decode_ms: f64, + startup_to_first_frame_ms: f64, + sequential_p50_ms: f64, + sequential_p95_ms: f64, + sequential_p99_ms: f64, sequential_fps: f64, sequential_failures: usize, seek_times_by_distance: Vec<(f32, f64)>, seek_failures: usize, random_access_times_ms: Vec, 
random_access_avg_ms: f64, + random_access_p50_ms: f64, + random_access_p95_ms: f64, + random_access_p99_ms: f64, random_access_failures: usize, cache_hits: usize, cache_misses: usize, } +#[derive(Debug, Serialize)] +struct BenchmarkOutput { + video_path: PathBuf, + fps: u32, + iterations: usize, + sequential_frames: usize, + random_samples: usize, + results: BenchmarkResults, +} + impl BenchmarkResults { fn print_report(&self) { println!("\n{}", "=".repeat(60)); @@ -96,6 +119,14 @@ impl BenchmarkResults { println!(" Avg decode time: {avg:.2}ms"); println!(" Min decode time: {min:.2}ms"); println!(" Max decode time: {max:.2}ms"); + println!(" P50 decode time: {:.2}ms", self.sequential_p50_ms); + println!(" P95 decode time: {:.2}ms", self.sequential_p95_ms); + println!(" P99 decode time: {:.2}ms", self.sequential_p99_ms); + println!(" First frame decode: {:.2}ms", self.first_frame_decode_ms); + println!( + " Startup to first frame: {:.2}ms", + self.startup_to_first_frame_ms + ); println!(" Effective FPS: {:.1}", self.sequential_fps); } println!(); @@ -138,18 +169,9 @@ impl BenchmarkResults { println!(" Avg access time: {avg:.2}ms"); println!(" Min access time: {min:.2}ms"); println!(" Max access time: {max:.2}ms"); - println!( - " P50: {:.2}ms", - percentile(&self.random_access_times_ms, 50.0) - ); - println!( - " P95: {:.2}ms", - percentile(&self.random_access_times_ms, 95.0) - ); - println!( - " P99: {:.2}ms", - percentile(&self.random_access_times_ms, 99.0) - ); + println!(" P50: {:.2}ms", self.random_access_p50_ms); + println!(" P95: {:.2}ms", self.random_access_p95_ms); + println!(" P99: {:.2}ms", self.random_access_p99_ms); } println!(); @@ -215,10 +237,13 @@ async fn benchmark_sequential_decode( fps: u32, frame_count: usize, start_time: f32, -) -> (Vec, f64, usize) { +) -> (Vec, f64, usize, f64, f64) { let mut times = Vec::with_capacity(frame_count); let mut failures = 0; let overall_start = Instant::now(); + let mut first_frame_decode_ms = 0.0; + let mut 
startup_to_first_frame_ms = 0.0; + let mut first_frame_captured = false; for i in 0..frame_count { let time = start_time + (i as f32 / fps as f32); @@ -227,6 +252,11 @@ async fn benchmark_sequential_decode( Some(_frame) => { let elapsed = start.elapsed(); times.push(elapsed.as_secs_f64() * 1000.0); + if !first_frame_captured { + first_frame_captured = true; + first_frame_decode_ms = elapsed.as_secs_f64() * 1000.0; + startup_to_first_frame_ms = overall_start.elapsed().as_secs_f64() * 1000.0; + } } None => { failures += 1; @@ -243,7 +273,13 @@ async fn benchmark_sequential_decode( 0.0 }; - (times, effective_fps, failures) + ( + times, + effective_fps, + failures, + first_frame_decode_ms, + startup_to_first_frame_ms, + ) } async fn benchmark_seek( @@ -308,6 +344,10 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { config.video_path.display() ); println!("FPS: {}, Iterations: {}", config.fps, config.iterations); + println!( + "Sequential frames: {}, Random samples: {}", + config.sequential_frames, config.random_samples + ); println!(); println!("[1/5] Benchmarking decoder creation..."); @@ -341,12 +381,20 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { println!("Detected video duration: {video_duration:.2}s"); println!(); - println!("[3/5] Benchmarking sequential decode (100 frames from start)..."); - let (seq_times, seq_fps, seq_failures) = - benchmark_sequential_decode(&decoder, config.fps, 100, 0.0).await; + println!( + "[3/5] Benchmarking sequential decode ({} frames from start)...", + config.sequential_frames + ); + let (seq_times, seq_fps, seq_failures, first_frame_decode_ms, startup_to_first_frame_ms) = + benchmark_sequential_decode(&decoder, config.fps, config.sequential_frames, 0.0).await; results.sequential_decode_times_ms = seq_times; results.sequential_fps = seq_fps; results.sequential_failures = seq_failures; + results.first_frame_decode_ms = first_frame_decode_ms; + 
results.startup_to_first_frame_ms = startup_to_first_frame_ms; + results.sequential_p50_ms = percentile(&results.sequential_decode_times_ms, 50.0); + results.sequential_p95_ms = percentile(&results.sequential_decode_times_ms, 95.0); + results.sequential_p99_ms = percentile(&results.sequential_decode_times_ms, 99.0); println!(" Done: {seq_fps:.1} effective FPS"); if seq_failures > 0 { println!(" Warning: {seq_failures} frames failed to decode"); @@ -370,9 +418,12 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { } } - println!("[5/5] Benchmarking random access (50 samples)..."); + println!( + "[5/5] Benchmarking random access ({} samples)...", + config.random_samples + ); let (random_times, random_failures) = - benchmark_random_access(&decoder, config.fps, video_duration, 50).await; + benchmark_random_access(&decoder, config.fps, video_duration, config.random_samples).await; results.random_access_times_ms = random_times; results.random_access_failures = random_failures; results.random_access_avg_ms = if results.random_access_times_ms.is_empty() { @@ -381,6 +432,9 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { results.random_access_times_ms.iter().sum::() / results.random_access_times_ms.len() as f64 }; + results.random_access_p50_ms = percentile(&results.random_access_times_ms, 50.0); + results.random_access_p95_ms = percentile(&results.random_access_times_ms, 95.0); + results.random_access_p99_ms = percentile(&results.random_access_times_ms, 99.0); println!(" Done: {:.2}ms avg", results.random_access_avg_ms); if random_failures > 0 { println!(" Warning: {random_failures} random accesses failed"); @@ -389,6 +443,53 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { results } +fn write_json_output(config: &BenchmarkConfig, results: &BenchmarkResults) { + let Some(output_path) = &config.output_json else { + return; + }; + + let output = BenchmarkOutput { + video_path: 
config.video_path.clone(), + fps: config.fps, + iterations: config.iterations, + sequential_frames: config.sequential_frames, + random_samples: config.random_samples, + results: BenchmarkResults { + decoder_creation_ms: results.decoder_creation_ms, + sequential_decode_times_ms: results.sequential_decode_times_ms.clone(), + first_frame_decode_ms: results.first_frame_decode_ms, + startup_to_first_frame_ms: results.startup_to_first_frame_ms, + sequential_p50_ms: results.sequential_p50_ms, + sequential_p95_ms: results.sequential_p95_ms, + sequential_p99_ms: results.sequential_p99_ms, + sequential_fps: results.sequential_fps, + sequential_failures: results.sequential_failures, + seek_times_by_distance: results.seek_times_by_distance.clone(), + seek_failures: results.seek_failures, + random_access_times_ms: results.random_access_times_ms.clone(), + random_access_avg_ms: results.random_access_avg_ms, + random_access_p50_ms: results.random_access_p50_ms, + random_access_p95_ms: results.random_access_p95_ms, + random_access_p99_ms: results.random_access_p99_ms, + random_access_failures: results.random_access_failures, + cache_hits: results.cache_hits, + cache_misses: results.cache_misses, + }, + }; + + match serde_json::to_string_pretty(&output) { + Ok(json) => match fs::write(output_path, json) { + Ok(()) => println!("Wrote benchmark JSON to {}", output_path.display()), + Err(error) => eprintln!( + "Failed to write benchmark JSON to {}: {}", + output_path.display(), + error + ), + }, + Err(error) => eprintln!("Failed to serialize benchmark JSON output: {}", error), + } +} + fn main() { let args: Vec = std::env::args().collect(); @@ -397,7 +498,7 @@ fn main() { .position(|a| a == "--video") .and_then(|i| args.get(i + 1)) .map(PathBuf::from) - .expect("Usage: decode-benchmark --video [--fps ] [--iterations ]"); + .expect("Usage: decode-benchmark --video [--fps ] [--iterations ] [--sequential-frames ] [--random-samples ] [--output-json ]"); let fps = args .iter() @@ -413,14 
+514,38 @@ fn main() { .and_then(|s| s.parse().ok()) .unwrap_or(100); + let sequential_frames = args + .iter() + .position(|a| a == "--sequential-frames") + .and_then(|i| args.get(i + 1)) + .and_then(|s| s.parse().ok()) + .unwrap_or(100); + + let random_samples = args + .iter() + .position(|a| a == "--random-samples") + .and_then(|i| args.get(i + 1)) + .and_then(|s| s.parse().ok()) + .unwrap_or(50); + + let output_json = args + .iter() + .position(|a| a == "--output-json") + .and_then(|i| args.get(i + 1)) + .map(PathBuf::from); + let config = BenchmarkConfig { video_path, fps, iterations, + sequential_frames, + random_samples, + output_json, }; let rt = Runtime::new().expect("Failed to create Tokio runtime"); - let results = rt.block_on(run_full_benchmark(config)); + let results = rt.block_on(run_full_benchmark(config.clone())); results.print_report(); + write_json_output(&config, &results); } From aef21d4b321352a13689a2f9bbb1a43c1575b40a Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 20:42:59 +0000 Subject: [PATCH 012/333] improve: extend decode benchmark with json startup metrics Co-authored-by: Richie McIlroy --- Cargo.lock | 1 + crates/editor/Cargo.toml | 1 + crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 6 + crates/editor/examples/decode-benchmark.rs | 169 ++++++++++++++++++--- 5 files changed, 158 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c4f8665e90..6076dc378a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1311,6 +1311,7 @@ dependencies = [ "ringbuf", "sentry", "serde", + "serde_json", "specta", "tokio", "tokio-util", diff --git a/crates/editor/Cargo.toml b/crates/editor/Cargo.toml index c612d1e33f..0cc16b4111 100644 --- a/crates/editor/Cargo.toml +++ b/crates/editor/Cargo.toml @@ -23,6 +23,7 @@ axum = { version = "0.7.5", features = ["ws"] } ffmpeg.workspace = true specta.workspace = true serde = { workspace = true } +serde_json = "1" sentry.workspace = true futures = { 
workspace = true } tracing.workspace = true diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 8b23767a92..da84ed7b3a 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -65,6 +65,9 @@ cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 # With custom FPS and iterations cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --iterations 50 + +# Emit machine-readable JSON with startup/scrub metrics +cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --sequential-frames 180 --random-samples 120 --output-json /tmp/decode-benchmark.json ``` #### Combined Workflow (Recording → Playback) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b4d6bf57c1..fa28454366 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -159,6 +159,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Playback loop now logs first rendered frame latency. - Audio stream setup now logs startup preparation time and first callback latency. +6. **Decode benchmark now supports machine-readable evidence output (2026-02-13)** + - `decode-benchmark` supports `--output-json` for structured metric capture. + - Added sequential frame and random sample count controls to scale benchmark depth per hardware class. + --- ## Root Cause Analysis Archive @@ -253,6 +257,7 @@ Decoder Pipeline: 7. Reduced AVAssetReader eager pool warmup and added lazy decoder instantiation for additional pool slots. 8. Extended playback benchmark tooling with scrub mode and startup latency metrics. 9. Added playback runtime startup telemetry logs for first frame and audio callback bring-up. +10. Enhanced decode benchmark example with structured JSON output and configurable sample depth. 
**Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -263,6 +268,7 @@ Decoder Pipeline: - `crates/recording/examples/playback-test-runner.rs`: added scrub command and startup/scrub latency metrics. - `crates/editor/PLAYBACK-BENCHMARKS.md`: updated benchmark reference and metric definitions. - `crates/editor/src/playback.rs`: added first-render and audio-callback startup latency logging. +- `crates/editor/examples/decode-benchmark.rs`: added `--output-json`, startup metrics, and configurable sequential/random sampling. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/examples/decode-benchmark.rs b/crates/editor/examples/decode-benchmark.rs index d29ab2dda0..2ae11f207b 100644 --- a/crates/editor/examples/decode-benchmark.rs +++ b/crates/editor/examples/decode-benchmark.rs @@ -1,4 +1,6 @@ use cap_rendering::decoder::{AsyncVideoDecoderHandle, spawn_decoder}; +use serde::Serialize; +use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; use std::time::Instant; @@ -38,23 +40,44 @@ struct BenchmarkConfig { video_path: PathBuf, fps: u32, iterations: usize, + sequential_frames: usize, + random_samples: usize, + output_json: Option, } -#[derive(Debug, Default)] +#[derive(Debug, Default, Serialize)] struct BenchmarkResults { decoder_creation_ms: f64, sequential_decode_times_ms: Vec, + first_frame_decode_ms: f64, + startup_to_first_frame_ms: f64, + sequential_p50_ms: f64, + sequential_p95_ms: f64, + sequential_p99_ms: f64, sequential_fps: f64, sequential_failures: usize, seek_times_by_distance: Vec<(f32, f64)>, seek_failures: usize, random_access_times_ms: Vec, random_access_avg_ms: f64, + random_access_p50_ms: f64, + random_access_p95_ms: f64, + random_access_p99_ms: f64, random_access_failures: usize, cache_hits: usize, cache_misses: usize, } +#[derive(Debug, Serialize)] +struct BenchmarkOutput { + video_path: PathBuf, + fps: 
u32, + iterations: usize, + sequential_frames: usize, + random_samples: usize, + results: BenchmarkResults, +} + impl BenchmarkResults { fn print_report(&self) { println!("\n{}", "=".repeat(60)); @@ -96,6 +119,14 @@ impl BenchmarkResults { println!(" Avg decode time: {avg:.2}ms"); println!(" Min decode time: {min:.2}ms"); println!(" Max decode time: {max:.2}ms"); + println!(" P50 decode time: {:.2}ms", self.sequential_p50_ms); + println!(" P95 decode time: {:.2}ms", self.sequential_p95_ms); + println!(" P99 decode time: {:.2}ms", self.sequential_p99_ms); + println!(" First frame decode: {:.2}ms", self.first_frame_decode_ms); + println!( + " Startup to first frame: {:.2}ms", + self.startup_to_first_frame_ms + ); println!(" Effective FPS: {:.1}", self.sequential_fps); } println!(); @@ -138,18 +169,9 @@ impl BenchmarkResults { println!(" Avg access time: {avg:.2}ms"); println!(" Min access time: {min:.2}ms"); println!(" Max access time: {max:.2}ms"); - println!( - " P50: {:.2}ms", - percentile(&self.random_access_times_ms, 50.0) - ); - println!( - " P95: {:.2}ms", - percentile(&self.random_access_times_ms, 95.0) - ); - println!( - " P99: {:.2}ms", - percentile(&self.random_access_times_ms, 99.0) - ); + println!(" P50: {:.2}ms", self.random_access_p50_ms); + println!(" P95: {:.2}ms", self.random_access_p95_ms); + println!(" P99: {:.2}ms", self.random_access_p99_ms); } println!(); @@ -215,10 +237,13 @@ async fn benchmark_sequential_decode( fps: u32, frame_count: usize, start_time: f32, -) -> (Vec, f64, usize) { +) -> (Vec, f64, usize, f64, f64) { let mut times = Vec::with_capacity(frame_count); let mut failures = 0; let overall_start = Instant::now(); + let mut first_frame_decode_ms = 0.0; + let mut startup_to_first_frame_ms = 0.0; + let mut first_frame_captured = false; for i in 0..frame_count { let time = start_time + (i as f32 / fps as f32); @@ -227,6 +252,11 @@ async fn benchmark_sequential_decode( Some(_frame) => { let elapsed = start.elapsed(); 
times.push(elapsed.as_secs_f64() * 1000.0); + if !first_frame_captured { + first_frame_captured = true; + first_frame_decode_ms = elapsed.as_secs_f64() * 1000.0; + startup_to_first_frame_ms = overall_start.elapsed().as_secs_f64() * 1000.0; + } } None => { failures += 1; @@ -243,7 +273,13 @@ async fn benchmark_sequential_decode( 0.0 }; - (times, effective_fps, failures) + ( + times, + effective_fps, + failures, + first_frame_decode_ms, + startup_to_first_frame_ms, + ) } async fn benchmark_seek( @@ -308,6 +344,10 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { config.video_path.display() ); println!("FPS: {}, Iterations: {}", config.fps, config.iterations); + println!( + "Sequential frames: {}, Random samples: {}", + config.sequential_frames, config.random_samples + ); println!(); println!("[1/5] Benchmarking decoder creation..."); @@ -341,12 +381,20 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { println!("Detected video duration: {video_duration:.2}s"); println!(); - println!("[3/5] Benchmarking sequential decode (100 frames from start)..."); - let (seq_times, seq_fps, seq_failures) = - benchmark_sequential_decode(&decoder, config.fps, 100, 0.0).await; + println!( + "[3/5] Benchmarking sequential decode ({} frames from start)...", + config.sequential_frames + ); + let (seq_times, seq_fps, seq_failures, first_frame_decode_ms, startup_to_first_frame_ms) = + benchmark_sequential_decode(&decoder, config.fps, config.sequential_frames, 0.0).await; results.sequential_decode_times_ms = seq_times; results.sequential_fps = seq_fps; results.sequential_failures = seq_failures; + results.first_frame_decode_ms = first_frame_decode_ms; + results.startup_to_first_frame_ms = startup_to_first_frame_ms; + results.sequential_p50_ms = percentile(&results.sequential_decode_times_ms, 50.0); + results.sequential_p95_ms = percentile(&results.sequential_decode_times_ms, 95.0); + results.sequential_p99_ms = 
percentile(&results.sequential_decode_times_ms, 99.0); println!(" Done: {seq_fps:.1} effective FPS"); if seq_failures > 0 { println!(" Warning: {seq_failures} frames failed to decode"); @@ -370,9 +418,12 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { } } - println!("[5/5] Benchmarking random access (50 samples)..."); + println!( + "[5/5] Benchmarking random access ({} samples)...", + config.random_samples + ); let (random_times, random_failures) = - benchmark_random_access(&decoder, config.fps, video_duration, 50).await; + benchmark_random_access(&decoder, config.fps, video_duration, config.random_samples).await; results.random_access_times_ms = random_times; results.random_access_failures = random_failures; results.random_access_avg_ms = if results.random_access_times_ms.is_empty() { @@ -381,6 +432,9 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { results.random_access_times_ms.iter().sum::() / results.random_access_times_ms.len() as f64 }; + results.random_access_p50_ms = percentile(&results.random_access_times_ms, 50.0); + results.random_access_p95_ms = percentile(&results.random_access_times_ms, 95.0); + results.random_access_p99_ms = percentile(&results.random_access_times_ms, 99.0); println!(" Done: {:.2}ms avg", results.random_access_avg_ms); if random_failures > 0 { println!(" Warning: {random_failures} random accesses failed"); @@ -389,6 +443,53 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { results } +fn write_json_output(config: &BenchmarkConfig, results: &BenchmarkResults) { + let Some(output_path) = &config.output_json else { + return; + }; + + let output = BenchmarkOutput { + video_path: config.video_path.clone(), + fps: config.fps, + iterations: config.iterations, + sequential_frames: config.sequential_frames, + random_samples: config.random_samples, + results: BenchmarkResults { + decoder_creation_ms: results.decoder_creation_ms, + sequential_decode_times_ms: 
results.sequential_decode_times_ms.clone(), + first_frame_decode_ms: results.first_frame_decode_ms, + startup_to_first_frame_ms: results.startup_to_first_frame_ms, + sequential_p50_ms: results.sequential_p50_ms, + sequential_p95_ms: results.sequential_p95_ms, + sequential_p99_ms: results.sequential_p99_ms, + sequential_fps: results.sequential_fps, + sequential_failures: results.sequential_failures, + seek_times_by_distance: results.seek_times_by_distance.clone(), + seek_failures: results.seek_failures, + random_access_times_ms: results.random_access_times_ms.clone(), + random_access_avg_ms: results.random_access_avg_ms, + random_access_p50_ms: results.random_access_p50_ms, + random_access_p95_ms: results.random_access_p95_ms, + random_access_p99_ms: results.random_access_p99_ms, + random_access_failures: results.random_access_failures, + cache_hits: results.cache_hits, + cache_misses: results.cache_misses, + }, + }; + + match serde_json::to_string_pretty(&output) { + Ok(json) => match fs::write(output_path, json) { + Ok(()) => println!("Wrote benchmark JSON to {}", output_path.display()), + Err(error) => eprintln!( + "Failed to write benchmark JSON to {}: {}", + output_path.display(), + error + ), + }, + Err(error) => eprintln!("Failed to serialize benchmark JSON output: {}", error), + } +} + fn main() { let args: Vec = std::env::args().collect(); @@ -397,7 +498,7 @@ fn main() { .position(|a| a == "--video") .and_then(|i| args.get(i + 1)) .map(PathBuf::from) - .expect("Usage: decode-benchmark --video [--fps ] [--iterations ]"); + .expect("Usage: decode-benchmark --video [--fps ] [--iterations ] [--sequential-frames ] [--random-samples ] [--output-json ]"); let fps = args .iter() @@ -413,14 +514,38 @@ fn main() { .and_then(|s| s.parse().ok()) .unwrap_or(100); + let sequential_frames = args + .iter() + .position(|a| a == "--sequential-frames") + .and_then(|i| args.get(i + 1)) + .and_then(|s| s.parse().ok()) + .unwrap_or(100); + + let random_samples = args + .iter() + 
.position(|a| a == "--random-samples") + .and_then(|i| args.get(i + 1)) + .and_then(|s| s.parse().ok()) + .unwrap_or(50); + + let output_json = args + .iter() + .position(|a| a == "--output-json") + .and_then(|i| args.get(i + 1)) + .map(PathBuf::from); + let config = BenchmarkConfig { video_path, fps, iterations, + sequential_frames, + random_samples, + output_json, }; let rt = Runtime::new().expect("Failed to create Tokio runtime"); - let results = rt.block_on(run_full_benchmark(config)); + let results = rt.block_on(run_full_benchmark(config.clone())); results.print_report(); + write_json_output(&config, &results); } From 7606f363848086ff8904747cf0f1d42942e035aa Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 20:43:59 +0000 Subject: [PATCH 013/333] improve: support fragmented inputs in decode benchmark Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/examples/decode-benchmark.rs | 93 ++++++++++++++++++++++ 3 files changed, 97 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index da84ed7b3a..a3fc87dad6 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -68,6 +68,9 @@ cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 # Emit machine-readable JSON with startup/scrub metrics cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --sequential-frames 180 --random-samples 120 --output-json /tmp/decode-benchmark.json + +# Fragmented segment input is supported by passing the display directory +cargo run -p cap-editor --example decode-benchmark -- --video /path/to/segment/display --fps 60 --output-json /tmp/decode-benchmark-fragmented.json ``` #### Combined Workflow (Recording → Playback) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index fa28454366..2a73ea5726 100644 --- 
a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -162,6 +162,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 6. **Decode benchmark now supports machine-readable evidence output (2026-02-13)** - `decode-benchmark` supports `--output-json` for structured metric capture. - Added sequential frame and random sample count controls to scale benchmark depth per hardware class. + - Supports fragmented segment directories for duration-aware benchmarking. --- diff --git a/crates/editor/examples/decode-benchmark.rs b/crates/editor/examples/decode-benchmark.rs index 2ae11f207b..b7e74ce4c3 100644 --- a/crates/editor/examples/decode-benchmark.rs +++ b/crates/editor/examples/decode-benchmark.rs @@ -4,11 +4,16 @@ use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; use std::time::Instant; +use std::time::{SystemTime, UNIX_EPOCH}; use tokio::runtime::Runtime; const DEFAULT_DURATION_SECS: f32 = 60.0; fn get_video_duration(path: &Path) -> f32 { + if path.is_dir() { + return get_fragmented_video_duration(path); + } + let output = Command::new("ffprobe") .args([ "-v", @@ -35,6 +40,94 @@ fn get_video_duration(path: &Path) -> f32 { } } +fn get_fragmented_video_duration(path: &Path) -> f32 { + let init_segment = path.join("init.mp4"); + if !init_segment.exists() { + eprintln!( + "Warning: Fragmented input {} missing init.mp4", + path.display() + ); + return DEFAULT_DURATION_SECS; + } + + let mut fragments: Vec = match fs::read_dir(path) { + Ok(entries) => entries + .filter_map(|entry| entry.ok()) + .map(|entry| entry.path()) + .filter(|entry| entry.extension().is_some_and(|ext| ext == "m4s")) + .collect(), + Err(error) => { + eprintln!( + "Warning: Failed to read fragmented directory {}: {}", + path.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + }; + fragments.sort(); + + if fragments.is_empty() { + eprintln!( + "Warning: Fragmented input {} has no .m4s segments", + path.display() + ); + return 
DEFAULT_DURATION_SECS; + } + + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|value| value.as_nanos()) + .unwrap_or(0); + let combined_path = + std::env::temp_dir().join(format!("cap-decode-benchmark-combined-{timestamp}.mp4")); + + let mut combined_data = match fs::read(&init_segment) { + Ok(data) => data, + Err(error) => { + eprintln!( + "Warning: Failed to read init segment {}: {}", + init_segment.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + }; + + for fragment in fragments { + match fs::read(&fragment) { + Ok(data) => combined_data.extend(data), + Err(error) => { + eprintln!( + "Warning: Failed to read segment {}: {}", + fragment.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + } + } + + if let Err(error) = fs::write(&combined_path, &combined_data) { + eprintln!( + "Warning: Failed to write combined fragmented video {}: {}", + combined_path.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + + let duration = get_video_duration(&combined_path); + if let Err(error) = fs::remove_file(&combined_path) { + eprintln!( + "Warning: Failed to remove temporary combined file {}: {}", + combined_path.display(), + error + ); + } + duration +} + #[derive(Debug, Clone)] struct BenchmarkConfig { video_path: PathBuf, From 7f11b3d6fed47bdf0ddaab7d56cce0e900006077 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 20:43:59 +0000 Subject: [PATCH 014/333] improve: support fragmented inputs in decode benchmark Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/examples/decode-benchmark.rs | 93 ++++++++++++++++++++++ 3 files changed, 97 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index da84ed7b3a..a3fc87dad6 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -68,6 +68,9 @@ cargo run -p cap-editor --example 
decode-benchmark -- --video /path/to/video.mp4 # Emit machine-readable JSON with startup/scrub metrics cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --sequential-frames 180 --random-samples 120 --output-json /tmp/decode-benchmark.json + +# Fragmented segment input is supported by passing the display directory +cargo run -p cap-editor --example decode-benchmark -- --video /path/to/segment/display --fps 60 --output-json /tmp/decode-benchmark-fragmented.json ``` #### Combined Workflow (Recording → Playback) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index fa28454366..2a73ea5726 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -162,6 +162,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 6. **Decode benchmark now supports machine-readable evidence output (2026-02-13)** - `decode-benchmark` supports `--output-json` for structured metric capture. - Added sequential frame and random sample count controls to scale benchmark depth per hardware class. + - Supports fragmented segment directories for duration-aware benchmarking. 
--- diff --git a/crates/editor/examples/decode-benchmark.rs b/crates/editor/examples/decode-benchmark.rs index 2ae11f207b..b7e74ce4c3 100644 --- a/crates/editor/examples/decode-benchmark.rs +++ b/crates/editor/examples/decode-benchmark.rs @@ -4,11 +4,16 @@ use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; use std::time::Instant; +use std::time::{SystemTime, UNIX_EPOCH}; use tokio::runtime::Runtime; const DEFAULT_DURATION_SECS: f32 = 60.0; fn get_video_duration(path: &Path) -> f32 { + if path.is_dir() { + return get_fragmented_video_duration(path); + } + let output = Command::new("ffprobe") .args([ "-v", @@ -35,6 +40,94 @@ fn get_video_duration(path: &Path) -> f32 { } } +fn get_fragmented_video_duration(path: &Path) -> f32 { + let init_segment = path.join("init.mp4"); + if !init_segment.exists() { + eprintln!( + "Warning: Fragmented input {} missing init.mp4", + path.display() + ); + return DEFAULT_DURATION_SECS; + } + + let mut fragments: Vec = match fs::read_dir(path) { + Ok(entries) => entries + .filter_map(|entry| entry.ok()) + .map(|entry| entry.path()) + .filter(|entry| entry.extension().is_some_and(|ext| ext == "m4s")) + .collect(), + Err(error) => { + eprintln!( + "Warning: Failed to read fragmented directory {}: {}", + path.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + }; + fragments.sort(); + + if fragments.is_empty() { + eprintln!( + "Warning: Fragmented input {} has no .m4s segments", + path.display() + ); + return DEFAULT_DURATION_SECS; + } + + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|value| value.as_nanos()) + .unwrap_or(0); + let combined_path = + std::env::temp_dir().join(format!("cap-decode-benchmark-combined-{timestamp}.mp4")); + + let mut combined_data = match fs::read(&init_segment) { + Ok(data) => data, + Err(error) => { + eprintln!( + "Warning: Failed to read init segment {}: {}", + init_segment.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + }; + + for fragment 
in fragments { + match fs::read(&fragment) { + Ok(data) => combined_data.extend(data), + Err(error) => { + eprintln!( + "Warning: Failed to read segment {}: {}", + fragment.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + } + } + + if let Err(error) = fs::write(&combined_path, &combined_data) { + eprintln!( + "Warning: Failed to write combined fragmented video {}: {}", + combined_path.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + + let duration = get_video_duration(&combined_path); + if let Err(error) = fs::remove_file(&combined_path) { + eprintln!( + "Warning: Failed to remove temporary combined file {}: {}", + combined_path.display(), + error + ); + } + duration +} + #[derive(Debug, Clone)] struct BenchmarkConfig { video_path: PathBuf, From 630ce1ba9fab419a87f4c7c7c6f4be8d2d344bf5 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 20:46:11 +0000 Subject: [PATCH 015/333] improve: coalesce timeline seek commands during scrubbing Co-authored-by: Richie McIlroy --- .../src/routes/editor/Timeline/index.tsx | 45 ++++++++++++++++--- crates/editor/PLAYBACK-FINDINGS.md | 6 +++ 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx index da839931ad..465ff5c596 100644 --- a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -220,6 +220,9 @@ export function Timeline() { let pendingScrollDelta = 0; let scrollRafId: number | null = null; + let pendingSeekFrame: number | null = null; + let seekRafId: number | null = null; + let seekInFlight = false; function flushPendingZoom() { if (pendingZoomDelta === 0 || pendingZoomOrigin === null) { @@ -265,7 +268,40 @@ export function Timeline() { } } - async function handleUpdatePlayhead(e: MouseEvent) { + function scheduleSeek(frameNumber: number) { + pendingSeekFrame = frameNumber; + if (seekRafId === null) { + seekRafId = 
requestAnimationFrame(flushPendingSeek); + } + } + + async function flushPendingSeek() { + seekRafId = null; + + if (seekInFlight || pendingSeekFrame === null) { + if (pendingSeekFrame !== null && seekRafId === null) { + seekRafId = requestAnimationFrame(flushPendingSeek); + } + return; + } + + const frameNumber = pendingSeekFrame; + pendingSeekFrame = null; + seekInFlight = true; + + try { + await commands.seekTo(frameNumber); + } catch (err) { + console.error("Failed to seek timeline playhead:", err); + } finally { + seekInFlight = false; + if (pendingSeekFrame !== null && seekRafId === null) { + seekRafId = requestAnimationFrame(flushPendingSeek); + } + } + } + + function handleUpdatePlayhead(e: MouseEvent) { const { left } = timelineBounds; if ( zoomSegmentDragState.type !== "moving" && @@ -278,12 +314,7 @@ export function Timeline() { secsPerPixel() * (e.clientX - left) + transform().position; const newTime = Math.min(Math.max(0, rawTime), totalDuration()); const targetFrame = Math.round(newTime * FPS); - - try { - await commands.seekTo(targetFrame); - } catch (err) { - console.error("Failed to seek timeline playhead:", err); - } + scheduleSeek(targetFrame); setEditorState("playbackTime", newTime); } diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 2a73ea5726..5d27c2c58a 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -164,6 +164,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Added sequential frame and random sample count controls to scale benchmark depth per hardware class. - Supports fragmented segment directories for duration-aware benchmarking. +7. **Timeline seek dispatch now coalesces during drag (2026-02-13)** + - Frontend seek calls are requestAnimationFrame-batched. + - Only the latest pending seek frame is sent while an async seek is in-flight. + --- ## Root Cause Analysis Archive @@ -259,6 +263,7 @@ Decoder Pipeline: 8. 
Extended playback benchmark tooling with scrub mode and startup latency metrics. 9. Added playback runtime startup telemetry logs for first frame and audio callback bring-up. 10. Enhanced decode benchmark example with structured JSON output and configurable sample depth. +11. Added timeline seek dispatch coalescing to reduce seek command storms during drag. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -270,6 +275,7 @@ Decoder Pipeline: - `crates/editor/PLAYBACK-BENCHMARKS.md`: updated benchmark reference and metric definitions. - `crates/editor/src/playback.rs`: added first-render and audio-callback startup latency logging. - `crates/editor/examples/decode-benchmark.rs`: added `--output-json`, startup metrics, and configurable sequential/random sampling. +- `apps/desktop/src/routes/editor/Timeline/index.tsx`: added requestAnimationFrame-based seek coalescing with in-flight protection. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
From a702438ed725d4e1ff64240d20c49dd0f0d9e76f Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 20:46:11 +0000 Subject: [PATCH 016/333] improve: coalesce timeline seek commands during scrubbing Co-authored-by: Richie McIlroy --- .../src/routes/editor/Timeline/index.tsx | 45 ++++++++++++++++--- crates/editor/PLAYBACK-FINDINGS.md | 6 +++ 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx index da839931ad..465ff5c596 100644 --- a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -220,6 +220,9 @@ export function Timeline() { let pendingScrollDelta = 0; let scrollRafId: number | null = null; + let pendingSeekFrame: number | null = null; + let seekRafId: number | null = null; + let seekInFlight = false; function flushPendingZoom() { if (pendingZoomDelta === 0 || pendingZoomOrigin === null) { @@ -265,7 +268,40 @@ export function Timeline() { } } - async function handleUpdatePlayhead(e: MouseEvent) { + function scheduleSeek(frameNumber: number) { + pendingSeekFrame = frameNumber; + if (seekRafId === null) { + seekRafId = requestAnimationFrame(flushPendingSeek); + } + } + + async function flushPendingSeek() { + seekRafId = null; + + if (seekInFlight || pendingSeekFrame === null) { + if (pendingSeekFrame !== null && seekRafId === null) { + seekRafId = requestAnimationFrame(flushPendingSeek); + } + return; + } + + const frameNumber = pendingSeekFrame; + pendingSeekFrame = null; + seekInFlight = true; + + try { + await commands.seekTo(frameNumber); + } catch (err) { + console.error("Failed to seek timeline playhead:", err); + } finally { + seekInFlight = false; + if (pendingSeekFrame !== null && seekRafId === null) { + seekRafId = requestAnimationFrame(flushPendingSeek); + } + } + } + + function handleUpdatePlayhead(e: MouseEvent) { const { left } = timelineBounds; if ( 
zoomSegmentDragState.type !== "moving" && @@ -278,12 +314,7 @@ export function Timeline() { secsPerPixel() * (e.clientX - left) + transform().position; const newTime = Math.min(Math.max(0, rawTime), totalDuration()); const targetFrame = Math.round(newTime * FPS); - - try { - await commands.seekTo(targetFrame); - } catch (err) { - console.error("Failed to seek timeline playhead:", err); - } + scheduleSeek(targetFrame); setEditorState("playbackTime", newTime); } diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 2a73ea5726..5d27c2c58a 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -164,6 +164,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Added sequential frame and random sample count controls to scale benchmark depth per hardware class. - Supports fragmented segment directories for duration-aware benchmarking. +7. **Timeline seek dispatch now coalesces during drag (2026-02-13)** + - Frontend seek calls are requestAnimationFrame-batched. + - Only the latest pending seek frame is sent while an async seek is in-flight. + --- ## Root Cause Analysis Archive @@ -259,6 +263,7 @@ Decoder Pipeline: 8. Extended playback benchmark tooling with scrub mode and startup latency metrics. 9. Added playback runtime startup telemetry logs for first frame and audio callback bring-up. 10. Enhanced decode benchmark example with structured JSON output and configurable sample depth. +11. Added timeline seek dispatch coalescing to reduce seek command storms during drag. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -270,6 +275,7 @@ Decoder Pipeline: - `crates/editor/PLAYBACK-BENCHMARKS.md`: updated benchmark reference and metric definitions. - `crates/editor/src/playback.rs`: added first-render and audio-callback startup latency logging. 
- `crates/editor/examples/decode-benchmark.rs`: added `--output-json`, startup metrics, and configurable sequential/random sampling. +- `apps/desktop/src/routes/editor/Timeline/index.tsx`: added requestAnimationFrame-based seek coalescing with in-flight protection. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. From 4a1d39d423cb1fdb39c2b0b0ca20e74f0035eeaf Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:00:59 +0000 Subject: [PATCH 017/333] improve: clean up scheduled timeline raf tasks on unmount Co-authored-by: Richie McIlroy --- apps/desktop/src/routes/editor/Timeline/index.tsx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx index 465ff5c596..7726bb4258 100644 --- a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -7,6 +7,7 @@ import { createSignal, Index, type JSX, + onCleanup, onMount, Show, } from "solid-js"; @@ -224,6 +225,12 @@ export function Timeline() { let seekRafId: number | null = null; let seekInFlight = false; + onCleanup(() => { + if (zoomRafId !== null) cancelAnimationFrame(zoomRafId); + if (scrollRafId !== null) cancelAnimationFrame(scrollRafId); + if (seekRafId !== null) cancelAnimationFrame(seekRafId); + }); + function flushPendingZoom() { if (pendingZoomDelta === 0 || pendingZoomOrigin === null) { zoomRafId = null; From d96ae23b6d9101c12763c6060cf3337338ccd5ef Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:00:59 +0000 Subject: [PATCH 018/333] improve: clean up scheduled timeline raf tasks on unmount Co-authored-by: Richie McIlroy --- apps/desktop/src/routes/editor/Timeline/index.tsx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx index 465ff5c596..7726bb4258 100644 --- 
a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -7,6 +7,7 @@ import { createSignal, Index, type JSX, + onCleanup, onMount, Show, } from "solid-js"; @@ -224,6 +225,12 @@ export function Timeline() { let seekRafId: number | null = null; let seekInFlight = false; + onCleanup(() => { + if (zoomRafId !== null) cancelAnimationFrame(zoomRafId); + if (scrollRafId !== null) cancelAnimationFrame(scrollRafId); + if (seekRafId !== null) cancelAnimationFrame(seekRafId); + }); + function flushPendingZoom() { if (pendingZoomDelta === 0 || pendingZoomOrigin === null) { zoomRafId = null; From 93cd1a23a7d1a75e9f6f8850925320d2a538eeb8 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:02:56 +0000 Subject: [PATCH 019/333] improve: add json export support to playback benchmark runner Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 6 + .../examples/playback-test-runner.rs | 141 +++++++++++++----- 3 files changed, 114 insertions(+), 36 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index a3fc87dad6..6c7126b72b 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -55,6 +55,9 @@ cargo run -p cap-recording --example playback-test-runner -- --recording-path /p # List available recordings cargo run -p cap-recording --example playback-test-runner -- list + +# Emit machine-readable JSON report +cargo run -p cap-recording --example playback-test-runner -- full --json-output /tmp/playback-benchmark.json ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 5d27c2c58a..abc88dd919 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -168,6 +168,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Frontend seek calls are 
requestAnimationFrame-batched. - Only the latest pending seek frame is sent while an async seek is in-flight. +8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** + - `playback-test-runner` supports `--json-output` for structured report emission. + - JSON output includes command metadata, system info, summary, and per-recording test detail. + --- ## Root Cause Analysis Archive @@ -264,6 +268,7 @@ Decoder Pipeline: 9. Added playback runtime startup telemetry logs for first frame and audio callback bring-up. 10. Enhanced decode benchmark example with structured JSON output and configurable sample depth. 11. Added timeline seek dispatch coalescing to reduce seek command storms during drag. +12. Added JSON report output support to playback-test-runner for benchmark evidence collection. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -276,6 +281,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: added first-render and audio-callback startup latency logging. - `crates/editor/examples/decode-benchmark.rs`: added `--output-json`, startup metrics, and configurable sequential/random sampling. - `apps/desktop/src/routes/editor/Timeline/index.tsx`: added requestAnimationFrame-based seek coalescing with in-flight protection. +- `crates/recording/examples/playback-test-runner.rs`: added `--json-output` to emit structured benchmark reports. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/recording/examples/playback-test-runner.rs b/crates/recording/examples/playback-test-runner.rs index 9c7bff6110..abebc6cd14 100644 --- a/crates/recording/examples/playback-test-runner.rs +++ b/crates/recording/examples/playback-test-runner.rs @@ -4,6 +4,7 @@ use cap_project::{RecordingMeta, RecordingMetaInner, StudioRecordingMeta}; use cap_rendering::decoder::spawn_decoder; use chrono::{Local, Utc}; use clap::{Parser, Subcommand}; +use serde::Serialize; use std::{ fs, path::{Path, PathBuf}, @@ -44,6 +45,9 @@ struct Cli { #[arg(long, global = true)] benchmark_output: bool, + #[arg(long, global = true)] + json_output: Option, + #[arg(long, global = true)] notes: Option, } @@ -65,7 +69,7 @@ const SCRUB_SEEK_WARNING_MS: f64 = 40.0; const AUDIO_VIDEO_SYNC_TOLERANCE_MS: f64 = 100.0; const CAMERA_SYNC_TOLERANCE_MS: f64 = 100.0; -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct DecoderTestResult { passed: bool, decoder_type: String, @@ -77,7 +81,7 @@ struct DecoderTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct PlaybackTestResult { passed: bool, segment_index: usize, @@ -100,7 +104,7 @@ struct PlaybackTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct ScrubTestResult { passed: bool, segment_index: usize, @@ -116,7 +120,7 @@ struct ScrubTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct AudioSyncTestResult { passed: bool, segment_index: usize, @@ -134,7 +138,7 @@ struct AudioSyncTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct CameraSyncTestResult { passed: bool, segment_index: usize, @@ -150,7 +154,7 @@ struct CameraSyncTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct RecordingTestReport { 
recording_path: PathBuf, recording_name: String, @@ -1010,13 +1014,31 @@ async fn run_tests_on_recording( Ok(report) } -#[derive(Debug)] +#[derive(Debug, Clone, Serialize)] struct SystemInfo { os: String, arch: String, cpu: String, } +#[derive(Debug, Serialize)] +struct JsonBenchmarkSummary { + total_recordings: usize, + passed_recordings: usize, + failed_recordings: usize, +} + +#[derive(Debug, Serialize)] +struct JsonBenchmarkOutput { + generated_at_utc: String, + local_time: String, + command: String, + notes: Option, + system: SystemInfo, + summary: JsonBenchmarkSummary, + reports: Vec, +} + impl SystemInfo { fn collect() -> Self { let mut sys = System::new_all(); @@ -1380,6 +1402,41 @@ fn write_benchmark_to_file(benchmark_md: &str) -> anyhow::Result<()> { Ok(()) } +fn write_json_output_to_file( + output_path: &Path, + reports: &[RecordingTestReport], + notes: Option<&str>, + command: &str, +) -> anyhow::Result<()> { + let passed = reports.iter().filter(|r| r.overall_passed).count(); + let total = reports.len(); + let failed = total.saturating_sub(passed); + + let output = JsonBenchmarkOutput { + generated_at_utc: Utc::now().format("%Y-%m-%d %H:%M:%S UTC").to_string(), + local_time: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(), + command: command.to_string(), + notes: notes.map(ToString::to_string), + system: SystemInfo::collect(), + summary: JsonBenchmarkSummary { + total_recordings: total, + passed_recordings: passed, + failed_recordings: failed, + }, + reports: reports.to_vec(), + }; + + let json = serde_json::to_string_pretty(&output)?; + fs::write(output_path, json)?; + + println!( + "\n✅ JSON benchmark results written to {}", + output_path.display() + ); + + Ok(()) +} + fn print_summary(reports: &[RecordingTestReport]) { println!("\n{}", "=".repeat(70)); println!("PLAYBACK TEST SUMMARY"); @@ -1423,6 +1480,18 @@ fn print_summary(reports: &[RecordingTestReport]) { println!(); } +fn command_name(command: Option<&Commands>) -> &'static str { + match 
command { + Some(Commands::Decoder) => "decoder", + Some(Commands::Playback) => "playback", + Some(Commands::Scrub) => "scrub", + Some(Commands::AudioSync) => "audio-sync", + Some(Commands::CameraSync) => "camera-sync", + Some(Commands::Full) | None => "full", + Some(Commands::List) => "list", + } +} + #[tokio::main] async fn main() -> anyhow::Result<()> { tracing_subscriber::registry() @@ -1434,7 +1503,7 @@ async fn main() -> anyhow::Result<()> { let cli = Cli::parse(); - if let Some(Commands::List) = cli.command { + if matches!(cli.command.as_ref(), Some(Commands::List)) { let recordings = discover_recordings(&cli.input_dir); if recordings.is_empty() { println!("No recordings found in {}", cli.input_dir.display()); @@ -1468,16 +1537,16 @@ async fn main() -> anyhow::Result<()> { return Ok(()); } - let (run_decoder, run_playback, run_scrub, run_audio_sync, run_camera_sync) = match cli.command - { - Some(Commands::Decoder) => (true, false, false, false, false), - Some(Commands::Playback) => (false, true, false, false, false), - Some(Commands::Scrub) => (false, false, true, false, false), - Some(Commands::AudioSync) => (false, false, false, true, false), - Some(Commands::CameraSync) => (false, false, false, false, true), - Some(Commands::Full) | None => (true, true, true, true, true), - Some(Commands::List) => unreachable!(), - }; + let (run_decoder, run_playback, run_scrub, run_audio_sync, run_camera_sync) = + match cli.command.as_ref() { + Some(Commands::Decoder) => (true, false, false, false, false), + Some(Commands::Playback) => (false, true, false, false, false), + Some(Commands::Scrub) => (false, false, true, false, false), + Some(Commands::AudioSync) => (false, false, false, true, false), + Some(Commands::CameraSync) => (false, false, false, false, true), + Some(Commands::Full) | None => (true, true, true, true, true), + Some(Commands::List) => unreachable!(), + }; println!("\nCap Playback Test Runner"); println!("{}", "=".repeat(40)); @@ -1517,25 +1586,17 @@ 
async fn main() -> anyhow::Result<()> { print_summary(&reports); - if cli.benchmark_output { - let command = format!( - "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}{}", - match cli.command { - Some(Commands::Decoder) => "decoder", - Some(Commands::Playback) => "playback", - Some(Commands::Scrub) => "scrub", - Some(Commands::AudioSync) => "audio-sync", - Some(Commands::CameraSync) => "camera-sync", - Some(Commands::Full) | None => "full", - Some(Commands::List) => "list", - }, - cli.fps, - cli.recording_path - .as_ref() - .map(|p| format!(" --recording-path {}", p.display())) - .unwrap_or_default(), - ); + let command = format!( + "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}{}", + command_name(cli.command.as_ref()), + cli.fps, + cli.recording_path + .as_ref() + .map(|p| format!(" --recording-path {}", p.display())) + .unwrap_or_default(), + ); + if cli.benchmark_output { let benchmark_md = generate_benchmark_markdown(&reports, cli.notes.as_deref(), command.trim()); @@ -1544,6 +1605,14 @@ async fn main() -> anyhow::Result<()> { } } + if let Some(output_path) = &cli.json_output { + if let Err(e) = + write_json_output_to_file(output_path, &reports, cli.notes.as_deref(), command.trim()) + { + tracing::error!("Failed to write JSON benchmark results: {}", e); + } + } + let failed = reports.iter().filter(|r| !r.overall_passed).count(); std::process::exit(if failed > 0 { 1 } else { 0 }); } From f0cc73062923ce7882e0cb150ce7cbce54406813 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:02:56 +0000 Subject: [PATCH 020/333] improve: add json export support to playback benchmark runner Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 6 + .../examples/playback-test-runner.rs | 141 +++++++++++++----- 3 files changed, 114 insertions(+), 36 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md 
b/crates/editor/PLAYBACK-BENCHMARKS.md index a3fc87dad6..6c7126b72b 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -55,6 +55,9 @@ cargo run -p cap-recording --example playback-test-runner -- --recording-path /p # List available recordings cargo run -p cap-recording --example playback-test-runner -- list + +# Emit machine-readable JSON report +cargo run -p cap-recording --example playback-test-runner -- full --json-output /tmp/playback-benchmark.json ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 5d27c2c58a..abc88dd919 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -168,6 +168,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Frontend seek calls are requestAnimationFrame-batched. - Only the latest pending seek frame is sent while an async seek is in-flight. +8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** + - `playback-test-runner` supports `--json-output` for structured report emission. + - JSON output includes command metadata, system info, summary, and per-recording test detail. + --- ## Root Cause Analysis Archive @@ -264,6 +268,7 @@ Decoder Pipeline: 9. Added playback runtime startup telemetry logs for first frame and audio callback bring-up. 10. Enhanced decode benchmark example with structured JSON output and configurable sample depth. 11. Added timeline seek dispatch coalescing to reduce seek command storms during drag. +12. Added JSON report output support to playback-test-runner for benchmark evidence collection. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -276,6 +281,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: added first-render and audio-callback startup latency logging. 
- `crates/editor/examples/decode-benchmark.rs`: added `--output-json`, startup metrics, and configurable sequential/random sampling. - `apps/desktop/src/routes/editor/Timeline/index.tsx`: added requestAnimationFrame-based seek coalescing with in-flight protection. +- `crates/recording/examples/playback-test-runner.rs`: added `--json-output` to emit structured benchmark reports. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/recording/examples/playback-test-runner.rs b/crates/recording/examples/playback-test-runner.rs index 9c7bff6110..abebc6cd14 100644 --- a/crates/recording/examples/playback-test-runner.rs +++ b/crates/recording/examples/playback-test-runner.rs @@ -4,6 +4,7 @@ use cap_project::{RecordingMeta, RecordingMetaInner, StudioRecordingMeta}; use cap_rendering::decoder::spawn_decoder; use chrono::{Local, Utc}; use clap::{Parser, Subcommand}; +use serde::Serialize; use std::{ fs, path::{Path, PathBuf}, @@ -44,6 +45,9 @@ struct Cli { #[arg(long, global = true)] benchmark_output: bool, + #[arg(long, global = true)] + json_output: Option, + #[arg(long, global = true)] notes: Option, } @@ -65,7 +69,7 @@ const SCRUB_SEEK_WARNING_MS: f64 = 40.0; const AUDIO_VIDEO_SYNC_TOLERANCE_MS: f64 = 100.0; const CAMERA_SYNC_TOLERANCE_MS: f64 = 100.0; -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct DecoderTestResult { passed: bool, decoder_type: String, @@ -77,7 +81,7 @@ struct DecoderTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct PlaybackTestResult { passed: bool, segment_index: usize, @@ -100,7 +104,7 @@ struct PlaybackTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct ScrubTestResult { passed: bool, segment_index: usize, @@ -116,7 +120,7 @@ struct ScrubTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] 
struct AudioSyncTestResult { passed: bool, segment_index: usize, @@ -134,7 +138,7 @@ struct AudioSyncTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct CameraSyncTestResult { passed: bool, segment_index: usize, @@ -150,7 +154,7 @@ struct CameraSyncTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct RecordingTestReport { recording_path: PathBuf, recording_name: String, @@ -1010,13 +1014,31 @@ async fn run_tests_on_recording( Ok(report) } -#[derive(Debug)] +#[derive(Debug, Clone, Serialize)] struct SystemInfo { os: String, arch: String, cpu: String, } +#[derive(Debug, Serialize)] +struct JsonBenchmarkSummary { + total_recordings: usize, + passed_recordings: usize, + failed_recordings: usize, +} + +#[derive(Debug, Serialize)] +struct JsonBenchmarkOutput { + generated_at_utc: String, + local_time: String, + command: String, + notes: Option, + system: SystemInfo, + summary: JsonBenchmarkSummary, + reports: Vec, +} + impl SystemInfo { fn collect() -> Self { let mut sys = System::new_all(); @@ -1380,6 +1402,41 @@ fn write_benchmark_to_file(benchmark_md: &str) -> anyhow::Result<()> { Ok(()) } +fn write_json_output_to_file( + output_path: &Path, + reports: &[RecordingTestReport], + notes: Option<&str>, + command: &str, +) -> anyhow::Result<()> { + let passed = reports.iter().filter(|r| r.overall_passed).count(); + let total = reports.len(); + let failed = total.saturating_sub(passed); + + let output = JsonBenchmarkOutput { + generated_at_utc: Utc::now().format("%Y-%m-%d %H:%M:%S UTC").to_string(), + local_time: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(), + command: command.to_string(), + notes: notes.map(ToString::to_string), + system: SystemInfo::collect(), + summary: JsonBenchmarkSummary { + total_recordings: total, + passed_recordings: passed, + failed_recordings: failed, + }, + reports: reports.to_vec(), + }; + + let json = 
serde_json::to_string_pretty(&output)?; + fs::write(output_path, json)?; + + println!( + "\n✅ JSON benchmark results written to {}", + output_path.display() + ); + + Ok(()) +} + fn print_summary(reports: &[RecordingTestReport]) { println!("\n{}", "=".repeat(70)); println!("PLAYBACK TEST SUMMARY"); @@ -1423,6 +1480,18 @@ fn print_summary(reports: &[RecordingTestReport]) { println!(); } +fn command_name(command: Option<&Commands>) -> &'static str { + match command { + Some(Commands::Decoder) => "decoder", + Some(Commands::Playback) => "playback", + Some(Commands::Scrub) => "scrub", + Some(Commands::AudioSync) => "audio-sync", + Some(Commands::CameraSync) => "camera-sync", + Some(Commands::Full) | None => "full", + Some(Commands::List) => "list", + } +} + #[tokio::main] async fn main() -> anyhow::Result<()> { tracing_subscriber::registry() @@ -1434,7 +1503,7 @@ async fn main() -> anyhow::Result<()> { let cli = Cli::parse(); - if let Some(Commands::List) = cli.command { + if matches!(cli.command.as_ref(), Some(Commands::List)) { let recordings = discover_recordings(&cli.input_dir); if recordings.is_empty() { println!("No recordings found in {}", cli.input_dir.display()); @@ -1468,16 +1537,16 @@ async fn main() -> anyhow::Result<()> { return Ok(()); } - let (run_decoder, run_playback, run_scrub, run_audio_sync, run_camera_sync) = match cli.command - { - Some(Commands::Decoder) => (true, false, false, false, false), - Some(Commands::Playback) => (false, true, false, false, false), - Some(Commands::Scrub) => (false, false, true, false, false), - Some(Commands::AudioSync) => (false, false, false, true, false), - Some(Commands::CameraSync) => (false, false, false, false, true), - Some(Commands::Full) | None => (true, true, true, true, true), - Some(Commands::List) => unreachable!(), - }; + let (run_decoder, run_playback, run_scrub, run_audio_sync, run_camera_sync) = + match cli.command.as_ref() { + Some(Commands::Decoder) => (true, false, false, false, false), + 
Some(Commands::Playback) => (false, true, false, false, false), + Some(Commands::Scrub) => (false, false, true, false, false), + Some(Commands::AudioSync) => (false, false, false, true, false), + Some(Commands::CameraSync) => (false, false, false, false, true), + Some(Commands::Full) | None => (true, true, true, true, true), + Some(Commands::List) => unreachable!(), + }; println!("\nCap Playback Test Runner"); println!("{}", "=".repeat(40)); @@ -1517,25 +1586,17 @@ async fn main() -> anyhow::Result<()> { print_summary(&reports); - if cli.benchmark_output { - let command = format!( - "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}{}", - match cli.command { - Some(Commands::Decoder) => "decoder", - Some(Commands::Playback) => "playback", - Some(Commands::Scrub) => "scrub", - Some(Commands::AudioSync) => "audio-sync", - Some(Commands::CameraSync) => "camera-sync", - Some(Commands::Full) | None => "full", - Some(Commands::List) => "list", - }, - cli.fps, - cli.recording_path - .as_ref() - .map(|p| format!(" --recording-path {}", p.display())) - .unwrap_or_default(), - ); + let command = format!( + "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}{}", + command_name(cli.command.as_ref()), + cli.fps, + cli.recording_path + .as_ref() + .map(|p| format!(" --recording-path {}", p.display())) + .unwrap_or_default(), + ); + if cli.benchmark_output { let benchmark_md = generate_benchmark_markdown(&reports, cli.notes.as_deref(), command.trim()); @@ -1544,6 +1605,14 @@ async fn main() -> anyhow::Result<()> { } } + if let Some(output_path) = &cli.json_output { + if let Err(e) = + write_json_output_to_file(output_path, &reports, cli.notes.as_deref(), command.trim()) + { + tracing::error!("Failed to write JSON benchmark results: {}", e); + } + } + let failed = reports.iter().filter(|r| !r.overall_passed).count(); std::process::exit(if failed > 0 { 1 } else { 0 }); } From 6c265bb2eea2f15cfee94590b1559f8275cd63f0 Mon Sep 17 00:00:00 
2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:04:31 +0000 Subject: [PATCH 021/333] improve: add playback benchmark json aggregation script Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 6 + crates/editor/PLAYBACK-FINDINGS.md | 6 + scripts/aggregate-playback-benchmarks.js | 248 +++++++++++++++++++++++ 3 files changed, 260 insertions(+) create mode 100644 scripts/aggregate-playback-benchmarks.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 6c7126b72b..a46e6ab281 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -60,6 +60,12 @@ cargo run -p cap-recording --example playback-test-runner -- list cargo run -p cap-recording --example playback-test-runner -- full --json-output /tmp/playback-benchmark.json ``` +Aggregate JSON outputs from multiple machines: + +```bash +node scripts/aggregate-playback-benchmarks.js --input /path/to/json-results --output /tmp/playback-benchmark-aggregate.md +``` + #### Decode Performance Benchmark ```bash diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index abc88dd919..b519641ecf 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -172,6 +172,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `playback-test-runner` supports `--json-output` for structured report emission. - JSON output includes command metadata, system info, summary, and per-recording test detail. +9. **Added JSON aggregate utility for cross-platform benchmark collation (2026-02-13)** + - `scripts/aggregate-playback-benchmarks.js` builds a markdown table from multiple JSON outputs. + - Aggregates platform/gpu/scenario-tagged runs for matrix reporting. + --- ## Root Cause Analysis Archive @@ -269,6 +273,7 @@ Decoder Pipeline: 10. Enhanced decode benchmark example with structured JSON output and configurable sample depth. 11. 
Added timeline seek dispatch coalescing to reduce seek command storms during drag. 12. Added JSON report output support to playback-test-runner for benchmark evidence collection. +13. Added cross-platform benchmark JSON aggregation utility script. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -282,6 +287,7 @@ Decoder Pipeline: - `crates/editor/examples/decode-benchmark.rs`: added `--output-json`, startup metrics, and configurable sequential/random sampling. - `apps/desktop/src/routes/editor/Timeline/index.tsx`: added requestAnimationFrame-based seek coalescing with in-flight protection. - `crates/recording/examples/playback-test-runner.rs`: added `--json-output` to emit structured benchmark reports. +- `scripts/aggregate-playback-benchmarks.js`: added markdown aggregation for multiple playback benchmark JSON artifacts. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/scripts/aggregate-playback-benchmarks.js b/scripts/aggregate-playback-benchmarks.js new file mode 100644 index 0000000000..d5ee7fe7ec --- /dev/null +++ b/scripts/aggregate-playback-benchmarks.js @@ -0,0 +1,248 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const inputs = []; + let output = null; + let help = false; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--help" || arg === "-h") { + help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[i + 1]; + if (!value) { + throw new Error("Missing value for --input"); + } + inputs.push(path.resolve(value)); + i += 1; + continue; + } + if (arg === "--output" || arg === "-o") { + const value = argv[i + 1]; + if (!value) { + throw new Error("Missing value for --output"); + } + output = path.resolve(value); + i += 1; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return { 
inputs, output, help }; +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + + const files = []; + const entries = fs.readdirSync(targetPath, { withFileTypes: true }); + for (const entry of entries) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) { + return {}; + } + const parsed = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) { + continue; + } + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) { + continue; + } + parsed[key.trim()] = value.trim(); + } + return parsed; +} + +function numberOrNull(value) { + if (typeof value !== "number" || Number.isNaN(value)) { + return null; + } + return value; +} + +function maxOrNull(values) { + const numeric = values + .map(numberOrNull) + .filter((value) => value !== null); + if (numeric.length === 0) { + return null; + } + return Math.max(...numeric); +} + +function avgOrNull(values) { + const numeric = values + .map(numberOrNull) + .filter((value) => value !== null); + if (numeric.length === 0) { + return null; + } + return numeric.reduce((acc, value) => acc + value, 0) / numeric.length; +} + +function formatMetric(value, digits = 1) { + return value === null ? "n/a" : value.toFixed(digits); +} + +function extractRows(jsonPath, data) { + if (!Array.isArray(data.reports)) { + return []; + } + + const notes = parseNotes(data.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? 
"unspecified"; + const runTime = data.generated_at_utc ?? "unknown"; + + const rows = []; + for (const report of data.reports) { + const playbackResults = Array.isArray(report.playback_results) + ? report.playback_results + : []; + const scrubResults = Array.isArray(report.scrub_results) + ? report.scrub_results + : []; + const audioResults = Array.isArray(report.audio_sync_results) + ? report.audio_sync_results + : []; + + const effectiveFpsMin = playbackResults.length + ? Math.min( + ...playbackResults + .map((result) => numberOrNull(result.effective_fps)) + .filter((value) => value !== null), + ) + : null; + const scrubP95Max = maxOrNull( + scrubResults.map((result) => result.p95_seek_time_ms), + ); + const startupAvg = avgOrNull( + playbackResults.map((result) => result.startup_to_first_frame_ms), + ); + const micDiffMax = maxOrNull( + audioResults + .filter((result) => result.has_mic_audio) + .map((result) => result.mic_video_diff_ms), + ); + const sysDiffMax = maxOrNull( + audioResults + .filter((result) => result.has_system_audio) + .map((result) => result.system_audio_video_diff_ms), + ); + + rows.push({ + runTime, + platform, + gpu, + scenario, + recording: report.recording_name ?? path.basename(jsonPath), + format: report.is_fragmented ? "fragmented" : "mp4", + status: report.overall_passed ? "PASS" : "FAIL", + effectiveFpsMin, + scrubP95Max, + startupAvg, + micDiffMax, + sysDiffMax, + command: data.command ?? "unknown", + source: jsonPath, + }); + } + + return rows; +} + +function buildMarkdown(rows) { + const sorted = [...rows].sort((a, b) => (a.runTime < b.runTime ? 
1 : -1)); + const passed = sorted.filter((row) => row.status === "PASS").length; + const failed = sorted.length - passed; + + let md = ""; + md += `# Playback Benchmark Aggregate\n\n`; + md += `Generated: ${new Date().toISOString()}\n\n`; + md += `Total rows: ${sorted.length}, Passed: ${passed}, Failed: ${failed}\n\n`; + md += "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; + md += "|---|---|---|---|---|---|---|---:|---:|---:|---:|---:|\n"; + for (const row of sorted) { + md += `| ${row.runTime} | ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.status} | ${formatMetric(row.effectiveFpsMin)} | ${formatMetric(row.scrubP95Max)} | ${formatMetric(row.startupAvg)} | ${formatMetric(row.micDiffMax)} | ${formatMetric(row.sysDiffMax)} |\n`; + } + md += "\n"; + return md; +} + +function printUsage() { + console.log(`Usage: node scripts/aggregate-playback-benchmarks.js --input [--input ...] 
[--output ] + +Aggregates playback-test-runner JSON outputs into a markdown summary table.`); +} + +function main() { + const args = parseArgs(process.argv); + if (args.help) { + printUsage(); + return; + } + if (args.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const inputPath of args.inputs) { + for (const filePath of collectJsonFiles(inputPath)) { + files.add(filePath); + } + } + + if (files.size === 0) { + throw new Error("No JSON files found for aggregation"); + } + + const rows = []; + for (const filePath of files) { + const raw = fs.readFileSync(filePath, "utf8"); + const parsed = JSON.parse(raw); + rows.push(...extractRows(filePath, parsed)); + } + + const markdown = buildMarkdown(rows); + if (args.output) { + fs.writeFileSync(args.output, markdown, "utf8"); + console.log(`Wrote aggregate markdown to ${args.output}`); + } else { + process.stdout.write(markdown); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} From 36a3ce3976fa378e6a588f86fe347e3affd437ce Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:04:31 +0000 Subject: [PATCH 022/333] improve: add playback benchmark json aggregation script Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 6 + crates/editor/PLAYBACK-FINDINGS.md | 6 + scripts/aggregate-playback-benchmarks.js | 248 +++++++++++++++++++++++ 3 files changed, 260 insertions(+) create mode 100644 scripts/aggregate-playback-benchmarks.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 6c7126b72b..a46e6ab281 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -60,6 +60,12 @@ cargo run -p cap-recording --example playback-test-runner -- list cargo run -p cap-recording --example playback-test-runner -- full --json-output /tmp/playback-benchmark.json ``` +Aggregate JSON outputs from multiple machines: + +```bash +node scripts/aggregate-playback-benchmarks.js --input /path/to/json-results --output /tmp/playback-benchmark-aggregate.md +``` + #### Decode Performance Benchmark ```bash diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index abc88dd919..b519641ecf 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -172,6 +172,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `playback-test-runner` supports `--json-output` for structured report emission. - JSON output includes command metadata, system info, summary, and per-recording test detail. +9. **Added JSON aggregate utility for cross-platform benchmark collation (2026-02-13)** + - `scripts/aggregate-playback-benchmarks.js` builds a markdown table from multiple JSON outputs. + - Aggregates platform/gpu/scenario-tagged runs for matrix reporting. + --- ## Root Cause Analysis Archive @@ -269,6 +273,7 @@ Decoder Pipeline: 10. 
Enhanced decode benchmark example with structured JSON output and configurable sample depth. 11. Added timeline seek dispatch coalescing to reduce seek command storms during drag. 12. Added JSON report output support to playback-test-runner for benchmark evidence collection. +13. Added cross-platform benchmark JSON aggregation utility script. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -282,6 +287,7 @@ Decoder Pipeline: - `crates/editor/examples/decode-benchmark.rs`: added `--output-json`, startup metrics, and configurable sequential/random sampling. - `apps/desktop/src/routes/editor/Timeline/index.tsx`: added requestAnimationFrame-based seek coalescing with in-flight protection. - `crates/recording/examples/playback-test-runner.rs`: added `--json-output` to emit structured benchmark reports. +- `scripts/aggregate-playback-benchmarks.js`: added markdown aggregation for multiple playback benchmark JSON artifacts. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/scripts/aggregate-playback-benchmarks.js b/scripts/aggregate-playback-benchmarks.js new file mode 100644 index 0000000000..d5ee7fe7ec --- /dev/null +++ b/scripts/aggregate-playback-benchmarks.js @@ -0,0 +1,248 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const inputs = []; + let output = null; + let help = false; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--help" || arg === "-h") { + help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[i + 1]; + if (!value) { + throw new Error("Missing value for --input"); + } + inputs.push(path.resolve(value)); + i += 1; + continue; + } + if (arg === "--output" || arg === "-o") { + const value = argv[i + 1]; + if (!value) { + throw new Error("Missing value for --output"); + } + output = path.resolve(value); + i += 1; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return { inputs, output, help }; +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? 
[targetPath] : []; + } + + const files = []; + const entries = fs.readdirSync(targetPath, { withFileTypes: true }); + for (const entry of entries) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) { + return {}; + } + const parsed = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) { + continue; + } + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) { + continue; + } + parsed[key.trim()] = value.trim(); + } + return parsed; +} + +function numberOrNull(value) { + if (typeof value !== "number" || Number.isNaN(value)) { + return null; + } + return value; +} + +function maxOrNull(values) { + const numeric = values + .map(numberOrNull) + .filter((value) => value !== null); + if (numeric.length === 0) { + return null; + } + return Math.max(...numeric); +} + +function avgOrNull(values) { + const numeric = values + .map(numberOrNull) + .filter((value) => value !== null); + if (numeric.length === 0) { + return null; + } + return numeric.reduce((acc, value) => acc + value, 0) / numeric.length; +} + +function formatMetric(value, digits = 1) { + return value === null ? "n/a" : value.toFixed(digits); +} + +function extractRows(jsonPath, data) { + if (!Array.isArray(data.reports)) { + return []; + } + + const notes = parseNotes(data.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const runTime = data.generated_at_utc ?? "unknown"; + + const rows = []; + for (const report of data.reports) { + const playbackResults = Array.isArray(report.playback_results) + ? report.playback_results + : []; + const scrubResults = Array.isArray(report.scrub_results) + ? 
report.scrub_results + : []; + const audioResults = Array.isArray(report.audio_sync_results) + ? report.audio_sync_results + : []; + + const effectiveFpsMin = playbackResults.length + ? Math.min( + ...playbackResults + .map((result) => numberOrNull(result.effective_fps)) + .filter((value) => value !== null), + ) + : null; + const scrubP95Max = maxOrNull( + scrubResults.map((result) => result.p95_seek_time_ms), + ); + const startupAvg = avgOrNull( + playbackResults.map((result) => result.startup_to_first_frame_ms), + ); + const micDiffMax = maxOrNull( + audioResults + .filter((result) => result.has_mic_audio) + .map((result) => result.mic_video_diff_ms), + ); + const sysDiffMax = maxOrNull( + audioResults + .filter((result) => result.has_system_audio) + .map((result) => result.system_audio_video_diff_ms), + ); + + rows.push({ + runTime, + platform, + gpu, + scenario, + recording: report.recording_name ?? path.basename(jsonPath), + format: report.is_fragmented ? "fragmented" : "mp4", + status: report.overall_passed ? "PASS" : "FAIL", + effectiveFpsMin, + scrubP95Max, + startupAvg, + micDiffMax, + sysDiffMax, + command: data.command ?? "unknown", + source: jsonPath, + }); + } + + return rows; +} + +function buildMarkdown(rows) { + const sorted = [...rows].sort((a, b) => (a.runTime < b.runTime ? 
1 : -1)); + const passed = sorted.filter((row) => row.status === "PASS").length; + const failed = sorted.length - passed; + + let md = ""; + md += `# Playback Benchmark Aggregate\n\n`; + md += `Generated: ${new Date().toISOString()}\n\n`; + md += `Total rows: ${sorted.length}, Passed: ${passed}, Failed: ${failed}\n\n`; + md += "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; + md += "|---|---|---|---|---|---|---|---:|---:|---:|---:|---:|\n"; + for (const row of sorted) { + md += `| ${row.runTime} | ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.status} | ${formatMetric(row.effectiveFpsMin)} | ${formatMetric(row.scrubP95Max)} | ${formatMetric(row.startupAvg)} | ${formatMetric(row.micDiffMax)} | ${formatMetric(row.sysDiffMax)} |\n`; + } + md += "\n"; + return md; +} + +function printUsage() { + console.log(`Usage: node scripts/aggregate-playback-benchmarks.js --input [--input ...] 
[--output ] + +Aggregates playback-test-runner JSON outputs into a markdown summary table.`); +} + +function main() { + const args = parseArgs(process.argv); + if (args.help) { + printUsage(); + return; + } + if (args.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const inputPath of args.inputs) { + for (const filePath of collectJsonFiles(inputPath)) { + files.add(filePath); + } + } + + if (files.size === 0) { + throw new Error("No JSON files found for aggregation"); + } + + const rows = []; + for (const filePath of files) { + const raw = fs.readFileSync(filePath, "utf8"); + const parsed = JSON.parse(raw); + rows.push(...extractRows(filePath, parsed)); + } + + const markdown = buildMarkdown(rows); + if (args.output) { + fs.writeFileSync(args.output, markdown, "utf8"); + console.log(`Wrote aggregate markdown to ${args.output}`); + } else { + process.stdout.write(markdown); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} From c2ebf2c308f343520eb9426cee762f64cdfca80d Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:05:22 +0000 Subject: [PATCH 023/333] improve: use latest-only watch channel for playback seeks Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 16 +++++++--------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b519641ecf..fac917b34e 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -144,6 +144,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - Added seek channel to `PlaybackHandle` and playback loop. - `seek_to` and `set_playhead_position` commands now forward seek requests to active playback. 
- Timeline seek no longer tears down and recreates playback while playing. + - Seek signaling now uses watch semantics so only latest frame target is consumed under heavy scrub load. 3. **Lazy decoder pool warmup on macOS AVAssetReader (2026-02-13)** - Initial warmup now creates only a small subset of decoder instances. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 70e019ef5d..c9533ab63b 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -65,7 +65,7 @@ pub enum PlaybackEvent { pub struct PlaybackHandle { stop_tx: watch::Sender, event_rx: watch::Receiver, - seek_tx: tokio_mpsc::UnboundedSender, + seek_tx: watch::Sender, } struct PrefetchedFrame { @@ -120,7 +120,8 @@ impl Playback { let (event_tx, mut event_rx) = watch::channel(PlaybackEvent::Start); event_rx.borrow_and_update(); - let (seek_tx, mut seek_rx) = tokio_mpsc::unbounded_channel::(); + let (seek_tx, mut seek_rx) = watch::channel(self.start_frame_number); + seek_rx.borrow_and_update(); let handle = PlaybackHandle { stop_tx: stop_tx.clone(), @@ -448,12 +449,8 @@ impl Playback { let mut cached_project = self.project.borrow().clone(); 'playback: loop { - let mut pending_seek = None; - while let Ok(next_seek_frame) = seek_rx.try_recv() { - pending_seek = Some(next_seek_frame); - } - - if let Some(seek_frame) = pending_seek { + if seek_rx.has_changed().unwrap_or(false) { + let seek_frame = *seek_rx.borrow_and_update(); frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; @@ -499,7 +496,8 @@ impl Playback { tokio::select! 
{ _ = stop_rx.changed() => break 'playback, - Some(seek_frame) = seek_rx.recv() => { + _ = seek_rx.changed() => { + let seek_frame = *seek_rx.borrow_and_update(); frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; From 827b94ec31e2ade7794b506a81098309b3b47042 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:05:22 +0000 Subject: [PATCH 024/333] improve: use latest-only watch channel for playback seeks Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 16 +++++++--------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b519641ecf..fac917b34e 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -144,6 +144,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - Added seek channel to `PlaybackHandle` and playback loop. - `seek_to` and `set_playhead_position` commands now forward seek requests to active playback. - Timeline seek no longer tears down and recreates playback while playing. + - Seek signaling now uses watch semantics so only latest frame target is consumed under heavy scrub load. 3. **Lazy decoder pool warmup on macOS AVAssetReader (2026-02-13)** - Initial warmup now creates only a small subset of decoder instances. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 70e019ef5d..c9533ab63b 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -65,7 +65,7 @@ pub enum PlaybackEvent { pub struct PlaybackHandle { stop_tx: watch::Sender, event_rx: watch::Receiver, - seek_tx: tokio_mpsc::UnboundedSender, + seek_tx: watch::Sender, } struct PrefetchedFrame { @@ -120,7 +120,8 @@ impl Playback { let (event_tx, mut event_rx) = watch::channel(PlaybackEvent::Start); event_rx.borrow_and_update(); - let (seek_tx, mut seek_rx) = tokio_mpsc::unbounded_channel::(); + let (seek_tx, mut seek_rx) = watch::channel(self.start_frame_number); + seek_rx.borrow_and_update(); let handle = PlaybackHandle { stop_tx: stop_tx.clone(), @@ -448,12 +449,8 @@ impl Playback { let mut cached_project = self.project.borrow().clone(); 'playback: loop { - let mut pending_seek = None; - while let Ok(next_seek_frame) = seek_rx.try_recv() { - pending_seek = Some(next_seek_frame); - } - - if let Some(seek_frame) = pending_seek { + if seek_rx.has_changed().unwrap_or(false) { + let seek_frame = *seek_rx.borrow_and_update(); frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; @@ -499,7 +496,8 @@ impl Playback { tokio::select! 
{ _ = stop_rx.changed() => break 'playback, - Some(seek_frame) = seek_rx.recv() => { + _ = seek_rx.changed() => { + let seek_frame = *seek_rx.borrow_and_update(); frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; From 2f8a70f0fa443bdb848b232ad07b817d5cbcdd80 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:06:21 +0000 Subject: [PATCH 025/333] improve: add matrix helper for playback benchmark runs Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 6 + crates/editor/PLAYBACK-FINDINGS.md | 6 + scripts/run-playback-benchmark-matrix.js | 163 +++++++++++++++++++++++ 3 files changed, 175 insertions(+) create mode 100644 scripts/run-playback-benchmark-matrix.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index a46e6ab281..79a75e2052 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -101,6 +101,12 @@ cargo run -p cap-recording --example playback-test-runner -- full --fps 60 --ben cargo run -p cap-recording --example playback-test-runner -- scrub --fps 60 --benchmark-output --notes "platform= gpu= scenario=scrub" ``` +Automated helper for machine runs: + +```bash +node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --input-dir /tmp/cap-real-device-tests +``` + | Platform | GPU Class | MP4 Full | Fragmented Full | MP4 Scrub | Fragmented Scrub | Notes | |----------|-----------|----------|-----------------|-----------|------------------|-------| | macOS 13+ | Apple Silicon | ☐ | ☐ | ☐ | ☐ | | diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index fac917b34e..065c659a6d 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -177,6 +177,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - 
`scripts/aggregate-playback-benchmarks.js` builds a markdown table from multiple JSON outputs. - Aggregates platform/gpu/scenario-tagged runs for matrix reporting. +10. **Added matrix run helper for platform/GPU benchmark execution (2026-02-13)** + - `scripts/run-playback-benchmark-matrix.js` runs `full` and `scrub` scenarios with tagged notes and JSON output. + - Automatically generates aggregate markdown for each machine run directory. + --- ## Root Cause Analysis Archive @@ -275,6 +279,7 @@ Decoder Pipeline: 11. Added timeline seek dispatch coalescing to reduce seek command storms during drag. 12. Added JSON report output support to playback-test-runner for benchmark evidence collection. 13. Added cross-platform benchmark JSON aggregation utility script. +14. Added matrix execution helper script for full + scrub benchmark runs per platform/GPU. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -289,6 +294,7 @@ Decoder Pipeline: - `apps/desktop/src/routes/editor/Timeline/index.tsx`: added requestAnimationFrame-based seek coalescing with in-flight protection. - `crates/recording/examples/playback-test-runner.rs`: added `--json-output` to emit structured benchmark reports. - `scripts/aggregate-playback-benchmarks.js`: added markdown aggregation for multiple playback benchmark JSON artifacts. +- `scripts/run-playback-benchmark-matrix.js`: added orchestrated full/scrub benchmark runner with per-machine aggregate generation. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js new file mode 100644 index 0000000000..d5793550f5 --- /dev/null +++ b/scripts/run-playback-benchmark-matrix.js @@ -0,0 +1,163 @@ +#!/usr/bin/env node + +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + platform: null, + gpu: null, + outputDir: null, + fps: 60, + recordingPath: null, + inputDir: null, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--platform") { + options.platform = argv[++i] ?? null; + continue; + } + if (arg === "--gpu") { + options.gpu = argv[++i] ?? null; + continue; + } + if (arg === "--output-dir") { + options.outputDir = argv[++i] ?? null; + continue; + } + if (arg === "--fps") { + const value = Number.parseInt(argv[++i] ?? "", 10); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --fps value"); + } + options.fps = value; + continue; + } + if (arg === "--recording-path") { + options.recordingPath = argv[++i] ?? null; + continue; + } + if (arg === "--input-dir") { + options.inputDir = argv[++i] ?? null; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] + +Runs playback benchmark matrix scenarios and writes JSON outputs. 
+ +Required: + --platform Platform label (for notes metadata) + --gpu GPU label (for notes metadata) + --output-dir Directory for benchmark JSON outputs + +Optional: + --fps FPS for benchmark runs (default: 60) + --recording-path Specific recording path + --input-dir Recording discovery directory`); +} + +function run(command, args) { + const result = spawnSync(command, args, { stdio: "inherit" }); + if (result.status !== 0) { + throw new Error(`Command failed: ${command} ${args.join(" ")}`); + } +} + +function scenarioOutputPath(outputDir, platform, gpu, scenario) { + const stamp = new Date().toISOString().replace(/[:.]/g, "-"); + return path.join(outputDir, `${stamp}-${platform}-${gpu}-${scenario}.json`); +} + +function scenarioArgs(options, scenario) { + const jsonOutput = scenarioOutputPath( + options.outputDir, + options.platform, + options.gpu, + scenario, + ); + const notes = `platform=${options.platform} gpu=${options.gpu} scenario=${scenario}`; + + const args = [ + "run", + "-p", + "cap-recording", + "--example", + "playback-test-runner", + "--", + scenario, + "--fps", + String(options.fps), + "--json-output", + jsonOutput, + "--notes", + notes, + ]; + + if (options.recordingPath) { + args.push("--recording-path", options.recordingPath); + } else if (options.inputDir) { + args.push("--input-dir", options.inputDir); + } + + return args; +} + +function validateOptions(options) { + if (!options.platform || !options.gpu || !options.outputDir) { + throw new Error("Missing required options: --platform, --gpu, --output-dir"); + } + + const absoluteOutputDir = path.resolve(options.outputDir); + options.outputDir = absoluteOutputDir; + if (!fs.existsSync(absoluteOutputDir)) { + fs.mkdirSync(absoluteOutputDir, { recursive: true }); + } +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + + validateOptions(options); + + console.log(`Running matrix for platform=${options.platform} gpu=${options.gpu}`); + 
run("cargo", scenarioArgs(options, "full")); + run("cargo", scenarioArgs(options, "scrub")); + + const aggregatePath = path.join( + options.outputDir, + `${options.platform}-${options.gpu}-aggregate.md`, + ); + run("node", [ + "scripts/aggregate-playback-benchmarks.js", + "--input", + options.outputDir, + "--output", + aggregatePath, + ]); + console.log(`Aggregate markdown: ${aggregatePath}`); +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} From 788391ee926f83ebb94bfc3ca76bce720356746b Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:06:21 +0000 Subject: [PATCH 026/333] improve: add matrix helper for playback benchmark runs Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 6 + crates/editor/PLAYBACK-FINDINGS.md | 6 + scripts/run-playback-benchmark-matrix.js | 163 +++++++++++++++++++++++ 3 files changed, 175 insertions(+) create mode 100644 scripts/run-playback-benchmark-matrix.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index a46e6ab281..79a75e2052 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -101,6 +101,12 @@ cargo run -p cap-recording --example playback-test-runner -- full --fps 60 --ben cargo run -p cap-recording --example playback-test-runner -- scrub --fps 60 --benchmark-output --notes "platform= gpu= scenario=scrub" ``` +Automated helper for machine runs: + +```bash +node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --input-dir /tmp/cap-real-device-tests +``` + | Platform | GPU Class | MP4 Full | Fragmented Full | MP4 Scrub | Fragmented Scrub | Notes | |----------|-----------|----------|-----------------|-----------|------------------|-------| | macOS 13+ | Apple Silicon | ☐ | ☐ | ☐ | ☐ | | diff --git a/crates/editor/PLAYBACK-FINDINGS.md 
b/crates/editor/PLAYBACK-FINDINGS.md index fac917b34e..065c659a6d 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -177,6 +177,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/aggregate-playback-benchmarks.js` builds a markdown table from multiple JSON outputs. - Aggregates platform/gpu/scenario-tagged runs for matrix reporting. +10. **Added matrix run helper for platform/GPU benchmark execution (2026-02-13)** + - `scripts/run-playback-benchmark-matrix.js` runs `full` and `scrub` scenarios with tagged notes and JSON output. + - Automatically generates aggregate markdown for each machine run directory. + --- ## Root Cause Analysis Archive @@ -275,6 +279,7 @@ Decoder Pipeline: 11. Added timeline seek dispatch coalescing to reduce seek command storms during drag. 12. Added JSON report output support to playback-test-runner for benchmark evidence collection. 13. Added cross-platform benchmark JSON aggregation utility script. +14. Added matrix execution helper script for full + scrub benchmark runs per platform/GPU. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -289,6 +294,7 @@ Decoder Pipeline: - `apps/desktop/src/routes/editor/Timeline/index.tsx`: added requestAnimationFrame-based seek coalescing with in-flight protection. - `crates/recording/examples/playback-test-runner.rs`: added `--json-output` to emit structured benchmark reports. - `scripts/aggregate-playback-benchmarks.js`: added markdown aggregation for multiple playback benchmark JSON artifacts. +- `scripts/run-playback-benchmark-matrix.js`: added orchestrated full/scrub benchmark runner with per-machine aggregate generation. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js new file mode 100644 index 0000000000..d5793550f5 --- /dev/null +++ b/scripts/run-playback-benchmark-matrix.js @@ -0,0 +1,163 @@ +#!/usr/bin/env node + +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + platform: null, + gpu: null, + outputDir: null, + fps: 60, + recordingPath: null, + inputDir: null, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--platform") { + options.platform = argv[++i] ?? null; + continue; + } + if (arg === "--gpu") { + options.gpu = argv[++i] ?? null; + continue; + } + if (arg === "--output-dir") { + options.outputDir = argv[++i] ?? null; + continue; + } + if (arg === "--fps") { + const value = Number.parseInt(argv[++i] ?? "", 10); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --fps value"); + } + options.fps = value; + continue; + } + if (arg === "--recording-path") { + options.recordingPath = argv[++i] ?? null; + continue; + } + if (arg === "--input-dir") { + options.inputDir = argv[++i] ?? null; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] + +Runs playback benchmark matrix scenarios and writes JSON outputs. 
+ +Required: + --platform Platform label (for notes metadata) + --gpu GPU label (for notes metadata) + --output-dir Directory for benchmark JSON outputs + +Optional: + --fps FPS for benchmark runs (default: 60) + --recording-path Specific recording path + --input-dir Recording discovery directory`); +} + +function run(command, args) { + const result = spawnSync(command, args, { stdio: "inherit" }); + if (result.status !== 0) { + throw new Error(`Command failed: ${command} ${args.join(" ")}`); + } +} + +function scenarioOutputPath(outputDir, platform, gpu, scenario) { + const stamp = new Date().toISOString().replace(/[:.]/g, "-"); + return path.join(outputDir, `${stamp}-${platform}-${gpu}-${scenario}.json`); +} + +function scenarioArgs(options, scenario) { + const jsonOutput = scenarioOutputPath( + options.outputDir, + options.platform, + options.gpu, + scenario, + ); + const notes = `platform=${options.platform} gpu=${options.gpu} scenario=${scenario}`; + + const args = [ + "run", + "-p", + "cap-recording", + "--example", + "playback-test-runner", + "--", + scenario, + "--fps", + String(options.fps), + "--json-output", + jsonOutput, + "--notes", + notes, + ]; + + if (options.recordingPath) { + args.push("--recording-path", options.recordingPath); + } else if (options.inputDir) { + args.push("--input-dir", options.inputDir); + } + + return args; +} + +function validateOptions(options) { + if (!options.platform || !options.gpu || !options.outputDir) { + throw new Error("Missing required options: --platform, --gpu, --output-dir"); + } + + const absoluteOutputDir = path.resolve(options.outputDir); + options.outputDir = absoluteOutputDir; + if (!fs.existsSync(absoluteOutputDir)) { + fs.mkdirSync(absoluteOutputDir, { recursive: true }); + } +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + + validateOptions(options); + + console.log(`Running matrix for platform=${options.platform} gpu=${options.gpu}`); + 
run("cargo", scenarioArgs(options, "full")); + run("cargo", scenarioArgs(options, "scrub")); + + const aggregatePath = path.join( + options.outputDir, + `${options.platform}-${options.gpu}-aggregate.md`, + ); + run("node", [ + "scripts/aggregate-playback-benchmarks.js", + "--input", + options.outputDir, + "--output", + aggregatePath, + ]); + console.log(`Aggregate markdown: ${aggregatePath}`); +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} From 87c9acb25bf6b0c183044c07a436af242b4de1ba Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:07:26 +0000 Subject: [PATCH 027/333] docs: add playback matrix runbook for hardware validation Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 79 ++++++++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 crates/editor/PLAYBACK-MATRIX-RUNBOOK.md diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 79a75e2052..4d9ff41649 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -241,5 +241,6 @@ When analyzing benchmark results, focus on: ## Related Documentation - [Recording Benchmarks](../recording/BENCHMARKS.md) - Recording performance tracking +- [Playback Matrix Runbook](./PLAYBACK-MATRIX-RUNBOOK.md) - Cross-platform evidence collection workflow - [cap-rendering/decoder](../rendering/src/decoder.rs) - Decoder implementation - [cap-video-decode](../video-decode/) - Platform-specific decoders diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 065c659a6d..4a42a3e5fc 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -435,6 +435,7 @@ The CPU RGBA→NV12 conversion was taking 15-25ms per frame for 3024x1964 resolu ## References - 
`PLAYBACK-BENCHMARKS.md` - Raw performance test data (auto-updated by test runner) +- `PLAYBACK-MATRIX-RUNBOOK.md` - Cross-platform playback evidence collection process - `../recording/FINDINGS.md` - Recording performance findings (source of test files) - `../recording/BENCHMARKS.md` - Recording benchmark data - `examples/playback-test-runner.rs` - Playback test implementation diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md new file mode 100644 index 0000000000..281dcbdef8 --- /dev/null +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -0,0 +1,79 @@ +# Playback Matrix Runbook + +This runbook defines how to collect benchmark evidence for desktop playback performance and sync validation across required hardware classes. + +## Target matrix + +| Platform | GPU class | Required scenarios | +|---|---|---| +| macOS 13+ | Apple Silicon | full, scrub | +| Windows 11 | NVIDIA discrete | full, scrub | +| Windows 11 | AMD discrete | full, scrub | +| Windows 11 | Integrated baseline | full, scrub | + +## Preconditions + +1. Build can run on target machine. +2. Real-device recording outputs are available. +3. Recordings include both MP4 and fragmented samples. +4. Node and Rust toolchains are installed. 
+ +## Inputs and output directories + +Set these per machine: + +- `INPUT_DIR`: recording root (default `/tmp/cap-real-device-tests`) +- `OUT_DIR`: machine-local output folder for JSON and aggregate markdown + +Example: + +```bash +export INPUT_DIR="/tmp/cap-real-device-tests" +export OUT_DIR="/tmp/cap-playback-matrix/macos-apple-silicon" +mkdir -p "$OUT_DIR" +``` + +## Machine run command + +Run this once per platform/GPU class: + +```bash +node scripts/run-playback-benchmark-matrix.js \ + --platform "<platform>" \ + --gpu "<gpu>" \ + --output-dir "$OUT_DIR" \ + --fps 60 \ + --input-dir "$INPUT_DIR" +``` + +Examples: + +```bash +node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/cap-playback-matrix/macos-apple-silicon --fps 60 --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir C:\temp\cap-playback-matrix\windows-nvidia --fps 60 --input-dir C:\temp\cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu amd-discrete --output-dir C:\temp\cap-playback-matrix\windows-amd --fps 60 --input-dir C:\temp\cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu integrated --output-dir C:\temp\cap-playback-matrix\windows-integrated --fps 60 --input-dir C:\temp\cap-real-device-tests +``` + +## Outputs produced per machine + +Each run directory contains: + +- timestamped `full` scenario JSON +- timestamped `scrub` scenario JSON +- `<platform>-<gpu>-aggregate.md` summary table + +## Cross-machine aggregation + +After collecting all machine folders into a shared root: + +```bash +node scripts/aggregate-playback-benchmarks.js --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md +``` + +## Evidence checklist + +1. Confirm all matrix rows exist. +2. Confirm each row has both `full` and `scrub` scenarios. +3. Capture aggregate markdown and raw JSON artifacts. +4. 
Attach outputs to playback findings update. From 15833310276e6c7eec73e489eda546e9ecd7eacc Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:07:26 +0000 Subject: [PATCH 028/333] docs: add playback matrix runbook for hardware validation Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 79 ++++++++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 crates/editor/PLAYBACK-MATRIX-RUNBOOK.md diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 79a75e2052..4d9ff41649 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -241,5 +241,6 @@ When analyzing benchmark results, focus on: ## Related Documentation - [Recording Benchmarks](../recording/BENCHMARKS.md) - Recording performance tracking +- [Playback Matrix Runbook](./PLAYBACK-MATRIX-RUNBOOK.md) - Cross-platform evidence collection workflow - [cap-rendering/decoder](../rendering/src/decoder.rs) - Decoder implementation - [cap-video-decode](../video-decode/) - Platform-specific decoders diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 065c659a6d..4a42a3e5fc 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -435,6 +435,7 @@ The CPU RGBA→NV12 conversion was taking 15-25ms per frame for 3024x1964 resolu ## References - `PLAYBACK-BENCHMARKS.md` - Raw performance test data (auto-updated by test runner) +- `PLAYBACK-MATRIX-RUNBOOK.md` - Cross-platform playback evidence collection process - `../recording/FINDINGS.md` - Recording performance findings (source of test files) - `../recording/BENCHMARKS.md` - Recording benchmark data - `examples/playback-test-runner.rs` - Playback test implementation diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md new file mode 100644 
index 0000000000..281dcbdef8 --- /dev/null +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -0,0 +1,79 @@ +# Playback Matrix Runbook + +This runbook defines how to collect benchmark evidence for desktop playback performance and sync validation across required hardware classes. + +## Target matrix + +| Platform | GPU class | Required scenarios | +|---|---|---| +| macOS 13+ | Apple Silicon | full, scrub | +| Windows 11 | NVIDIA discrete | full, scrub | +| Windows 11 | AMD discrete | full, scrub | +| Windows 11 | Integrated baseline | full, scrub | + +## Preconditions + +1. Build can run on target machine. +2. Real-device recording outputs are available. +3. Recordings include both MP4 and fragmented samples. +4. Node and Rust toolchains are installed. + +## Inputs and output directories + +Set these per machine: + +- `INPUT_DIR`: recording root (default `/tmp/cap-real-device-tests`) +- `OUT_DIR`: machine-local output folder for JSON and aggregate markdown + +Example: + +```bash +export INPUT_DIR="/tmp/cap-real-device-tests" +export OUT_DIR="/tmp/cap-playback-matrix/macos-apple-silicon" +mkdir -p "$OUT_DIR" +``` + +## Machine run command + +Run this once per platform/GPU class: + +```bash +node scripts/run-playback-benchmark-matrix.js \ + --platform "<platform>" \ + --gpu "<gpu>" \ + --output-dir "$OUT_DIR" \ + --fps 60 \ + --input-dir "$INPUT_DIR" +``` + +Examples: + +```bash +node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/cap-playback-matrix/macos-apple-silicon --fps 60 --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir C:\temp\cap-playback-matrix\windows-nvidia --fps 60 --input-dir C:\temp\cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu amd-discrete --output-dir C:\temp\cap-playback-matrix\windows-amd --fps 60 --input-dir C:\temp\cap-real-device-tests +node 
scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu integrated --output-dir C:\temp\cap-playback-matrix\windows-integrated --fps 60 --input-dir C:\temp\cap-real-device-tests +``` + +## Outputs produced per machine + +Each run directory contains: + +- timestamped `full` scenario JSON +- timestamped `scrub` scenario JSON +- `<platform>-<gpu>-aggregate.md` summary table + +## Cross-machine aggregation + +After collecting all machine folders into a shared root: + +```bash +node scripts/aggregate-playback-benchmarks.js --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md +``` + +## Evidence checklist + +1. Confirm all matrix rows exist. +2. Confirm each row has both `full` and `scrub` scenarios. +3. Capture aggregate markdown and raw JSON artifacts. +4. Attach outputs to playback findings update. From da1990707dfaf856d99f20467e037261abc95a9f Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:10:15 +0000 Subject: [PATCH 029/333] improve: add playback matrix coverage validation script Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 6 + crates/editor/PLAYBACK-FINDINGS.md | 6 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 + scripts/validate-playback-matrix.js | 231 +++++++++++++++++++++++ 4 files changed, 249 insertions(+) create mode 100644 scripts/validate-playback-matrix.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 4d9ff41649..6da7192bb6 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -66,6 +66,12 @@ Aggregate JSON outputs from multiple machines: node scripts/aggregate-playback-benchmarks.js --input /path/to/json-results --output /tmp/playback-benchmark-aggregate.md ``` +Validate matrix coverage and required formats: + +```bash +node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented +``` + #### Decode Performance Benchmark ```bash diff --git 
a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 4a42a3e5fc..dcb954a5d8 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -181,6 +181,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/run-playback-benchmark-matrix.js` runs `full` and `scrub` scenarios with tagged notes and JSON output. - Automatically generates aggregate markdown for each machine run directory. +11. **Added matrix completeness validator (2026-02-13)** + - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. + - Supports required format checks per cell (mp4 + fragmented). + --- ## Root Cause Analysis Archive @@ -280,6 +284,7 @@ Decoder Pipeline: 12. Added JSON report output support to playback-test-runner for benchmark evidence collection. 13. Added cross-platform benchmark JSON aggregation utility script. 14. Added matrix execution helper script for full + scrub benchmark runs per platform/GPU. +15. Added matrix validation script for required cell and format coverage checks. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -295,6 +300,7 @@ Decoder Pipeline: - `crates/recording/examples/playback-test-runner.rs`: added `--json-output` to emit structured benchmark reports. - `scripts/aggregate-playback-benchmarks.js`: added markdown aggregation for multiple playback benchmark JSON artifacts. - `scripts/run-playback-benchmark-matrix.js`: added orchestrated full/scrub benchmark runner with per-machine aggregate generation. +- `scripts/validate-playback-matrix.js`: added required matrix cell/format validation for aggregated evidence. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 281dcbdef8..5d633c8cc2 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -71,6 +71,12 @@ After collecting all machine folders into a shared root: node scripts/aggregate-playback-benchmarks.js --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md ``` +Validate matrix completeness: + +```bash +node scripts/validate-playback-matrix.js --input /path/to/all-machine-results --require-formats mp4,fragmented +``` + ## Evidence checklist 1. Confirm all matrix rows exist. diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js new file mode 100644 index 0000000000..a95ae023cb --- /dev/null +++ b/scripts/validate-playback-matrix.js @@ -0,0 +1,231 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +const DEFAULT_REQUIRED_CELLS = [ + { platform: "macos-13", gpu: "apple-silicon", scenario: "full" }, + { platform: "macos-13", gpu: "apple-silicon", scenario: "scrub" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "integrated", scenario: "full" }, + { platform: "windows-11", gpu: "integrated", scenario: "scrub" }, +]; + +function parseArgs(argv) { + const options = { + inputs: [], + requiredCells: [], + requiredFormats: [], + useDefaultMatrix: true, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); 
+ continue; + } + if (arg === "--require-cell") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --require-cell"); + options.requiredCells.push(parseCell(value)); + options.useDefaultMatrix = false; + continue; + } + if (arg === "--require-formats") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --require-formats"); + options.requiredFormats = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + continue; + } + if (arg === "--no-default-matrix") { + options.useDefaultMatrix = false; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function parseCell(value) { + const [platform, gpu, scenario] = value.split(":"); + if (!platform || !gpu || !scenario) { + throw new Error( + `Invalid --require-cell format: ${value}. Expected platform:gpu:scenario`, + ); + } + return { platform, gpu, scenario }; +} + +function printUsage() { + console.log(`Usage: node scripts/validate-playback-matrix.js --input [--input ...] [--require-cell platform:gpu:scenario ...] [--require-formats mp4,fragmented] + +Validates that required benchmark matrix cells are present in playback benchmark JSON results. + +Options: + --input, -i JSON file or directory containing JSON files (repeatable) + --require-cell Required cell as platform:gpu:scenario (repeatable) + --require-formats Comma-separated required formats per cell + --no-default-matrix Disable built-in required matrix + --help, -h Show help`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? 
[targetPath] : []; + } + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const result = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + result[key.trim()] = value.trim(); + } + return result; +} + +function keyForCell(cell) { + return `${cell.platform}|${cell.gpu}|${cell.scenario}`; +} + +function collectObservedCells(files) { + const observed = new Map(); + for (const filePath of files) { + const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const key = keyForCell({ platform, gpu, scenario }); + if (!observed.has(key)) { + observed.set(key, { + platform, + gpu, + scenario, + formats: new Set(), + files: new Set(), + }); + } + const entry = observed.get(key); + entry.files.add(filePath); + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + for (const report of reports) { + entry.formats.add(report.is_fragmented ? 
"fragmented" : "mp4"); + } + } + return observed; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + printUsage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const input of options.inputs) { + for (const filePath of collectJsonFiles(input)) { + files.add(filePath); + } + } + if (files.size === 0) { + throw new Error("No JSON files found"); + } + + const requiredCells = options.useDefaultMatrix + ? [...DEFAULT_REQUIRED_CELLS, ...options.requiredCells] + : options.requiredCells; + if (requiredCells.length === 0) { + throw new Error("No required matrix cells configured"); + } + + const observed = collectObservedCells([...files]); + const missingCells = []; + const formatFailures = []; + + for (const cell of requiredCells) { + const key = keyForCell(cell); + const observedCell = observed.get(key); + if (!observedCell) { + missingCells.push(cell); + continue; + } + for (const requiredFormat of options.requiredFormats) { + if (!observedCell.formats.has(requiredFormat)) { + formatFailures.push({ + ...cell, + requiredFormat, + observedFormats: [...observedCell.formats], + }); + } + } + } + + console.log(`Validated ${requiredCells.length} required cells`); + console.log(`Observed ${observed.size} unique cells`); + + if (missingCells.length > 0) { + console.log("Missing required cells:"); + for (const cell of missingCells) { + console.log(` - ${cell.platform}:${cell.gpu}:${cell.scenario}`); + } + } + + if (formatFailures.length > 0) { + console.log("Missing required formats:"); + for (const failure of formatFailures) { + console.log( + ` - ${failure.platform}:${failure.gpu}:${failure.scenario} missing ${failure.requiredFormat} (observed: ${failure.observedFormats.join(", ") || "none"})`, + ); + } + } + + if (missingCells.length > 0 || formatFailures.length > 0) { + process.exit(1); + } + + console.log("Matrix validation passed"); +} + 
+try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} From 291addedff6eb0f44b0f413557dacb0575e13f9f Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:10:15 +0000 Subject: [PATCH 030/333] improve: add playback matrix coverage validation script Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 6 + crates/editor/PLAYBACK-FINDINGS.md | 6 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 + scripts/validate-playback-matrix.js | 231 +++++++++++++++++++++++ 4 files changed, 249 insertions(+) create mode 100644 scripts/validate-playback-matrix.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 4d9ff41649..6da7192bb6 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -66,6 +66,12 @@ Aggregate JSON outputs from multiple machines: node scripts/aggregate-playback-benchmarks.js --input /path/to/json-results --output /tmp/playback-benchmark-aggregate.md ``` +Validate matrix coverage and required formats: + +```bash +node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented +``` + #### Decode Performance Benchmark ```bash diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 4a42a3e5fc..dcb954a5d8 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -181,6 +181,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/run-playback-benchmark-matrix.js` runs `full` and `scrub` scenarios with tagged notes and JSON output. - Automatically generates aggregate markdown for each machine run directory. +11. **Added matrix completeness validator (2026-02-13)** + - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. + - Supports required format checks per cell (mp4 + fragmented). 
+ --- ## Root Cause Analysis Archive @@ -280,6 +284,7 @@ Decoder Pipeline: 12. Added JSON report output support to playback-test-runner for benchmark evidence collection. 13. Added cross-platform benchmark JSON aggregation utility script. 14. Added matrix execution helper script for full + scrub benchmark runs per platform/GPU. +15. Added matrix validation script for required cell and format coverage checks. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -295,6 +300,7 @@ Decoder Pipeline: - `crates/recording/examples/playback-test-runner.rs`: added `--json-output` to emit structured benchmark reports. - `scripts/aggregate-playback-benchmarks.js`: added markdown aggregation for multiple playback benchmark JSON artifacts. - `scripts/run-playback-benchmark-matrix.js`: added orchestrated full/scrub benchmark runner with per-machine aggregate generation. +- `scripts/validate-playback-matrix.js`: added required matrix cell/format validation for aggregated evidence. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 281dcbdef8..5d633c8cc2 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -71,6 +71,12 @@ After collecting all machine folders into a shared root: node scripts/aggregate-playback-benchmarks.js --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md ``` +Validate matrix completeness: + +```bash +node scripts/validate-playback-matrix.js --input /path/to/all-machine-results --require-formats mp4,fragmented +``` + ## Evidence checklist 1. Confirm all matrix rows exist. 
diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js new file mode 100644 index 0000000000..a95ae023cb --- /dev/null +++ b/scripts/validate-playback-matrix.js @@ -0,0 +1,231 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +const DEFAULT_REQUIRED_CELLS = [ + { platform: "macos-13", gpu: "apple-silicon", scenario: "full" }, + { platform: "macos-13", gpu: "apple-silicon", scenario: "scrub" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "integrated", scenario: "full" }, + { platform: "windows-11", gpu: "integrated", scenario: "scrub" }, +]; + +function parseArgs(argv) { + const options = { + inputs: [], + requiredCells: [], + requiredFormats: [], + useDefaultMatrix: true, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--require-cell") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --require-cell"); + options.requiredCells.push(parseCell(value)); + options.useDefaultMatrix = false; + continue; + } + if (arg === "--require-formats") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --require-formats"); + options.requiredFormats = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + continue; + } + if (arg === "--no-default-matrix") { + options.useDefaultMatrix = false; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } 
+ + return options; +} + +function parseCell(value) { + const [platform, gpu, scenario] = value.split(":"); + if (!platform || !gpu || !scenario) { + throw new Error( + `Invalid --require-cell format: ${value}. Expected platform:gpu:scenario`, + ); + } + return { platform, gpu, scenario }; +} + +function printUsage() { + console.log(`Usage: node scripts/validate-playback-matrix.js --input [--input ...] [--require-cell platform:gpu:scenario ...] [--require-formats mp4,fragmented] + +Validates that required benchmark matrix cells are present in playback benchmark JSON results. + +Options: + --input, -i JSON file or directory containing JSON files (repeatable) + --require-cell Required cell as platform:gpu:scenario (repeatable) + --require-formats Comma-separated required formats per cell + --no-default-matrix Disable built-in required matrix + --help, -h Show help`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? 
[targetPath] : []; + } + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const result = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + result[key.trim()] = value.trim(); + } + return result; +} + +function keyForCell(cell) { + return `${cell.platform}|${cell.gpu}|${cell.scenario}`; +} + +function collectObservedCells(files) { + const observed = new Map(); + for (const filePath of files) { + const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const key = keyForCell({ platform, gpu, scenario }); + if (!observed.has(key)) { + observed.set(key, { + platform, + gpu, + scenario, + formats: new Set(), + files: new Set(), + }); + } + const entry = observed.get(key); + entry.files.add(filePath); + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + for (const report of reports) { + entry.formats.add(report.is_fragmented ? 
"fragmented" : "mp4"); + } + } + return observed; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + printUsage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const input of options.inputs) { + for (const filePath of collectJsonFiles(input)) { + files.add(filePath); + } + } + if (files.size === 0) { + throw new Error("No JSON files found"); + } + + const requiredCells = options.useDefaultMatrix + ? [...DEFAULT_REQUIRED_CELLS, ...options.requiredCells] + : options.requiredCells; + if (requiredCells.length === 0) { + throw new Error("No required matrix cells configured"); + } + + const observed = collectObservedCells([...files]); + const missingCells = []; + const formatFailures = []; + + for (const cell of requiredCells) { + const key = keyForCell(cell); + const observedCell = observed.get(key); + if (!observedCell) { + missingCells.push(cell); + continue; + } + for (const requiredFormat of options.requiredFormats) { + if (!observedCell.formats.has(requiredFormat)) { + formatFailures.push({ + ...cell, + requiredFormat, + observedFormats: [...observedCell.formats], + }); + } + } + } + + console.log(`Validated ${requiredCells.length} required cells`); + console.log(`Observed ${observed.size} unique cells`); + + if (missingCells.length > 0) { + console.log("Missing required cells:"); + for (const cell of missingCells) { + console.log(` - ${cell.platform}:${cell.gpu}:${cell.scenario}`); + } + } + + if (formatFailures.length > 0) { + console.log("Missing required formats:"); + for (const failure of formatFailures) { + console.log( + ` - ${failure.platform}:${failure.gpu}:${failure.scenario} missing ${failure.requiredFormat} (observed: ${failure.observedFormats.join(", ") || "none"})`, + ); + } + } + + if (missingCells.length > 0 || formatFailures.length > 0) { + process.exit(1); + } + + console.log("Matrix validation passed"); +} + 
+try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} From c04b825f19f71e0bcdee4dd1c01f169f6940da5e Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:11:16 +0000 Subject: [PATCH 031/333] improve: include full runner context in benchmark command metadata Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + .../examples/playback-test-runner.rs | 57 ++++++++++++++++--- 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index dcb954a5d8..f584b0f2b7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -172,6 +172,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. - JSON output includes command metadata, system info, summary, and per-recording test detail. + - Command metadata now includes input scope and output flags for reproducibility. 9. **Added JSON aggregate utility for cross-platform benchmark collation (2026-02-13)** - `scripts/aggregate-playback-benchmarks.js` builds a markdown table from multiple JSON outputs. 
diff --git a/crates/recording/examples/playback-test-runner.rs b/crates/recording/examples/playback-test-runner.rs index abebc6cd14..b930a2cb41 100644 --- a/crates/recording/examples/playback-test-runner.rs +++ b/crates/recording/examples/playback-test-runner.rs @@ -1492,6 +1492,53 @@ fn command_name(command: Option<&Commands>) -> &'static str { } } +fn shell_quote(value: &str) -> String { + let is_safe = value + .chars() + .all(|char| char.is_ascii_alphanumeric() || "-_./:=,".contains(char)); + if is_safe { + value.to_string() + } else { + format!("'{}'", value.replace('\'', "'\"'\"'")) + } +} + +fn build_command_string(cli: &Cli) -> String { + let mut command = format!( + "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}", + command_name(cli.command.as_ref()), + cli.fps + ); + + if let Some(path) = &cli.recording_path { + command.push_str(" --recording-path "); + command.push_str(&shell_quote(path.to_string_lossy().as_ref())); + } else { + command.push_str(" --input-dir "); + command.push_str(&shell_quote(cli.input_dir.to_string_lossy().as_ref())); + } + + if cli.verbose { + command.push_str(" --verbose"); + } + + if cli.benchmark_output { + command.push_str(" --benchmark-output"); + } + + if let Some(path) = &cli.json_output { + command.push_str(" --json-output "); + command.push_str(&shell_quote(path.to_string_lossy().as_ref())); + } + + if let Some(notes) = &cli.notes { + command.push_str(" --notes "); + command.push_str(&shell_quote(notes)); + } + + command +} + #[tokio::main] async fn main() -> anyhow::Result<()> { tracing_subscriber::registry() @@ -1586,15 +1633,7 @@ async fn main() -> anyhow::Result<()> { print_summary(&reports); - let command = format!( - "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}{}", - command_name(cli.command.as_ref()), - cli.fps, - cli.recording_path - .as_ref() - .map(|p| format!(" --recording-path {}", p.display())) - .unwrap_or_default(), - ); + let command = 
build_command_string(&cli); if cli.benchmark_output { let benchmark_md = From db63673b1ab689bdce343c87e22a61adbda4351f Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:11:16 +0000 Subject: [PATCH 032/333] improve: include full runner context in benchmark command metadata Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + .../examples/playback-test-runner.rs | 57 ++++++++++++++++--- 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index dcb954a5d8..f584b0f2b7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -172,6 +172,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. - JSON output includes command metadata, system info, summary, and per-recording test detail. + - Command metadata now includes input scope and output flags for reproducibility. 9. **Added JSON aggregate utility for cross-platform benchmark collation (2026-02-13)** - `scripts/aggregate-playback-benchmarks.js` builds a markdown table from multiple JSON outputs. 
diff --git a/crates/recording/examples/playback-test-runner.rs b/crates/recording/examples/playback-test-runner.rs index abebc6cd14..b930a2cb41 100644 --- a/crates/recording/examples/playback-test-runner.rs +++ b/crates/recording/examples/playback-test-runner.rs @@ -1492,6 +1492,53 @@ fn command_name(command: Option<&Commands>) -> &'static str { } } +fn shell_quote(value: &str) -> String { + let is_safe = value + .chars() + .all(|char| char.is_ascii_alphanumeric() || "-_./:=,".contains(char)); + if is_safe { + value.to_string() + } else { + format!("'{}'", value.replace('\'', "'\"'\"'")) + } +} + +fn build_command_string(cli: &Cli) -> String { + let mut command = format!( + "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}", + command_name(cli.command.as_ref()), + cli.fps + ); + + if let Some(path) = &cli.recording_path { + command.push_str(" --recording-path "); + command.push_str(&shell_quote(path.to_string_lossy().as_ref())); + } else { + command.push_str(" --input-dir "); + command.push_str(&shell_quote(cli.input_dir.to_string_lossy().as_ref())); + } + + if cli.verbose { + command.push_str(" --verbose"); + } + + if cli.benchmark_output { + command.push_str(" --benchmark-output"); + } + + if let Some(path) = &cli.json_output { + command.push_str(" --json-output "); + command.push_str(&shell_quote(path.to_string_lossy().as_ref())); + } + + if let Some(notes) = &cli.notes { + command.push_str(" --notes "); + command.push_str(&shell_quote(notes)); + } + + command +} + #[tokio::main] async fn main() -> anyhow::Result<()> { tracing_subscriber::registry() @@ -1586,15 +1633,7 @@ async fn main() -> anyhow::Result<()> { print_summary(&reports); - let command = format!( - "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}{}", - command_name(cli.command.as_ref()), - cli.fps, - cli.recording_path - .as_ref() - .map(|p| format!(" --recording-path {}", p.display())) - .unwrap_or_default(), - ); + let command = 
build_command_string(&cli); if cli.benchmark_output { let benchmark_md = From f7107f3b382766a2a3ff9c38c6ac086f32da8d28 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:12:47 +0000 Subject: [PATCH 033/333] improve: add playback benchmark npm aliases and cli passthrough Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 18 ++++++++++++++++++ package.json | 3 +++ scripts/aggregate-playback-benchmarks.js | 3 +++ scripts/run-playback-benchmark-matrix.js | 3 +++ scripts/validate-playback-matrix.js | 3 +++ 6 files changed, 31 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index f584b0f2b7..5cc463428d 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -185,6 +185,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 11. **Added matrix completeness validator (2026-02-13)** - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. - Supports required format checks per cell (mp4 + fragmented). + - Root `package.json` now exposes `bench:playback:*` script aliases for matrix, aggregate, and validate flows. 
--- diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 5d633c8cc2..38fd659ddc 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -46,6 +46,12 @@ node scripts/run-playback-benchmark-matrix.js \ --input-dir "$INPUT_DIR" ``` +Equivalent shortcut: + +```bash +pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --input-dir "$INPUT_DIR" +``` + Examples: ```bash @@ -71,12 +77,24 @@ After collecting all machine folders into a shared root: node scripts/aggregate-playback-benchmarks.js --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md ``` +Equivalent shortcut: + +```bash +pnpm bench:playback:aggregate -- --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md +``` + Validate matrix completeness: ```bash node scripts/validate-playback-matrix.js --input /path/to/all-machine-results --require-formats mp4,fragmented ``` +Equivalent shortcut: + +```bash +pnpm bench:playback:validate -- --input /path/to/all-machine-results --require-formats mp4,fragmented +``` + ## Evidence checklist 1. Confirm all matrix rows exist. diff --git a/package.json b/package.json index 47f69790ef..dc4ad744af 100644 --- a/package.json +++ b/package.json @@ -41,6 +41,9 @@ "web": "pnpm --filter=@cap/web", "env-setup": "node scripts/env-cli.js", "check-tauri-versions": "node scripts/check-tauri-plugin-versions.js", + "bench:playback:matrix": "node scripts/run-playback-benchmark-matrix.js", + "bench:playback:aggregate": "node scripts/aggregate-playback-benchmarks.js", + "bench:playback:validate": "node scripts/validate-playback-matrix.js", "clean": "find . 
-name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/aggregate-playback-benchmarks.js b/scripts/aggregate-playback-benchmarks.js index d5ee7fe7ec..75275c944f 100644 --- a/scripts/aggregate-playback-benchmarks.js +++ b/scripts/aggregate-playback-benchmarks.js @@ -10,6 +10,9 @@ function parseArgs(argv) { for (let i = 2; i < argv.length; i++) { const arg = argv[i]; + if (arg === "--") { + continue; + } if (arg === "--help" || arg === "-h") { help = true; continue; diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index d5793550f5..f53c16a102 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -16,6 +16,9 @@ function parseArgs(argv) { for (let i = 2; i < argv.length; i++) { const arg = argv[i]; + if (arg === "--") { + continue; + } if (arg === "--help" || arg === "-h") { options.help = true; continue; diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js index a95ae023cb..4fc85dc61d 100644 --- a/scripts/validate-playback-matrix.js +++ b/scripts/validate-playback-matrix.js @@ -24,6 +24,9 @@ function parseArgs(argv) { for (let i = 2; i < argv.length; i++) { const arg = argv[i]; + if (arg === "--") { + continue; + } if (arg === "--help" || arg === "-h") { options.help = true; continue; From b34be426f664c50aaaf85e830b198ba84df4e056 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:12:47 +0000 Subject: [PATCH 034/333] improve: add playback benchmark npm aliases and cli passthrough Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 18 ++++++++++++++++++ package.json | 3 +++ scripts/aggregate-playback-benchmarks.js | 3 +++ 
scripts/run-playback-benchmark-matrix.js | 3 +++ scripts/validate-playback-matrix.js | 3 +++ 6 files changed, 31 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index f584b0f2b7..5cc463428d 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -185,6 +185,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 11. **Added matrix completeness validator (2026-02-13)** - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. - Supports required format checks per cell (mp4 + fragmented). + - Root `package.json` now exposes `bench:playback:*` script aliases for matrix, aggregate, and validate flows. --- diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 5d633c8cc2..38fd659ddc 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -46,6 +46,12 @@ node scripts/run-playback-benchmark-matrix.js \ --input-dir "$INPUT_DIR" ``` +Equivalent shortcut: + +```bash +pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --input-dir "$INPUT_DIR" +``` + Examples: ```bash @@ -71,12 +77,24 @@ After collecting all machine folders into a shared root: node scripts/aggregate-playback-benchmarks.js --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md ``` +Equivalent shortcut: + +```bash +pnpm bench:playback:aggregate -- --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md +``` + Validate matrix completeness: ```bash node scripts/validate-playback-matrix.js --input /path/to/all-machine-results --require-formats mp4,fragmented ``` +Equivalent shortcut: + +```bash +pnpm bench:playback:validate -- --input /path/to/all-machine-results --require-formats mp4,fragmented +``` + ## Evidence checklist 1. Confirm all matrix rows exist. 
diff --git a/package.json b/package.json index 47f69790ef..dc4ad744af 100644 --- a/package.json +++ b/package.json @@ -41,6 +41,9 @@ "web": "pnpm --filter=@cap/web", "env-setup": "node scripts/env-cli.js", "check-tauri-versions": "node scripts/check-tauri-plugin-versions.js", + "bench:playback:matrix": "node scripts/run-playback-benchmark-matrix.js", + "bench:playback:aggregate": "node scripts/aggregate-playback-benchmarks.js", + "bench:playback:validate": "node scripts/validate-playback-matrix.js", "clean": "find . -name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/aggregate-playback-benchmarks.js b/scripts/aggregate-playback-benchmarks.js index d5ee7fe7ec..75275c944f 100644 --- a/scripts/aggregate-playback-benchmarks.js +++ b/scripts/aggregate-playback-benchmarks.js @@ -10,6 +10,9 @@ function parseArgs(argv) { for (let i = 2; i < argv.length; i++) { const arg = argv[i]; + if (arg === "--") { + continue; + } if (arg === "--help" || arg === "-h") { help = true; continue; diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index d5793550f5..f53c16a102 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -16,6 +16,9 @@ function parseArgs(argv) { for (let i = 2; i < argv.length; i++) { const arg = argv[i]; + if (arg === "--") { + continue; + } if (arg === "--help" || arg === "-h") { options.help = true; continue; diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js index a95ae023cb..4fc85dc61d 100644 --- a/scripts/validate-playback-matrix.js +++ b/scripts/validate-playback-matrix.js @@ -24,6 +24,9 @@ function parseArgs(argv) { for (let i = 2; i < argv.length; i++) { const arg = argv[i]; + if (arg === "--") { + continue; + } if 
(arg === "--help" || arg === "-h") { options.help = true; continue; From 44d06aa67f80975d8d9d69cbc5dd7b7aa3fde301 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:15:52 +0000 Subject: [PATCH 035/333] improve: drop duplicate same-frame seek updates Co-authored-by: Richie McIlroy --- apps/desktop/src/routes/editor/Timeline/index.tsx | 12 ++++++++++++ crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 9 ++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx index 7726bb4258..c475fa1d27 100644 --- a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -224,6 +224,8 @@ export function Timeline() { let pendingSeekFrame: number | null = null; let seekRafId: number | null = null; let seekInFlight = false; + let inFlightSeekFrame: number | null = null; + let lastCompletedSeekFrame: number | null = null; onCleanup(() => { if (zoomRafId !== null) cancelAnimationFrame(zoomRafId); @@ -276,6 +278,13 @@ export function Timeline() { } function scheduleSeek(frameNumber: number) { + if ( + frameNumber === pendingSeekFrame || + frameNumber === inFlightSeekFrame || + frameNumber === lastCompletedSeekFrame + ) { + return; + } pendingSeekFrame = frameNumber; if (seekRafId === null) { seekRafId = requestAnimationFrame(flushPendingSeek); @@ -295,13 +304,16 @@ export function Timeline() { const frameNumber = pendingSeekFrame; pendingSeekFrame = null; seekInFlight = true; + inFlightSeekFrame = frameNumber; try { await commands.seekTo(frameNumber); + lastCompletedSeekFrame = frameNumber; } catch (err) { console.error("Failed to seek timeline playhead:", err); } finally { seekInFlight = false; + inFlightSeekFrame = null; if (pendingSeekFrame !== null && seekRafId === null) { seekRafId = requestAnimationFrame(flushPendingSeek); } diff --git a/crates/editor/PLAYBACK-FINDINGS.md 
b/crates/editor/PLAYBACK-FINDINGS.md index 5cc463428d..09d0f85376 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -168,6 +168,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 7. **Timeline seek dispatch now coalesces during drag (2026-02-13)** - Frontend seek calls are requestAnimationFrame-batched. - Only the latest pending seek frame is sent while an async seek is in-flight. + - Duplicate same-frame seeks are dropped in both frontend dispatch and playback seek signaling. 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index c9533ab63b..b25939cd14 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -795,7 +795,14 @@ impl PlaybackHandle { } pub fn seek(&self, frame_number: u32) { - let _ = self.seek_tx.send(frame_number); + let _ = self.seek_tx.send_if_modified(|current_frame| { + if *current_frame == frame_number { + false + } else { + *current_frame = frame_number; + true + } + }); } pub async fn receive_event(&mut self) -> watch::Ref<'_, PlaybackEvent> { From 2f74e7a0860e5d65f5891e5a1e26d682c0e8b8f3 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:15:52 +0000 Subject: [PATCH 036/333] improve: drop duplicate same-frame seek updates Co-authored-by: Richie McIlroy --- apps/desktop/src/routes/editor/Timeline/index.tsx | 12 ++++++++++++ crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 9 ++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx index 7726bb4258..c475fa1d27 100644 --- a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -224,6 +224,8 @@ export function 
Timeline() { let pendingSeekFrame: number | null = null; let seekRafId: number | null = null; let seekInFlight = false; + let inFlightSeekFrame: number | null = null; + let lastCompletedSeekFrame: number | null = null; onCleanup(() => { if (zoomRafId !== null) cancelAnimationFrame(zoomRafId); @@ -276,6 +278,13 @@ export function Timeline() { } function scheduleSeek(frameNumber: number) { + if ( + frameNumber === pendingSeekFrame || + frameNumber === inFlightSeekFrame || + frameNumber === lastCompletedSeekFrame + ) { + return; + } pendingSeekFrame = frameNumber; if (seekRafId === null) { seekRafId = requestAnimationFrame(flushPendingSeek); @@ -295,13 +304,16 @@ export function Timeline() { const frameNumber = pendingSeekFrame; pendingSeekFrame = null; seekInFlight = true; + inFlightSeekFrame = frameNumber; try { await commands.seekTo(frameNumber); + lastCompletedSeekFrame = frameNumber; } catch (err) { console.error("Failed to seek timeline playhead:", err); } finally { seekInFlight = false; + inFlightSeekFrame = null; if (pendingSeekFrame !== null && seekRafId === null) { seekRafId = requestAnimationFrame(flushPendingSeek); } diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 5cc463428d..09d0f85376 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -168,6 +168,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 7. **Timeline seek dispatch now coalesces during drag (2026-02-13)** - Frontend seek calls are requestAnimationFrame-batched. - Only the latest pending seek frame is sent while an async seek is in-flight. + - Duplicate same-frame seeks are dropped in both frontend dispatch and playback seek signaling. 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index c9533ab63b..b25939cd14 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -795,7 +795,14 @@ impl PlaybackHandle { } pub fn seek(&self, frame_number: u32) { - let _ = self.seek_tx.send(frame_number); + let _ = self.seek_tx.send_if_modified(|current_frame| { + if *current_frame == frame_number { + false + } else { + *current_frame = frame_number; + true + } + }); } pub async fn receive_event(&mut self) -> watch::Ref<'_, PlaybackEvent> { From 6af9364cf0991a65e8316166ceb28ab3f94fd2ad Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:16:57 +0000 Subject: [PATCH 037/333] improve: validate per-machine matrix runs in helper script Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 +- scripts/run-playback-benchmark-matrix.js | 40 ++++++++++++++++++++++-- 4 files changed, 42 insertions(+), 3 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 6da7192bb6..684161ff4d 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -111,6 +111,7 @@ Automated helper for machine runs: ```bash node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir /tmp/playback-matrix-windows-nvidia --fps 60 --require-formats mp4,fragmented --input-dir /tmp/cap-real-device-tests ``` | Platform | GPU Class | MP4 Full | Fragmented Full | MP4 Scrub | Fragmented Scrub | Notes | diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 09d0f85376..8431346cc3 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ 
b/crates/editor/PLAYBACK-FINDINGS.md @@ -182,6 +182,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 10. **Added matrix run helper for platform/GPU benchmark execution (2026-02-13)** - `scripts/run-playback-benchmark-matrix.js` runs `full` and `scrub` scenarios with tagged notes and JSON output. - Automatically generates aggregate markdown for each machine run directory. + - Performs per-machine post-run validation for required scenarios and optional format requirements. 11. **Added matrix completeness validator (2026-02-13)** - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 38fd659ddc..1c84c1b95c 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -43,13 +43,14 @@ node scripts/run-playback-benchmark-matrix.js \ --gpu "" \ --output-dir "$OUT_DIR" \ --fps 60 \ + --require-formats mp4,fragmented \ --input-dir "$INPUT_DIR" ``` Equivalent shortcut: ```bash -pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --input-dir "$INPUT_DIR" +pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --require-formats mp4,fragmented --input-dir "$INPUT_DIR" ``` Examples: diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index f53c16a102..f5a404e487 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -12,6 +12,8 @@ function parseArgs(argv) { fps: 60, recordingPath: null, inputDir: null, + validate: true, + requireFormats: [], }; for (let i = 2; i < argv.length; i++) { @@ -51,6 +53,18 @@ function parseArgs(argv) { options.inputDir = argv[++i] ?? null; continue; } + if (arg === "--skip-validate") { + options.validate = false; + continue; + } + if (arg === "--require-formats") { + const value = argv[++i] ?? 
""; + options.requireFormats = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -58,7 +72,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] + console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] Runs playback benchmark matrix scenarios and writes JSON outputs. @@ -70,7 +84,9 @@ Required: Optional: --fps FPS for benchmark runs (default: 60) --recording-path Specific recording path - --input-dir Recording discovery directory`); + --input-dir Recording discovery directory + --require-formats Required formats for local validation (comma-separated) + --skip-validate Skip post-run validation`); } function run(command, args) { @@ -156,6 +172,26 @@ function main() { aggregatePath, ]); console.log(`Aggregate markdown: ${aggregatePath}`); + + if (options.validate) { + const validateArgs = [ + "scripts/validate-playback-matrix.js", + "--input", + options.outputDir, + "--no-default-matrix", + "--require-cell", + `${options.platform}:${options.gpu}:full`, + "--require-cell", + `${options.platform}:${options.gpu}:scrub`, + ]; + + if (options.requireFormats.length > 0) { + validateArgs.push("--require-formats", options.requireFormats.join(",")); + } + + run("node", validateArgs); + console.log("Matrix run validation passed"); + } } try { From d11bb5453b49b35761d3158866919ffbb1b85d45 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:16:57 +0000 Subject: [PATCH 038/333] improve: validate per-machine matrix runs in helper script Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 +- 
scripts/run-playback-benchmark-matrix.js | 40 ++++++++++++++++++++++-- 4 files changed, 42 insertions(+), 3 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 6da7192bb6..684161ff4d 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -111,6 +111,7 @@ Automated helper for machine runs: ```bash node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir /tmp/playback-matrix-windows-nvidia --fps 60 --require-formats mp4,fragmented --input-dir /tmp/cap-real-device-tests ``` | Platform | GPU Class | MP4 Full | Fragmented Full | MP4 Scrub | Fragmented Scrub | Notes | diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 09d0f85376..8431346cc3 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -182,6 +182,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 10. **Added matrix run helper for platform/GPU benchmark execution (2026-02-13)** - `scripts/run-playback-benchmark-matrix.js` runs `full` and `scrub` scenarios with tagged notes and JSON output. - Automatically generates aggregate markdown for each machine run directory. + - Performs per-machine post-run validation for required scenarios and optional format requirements. 11. **Added matrix completeness validator (2026-02-13)** - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 38fd659ddc..1c84c1b95c 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -43,13 +43,14 @@ node scripts/run-playback-benchmark-matrix.js \ --gpu "" \ --output-dir "$OUT_DIR" \ --fps 60 \ + --require-formats mp4,fragmented \ --input-dir "$INPUT_DIR" ``` Equivalent shortcut: ```bash -pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --input-dir "$INPUT_DIR" +pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --require-formats mp4,fragmented --input-dir "$INPUT_DIR" ``` Examples: diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index f53c16a102..f5a404e487 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -12,6 +12,8 @@ function parseArgs(argv) { fps: 60, recordingPath: null, inputDir: null, + validate: true, + requireFormats: [], }; for (let i = 2; i < argv.length; i++) { @@ -51,6 +53,18 @@ function parseArgs(argv) { options.inputDir = argv[++i] ?? null; continue; } + if (arg === "--skip-validate") { + options.validate = false; + continue; + } + if (arg === "--require-formats") { + const value = argv[++i] ?? ""; + options.requireFormats = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -58,7 +72,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] + console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] Runs playback benchmark matrix scenarios and writes JSON outputs. 
@@ -70,7 +84,9 @@ Required: Optional: --fps FPS for benchmark runs (default: 60) --recording-path Specific recording path - --input-dir Recording discovery directory`); + --input-dir Recording discovery directory + --require-formats Required formats for local validation (comma-separated) + --skip-validate Skip post-run validation`); } function run(command, args) { @@ -156,6 +172,26 @@ function main() { aggregatePath, ]); console.log(`Aggregate markdown: ${aggregatePath}`); + + if (options.validate) { + const validateArgs = [ + "scripts/validate-playback-matrix.js", + "--input", + options.outputDir, + "--no-default-matrix", + "--require-cell", + `${options.platform}:${options.gpu}:full`, + "--require-cell", + `${options.platform}:${options.gpu}:scrub`, + ]; + + if (options.requireFormats.length > 0) { + validateArgs.push("--require-formats", options.requireFormats.join(",")); + } + + run("node", validateArgs); + console.log("Matrix run validation passed"); + } } try { From 3f691a8e0b785d27a00e306ce4abc8310de6803a Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:18:11 +0000 Subject: [PATCH 039/333] improve: emit matrix validation json artifacts Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/run-playback-benchmark-matrix.js | 7 ++++++ scripts/validate-playback-matrix.js | 30 ++++++++++++++++++++---- 5 files changed, 36 insertions(+), 4 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 684161ff4d..63011bb7a9 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -70,6 +70,7 @@ Validate matrix coverage and required formats: ```bash node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented +node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats 
mp4,fragmented --output-json /tmp/playback-matrix-validation.json ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 8431346cc3..a048af1c95 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -188,6 +188,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. - Supports required format checks per cell (mp4 + fragmented). - Root `package.json` now exposes `bench:playback:*` script aliases for matrix, aggregate, and validate flows. + - Can emit structured validation JSON for artifact upload and automation. --- diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 1c84c1b95c..ad778012b3 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -69,6 +69,7 @@ Each run directory contains: - timestamped `full` scenario JSON - timestamped `scrub` scenario JSON - `--aggregate.md` summary table +- `--validation.json` matrix validation result ## Cross-machine aggregation diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index f5a404e487..98b08def7f 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -174,11 +174,17 @@ function main() { console.log(`Aggregate markdown: ${aggregatePath}`); if (options.validate) { + const validationJsonPath = path.join( + options.outputDir, + `${options.platform}-${options.gpu}-validation.json`, + ); const validateArgs = [ "scripts/validate-playback-matrix.js", "--input", options.outputDir, "--no-default-matrix", + "--output-json", + validationJsonPath, "--require-cell", `${options.platform}:${options.gpu}:full`, "--require-cell", @@ -191,6 +197,7 @@ function main() { run("node", validateArgs); console.log("Matrix run validation 
passed"); + console.log(`Validation JSON: ${validationJsonPath}`); } } diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js index 4fc85dc61d..c919369c42 100644 --- a/scripts/validate-playback-matrix.js +++ b/scripts/validate-playback-matrix.js @@ -20,6 +20,7 @@ function parseArgs(argv) { requiredCells: [], requiredFormats: [], useDefaultMatrix: true, + outputJson: null, }; for (let i = 2; i < argv.length; i++) { @@ -57,6 +58,12 @@ function parseArgs(argv) { options.useDefaultMatrix = false; continue; } + if (arg === "--output-json") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-json"); + options.outputJson = path.resolve(value); + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -74,7 +81,7 @@ function parseCell(value) { } function printUsage() { - console.log(`Usage: node scripts/validate-playback-matrix.js --input [--input ...] [--require-cell platform:gpu:scenario ...] [--require-formats mp4,fragmented] + console.log(`Usage: node scripts/validate-playback-matrix.js --input [--input ...] [--require-cell platform:gpu:scenario ...] [--require-formats mp4,fragmented] [--output-json ] Validates that required benchmark matrix cells are present in playback benchmark JSON results. 
@@ -83,6 +90,7 @@ Options: --require-cell Required cell as platform:gpu:scenario (repeatable) --require-formats Comma-separated required formats per cell --no-default-matrix Disable built-in required matrix + --output-json Write validation result JSON file --help, -h Show help`); } @@ -200,8 +208,22 @@ function main() { } } - console.log(`Validated ${requiredCells.length} required cells`); - console.log(`Observed ${observed.size} unique cells`); + const validationResult = { + validatedCells: requiredCells.length, + observedCells: observed.size, + requiredFormats: options.requiredFormats, + missingCells, + formatFailures, + passed: missingCells.length === 0 && formatFailures.length === 0, + }; + + if (options.outputJson) { + fs.writeFileSync(options.outputJson, JSON.stringify(validationResult, null, 2)); + console.log(`Validation JSON: ${options.outputJson}`); + } + + console.log(`Validated ${validationResult.validatedCells} required cells`); + console.log(`Observed ${validationResult.observedCells} unique cells`); if (missingCells.length > 0) { console.log("Missing required cells:"); @@ -219,7 +241,7 @@ function main() { } } - if (missingCells.length > 0 || formatFailures.length > 0) { + if (!validationResult.passed) { process.exit(1); } From f1f90423ae4ecd6b4851f3aec10becc81dab54d6 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:18:11 +0000 Subject: [PATCH 040/333] improve: emit matrix validation json artifacts Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/run-playback-benchmark-matrix.js | 7 ++++++ scripts/validate-playback-matrix.js | 30 ++++++++++++++++++++---- 5 files changed, 36 insertions(+), 4 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 684161ff4d..63011bb7a9 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ 
b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -70,6 +70,7 @@ Validate matrix coverage and required formats: ```bash node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented +node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented --output-json /tmp/playback-matrix-validation.json ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 8431346cc3..a048af1c95 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -188,6 +188,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. - Supports required format checks per cell (mp4 + fragmented). - Root `package.json` now exposes `bench:playback:*` script aliases for matrix, aggregate, and validate flows. + - Can emit structured validation JSON for artifact upload and automation. 
--- diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 1c84c1b95c..ad778012b3 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -69,6 +69,7 @@ Each run directory contains: - timestamped `full` scenario JSON - timestamped `scrub` scenario JSON - `--aggregate.md` summary table +- `--validation.json` matrix validation result ## Cross-machine aggregation diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index f5a404e487..98b08def7f 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -174,11 +174,17 @@ function main() { console.log(`Aggregate markdown: ${aggregatePath}`); if (options.validate) { + const validationJsonPath = path.join( + options.outputDir, + `${options.platform}-${options.gpu}-validation.json`, + ); const validateArgs = [ "scripts/validate-playback-matrix.js", "--input", options.outputDir, "--no-default-matrix", + "--output-json", + validationJsonPath, "--require-cell", `${options.platform}:${options.gpu}:full`, "--require-cell", @@ -191,6 +197,7 @@ function main() { run("node", validateArgs); console.log("Matrix run validation passed"); + console.log(`Validation JSON: ${validationJsonPath}`); } } diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js index 4fc85dc61d..c919369c42 100644 --- a/scripts/validate-playback-matrix.js +++ b/scripts/validate-playback-matrix.js @@ -20,6 +20,7 @@ function parseArgs(argv) { requiredCells: [], requiredFormats: [], useDefaultMatrix: true, + outputJson: null, }; for (let i = 2; i < argv.length; i++) { @@ -57,6 +58,12 @@ function parseArgs(argv) { options.useDefaultMatrix = false; continue; } + if (arg === "--output-json") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-json"); + options.outputJson = path.resolve(value); + continue; + } throw new 
Error(`Unknown argument: ${arg}`); } @@ -74,7 +81,7 @@ function parseCell(value) { } function printUsage() { - console.log(`Usage: node scripts/validate-playback-matrix.js --input [--input ...] [--require-cell platform:gpu:scenario ...] [--require-formats mp4,fragmented] + console.log(`Usage: node scripts/validate-playback-matrix.js --input [--input ...] [--require-cell platform:gpu:scenario ...] [--require-formats mp4,fragmented] [--output-json ] Validates that required benchmark matrix cells are present in playback benchmark JSON results. @@ -83,6 +90,7 @@ Options: --require-cell Required cell as platform:gpu:scenario (repeatable) --require-formats Comma-separated required formats per cell --no-default-matrix Disable built-in required matrix + --output-json Write validation result JSON file --help, -h Show help`); } @@ -200,8 +208,22 @@ function main() { } } - console.log(`Validated ${requiredCells.length} required cells`); - console.log(`Observed ${observed.size} unique cells`); + const validationResult = { + validatedCells: requiredCells.length, + observedCells: observed.size, + requiredFormats: options.requiredFormats, + missingCells, + formatFailures, + passed: missingCells.length === 0 && formatFailures.length === 0, + }; + + if (options.outputJson) { + fs.writeFileSync(options.outputJson, JSON.stringify(validationResult, null, 2)); + console.log(`Validation JSON: ${options.outputJson}`); + } + + console.log(`Validated ${validationResult.validatedCells} required cells`); + console.log(`Observed ${validationResult.observedCells} unique cells`); if (missingCells.length > 0) { console.log("Missing required cells:"); @@ -219,7 +241,7 @@ function main() { } } - if (missingCells.length > 0 || formatFailures.length > 0) { + if (!validationResult.passed) { process.exit(1); } From ded61eedb531c2a9dcefae51b302ac9c71f5dbfb Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:20:37 +0000 Subject: [PATCH 041/333] improve: emit seek settle telemetry in 
playback loop Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a048af1c95..a1fb2349e7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -159,6 +159,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 5. **Playback runtime emits startup latency signals (2026-02-13)** - Playback loop now logs first rendered frame latency. - Audio stream setup now logs startup preparation time and first callback latency. + - Playback loop now logs seek settle latency (`seek_target_frame` to rendered frame). 6. **Decode benchmark now supports machine-readable evidence output (2026-02-13)** - `decode-benchmark` supports `--output-json` for structured metric capture. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b25939cd14..3b2c72411f 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -393,6 +393,7 @@ impl Playback { let mut total_frames_rendered = 0u64; let mut _total_frames_skipped = 0u64; let mut first_render_logged = false; + let mut pending_seek_observation: Option<(u32, Instant)> = None; let warmup_target_frames = 20usize; let warmup_after_first_timeout = Duration::from_millis(1000); @@ -454,6 +455,7 @@ impl Playback { frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; + pending_seek_observation = Some((seek_frame, Instant::now())); prefetch_buffer.retain(|p| p.frame_number >= frame_number); frame_cache.cache.clear(); let _ = frame_request_tx.send(frame_number); @@ -501,6 +503,7 @@ impl Playback { frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; + pending_seek_observation = Some((seek_frame, Instant::now())); prefetch_buffer.retain(|p| p.frame_number >= 
frame_number); frame_cache.cache.clear(); let _ = frame_request_tx.send(frame_number); @@ -737,6 +740,17 @@ impl Playback { "Playback rendered first frame" ); } + if let Some((seek_target_frame, seek_started_at)) = pending_seek_observation + && frame_number >= seek_target_frame + { + info!( + seek_target_frame, + rendered_frame = frame_number, + seek_settle_ms = seek_started_at.elapsed().as_secs_f64() * 1000.0, + "Playback seek settled" + ); + pending_seek_observation = None; + } } event_tx.send(PlaybackEvent::Frame(frame_number)).ok(); From 45482c41986d9b94697bbb3c69298835f42106fc Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:20:37 +0000 Subject: [PATCH 042/333] improve: emit seek settle telemetry in playback loop Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a048af1c95..a1fb2349e7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -159,6 +159,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 5. **Playback runtime emits startup latency signals (2026-02-13)** - Playback loop now logs first rendered frame latency. - Audio stream setup now logs startup preparation time and first callback latency. + - Playback loop now logs seek settle latency (`seek_target_frame` to rendered frame). 6. **Decode benchmark now supports machine-readable evidence output (2026-02-13)** - `decode-benchmark` supports `--output-json` for structured metric capture. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b25939cd14..3b2c72411f 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -393,6 +393,7 @@ impl Playback { let mut total_frames_rendered = 0u64; let mut _total_frames_skipped = 0u64; let mut first_render_logged = false; + let mut pending_seek_observation: Option<(u32, Instant)> = None; let warmup_target_frames = 20usize; let warmup_after_first_timeout = Duration::from_millis(1000); @@ -454,6 +455,7 @@ impl Playback { frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; + pending_seek_observation = Some((seek_frame, Instant::now())); prefetch_buffer.retain(|p| p.frame_number >= frame_number); frame_cache.cache.clear(); let _ = frame_request_tx.send(frame_number); @@ -501,6 +503,7 @@ impl Playback { frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; + pending_seek_observation = Some((seek_frame, Instant::now())); prefetch_buffer.retain(|p| p.frame_number >= frame_number); frame_cache.cache.clear(); let _ = frame_request_tx.send(frame_number); @@ -737,6 +740,17 @@ impl Playback { "Playback rendered first frame" ); } + if let Some((seek_target_frame, seek_started_at)) = pending_seek_observation + && frame_number >= seek_target_frame + { + info!( + seek_target_frame, + rendered_frame = frame_number, + seek_settle_ms = seek_started_at.elapsed().as_secs_f64() * 1000.0, + "Playback seek settled" + ); + pending_seek_observation = None; + } } event_tx.send(PlaybackEvent::Frame(frame_number)).ok(); From 2a68b40dfa43ad45acc9cd7fe063f1d95f006e11 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:22:20 +0000 Subject: [PATCH 043/333] improve: add playback matrix status report generator Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 6 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 + 
package.json | 1 + scripts/build-playback-matrix-report.js | 289 +++++++++++++++++++++++ 5 files changed, 299 insertions(+) create mode 100644 scripts/build-playback-matrix-report.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 63011bb7a9..c9d1cd95b5 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -64,6 +64,7 @@ Aggregate JSON outputs from multiple machines: ```bash node scripts/aggregate-playback-benchmarks.js --input /path/to/json-results --output /tmp/playback-benchmark-aggregate.md +node scripts/build-playback-matrix-report.js --input /path/to/json-results --output /tmp/playback-matrix-status.md ``` Validate matrix coverage and required formats: diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a1fb2349e7..8636b73cda 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -191,6 +191,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Root `package.json` now exposes `bench:playback:*` script aliases for matrix, aggregate, and validate flows. - Can emit structured validation JSON for artifact upload and automation. +12. **Added matrix status report generator (2026-02-13)** + - `scripts/build-playback-matrix-report.js` generates concise matrix markdown from JSON results. + - Highlights missing cells, scenario pass/fail, and format coverage per platform/GPU row. + --- ## Root Cause Analysis Archive @@ -291,6 +295,7 @@ Decoder Pipeline: 13. Added cross-platform benchmark JSON aggregation utility script. 14. Added matrix execution helper script for full + scrub benchmark runs per platform/GPU. 15. Added matrix validation script for required cell and format coverage checks. +16. Added matrix status report generator for concise artifact summaries. 
**Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -307,6 +312,7 @@ Decoder Pipeline: - `scripts/aggregate-playback-benchmarks.js`: added markdown aggregation for multiple playback benchmark JSON artifacts. - `scripts/run-playback-benchmark-matrix.js`: added orchestrated full/scrub benchmark runner with per-machine aggregate generation. - `scripts/validate-playback-matrix.js`: added required matrix cell/format validation for aggregated evidence. +- `scripts/build-playback-matrix-report.js`: added concise matrix status report generation from JSON benchmark outputs. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index ad778012b3..b0f5b78ea6 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -77,12 +77,14 @@ After collecting all machine folders into a shared root: ```bash node scripts/aggregate-playback-benchmarks.js --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md +node scripts/build-playback-matrix-report.js --input /path/to/all-machine-results --output /tmp/playback-matrix-status.md ``` Equivalent shortcut: ```bash pnpm bench:playback:aggregate -- --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md +pnpm bench:playback:report -- --input /path/to/all-machine-results --output /tmp/playback-matrix-status.md ``` Validate matrix completeness: diff --git a/package.json b/package.json index dc4ad744af..f8346043a0 100644 --- a/package.json +++ b/package.json @@ -44,6 +44,7 @@ "bench:playback:matrix": "node scripts/run-playback-benchmark-matrix.js", "bench:playback:aggregate": "node scripts/aggregate-playback-benchmarks.js", "bench:playback:validate": "node scripts/validate-playback-matrix.js", + "bench:playback:report": "node 
scripts/build-playback-matrix-report.js", "clean": "find . -name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/build-playback-matrix-report.js b/scripts/build-playback-matrix-report.js new file mode 100644 index 0000000000..a07b1876c5 --- /dev/null +++ b/scripts/build-playback-matrix-report.js @@ -0,0 +1,289 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +const DEFAULT_REQUIRED_CELLS = [ + { platform: "macos-13", gpu: "apple-silicon", scenario: "full" }, + { platform: "macos-13", gpu: "apple-silicon", scenario: "scrub" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "integrated", scenario: "full" }, + { platform: "windows-11", gpu: "integrated", scenario: "scrub" }, +]; + +function parseArgs(argv) { + const options = { + inputs: [], + output: null, + useDefaultMatrix: true, + requiredCells: [], + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--output" || arg === "-o") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output"); + options.output = path.resolve(value); + continue; + } + if (arg === "--no-default-matrix") { + options.useDefaultMatrix = false; + continue; + } + if (arg === 
"--require-cell") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --require-cell"); + options.requiredCells.push(parseCell(value)); + options.useDefaultMatrix = false; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function parseCell(value) { + const [platform, gpu, scenario] = value.split(":"); + if (!platform || !gpu || !scenario) { + throw new Error( + `Invalid --require-cell format: ${value}. Expected platform:gpu:scenario`, + ); + } + return { platform, gpu, scenario }; +} + +function usage() { + console.log(`Usage: node scripts/build-playback-matrix-report.js --input [--input ...] [--output ] + +Builds a concise playback matrix markdown report from playback benchmark JSON outputs.`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const result = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + result[key.trim()] = value.trim(); + } + return result; +} + +function cellKey(platform, gpu, scenario) { + return `${platform}|${gpu}|${scenario}`; +} + +function platformGpuKey(platform, gpu) { + return `${platform}|${gpu}`; +} + +function timestampOrEpoch(value) { + const parsed = Date.parse(value ?? 
""); + return Number.isNaN(parsed) ? 0 : parsed; +} + +function upsertLatestCell(cells, candidate) { + const key = cellKey(candidate.platform, candidate.gpu, candidate.scenario); + const existing = cells.get(key); + if (!existing || timestampOrEpoch(candidate.generatedAt) >= timestampOrEpoch(existing.generatedAt)) { + cells.set(key, candidate); + } +} + +function collectData(files) { + const latestCells = new Map(); + const formatCoverage = new Map(); + + for (const filePath of files) { + const raw = fs.readFileSync(filePath, "utf8"); + const parsed = JSON.parse(raw); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + const pass = reports.every((report) => report.overall_passed === true); + const generatedAt = parsed.generated_at_utc ?? ""; + + const formats = new Set(); + for (const report of reports) { + formats.add(report.is_fragmented ? "fragmented" : "mp4"); + } + + upsertLatestCell(latestCells, { + platform, + gpu, + scenario, + pass, + generatedAt, + filePath, + formats, + }); + + const pgKey = platformGpuKey(platform, gpu); + if (!formatCoverage.has(pgKey)) { + formatCoverage.set(pgKey, new Set()); + } + for (const format of formats) { + formatCoverage.get(pgKey).add(format); + } + } + + return { latestCells, formatCoverage }; +} + +function formatStatus(entry) { + if (!entry) return "MISSING"; + return entry.pass ? 
"PASS" : "FAIL"; +} + +function formatCoverageStatus(formats, target) { + if (!formats || !formats.has(target)) return "NO"; + return "YES"; +} + +function buildReport(requiredCells, latestCells, formatCoverage) { + const platformGpuPairs = new Map(); + for (const cell of requiredCells) { + const key = platformGpuKey(cell.platform, cell.gpu); + if (!platformGpuPairs.has(key)) { + platformGpuPairs.set(key, { platform: cell.platform, gpu: cell.gpu }); + } + } + + const rows = []; + let missingCount = 0; + let failCount = 0; + for (const { platform, gpu } of platformGpuPairs.values()) { + const full = latestCells.get(cellKey(platform, gpu, "full")); + const scrub = latestCells.get(cellKey(platform, gpu, "scrub")); + const formats = formatCoverage.get(platformGpuKey(platform, gpu)); + const fullStatus = formatStatus(full); + const scrubStatus = formatStatus(scrub); + if (fullStatus === "MISSING" || scrubStatus === "MISSING") { + missingCount += 1; + } + if (fullStatus === "FAIL" || scrubStatus === "FAIL") { + failCount += 1; + } + rows.push({ + platform, + gpu, + fullStatus, + scrubStatus, + mp4: formatCoverageStatus(formats, "mp4"), + fragmented: formatCoverageStatus(formats, "fragmented"), + fullTime: full?.generatedAt ?? "n/a", + scrubTime: scrub?.generatedAt ?? 
"n/a", + }); + } + + let markdown = ""; + markdown += "# Playback Matrix Status Report\n\n"; + markdown += `Generated: ${new Date().toISOString()}\n\n`; + markdown += `Rows: ${rows.length}, Missing rows: ${missingCount}, Rows with failures: ${failCount}\n\n`; + markdown += "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; + markdown += "|---|---|---|---|---|---|---|---|\n"; + for (const row of rows) { + markdown += `| ${row.platform} | ${row.gpu} | ${row.fullStatus} | ${row.scrubStatus} | ${row.mp4} | ${row.fragmented} | ${row.fullTime} | ${row.scrubTime} |\n`; + } + markdown += "\n"; + + const missingCells = requiredCells.filter((cell) => { + return !latestCells.has(cellKey(cell.platform, cell.gpu, cell.scenario)); + }); + if (missingCells.length > 0) { + markdown += "## Missing Cells\n\n"; + for (const cell of missingCells) { + markdown += `- ${cell.platform}:${cell.gpu}:${cell.scenario}\n`; + } + markdown += "\n"; + } + + return markdown; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const inputPath of options.inputs) { + for (const filePath of collectJsonFiles(inputPath)) { + files.add(filePath); + } + } + if (files.size === 0) { + throw new Error("No JSON files found"); + } + + const requiredCells = options.useDefaultMatrix + ? 
[...DEFAULT_REQUIRED_CELLS, ...options.requiredCells] + : options.requiredCells; + if (requiredCells.length === 0) { + throw new Error("No required cells configured"); + } + + const { latestCells, formatCoverage } = collectData([...files]); + const report = buildReport(requiredCells, latestCells, formatCoverage); + + if (options.output) { + fs.writeFileSync(options.output, report, "utf8"); + console.log(`Wrote matrix report to ${options.output}`); + } else { + process.stdout.write(report); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} From 9d8fe153ca56e8aea38356208908e04f692e9a1d Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:22:20 +0000 Subject: [PATCH 044/333] improve: add playback matrix status report generator Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 6 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 + package.json | 1 + scripts/build-playback-matrix-report.js | 289 +++++++++++++++++++++++ 5 files changed, 299 insertions(+) create mode 100644 scripts/build-playback-matrix-report.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 63011bb7a9..c9d1cd95b5 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -64,6 +64,7 @@ Aggregate JSON outputs from multiple machines: ```bash node scripts/aggregate-playback-benchmarks.js --input /path/to/json-results --output /tmp/playback-benchmark-aggregate.md +node scripts/build-playback-matrix-report.js --input /path/to/json-results --output /tmp/playback-matrix-status.md ``` Validate matrix coverage and required formats: diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a1fb2349e7..8636b73cda 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -191,6 +191,10 @@ cargo run 
-p cap-recording --example playback-test-runner -- full - Root `package.json` now exposes `bench:playback:*` script aliases for matrix, aggregate, and validate flows. - Can emit structured validation JSON for artifact upload and automation. +12. **Added matrix status report generator (2026-02-13)** + - `scripts/build-playback-matrix-report.js` generates concise matrix markdown from JSON results. + - Highlights missing cells, scenario pass/fail, and format coverage per platform/GPU row. + --- ## Root Cause Analysis Archive @@ -291,6 +295,7 @@ Decoder Pipeline: 13. Added cross-platform benchmark JSON aggregation utility script. 14. Added matrix execution helper script for full + scrub benchmark runs per platform/GPU. 15. Added matrix validation script for required cell and format coverage checks. +16. Added matrix status report generator for concise artifact summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -307,6 +312,7 @@ Decoder Pipeline: - `scripts/aggregate-playback-benchmarks.js`: added markdown aggregation for multiple playback benchmark JSON artifacts. - `scripts/run-playback-benchmark-matrix.js`: added orchestrated full/scrub benchmark runner with per-machine aggregate generation. - `scripts/validate-playback-matrix.js`: added required matrix cell/format validation for aggregated evidence. +- `scripts/build-playback-matrix-report.js`: added concise matrix status report generation from JSON benchmark outputs. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index ad778012b3..b0f5b78ea6 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -77,12 +77,14 @@ After collecting all machine folders into a shared root: ```bash node scripts/aggregate-playback-benchmarks.js --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md +node scripts/build-playback-matrix-report.js --input /path/to/all-machine-results --output /tmp/playback-matrix-status.md ``` Equivalent shortcut: ```bash pnpm bench:playback:aggregate -- --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md +pnpm bench:playback:report -- --input /path/to/all-machine-results --output /tmp/playback-matrix-status.md ``` Validate matrix completeness: diff --git a/package.json b/package.json index dc4ad744af..f8346043a0 100644 --- a/package.json +++ b/package.json @@ -44,6 +44,7 @@ "bench:playback:matrix": "node scripts/run-playback-benchmark-matrix.js", "bench:playback:aggregate": "node scripts/aggregate-playback-benchmarks.js", "bench:playback:validate": "node scripts/validate-playback-matrix.js", + "bench:playback:report": "node scripts/build-playback-matrix-report.js", "clean": "find . 
-name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/build-playback-matrix-report.js b/scripts/build-playback-matrix-report.js new file mode 100644 index 0000000000..a07b1876c5 --- /dev/null +++ b/scripts/build-playback-matrix-report.js @@ -0,0 +1,289 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +const DEFAULT_REQUIRED_CELLS = [ + { platform: "macos-13", gpu: "apple-silicon", scenario: "full" }, + { platform: "macos-13", gpu: "apple-silicon", scenario: "scrub" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "integrated", scenario: "full" }, + { platform: "windows-11", gpu: "integrated", scenario: "scrub" }, +]; + +function parseArgs(argv) { + const options = { + inputs: [], + output: null, + useDefaultMatrix: true, + requiredCells: [], + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--output" || arg === "-o") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output"); + options.output = path.resolve(value); + continue; + } + if (arg === "--no-default-matrix") { + options.useDefaultMatrix = false; + continue; + } + if (arg === "--require-cell") { + const value = argv[++i]; + if 
(!value) throw new Error("Missing value for --require-cell"); + options.requiredCells.push(parseCell(value)); + options.useDefaultMatrix = false; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function parseCell(value) { + const [platform, gpu, scenario] = value.split(":"); + if (!platform || !gpu || !scenario) { + throw new Error( + `Invalid --require-cell format: ${value}. Expected platform:gpu:scenario`, + ); + } + return { platform, gpu, scenario }; +} + +function usage() { + console.log(`Usage: node scripts/build-playback-matrix-report.js --input [--input ...] [--output ] + +Builds a concise playback matrix markdown report from playback benchmark JSON outputs.`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const result = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + result[key.trim()] = value.trim(); + } + return result; +} + +function cellKey(platform, gpu, scenario) { + return `${platform}|${gpu}|${scenario}`; +} + +function platformGpuKey(platform, gpu) { + return `${platform}|${gpu}`; +} + +function timestampOrEpoch(value) { + const parsed = Date.parse(value ?? ""); + return Number.isNaN(parsed) ? 
0 : parsed; +} + +function upsertLatestCell(cells, candidate) { + const key = cellKey(candidate.platform, candidate.gpu, candidate.scenario); + const existing = cells.get(key); + if (!existing || timestampOrEpoch(candidate.generatedAt) >= timestampOrEpoch(existing.generatedAt)) { + cells.set(key, candidate); + } +} + +function collectData(files) { + const latestCells = new Map(); + const formatCoverage = new Map(); + + for (const filePath of files) { + const raw = fs.readFileSync(filePath, "utf8"); + const parsed = JSON.parse(raw); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + const pass = reports.every((report) => report.overall_passed === true); + const generatedAt = parsed.generated_at_utc ?? ""; + + const formats = new Set(); + for (const report of reports) { + formats.add(report.is_fragmented ? "fragmented" : "mp4"); + } + + upsertLatestCell(latestCells, { + platform, + gpu, + scenario, + pass, + generatedAt, + filePath, + formats, + }); + + const pgKey = platformGpuKey(platform, gpu); + if (!formatCoverage.has(pgKey)) { + formatCoverage.set(pgKey, new Set()); + } + for (const format of formats) { + formatCoverage.get(pgKey).add(format); + } + } + + return { latestCells, formatCoverage }; +} + +function formatStatus(entry) { + if (!entry) return "MISSING"; + return entry.pass ? 
"PASS" : "FAIL"; +} + +function formatCoverageStatus(formats, target) { + if (!formats || !formats.has(target)) return "NO"; + return "YES"; +} + +function buildReport(requiredCells, latestCells, formatCoverage) { + const platformGpuPairs = new Map(); + for (const cell of requiredCells) { + const key = platformGpuKey(cell.platform, cell.gpu); + if (!platformGpuPairs.has(key)) { + platformGpuPairs.set(key, { platform: cell.platform, gpu: cell.gpu }); + } + } + + const rows = []; + let missingCount = 0; + let failCount = 0; + for (const { platform, gpu } of platformGpuPairs.values()) { + const full = latestCells.get(cellKey(platform, gpu, "full")); + const scrub = latestCells.get(cellKey(platform, gpu, "scrub")); + const formats = formatCoverage.get(platformGpuKey(platform, gpu)); + const fullStatus = formatStatus(full); + const scrubStatus = formatStatus(scrub); + if (fullStatus === "MISSING" || scrubStatus === "MISSING") { + missingCount += 1; + } + if (fullStatus === "FAIL" || scrubStatus === "FAIL") { + failCount += 1; + } + rows.push({ + platform, + gpu, + fullStatus, + scrubStatus, + mp4: formatCoverageStatus(formats, "mp4"), + fragmented: formatCoverageStatus(formats, "fragmented"), + fullTime: full?.generatedAt ?? "n/a", + scrubTime: scrub?.generatedAt ?? 
"n/a", + }); + } + + let markdown = ""; + markdown += "# Playback Matrix Status Report\n\n"; + markdown += `Generated: ${new Date().toISOString()}\n\n`; + markdown += `Rows: ${rows.length}, Missing rows: ${missingCount}, Rows with failures: ${failCount}\n\n`; + markdown += "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; + markdown += "|---|---|---|---|---|---|---|---|\n"; + for (const row of rows) { + markdown += `| ${row.platform} | ${row.gpu} | ${row.fullStatus} | ${row.scrubStatus} | ${row.mp4} | ${row.fragmented} | ${row.fullTime} | ${row.scrubTime} |\n`; + } + markdown += "\n"; + + const missingCells = requiredCells.filter((cell) => { + return !latestCells.has(cellKey(cell.platform, cell.gpu, cell.scenario)); + }); + if (missingCells.length > 0) { + markdown += "## Missing Cells\n\n"; + for (const cell of missingCells) { + markdown += `- ${cell.platform}:${cell.gpu}:${cell.scenario}\n`; + } + markdown += "\n"; + } + + return markdown; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const inputPath of options.inputs) { + for (const filePath of collectJsonFiles(inputPath)) { + files.add(filePath); + } + } + if (files.size === 0) { + throw new Error("No JSON files found"); + } + + const requiredCells = options.useDefaultMatrix + ? 
[...DEFAULT_REQUIRED_CELLS, ...options.requiredCells] + : options.requiredCells; + if (requiredCells.length === 0) { + throw new Error("No required cells configured"); + } + + const { latestCells, formatCoverage } = collectData([...files]); + const report = buildReport(requiredCells, latestCells, formatCoverage); + + if (options.output) { + fs.writeFileSync(options.output, report, "utf8"); + console.log(`Wrote matrix report to ${options.output}`); + } else { + process.stdout.write(report); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} From 3df4bf8f2b748f7c29d13f89e12297bb888384d4 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:23:31 +0000 Subject: [PATCH 045/333] improve: support scenario subset runs in matrix helper Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 9 +++++- scripts/run-playback-benchmark-matrix.js | 37 +++++++++++++++++++----- 4 files changed, 40 insertions(+), 8 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index c9d1cd95b5..81819e2e5f 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -114,6 +114,7 @@ Automated helper for machine runs: ```bash node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --input-dir /tmp/cap-real-device-tests node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir /tmp/playback-matrix-windows-nvidia --fps 60 --require-formats mp4,fragmented --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu integrated --output-dir /tmp/playback-matrix-windows-integrated --fps 60 --scenarios scrub --input-dir 
/tmp/cap-real-device-tests ``` | Platform | GPU Class | MP4 Full | Fragmented Full | MP4 Scrub | Fragmented Scrub | Notes | diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 8636b73cda..88dc25adf2 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -184,6 +184,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/run-playback-benchmark-matrix.js` runs `full` and `scrub` scenarios with tagged notes and JSON output. - Automatically generates aggregate markdown for each machine run directory. - Performs per-machine post-run validation for required scenarios and optional format requirements. + - Supports scenario subset reruns via `--scenarios` for faster targeted validation. 11. **Added matrix completeness validator (2026-02-13)** - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index b0f5b78ea6..a7644dc888 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -44,13 +44,20 @@ node scripts/run-playback-benchmark-matrix.js \ --output-dir "$OUT_DIR" \ --fps 60 \ --require-formats mp4,fragmented \ + --scenarios full,scrub \ --input-dir "$INPUT_DIR" ``` Equivalent shortcut: ```bash -pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --require-formats mp4,fragmented --input-dir "$INPUT_DIR" +pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --require-formats mp4,fragmented --scenarios full,scrub --input-dir "$INPUT_DIR" +``` + +Rerun only scrub scenario for a machine: + +```bash +pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --scenarios scrub --input-dir "$INPUT_DIR" ``` Examples: diff --git a/scripts/run-playback-benchmark-matrix.js 
b/scripts/run-playback-benchmark-matrix.js index 98b08def7f..89f0a26e90 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -14,6 +14,7 @@ function parseArgs(argv) { inputDir: null, validate: true, requireFormats: [], + scenarios: ["full", "scrub"], }; for (let i = 2; i < argv.length; i++) { @@ -65,6 +66,18 @@ function parseArgs(argv) { .filter(Boolean); continue; } + if (arg === "--scenarios") { + const value = argv[++i] ?? ""; + const scenarios = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + if (scenarios.length === 0) { + throw new Error("Invalid --scenarios value"); + } + options.scenarios = scenarios; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -72,7 +85,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] + console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] [--scenarios full,scrub] Runs playback benchmark matrix scenarios and writes JSON outputs. 
@@ -86,6 +99,7 @@ Optional: --recording-path Specific recording path --input-dir Recording discovery directory --require-formats Required formats for local validation (comma-separated) + --scenarios Scenarios to run (comma-separated; default: full,scrub) --skip-validate Skip post-run validation`); } @@ -139,6 +153,12 @@ function validateOptions(options) { if (!options.platform || !options.gpu || !options.outputDir) { throw new Error("Missing required options: --platform, --gpu, --output-dir"); } + const validScenarios = new Set(["full", "scrub", "decoder", "playback", "audio-sync", "camera-sync"]); + for (const scenario of options.scenarios) { + if (!validScenarios.has(scenario)) { + throw new Error(`Unsupported scenario: ${scenario}`); + } + } const absoluteOutputDir = path.resolve(options.outputDir); options.outputDir = absoluteOutputDir; @@ -157,8 +177,9 @@ function main() { validateOptions(options); console.log(`Running matrix for platform=${options.platform} gpu=${options.gpu}`); - run("cargo", scenarioArgs(options, "full")); - run("cargo", scenarioArgs(options, "scrub")); + for (const scenario of options.scenarios) { + run("cargo", scenarioArgs(options, scenario)); + } const aggregatePath = path.join( options.outputDir, @@ -185,11 +206,13 @@ function main() { "--no-default-matrix", "--output-json", validationJsonPath, - "--require-cell", - `${options.platform}:${options.gpu}:full`, - "--require-cell", - `${options.platform}:${options.gpu}:scrub`, ]; + for (const scenario of options.scenarios) { + validateArgs.push( + "--require-cell", + `${options.platform}:${options.gpu}:${scenario}`, + ); + } if (options.requireFormats.length > 0) { validateArgs.push("--require-formats", options.requireFormats.join(",")); From 27c413e7b50e300c7279a4eebd1dab1eed00850c Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:23:31 +0000 Subject: [PATCH 046/333] improve: support scenario subset runs in matrix helper Co-authored-by: Richie McIlroy --- 
crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 9 +++++- scripts/run-playback-benchmark-matrix.js | 37 +++++++++++++++++++----- 4 files changed, 40 insertions(+), 8 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index c9d1cd95b5..81819e2e5f 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -114,6 +114,7 @@ Automated helper for machine runs: ```bash node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --input-dir /tmp/cap-real-device-tests node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir /tmp/playback-matrix-windows-nvidia --fps 60 --require-formats mp4,fragmented --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu integrated --output-dir /tmp/playback-matrix-windows-integrated --fps 60 --scenarios scrub --input-dir /tmp/cap-real-device-tests ``` | Platform | GPU Class | MP4 Full | Fragmented Full | MP4 Scrub | Fragmented Scrub | Notes | diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 8636b73cda..88dc25adf2 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -184,6 +184,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/run-playback-benchmark-matrix.js` runs `full` and `scrub` scenarios with tagged notes and JSON output. - Automatically generates aggregate markdown for each machine run directory. - Performs per-machine post-run validation for required scenarios and optional format requirements. + - Supports scenario subset reruns via `--scenarios` for faster targeted validation. 11. 
**Added matrix completeness validator (2026-02-13)** - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index b0f5b78ea6..a7644dc888 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -44,13 +44,20 @@ node scripts/run-playback-benchmark-matrix.js \ --output-dir "$OUT_DIR" \ --fps 60 \ --require-formats mp4,fragmented \ + --scenarios full,scrub \ --input-dir "$INPUT_DIR" ``` Equivalent shortcut: ```bash -pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --require-formats mp4,fragmented --input-dir "$INPUT_DIR" +pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --require-formats mp4,fragmented --scenarios full,scrub --input-dir "$INPUT_DIR" +``` + +Rerun only scrub scenario for a machine: + +```bash +pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --scenarios scrub --input-dir "$INPUT_DIR" ``` Examples: diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index 98b08def7f..89f0a26e90 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -14,6 +14,7 @@ function parseArgs(argv) { inputDir: null, validate: true, requireFormats: [], + scenarios: ["full", "scrub"], }; for (let i = 2; i < argv.length; i++) { @@ -65,6 +66,18 @@ function parseArgs(argv) { .filter(Boolean); continue; } + if (arg === "--scenarios") { + const value = argv[++i] ?? 
""; + const scenarios = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + if (scenarios.length === 0) { + throw new Error("Invalid --scenarios value"); + } + options.scenarios = scenarios; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -72,7 +85,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] + console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] [--scenarios full,scrub] Runs playback benchmark matrix scenarios and writes JSON outputs. @@ -86,6 +99,7 @@ Optional: --recording-path Specific recording path --input-dir Recording discovery directory --require-formats Required formats for local validation (comma-separated) + --scenarios Scenarios to run (comma-separated; default: full,scrub) --skip-validate Skip post-run validation`); } @@ -139,6 +153,12 @@ function validateOptions(options) { if (!options.platform || !options.gpu || !options.outputDir) { throw new Error("Missing required options: --platform, --gpu, --output-dir"); } + const validScenarios = new Set(["full", "scrub", "decoder", "playback", "audio-sync", "camera-sync"]); + for (const scenario of options.scenarios) { + if (!validScenarios.has(scenario)) { + throw new Error(`Unsupported scenario: ${scenario}`); + } + } const absoluteOutputDir = path.resolve(options.outputDir); options.outputDir = absoluteOutputDir; @@ -157,8 +177,9 @@ function main() { validateOptions(options); console.log(`Running matrix for platform=${options.platform} gpu=${options.gpu}`); - run("cargo", scenarioArgs(options, "full")); - run("cargo", scenarioArgs(options, "scrub")); + for (const scenario of options.scenarios) { + run("cargo", scenarioArgs(options, scenario)); + } 
const aggregatePath = path.join( options.outputDir, @@ -185,11 +206,13 @@ function main() { "--no-default-matrix", "--output-json", validationJsonPath, - "--require-cell", - `${options.platform}:${options.gpu}:full`, - "--require-cell", - `${options.platform}:${options.gpu}:scrub`, ]; + for (const scenario of options.scenarios) { + validateArgs.push( + "--require-cell", + `${options.platform}:${options.gpu}:${scenario}`, + ); + } if (options.requireFormats.length > 0) { validateArgs.push("--require-formats", options.requireFormats.join(",")); From 2dc7a831115041632575ba8322b5b03b5d361093 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:24:50 +0000 Subject: [PATCH 047/333] improve: add one-shot playback matrix finalization command Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 6 ++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 ++ package.json | 1 + scripts/finalize-playback-matrix.js | 114 +++++++++++++++++++++++ 5 files changed, 130 insertions(+) create mode 100644 scripts/finalize-playback-matrix.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 81819e2e5f..fec61c3ad6 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -72,6 +72,9 @@ Validate matrix coverage and required formats: ```bash node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented --output-json /tmp/playback-matrix-validation.json + +# Finalize aggregate + status + validation artifacts +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 88dc25adf2..4a0d81e88b 
100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -196,6 +196,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/build-playback-matrix-report.js` generates concise matrix markdown from JSON results. - Highlights missing cells, scenario pass/fail, and format coverage per platform/GPU row. +13. **Added matrix finalization helper (2026-02-13)** + - `scripts/finalize-playback-matrix.js` generates aggregate markdown, status markdown, and validation JSON in one command. + - Supports optional required format enforcement during finalization. + --- ## Root Cause Analysis Archive @@ -297,6 +301,7 @@ Decoder Pipeline: 14. Added matrix execution helper script for full + scrub benchmark runs per platform/GPU. 15. Added matrix validation script for required cell and format coverage checks. 16. Added matrix status report generator for concise artifact summaries. +17. Added one-shot finalization script for aggregate + status + validation outputs. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -314,6 +319,7 @@ Decoder Pipeline: - `scripts/run-playback-benchmark-matrix.js`: added orchestrated full/scrub benchmark runner with per-machine aggregate generation. - `scripts/validate-playback-matrix.js`: added required matrix cell/format validation for aggregated evidence. - `scripts/build-playback-matrix-report.js`: added concise matrix status report generation from JSON benchmark outputs. +- `scripts/finalize-playback-matrix.js`: added one-shot matrix artifact finalization workflow. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index a7644dc888..de3db31808 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -106,6 +106,12 @@ Equivalent shortcut: pnpm bench:playback:validate -- --input /path/to/all-machine-results --require-formats mp4,fragmented ``` +One-shot finalize command: + +```bash +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented +``` + ## Evidence checklist 1. Confirm all matrix rows exist. diff --git a/package.json b/package.json index f8346043a0..c8bdea3b17 100644 --- a/package.json +++ b/package.json @@ -45,6 +45,7 @@ "bench:playback:aggregate": "node scripts/aggregate-playback-benchmarks.js", "bench:playback:validate": "node scripts/validate-playback-matrix.js", "bench:playback:report": "node scripts/build-playback-matrix-report.js", + "bench:playback:finalize": "node scripts/finalize-playback-matrix.js", "clean": "find . 
-name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js new file mode 100644 index 0000000000..886f2fc52b --- /dev/null +++ b/scripts/finalize-playback-matrix.js @@ -0,0 +1,114 @@ +#!/usr/bin/env node + +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + inputs: [], + outputDir: null, + requireFormats: [], + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--output-dir" || arg === "-o") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-dir"); + options.outputDir = path.resolve(value); + continue; + } + if (arg === "--require-formats") { + const value = argv[++i] ?? ""; + options.requireFormats = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] 
--output-dir [--require-formats mp4,fragmented] + +Generates aggregate markdown, status markdown, and validation JSON for collected playback matrix outputs.`); +} + +function run(command, args) { + const result = spawnSync(command, args, { stdio: "inherit" }); + if (result.status !== 0) { + throw new Error(`Command failed: ${command} ${args.join(" ")}`); + } +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + if (!options.outputDir) { + throw new Error("--output-dir is required"); + } + if (!fs.existsSync(options.outputDir)) { + fs.mkdirSync(options.outputDir, { recursive: true }); + } + + const aggregatePath = path.join(options.outputDir, "playback-benchmark-aggregate.md"); + const statusPath = path.join(options.outputDir, "playback-matrix-status.md"); + const validationPath = path.join(options.outputDir, "playback-matrix-validation.json"); + + const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; + const statusArgs = ["scripts/build-playback-matrix-report.js"]; + const validateArgs = [ + "scripts/validate-playback-matrix.js", + "--output-json", + validationPath, + ]; + + for (const input of options.inputs) { + aggregateArgs.push("--input", input); + statusArgs.push("--input", input); + validateArgs.push("--input", input); + } + + aggregateArgs.push("--output", aggregatePath); + statusArgs.push("--output", statusPath); + if (options.requireFormats.length > 0) { + validateArgs.push("--require-formats", options.requireFormats.join(",")); + } + + run("node", aggregateArgs); + run("node", statusArgs); + run("node", validateArgs); + + console.log(`Aggregate markdown: ${aggregatePath}`); + console.log(`Status markdown: ${statusPath}`); + console.log(`Validation JSON: ${validationPath}`); +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} From 4f65f7d3dd60445b44a0fecb499672665a3c3ced Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:24:50 +0000 Subject: [PATCH 048/333] improve: add one-shot playback matrix finalization command Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 6 ++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 ++ package.json | 1 + scripts/finalize-playback-matrix.js | 114 +++++++++++++++++++++++ 5 files changed, 130 insertions(+) create mode 100644 scripts/finalize-playback-matrix.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 81819e2e5f..fec61c3ad6 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -72,6 +72,9 @@ Validate matrix coverage and required formats: ```bash node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented --output-json /tmp/playback-matrix-validation.json + +# Finalize aggregate + status + validation artifacts +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 88dc25adf2..4a0d81e88b 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -196,6 +196,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/build-playback-matrix-report.js` generates concise matrix markdown from JSON results. - Highlights missing cells, scenario pass/fail, and format coverage per platform/GPU row. +13. 
**Added matrix finalization helper (2026-02-13)** + - `scripts/finalize-playback-matrix.js` generates aggregate markdown, status markdown, and validation JSON in one command. + - Supports optional required format enforcement during finalization. + --- ## Root Cause Analysis Archive @@ -297,6 +301,7 @@ Decoder Pipeline: 14. Added matrix execution helper script for full + scrub benchmark runs per platform/GPU. 15. Added matrix validation script for required cell and format coverage checks. 16. Added matrix status report generator for concise artifact summaries. +17. Added one-shot finalization script for aggregate + status + validation outputs. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -314,6 +319,7 @@ Decoder Pipeline: - `scripts/run-playback-benchmark-matrix.js`: added orchestrated full/scrub benchmark runner with per-machine aggregate generation. - `scripts/validate-playback-matrix.js`: added required matrix cell/format validation for aggregated evidence. - `scripts/build-playback-matrix-report.js`: added concise matrix status report generation from JSON benchmark outputs. +- `scripts/finalize-playback-matrix.js`: added one-shot matrix artifact finalization workflow. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index a7644dc888..de3db31808 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -106,6 +106,12 @@ Equivalent shortcut: pnpm bench:playback:validate -- --input /path/to/all-machine-results --require-formats mp4,fragmented ``` +One-shot finalize command: + +```bash +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented +``` + ## Evidence checklist 1. Confirm all matrix rows exist. 
diff --git a/package.json b/package.json index f8346043a0..c8bdea3b17 100644 --- a/package.json +++ b/package.json @@ -45,6 +45,7 @@ "bench:playback:aggregate": "node scripts/aggregate-playback-benchmarks.js", "bench:playback:validate": "node scripts/validate-playback-matrix.js", "bench:playback:report": "node scripts/build-playback-matrix-report.js", + "bench:playback:finalize": "node scripts/finalize-playback-matrix.js", "clean": "find . -name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js new file mode 100644 index 0000000000..886f2fc52b --- /dev/null +++ b/scripts/finalize-playback-matrix.js @@ -0,0 +1,114 @@ +#!/usr/bin/env node + +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + inputs: [], + outputDir: null, + requireFormats: [], + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--output-dir" || arg === "-o") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-dir"); + options.outputDir = path.resolve(value); + continue; + } + if (arg === "--require-formats") { + const value = argv[++i] ?? 
""; + options.requireFormats = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] + +Generates aggregate markdown, status markdown, and validation JSON for collected playback matrix outputs.`); +} + +function run(command, args) { + const result = spawnSync(command, args, { stdio: "inherit" }); + if (result.status !== 0) { + throw new Error(`Command failed: ${command} ${args.join(" ")}`); + } +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + if (!options.outputDir) { + throw new Error("--output-dir is required"); + } + if (!fs.existsSync(options.outputDir)) { + fs.mkdirSync(options.outputDir, { recursive: true }); + } + + const aggregatePath = path.join(options.outputDir, "playback-benchmark-aggregate.md"); + const statusPath = path.join(options.outputDir, "playback-matrix-status.md"); + const validationPath = path.join(options.outputDir, "playback-matrix-validation.json"); + + const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; + const statusArgs = ["scripts/build-playback-matrix-report.js"]; + const validateArgs = [ + "scripts/validate-playback-matrix.js", + "--output-json", + validationPath, + ]; + + for (const input of options.inputs) { + aggregateArgs.push("--input", input); + statusArgs.push("--input", input); + validateArgs.push("--input", input); + } + + aggregateArgs.push("--output", aggregatePath); + statusArgs.push("--output", statusPath); + if (options.requireFormats.length > 0) { + validateArgs.push("--require-formats", options.requireFormats.join(",")); + } + + run("node", aggregateArgs); + run("node", 
statusArgs); + run("node", validateArgs); + + console.log(`Aggregate markdown: ${aggregatePath}`); + console.log(`Status markdown: ${statusPath}`); + console.log(`Validation JSON: ${validationPath}`); +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} From e698d6aa99d194924b492ec77984cc54d28c4328 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:26:23 +0000 Subject: [PATCH 049/333] improve: add matrix artifact publisher into benchmark history Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 6 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 9 ++ package.json | 1 + scripts/publish-playback-matrix-summary.js | 139 +++++++++++++++++++++ 5 files changed, 158 insertions(+) create mode 100644 scripts/publish-playback-matrix-summary.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index fec61c3ad6..b30b5dce56 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -75,6 +75,9 @@ node scripts/validate-playback-matrix.js --input /path/to/json-results --require # Finalize aggregate + status + validation artifacts node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented + +# Publish matrix artifacts into this benchmark history +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 4a0d81e88b..9aefaf01e9 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -200,6 +200,10 @@ 
cargo run -p cap-recording --example playback-test-runner -- full - `scripts/finalize-playback-matrix.js` generates aggregate markdown, status markdown, and validation JSON in one command. - Supports optional required format enforcement during finalization. +14. **Added matrix summary publisher (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. + - Keeps matrix evidence updates consistent and repeatable. + --- ## Root Cause Analysis Archive @@ -302,6 +306,7 @@ Decoder Pipeline: 15. Added matrix validation script for required cell and format coverage checks. 16. Added matrix status report generator for concise artifact summaries. 17. Added one-shot finalization script for aggregate + status + validation outputs. +18. Added benchmark history publisher script for finalized matrix artifacts. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -320,6 +325,7 @@ Decoder Pipeline: - `scripts/validate-playback-matrix.js`: added required matrix cell/format validation for aggregated evidence. - `scripts/build-playback-matrix-report.js`: added concise matrix status report generation from JSON benchmark outputs. - `scripts/finalize-playback-matrix.js`: added one-shot matrix artifact finalization workflow. +- `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index de3db31808..8399e401ea 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -112,6 +112,15 @@ One-shot finalize command: pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented ``` +Publish finalized artifacts into benchmark history: + +```bash +pnpm bench:playback:publish -- \ + --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md \ + --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ + --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json +``` + ## Evidence checklist 1. Confirm all matrix rows exist. diff --git a/package.json b/package.json index c8bdea3b17..51abc7ce0a 100644 --- a/package.json +++ b/package.json @@ -46,6 +46,7 @@ "bench:playback:validate": "node scripts/validate-playback-matrix.js", "bench:playback:report": "node scripts/build-playback-matrix-report.js", "bench:playback:finalize": "node scripts/finalize-playback-matrix.js", + "bench:playback:publish": "node scripts/publish-playback-matrix-summary.js", "clean": "find . 
-name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js new file mode 100644 index 0000000000..0ee89fc8fe --- /dev/null +++ b/scripts/publish-playback-matrix-summary.js @@ -0,0 +1,139 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + aggregateMd: null, + statusMd: null, + validationJson: null, + target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--aggregate-md") { + options.aggregateMd = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--status-md") { + options.statusMd = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--validation-json") { + options.validationJson = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--target") { + options.target = path.resolve(argv[++i] ?? ""); + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--target ] + +Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); +} + +function ensureFile(filePath, label) { + if (!filePath || !fs.existsSync(filePath)) { + throw new Error(`${label} file not found: ${filePath ?? 
"undefined"}`); + } +} + +function buildSummarySection(aggregateMd, statusMd, validationJson) { + const now = new Date().toISOString(); + const validation = JSON.parse(validationJson); + const status = validation.passed ? "✅ MATRIX PASS" : "❌ MATRIX FAIL"; + + let markdown = ""; + markdown += `### Matrix Summary Run: ${now}\n\n`; + markdown += `**Validation:** ${status}\n\n`; + markdown += `- Validated cells: ${validation.validatedCells}\n`; + markdown += `- Observed cells: ${validation.observedCells}\n`; + markdown += `- Missing cells: ${validation.missingCells?.length ?? 0}\n`; + markdown += `- Format failures: ${validation.formatFailures?.length ?? 0}\n\n`; + + if ((validation.missingCells?.length ?? 0) > 0) { + markdown += "**Missing Cells**\n"; + for (const cell of validation.missingCells) { + markdown += `- ${cell.platform}:${cell.gpu}:${cell.scenario}\n`; + } + markdown += "\n"; + } + + if ((validation.formatFailures?.length ?? 0) > 0) { + markdown += "**Format Failures**\n"; + for (const failure of validation.formatFailures) { + markdown += `- ${failure.platform}:${failure.gpu}:${failure.scenario} missing ${failure.requiredFormat}\n`; + } + markdown += "\n"; + } + + markdown += "
\nMatrix Status Report\n\n"; + markdown += `${statusMd.trim()}\n\n`; + markdown += "
\n\n"; + + markdown += "
\nAggregate Benchmark Report\n\n"; + markdown += `${aggregateMd.trim()}\n\n`; + markdown += "
\n\n"; + + return markdown; +} + +function writeToBenchmarkHistory(targetFile, summaryMd) { + const markerStart = ""; + const markerEnd = ""; + const current = fs.readFileSync(targetFile, "utf8"); + const start = current.indexOf(markerStart); + const end = current.indexOf(markerEnd); + if (start === -1 || end === -1 || start >= end) { + throw new Error(`Could not find benchmark result markers in ${targetFile}`); + } + + const insertPos = start + markerStart.length; + const updated = + current.slice(0, insertPos) + + "\n\n" + + summaryMd + + current.slice(end); + fs.writeFileSync(targetFile, updated, "utf8"); +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + + ensureFile(options.aggregateMd, "Aggregate markdown"); + ensureFile(options.statusMd, "Status markdown"); + ensureFile(options.validationJson, "Validation JSON"); + ensureFile(options.target, "Target"); + + const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8"); + const statusMd = fs.readFileSync(options.statusMd, "utf8"); + const validationJson = fs.readFileSync(options.validationJson, "utf8"); + const summaryMd = buildSummarySection(aggregateMd, statusMd, validationJson); + writeToBenchmarkHistory(options.target, summaryMd); + console.log(`Published matrix summary into ${options.target}`); +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} From 129042fae37636abf67f9989ddf33235b66be53d Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:26:23 +0000 Subject: [PATCH 050/333] improve: add matrix artifact publisher into benchmark history Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 6 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 9 ++ package.json | 1 + scripts/publish-playback-matrix-summary.js | 139 +++++++++++++++++++++ 5 files changed, 158 insertions(+) create mode 100644 scripts/publish-playback-matrix-summary.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index fec61c3ad6..b30b5dce56 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -75,6 +75,9 @@ node scripts/validate-playback-matrix.js --input /path/to/json-results --require # Finalize aggregate + status + validation artifacts node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented + +# Publish matrix artifacts into this benchmark history +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 4a0d81e88b..9aefaf01e9 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -200,6 +200,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/finalize-playback-matrix.js` generates aggregate markdown, status markdown, and validation JSON in one command. - Supports optional required format enforcement during finalization. +14. 
**Added matrix summary publisher (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. + - Keeps matrix evidence updates consistent and repeatable. + --- ## Root Cause Analysis Archive @@ -302,6 +306,7 @@ Decoder Pipeline: 15. Added matrix validation script for required cell and format coverage checks. 16. Added matrix status report generator for concise artifact summaries. 17. Added one-shot finalization script for aggregate + status + validation outputs. +18. Added benchmark history publisher script for finalized matrix artifacts. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -320,6 +325,7 @@ Decoder Pipeline: - `scripts/validate-playback-matrix.js`: added required matrix cell/format validation for aggregated evidence. - `scripts/build-playback-matrix-report.js`: added concise matrix status report generation from JSON benchmark outputs. - `scripts/finalize-playback-matrix.js`: added one-shot matrix artifact finalization workflow. +- `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index de3db31808..8399e401ea 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -112,6 +112,15 @@ One-shot finalize command: pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented ``` +Publish finalized artifacts into benchmark history: + +```bash +pnpm bench:playback:publish -- \ + --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md \ + --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ + --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json +``` + ## Evidence checklist 1. Confirm all matrix rows exist. diff --git a/package.json b/package.json index c8bdea3b17..51abc7ce0a 100644 --- a/package.json +++ b/package.json @@ -46,6 +46,7 @@ "bench:playback:validate": "node scripts/validate-playback-matrix.js", "bench:playback:report": "node scripts/build-playback-matrix-report.js", "bench:playback:finalize": "node scripts/finalize-playback-matrix.js", + "bench:playback:publish": "node scripts/publish-playback-matrix-summary.js", "clean": "find . 
-name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js new file mode 100644 index 0000000000..0ee89fc8fe --- /dev/null +++ b/scripts/publish-playback-matrix-summary.js @@ -0,0 +1,139 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + aggregateMd: null, + statusMd: null, + validationJson: null, + target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--aggregate-md") { + options.aggregateMd = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--status-md") { + options.statusMd = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--validation-json") { + options.validationJson = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--target") { + options.target = path.resolve(argv[++i] ?? ""); + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--target ] + +Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); +} + +function ensureFile(filePath, label) { + if (!filePath || !fs.existsSync(filePath)) { + throw new Error(`${label} file not found: ${filePath ?? 
"undefined"}`); + } +} + +function buildSummarySection(aggregateMd, statusMd, validationJson) { + const now = new Date().toISOString(); + const validation = JSON.parse(validationJson); + const status = validation.passed ? "✅ MATRIX PASS" : "❌ MATRIX FAIL"; + + let markdown = ""; + markdown += `### Matrix Summary Run: ${now}\n\n`; + markdown += `**Validation:** ${status}\n\n`; + markdown += `- Validated cells: ${validation.validatedCells}\n`; + markdown += `- Observed cells: ${validation.observedCells}\n`; + markdown += `- Missing cells: ${validation.missingCells?.length ?? 0}\n`; + markdown += `- Format failures: ${validation.formatFailures?.length ?? 0}\n\n`; + + if ((validation.missingCells?.length ?? 0) > 0) { + markdown += "**Missing Cells**\n"; + for (const cell of validation.missingCells) { + markdown += `- ${cell.platform}:${cell.gpu}:${cell.scenario}\n`; + } + markdown += "\n"; + } + + if ((validation.formatFailures?.length ?? 0) > 0) { + markdown += "**Format Failures**\n"; + for (const failure of validation.formatFailures) { + markdown += `- ${failure.platform}:${failure.gpu}:${failure.scenario} missing ${failure.requiredFormat}\n`; + } + markdown += "\n"; + } + + markdown += "
\nMatrix Status Report\n\n"; + markdown += `${statusMd.trim()}\n\n`; + markdown += "
\n\n"; + + markdown += "
\nAggregate Benchmark Report\n\n"; + markdown += `${aggregateMd.trim()}\n\n`; + markdown += "
\n\n"; + + return markdown; +} + +function writeToBenchmarkHistory(targetFile, summaryMd) { + const markerStart = ""; + const markerEnd = ""; + const current = fs.readFileSync(targetFile, "utf8"); + const start = current.indexOf(markerStart); + const end = current.indexOf(markerEnd); + if (start === -1 || end === -1 || start >= end) { + throw new Error(`Could not find benchmark result markers in ${targetFile}`); + } + + const insertPos = start + markerStart.length; + const updated = + current.slice(0, insertPos) + + "\n\n" + + summaryMd + + current.slice(end); + fs.writeFileSync(targetFile, updated, "utf8"); +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + + ensureFile(options.aggregateMd, "Aggregate markdown"); + ensureFile(options.statusMd, "Status markdown"); + ensureFile(options.validationJson, "Validation JSON"); + ensureFile(options.target, "Target"); + + const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8"); + const statusMd = fs.readFileSync(options.statusMd, "utf8"); + const validationJson = fs.readFileSync(options.validationJson, "utf8"); + const summaryMd = buildSummarySection(aggregateMd, statusMd, validationJson); + writeToBenchmarkHistory(options.target, summaryMd); + console.log(`Published matrix summary into ${options.target}`); +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} From a0f6a99acc1adee4030cf495fac825eebc66aeed Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:32:55 +0000 Subject: [PATCH 051/333] improve: tune frame wait and add startup threshold checks Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 8 ++-- crates/editor/PLAYBACK-FINDINGS.md | 6 +++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 +- crates/editor/src/playback.rs | 15 +++--- .../examples/playback-test-runner.rs | 48 ++++++++++++++++--- scripts/run-playback-benchmark-matrix.js | 14 +++++- 6 files changed, 75 insertions(+), 19 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index b30b5dce56..aaef9c71fc 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -10,6 +10,7 @@ This document tracks performance benchmarks for Cap's playback and decoding syst |--------|--------|-----------| | Decoder Init | <200ms | - | | Decode Latency (p95) | <50ms | - | +| Startup to First Frame | <250ms | configurable | | Effective FPS | ≥30 fps | ±2 fps | | Decode Jitter | <10ms | - | | Scrub Seek Latency (p95) | <40ms | - | @@ -42,6 +43,7 @@ This document tracks performance benchmarks for Cap's playback and decoding syst ```bash # Run full playback validation on recordings from real-device-test-runner cargo run -p cap-recording --example playback-test-runner -- full +cargo run -p cap-recording --example playback-test-runner -- full --startup-threshold-ms 250 # Run specific test categories cargo run -p cap-recording --example playback-test-runner -- decoder @@ -118,9 +120,9 @@ cargo run -p cap-recording --example playback-test-runner -- scrub --fps 60 --be Automated helper for machine runs: ```bash -node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --input-dir /tmp/cap-real-device-tests -node 
scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir /tmp/playback-matrix-windows-nvidia --fps 60 --require-formats mp4,fragmented --input-dir /tmp/cap-real-device-tests -node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu integrated --output-dir /tmp/playback-matrix-windows-integrated --fps 60 --scenarios scrub --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --startup-threshold-ms 250 --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir /tmp/playback-matrix-windows-nvidia --fps 60 --startup-threshold-ms 250 --require-formats mp4,fragmented --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu integrated --output-dir /tmp/playback-matrix-windows-integrated --fps 60 --startup-threshold-ms 250 --scenarios scrub --input-dir /tmp/cap-real-device-tests ``` | Platform | GPU Class | MP4 Full | Fragmented Full | MP4 Scrub | Fragmented Scrub | Notes | diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 9aefaf01e9..3c4dae83dc 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -171,10 +171,15 @@ cargo run -p cap-recording --example playback-test-runner -- full - Only the latest pending seek frame is sent while an async seek is in-flight. - Duplicate same-frame seeks are dropped in both frontend dispatch and playback seek signaling. +8. **Playback frame wait timeout now scales with target FPS (2026-02-13)** + - Replaced fixed 200ms frame fetch waits with FPS-derived bounded timeout. + - Reduces long stall windows on 60fps playback and improves real-time catch-up behavior. + 8. 
**Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. - JSON output includes command metadata, system info, summary, and per-recording test detail. - Command metadata now includes input scope and output flags for reproducibility. + - Startup-to-first-frame threshold is configurable with `--startup-threshold-ms` and tracked as pass/fail signal. 9. **Added JSON aggregate utility for cross-platform benchmark collation (2026-02-13)** - `scripts/aggregate-playback-benchmarks.js` builds a markdown table from multiple JSON outputs. @@ -185,6 +190,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - Automatically generates aggregate markdown for each machine run directory. - Performs per-machine post-run validation for required scenarios and optional format requirements. - Supports scenario subset reruns via `--scenarios` for faster targeted validation. + - Supports startup threshold tuning via `--startup-threshold-ms`. 11. **Added matrix completeness validator (2026-02-13)** - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 8399e401ea..4997f57f64 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -43,6 +43,7 @@ node scripts/run-playback-benchmark-matrix.js \ --gpu "" \ --output-dir "$OUT_DIR" \ --fps 60 \ + --startup-threshold-ms 250 \ --require-formats mp4,fragmented \ --scenarios full,scrub \ --input-dir "$INPUT_DIR" @@ -51,7 +52,7 @@ node scripts/run-playback-benchmark-matrix.js \ Equivalent shortcut: ```bash -pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --require-formats mp4,fragmented --scenarios full,scrub --input-dir "$INPUT_DIR" +pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --startup-threshold-ms 250 --require-formats mp4,fragmented --scenarios full,scrub --input-dir "$INPUT_DIR" ``` Rerun only scrub scenario for a machine: diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 3b2c72411f..dacede787f 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -384,6 +384,10 @@ impl Playback { .spawn(); let frame_duration = Duration::from_secs_f64(1.0 / fps_f64); + let frame_fetch_timeout = frame_duration + .mul_f64(4.0) + .max(Duration::from_millis(20)) + .min(Duration::from_millis(80)); let mut frame_number = self.start_frame_number; let mut prefetch_buffer: VecDeque = VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); @@ -553,7 +557,7 @@ impl Playback { if is_in_flight { let wait_start = Instant::now(); - let max_wait = Duration::from_millis(200); + let max_wait = frame_fetch_timeout; let mut found_frame = None; while wait_start.elapsed() < max_wait { @@ -603,11 +607,8 @@ impl Playback { } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { let _ = frame_request_tx.send(frame_number); - let wait_result = tokio::time::timeout( - Duration::from_millis(200), - prefetch_rx.recv(), - ) - 
.await; + let wait_result = + tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; if let Ok(Some(prefetched)) = wait_result { if prefetched.frame_number == frame_number { @@ -651,7 +652,7 @@ impl Playback { guard.insert(frame_number); } - let max_wait = Duration::from_millis(200); + let max_wait = frame_fetch_timeout; let data = tokio::select! { _ = stop_rx.changed() => { if let Ok(mut guard) = main_in_flight.write() { diff --git a/crates/recording/examples/playback-test-runner.rs b/crates/recording/examples/playback-test-runner.rs index b930a2cb41..16865ae654 100644 --- a/crates/recording/examples/playback-test-runner.rs +++ b/crates/recording/examples/playback-test-runner.rs @@ -39,6 +39,9 @@ struct Cli { #[arg(long, global = true, default_value = "30")] fps: u32, + #[arg(long, global = true, default_value_t = STARTUP_TO_FIRST_FRAME_WARNING_MS)] + startup_threshold_ms: f64, + #[arg(long, global = true)] verbose: bool, @@ -66,6 +69,7 @@ enum Commands { const FPS_TOLERANCE: f64 = 2.0; const DECODE_LATENCY_WARNING_MS: f64 = 50.0; const SCRUB_SEEK_WARNING_MS: f64 = 40.0; +const STARTUP_TO_FIRST_FRAME_WARNING_MS: f64 = 250.0; const AUDIO_VIDEO_SYNC_TOLERANCE_MS: f64 = 100.0; const CAMERA_SYNC_TOLERANCE_MS: f64 = 100.0; @@ -101,6 +105,8 @@ struct PlaybackTestResult { fps_ok: bool, jitter_ms: f64, decode_latency_ok: bool, + startup_latency_ok: bool, + startup_threshold_ms: f64, errors: Vec, } @@ -243,6 +249,12 @@ impl RecordingTestReport { if !result.decode_latency_ok { println!(" WARN: Decode latency exceeds {DECODE_LATENCY_WARNING_MS}ms!"); } + if !result.startup_latency_ok { + println!( + " WARN: Startup-to-first-frame exceeds {:.1}ms!", + result.startup_threshold_ms + ); + } for err in &result.errors { println!(" ERROR: {err}"); } @@ -390,12 +402,14 @@ async fn test_playback( meta: &StudioRecordingMeta, segment_index: usize, fps: u32, + startup_threshold_ms: f64, verbose: bool, ) -> PlaybackTestResult { let playback_start = Instant::now(); let mut 
result = PlaybackTestResult { segment_index, expected_fps: fps as f64, + startup_threshold_ms, ..Default::default() }; @@ -496,9 +510,11 @@ async fn test_playback( result.fps_ok = (result.effective_fps - result.expected_fps).abs() <= FPS_TOLERANCE || result.effective_fps >= result.expected_fps; result.decode_latency_ok = result.p95_decode_time_ms <= DECODE_LATENCY_WARNING_MS; + result.startup_latency_ok = result.startup_to_first_frame_ms <= startup_threshold_ms; result.passed = result.fps_ok && result.decode_latency_ok + && result.startup_latency_ok && result.failed_frames == 0 && result.decoded_frames > 0; @@ -880,6 +896,7 @@ fn discover_recordings(input_dir: &Path) -> Vec { async fn run_tests_on_recording( recording_path: &Path, fps: u32, + startup_threshold_ms: f64, run_decoder: bool, run_playback: bool, run_scrub: bool, @@ -969,8 +986,15 @@ async fn run_tests_on_recording( if verbose { println!(" Testing playback for segment {segment_idx}..."); } - let playback_result = - test_playback(&meta, studio_meta.as_ref(), segment_idx, fps, verbose).await; + let playback_result = test_playback( + &meta, + studio_meta.as_ref(), + segment_idx, + fps, + startup_threshold_ms, + verbose, + ) + .await; report.playback_results.push(playback_result); } @@ -1082,6 +1106,13 @@ fn get_failure_tags(report: &RecordingTestReport) -> Vec { if report.playback_results.iter().any(|r| !r.decode_latency_ok) { tags.push("LATENCY".to_string()); } + if report + .playback_results + .iter() + .any(|r| !r.startup_latency_ok) + { + tags.push("STARTUP".to_string()); + } if report.scrub_results.iter().any(|r| !r.seek_latency_ok) { tags.push("SCRUB_LATENCY".to_string()); } @@ -1191,7 +1222,7 @@ fn report_to_markdown(report: &RecordingTestReport) -> String { )); md.push_str(&format!( "| ↳ Startup | {} | first_decode={:.1}ms startup_to_first={:.1}ms |\n", - if result.startup_to_first_frame_ms > 0.0 { + if result.startup_latency_ok { "✅" } else { "❌" @@ -1505,9 +1536,10 @@ fn shell_quote(value: &str) 
-> String { fn build_command_string(cli: &Cli) -> String { let mut command = format!( - "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}", + "cargo run -p cap-recording --example playback-test-runner -- {} --fps {} --startup-threshold-ms {:.1}", command_name(cli.command.as_ref()), - cli.fps + cli.fps, + cli.startup_threshold_ms ); if let Some(path) = &cli.recording_path { @@ -1598,9 +1630,10 @@ async fn main() -> anyhow::Result<()> { println!("\nCap Playback Test Runner"); println!("{}", "=".repeat(40)); println!( - "Testing {} recording(s) at {} FPS", + "Testing {} recording(s) at {} FPS (startup threshold: {:.1}ms)", recordings.len(), - cli.fps + cli.fps, + cli.startup_threshold_ms ); println!(); @@ -1612,6 +1645,7 @@ async fn main() -> anyhow::Result<()> { match run_tests_on_recording( recording_path, cli.fps, + cli.startup_threshold_ms, run_decoder, run_playback, run_scrub, diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index 89f0a26e90..d49fec6024 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -10,6 +10,7 @@ function parseArgs(argv) { gpu: null, outputDir: null, fps: 60, + startupThresholdMs: 250, recordingPath: null, inputDir: null, validate: true, @@ -46,6 +47,14 @@ function parseArgs(argv) { options.fps = value; continue; } + if (arg === "--startup-threshold-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --startup-threshold-ms value"); + } + options.startupThresholdMs = value; + continue; + } if (arg === "--recording-path") { options.recordingPath = argv[++i] ?? 
null; continue; @@ -85,7 +94,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] [--scenarios full,scrub] + console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--startup-threshold-ms 250] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] [--scenarios full,scrub] Runs playback benchmark matrix scenarios and writes JSON outputs. @@ -96,6 +105,7 @@ Required: Optional: --fps FPS for benchmark runs (default: 60) + --startup-threshold-ms Startup-to-first-frame threshold in ms (default: 250) --recording-path Specific recording path --input-dir Recording discovery directory --require-formats Required formats for local validation (comma-separated) @@ -134,6 +144,8 @@ function scenarioArgs(options, scenario) { scenario, "--fps", String(options.fps), + "--startup-threshold-ms", + String(options.startupThresholdMs), "--json-output", jsonOutput, "--notes", From 06c008a4629c49e9634084a0ac786f263c334059 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:32:55 +0000 Subject: [PATCH 052/333] improve: tune frame wait and add startup threshold checks Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 8 ++-- crates/editor/PLAYBACK-FINDINGS.md | 6 +++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 +- crates/editor/src/playback.rs | 15 +++--- .../examples/playback-test-runner.rs | 48 ++++++++++++++++--- scripts/run-playback-benchmark-matrix.js | 14 +++++- 6 files changed, 75 insertions(+), 19 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index b30b5dce56..aaef9c71fc 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -10,6 +10,7 @@ This document tracks performance benchmarks for Cap's playback 
and decoding syst |--------|--------|-----------| | Decoder Init | <200ms | - | | Decode Latency (p95) | <50ms | - | +| Startup to First Frame | <250ms | configurable | | Effective FPS | ≥30 fps | ±2 fps | | Decode Jitter | <10ms | - | | Scrub Seek Latency (p95) | <40ms | - | @@ -42,6 +43,7 @@ This document tracks performance benchmarks for Cap's playback and decoding syst ```bash # Run full playback validation on recordings from real-device-test-runner cargo run -p cap-recording --example playback-test-runner -- full +cargo run -p cap-recording --example playback-test-runner -- full --startup-threshold-ms 250 # Run specific test categories cargo run -p cap-recording --example playback-test-runner -- decoder @@ -118,9 +120,9 @@ cargo run -p cap-recording --example playback-test-runner -- scrub --fps 60 --be Automated helper for machine runs: ```bash -node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --input-dir /tmp/cap-real-device-tests -node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir /tmp/playback-matrix-windows-nvidia --fps 60 --require-formats mp4,fragmented --input-dir /tmp/cap-real-device-tests -node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu integrated --output-dir /tmp/playback-matrix-windows-integrated --fps 60 --scenarios scrub --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --startup-threshold-ms 250 --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir /tmp/playback-matrix-windows-nvidia --fps 60 --startup-threshold-ms 250 --require-formats mp4,fragmented --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu integrated --output-dir 
/tmp/playback-matrix-windows-integrated --fps 60 --startup-threshold-ms 250 --scenarios scrub --input-dir /tmp/cap-real-device-tests ``` | Platform | GPU Class | MP4 Full | Fragmented Full | MP4 Scrub | Fragmented Scrub | Notes | diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 9aefaf01e9..3c4dae83dc 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -171,10 +171,15 @@ cargo run -p cap-recording --example playback-test-runner -- full - Only the latest pending seek frame is sent while an async seek is in-flight. - Duplicate same-frame seeks are dropped in both frontend dispatch and playback seek signaling. +8. **Playback frame wait timeout now scales with target FPS (2026-02-13)** + - Replaced fixed 200ms frame fetch waits with FPS-derived bounded timeout. + - Reduces long stall windows on 60fps playback and improves real-time catch-up behavior. + 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. - JSON output includes command metadata, system info, summary, and per-recording test detail. - Command metadata now includes input scope and output flags for reproducibility. + - Startup-to-first-frame threshold is configurable with `--startup-threshold-ms` and tracked as pass/fail signal. 9. **Added JSON aggregate utility for cross-platform benchmark collation (2026-02-13)** - `scripts/aggregate-playback-benchmarks.js` builds a markdown table from multiple JSON outputs. @@ -185,6 +190,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - Automatically generates aggregate markdown for each machine run directory. - Performs per-machine post-run validation for required scenarios and optional format requirements. - Supports scenario subset reruns via `--scenarios` for faster targeted validation. + - Supports startup threshold tuning via `--startup-threshold-ms`. 11. 
**Added matrix completeness validator (2026-02-13)** - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 8399e401ea..4997f57f64 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -43,6 +43,7 @@ node scripts/run-playback-benchmark-matrix.js \ --gpu "" \ --output-dir "$OUT_DIR" \ --fps 60 \ + --startup-threshold-ms 250 \ --require-formats mp4,fragmented \ --scenarios full,scrub \ --input-dir "$INPUT_DIR" @@ -51,7 +52,7 @@ node scripts/run-playback-benchmark-matrix.js \ Equivalent shortcut: ```bash -pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --require-formats mp4,fragmented --scenarios full,scrub --input-dir "$INPUT_DIR" +pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --startup-threshold-ms 250 --require-formats mp4,fragmented --scenarios full,scrub --input-dir "$INPUT_DIR" ``` Rerun only scrub scenario for a machine: diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 3b2c72411f..dacede787f 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -384,6 +384,10 @@ impl Playback { .spawn(); let frame_duration = Duration::from_secs_f64(1.0 / fps_f64); + let frame_fetch_timeout = frame_duration + .mul_f64(4.0) + .max(Duration::from_millis(20)) + .min(Duration::from_millis(80)); let mut frame_number = self.start_frame_number; let mut prefetch_buffer: VecDeque = VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); @@ -553,7 +557,7 @@ impl Playback { if is_in_flight { let wait_start = Instant::now(); - let max_wait = Duration::from_millis(200); + let max_wait = frame_fetch_timeout; let mut found_frame = None; while wait_start.elapsed() < max_wait { @@ -603,11 +607,8 @@ impl Playback { } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { let 
_ = frame_request_tx.send(frame_number); - let wait_result = tokio::time::timeout( - Duration::from_millis(200), - prefetch_rx.recv(), - ) - .await; + let wait_result = + tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; if let Ok(Some(prefetched)) = wait_result { if prefetched.frame_number == frame_number { @@ -651,7 +652,7 @@ impl Playback { guard.insert(frame_number); } - let max_wait = Duration::from_millis(200); + let max_wait = frame_fetch_timeout; let data = tokio::select! { _ = stop_rx.changed() => { if let Ok(mut guard) = main_in_flight.write() { diff --git a/crates/recording/examples/playback-test-runner.rs b/crates/recording/examples/playback-test-runner.rs index b930a2cb41..16865ae654 100644 --- a/crates/recording/examples/playback-test-runner.rs +++ b/crates/recording/examples/playback-test-runner.rs @@ -39,6 +39,9 @@ struct Cli { #[arg(long, global = true, default_value = "30")] fps: u32, + #[arg(long, global = true, default_value_t = STARTUP_TO_FIRST_FRAME_WARNING_MS)] + startup_threshold_ms: f64, + #[arg(long, global = true)] verbose: bool, @@ -66,6 +69,7 @@ enum Commands { const FPS_TOLERANCE: f64 = 2.0; const DECODE_LATENCY_WARNING_MS: f64 = 50.0; const SCRUB_SEEK_WARNING_MS: f64 = 40.0; +const STARTUP_TO_FIRST_FRAME_WARNING_MS: f64 = 250.0; const AUDIO_VIDEO_SYNC_TOLERANCE_MS: f64 = 100.0; const CAMERA_SYNC_TOLERANCE_MS: f64 = 100.0; @@ -101,6 +105,8 @@ struct PlaybackTestResult { fps_ok: bool, jitter_ms: f64, decode_latency_ok: bool, + startup_latency_ok: bool, + startup_threshold_ms: f64, errors: Vec, } @@ -243,6 +249,12 @@ impl RecordingTestReport { if !result.decode_latency_ok { println!(" WARN: Decode latency exceeds {DECODE_LATENCY_WARNING_MS}ms!"); } + if !result.startup_latency_ok { + println!( + " WARN: Startup-to-first-frame exceeds {:.1}ms!", + result.startup_threshold_ms + ); + } for err in &result.errors { println!(" ERROR: {err}"); } @@ -390,12 +402,14 @@ async fn test_playback( meta: &StudioRecordingMeta, 
segment_index: usize, fps: u32, + startup_threshold_ms: f64, verbose: bool, ) -> PlaybackTestResult { let playback_start = Instant::now(); let mut result = PlaybackTestResult { segment_index, expected_fps: fps as f64, + startup_threshold_ms, ..Default::default() }; @@ -496,9 +510,11 @@ async fn test_playback( result.fps_ok = (result.effective_fps - result.expected_fps).abs() <= FPS_TOLERANCE || result.effective_fps >= result.expected_fps; result.decode_latency_ok = result.p95_decode_time_ms <= DECODE_LATENCY_WARNING_MS; + result.startup_latency_ok = result.startup_to_first_frame_ms <= startup_threshold_ms; result.passed = result.fps_ok && result.decode_latency_ok + && result.startup_latency_ok && result.failed_frames == 0 && result.decoded_frames > 0; @@ -880,6 +896,7 @@ fn discover_recordings(input_dir: &Path) -> Vec { async fn run_tests_on_recording( recording_path: &Path, fps: u32, + startup_threshold_ms: f64, run_decoder: bool, run_playback: bool, run_scrub: bool, @@ -969,8 +986,15 @@ async fn run_tests_on_recording( if verbose { println!(" Testing playback for segment {segment_idx}..."); } - let playback_result = - test_playback(&meta, studio_meta.as_ref(), segment_idx, fps, verbose).await; + let playback_result = test_playback( + &meta, + studio_meta.as_ref(), + segment_idx, + fps, + startup_threshold_ms, + verbose, + ) + .await; report.playback_results.push(playback_result); } @@ -1082,6 +1106,13 @@ fn get_failure_tags(report: &RecordingTestReport) -> Vec { if report.playback_results.iter().any(|r| !r.decode_latency_ok) { tags.push("LATENCY".to_string()); } + if report + .playback_results + .iter() + .any(|r| !r.startup_latency_ok) + { + tags.push("STARTUP".to_string()); + } if report.scrub_results.iter().any(|r| !r.seek_latency_ok) { tags.push("SCRUB_LATENCY".to_string()); } @@ -1191,7 +1222,7 @@ fn report_to_markdown(report: &RecordingTestReport) -> String { )); md.push_str(&format!( "| ↳ Startup | {} | first_decode={:.1}ms startup_to_first={:.1}ms |\n", - 
if result.startup_to_first_frame_ms > 0.0 { + if result.startup_latency_ok { "✅" } else { "❌" @@ -1505,9 +1536,10 @@ fn shell_quote(value: &str) -> String { fn build_command_string(cli: &Cli) -> String { let mut command = format!( - "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}", + "cargo run -p cap-recording --example playback-test-runner -- {} --fps {} --startup-threshold-ms {:.1}", command_name(cli.command.as_ref()), - cli.fps + cli.fps, + cli.startup_threshold_ms ); if let Some(path) = &cli.recording_path { @@ -1598,9 +1630,10 @@ async fn main() -> anyhow::Result<()> { println!("\nCap Playback Test Runner"); println!("{}", "=".repeat(40)); println!( - "Testing {} recording(s) at {} FPS", + "Testing {} recording(s) at {} FPS (startup threshold: {:.1}ms)", recordings.len(), - cli.fps + cli.fps, + cli.startup_threshold_ms ); println!(); @@ -1612,6 +1645,7 @@ async fn main() -> anyhow::Result<()> { match run_tests_on_recording( recording_path, cli.fps, + cli.startup_threshold_ms, run_decoder, run_playback, run_scrub, diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index 89f0a26e90..d49fec6024 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -10,6 +10,7 @@ function parseArgs(argv) { gpu: null, outputDir: null, fps: 60, + startupThresholdMs: 250, recordingPath: null, inputDir: null, validate: true, @@ -46,6 +47,14 @@ function parseArgs(argv) { options.fps = value; continue; } + if (arg === "--startup-threshold-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --startup-threshold-ms value"); + } + options.startupThresholdMs = value; + continue; + } if (arg === "--recording-path") { options.recordingPath = argv[++i] ?? 
null; continue; @@ -85,7 +94,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] [--scenarios full,scrub] + console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--startup-threshold-ms 250] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] [--scenarios full,scrub] Runs playback benchmark matrix scenarios and writes JSON outputs. @@ -96,6 +105,7 @@ Required: Optional: --fps FPS for benchmark runs (default: 60) + --startup-threshold-ms Startup-to-first-frame threshold in ms (default: 250) --recording-path Specific recording path --input-dir Recording discovery directory --require-formats Required formats for local validation (comma-separated) @@ -134,6 +144,8 @@ function scenarioArgs(options, scenario) { scenario, "--fps", String(options.fps), + "--startup-threshold-ms", + String(options.startupThresholdMs), "--json-output", jsonOutput, "--notes", From f2d9b462711c34511099997b78e7e8a90f16caf6 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:34:55 +0000 Subject: [PATCH 053/333] improve: add matrix bottleneck analysis for fps optimization Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 6 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 + package.json | 1 + .../analyze-playback-matrix-bottlenecks.js | 249 ++++++++++++++++++ 5 files changed, 265 insertions(+) create mode 100644 scripts/analyze-playback-matrix-bottlenecks.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index aaef9c71fc..fde0860b97 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -80,6 +80,9 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- # Publish 
matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json + +# Analyze bottlenecks from matrix results +node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 3c4dae83dc..b535a28b50 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -210,6 +210,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. - Keeps matrix evidence updates consistent and repeatable. +15. **Added bottleneck analyzer for continuous FPS optimization (2026-02-13)** + - `scripts/analyze-playback-matrix-bottlenecks.js` ranks matrix cells by FPS, startup, and scrub threshold breaches. + - Produces prioritized optimization backlog from real matrix evidence. + --- ## Root Cause Analysis Archive @@ -313,6 +317,7 @@ Decoder Pipeline: 16. Added matrix status report generator for concise artifact summaries. 17. Added one-shot finalization script for aggregate + status + validation outputs. 18. Added benchmark history publisher script for finalized matrix artifacts. +19. Added matrix bottleneck analysis script for prioritized FPS optimization follow-up. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -332,6 +337,7 @@ Decoder Pipeline: - `scripts/build-playback-matrix-report.js`: added concise matrix status report generation from JSON benchmark outputs. 
- `scripts/finalize-playback-matrix.js`: added one-shot matrix artifact finalization workflow. - `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. +- `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 4997f57f64..46ff772df8 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -122,6 +122,12 @@ pnpm bench:playback:publish -- \ --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json ``` +Generate bottleneck analysis for optimization backlog: + +```bash +pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tmp/playback-matrix-final/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +``` + ## Evidence checklist 1. Confirm all matrix rows exist. diff --git a/package.json b/package.json index 51abc7ce0a..93f760e48f 100644 --- a/package.json +++ b/package.json @@ -47,6 +47,7 @@ "bench:playback:report": "node scripts/build-playback-matrix-report.js", "bench:playback:finalize": "node scripts/finalize-playback-matrix.js", "bench:playback:publish": "node scripts/publish-playback-matrix-summary.js", + "bench:playback:analyze": "node scripts/analyze-playback-matrix-bottlenecks.js", "clean": "find . 
-name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/analyze-playback-matrix-bottlenecks.js b/scripts/analyze-playback-matrix-bottlenecks.js new file mode 100644 index 0000000000..74ca0b3294 --- /dev/null +++ b/scripts/analyze-playback-matrix-bottlenecks.js @@ -0,0 +1,249 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + inputs: [], + output: null, + targetFps: 60, + maxScrubP95Ms: 40, + maxStartupMs: 250, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--output" || arg === "-o") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output"); + options.output = path.resolve(value); + continue; + } + if (arg === "--target-fps") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --target-fps value"); + } + options.targetFps = value; + continue; + } + if (arg === "--max-scrub-p95-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-scrub-p95-ms value"); + } + options.maxScrubP95Ms = value; + continue; + } + if (arg === "--max-startup-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-startup-ms value"); + } + options.maxStartupMs = value; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/analyze-playback-matrix-bottlenecks.js --input [--input ...] [--output ] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] + +Analyzes playback matrix JSON outputs and highlights prioritized bottlenecks.`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const result = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + result[key.trim()] = value.trim(); + } + return result; +} + +function average(values) { + if (values.length === 0) return null; + return values.reduce((acc, value) => acc + value, 0) / values.length; +} + +function max(values) { + if (values.length === 0) return null; + return Math.max(...values); +} + +function scoreIssue(issue, options) { + let score = 0; + if (issue.fpsMin !== null && issue.fpsMin < options.targetFps) { + score += (options.targetFps - issue.fpsMin) * 5; + } + if (issue.scrubP95 !== null && issue.scrubP95 > options.maxScrubP95Ms) { + score += issue.scrubP95 - 
options.maxScrubP95Ms; + } + if (issue.startupAvg !== null && issue.startupAvg > options.maxStartupMs) { + score += (issue.startupAvg - options.maxStartupMs) / 2; + } + return score; +} + +function formatValue(value, digits = 1) { + return value === null ? "n/a" : value.toFixed(digits); +} + +function collectIssues(files, options) { + const issues = []; + + for (const filePath of files) { + const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + + for (const report of reports) { + const playback = Array.isArray(report.playback_results) + ? report.playback_results + : []; + const scrub = Array.isArray(report.scrub_results) ? report.scrub_results : []; + + const fpsValues = playback + .map((entry) => entry.effective_fps) + .filter((entry) => typeof entry === "number"); + const startupValues = playback + .map((entry) => entry.startup_to_first_frame_ms) + .filter((entry) => typeof entry === "number"); + const scrubP95Values = scrub + .map((entry) => entry.p95_seek_time_ms) + .filter((entry) => typeof entry === "number"); + + const issue = { + platform, + gpu, + scenario, + recording: report.recording_name ?? path.basename(filePath), + format: report.is_fragmented ? "fragmented" : "mp4", + fpsMin: fpsValues.length ? 
Math.min(...fpsValues) : null, + startupAvg: average(startupValues), + scrubP95: max(scrubP95Values), + filePath, + }; + issue.score = scoreIssue(issue, options); + if (issue.score > 0) { + issues.push(issue); + } + } + } + + issues.sort((a, b) => b.score - a.score); + return issues; +} + +function recommendation(issue, options) { + const recommendations = []; + if (issue.fpsMin !== null && issue.fpsMin < options.targetFps) { + recommendations.push("inspect decode/render path and frame wait behavior"); + } + if (issue.scrubP95 !== null && issue.scrubP95 > options.maxScrubP95Ms) { + recommendations.push("investigate seek dispatch pressure and decoder reposition cost"); + } + if (issue.startupAvg !== null && issue.startupAvg > options.maxStartupMs) { + recommendations.push("optimize startup warmup and first-frame path"); + } + return recommendations.join("; "); +} + +function buildMarkdown(issues, options) { + let md = ""; + md += "# Playback Matrix Bottleneck Analysis\n\n"; + md += `Generated: ${new Date().toISOString()}\n\n`; + md += `Thresholds: target_fps=${options.targetFps}, max_scrub_p95_ms=${options.maxScrubP95Ms}, max_startup_ms=${options.maxStartupMs}\n\n`; + + if (issues.length === 0) { + md += "No bottlenecks detected for configured thresholds.\n"; + return md; + } + + md += "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; + md += "|---:|---|---|---|---|---|---:|---:|---:|---:|---|\n"; + issues.forEach((issue, index) => { + md += `| ${index + 1} | ${issue.platform} | ${issue.gpu} | ${issue.scenario} | ${issue.recording} | ${issue.format} | ${formatValue(issue.fpsMin)} | ${formatValue(issue.startupAvg)} | ${formatValue(issue.scrubP95)} | ${formatValue(issue.score, 2)} | ${recommendation(issue, options)} |\n`; + }); + md += "\n"; + return md; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if 
(options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const input of options.inputs) { + for (const filePath of collectJsonFiles(input)) { + files.add(filePath); + } + } + if (files.size === 0) { + throw new Error("No JSON files found"); + } + + const issues = collectIssues([...files], options); + const markdown = buildMarkdown(issues, options); + if (options.output) { + fs.writeFileSync(options.output, markdown, "utf8"); + console.log(`Wrote bottleneck analysis to ${options.output}`); + } else { + process.stdout.write(markdown); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} From abfe036456069f7e884fb78017db818a7195f22e Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:34:55 +0000 Subject: [PATCH 054/333] improve: add matrix bottleneck analysis for fps optimization Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 6 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 + package.json | 1 + .../analyze-playback-matrix-bottlenecks.js | 249 ++++++++++++++++++ 5 files changed, 265 insertions(+) create mode 100644 scripts/analyze-playback-matrix-bottlenecks.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index aaef9c71fc..fde0860b97 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -80,6 +80,9 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json + +# Analyze bottlenecks from matrix results +node 
scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 3c4dae83dc..b535a28b50 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -210,6 +210,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. - Keeps matrix evidence updates consistent and repeatable. +15. **Added bottleneck analyzer for continuous FPS optimization (2026-02-13)** + - `scripts/analyze-playback-matrix-bottlenecks.js` ranks matrix cells by FPS, startup, and scrub threshold breaches. + - Produces prioritized optimization backlog from real matrix evidence. + --- ## Root Cause Analysis Archive @@ -313,6 +317,7 @@ Decoder Pipeline: 16. Added matrix status report generator for concise artifact summaries. 17. Added one-shot finalization script for aggregate + status + validation outputs. 18. Added benchmark history publisher script for finalized matrix artifacts. +19. Added matrix bottleneck analysis script for prioritized FPS optimization follow-up. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -332,6 +337,7 @@ Decoder Pipeline: - `scripts/build-playback-matrix-report.js`: added concise matrix status report generation from JSON benchmark outputs. - `scripts/finalize-playback-matrix.js`: added one-shot matrix artifact finalization workflow. - `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. +- `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. 
**Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 4997f57f64..46ff772df8 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -122,6 +122,12 @@ pnpm bench:playback:publish -- \ --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json ``` +Generate bottleneck analysis for optimization backlog: + +```bash +pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tmp/playback-matrix-final/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +``` + ## Evidence checklist 1. Confirm all matrix rows exist. diff --git a/package.json b/package.json index 51abc7ce0a..93f760e48f 100644 --- a/package.json +++ b/package.json @@ -47,6 +47,7 @@ "bench:playback:report": "node scripts/build-playback-matrix-report.js", "bench:playback:finalize": "node scripts/finalize-playback-matrix.js", "bench:playback:publish": "node scripts/publish-playback-matrix-summary.js", + "bench:playback:analyze": "node scripts/analyze-playback-matrix-bottlenecks.js", "clean": "find . 
-name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/analyze-playback-matrix-bottlenecks.js b/scripts/analyze-playback-matrix-bottlenecks.js new file mode 100644 index 0000000000..74ca0b3294 --- /dev/null +++ b/scripts/analyze-playback-matrix-bottlenecks.js @@ -0,0 +1,249 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + inputs: [], + output: null, + targetFps: 60, + maxScrubP95Ms: 40, + maxStartupMs: 250, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--output" || arg === "-o") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output"); + options.output = path.resolve(value); + continue; + } + if (arg === "--target-fps") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --target-fps value"); + } + options.targetFps = value; + continue; + } + if (arg === "--max-scrub-p95-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-scrub-p95-ms value"); + } + options.maxScrubP95Ms = value; + continue; + } + if (arg === "--max-startup-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-startup-ms value"); + } + options.maxStartupMs = value; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/analyze-playback-matrix-bottlenecks.js --input [--input ...] [--output ] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] + +Analyzes playback matrix JSON outputs and highlights prioritized bottlenecks.`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const result = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + result[key.trim()] = value.trim(); + } + return result; +} + +function average(values) { + if (values.length === 0) return null; + return values.reduce((acc, value) => acc + value, 0) / values.length; +} + +function max(values) { + if (values.length === 0) return null; + return Math.max(...values); +} + +function scoreIssue(issue, options) { + let score = 0; + if (issue.fpsMin !== null && issue.fpsMin < options.targetFps) { + score += (options.targetFps - issue.fpsMin) * 5; + } + if (issue.scrubP95 !== null && issue.scrubP95 > options.maxScrubP95Ms) { + score += issue.scrubP95 - 
options.maxScrubP95Ms; + } + if (issue.startupAvg !== null && issue.startupAvg > options.maxStartupMs) { + score += (issue.startupAvg - options.maxStartupMs) / 2; + } + return score; +} + +function formatValue(value, digits = 1) { + return value === null ? "n/a" : value.toFixed(digits); +} + +function collectIssues(files, options) { + const issues = []; + + for (const filePath of files) { + const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + + for (const report of reports) { + const playback = Array.isArray(report.playback_results) + ? report.playback_results + : []; + const scrub = Array.isArray(report.scrub_results) ? report.scrub_results : []; + + const fpsValues = playback + .map((entry) => entry.effective_fps) + .filter((entry) => typeof entry === "number"); + const startupValues = playback + .map((entry) => entry.startup_to_first_frame_ms) + .filter((entry) => typeof entry === "number"); + const scrubP95Values = scrub + .map((entry) => entry.p95_seek_time_ms) + .filter((entry) => typeof entry === "number"); + + const issue = { + platform, + gpu, + scenario, + recording: report.recording_name ?? path.basename(filePath), + format: report.is_fragmented ? "fragmented" : "mp4", + fpsMin: fpsValues.length ? 
Math.min(...fpsValues) : null, + startupAvg: average(startupValues), + scrubP95: max(scrubP95Values), + filePath, + }; + issue.score = scoreIssue(issue, options); + if (issue.score > 0) { + issues.push(issue); + } + } + } + + issues.sort((a, b) => b.score - a.score); + return issues; +} + +function recommendation(issue, options) { + const recommendations = []; + if (issue.fpsMin !== null && issue.fpsMin < options.targetFps) { + recommendations.push("inspect decode/render path and frame wait behavior"); + } + if (issue.scrubP95 !== null && issue.scrubP95 > options.maxScrubP95Ms) { + recommendations.push("investigate seek dispatch pressure and decoder reposition cost"); + } + if (issue.startupAvg !== null && issue.startupAvg > options.maxStartupMs) { + recommendations.push("optimize startup warmup and first-frame path"); + } + return recommendations.join("; "); +} + +function buildMarkdown(issues, options) { + let md = ""; + md += "# Playback Matrix Bottleneck Analysis\n\n"; + md += `Generated: ${new Date().toISOString()}\n\n`; + md += `Thresholds: target_fps=${options.targetFps}, max_scrub_p95_ms=${options.maxScrubP95Ms}, max_startup_ms=${options.maxStartupMs}\n\n`; + + if (issues.length === 0) { + md += "No bottlenecks detected for configured thresholds.\n"; + return md; + } + + md += "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; + md += "|---:|---|---|---|---|---|---:|---:|---:|---:|---|\n"; + issues.forEach((issue, index) => { + md += `| ${index + 1} | ${issue.platform} | ${issue.gpu} | ${issue.scenario} | ${issue.recording} | ${issue.format} | ${formatValue(issue.fpsMin)} | ${formatValue(issue.startupAvg)} | ${formatValue(issue.scrubP95)} | ${formatValue(issue.score, 2)} | ${recommendation(issue, options)} |\n`; + }); + md += "\n"; + return md; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if 
(options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const input of options.inputs) { + for (const filePath of collectJsonFiles(input)) { + files.add(filePath); + } + } + if (files.size === 0) { + throw new Error("No JSON files found"); + } + + const issues = collectIssues([...files], options); + const markdown = buildMarkdown(issues, options); + if (options.output) { + fs.writeFileSync(options.output, markdown, "utf8"); + console.log(`Wrote bottleneck analysis to ${options.output}`); + } else { + process.stdout.write(markdown); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} From b18eaf7d477c038235bfc9f4a0e73e654814eaa3 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:36:03 +0000 Subject: [PATCH 055/333] improve: scale playback frame polling with frame budget Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b535a28b50..b2dbfb1e86 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -174,6 +174,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 8. **Playback frame wait timeout now scales with target FPS (2026-02-13)** - Replaced fixed 200ms frame fetch waits with FPS-derived bounded timeout. - Reduces long stall windows on 60fps playback and improves real-time catch-up behavior. + - In-flight polling interval now scales with frame budget instead of fixed 5ms. 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index dacede787f..626b942f7b 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -388,6 +388,10 @@ impl Playback { .mul_f64(4.0) .max(Duration::from_millis(20)) .min(Duration::from_millis(80)); + let in_flight_poll_interval = frame_duration + .mul_f64(0.25) + .max(Duration::from_millis(1)) + .min(Duration::from_millis(4)); let mut frame_number = self.start_frame_number; let mut prefetch_buffer: VecDeque = VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); @@ -571,7 +575,7 @@ impl Playback { prefetch_buffer.push_back(prefetched); } } - _ = tokio::time::sleep(Duration::from_millis(5)) => { + _ = tokio::time::sleep(in_flight_poll_interval) => { let still_in_flight = main_in_flight .read() .map(|guard| guard.contains(&frame_number)) From a453f198acf14806dd01f622bd83495ba0255c62 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:36:03 +0000 Subject: [PATCH 056/333] improve: scale playback frame polling with frame budget Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b535a28b50..b2dbfb1e86 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -174,6 +174,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 8. **Playback frame wait timeout now scales with target FPS (2026-02-13)** - Replaced fixed 200ms frame fetch waits with FPS-derived bounded timeout. - Reduces long stall windows on 60fps playback and improves real-time catch-up behavior. + - In-flight polling interval now scales with frame budget instead of fixed 5ms. 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index dacede787f..626b942f7b 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -388,6 +388,10 @@ impl Playback { .mul_f64(4.0) .max(Duration::from_millis(20)) .min(Duration::from_millis(80)); + let in_flight_poll_interval = frame_duration + .mul_f64(0.25) + .max(Duration::from_millis(1)) + .min(Duration::from_millis(4)); let mut frame_number = self.start_frame_number; let mut prefetch_buffer: VecDeque = VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); @@ -571,7 +575,7 @@ impl Playback { prefetch_buffer.push_back(prefetched); } } - _ = tokio::time::sleep(Duration::from_millis(5)) => { + _ = tokio::time::sleep(in_flight_poll_interval) => { let still_in_flight = main_in_flight .read() .map(|guard| guard.contains(&frame_number)) From bf9ef4cf138c98c3b138ba70474d5cf7aac80a0a Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:37:25 +0000 Subject: [PATCH 057/333] improve: include bottleneck analysis in matrix finalization Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 +++ scripts/finalize-playback-matrix.js | 57 +++++++++++++++++++++++- 4 files changed, 63 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index fde0860b97..6fd60faced 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -77,6 +77,7 @@ node scripts/validate-playback-matrix.js --input /path/to/json-results --require # Finalize aggregate + status + validation artifacts node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented 
--target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b2dbfb1e86..7e91671450 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -206,6 +206,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 13. **Added matrix finalization helper (2026-02-13)** - `scripts/finalize-playback-matrix.js` generates aggregate markdown, status markdown, and validation JSON in one command. - Supports optional required format enforcement during finalization. + - Also emits bottleneck analysis markdown using configurable FPS/scrub/startup thresholds. 14. **Added matrix summary publisher (2026-02-13)** - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 46ff772df8..8c0569be65 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -113,6 +113,12 @@ One-shot finalize command: pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented ``` +Include optimization thresholds when finalizing: + +```bash +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +``` + Publish finalized artifacts into benchmark history: ```bash diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 886f2fc52b..9f281de9c4 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -9,6 +9,10 @@ function parseArgs(argv) { inputs: [], outputDir: null, requireFormats: [], + targetFps: 60, + maxScrubP95Ms: 40, + maxStartupMs: 250, + analyze: true, }; for (let i = 2; i < argv.length; i++) { @@ -38,6 +42,34 @@ function parseArgs(argv) { .filter(Boolean); continue; } + if (arg === "--target-fps") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --target-fps value"); + } + options.targetFps = value; + continue; + } + if (arg === "--max-scrub-p95-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-scrub-p95-ms value"); + } + options.maxScrubP95Ms = value; + continue; + } + if (arg === "--max-startup-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-startup-ms value"); + } + options.maxStartupMs = value; + continue; + } + if (arg === "--skip-analyze") { + options.analyze = false; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -45,9 +77,9 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] -Generates aggregate markdown, status markdown, and validation JSON for collected playback matrix outputs.`); +Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs.`); } function run(command, args) { @@ -76,6 +108,7 @@ function main() { const aggregatePath = path.join(options.outputDir, "playback-benchmark-aggregate.md"); const statusPath = path.join(options.outputDir, "playback-matrix-status.md"); const validationPath = path.join(options.outputDir, "playback-matrix-validation.json"); + const bottleneckPath = path.join(options.outputDir, "playback-bottlenecks.md"); const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; const statusArgs = ["scripts/build-playback-matrix-report.js"]; @@ -100,10 +133,30 @@ function main() { run("node", aggregateArgs); run("node", statusArgs); run("node", validateArgs); + if (options.analyze) { + const analyzeArgs = ["scripts/analyze-playback-matrix-bottlenecks.js"]; + for (const input of options.inputs) { + analyzeArgs.push("--input", input); + } + analyzeArgs.push( + "--output", + bottleneckPath, + "--target-fps", + String(options.targetFps), + "--max-scrub-p95-ms", + String(options.maxScrubP95Ms), + "--max-startup-ms", + String(options.maxStartupMs), + ); + run("node", analyzeArgs); 
+ } console.log(`Aggregate markdown: ${aggregatePath}`); console.log(`Status markdown: ${statusPath}`); console.log(`Validation JSON: ${validationPath}`); + if (options.analyze) { + console.log(`Bottleneck analysis: ${bottleneckPath}`); + } } try { From ebb2a1a3c4fdbded928548e094b37c5c9f2e1268 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:37:25 +0000 Subject: [PATCH 058/333] improve: include bottleneck analysis in matrix finalization Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 +++ scripts/finalize-playback-matrix.js | 57 +++++++++++++++++++++++- 4 files changed, 63 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index fde0860b97..6fd60faced 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -77,6 +77,7 @@ node scripts/validate-playback-matrix.js --input /path/to/json-results --require # Finalize aggregate + status + validation artifacts node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b2dbfb1e86..7e91671450 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -206,6 +206,7 
@@ cargo run -p cap-recording --example playback-test-runner -- full 13. **Added matrix finalization helper (2026-02-13)** - `scripts/finalize-playback-matrix.js` generates aggregate markdown, status markdown, and validation JSON in one command. - Supports optional required format enforcement during finalization. + - Also emits bottleneck analysis markdown using configurable FPS/scrub/startup thresholds. 14. **Added matrix summary publisher (2026-02-13)** - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 46ff772df8..8c0569be65 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -113,6 +113,12 @@ One-shot finalize command: pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented ``` +Include optimization thresholds when finalizing: + +```bash +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +``` + Publish finalized artifacts into benchmark history: ```bash diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 886f2fc52b..9f281de9c4 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -9,6 +9,10 @@ function parseArgs(argv) { inputs: [], outputDir: null, requireFormats: [], + targetFps: 60, + maxScrubP95Ms: 40, + maxStartupMs: 250, + analyze: true, }; for (let i = 2; i < argv.length; i++) { @@ -38,6 +42,34 @@ function parseArgs(argv) { .filter(Boolean); continue; } + if (arg === "--target-fps") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --target-fps value"); + } + options.targetFps = value; + continue; + } + if (arg === "--max-scrub-p95-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-scrub-p95-ms value"); + } + options.maxScrubP95Ms = value; + continue; + } + if (arg === "--max-startup-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-startup-ms value"); + } + options.maxStartupMs = value; + continue; + } + if (arg === "--skip-analyze") { + options.analyze = false; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -45,9 +77,9 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] 
--output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] -Generates aggregate markdown, status markdown, and validation JSON for collected playback matrix outputs.`); +Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs.`); } function run(command, args) { @@ -76,6 +108,7 @@ function main() { const aggregatePath = path.join(options.outputDir, "playback-benchmark-aggregate.md"); const statusPath = path.join(options.outputDir, "playback-matrix-status.md"); const validationPath = path.join(options.outputDir, "playback-matrix-validation.json"); + const bottleneckPath = path.join(options.outputDir, "playback-bottlenecks.md"); const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; const statusArgs = ["scripts/build-playback-matrix-report.js"]; @@ -100,10 +133,30 @@ function main() { run("node", aggregateArgs); run("node", statusArgs); run("node", validateArgs); + if (options.analyze) { + const analyzeArgs = ["scripts/analyze-playback-matrix-bottlenecks.js"]; + for (const input of options.inputs) { + analyzeArgs.push("--input", input); + } + analyzeArgs.push( + "--output", + bottleneckPath, + "--target-fps", + String(options.targetFps), + "--max-scrub-p95-ms", + String(options.maxScrubP95Ms), + "--max-startup-ms", + String(options.maxStartupMs), + ); + run("node", analyzeArgs); + } console.log(`Aggregate markdown: ${aggregatePath}`); console.log(`Status markdown: ${statusPath}`); console.log(`Validation JSON: ${validationPath}`); + if (options.analyze) { + console.log(`Bottleneck analysis: ${bottleneckPath}`); + } } try { From 34a9abde712c9b1aee93b0003f412aeac1901ef9 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:41:14 +0000 Subject: [PATCH 059/333] improve: adapt playback catch-up skipping to sustained lag Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + 
crates/editor/src/playback.rs | 42 ++++++++++++++++++++++++------ 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 7e91671450..a12b2bae9e 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -175,6 +175,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - Replaced fixed 200ms frame fetch waits with FPS-derived bounded timeout. - Reduces long stall windows on 60fps playback and improves real-time catch-up behavior. - In-flight polling interval now scales with frame budget instead of fixed 5ms. + - Catch-up skip threshold now adapts with late streak depth and logs skip event telemetry. 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 626b942f7b..b15e80603a 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -396,10 +396,12 @@ impl Playback { let mut prefetch_buffer: VecDeque = VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); let mut frame_cache = FrameCache::new(FRAME_CACHE_SIZE); - let aggressive_skip_threshold = 10u32; + let base_skip_threshold = (fps / 6).clamp(6, 16); + let mut late_streak = 0u32; + let mut skip_events = 0u64; let mut total_frames_rendered = 0u64; - let mut _total_frames_skipped = 0u64; + let mut total_frames_skipped = 0u64; let mut first_render_logged = false; let mut pending_seek_observation: Option<(u32, Instant)> = None; @@ -604,7 +606,7 @@ impl Playback { )) } else { frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; } } @@ -623,12 +625,12 @@ impl Playback { } else { prefetch_buffer.push_back(prefetched); frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; 
continue; } } else { frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; } } else { @@ -669,7 +671,7 @@ impl Playback { guard.remove(&frame_number); } frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; }, data = segment_media @@ -775,15 +777,20 @@ impl Playback { if frame_number < expected_frame { let frames_behind = expected_frame - frame_number; + late_streak = late_streak.saturating_add(1); + let threshold_reduction = (late_streak / 12).min(base_skip_threshold); + let dynamic_skip_threshold = + base_skip_threshold.saturating_sub(threshold_reduction); - if frames_behind <= aggressive_skip_threshold { + if frames_behind <= dynamic_skip_threshold { continue; } let skipped = frames_behind.saturating_sub(1); if skipped > 0 { frame_number += skipped; - _total_frames_skipped += skipped as u64; + total_frames_skipped += skipped as u64; + skip_events = skip_events.saturating_add(1); prefetch_buffer.retain(|p| p.frame_number >= frame_number); let _ = frame_request_tx.send(frame_number); @@ -795,10 +802,29 @@ impl Playback { { break 'playback; } + + if skipped >= fps.saturating_div(2) || skip_events % 120 == 0 { + info!( + skipped_frames = skipped, + frames_behind, + dynamic_skip_threshold, + late_streak, + total_frames_skipped, + skip_events, + "Playback applied frame skip catch-up" + ); + } } + } else { + late_streak = 0; } } + info!( + total_frames_rendered, + total_frames_skipped, skip_events, "Playback loop completed" + ); + stop_tx.send(true).ok(); event_tx.send(PlaybackEvent::Stop).ok(); From 555695398e43282b54ed2d45b97369518807913e Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:41:14 +0000 Subject: [PATCH 060/333] improve: adapt playback catch-up skipping to sustained lag Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 42 ++++++++++++++++++++++++------ 2 
files changed, 35 insertions(+), 8 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 7e91671450..a12b2bae9e 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -175,6 +175,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - Replaced fixed 200ms frame fetch waits with FPS-derived bounded timeout. - Reduces long stall windows on 60fps playback and improves real-time catch-up behavior. - In-flight polling interval now scales with frame budget instead of fixed 5ms. + - Catch-up skip threshold now adapts with late streak depth and logs skip event telemetry. 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 626b942f7b..b15e80603a 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -396,10 +396,12 @@ impl Playback { let mut prefetch_buffer: VecDeque = VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); let mut frame_cache = FrameCache::new(FRAME_CACHE_SIZE); - let aggressive_skip_threshold = 10u32; + let base_skip_threshold = (fps / 6).clamp(6, 16); + let mut late_streak = 0u32; + let mut skip_events = 0u64; let mut total_frames_rendered = 0u64; - let mut _total_frames_skipped = 0u64; + let mut total_frames_skipped = 0u64; let mut first_render_logged = false; let mut pending_seek_observation: Option<(u32, Instant)> = None; @@ -604,7 +606,7 @@ impl Playback { )) } else { frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; } } @@ -623,12 +625,12 @@ impl Playback { } else { prefetch_buffer.push_back(prefetched); frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; } } else { frame_number = frame_number.saturating_add(1); - 
_total_frames_skipped += 1; + total_frames_skipped += 1; continue; } } else { @@ -669,7 +671,7 @@ impl Playback { guard.remove(&frame_number); } frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; }, data = segment_media @@ -775,15 +777,20 @@ impl Playback { if frame_number < expected_frame { let frames_behind = expected_frame - frame_number; + late_streak = late_streak.saturating_add(1); + let threshold_reduction = (late_streak / 12).min(base_skip_threshold); + let dynamic_skip_threshold = + base_skip_threshold.saturating_sub(threshold_reduction); - if frames_behind <= aggressive_skip_threshold { + if frames_behind <= dynamic_skip_threshold { continue; } let skipped = frames_behind.saturating_sub(1); if skipped > 0 { frame_number += skipped; - _total_frames_skipped += skipped as u64; + total_frames_skipped += skipped as u64; + skip_events = skip_events.saturating_add(1); prefetch_buffer.retain(|p| p.frame_number >= frame_number); let _ = frame_request_tx.send(frame_number); @@ -795,10 +802,29 @@ impl Playback { { break 'playback; } + + if skipped >= fps.saturating_div(2) || skip_events % 120 == 0 { + info!( + skipped_frames = skipped, + frames_behind, + dynamic_skip_threshold, + late_streak, + total_frames_skipped, + skip_events, + "Playback applied frame skip catch-up" + ); + } } + } else { + late_streak = 0; } } + info!( + total_frames_rendered, + total_frames_skipped, skip_events, "Playback loop completed" + ); + stop_tx.send(true).ok(); event_tx.send(PlaybackEvent::Stop).ok(); From 916bf0656cd5af097c3ae8eb1ee52c42f2d2f544 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:42:39 +0000 Subject: [PATCH 061/333] improve: allow bottleneck attachment in matrix summary publishing Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 +- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 ++- 
scripts/publish-playback-matrix-summary.js | 28 +++++++++++++++++++--- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 6fd60faced..9a0067d5a6 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -80,7 +80,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 # Publish matrix artifacts into this benchmark history -node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a12b2bae9e..eb23355cc1 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -212,6 +212,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 14. **Added matrix summary publisher (2026-02-13)** - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. 
- Keeps matrix evidence updates consistent and repeatable. + - Supports optional bottleneck analysis attachment in published summary. 15. **Added bottleneck analyzer for continuous FPS optimization (2026-02-13)** - `scripts/analyze-playback-matrix-bottlenecks.js` ranks matrix cells by FPS, startup, and scrub threshold breaches. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 8c0569be65..4daf71ab12 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -125,7 +125,8 @@ Publish finalized artifacts into benchmark history: pnpm bench:playback:publish -- \ --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md \ --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ - --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json + --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ + --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md ``` Generate bottleneck analysis for optimization backlog: diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 0ee89fc8fe..45fb7fe107 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -7,6 +7,7 @@ function parseArgs(argv) { const options = { aggregateMd: null, statusMd: null, + bottlenecksMd: null, validationJson: null, target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), }; @@ -26,6 +27,10 @@ function parseArgs(argv) { options.statusMd = path.resolve(argv[++i] ?? ""); continue; } + if (arg === "--bottlenecks-md") { + options.bottlenecksMd = path.resolve(argv[++i] ?? ""); + continue; + } if (arg === "--validation-json") { options.validationJson = path.resolve(argv[++i] ?? 
""); continue; @@ -41,7 +46,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--target ] + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--target ] Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); } @@ -52,7 +57,7 @@ function ensureFile(filePath, label) { } } -function buildSummarySection(aggregateMd, statusMd, validationJson) { +function buildSummarySection(aggregateMd, statusMd, validationJson, bottlenecksMd) { const now = new Date().toISOString(); const validation = JSON.parse(validationJson); const status = validation.passed ? "✅ MATRIX PASS" : "❌ MATRIX FAIL"; @@ -89,6 +94,12 @@ function buildSummarySection(aggregateMd, statusMd, validationJson) { markdown += `${aggregateMd.trim()}\n\n`; markdown += "\n\n"; + if (bottlenecksMd) { + markdown += "
<details>\n<summary>Bottleneck Analysis</summary>\n\n"; + markdown += `${bottlenecksMd.trim()}\n\n`; + markdown += "</details>
\n\n"; + } + return markdown; } @@ -121,12 +132,23 @@ function main() { ensureFile(options.aggregateMd, "Aggregate markdown"); ensureFile(options.statusMd, "Status markdown"); ensureFile(options.validationJson, "Validation JSON"); + if (options.bottlenecksMd) { + ensureFile(options.bottlenecksMd, "Bottlenecks markdown"); + } ensureFile(options.target, "Target"); const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8"); const statusMd = fs.readFileSync(options.statusMd, "utf8"); const validationJson = fs.readFileSync(options.validationJson, "utf8"); - const summaryMd = buildSummarySection(aggregateMd, statusMd, validationJson); + const bottlenecksMd = options.bottlenecksMd + ? fs.readFileSync(options.bottlenecksMd, "utf8") + : null; + const summaryMd = buildSummarySection( + aggregateMd, + statusMd, + validationJson, + bottlenecksMd, + ); writeToBenchmarkHistory(options.target, summaryMd); console.log(`Published matrix summary into ${options.target}`); } From 1805d2ddfbdaa863a8a4ec9bafb7b5aecf1b964b Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:42:39 +0000 Subject: [PATCH 062/333] improve: allow bottleneck attachment in matrix summary publishing Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 +- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 ++- scripts/publish-playback-matrix-summary.js | 28 +++++++++++++++++++--- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 6fd60faced..9a0067d5a6 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -80,7 +80,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 
# Publish matrix artifacts into this benchmark history -node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a12b2bae9e..eb23355cc1 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -212,6 +212,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 14. **Added matrix summary publisher (2026-02-13)** - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. - Keeps matrix evidence updates consistent and repeatable. + - Supports optional bottleneck analysis attachment in published summary. 15. **Added bottleneck analyzer for continuous FPS optimization (2026-02-13)** - `scripts/analyze-playback-matrix-bottlenecks.js` ranks matrix cells by FPS, startup, and scrub threshold breaches. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 8c0569be65..4daf71ab12 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -125,7 +125,8 @@ Publish finalized artifacts into benchmark history: pnpm bench:playback:publish -- \ --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md \ --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ - --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json + --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ + --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md ``` Generate bottleneck analysis for optimization backlog: diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 0ee89fc8fe..45fb7fe107 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -7,6 +7,7 @@ function parseArgs(argv) { const options = { aggregateMd: null, statusMd: null, + bottlenecksMd: null, validationJson: null, target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), }; @@ -26,6 +27,10 @@ function parseArgs(argv) { options.statusMd = path.resolve(argv[++i] ?? ""); continue; } + if (arg === "--bottlenecks-md") { + options.bottlenecksMd = path.resolve(argv[++i] ?? ""); + continue; + } if (arg === "--validation-json") { options.validationJson = path.resolve(argv[++i] ?? 
""); continue; @@ -41,7 +46,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--target ] + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--target ] Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); } @@ -52,7 +57,7 @@ function ensureFile(filePath, label) { } } -function buildSummarySection(aggregateMd, statusMd, validationJson) { +function buildSummarySection(aggregateMd, statusMd, validationJson, bottlenecksMd) { const now = new Date().toISOString(); const validation = JSON.parse(validationJson); const status = validation.passed ? "✅ MATRIX PASS" : "❌ MATRIX FAIL"; @@ -89,6 +94,12 @@ function buildSummarySection(aggregateMd, statusMd, validationJson) { markdown += `${aggregateMd.trim()}\n\n`; markdown += "\n\n"; + if (bottlenecksMd) { + markdown += "
<details>\n<summary>Bottleneck Analysis</summary>\n\n"; + markdown += `${bottlenecksMd.trim()}\n\n`; + markdown += "</details>
\n\n"; + } + return markdown; } @@ -121,12 +132,23 @@ function main() { ensureFile(options.aggregateMd, "Aggregate markdown"); ensureFile(options.statusMd, "Status markdown"); ensureFile(options.validationJson, "Validation JSON"); + if (options.bottlenecksMd) { + ensureFile(options.bottlenecksMd, "Bottlenecks markdown"); + } ensureFile(options.target, "Target"); const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8"); const statusMd = fs.readFileSync(options.statusMd, "utf8"); const validationJson = fs.readFileSync(options.validationJson, "utf8"); - const summaryMd = buildSummarySection(aggregateMd, statusMd, validationJson); + const bottlenecksMd = options.bottlenecksMd + ? fs.readFileSync(options.bottlenecksMd, "utf8") + : null; + const summaryMd = buildSummarySection( + aggregateMd, + statusMd, + validationJson, + bottlenecksMd, + ); writeToBenchmarkHistory(options.target, summaryMd); console.log(`Published matrix summary into ${options.target}`); } From da918985fd3e236b4cf9e8f86915372b212c9862 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:43:52 +0000 Subject: [PATCH 063/333] improve: scale playback warmup buffering with fps Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index eb23355cc1..03adc437d3 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -176,6 +176,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - Reduces long stall windows on 60fps playback and improves real-time catch-up behavior. - In-flight polling interval now scales with frame budget instead of fixed 5ms. - Catch-up skip threshold now adapts with late streak depth and logs skip event telemetry. + - Warmup target and warmup timeout now scale with FPS, reducing startup buffering overhead. 8. 
**Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b15e80603a..80596cbe7f 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -405,11 +405,19 @@ impl Playback { let mut first_render_logged = false; let mut pending_seek_observation: Option<(u32, Instant)> = None; - let warmup_target_frames = 20usize; - let warmup_after_first_timeout = Duration::from_millis(1000); + let warmup_target_frames = (fps.saturating_div(4)).clamp(8, 16) as usize; + let warmup_after_first_timeout = frame_duration + .mul_f64((warmup_target_frames as f64) * 2.0) + .max(Duration::from_millis(200)) + .min(Duration::from_millis(700)); let warmup_no_frames_timeout = Duration::from_secs(5); let warmup_start = Instant::now(); let mut first_frame_time: Option = None; + info!( + warmup_target_frames, + warmup_after_first_timeout_ms = warmup_after_first_timeout.as_secs_f64() * 1000.0, + "Playback warmup configuration" + ); while !*stop_rx.borrow() { let should_start = if let Some(first_time) = first_frame_time { From db79fc444e3edd802631e9176f874e3fc20a96b9 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:43:52 +0000 Subject: [PATCH 064/333] improve: scale playback warmup buffering with fps Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index eb23355cc1..03adc437d3 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -176,6 +176,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - Reduces long stall windows on 60fps playback and improves real-time catch-up behavior. 
- In-flight polling interval now scales with frame budget instead of fixed 5ms. - Catch-up skip threshold now adapts with late streak depth and logs skip event telemetry. + - Warmup target and warmup timeout now scale with FPS, reducing startup buffering overhead. 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b15e80603a..80596cbe7f 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -405,11 +405,19 @@ impl Playback { let mut first_render_logged = false; let mut pending_seek_observation: Option<(u32, Instant)> = None; - let warmup_target_frames = 20usize; - let warmup_after_first_timeout = Duration::from_millis(1000); + let warmup_target_frames = (fps.saturating_div(4)).clamp(8, 16) as usize; + let warmup_after_first_timeout = frame_duration + .mul_f64((warmup_target_frames as f64) * 2.0) + .max(Duration::from_millis(200)) + .min(Duration::from_millis(700)); let warmup_no_frames_timeout = Duration::from_secs(5); let warmup_start = Instant::now(); let mut first_frame_time: Option = None; + info!( + warmup_target_frames, + warmup_after_first_timeout_ms = warmup_after_first_timeout.as_secs_f64() * 1000.0, + "Playback warmup configuration" + ); while !*stop_rx.borrow() { let should_start = if let Some(first_time) = first_frame_time { From 9ce4ebe405a3ad0da2e65ff8bf4e3e634653e29c Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:45:09 +0000 Subject: [PATCH 065/333] improve: support publish-target in matrix finalization Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 +++++ scripts/finalize-playback-matrix.js | 29 +++++++++++++++++++++++- 4 files changed, 36 insertions(+), 1 deletion(-) diff --git 
a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 9a0067d5a6..b3d485801d 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -78,6 +78,7 @@ node scripts/validate-playback-matrix.js --input /path/to/json-results --require # Finalize aggregate + status + validation artifacts node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 03adc437d3..4ad22c5d7a 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -209,6 +209,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/finalize-playback-matrix.js` generates aggregate markdown, status markdown, and validation JSON in one command. - Supports optional required format enforcement during finalization. - Also emits bottleneck analysis markdown using configurable FPS/scrub/startup thresholds. 
+ - Can optionally publish finalized artifacts directly into benchmark history target. 14. **Added matrix summary publisher (2026-02-13)** - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 4daf71ab12..abd991a5f2 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -119,6 +119,12 @@ Include optimization thresholds when finalizing: pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 ``` +Finalize and publish to benchmark history in one command: + +```bash +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md +``` + Publish finalized artifacts into benchmark history: ```bash diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 9f281de9c4..14ea83a436 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -13,6 +13,7 @@ function parseArgs(argv) { maxScrubP95Ms: 40, maxStartupMs: 250, analyze: true, + publishTarget: null, }; for (let i = 2; i < argv.length; i++) { @@ -70,6 +71,12 @@ function parseArgs(argv) { options.analyze = false; continue; } + if (arg === "--publish-target") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --publish-target"); + options.publishTarget = path.resolve(value); + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -77,7 +84,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input 
[--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs.`); } @@ -150,6 +157,23 @@ function main() { ); run("node", analyzeArgs); } + if (options.publishTarget) { + const publishArgs = [ + "scripts/publish-playback-matrix-summary.js", + "--aggregate-md", + aggregatePath, + "--status-md", + statusPath, + "--validation-json", + validationPath, + "--target", + options.publishTarget, + ]; + if (options.analyze) { + publishArgs.push("--bottlenecks-md", bottleneckPath); + } + run("node", publishArgs); + } console.log(`Aggregate markdown: ${aggregatePath}`); console.log(`Status markdown: ${statusPath}`); @@ -157,6 +181,9 @@ function main() { if (options.analyze) { console.log(`Bottleneck analysis: ${bottleneckPath}`); } + if (options.publishTarget) { + console.log(`Published target: ${options.publishTarget}`); + } } try { From 1e7036783bd71ded474946b6eae26ec2ba5de583 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:45:09 +0000 Subject: [PATCH 066/333] improve: support publish-target in matrix finalization Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 +++++ scripts/finalize-playback-matrix.js | 29 +++++++++++++++++++++++- 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 9a0067d5a6..b3d485801d 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -78,6 +78,7 @@ node 
scripts/validate-playback-matrix.js --input /path/to/json-results --require # Finalize aggregate + status + validation artifacts node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 03adc437d3..4ad22c5d7a 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -209,6 +209,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/finalize-playback-matrix.js` generates aggregate markdown, status markdown, and validation JSON in one command. - Supports optional required format enforcement during finalization. - Also emits bottleneck analysis markdown using configurable FPS/scrub/startup thresholds. + - Can optionally publish finalized artifacts directly into benchmark history target. 14. **Added matrix summary publisher (2026-02-13)** - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 4daf71ab12..abd991a5f2 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -119,6 +119,12 @@ Include optimization thresholds when finalizing: pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 ``` +Finalize and publish to benchmark history in one command: + +```bash +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md +``` + Publish finalized artifacts into benchmark history: ```bash diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 9f281de9c4..14ea83a436 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -13,6 +13,7 @@ function parseArgs(argv) { maxScrubP95Ms: 40, maxStartupMs: 250, analyze: true, + publishTarget: null, }; for (let i = 2; i < argv.length; i++) { @@ -70,6 +71,12 @@ function parseArgs(argv) { options.analyze = false; continue; } + if (arg === "--publish-target") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --publish-target"); + options.publishTarget = path.resolve(value); + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -77,7 +84,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] 
--output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs.`); } @@ -150,6 +157,23 @@ function main() { ); run("node", analyzeArgs); } + if (options.publishTarget) { + const publishArgs = [ + "scripts/publish-playback-matrix-summary.js", + "--aggregate-md", + aggregatePath, + "--status-md", + statusPath, + "--validation-json", + validationPath, + "--target", + options.publishTarget, + ]; + if (options.analyze) { + publishArgs.push("--bottlenecks-md", bottleneckPath); + } + run("node", publishArgs); + } console.log(`Aggregate markdown: ${aggregatePath}`); console.log(`Status markdown: ${statusPath}`); @@ -157,6 +181,9 @@ function main() { if (options.analyze) { console.log(`Bottleneck analysis: ${bottleneckPath}`); } + if (options.publishTarget) { + console.log(`Published target: ${options.publishTarget}`); + } } try { From 89335c082fd72455fbc10572bf35fb6a1e6ef788 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:46:27 +0000 Subject: [PATCH 067/333] improve: scale prefetch windows with playback fps Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 14 +++++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 4ad22c5d7a..2354fad87b 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -177,6 +177,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - In-flight polling interval now scales with frame budget instead of fixed 5ms. - Catch-up skip threshold now adapts with late streak depth and logs skip event telemetry. - Warmup target and warmup timeout now scale with FPS, reducing startup buffering overhead. 
+ - Prefetch ahead/behind windows now scale with FPS to reduce unnecessary decode pressure at lower targets. 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 80596cbe7f..b50c11be3a 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -37,8 +37,6 @@ use crate::{ const PREFETCH_BUFFER_SIZE: usize = 60; const PARALLEL_DECODE_TASKS: usize = 4; -const MAX_PREFETCH_AHEAD: u32 = 60; -const PREFETCH_BEHIND: u32 = 15; const FRAME_CACHE_SIZE: usize = 60; #[derive(Debug)] @@ -168,8 +166,14 @@ impl Playback { let mut prefetched_behind: HashSet = HashSet::new(); const INITIAL_PARALLEL_TASKS: usize = 4; const RAMP_UP_AFTER_FRAMES: u32 = 5; + let dynamic_prefetch_ahead = fps.clamp(30, 90); + let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); let mut cached_project = prefetch_project.borrow().clone(); + info!( + dynamic_prefetch_ahead, + dynamic_prefetch_behind, "Prefetch window configuration" + ); loop { if *prefetch_stop_rx.borrow() { @@ -199,14 +203,14 @@ impl Playback { in_flight_guard.clear(); } - if is_backward_seek || seek_distance > MAX_PREFETCH_AHEAD / 2 { + if is_backward_seek || seek_distance > dynamic_prefetch_ahead / 2 { in_flight = FuturesUnordered::new(); } } } let current_playback_frame = *playback_position_rx.borrow(); - let max_prefetch_frame = current_playback_frame + MAX_PREFETCH_AHEAD; + let max_prefetch_frame = current_playback_frame + dynamic_prefetch_ahead; let effective_parallel = if frames_decoded < RAMP_UP_AFTER_FRAMES { INITIAL_PARALLEL_TASKS @@ -279,7 +283,7 @@ impl Playback { } if in_flight.len() < effective_parallel { - for behind_offset in 1..=PREFETCH_BEHIND { + for behind_offset in 1..=dynamic_prefetch_behind { if in_flight.len() >= effective_parallel { break; } From 2e18292a5063541046bc310d72e31bab9edb6dbc Mon Sep 17 
00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:46:27 +0000 Subject: [PATCH 068/333] improve: scale prefetch windows with playback fps Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 14 +++++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 4ad22c5d7a..2354fad87b 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -177,6 +177,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - In-flight polling interval now scales with frame budget instead of fixed 5ms. - Catch-up skip threshold now adapts with late streak depth and logs skip event telemetry. - Warmup target and warmup timeout now scale with FPS, reducing startup buffering overhead. + - Prefetch ahead/behind windows now scale with FPS to reduce unnecessary decode pressure at lower targets. 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 80596cbe7f..b50c11be3a 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -37,8 +37,6 @@ use crate::{ const PREFETCH_BUFFER_SIZE: usize = 60; const PARALLEL_DECODE_TASKS: usize = 4; -const MAX_PREFETCH_AHEAD: u32 = 60; -const PREFETCH_BEHIND: u32 = 15; const FRAME_CACHE_SIZE: usize = 60; #[derive(Debug)] @@ -168,8 +166,14 @@ impl Playback { let mut prefetched_behind: HashSet = HashSet::new(); const INITIAL_PARALLEL_TASKS: usize = 4; const RAMP_UP_AFTER_FRAMES: u32 = 5; + let dynamic_prefetch_ahead = fps.clamp(30, 90); + let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); let mut cached_project = prefetch_project.borrow().clone(); + info!( + dynamic_prefetch_ahead, + dynamic_prefetch_behind, "Prefetch window configuration" + ); loop { if *prefetch_stop_rx.borrow() { @@ -199,14 +203,14 @@ impl Playback { in_flight_guard.clear(); } - if is_backward_seek || seek_distance > MAX_PREFETCH_AHEAD / 2 { + if is_backward_seek || seek_distance > dynamic_prefetch_ahead / 2 { in_flight = FuturesUnordered::new(); } } } let current_playback_frame = *playback_position_rx.borrow(); - let max_prefetch_frame = current_playback_frame + MAX_PREFETCH_AHEAD; + let max_prefetch_frame = current_playback_frame + dynamic_prefetch_ahead; let effective_parallel = if frames_decoded < RAMP_UP_AFTER_FRAMES { INITIAL_PARALLEL_TASKS @@ -279,7 +283,7 @@ impl Playback { } if in_flight.len() < effective_parallel { - for behind_offset in 1..=PREFETCH_BEHIND { + for behind_offset in 1..=dynamic_prefetch_behind { if in_flight.len() >= effective_parallel { break; } From 51602a437b1fde5e0dbd00c9aa29c0e278562ff2 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:48:07 +0000 Subject: [PATCH 069/333] improve: emit structured bottleneck artifacts in matrix analysis Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 
+ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + .../analyze-playback-matrix-bottlenecks.js | 34 ++++++++++++++++++- scripts/finalize-playback-matrix.js | 4 +++ 5 files changed, 40 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index b3d485801d..819746eec8 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -85,6 +85,7 @@ node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-mat # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --output-json /tmp/playback-bottlenecks.json --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 2354fad87b..dec6cf81b7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -220,6 +220,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 15. **Added bottleneck analyzer for continuous FPS optimization (2026-02-13)** - `scripts/analyze-playback-matrix-bottlenecks.js` ranks matrix cells by FPS, startup, and scrub threshold breaches. - Produces prioritized optimization backlog from real matrix evidence. + - Supports structured JSON output for automation and regression tracking. 
--- diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index abd991a5f2..e1503dd5f0 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -139,6 +139,7 @@ Generate bottleneck analysis for optimization backlog: ```bash pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tmp/playback-matrix-final/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tmp/playback-matrix-final/playback-bottlenecks.md --output-json /tmp/playback-matrix-final/playback-bottlenecks.json --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 ``` ## Evidence checklist diff --git a/scripts/analyze-playback-matrix-bottlenecks.js b/scripts/analyze-playback-matrix-bottlenecks.js index 74ca0b3294..ac91d0c8ce 100644 --- a/scripts/analyze-playback-matrix-bottlenecks.js +++ b/scripts/analyze-playback-matrix-bottlenecks.js @@ -7,6 +7,7 @@ function parseArgs(argv) { const options = { inputs: [], output: null, + outputJson: null, targetFps: 60, maxScrubP95Ms: 40, maxStartupMs: 250, @@ -31,6 +32,12 @@ function parseArgs(argv) { options.output = path.resolve(value); continue; } + if (arg === "--output-json") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-json"); + options.outputJson = path.resolve(value); + continue; + } if (arg === "--target-fps") { const value = Number.parseFloat(argv[++i] ?? ""); if (!Number.isFinite(value) || value <= 0) { @@ -62,7 +69,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/analyze-playback-matrix-bottlenecks.js --input [--input ...] [--output ] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] + console.log(`Usage: node scripts/analyze-playback-matrix-bottlenecks.js --input [--input ...] 
[--output ] [--output-json ] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] Analyzes playback matrix JSON outputs and highlights prioritized bottlenecks.`); } @@ -211,6 +218,23 @@ function buildMarkdown(issues, options) { return md; } +function buildJson(issues, options) { + return { + generatedAt: new Date().toISOString(), + thresholds: { + targetFps: options.targetFps, + maxScrubP95Ms: options.maxScrubP95Ms, + maxStartupMs: options.maxStartupMs, + }, + issueCount: issues.length, + issues: issues.map((issue, index) => ({ + rank: index + 1, + ...issue, + recommendation: recommendation(issue, options), + })), + }; +} + function main() { const options = parseArgs(process.argv); if (options.help) { @@ -239,6 +263,14 @@ function main() { } else { process.stdout.write(markdown); } + if (options.outputJson) { + fs.writeFileSync( + options.outputJson, + JSON.stringify(buildJson(issues, options), null, 2), + "utf8", + ); + console.log(`Wrote bottleneck analysis JSON to ${options.outputJson}`); + } } try { diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 14ea83a436..d542d6f1b5 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -116,6 +116,7 @@ function main() { const statusPath = path.join(options.outputDir, "playback-matrix-status.md"); const validationPath = path.join(options.outputDir, "playback-matrix-validation.json"); const bottleneckPath = path.join(options.outputDir, "playback-bottlenecks.md"); + const bottleneckJsonPath = path.join(options.outputDir, "playback-bottlenecks.json"); const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; const statusArgs = ["scripts/build-playback-matrix-report.js"]; @@ -148,6 +149,8 @@ function main() { analyzeArgs.push( "--output", bottleneckPath, + "--output-json", + bottleneckJsonPath, "--target-fps", String(options.targetFps), "--max-scrub-p95-ms", @@ -180,6 +183,7 @@ function main() { console.log(`Validation JSON: 
${validationPath}`); if (options.analyze) { console.log(`Bottleneck analysis: ${bottleneckPath}`); + console.log(`Bottleneck analysis JSON: ${bottleneckJsonPath}`); } if (options.publishTarget) { console.log(`Published target: ${options.publishTarget}`); From 778d98b2d8b2b5fbd4ed2310a5961e2255d2e56a Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:48:07 +0000 Subject: [PATCH 070/333] improve: emit structured bottleneck artifacts in matrix analysis Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + .../analyze-playback-matrix-bottlenecks.js | 34 ++++++++++++++++++- scripts/finalize-playback-matrix.js | 4 +++ 5 files changed, 40 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index b3d485801d..819746eec8 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -85,6 +85,7 @@ node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-mat # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --output-json /tmp/playback-bottlenecks.json --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 2354fad87b..dec6cf81b7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -220,6 +220,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 15. 
**Added bottleneck analyzer for continuous FPS optimization (2026-02-13)** - `scripts/analyze-playback-matrix-bottlenecks.js` ranks matrix cells by FPS, startup, and scrub threshold breaches. - Produces prioritized optimization backlog from real matrix evidence. + - Supports structured JSON output for automation and regression tracking. --- diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index abd991a5f2..e1503dd5f0 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -139,6 +139,7 @@ Generate bottleneck analysis for optimization backlog: ```bash pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tmp/playback-matrix-final/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tmp/playback-matrix-final/playback-bottlenecks.md --output-json /tmp/playback-matrix-final/playback-bottlenecks.json --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 ``` ## Evidence checklist diff --git a/scripts/analyze-playback-matrix-bottlenecks.js b/scripts/analyze-playback-matrix-bottlenecks.js index 74ca0b3294..ac91d0c8ce 100644 --- a/scripts/analyze-playback-matrix-bottlenecks.js +++ b/scripts/analyze-playback-matrix-bottlenecks.js @@ -7,6 +7,7 @@ function parseArgs(argv) { const options = { inputs: [], output: null, + outputJson: null, targetFps: 60, maxScrubP95Ms: 40, maxStartupMs: 250, @@ -31,6 +32,12 @@ function parseArgs(argv) { options.output = path.resolve(value); continue; } + if (arg === "--output-json") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-json"); + options.outputJson = path.resolve(value); + continue; + } if (arg === "--target-fps") { const value = Number.parseFloat(argv[++i] ?? 
""); if (!Number.isFinite(value) || value <= 0) { @@ -62,7 +69,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/analyze-playback-matrix-bottlenecks.js --input [--input ...] [--output ] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] + console.log(`Usage: node scripts/analyze-playback-matrix-bottlenecks.js --input [--input ...] [--output ] [--output-json ] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] Analyzes playback matrix JSON outputs and highlights prioritized bottlenecks.`); } @@ -211,6 +218,23 @@ function buildMarkdown(issues, options) { return md; } +function buildJson(issues, options) { + return { + generatedAt: new Date().toISOString(), + thresholds: { + targetFps: options.targetFps, + maxScrubP95Ms: options.maxScrubP95Ms, + maxStartupMs: options.maxStartupMs, + }, + issueCount: issues.length, + issues: issues.map((issue, index) => ({ + rank: index + 1, + ...issue, + recommendation: recommendation(issue, options), + })), + }; +} + function main() { const options = parseArgs(process.argv); if (options.help) { @@ -239,6 +263,14 @@ function main() { } else { process.stdout.write(markdown); } + if (options.outputJson) { + fs.writeFileSync( + options.outputJson, + JSON.stringify(buildJson(issues, options), null, 2), + "utf8", + ); + console.log(`Wrote bottleneck analysis JSON to ${options.outputJson}`); + } } try { diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 14ea83a436..d542d6f1b5 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -116,6 +116,7 @@ function main() { const statusPath = path.join(options.outputDir, "playback-matrix-status.md"); const validationPath = path.join(options.outputDir, "playback-matrix-validation.json"); const bottleneckPath = path.join(options.outputDir, "playback-bottlenecks.md"); + const bottleneckJsonPath = path.join(options.outputDir, "playback-bottlenecks.json"); 
const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; const statusArgs = ["scripts/build-playback-matrix-report.js"]; @@ -148,6 +149,8 @@ function main() { analyzeArgs.push( "--output", bottleneckPath, + "--output-json", + bottleneckJsonPath, "--target-fps", String(options.targetFps), "--max-scrub-p95-ms", @@ -180,6 +183,7 @@ function main() { console.log(`Validation JSON: ${validationPath}`); if (options.analyze) { console.log(`Bottleneck analysis: ${bottleneckPath}`); + console.log(`Bottleneck analysis JSON: ${bottleneckJsonPath}`); } if (options.publishTarget) { console.log(`Published target: ${options.publishTarget}`); From 9f78eb730b00c8539b8dc75dd2270e1bb8b978d9 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:49:33 +0000 Subject: [PATCH 071/333] improve: scale prefetch decode parallelism for high-fps playback Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 15 +++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index dec6cf81b7..b49d91a9ce 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -178,6 +178,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - Catch-up skip threshold now adapts with late streak depth and logs skip event telemetry. - Warmup target and warmup timeout now scale with FPS, reducing startup buffering overhead. - Prefetch ahead/behind windows now scale with FPS to reduce unnecessary decode pressure at lower targets. + - Prefetch parallelism now scales with FPS target to increase decode throughput under 60fps workloads. 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b50c11be3a..06a01210dd 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -164,15 +164,22 @@ impl Playback { let mut in_flight: FuturesUnordered = FuturesUnordered::new(); let mut frames_decoded: u32 = 0; let mut prefetched_behind: HashSet = HashSet::new(); - const INITIAL_PARALLEL_TASKS: usize = 4; const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); + let dynamic_parallel_tasks = if fps >= 60 { + 6 + } else if fps >= 45 { + 5 + } else { + PARALLEL_DECODE_TASKS + }; + let initial_parallel_tasks = dynamic_parallel_tasks.min(4); let mut cached_project = prefetch_project.borrow().clone(); info!( dynamic_prefetch_ahead, - dynamic_prefetch_behind, "Prefetch window configuration" + dynamic_prefetch_behind, dynamic_parallel_tasks, "Prefetch window configuration" ); loop { @@ -213,9 +220,9 @@ impl Playback { let max_prefetch_frame = current_playback_frame + dynamic_prefetch_ahead; let effective_parallel = if frames_decoded < RAMP_UP_AFTER_FRAMES { - INITIAL_PARALLEL_TASKS + initial_parallel_tasks } else { - PARALLEL_DECODE_TASKS + dynamic_parallel_tasks }; while in_flight.len() < effective_parallel { From a0d688520b8f35bbb774275efddb429d4b7edb34 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:49:33 +0000 Subject: [PATCH 072/333] improve: scale prefetch decode parallelism for high-fps playback Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 15 +++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index dec6cf81b7..b49d91a9ce 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -178,6 +178,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - 
Catch-up skip threshold now adapts with late streak depth and logs skip event telemetry. - Warmup target and warmup timeout now scale with FPS, reducing startup buffering overhead. - Prefetch ahead/behind windows now scale with FPS to reduce unnecessary decode pressure at lower targets. + - Prefetch parallelism now scales with FPS target to increase decode throughput under 60fps workloads. 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b50c11be3a..06a01210dd 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -164,15 +164,22 @@ impl Playback { let mut in_flight: FuturesUnordered = FuturesUnordered::new(); let mut frames_decoded: u32 = 0; let mut prefetched_behind: HashSet = HashSet::new(); - const INITIAL_PARALLEL_TASKS: usize = 4; const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); + let dynamic_parallel_tasks = if fps >= 60 { + 6 + } else if fps >= 45 { + 5 + } else { + PARALLEL_DECODE_TASKS + }; + let initial_parallel_tasks = dynamic_parallel_tasks.min(4); let mut cached_project = prefetch_project.borrow().clone(); info!( dynamic_prefetch_ahead, - dynamic_prefetch_behind, "Prefetch window configuration" + dynamic_prefetch_behind, dynamic_parallel_tasks, "Prefetch window configuration" ); loop { @@ -213,9 +220,9 @@ impl Playback { let max_prefetch_frame = current_playback_frame + dynamic_prefetch_ahead; let effective_parallel = if frames_decoded < RAMP_UP_AFTER_FRAMES { - INITIAL_PARALLEL_TASKS + initial_parallel_tasks } else { - PARALLEL_DECODE_TASKS + dynamic_parallel_tasks }; while in_flight.len() < effective_parallel { From b41410f5b6edfcf1fa503630d7dab46c47bed753 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 21:53:45 +0000 
Subject: [PATCH 073/333] improve: skip no-op playhead state updates in tauri commands Co-authored-by: Richie McIlroy --- apps/desktop/src-tauri/src/lib.rs | 42 +++++++++++++++++++++--------- crates/editor/PLAYBACK-FINDINGS.md | 1 + 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs index c00d465ced..795fbe7710 100644 --- a/apps/desktop/src-tauri/src/lib.rs +++ b/apps/desktop/src-tauri/src/lib.rs @@ -1915,15 +1915,24 @@ async fn set_playhead_position( editor_instance: WindowEditorInstance, frame_number: u32, ) -> Result<(), String> { - editor_instance - .modify_and_emit_state(|state| { - state.playhead_position = frame_number; - }) - .await; + let state_changed = { + let state = editor_instance.state.lock().await; + state.playhead_position != frame_number + }; - let playback_handle = { + if state_changed { + editor_instance + .modify_and_emit_state(|state| { + state.playhead_position = frame_number; + }) + .await; + } + + let playback_handle = if state_changed { let state = editor_instance.state.lock().await; state.playback_task.clone() + } else { + None }; if let Some(handle) = playback_handle { @@ -2548,15 +2557,24 @@ async fn is_camera_window_open(app: AppHandle) -> bool { #[specta::specta] #[instrument(skip(editor_instance))] async fn seek_to(editor_instance: WindowEditorInstance, frame_number: u32) -> Result<(), String> { - editor_instance - .modify_and_emit_state(|state| { - state.playhead_position = frame_number; - }) - .await; + let state_changed = { + let state = editor_instance.state.lock().await; + state.playhead_position != frame_number + }; - let playback_handle = { + if state_changed { + editor_instance + .modify_and_emit_state(|state| { + state.playhead_position = frame_number; + }) + .await; + } + + let playback_handle = if state_changed { let state = editor_instance.state.lock().await; state.playback_task.clone() + } else { + None }; if let Some(handle) = 
playback_handle { diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b49d91a9ce..1a0c4ff43c 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -145,6 +145,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - `seek_to` and `set_playhead_position` commands now forward seek requests to active playback. - Timeline seek no longer tears down and recreates playback while playing. - Seek signaling now uses watch semantics so only latest frame target is consumed under heavy scrub load. + - Tauri playhead/seek commands now skip no-op same-frame state emission to reduce state/event churn. 3. **Lazy decoder pool warmup on macOS AVAssetReader (2026-02-13)** - Initial warmup now creates only a small subset of decoder instances. From 583e26b094efed4c156f75125e741ca8fa0efee4 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:53:45 +0000 Subject: [PATCH 074/333] improve: skip no-op playhead state updates in tauri commands Co-authored-by: Richie McIlroy --- apps/desktop/src-tauri/src/lib.rs | 42 +++++++++++++++++++++--------- crates/editor/PLAYBACK-FINDINGS.md | 1 + 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs index c00d465ced..795fbe7710 100644 --- a/apps/desktop/src-tauri/src/lib.rs +++ b/apps/desktop/src-tauri/src/lib.rs @@ -1915,15 +1915,24 @@ async fn set_playhead_position( editor_instance: WindowEditorInstance, frame_number: u32, ) -> Result<(), String> { - editor_instance - .modify_and_emit_state(|state| { - state.playhead_position = frame_number; - }) - .await; + let state_changed = { + let state = editor_instance.state.lock().await; + state.playhead_position != frame_number + }; - let playback_handle = { + if state_changed { + editor_instance + .modify_and_emit_state(|state| { + state.playhead_position = frame_number; + }) + .await; + } + + let playback_handle = if 
state_changed { let state = editor_instance.state.lock().await; state.playback_task.clone() + } else { + None }; if let Some(handle) = playback_handle { @@ -2548,15 +2557,24 @@ async fn is_camera_window_open(app: AppHandle) -> bool { #[specta::specta] #[instrument(skip(editor_instance))] async fn seek_to(editor_instance: WindowEditorInstance, frame_number: u32) -> Result<(), String> { - editor_instance - .modify_and_emit_state(|state| { - state.playhead_position = frame_number; - }) - .await; + let state_changed = { + let state = editor_instance.state.lock().await; + state.playhead_position != frame_number + }; - let playback_handle = { + if state_changed { + editor_instance + .modify_and_emit_state(|state| { + state.playhead_position = frame_number; + }) + .await; + } + + let playback_handle = if state_changed { let state = editor_instance.state.lock().await; state.playback_task.clone() + } else { + None }; if let Some(handle) = playback_handle { diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b49d91a9ce..1a0c4ff43c 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -145,6 +145,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - `seek_to` and `set_playhead_position` commands now forward seek requests to active playback. - Timeline seek no longer tears down and recreates playback while playing. - Seek signaling now uses watch semantics so only latest frame target is consumed under heavy scrub load. + - Tauri playhead/seek commands now skip no-op same-frame state emission to reduce state/event churn. 3. **Lazy decoder pool warmup on macOS AVAssetReader (2026-02-13)** - Initial warmup now creates only a small subset of decoder instances. 
From 5c35a6e9322458a38b469797c3e76cc3b2fd8922 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:00:15 +0000 Subject: [PATCH 075/333] improve: add baseline comparison gating for playback benchmarks Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 7 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 + package.json | 1 + scripts/compare-playback-benchmark-runs.js | 273 +++++++++++++++++++++ 5 files changed, 290 insertions(+) create mode 100644 scripts/compare-playback-benchmark-runs.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 819746eec8..62434b52b2 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -86,6 +86,9 @@ node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-mat # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --output-json /tmp/playback-bottlenecks.json --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 + +# Compare candidate against baseline and flag regressions +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 1a0c4ff43c..84f3072ba4 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -224,6 +224,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Produces prioritized optimization 
backlog from real matrix evidence. - Supports structured JSON output for automation and regression tracking. +16. **Added baseline-vs-candidate comparator for regression gating (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` compares candidate matrix outputs against baseline outputs. + - Flags regressions when FPS drops or startup/scrub latency increase beyond configured tolerance. + - Exits non-zero on regressions so matrix-driven optimization loops can be gated automatically. + --- ## Root Cause Analysis Archive @@ -328,6 +333,7 @@ Decoder Pipeline: 17. Added one-shot finalization script for aggregate + status + validation outputs. 18. Added benchmark history publisher script for finalized matrix artifacts. 19. Added matrix bottleneck analysis script for prioritized FPS optimization follow-up. +20. Added baseline-vs-candidate comparison script to gate regressions in optimization loops. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -348,6 +354,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: added one-shot matrix artifact finalization workflow. - `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. +- `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index e1503dd5f0..069f2f0a53 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -142,6 +142,12 @@ pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tm pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tmp/playback-matrix-final/playback-bottlenecks.md --output-json /tmp/playback-matrix-final/playback-bottlenecks.json --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 ``` +Compare candidate run against baseline and fail on regressions: + +```bash +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 +``` + ## Evidence checklist 1. Confirm all matrix rows exist. diff --git a/package.json b/package.json index 93f760e48f..4f81b478cc 100644 --- a/package.json +++ b/package.json @@ -48,6 +48,7 @@ "bench:playback:finalize": "node scripts/finalize-playback-matrix.js", "bench:playback:publish": "node scripts/publish-playback-matrix-summary.js", "bench:playback:analyze": "node scripts/analyze-playback-matrix-bottlenecks.js", + "bench:playback:compare": "node scripts/compare-playback-benchmark-runs.js", "clean": "find . 
-name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js new file mode 100644 index 0000000000..a7a7eaca22 --- /dev/null +++ b/scripts/compare-playback-benchmark-runs.js @@ -0,0 +1,273 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + baseline: null, + candidate: null, + output: null, + allowFpsDrop: 2, + allowStartupIncreaseMs: 25, + allowScrubP95IncreaseMs: 5, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--baseline") { + options.baseline = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--candidate") { + options.candidate = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--output" || arg === "-o") { + options.output = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--allow-fps-drop") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-fps-drop value"); + } + options.allowFpsDrop = value; + continue; + } + if (arg === "--allow-startup-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-startup-increase-ms value"); + } + options.allowStartupIncreaseMs = value; + continue; + } + if (arg === "--allow-scrub-p95-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-scrub-p95-increase-ms value"); + } + options.allowScrubP95IncreaseMs = value; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline --candidate [--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] + +Compares baseline and candidate playback matrix JSON outputs and flags regressions.`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Path does not exist: ${targetPath}`); + } + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const parsed = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + parsed[key.trim()] = value.trim(); + } + return parsed; +} + +function average(values) { + if (values.length === 0) return null; + return values.reduce((acc, value) => acc + value, 0) / values.length; +} + +function maximum(values) { + if (values.length === 0) return null; + return Math.max(...values); +} + +function collectMetrics(files) { + const rows = new Map(); + + for (const filePath of files) { + const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? 
"unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + + for (const report of reports) { + const key = `${platform}|${gpu}|${scenario}|${report.recording_name ?? "unknown"}|${report.is_fragmented ? "fragmented" : "mp4"}`; + + const playback = Array.isArray(report.playback_results) + ? report.playback_results + : []; + const scrub = Array.isArray(report.scrub_results) + ? report.scrub_results + : []; + + const fpsValues = playback + .map((entry) => entry.effective_fps) + .filter((entry) => typeof entry === "number"); + const startupValues = playback + .map((entry) => entry.startup_to_first_frame_ms) + .filter((entry) => typeof entry === "number"); + const scrubP95Values = scrub + .map((entry) => entry.p95_seek_time_ms) + .filter((entry) => typeof entry === "number"); + + rows.set(key, { + key, + platform, + gpu, + scenario, + recording: report.recording_name ?? "unknown", + format: report.is_fragmented ? "fragmented" : "mp4", + fpsMin: fpsValues.length ? Math.min(...fpsValues) : null, + startupAvg: average(startupValues), + scrubP95Max: maximum(scrubP95Values), + }); + } + } + + return rows; +} + +function delta(candidate, baseline) { + if (candidate === null || baseline === null) return null; + return candidate - baseline; +} + +function formatNumber(value, digits = 2) { + return value === null ? 
"n/a" : value.toFixed(digits); +} + +function compareMetrics(baselineRows, candidateRows, options) { + const comparisons = []; + + for (const [key, candidate] of candidateRows) { + const baseline = baselineRows.get(key); + if (!baseline) continue; + + const fpsDelta = delta(candidate.fpsMin, baseline.fpsMin); + const startupDelta = delta(candidate.startupAvg, baseline.startupAvg); + const scrubDelta = delta(candidate.scrubP95Max, baseline.scrubP95Max); + + const regressions = []; + if (fpsDelta !== null && fpsDelta < -options.allowFpsDrop) { + regressions.push(`fps_drop=${formatNumber(fpsDelta)}`); + } + if ( + startupDelta !== null && + startupDelta > options.allowStartupIncreaseMs + ) { + regressions.push(`startup_increase=${formatNumber(startupDelta)}`); + } + if (scrubDelta !== null && scrubDelta > options.allowScrubP95IncreaseMs) { + regressions.push(`scrub_p95_increase=${formatNumber(scrubDelta)}`); + } + + comparisons.push({ + platform: candidate.platform, + gpu: candidate.gpu, + scenario: candidate.scenario, + recording: candidate.recording, + format: candidate.format, + fpsDelta, + startupDelta, + scrubDelta, + regressions, + }); + } + + comparisons.sort((a, b) => b.regressions.length - a.regressions.length); + return comparisons; +} + +function toMarkdown(comparisons, options) { + const regressions = comparisons.filter( + (entry) => entry.regressions.length > 0, + ); + let md = ""; + md += "# Playback Benchmark Comparison\n\n"; + md += `Generated: ${new Date().toISOString()}\n\n`; + md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; + md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}\n\n`; + md += + "| Platform | GPU | Scenario | Recording | Format | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; + md += "|---|---|---|---|---|---:|---:|---:|---|\n"; + for (const row of comparisons) { + md += `| 
${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? row.regressions.join(", ") : "none"} |\n`; + } + md += "\n"; + return md; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if (!options.baseline || !options.candidate) { + throw new Error("--baseline and --candidate are required"); + } + + const baselineFiles = collectJsonFiles(options.baseline); + const candidateFiles = collectJsonFiles(options.candidate); + if (baselineFiles.length === 0) { + throw new Error("No baseline JSON files found"); + } + if (candidateFiles.length === 0) { + throw new Error("No candidate JSON files found"); + } + + const baselineRows = collectMetrics(baselineFiles); + const candidateRows = collectMetrics(candidateFiles); + const comparisons = compareMetrics(baselineRows, candidateRows, options); + const markdown = toMarkdown(comparisons, options); + + if (options.output) { + fs.writeFileSync(options.output, markdown, "utf8"); + console.log(`Wrote comparison report to ${options.output}`); + } else { + process.stdout.write(markdown); + } + + if (comparisons.some((entry) => entry.regressions.length > 0)) { + process.exit(1); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} From 8a38c0caf7a53dc5e669cc20f386631eeb19221e Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:00:15 +0000 Subject: [PATCH 076/333] improve: add baseline comparison gating for playback benchmarks Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 7 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 + package.json | 1 + scripts/compare-playback-benchmark-runs.js | 273 +++++++++++++++++++++ 5 files changed, 290 insertions(+) create mode 100644 scripts/compare-playback-benchmark-runs.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 819746eec8..62434b52b2 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -86,6 +86,9 @@ node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-mat # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --output-json /tmp/playback-bottlenecks.json --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 + +# Compare candidate against baseline and flag regressions +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 1a0c4ff43c..84f3072ba4 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -224,6 +224,11 @@ cargo run -p cap-recording --example 
playback-test-runner -- full - Produces prioritized optimization backlog from real matrix evidence. - Supports structured JSON output for automation and regression tracking. +16. **Added baseline-vs-candidate comparator for regression gating (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` compares candidate matrix outputs against baseline outputs. + - Flags regressions when FPS drops or startup/scrub latency increase beyond configured tolerance. + - Exits non-zero on regressions so matrix-driven optimization loops can be gated automatically. + --- ## Root Cause Analysis Archive @@ -328,6 +333,7 @@ Decoder Pipeline: 17. Added one-shot finalization script for aggregate + status + validation outputs. 18. Added benchmark history publisher script for finalized matrix artifacts. 19. Added matrix bottleneck analysis script for prioritized FPS optimization follow-up. +20. Added baseline-vs-candidate comparison script to gate regressions in optimization loops. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -348,6 +354,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: added one-shot matrix artifact finalization workflow. - `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. +- `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index e1503dd5f0..069f2f0a53 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -142,6 +142,12 @@ pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tm pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tmp/playback-matrix-final/playback-bottlenecks.md --output-json /tmp/playback-matrix-final/playback-bottlenecks.json --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 ``` +Compare candidate run against baseline and fail on regressions: + +```bash +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 +``` + ## Evidence checklist 1. Confirm all matrix rows exist. diff --git a/package.json b/package.json index 93f760e48f..4f81b478cc 100644 --- a/package.json +++ b/package.json @@ -48,6 +48,7 @@ "bench:playback:finalize": "node scripts/finalize-playback-matrix.js", "bench:playback:publish": "node scripts/publish-playback-matrix-summary.js", "bench:playback:analyze": "node scripts/analyze-playback-matrix-bottlenecks.js", + "bench:playback:compare": "node scripts/compare-playback-benchmark-runs.js", "clean": "find . 
-name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js new file mode 100644 index 0000000000..a7a7eaca22 --- /dev/null +++ b/scripts/compare-playback-benchmark-runs.js @@ -0,0 +1,273 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + baseline: null, + candidate: null, + output: null, + allowFpsDrop: 2, + allowStartupIncreaseMs: 25, + allowScrubP95IncreaseMs: 5, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--baseline") { + options.baseline = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--candidate") { + options.candidate = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--output" || arg === "-o") { + options.output = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--allow-fps-drop") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-fps-drop value"); + } + options.allowFpsDrop = value; + continue; + } + if (arg === "--allow-startup-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-startup-increase-ms value"); + } + options.allowStartupIncreaseMs = value; + continue; + } + if (arg === "--allow-scrub-p95-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-scrub-p95-increase-ms value"); + } + options.allowScrubP95IncreaseMs = value; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline --candidate [--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] + +Compares baseline and candidate playback matrix JSON outputs and flags regressions.`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Path does not exist: ${targetPath}`); + } + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const parsed = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + parsed[key.trim()] = value.trim(); + } + return parsed; +} + +function average(values) { + if (values.length === 0) return null; + return values.reduce((acc, value) => acc + value, 0) / values.length; +} + +function maximum(values) { + if (values.length === 0) return null; + return Math.max(...values); +} + +function collectMetrics(files) { + const rows = new Map(); + + for (const filePath of files) { + const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? 
"unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + + for (const report of reports) { + const key = `${platform}|${gpu}|${scenario}|${report.recording_name ?? "unknown"}|${report.is_fragmented ? "fragmented" : "mp4"}`; + + const playback = Array.isArray(report.playback_results) + ? report.playback_results + : []; + const scrub = Array.isArray(report.scrub_results) + ? report.scrub_results + : []; + + const fpsValues = playback + .map((entry) => entry.effective_fps) + .filter((entry) => typeof entry === "number"); + const startupValues = playback + .map((entry) => entry.startup_to_first_frame_ms) + .filter((entry) => typeof entry === "number"); + const scrubP95Values = scrub + .map((entry) => entry.p95_seek_time_ms) + .filter((entry) => typeof entry === "number"); + + rows.set(key, { + key, + platform, + gpu, + scenario, + recording: report.recording_name ?? "unknown", + format: report.is_fragmented ? "fragmented" : "mp4", + fpsMin: fpsValues.length ? Math.min(...fpsValues) : null, + startupAvg: average(startupValues), + scrubP95Max: maximum(scrubP95Values), + }); + } + } + + return rows; +} + +function delta(candidate, baseline) { + if (candidate === null || baseline === null) return null; + return candidate - baseline; +} + +function formatNumber(value, digits = 2) { + return value === null ? 
"n/a" : value.toFixed(digits); +} + +function compareMetrics(baselineRows, candidateRows, options) { + const comparisons = []; + + for (const [key, candidate] of candidateRows) { + const baseline = baselineRows.get(key); + if (!baseline) continue; + + const fpsDelta = delta(candidate.fpsMin, baseline.fpsMin); + const startupDelta = delta(candidate.startupAvg, baseline.startupAvg); + const scrubDelta = delta(candidate.scrubP95Max, baseline.scrubP95Max); + + const regressions = []; + if (fpsDelta !== null && fpsDelta < -options.allowFpsDrop) { + regressions.push(`fps_drop=${formatNumber(fpsDelta)}`); + } + if ( + startupDelta !== null && + startupDelta > options.allowStartupIncreaseMs + ) { + regressions.push(`startup_increase=${formatNumber(startupDelta)}`); + } + if (scrubDelta !== null && scrubDelta > options.allowScrubP95IncreaseMs) { + regressions.push(`scrub_p95_increase=${formatNumber(scrubDelta)}`); + } + + comparisons.push({ + platform: candidate.platform, + gpu: candidate.gpu, + scenario: candidate.scenario, + recording: candidate.recording, + format: candidate.format, + fpsDelta, + startupDelta, + scrubDelta, + regressions, + }); + } + + comparisons.sort((a, b) => b.regressions.length - a.regressions.length); + return comparisons; +} + +function toMarkdown(comparisons, options) { + const regressions = comparisons.filter( + (entry) => entry.regressions.length > 0, + ); + let md = ""; + md += "# Playback Benchmark Comparison\n\n"; + md += `Generated: ${new Date().toISOString()}\n\n`; + md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; + md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}\n\n`; + md += + "| Platform | GPU | Scenario | Recording | Format | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; + md += "|---|---|---|---|---|---:|---:|---:|---|\n"; + for (const row of comparisons) { + md += `| 
${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? row.regressions.join(", ") : "none"} |\n`; + } + md += "\n"; + return md; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if (!options.baseline || !options.candidate) { + throw new Error("--baseline and --candidate are required"); + } + + const baselineFiles = collectJsonFiles(options.baseline); + const candidateFiles = collectJsonFiles(options.candidate); + if (baselineFiles.length === 0) { + throw new Error("No baseline JSON files found"); + } + if (candidateFiles.length === 0) { + throw new Error("No candidate JSON files found"); + } + + const baselineRows = collectMetrics(baselineFiles); + const candidateRows = collectMetrics(candidateFiles); + const comparisons = compareMetrics(baselineRows, candidateRows, options); + const markdown = toMarkdown(comparisons, options); + + if (options.output) { + fs.writeFileSync(options.output, markdown, "utf8"); + console.log(`Wrote comparison report to ${options.output}`); + } else { + process.stdout.write(markdown); + } + + if (comparisons.some((entry) => entry.regressions.length > 0)) { + process.exit(1); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} From a9653464caf462e7e9151ce5374c0ac9b8fedcbd Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:02:01 +0000 Subject: [PATCH 077/333] improve: discard stale prefetched frames after live seeks Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 ++++ crates/editor/src/playback.rs | 56 ++++++++++++++++++++++++++---- 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 84f3072ba4..c88dde9c05 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -229,6 +229,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Flags regressions when FPS drops or startup/scrub latency increase beyond configured tolerance. - Exits non-zero on regressions so matrix-driven optimization loops can be gated automatically. +17. **Added prefetch generation gating for live seek correctness and latency (2026-02-13)** + - Prefetch outputs are tagged with seek-generation IDs and stale generation frames are dropped. + - Seek events now advance generation and flush prefetch consumption to prevent old in-flight decode outputs from polluting post-seek playback. + - Reduces redundant decode/render work during aggressive scrub and improves settle reliability. + --- ## Root Cause Analysis Archive @@ -334,6 +339,7 @@ Decoder Pipeline: 18. Added benchmark history publisher script for finalized matrix artifacts. 19. Added matrix bottleneck analysis script for prioritized FPS optimization follow-up. 20. Added baseline-vs-candidate comparison script to gate regressions in optimization loops. +21. Added seek-generation prefetch gating to drop stale decode outputs after live seek updates. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. 
@@ -355,6 +361,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. +- `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 06a01210dd..7e9671554a 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -70,6 +70,7 @@ struct PrefetchedFrame { frame_number: u32, segment_frames: DecodedSegmentFrames, segment_index: u32, + generation: u64, } struct FrameCache { @@ -131,6 +132,8 @@ impl Playback { tokio_mpsc::channel::(PREFETCH_BUFFER_SIZE * 2); let (frame_request_tx, mut frame_request_rx) = watch::channel(self.start_frame_number); let (playback_position_tx, playback_position_rx) = watch::channel(self.start_frame_number); + let (seek_generation_tx, mut seek_generation_rx) = watch::channel(0u64); + seek_generation_rx.borrow_and_update(); let in_flight_frames: Arc>> = Arc::new(RwLock::new(HashSet::new())); let prefetch_in_flight = in_flight_frames.clone(); @@ -138,6 +141,7 @@ impl Playback { let prefetch_stop_rx = stop_rx.clone(); let mut prefetch_project = self.project.clone(); + let mut prefetch_seek_generation = seek_generation_rx.clone(); let prefetch_segment_medias = self.segment_medias.clone(); let (prefetch_duration, has_timeline) = if let Some(timeline) = &self.project.borrow().timeline { @@ -156,7 +160,7 @@ impl Playback { } type PrefetchFuture = std::pin::Pin< Box< - dyn std::future::Future)> + dyn 
std::future::Future)> + Send, >, >; @@ -175,6 +179,7 @@ impl Playback { PARALLEL_DECODE_TASKS }; let initial_parallel_tasks = dynamic_parallel_tasks.min(4); + let mut active_generation = *prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); info!( @@ -191,6 +196,22 @@ impl Playback { cached_project = prefetch_project.borrow_and_update().clone(); } + if prefetch_seek_generation.has_changed().unwrap_or(false) { + let generation = *prefetch_seek_generation.borrow_and_update(); + if generation != active_generation { + active_generation = generation; + next_prefetch_frame = *frame_request_rx.borrow(); + frames_decoded = 0; + prefetched_behind.clear(); + + if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { + in_flight_guard.clear(); + } + + in_flight = FuturesUnordered::new(); + } + } + if let Ok(true) = frame_request_rx.has_changed() { let requested = *frame_request_rx.borrow_and_update(); if requested != next_prefetch_frame { @@ -263,6 +284,7 @@ impl Playback { let hide_camera = cached_project.camera.hide; let segment_index = segment.recording_clip; let is_initial = frames_decoded < 10; + let generation = active_generation; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.insert(frame_num); @@ -282,7 +304,7 @@ impl Playback { .get_frames(segment_time as f32, !hide_camera, clip_offsets) .await }; - (frame_num, segment_index, result) + (frame_num, segment_index, generation, result) })); } @@ -327,6 +349,7 @@ impl Playback { let decoders = segment_media.decoders.clone(); let hide_camera = cached_project.camera.hide; let segment_index = segment.recording_clip; + let generation = active_generation; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.insert(behind_frame); @@ -337,7 +360,7 @@ impl Playback { let result = decoders .get_frames(segment_time as f32, !hide_camera, clip_offsets) .await; - (behind_frame, segment_index, result) + (behind_frame, segment_index, 
generation, result) })); } } @@ -346,10 +369,15 @@ impl Playback { tokio::select! { biased; - Some((frame_num, segment_index, result)) = in_flight.next() => { + Some((frame_num, segment_index, generation, result)) = in_flight.next() => { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.remove(&frame_num); } + + if generation != active_generation { + continue; + } + frames_decoded = frames_decoded.saturating_add(1); if let Some(segment_frames) = result { @@ -357,6 +385,7 @@ impl Playback { frame_number: frame_num, segment_frames, segment_index, + generation, }).await; } else if frames_decoded <= 5 { warn!( @@ -407,6 +436,7 @@ impl Playback { let mut prefetch_buffer: VecDeque = VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); let mut frame_cache = FrameCache::new(FRAME_CACHE_SIZE); + let mut seek_generation = 0u64; let base_skip_threshold = (fps / 6).clamp(6, 16); let mut late_streak = 0u32; let mut skip_events = 0u64; @@ -453,7 +483,7 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { - if prefetched.frame_number >= frame_number { + if prefetched.generation == seek_generation && prefetched.frame_number >= frame_number { prefetch_buffer.push_back(prefetched); if first_frame_time.is_none() { first_frame_time = Some(Instant::now()); @@ -481,12 +511,14 @@ impl Playback { 'playback: loop { if seek_rx.has_changed().unwrap_or(false) { let seek_frame = *seek_rx.borrow_and_update(); + seek_generation = seek_generation.saturating_add(1); frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; pending_seek_observation = Some((seek_frame, Instant::now())); prefetch_buffer.retain(|p| p.frame_number >= frame_number); frame_cache.cache.clear(); + let _ = seek_generation_tx.send(seek_generation); let _ = frame_request_tx.send(frame_number); let _ = playback_position_tx.send(frame_number); if has_audio @@ -502,7 +534,9 @@ impl Playback { cached_project = self.project.borrow_and_update().clone(); } while let Ok(prefetched) = prefetch_rx.try_recv() { - if prefetched.frame_number >= frame_number { + if prefetched.generation == seek_generation + && prefetched.frame_number >= frame_number + { prefetch_buffer.push_back(prefetched); while prefetch_buffer.len() > PREFETCH_BUFFER_SIZE { if let Some(idx) = prefetch_buffer @@ -529,12 +563,14 @@ impl Playback { _ = stop_rx.changed() => break 'playback, _ = seek_rx.changed() => { let seek_frame = *seek_rx.borrow_and_update(); + seek_generation = seek_generation.saturating_add(1); frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; pending_seek_observation = Some((seek_frame, Instant::now())); prefetch_buffer.retain(|p| p.frame_number >= frame_number); frame_cache.cache.clear(); + let _ = seek_generation_tx.send(seek_generation); let _ = frame_request_tx.send(frame_number); let _ = playback_position_tx.send(frame_number); if has_audio @@ -589,6 +625,9 @@ impl Playback { tokio::select! 
{ _ = stop_rx.changed() => break 'playback, Some(prefetched) = prefetch_rx.recv() => { + if prefetched.generation != seek_generation { + continue; + } if prefetched.frame_number == frame_number { found_frame = Some(prefetched); break; @@ -636,6 +675,11 @@ impl Playback { tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; if let Ok(Some(prefetched)) = wait_result { + if prefetched.generation != seek_generation { + frame_number = frame_number.saturating_add(1); + total_frames_skipped += 1; + continue; + } if prefetched.frame_number == frame_number { Some(( Arc::new(prefetched.segment_frames), From b00cdec25b937e7a2504eb0fa3287e411df4b4bd Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:02:01 +0000 Subject: [PATCH 078/333] improve: discard stale prefetched frames after live seeks Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 ++++ crates/editor/src/playback.rs | 56 ++++++++++++++++++++++++++---- 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 84f3072ba4..c88dde9c05 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -229,6 +229,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Flags regressions when FPS drops or startup/scrub latency increase beyond configured tolerance. - Exits non-zero on regressions so matrix-driven optimization loops can be gated automatically. +17. **Added prefetch generation gating for live seek correctness and latency (2026-02-13)** + - Prefetch outputs are tagged with seek-generation IDs and stale generation frames are dropped. + - Seek events now advance generation and flush prefetch consumption to prevent old in-flight decode outputs from polluting post-seek playback. + - Reduces redundant decode/render work during aggressive scrub and improves settle reliability. 
+ --- ## Root Cause Analysis Archive @@ -334,6 +339,7 @@ Decoder Pipeline: 18. Added benchmark history publisher script for finalized matrix artifacts. 19. Added matrix bottleneck analysis script for prioritized FPS optimization follow-up. 20. Added baseline-vs-candidate comparison script to gate regressions in optimization loops. +21. Added seek-generation prefetch gating to drop stale decode outputs after live seek updates. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -355,6 +361,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. +- `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 06a01210dd..7e9671554a 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -70,6 +70,7 @@ struct PrefetchedFrame { frame_number: u32, segment_frames: DecodedSegmentFrames, segment_index: u32, + generation: u64, } struct FrameCache { @@ -131,6 +132,8 @@ impl Playback { tokio_mpsc::channel::(PREFETCH_BUFFER_SIZE * 2); let (frame_request_tx, mut frame_request_rx) = watch::channel(self.start_frame_number); let (playback_position_tx, playback_position_rx) = watch::channel(self.start_frame_number); + let (seek_generation_tx, mut seek_generation_rx) = watch::channel(0u64); + seek_generation_rx.borrow_and_update(); let in_flight_frames: Arc>> = Arc::new(RwLock::new(HashSet::new())); let prefetch_in_flight = in_flight_frames.clone(); @@ -138,6 +141,7 @@ impl Playback { let prefetch_stop_rx = stop_rx.clone(); let mut prefetch_project = self.project.clone(); + let mut prefetch_seek_generation = seek_generation_rx.clone(); let prefetch_segment_medias = self.segment_medias.clone(); let (prefetch_duration, has_timeline) = if let Some(timeline) = &self.project.borrow().timeline { @@ -156,7 +160,7 @@ impl Playback { } type PrefetchFuture = std::pin::Pin< Box< - dyn std::future::Future)> + dyn std::future::Future)> + Send, >, >; @@ -175,6 +179,7 @@ impl Playback { PARALLEL_DECODE_TASKS }; let initial_parallel_tasks = dynamic_parallel_tasks.min(4); + let mut active_generation = *prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); info!( @@ -191,6 +196,22 @@ impl Playback { cached_project = prefetch_project.borrow_and_update().clone(); } + if prefetch_seek_generation.has_changed().unwrap_or(false) { + let generation = *prefetch_seek_generation.borrow_and_update(); + if generation != active_generation { + active_generation = generation; + next_prefetch_frame = *frame_request_rx.borrow(); + frames_decoded = 0; + 
prefetched_behind.clear(); + + if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { + in_flight_guard.clear(); + } + + in_flight = FuturesUnordered::new(); + } + } + if let Ok(true) = frame_request_rx.has_changed() { let requested = *frame_request_rx.borrow_and_update(); if requested != next_prefetch_frame { @@ -263,6 +284,7 @@ impl Playback { let hide_camera = cached_project.camera.hide; let segment_index = segment.recording_clip; let is_initial = frames_decoded < 10; + let generation = active_generation; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.insert(frame_num); @@ -282,7 +304,7 @@ impl Playback { .get_frames(segment_time as f32, !hide_camera, clip_offsets) .await }; - (frame_num, segment_index, result) + (frame_num, segment_index, generation, result) })); } @@ -327,6 +349,7 @@ impl Playback { let decoders = segment_media.decoders.clone(); let hide_camera = cached_project.camera.hide; let segment_index = segment.recording_clip; + let generation = active_generation; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.insert(behind_frame); @@ -337,7 +360,7 @@ impl Playback { let result = decoders .get_frames(segment_time as f32, !hide_camera, clip_offsets) .await; - (behind_frame, segment_index, result) + (behind_frame, segment_index, generation, result) })); } } @@ -346,10 +369,15 @@ impl Playback { tokio::select! 
{ biased; - Some((frame_num, segment_index, result)) = in_flight.next() => { + Some((frame_num, segment_index, generation, result)) = in_flight.next() => { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.remove(&frame_num); } + + if generation != active_generation { + continue; + } + frames_decoded = frames_decoded.saturating_add(1); if let Some(segment_frames) = result { @@ -357,6 +385,7 @@ impl Playback { frame_number: frame_num, segment_frames, segment_index, + generation, }).await; } else if frames_decoded <= 5 { warn!( @@ -407,6 +436,7 @@ impl Playback { let mut prefetch_buffer: VecDeque = VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); let mut frame_cache = FrameCache::new(FRAME_CACHE_SIZE); + let mut seek_generation = 0u64; let base_skip_threshold = (fps / 6).clamp(6, 16); let mut late_streak = 0u32; let mut skip_events = 0u64; @@ -453,7 +483,7 @@ impl Playback { tokio::select! { Some(prefetched) = prefetch_rx.recv() => { - if prefetched.frame_number >= frame_number { + if prefetched.generation == seek_generation && prefetched.frame_number >= frame_number { prefetch_buffer.push_back(prefetched); if first_frame_time.is_none() { first_frame_time = Some(Instant::now()); @@ -481,12 +511,14 @@ impl Playback { 'playback: loop { if seek_rx.has_changed().unwrap_or(false) { let seek_frame = *seek_rx.borrow_and_update(); + seek_generation = seek_generation.saturating_add(1); frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; pending_seek_observation = Some((seek_frame, Instant::now())); prefetch_buffer.retain(|p| p.frame_number >= frame_number); frame_cache.cache.clear(); + let _ = seek_generation_tx.send(seek_generation); let _ = frame_request_tx.send(frame_number); let _ = playback_position_tx.send(frame_number); if has_audio @@ -502,7 +534,9 @@ impl Playback { cached_project = self.project.borrow_and_update().clone(); } while let Ok(prefetched) = prefetch_rx.try_recv() { - if 
prefetched.frame_number >= frame_number { + if prefetched.generation == seek_generation + && prefetched.frame_number >= frame_number + { prefetch_buffer.push_back(prefetched); while prefetch_buffer.len() > PREFETCH_BUFFER_SIZE { if let Some(idx) = prefetch_buffer @@ -529,12 +563,14 @@ impl Playback { _ = stop_rx.changed() => break 'playback, _ = seek_rx.changed() => { let seek_frame = *seek_rx.borrow_and_update(); + seek_generation = seek_generation.saturating_add(1); frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; pending_seek_observation = Some((seek_frame, Instant::now())); prefetch_buffer.retain(|p| p.frame_number >= frame_number); frame_cache.cache.clear(); + let _ = seek_generation_tx.send(seek_generation); let _ = frame_request_tx.send(frame_number); let _ = playback_position_tx.send(frame_number); if has_audio @@ -589,6 +625,9 @@ impl Playback { tokio::select! { _ = stop_rx.changed() => break 'playback, Some(prefetched) = prefetch_rx.recv() => { + if prefetched.generation != seek_generation { + continue; + } if prefetched.frame_number == frame_number { found_frame = Some(prefetched); break; @@ -636,6 +675,11 @@ impl Playback { tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; if let Ok(Some(prefetched)) = wait_result { + if prefetched.generation != seek_generation { + frame_number = frame_number.saturating_add(1); + total_frames_skipped += 1; + continue; + } if prefetched.frame_number == frame_number { Some(( Arc::new(prefetched.segment_frames), From 3a9646cc4267e1dbd4f8e7cda25b1ec8a9601279 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:05:22 +0000 Subject: [PATCH 079/333] improve: clear prefetched buffer immediately on live seek Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md 
b/crates/editor/PLAYBACK-FINDINGS.md index c88dde9c05..6d7ff24f03 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -234,6 +234,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Seek events now advance generation and flush prefetch consumption to prevent old in-flight decode outputs from polluting post-seek playback. - Reduces redundant decode/render work during aggressive scrub and improves settle reliability. +18. **Flushed prefetched-frame buffer on seek generation changes (2026-02-13)** + - Live seek handling now clears prefetch buffer immediately on seek events. + - Prevents stale buffered frames from prior playback position from being reused after seek jumps. + - Reduces unnecessary post-seek frame scans and improves settle determinism. + --- ## Root Cause Analysis Archive @@ -340,6 +345,7 @@ Decoder Pipeline: 19. Added matrix bottleneck analysis script for prioritized FPS optimization follow-up. 20. Added baseline-vs-candidate comparison script to gate regressions in optimization loops. 21. Added seek-generation prefetch gating to drop stale decode outputs after live seek updates. +22. Cleared prefetched-frame buffer on live seek handling to avoid stale buffered frame reuse. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -362,6 +368,7 @@ Decoder Pipeline: - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. 
+- `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 7e9671554a..de686b71a5 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -516,7 +516,7 @@ impl Playback { playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; pending_seek_observation = Some((seek_frame, Instant::now())); - prefetch_buffer.retain(|p| p.frame_number >= frame_number); + prefetch_buffer.clear(); frame_cache.cache.clear(); let _ = seek_generation_tx.send(seek_generation); let _ = frame_request_tx.send(frame_number); @@ -568,7 +568,7 @@ impl Playback { playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; pending_seek_observation = Some((seek_frame, Instant::now())); - prefetch_buffer.retain(|p| p.frame_number >= frame_number); + prefetch_buffer.clear(); frame_cache.cache.clear(); let _ = seek_generation_tx.send(seek_generation); let _ = frame_request_tx.send(frame_number); From e03a491ffc04ccadd4328d9c22502e30598936ab Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:05:22 +0000 Subject: [PATCH 080/333] improve: clear prefetched buffer immediately on live seek Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index c88dde9c05..6d7ff24f03 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -234,6 +234,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Seek events now advance generation and flush prefetch consumption to prevent old in-flight decode outputs from polluting 
post-seek playback. - Reduces redundant decode/render work during aggressive scrub and improves settle reliability. +18. **Flushed prefetched-frame buffer on seek generation changes (2026-02-13)** + - Live seek handling now clears prefetch buffer immediately on seek events. + - Prevents stale buffered frames from prior playback position from being reused after seek jumps. + - Reduces unnecessary post-seek frame scans and improves settle determinism. + --- ## Root Cause Analysis Archive @@ -340,6 +345,7 @@ Decoder Pipeline: 19. Added matrix bottleneck analysis script for prioritized FPS optimization follow-up. 20. Added baseline-vs-candidate comparison script to gate regressions in optimization loops. 21. Added seek-generation prefetch gating to drop stale decode outputs after live seek updates. +22. Cleared prefetched-frame buffer on live seek handling to avoid stale buffered frame reuse. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -362,6 +368,7 @@ Decoder Pipeline: - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. +- `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 7e9671554a..de686b71a5 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -516,7 +516,7 @@ impl Playback { playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; pending_seek_observation = Some((seek_frame, Instant::now())); - prefetch_buffer.retain(|p| p.frame_number >= frame_number); + prefetch_buffer.clear(); frame_cache.cache.clear(); let _ = seek_generation_tx.send(seek_generation); let _ = frame_request_tx.send(frame_number); @@ -568,7 +568,7 @@ impl Playback { playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; pending_seek_observation = Some((seek_frame, Instant::now())); - prefetch_buffer.retain(|p| p.frame_number >= frame_number); + prefetch_buffer.clear(); frame_cache.cache.clear(); let _ = seek_generation_tx.send(seek_generation); let _ = frame_request_tx.send(frame_number); From 3587b4c22c7f0d1d2a2b0d3b4f77bd76a5a1874c Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:06:13 +0000 Subject: [PATCH 081/333] improve: avoid buffering stale prefetched frames behind playhead Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 6d7ff24f03..a87c66dfc6 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -239,6 +239,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prevents stale buffered frames from prior playback position from being reused after seek jumps. - Reduces unnecessary post-seek frame scans and improves settle determinism. +19. **Tightened in-flight prefetch buffering to current playhead (2026-02-13)** + - In-flight wait path now buffers only frames at or ahead of current frame. 
+ - Avoids re-queueing older frames from initial start position baseline. + - Reduces avoidable prefetch buffer churn during late playback and aggressive seek scenarios. + --- ## Root Cause Analysis Archive @@ -346,6 +351,7 @@ Decoder Pipeline: 20. Added baseline-vs-candidate comparison script to gate regressions in optimization loops. 21. Added seek-generation prefetch gating to drop stale decode outputs after live seek updates. 22. Cleared prefetched-frame buffer on live seek handling to avoid stale buffered frame reuse. +23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -369,6 +375,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. +- `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index de686b71a5..ee294b267e 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -631,7 +631,7 @@ impl Playback { if prefetched.frame_number == frame_number { found_frame = Some(prefetched); break; - } else if prefetched.frame_number >= self.start_frame_number { + } else if prefetched.frame_number >= frame_number { prefetch_buffer.push_back(prefetched); } } From 5bf8c9f9ee1f60350a538edd62cdaed945a525ba Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:06:13 +0000 Subject: [PATCH 082/333] improve: avoid buffering stale prefetched frames behind playhead Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 6d7ff24f03..a87c66dfc6 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -239,6 +239,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prevents stale buffered frames from prior playback position from being reused after seek jumps. - Reduces unnecessary post-seek frame scans and improves settle determinism. +19. **Tightened in-flight prefetch buffering to current playhead (2026-02-13)** + - In-flight wait path now buffers only frames at or ahead of current frame. + - Avoids re-queueing older frames from initial start position baseline. + - Reduces avoidable prefetch buffer churn during late playback and aggressive seek scenarios. + --- ## Root Cause Analysis Archive @@ -346,6 +351,7 @@ Decoder Pipeline: 20. Added baseline-vs-candidate comparison script to gate regressions in optimization loops. 21. Added seek-generation prefetch gating to drop stale decode outputs after live seek updates. 22. 
Cleared prefetched-frame buffer on live seek handling to avoid stale buffered frame reuse. +23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -369,6 +375,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. +- `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index de686b71a5..ee294b267e 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -631,7 +631,7 @@ impl Playback { if prefetched.frame_number == frame_number { found_frame = Some(prefetched); break; - } else if prefetched.frame_number >= self.start_frame_number { + } else if prefetched.frame_number >= frame_number { prefetch_buffer.push_back(prefetched); } } From 94e4e79ca1257450e9817c2a68c1897800f8363b Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:07:26 +0000 Subject: [PATCH 083/333] improve: support multi-input benchmark comparison gating Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 +++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 +++ scripts/compare-playback-benchmark-runs.js | 31 +++++++++++++++------- 4 files changed, 32 insertions(+), 10 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 62434b52b2..c7c5716cb9 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -89,6 +89,7 @@ node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-result # Compare candidate against baseline and flag regressions node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a87c66dfc6..c93dcaf164 100644 --- 
a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -244,6 +244,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Avoids re-queueing older frames from initial start position baseline. - Reduces avoidable prefetch buffer churn during late playback and aggressive seek scenarios. +20. **Expanded comparison gating for multi-run matrix diffs (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports multiple baseline and candidate inputs. + - Enables aggregate regression gating across batched machine runs instead of one directory at a time. + - Improves reliability of continuous optimization loops when matrix outputs are split across multiple sources. + --- ## Root Cause Analysis Archive @@ -352,6 +357,7 @@ Decoder Pipeline: 21. Added seek-generation prefetch gating to drop stale decode outputs after live seek updates. 22. Cleared prefetched-frame buffer on live seek handling to avoid stale buffered frame reuse. 23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. +24. Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -376,6 +382,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. 
+- `scripts/compare-playback-benchmark-runs.js`: comparison gating now accepts multiple baseline and candidate inputs for aggregated matrix regression checks. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 069f2f0a53..132d1bf440 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -146,6 +146,9 @@ Compare candidate run against baseline and fail on regressions: ```bash pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 + +# multiple baseline/candidate directories can be provided +pnpm bench:playback:compare -- --baseline /path/to/baseline-a --baseline /path/to/baseline-b --candidate /path/to/candidate-a --candidate /path/to/candidate-b --output /tmp/playback-matrix-final/playback-comparison.md ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index a7a7eaca22..73fe67253b 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -5,8 +5,8 @@ import path from "node:path"; function parseArgs(argv) { const options = { - baseline: null, - candidate: null, + baselineInputs: [], + candidateInputs: [], output: null, allowFpsDrop: 2, allowStartupIncreaseMs: 25, @@ -21,11 +21,15 @@ function parseArgs(argv) { continue; } if (arg === "--baseline") { - options.baseline = path.resolve(argv[++i] ?? ""); + const value = argv[++i]; + if (!value) throw new Error("Missing value for --baseline"); + options.baselineInputs.push(path.resolve(value)); continue; } if (arg === "--candidate") { - options.candidate = path.resolve(argv[++i] ?? 
""); + const value = argv[++i]; + if (!value) throw new Error("Missing value for --candidate"); + options.candidateInputs.push(path.resolve(value)); continue; } if (arg === "--output" || arg === "-o") { @@ -63,9 +67,9 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline --candidate [--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] -Compares baseline and candidate playback matrix JSON outputs and flags regressions.`); +Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } function collectJsonFiles(targetPath) { @@ -235,12 +239,19 @@ function main() { usage(); return; } - if (!options.baseline || !options.candidate) { - throw new Error("--baseline and --candidate are required"); + if ( + options.baselineInputs.length === 0 || + options.candidateInputs.length === 0 + ) { + throw new Error("At least one --baseline and one --candidate are required"); } - const baselineFiles = collectJsonFiles(options.baseline); - const candidateFiles = collectJsonFiles(options.candidate); + const baselineFiles = [ + ...new Set(options.baselineInputs.flatMap(collectJsonFiles)), + ]; + const candidateFiles = [ + ...new Set(options.candidateInputs.flatMap(collectJsonFiles)), + ]; if (baselineFiles.length === 0) { throw new Error("No baseline JSON files found"); } From 5e00fcc8a425e781c14baf68ad57e3f5646cbb04 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:07:26 +0000 Subject: [PATCH 084/333] improve: support multi-input benchmark comparison gating Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + 
crates/editor/PLAYBACK-FINDINGS.md | 7 +++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 +++ scripts/compare-playback-benchmark-runs.js | 31 +++++++++++++++------- 4 files changed, 32 insertions(+), 10 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 62434b52b2..c7c5716cb9 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -89,6 +89,7 @@ node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-result # Compare candidate against baseline and flag regressions node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a87c66dfc6..c93dcaf164 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -244,6 +244,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Avoids re-queueing older frames from initial start position baseline. - Reduces avoidable prefetch buffer churn during late playback and aggressive seek scenarios. +20. **Expanded comparison gating for multi-run matrix diffs (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports multiple baseline and candidate inputs. + - Enables aggregate regression gating across batched machine runs instead of one directory at a time. + - Improves reliability of continuous optimization loops when matrix outputs are split across multiple sources. 
+ --- ## Root Cause Analysis Archive @@ -352,6 +357,7 @@ Decoder Pipeline: 21. Added seek-generation prefetch gating to drop stale decode outputs after live seek updates. 22. Cleared prefetched-frame buffer on live seek handling to avoid stale buffered frame reuse. 23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. +24. Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -376,6 +382,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. +- `scripts/compare-playback-benchmark-runs.js`: comparison gating now accepts multiple baseline and candidate inputs for aggregated matrix regression checks. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 069f2f0a53..132d1bf440 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -146,6 +146,9 @@ Compare candidate run against baseline and fail on regressions: ```bash pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 + +# multiple baseline/candidate directories can be provided +pnpm bench:playback:compare -- --baseline /path/to/baseline-a --baseline /path/to/baseline-b --candidate /path/to/candidate-a --candidate /path/to/candidate-b --output /tmp/playback-matrix-final/playback-comparison.md ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index a7a7eaca22..73fe67253b 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -5,8 +5,8 @@ import path from "node:path"; function parseArgs(argv) { const options = { - baseline: null, - candidate: null, + baselineInputs: [], + candidateInputs: [], output: null, allowFpsDrop: 2, allowStartupIncreaseMs: 25, @@ -21,11 +21,15 @@ function parseArgs(argv) { continue; } if (arg === "--baseline") { - options.baseline = path.resolve(argv[++i] ?? ""); + const value = argv[++i]; + if (!value) throw new Error("Missing value for --baseline"); + options.baselineInputs.push(path.resolve(value)); continue; } if (arg === "--candidate") { - options.candidate = path.resolve(argv[++i] ?? 
""); + const value = argv[++i]; + if (!value) throw new Error("Missing value for --candidate"); + options.candidateInputs.push(path.resolve(value)); continue; } if (arg === "--output" || arg === "-o") { @@ -63,9 +67,9 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline --candidate [--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] -Compares baseline and candidate playback matrix JSON outputs and flags regressions.`); +Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } function collectJsonFiles(targetPath) { @@ -235,12 +239,19 @@ function main() { usage(); return; } - if (!options.baseline || !options.candidate) { - throw new Error("--baseline and --candidate are required"); + if ( + options.baselineInputs.length === 0 || + options.candidateInputs.length === 0 + ) { + throw new Error("At least one --baseline and one --candidate are required"); } - const baselineFiles = collectJsonFiles(options.baseline); - const candidateFiles = collectJsonFiles(options.candidate); + const baselineFiles = [ + ...new Set(options.baselineInputs.flatMap(collectJsonFiles)), + ]; + const candidateFiles = [ + ...new Set(options.candidateInputs.flatMap(collectJsonFiles)), + ]; if (baselineFiles.length === 0) { throw new Error("No baseline JSON files found"); } From 3b8a8fdb97845d2659c7770255a885b0ca1aefbc Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:08:47 +0000 Subject: [PATCH 085/333] improve: add baseline compare gate to matrix finalization Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + 
crates/editor/PLAYBACK-FINDINGS.md | 7 ++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 + scripts/finalize-playback-matrix.js | 82 ++++++++++++++++++++++-- 4 files changed, 87 insertions(+), 6 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index c7c5716cb9..bbd376d28d 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -78,6 +78,7 @@ node scripts/validate-playback-matrix.js --input /path/to/json-results --require # Finalize aggregate + status + validation artifacts node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md # Publish matrix artifacts into this benchmark history diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index c93dcaf164..c0a7f5f8ab 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -249,6 +249,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Enables aggregate regression gating across batched machine runs instead of one directory at a time. - Improves reliability of continuous optimization loops when matrix outputs are split across multiple sources. +21. 
**Added finalization-integrated regression gate support (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now supports `--compare-baseline` and threshold args. + - Finalization can now produce aggregate/status/validation/bottleneck artifacts and run baseline-vs-candidate gating in one command. + - Keeps optimization loops strict by failing finalize runs when regression tolerances are exceeded. + --- ## Root Cause Analysis Archive @@ -358,6 +363,7 @@ Decoder Pipeline: 22. Cleared prefetched-frame buffer on live seek handling to avoid stale buffered frame reuse. 23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. 24. Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. +25. Added optional baseline comparison gating inside matrix finalization workflow. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -383,6 +389,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. - `scripts/compare-playback-benchmark-runs.js`: comparison gating now accepts multiple baseline and candidate inputs for aggregated matrix regression checks. +- `scripts/finalize-playback-matrix.js`: finalization now supports optional baseline comparison gating and threshold controls in the same pass. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 132d1bf440..736ff538d5 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -117,6 +117,9 @@ Include optimization thresholds when finalizing: ```bash pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 + +# include baseline comparison gate during finalization +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 ``` Finalize and publish to benchmark history in one command: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index d542d6f1b5..257e4c11c6 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -14,6 +14,10 @@ function parseArgs(argv) { maxStartupMs: 250, analyze: true, publishTarget: null, + compareBaselineInputs: [], + allowFpsDrop: 2, + allowStartupIncreaseMs: 25, + allowScrubP95IncreaseMs: 5, }; for (let i = 2; i < argv.length; i++) { @@ -77,6 +81,36 @@ function parseArgs(argv) { options.publishTarget = path.resolve(value); continue; } + if (arg === "--compare-baseline") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --compare-baseline"); + options.compareBaselineInputs.push(path.resolve(value)); + continue; + } + if (arg === "--allow-fps-drop") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-fps-drop value"); + } + options.allowFpsDrop = value; + continue; + } + if (arg === "--allow-startup-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-startup-increase-ms value"); + } + options.allowStartupIncreaseMs = value; + continue; + } + if (arg === "--allow-scrub-p95-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-scrub-p95-increase-ms value"); + } + options.allowScrubP95IncreaseMs = value; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -84,9 +118,9 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--publish-target ] -Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs.`); +Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } function run(command, args) { @@ -112,11 +146,24 @@ function main() { fs.mkdirSync(options.outputDir, { recursive: true }); } - const aggregatePath = path.join(options.outputDir, "playback-benchmark-aggregate.md"); + const aggregatePath = path.join( + options.outputDir, + "playback-benchmark-aggregate.md", + ); const statusPath = path.join(options.outputDir, "playback-matrix-status.md"); - const validationPath = path.join(options.outputDir, "playback-matrix-validation.json"); - const bottleneckPath = path.join(options.outputDir, "playback-bottlenecks.md"); - const bottleneckJsonPath = path.join(options.outputDir, "playback-bottlenecks.json"); + const validationPath = path.join( + options.outputDir, + "playback-matrix-validation.json", + ); + const bottleneckPath = path.join( + options.outputDir, + "playback-bottlenecks.md", + ); + const bottleneckJsonPath = path.join( + options.outputDir, + "playback-bottlenecks.json", + ); + const comparisonPath = path.join(options.outputDir, "playback-comparison.md"); const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; const statusArgs = ["scripts/build-playback-matrix-report.js"]; @@ -177,6 +224,26 @@ function main() { } run("node", publishArgs); } + if (options.compareBaselineInputs.length > 0) { + const compareArgs = ["scripts/compare-playback-benchmark-runs.js"]; + for (const baselineInput of options.compareBaselineInputs) { + compareArgs.push("--baseline", baselineInput); + } + for (const candidateInput of options.inputs) { + compareArgs.push("--candidate", candidateInput); + } + compareArgs.push( + "--output", + comparisonPath, + "--allow-fps-drop", + String(options.allowFpsDrop), + "--allow-startup-increase-ms", + String(options.allowStartupIncreaseMs), + "--allow-scrub-p95-increase-ms", + String(options.allowScrubP95IncreaseMs), + ); + run("node", compareArgs); + } console.log(`Aggregate markdown: 
${aggregatePath}`); console.log(`Status markdown: ${statusPath}`); @@ -188,6 +255,9 @@ function main() { if (options.publishTarget) { console.log(`Published target: ${options.publishTarget}`); } + if (options.compareBaselineInputs.length > 0) { + console.log(`Comparison report: ${comparisonPath}`); + } } try { From 083b1f394eb1e71d27b76c7ede562f126dc970e5 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:08:47 +0000 Subject: [PATCH 086/333] improve: add baseline compare gate to matrix finalization Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 ++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 + scripts/finalize-playback-matrix.js | 82 ++++++++++++++++++++++-- 4 files changed, 87 insertions(+), 6 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index c7c5716cb9..bbd376d28d 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -78,6 +78,7 @@ node scripts/validate-playback-matrix.js --input /path/to/json-results --require # Finalize aggregate + status + validation artifacts node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target 
/workspace/crates/editor/PLAYBACK-BENCHMARKS.md # Publish matrix artifacts into this benchmark history diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index c93dcaf164..c0a7f5f8ab 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -249,6 +249,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Enables aggregate regression gating across batched machine runs instead of one directory at a time. - Improves reliability of continuous optimization loops when matrix outputs are split across multiple sources. +21. **Added finalization-integrated regression gate support (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now supports `--compare-baseline` and threshold args. + - Finalization can now produce aggregate/status/validation/bottleneck artifacts and run baseline-vs-candidate gating in one command. + - Keeps optimization loops strict by failing finalize runs when regression tolerances are exceeded. + --- ## Root Cause Analysis Archive @@ -358,6 +363,7 @@ Decoder Pipeline: 22. Cleared prefetched-frame buffer on live seek handling to avoid stale buffered frame reuse. 23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. 24. Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. +25. Added optional baseline comparison gating inside matrix finalization workflow. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -383,6 +389,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. 
- `scripts/compare-playback-benchmark-runs.js`: comparison gating now accepts multiple baseline and candidate inputs for aggregated matrix regression checks. +- `scripts/finalize-playback-matrix.js`: finalization now supports optional baseline comparison gating and threshold controls in the same pass. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 132d1bf440..736ff538d5 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -117,6 +117,9 @@ Include optimization thresholds when finalizing: ```bash pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 + +# include baseline comparison gate during finalization +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 ``` Finalize and publish to benchmark history in one command: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index d542d6f1b5..257e4c11c6 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -14,6 +14,10 @@ function parseArgs(argv) { maxStartupMs: 250, analyze: true, publishTarget: null, + compareBaselineInputs: [], + allowFpsDrop: 2, + allowStartupIncreaseMs: 25, + allowScrubP95IncreaseMs: 5, }; for (let i = 2; i < argv.length; i++) { @@ -77,6 +81,36 @@ function parseArgs(argv) { options.publishTarget = path.resolve(value); continue; } + if (arg === "--compare-baseline") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --compare-baseline"); + options.compareBaselineInputs.push(path.resolve(value)); + 
continue; + } + if (arg === "--allow-fps-drop") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-fps-drop value"); + } + options.allowFpsDrop = value; + continue; + } + if (arg === "--allow-startup-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-startup-increase-ms value"); + } + options.allowStartupIncreaseMs = value; + continue; + } + if (arg === "--allow-scrub-p95-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-scrub-p95-increase-ms value"); + } + options.allowScrubP95IncreaseMs = value; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -84,9 +118,9 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--publish-target ] -Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs.`); +Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } function run(command, args) { @@ -112,11 +146,24 @@ function main() { fs.mkdirSync(options.outputDir, { recursive: true }); } - const aggregatePath = path.join(options.outputDir, "playback-benchmark-aggregate.md"); + const aggregatePath = path.join( + options.outputDir, + "playback-benchmark-aggregate.md", + ); const statusPath = path.join(options.outputDir, "playback-matrix-status.md"); - const validationPath = path.join(options.outputDir, "playback-matrix-validation.json"); - const bottleneckPath = path.join(options.outputDir, "playback-bottlenecks.md"); - const bottleneckJsonPath = path.join(options.outputDir, "playback-bottlenecks.json"); + const validationPath = path.join( + options.outputDir, + "playback-matrix-validation.json", + ); + const bottleneckPath = path.join( + options.outputDir, + "playback-bottlenecks.md", + ); + const bottleneckJsonPath = path.join( + options.outputDir, + "playback-bottlenecks.json", + ); + const comparisonPath = path.join(options.outputDir, "playback-comparison.md"); const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; const statusArgs = ["scripts/build-playback-matrix-report.js"]; @@ -177,6 +224,26 @@ function main() { } run("node", publishArgs); } + if (options.compareBaselineInputs.length > 0) { + const compareArgs = ["scripts/compare-playback-benchmark-runs.js"]; + for (const baselineInput of options.compareBaselineInputs) { + compareArgs.push("--baseline", baselineInput); + } + for (const candidateInput of options.inputs) { + compareArgs.push("--candidate", candidateInput); + } + compareArgs.push( + "--output", + comparisonPath, + "--allow-fps-drop", + String(options.allowFpsDrop), + "--allow-startup-increase-ms", + String(options.allowStartupIncreaseMs), + "--allow-scrub-p95-increase-ms", + String(options.allowScrubP95IncreaseMs), + ); + run("node", compareArgs); + } console.log(`Aggregate markdown: 
${aggregatePath}`); console.log(`Status markdown: ${statusPath}`); @@ -188,6 +255,9 @@ function main() { if (options.publishTarget) { console.log(`Published target: ${options.publishTarget}`); } + if (options.compareBaselineInputs.length > 0) { + console.log(`Comparison report: ${comparisonPath}`); + } } try { From d381053831a6aa2010dd8307247df1ad8ed3e999 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:12:03 +0000 Subject: [PATCH 087/333] improve: key in-flight frame tracking by seek generation Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 26 ++++++++++++++------------ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index c0a7f5f8ab..ed07b623b7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -254,6 +254,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Finalization can now produce aggregate/status/validation/bottleneck artifacts and run baseline-vs-candidate gating in one command. - Keeps optimization loops strict by failing finalize runs when regression tolerances are exceeded. +22. **Made in-flight tracking generation-aware to avoid seek races (2026-02-13)** + - Shared in-flight frame tracking now keys entries by `(seek_generation, frame_number)`. + - Prevents old-generation decode completions from removing new-generation in-flight markers for the same frame number. + - Improves seek correctness under rapid repeated seeks to nearby frame ranges. + --- ## Root Cause Analysis Archive @@ -364,6 +369,7 @@ Decoder Pipeline: 23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. 24. Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. 25. Added optional baseline comparison gating inside matrix finalization workflow. +26. 
Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -390,6 +396,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. - `scripts/compare-playback-benchmark-runs.js`: comparison gating now accepts multiple baseline and candidate inputs for aggregated matrix regression checks. - `scripts/finalize-playback-matrix.js`: finalization now supports optional baseline comparison gating and threshold controls in the same pass. +- `crates/editor/src/playback.rs`: in-flight frame markers now include seek generation to prevent old decode paths from clearing current-generation markers. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index ee294b267e..10963c31f2 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -135,7 +135,8 @@ impl Playback { let (seek_generation_tx, mut seek_generation_rx) = watch::channel(0u64); seek_generation_rx.borrow_and_update(); - let in_flight_frames: Arc>> = Arc::new(RwLock::new(HashSet::new())); + let in_flight_frames: Arc>> = + Arc::new(RwLock::new(HashSet::new())); let prefetch_in_flight = in_flight_frames.clone(); let main_in_flight = in_flight_frames; @@ -261,7 +262,7 @@ impl Playback { let already_in_flight = prefetch_in_flight .read() - .map(|guard| guard.contains(&frame_num)) + .map(|guard| guard.contains(&(active_generation, frame_num))) .unwrap_or(false); if already_in_flight { next_prefetch_frame += 1; @@ -287,7 +288,7 @@ impl Playback { let generation = active_generation; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.insert(frame_num); + in_flight_guard.insert((generation, 
frame_num)); } in_flight.push(Box::pin(async move { @@ -328,7 +329,7 @@ impl Playback { let already_in_flight = prefetch_in_flight .read() - .map(|guard| guard.contains(&behind_frame)) + .map(|guard| guard.contains(&(active_generation, behind_frame))) .unwrap_or(false); if already_in_flight { continue; @@ -352,7 +353,7 @@ impl Playback { let generation = active_generation; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.insert(behind_frame); + in_flight_guard.insert((generation, behind_frame)); } prefetched_behind.insert(behind_frame); @@ -371,7 +372,7 @@ impl Playback { Some((frame_num, segment_index, generation, result)) = in_flight.next() => { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.remove(&frame_num); + in_flight_guard.remove(&(generation, frame_num)); } if generation != active_generation { @@ -613,7 +614,7 @@ impl Playback { } else { let is_in_flight = main_in_flight .read() - .map(|guard| guard.contains(&frame_number)) + .map(|guard| guard.contains(&(seek_generation, frame_number))) .unwrap_or(false); if is_in_flight { @@ -638,7 +639,7 @@ impl Playback { _ = tokio::time::sleep(in_flight_poll_interval) => { let still_in_flight = main_in_flight .read() - .map(|guard| guard.contains(&frame_number)) + .map(|guard| guard.contains(&(seek_generation, frame_number))) .unwrap_or(false); if !still_in_flight { break; @@ -717,21 +718,22 @@ impl Playback { .map(|v| v.offsets) .unwrap_or_default(); + let in_flight_key = (seek_generation, frame_number); if let Ok(mut guard) = main_in_flight.write() { - guard.insert(frame_number); + guard.insert(in_flight_key); } let max_wait = frame_fetch_timeout; let data = tokio::select! 
{ _ = stop_rx.changed() => { if let Ok(mut guard) = main_in_flight.write() { - guard.remove(&frame_number); + guard.remove(&in_flight_key); } break 'playback }, _ = tokio::time::sleep(max_wait) => { if let Ok(mut guard) = main_in_flight.write() { - guard.remove(&frame_number); + guard.remove(&in_flight_key); } frame_number = frame_number.saturating_add(1); total_frames_skipped += 1; @@ -741,7 +743,7 @@ impl Playback { .decoders .get_frames(segment_time as f32, !cached_project.camera.hide, clip_offsets) => { if let Ok(mut guard) = main_in_flight.write() { - guard.remove(&frame_number); + guard.remove(&in_flight_key); } data }, From a6e5ea5c1b10efe6deeb3bbc0cc662f1f2556020 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:12:03 +0000 Subject: [PATCH 088/333] improve: key in-flight frame tracking by seek generation Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 26 ++++++++++++++------------ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index c0a7f5f8ab..ed07b623b7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -254,6 +254,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Finalization can now produce aggregate/status/validation/bottleneck artifacts and run baseline-vs-candidate gating in one command. - Keeps optimization loops strict by failing finalize runs when regression tolerances are exceeded. +22. **Made in-flight tracking generation-aware to avoid seek races (2026-02-13)** + - Shared in-flight frame tracking now keys entries by `(seek_generation, frame_number)`. + - Prevents old-generation decode completions from removing new-generation in-flight markers for the same frame number. + - Improves seek correctness under rapid repeated seeks to nearby frame ranges. 
+ --- ## Root Cause Analysis Archive @@ -364,6 +369,7 @@ Decoder Pipeline: 23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. 24. Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. 25. Added optional baseline comparison gating inside matrix finalization workflow. +26. Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -390,6 +396,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. - `scripts/compare-playback-benchmark-runs.js`: comparison gating now accepts multiple baseline and candidate inputs for aggregated matrix regression checks. - `scripts/finalize-playback-matrix.js`: finalization now supports optional baseline comparison gating and threshold controls in the same pass. +- `crates/editor/src/playback.rs`: in-flight frame markers now include seek generation to prevent old decode paths from clearing current-generation markers. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index ee294b267e..10963c31f2 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -135,7 +135,8 @@ impl Playback { let (seek_generation_tx, mut seek_generation_rx) = watch::channel(0u64); seek_generation_rx.borrow_and_update(); - let in_flight_frames: Arc>> = Arc::new(RwLock::new(HashSet::new())); + let in_flight_frames: Arc>> = + Arc::new(RwLock::new(HashSet::new())); let prefetch_in_flight = in_flight_frames.clone(); let main_in_flight = in_flight_frames; @@ -261,7 +262,7 @@ impl Playback { let already_in_flight = prefetch_in_flight .read() - .map(|guard| guard.contains(&frame_num)) + .map(|guard| guard.contains(&(active_generation, frame_num))) .unwrap_or(false); if already_in_flight { next_prefetch_frame += 1; @@ -287,7 +288,7 @@ impl Playback { let generation = active_generation; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.insert(frame_num); + in_flight_guard.insert((generation, frame_num)); } in_flight.push(Box::pin(async move { @@ -328,7 +329,7 @@ impl Playback { let already_in_flight = prefetch_in_flight .read() - .map(|guard| guard.contains(&behind_frame)) + .map(|guard| guard.contains(&(active_generation, behind_frame))) .unwrap_or(false); if already_in_flight { continue; @@ -352,7 +353,7 @@ impl Playback { let generation = active_generation; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.insert(behind_frame); + in_flight_guard.insert((generation, behind_frame)); } prefetched_behind.insert(behind_frame); @@ -371,7 +372,7 @@ impl Playback { Some((frame_num, segment_index, generation, result)) = in_flight.next() => { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.remove(&frame_num); + in_flight_guard.remove(&(generation, frame_num)); } if generation != active_generation { @@ -613,7 +614,7 @@ impl Playback { } else { let is_in_flight = main_in_flight 
.read() - .map(|guard| guard.contains(&frame_number)) + .map(|guard| guard.contains(&(seek_generation, frame_number))) .unwrap_or(false); if is_in_flight { @@ -638,7 +639,7 @@ impl Playback { _ = tokio::time::sleep(in_flight_poll_interval) => { let still_in_flight = main_in_flight .read() - .map(|guard| guard.contains(&frame_number)) + .map(|guard| guard.contains(&(seek_generation, frame_number))) .unwrap_or(false); if !still_in_flight { break; @@ -717,21 +718,22 @@ impl Playback { .map(|v| v.offsets) .unwrap_or_default(); + let in_flight_key = (seek_generation, frame_number); if let Ok(mut guard) = main_in_flight.write() { - guard.insert(frame_number); + guard.insert(in_flight_key); } let max_wait = frame_fetch_timeout; let data = tokio::select! { _ = stop_rx.changed() => { if let Ok(mut guard) = main_in_flight.write() { - guard.remove(&frame_number); + guard.remove(&in_flight_key); } break 'playback }, _ = tokio::time::sleep(max_wait) => { if let Ok(mut guard) = main_in_flight.write() { - guard.remove(&frame_number); + guard.remove(&in_flight_key); } frame_number = frame_number.saturating_add(1); total_frames_skipped += 1; @@ -741,7 +743,7 @@ impl Playback { .decoders .get_frames(segment_time as f32, !cached_project.camera.hide, clip_offsets) => { if let Ok(mut guard) = main_in_flight.write() { - guard.remove(&frame_number); + guard.remove(&in_flight_key); } data }, From cd95f80d4df4b471080451ed2ab89958d75c762b Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:13:46 +0000 Subject: [PATCH 089/333] improve: publish baseline comparison artifacts in matrix summaries Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 8 ++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 ++++ scripts/finalize-playback-matrix.js | 3 ++ scripts/publish-playback-matrix-summary.js | 33 ++++++++++++++++++---- 5 files changed, 45 insertions(+), 6 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md 
b/crates/editor/PLAYBACK-BENCHMARKS.md index bbd376d28d..94350ea549 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -83,6 +83,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md /tmp/playback-matrix-final/playback-comparison.md # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index ed07b623b7..ea6a0edf5c 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -259,6 +259,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prevents old-generation decode completions from removing new-generation in-flight markers for the same frame number. - Improves seek correctness under rapid repeated seeks to nearby frame ranges. +23. **Added comparison artifact publishing in finalize workflows (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` now accepts optional `--comparison-md`. 
+ - `scripts/finalize-playback-matrix.js` now forwards generated comparison artifact to publishing when both compare and publish options are enabled. + - Keeps benchmark history entries self-contained with regression gate evidence. + --- ## Root Cause Analysis Archive @@ -370,6 +375,7 @@ Decoder Pipeline: 24. Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. 25. Added optional baseline comparison gating inside matrix finalization workflow. 26. Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. +27. Added comparison artifact attachment support in publish/finalize matrix summary workflows. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -397,6 +403,8 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: comparison gating now accepts multiple baseline and candidate inputs for aggregated matrix regression checks. - `scripts/finalize-playback-matrix.js`: finalization now supports optional baseline comparison gating and threshold controls in the same pass. - `crates/editor/src/playback.rs`: in-flight frame markers now include seek generation to prevent old decode paths from clearing current-generation markers. +- `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional comparison artifact attachment. +- `scripts/finalize-playback-matrix.js`: finalize flow now includes comparison artifact when publishing and baseline comparison are both requested. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 736ff538d5..bba2e07b46 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -136,6 +136,12 @@ pnpm bench:playback:publish -- \ --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md + +pnpm bench:playback:publish -- \ + --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md \ + --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ + --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ + --comparison-md /tmp/playback-matrix-final/playback-comparison.md ``` Generate bottleneck analysis for optimization backlog: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 257e4c11c6..30ebdf5792 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -222,6 +222,9 @@ function main() { if (options.analyze) { publishArgs.push("--bottlenecks-md", bottleneckPath); } + if (options.compareBaselineInputs.length > 0) { + publishArgs.push("--comparison-md", comparisonPath); + } run("node", publishArgs); } if (options.compareBaselineInputs.length > 0) { diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 45fb7fe107..fab62486f9 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -8,6 +8,7 @@ function parseArgs(argv) { aggregateMd: null, statusMd: null, bottlenecksMd: null, + comparisonMd: null, validationJson: null, target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), }; @@ -31,6 +32,10 @@ function parseArgs(argv) { options.bottlenecksMd = path.resolve(argv[++i] ?? 
""); continue; } + if (arg === "--comparison-md") { + options.comparisonMd = path.resolve(argv[++i] ?? ""); + continue; + } if (arg === "--validation-json") { options.validationJson = path.resolve(argv[++i] ?? ""); continue; @@ -46,7 +51,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--target ] + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--target ] Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); } @@ -57,7 +62,13 @@ function ensureFile(filePath, label) { } } -function buildSummarySection(aggregateMd, statusMd, validationJson, bottlenecksMd) { +function buildSummarySection( + aggregateMd, + statusMd, + validationJson, + bottlenecksMd, + comparisonMd, +) { const now = new Date().toISOString(); const validation = JSON.parse(validationJson); const status = validation.passed ? "✅ MATRIX PASS" : "❌ MATRIX FAIL"; @@ -99,6 +110,12 @@ function buildSummarySection(aggregateMd, statusMd, validationJson, bottlenecksM markdown += `${bottlenecksMd.trim()}\n\n`; markdown += "\n\n"; } + if (comparisonMd) { + markdown += + "
<details>\n<summary>Baseline vs Candidate Comparison</summary>\n\n"; + markdown += `${comparisonMd.trim()}\n\n`; + markdown += "</details>
\n\n"; + } return markdown; } @@ -115,10 +132,7 @@ function writeToBenchmarkHistory(targetFile, summaryMd) { const insertPos = start + markerStart.length; const updated = - current.slice(0, insertPos) + - "\n\n" + - summaryMd + - current.slice(end); + current.slice(0, insertPos) + "\n\n" + summaryMd + current.slice(end); fs.writeFileSync(targetFile, updated, "utf8"); } @@ -135,6 +149,9 @@ function main() { if (options.bottlenecksMd) { ensureFile(options.bottlenecksMd, "Bottlenecks markdown"); } + if (options.comparisonMd) { + ensureFile(options.comparisonMd, "Comparison markdown"); + } ensureFile(options.target, "Target"); const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8"); @@ -143,11 +160,15 @@ function main() { const bottlenecksMd = options.bottlenecksMd ? fs.readFileSync(options.bottlenecksMd, "utf8") : null; + const comparisonMd = options.comparisonMd + ? fs.readFileSync(options.comparisonMd, "utf8") + : null; const summaryMd = buildSummarySection( aggregateMd, statusMd, validationJson, bottlenecksMd, + comparisonMd, ); writeToBenchmarkHistory(options.target, summaryMd); console.log(`Published matrix summary into ${options.target}`); From f08bae9c861f8fc56ca6191207fc3afd3ac8b9a0 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:13:46 +0000 Subject: [PATCH 090/333] improve: publish baseline comparison artifacts in matrix summaries Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 8 ++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 ++++ scripts/finalize-playback-matrix.js | 3 ++ scripts/publish-playback-matrix-summary.js | 33 ++++++++++++++++++---- 5 files changed, 45 insertions(+), 6 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index bbd376d28d..94350ea549 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -83,6 +83,7 @@ node scripts/finalize-playback-matrix.js 
--input /path/to/json-results --output- # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md /tmp/playback-matrix-final/playback-comparison.md # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index ed07b623b7..ea6a0edf5c 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -259,6 +259,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prevents old-generation decode completions from removing new-generation in-flight markers for the same frame number. - Improves seek correctness under rapid repeated seeks to nearby frame ranges. +23. **Added comparison artifact publishing in finalize workflows (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` now accepts optional `--comparison-md`. + - `scripts/finalize-playback-matrix.js` now forwards generated comparison artifact to publishing when both compare and publish options are enabled. + - Keeps benchmark history entries self-contained with regression gate evidence. + --- ## Root Cause Analysis Archive @@ -370,6 +375,7 @@ Decoder Pipeline: 24. 
Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. 25. Added optional baseline comparison gating inside matrix finalization workflow. 26. Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. +27. Added comparison artifact attachment support in publish/finalize matrix summary workflows. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -397,6 +403,8 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: comparison gating now accepts multiple baseline and candidate inputs for aggregated matrix regression checks. - `scripts/finalize-playback-matrix.js`: finalization now supports optional baseline comparison gating and threshold controls in the same pass. - `crates/editor/src/playback.rs`: in-flight frame markers now include seek generation to prevent old decode paths from clearing current-generation markers. +- `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional comparison artifact attachment. +- `scripts/finalize-playback-matrix.js`: finalize flow now includes comparison artifact when publishing and baseline comparison are both requested. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 736ff538d5..bba2e07b46 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -136,6 +136,12 @@ pnpm bench:playback:publish -- \ --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md + +pnpm bench:playback:publish -- \ + --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md \ + --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ + --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ + --comparison-md /tmp/playback-matrix-final/playback-comparison.md ``` Generate bottleneck analysis for optimization backlog: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 257e4c11c6..30ebdf5792 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -222,6 +222,9 @@ function main() { if (options.analyze) { publishArgs.push("--bottlenecks-md", bottleneckPath); } + if (options.compareBaselineInputs.length > 0) { + publishArgs.push("--comparison-md", comparisonPath); + } run("node", publishArgs); } if (options.compareBaselineInputs.length > 0) { diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 45fb7fe107..fab62486f9 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -8,6 +8,7 @@ function parseArgs(argv) { aggregateMd: null, statusMd: null, bottlenecksMd: null, + comparisonMd: null, validationJson: null, target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), }; @@ -31,6 +32,10 @@ function parseArgs(argv) { options.bottlenecksMd = path.resolve(argv[++i] ?? 
""); continue; } + if (arg === "--comparison-md") { + options.comparisonMd = path.resolve(argv[++i] ?? ""); + continue; + } if (arg === "--validation-json") { options.validationJson = path.resolve(argv[++i] ?? ""); continue; @@ -46,7 +51,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--target ] + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--target ] Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); } @@ -57,7 +62,13 @@ function ensureFile(filePath, label) { } } -function buildSummarySection(aggregateMd, statusMd, validationJson, bottlenecksMd) { +function buildSummarySection( + aggregateMd, + statusMd, + validationJson, + bottlenecksMd, + comparisonMd, +) { const now = new Date().toISOString(); const validation = JSON.parse(validationJson); const status = validation.passed ? "✅ MATRIX PASS" : "❌ MATRIX FAIL"; @@ -99,6 +110,12 @@ function buildSummarySection(aggregateMd, statusMd, validationJson, bottlenecksM markdown += `${bottlenecksMd.trim()}\n\n`; markdown += "\n\n"; } + if (comparisonMd) { + markdown += + "
<details>\n<summary>Baseline vs Candidate Comparison</summary>\n\n"; + markdown += `${comparisonMd.trim()}\n\n`; + markdown += "</details>
\n\n"; + } return markdown; } @@ -115,10 +132,7 @@ function writeToBenchmarkHistory(targetFile, summaryMd) { const insertPos = start + markerStart.length; const updated = - current.slice(0, insertPos) + - "\n\n" + - summaryMd + - current.slice(end); + current.slice(0, insertPos) + "\n\n" + summaryMd + current.slice(end); fs.writeFileSync(targetFile, updated, "utf8"); } @@ -135,6 +149,9 @@ function main() { if (options.bottlenecksMd) { ensureFile(options.bottlenecksMd, "Bottlenecks markdown"); } + if (options.comparisonMd) { + ensureFile(options.comparisonMd, "Comparison markdown"); + } ensureFile(options.target, "Target"); const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8"); @@ -143,11 +160,15 @@ function main() { const bottlenecksMd = options.bottlenecksMd ? fs.readFileSync(options.bottlenecksMd, "utf8") : null; + const comparisonMd = options.comparisonMd + ? fs.readFileSync(options.comparisonMd, "utf8") + : null; const summaryMd = buildSummarySection( aggregateMd, statusMd, validationJson, bottlenecksMd, + comparisonMd, ); writeToBenchmarkHistory(options.target, summaryMd); console.log(`Published matrix summary into ${options.target}`); From 797b9d87823a6cdd66be255f32f3fe3ab79c3e07 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:16:54 +0000 Subject: [PATCH 091/333] improve: separate prefetch and decode in-flight tracking Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 11 ++++++++ crates/editor/src/playback.rs | 42 ++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index ea6a0edf5c..cb077ed982 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -264,6 +264,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/finalize-playback-matrix.js` now forwards generated comparison artifact to publishing when both compare and publish 
options are enabled. - Keeps benchmark history entries self-contained with regression gate evidence. +24. **Separated prefetch/direct decode in-flight tracking (2026-02-13)** + - Playback now tracks prefetch in-flight frames and direct decode in-flight frames in separate generation-aware sets. + - Prevents prefetch-side clear/reset paths from clearing direct decode in-flight markers. + - In-flight wait logic now checks both sets and direct decode outputs are dropped when a pending seek is detected before frame use. + --- ## Root Cause Analysis Archive @@ -374,8 +379,11 @@ Decoder Pipeline: 23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. 24. Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. 25. Added optional baseline comparison gating inside matrix finalization workflow. +26. Made in-flight frame tracking generation-aware to prevent cross-seek marker collisions. +27. Split prefetch and direct decode in-flight tracking and guarded direct decode frame usage when seek updates are pending. 26. Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Added comparison artifact attachment support in publish/finalize matrix summary workflows. +28. Split prefetch and direct decode in-flight tracking to avoid cross-path marker interference. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -397,6 +405,8 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. 
+- `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. +- `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. @@ -405,6 +415,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: in-flight frame markers now include seek generation to prevent old decode paths from clearing current-generation markers. - `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional comparison artifact attachment. - `scripts/finalize-playback-matrix.js`: finalize flow now includes comparison artifact when publishing and baseline comparison are both requested. +- `crates/editor/src/playback.rs`: prefetch and direct decode now use separate generation-aware in-flight sets, with combined checks in frame wait path. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 10963c31f2..1071231185 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -135,10 +135,12 @@ impl Playback { let (seek_generation_tx, mut seek_generation_rx) = watch::channel(0u64); seek_generation_rx.borrow_and_update(); - let in_flight_frames: Arc>> = + let prefetch_in_flight_frames: Arc>> = + Arc::new(RwLock::new(HashSet::new())); + let prefetch_in_flight = prefetch_in_flight_frames.clone(); + let playback_prefetch_in_flight = prefetch_in_flight_frames; + let playback_decode_in_flight: Arc>> = Arc::new(RwLock::new(HashSet::new())); - let prefetch_in_flight = in_flight_frames.clone(); - let main_in_flight = in_flight_frames; let prefetch_stop_rx = stop_rx.clone(); let mut prefetch_project = self.project.clone(); @@ -612,10 +614,15 @@ impl Playback { prefetched.segment_index, )) } else { - let is_in_flight = main_in_flight + let in_flight_key = (seek_generation, frame_number); + let is_in_flight = playback_prefetch_in_flight .read() - .map(|guard| guard.contains(&(seek_generation, frame_number))) - .unwrap_or(false); + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false) + || playback_decode_in_flight + .read() + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false); if is_in_flight { let wait_start = Instant::now(); @@ -637,10 +644,14 @@ impl Playback { } } _ = tokio::time::sleep(in_flight_poll_interval) => { - let still_in_flight = main_in_flight + let still_in_flight = playback_prefetch_in_flight .read() - .map(|guard| guard.contains(&(seek_generation, frame_number))) - .unwrap_or(false); + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false) + || playback_decode_in_flight + .read() + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false); if !still_in_flight { break; } @@ -718,21 +729,20 @@ impl Playback { .map(|v| v.offsets) .unwrap_or_default(); - let in_flight_key = (seek_generation, frame_number); - if 
let Ok(mut guard) = main_in_flight.write() { + if let Ok(mut guard) = playback_decode_in_flight.write() { guard.insert(in_flight_key); } let max_wait = frame_fetch_timeout; let data = tokio::select! { _ = stop_rx.changed() => { - if let Ok(mut guard) = main_in_flight.write() { + if let Ok(mut guard) = playback_decode_in_flight.write() { guard.remove(&in_flight_key); } break 'playback }, _ = tokio::time::sleep(max_wait) => { - if let Ok(mut guard) = main_in_flight.write() { + if let Ok(mut guard) = playback_decode_in_flight.write() { guard.remove(&in_flight_key); } frame_number = frame_number.saturating_add(1); @@ -742,13 +752,17 @@ impl Playback { data = segment_media .decoders .get_frames(segment_time as f32, !cached_project.camera.hide, clip_offsets) => { - if let Ok(mut guard) = main_in_flight.write() { + if let Ok(mut guard) = playback_decode_in_flight.write() { guard.remove(&in_flight_key); } data }, }; + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + data.map(|frames| (Arc::new(frames), segment.recording_clip)) } } From 479248217efc4354e0efd6204ee9471912d25437 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:16:54 +0000 Subject: [PATCH 092/333] improve: separate prefetch and decode in-flight tracking Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 11 ++++++++ crates/editor/src/playback.rs | 42 ++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index ea6a0edf5c..cb077ed982 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -264,6 +264,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/finalize-playback-matrix.js` now forwards generated comparison artifact to publishing when both compare and publish options are enabled. - Keeps benchmark history entries self-contained with regression gate evidence. +24. 
**Separated prefetch/direct decode in-flight tracking (2026-02-13)** + - Playback now tracks prefetch in-flight frames and direct decode in-flight frames in separate generation-aware sets. + - Prevents prefetch-side clear/reset paths from clearing direct decode in-flight markers. + - In-flight wait logic now checks both sets and direct decode outputs are dropped when a pending seek is detected before frame use. + --- ## Root Cause Analysis Archive @@ -374,8 +379,11 @@ Decoder Pipeline: 23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. 24. Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. 25. Added optional baseline comparison gating inside matrix finalization workflow. +26. Made in-flight frame tracking generation-aware to prevent cross-seek marker collisions. +27. Split prefetch and direct decode in-flight tracking and guarded direct decode frame usage when seek updates are pending. 26. Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Added comparison artifact attachment support in publish/finalize matrix summary workflows. +28. Split prefetch and direct decode in-flight tracking to avoid cross-path marker interference. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -397,6 +405,8 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. +- `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. 
+- `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. @@ -405,6 +415,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: in-flight frame markers now include seek generation to prevent old decode paths from clearing current-generation markers. - `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional comparison artifact attachment. - `scripts/finalize-playback-matrix.js`: finalize flow now includes comparison artifact when publishing and baseline comparison are both requested. +- `crates/editor/src/playback.rs`: prefetch and direct decode now use separate generation-aware in-flight sets, with combined checks in frame wait path. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 10963c31f2..1071231185 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -135,10 +135,12 @@ impl Playback { let (seek_generation_tx, mut seek_generation_rx) = watch::channel(0u64); seek_generation_rx.borrow_and_update(); - let in_flight_frames: Arc>> = + let prefetch_in_flight_frames: Arc>> = + Arc::new(RwLock::new(HashSet::new())); + let prefetch_in_flight = prefetch_in_flight_frames.clone(); + let playback_prefetch_in_flight = prefetch_in_flight_frames; + let playback_decode_in_flight: Arc>> = Arc::new(RwLock::new(HashSet::new())); - let prefetch_in_flight = in_flight_frames.clone(); - let main_in_flight = in_flight_frames; let prefetch_stop_rx = stop_rx.clone(); let mut prefetch_project = self.project.clone(); @@ -612,10 +614,15 @@ impl Playback { prefetched.segment_index, )) } else { - let is_in_flight = main_in_flight + let in_flight_key = (seek_generation, frame_number); + let is_in_flight = playback_prefetch_in_flight .read() - .map(|guard| guard.contains(&(seek_generation, frame_number))) - .unwrap_or(false); + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false) + || playback_decode_in_flight + .read() + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false); if is_in_flight { let wait_start = Instant::now(); @@ -637,10 +644,14 @@ impl Playback { } } _ = tokio::time::sleep(in_flight_poll_interval) => { - let still_in_flight = main_in_flight + let still_in_flight = playback_prefetch_in_flight .read() - .map(|guard| guard.contains(&(seek_generation, frame_number))) - .unwrap_or(false); + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false) + || playback_decode_in_flight + .read() + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false); if !still_in_flight { break; } @@ -718,21 +729,20 @@ impl Playback { .map(|v| v.offsets) .unwrap_or_default(); - let in_flight_key = (seek_generation, frame_number); - if 
let Ok(mut guard) = main_in_flight.write() { + if let Ok(mut guard) = playback_decode_in_flight.write() { guard.insert(in_flight_key); } let max_wait = frame_fetch_timeout; let data = tokio::select! { _ = stop_rx.changed() => { - if let Ok(mut guard) = main_in_flight.write() { + if let Ok(mut guard) = playback_decode_in_flight.write() { guard.remove(&in_flight_key); } break 'playback }, _ = tokio::time::sleep(max_wait) => { - if let Ok(mut guard) = main_in_flight.write() { + if let Ok(mut guard) = playback_decode_in_flight.write() { guard.remove(&in_flight_key); } frame_number = frame_number.saturating_add(1); @@ -742,13 +752,17 @@ impl Playback { data = segment_media .decoders .get_frames(segment_time as f32, !cached_project.camera.hide, clip_offsets) => { - if let Ok(mut guard) = main_in_flight.write() { + if let Ok(mut guard) = playback_decode_in_flight.write() { guard.remove(&in_flight_key); } data }, }; + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + data.map(|frames| (Arc::new(frames), segment.recording_clip)) } } From dc2c86d62244ad31da127827ea78cb61a0bc9051 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:18:34 +0000 Subject: [PATCH 093/333] improve: gate benchmark comparisons on candidate coverage gaps Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 9 ++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 ++ scripts/compare-playback-benchmark-runs.js | 50 ++++++++++++++++++---- 4 files changed, 55 insertions(+), 8 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 94350ea549..44a210ebfa 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -92,6 +92,7 @@ node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-result # Compare candidate against baseline and flag regressions node scripts/compare-playback-benchmark-runs.js --baseline 
/path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index cb077ed982..b148eab06d 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -269,6 +269,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prevents prefetch-side clear/reset paths from clearing direct decode in-flight markers. - In-flight wait logic now checks both sets and direct decode outputs are dropped when a pending seek is detected before frame use. +25. **Added comparison coverage gating for missing candidate rows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now reports baseline rows that are missing in candidate runs. + - Comparison now fails by default when candidate coverage is missing baseline rows. + - Optional `--allow-missing-candidate` flag keeps metric regression checks while allowing partial candidate matrices. + --- ## Root Cause Analysis Archive @@ -381,9 +386,11 @@ Decoder Pipeline: 25. Added optional baseline comparison gating inside matrix finalization workflow. 26. Made in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Split prefetch and direct decode in-flight tracking and guarded direct decode frame usage when seek updates are pending. +28. 
Added missing-candidate-row coverage gating in baseline-vs-candidate comparison script with optional override flag. 26. Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Added comparison artifact attachment support in publish/finalize matrix summary workflows. 28. Split prefetch and direct decode in-flight tracking to avoid cross-path marker interference. +29. Added missing-candidate-row gating to baseline-vs-candidate comparison workflow. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -407,6 +414,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. +- `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. @@ -416,6 +424,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional comparison artifact attachment. 
- `scripts/finalize-playback-matrix.js`: finalize flow now includes comparison artifact when publishing and baseline comparison are both requested. - `crates/editor/src/playback.rs`: prefetch and direct decode now use separate generation-aware in-flight sets, with combined checks in frame wait path. +- `scripts/compare-playback-benchmark-runs.js`: comparison now reports and gates missing candidate rows relative to baseline coverage. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index bba2e07b46..30c1e4c0b3 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -158,6 +158,9 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # multiple baseline/candidate directories can be provided pnpm bench:playback:compare -- --baseline /path/to/baseline-a --baseline /path/to/baseline-b --candidate /path/to/candidate-a --candidate /path/to/candidate-b --output /tmp/playback-matrix-final/playback-comparison.md + +# optional: allow missing candidate rows while still checking metric regressions +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 73fe67253b..fc1c101dd1 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -11,6 +11,7 @@ function parseArgs(argv) { allowFpsDrop: 2, allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, + allowMissingCandidate: false, }; for (let i = 2; i < argv.length; i++) { @@ -60,6 +61,10 @@ function parseArgs(argv) { options.allowScrubP95IncreaseMs = value; continue; } + if (arg === "--allow-missing-candidate") { + options.allowMissingCandidate = true; + continue; + } throw new 
Error(`Unknown argument: ${arg}`); } @@ -67,7 +72,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } @@ -172,8 +177,22 @@ function formatNumber(value, digits = 2) { return value === null ? "n/a" : value.toFixed(digits); } -function compareMetrics(baselineRows, candidateRows, options) { +function compareMetrics(baselineRows, candidateRows) { const comparisons = []; + const missingCandidateRows = []; + + for (const [key, baseline] of baselineRows) { + const candidate = candidateRows.get(key); + if (!candidate) { + missingCandidateRows.push({ + platform: baseline.platform, + gpu: baseline.gpu, + scenario: baseline.scenario, + recording: baseline.recording, + format: baseline.format, + }); + } + } for (const [key, candidate] of candidateRows) { const baseline = baselineRows.get(key); @@ -211,10 +230,10 @@ function compareMetrics(baselineRows, candidateRows, options) { } comparisons.sort((a, b) => b.regressions.length - a.regressions.length); - return comparisons; + return { comparisons, missingCandidateRows }; } -function toMarkdown(comparisons, options) { +function toMarkdown(comparisons, missingCandidateRows, options) { const regressions = comparisons.filter( (entry) => entry.regressions.length > 0, ); @@ -222,7 +241,16 @@ function toMarkdown(comparisons, options) { md += "# Playback Benchmark Comparison\n\n"; md += `Generated: ${new 
Date().toISOString()}\n\n`; md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; - md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}\n\n`; + md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}\n\n`; + if (missingCandidateRows.length > 0) { + md += "## Missing Candidate Rows\n\n"; + md += "| Platform | GPU | Scenario | Recording | Format |\n"; + md += "|---|---|---|---|---|\n"; + for (const row of missingCandidateRows) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} |\n`; + } + md += "\n"; + } md += "| Platform | GPU | Scenario | Recording | Format | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; md += "|---|---|---|---|---|---:|---:|---:|---|\n"; @@ -261,8 +289,11 @@ function main() { const baselineRows = collectMetrics(baselineFiles); const candidateRows = collectMetrics(candidateFiles); - const comparisons = compareMetrics(baselineRows, candidateRows, options); - const markdown = toMarkdown(comparisons, options); + const { comparisons, missingCandidateRows } = compareMetrics( + baselineRows, + candidateRows, + ); + const markdown = toMarkdown(comparisons, missingCandidateRows, options); if (options.output) { fs.writeFileSync(options.output, markdown, "utf8"); @@ -271,7 +302,10 @@ function main() { process.stdout.write(markdown); } - if (comparisons.some((entry) => entry.regressions.length > 0)) { + if ( + comparisons.some((entry) => entry.regressions.length > 0) || + (!options.allowMissingCandidate && missingCandidateRows.length > 0) + ) { process.exit(1); } } From fb44a38d80707282dfcc930ea70599ed74094f4d Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:18:34 +0000 Subject: [PATCH 094/333] improve: gate benchmark comparisons on candidate coverage gaps 
Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 9 ++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 ++ scripts/compare-playback-benchmark-runs.js | 50 ++++++++++++++++++---- 4 files changed, 55 insertions(+), 8 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 94350ea549..44a210ebfa 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -92,6 +92,7 @@ node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-result # Compare candidate against baseline and flag regressions node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index cb077ed982..b148eab06d 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -269,6 +269,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prevents prefetch-side clear/reset paths from clearing direct decode in-flight markers. - In-flight wait logic now checks both sets and direct decode outputs are dropped when a pending seek is detected before frame use. +25. 
**Added comparison coverage gating for missing candidate rows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now reports baseline rows that are missing in candidate runs. + - Comparison now fails by default when candidate coverage is missing baseline rows. + - Optional `--allow-missing-candidate` flag keeps metric regression checks while allowing partial candidate matrices. + --- ## Root Cause Analysis Archive @@ -381,9 +386,11 @@ Decoder Pipeline: 25. Added optional baseline comparison gating inside matrix finalization workflow. 26. Made in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Split prefetch and direct decode in-flight tracking and guarded direct decode frame usage when seek updates are pending. +28. Added missing-candidate-row coverage gating in baseline-vs-candidate comparison script with optional override flag. 26. Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Added comparison artifact attachment support in publish/finalize matrix summary workflows. 28. Split prefetch and direct decode in-flight tracking to avoid cross-path marker interference. +29. Added missing-candidate-row gating to baseline-vs-candidate comparison workflow. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -407,6 +414,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. 
+- `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. @@ -416,6 +424,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional comparison artifact attachment. - `scripts/finalize-playback-matrix.js`: finalize flow now includes comparison artifact when publishing and baseline comparison are both requested. - `crates/editor/src/playback.rs`: prefetch and direct decode now use separate generation-aware in-flight sets, with combined checks in frame wait path. +- `scripts/compare-playback-benchmark-runs.js`: comparison now reports and gates missing candidate rows relative to baseline coverage. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index bba2e07b46..30c1e4c0b3 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -158,6 +158,9 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # multiple baseline/candidate directories can be provided pnpm bench:playback:compare -- --baseline /path/to/baseline-a --baseline /path/to/baseline-b --candidate /path/to/candidate-a --candidate /path/to/candidate-b --output /tmp/playback-matrix-final/playback-comparison.md + +# optional: allow missing candidate rows while still checking metric regressions +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 73fe67253b..fc1c101dd1 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -11,6 +11,7 @@ function parseArgs(argv) { allowFpsDrop: 2, allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, + allowMissingCandidate: false, }; for (let i = 2; i < argv.length; i++) { @@ -60,6 +61,10 @@ function parseArgs(argv) { options.allowScrubP95IncreaseMs = value; continue; } + if (arg === "--allow-missing-candidate") { + options.allowMissingCandidate = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -67,7 +72,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] 
[--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } @@ -172,8 +177,22 @@ function formatNumber(value, digits = 2) { return value === null ? "n/a" : value.toFixed(digits); } -function compareMetrics(baselineRows, candidateRows, options) { +function compareMetrics(baselineRows, candidateRows) { const comparisons = []; + const missingCandidateRows = []; + + for (const [key, baseline] of baselineRows) { + const candidate = candidateRows.get(key); + if (!candidate) { + missingCandidateRows.push({ + platform: baseline.platform, + gpu: baseline.gpu, + scenario: baseline.scenario, + recording: baseline.recording, + format: baseline.format, + }); + } + } for (const [key, candidate] of candidateRows) { const baseline = baselineRows.get(key); @@ -211,10 +230,10 @@ function compareMetrics(baselineRows, candidateRows, options) { } comparisons.sort((a, b) => b.regressions.length - a.regressions.length); - return comparisons; + return { comparisons, missingCandidateRows }; } -function toMarkdown(comparisons, options) { +function toMarkdown(comparisons, missingCandidateRows, options) { const regressions = comparisons.filter( (entry) => entry.regressions.length > 0, ); @@ -222,7 +241,16 @@ function toMarkdown(comparisons, options) { md += "# Playback Benchmark Comparison\n\n"; md += `Generated: ${new Date().toISOString()}\n\n`; md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; - md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}\n\n`; + md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}\n\n`; + if (missingCandidateRows.length > 0) { 
+ md += "## Missing Candidate Rows\n\n"; + md += "| Platform | GPU | Scenario | Recording | Format |\n"; + md += "|---|---|---|---|---|\n"; + for (const row of missingCandidateRows) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} |\n`; + } + md += "\n"; + } md += "| Platform | GPU | Scenario | Recording | Format | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; md += "|---|---|---|---|---|---:|---:|---:|---|\n"; @@ -261,8 +289,11 @@ function main() { const baselineRows = collectMetrics(baselineFiles); const candidateRows = collectMetrics(candidateFiles); - const comparisons = compareMetrics(baselineRows, candidateRows, options); - const markdown = toMarkdown(comparisons, options); + const { comparisons, missingCandidateRows } = compareMetrics( + baselineRows, + candidateRows, + ); + const markdown = toMarkdown(comparisons, missingCandidateRows, options); if (options.output) { fs.writeFileSync(options.output, markdown, "utf8"); @@ -271,7 +302,10 @@ function main() { process.stdout.write(markdown); } - if (comparisons.some((entry) => entry.regressions.length > 0)) { + if ( + comparisons.some((entry) => entry.regressions.length > 0) || + (!options.allowMissingCandidate && missingCandidateRows.length > 0) + ) { process.exit(1); } } From b16d835121f801970639a9c84c139d2cf7ae62b7 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:20:33 +0000 Subject: [PATCH 095/333] improve: run finalize comparison before publish attachment Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 9 +++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 ++ scripts/finalize-playback-matrix.js | 50 ++++++++++++++---------- 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 44a210ebfa..bc03e8fd90 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ 
b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -79,6 +79,7 @@ node scripts/validate-playback-matrix.js --input /path/to/json-results --require node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md # Publish matrix artifacts into this benchmark history diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b148eab06d..7589f350ae 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -274,6 +274,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison now fails by default when candidate coverage is missing baseline rows. - Optional `--allow-missing-candidate` flag keeps metric regression checks while allowing partial candidate matrices. +26. **Fixed finalize publish ordering for comparison artifacts (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now executes baseline comparison before publish when both options are enabled. 
+ - Prevents publish step from referencing missing comparison artifact files. + - Added finalize passthrough support for `--allow-missing-candidate`. + --- ## Root Cause Analysis Archive @@ -387,10 +392,12 @@ Decoder Pipeline: 26. Made in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Split prefetch and direct decode in-flight tracking and guarded direct decode frame usage when seek updates are pending. 28. Added missing-candidate-row coverage gating in baseline-vs-candidate comparison script with optional override flag. +29. Fixed finalize compare/publish ordering so comparison artifacts exist before publish attachment and added finalize support for missing-candidate override. 26. Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Added comparison artifact attachment support in publish/finalize matrix summary workflows. 28. Split prefetch and direct decode in-flight tracking to avoid cross-path marker interference. 29. Added missing-candidate-row gating to baseline-vs-candidate comparison workflow. +30. Fixed finalize compare/publish ordering and propagated missing-candidate override into finalize compare flow. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -415,6 +422,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. +- `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
- `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. @@ -425,6 +433,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: finalize flow now includes comparison artifact when publishing and baseline comparison are both requested. - `crates/editor/src/playback.rs`: prefetch and direct decode now use separate generation-aware in-flight sets, with combined checks in frame wait path. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports and gates missing candidate rows relative to baseline coverage. +- `scripts/finalize-playback-matrix.js`: comparison now runs before publish in combined workflows and forwards missing-candidate override to compare step. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 30c1e4c0b3..51f937d307 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -120,6 +120,9 @@ pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-di # include baseline comparison gate during finalization pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 + +# optional: allow missing candidate rows during compare gate +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate ``` Finalize and publish to benchmark history in one command: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 30ebdf5792..130fd6bb34 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -18,6 +18,7 @@ function parseArgs(argv) { allowFpsDrop: 2, allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, + allowMissingCandidate: false, }; for (let i = 2; i < argv.length; i++) { @@ -111,6 +112,10 @@ function parseArgs(argv) { options.allowScrubP95IncreaseMs = value; continue; } + if (arg === "--allow-missing-candidate") { + options.allowMissingCandidate = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -118,7 +123,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] 
--output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -207,26 +212,6 @@ function main() { ); run("node", analyzeArgs); } - if (options.publishTarget) { - const publishArgs = [ - "scripts/publish-playback-matrix-summary.js", - "--aggregate-md", - aggregatePath, - "--status-md", - statusPath, - "--validation-json", - validationPath, - "--target", - options.publishTarget, - ]; - if (options.analyze) { - publishArgs.push("--bottlenecks-md", bottleneckPath); - } - if (options.compareBaselineInputs.length > 0) { - publishArgs.push("--comparison-md", comparisonPath); - } - run("node", publishArgs); - } if (options.compareBaselineInputs.length > 0) { const compareArgs = ["scripts/compare-playback-benchmark-runs.js"]; for (const baselineInput of options.compareBaselineInputs) { @@ -245,8 +230,31 @@ function main() { "--allow-scrub-p95-increase-ms", String(options.allowScrubP95IncreaseMs), ); + if (options.allowMissingCandidate) { + compareArgs.push("--allow-missing-candidate"); + } run("node", compareArgs); } + if (options.publishTarget) { + const publishArgs = [ + "scripts/publish-playback-matrix-summary.js", + "--aggregate-md", + aggregatePath, + "--status-md", + statusPath, + "--validation-json", + validationPath, + 
"--target", + options.publishTarget, + ]; + if (options.analyze) { + publishArgs.push("--bottlenecks-md", bottleneckPath); + } + if (options.compareBaselineInputs.length > 0) { + publishArgs.push("--comparison-md", comparisonPath); + } + run("node", publishArgs); + } console.log(`Aggregate markdown: ${aggregatePath}`); console.log(`Status markdown: ${statusPath}`); From dff49d8d3b94ee76e9a09fd0045c292ddddfec7d Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:20:33 +0000 Subject: [PATCH 096/333] improve: run finalize comparison before publish attachment Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 9 +++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 ++ scripts/finalize-playback-matrix.js | 50 ++++++++++++++---------- 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 44a210ebfa..bc03e8fd90 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -79,6 +79,7 @@ node scripts/validate-playback-matrix.js --input /path/to/json-results --require node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate node scripts/finalize-playback-matrix.js 
--input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md # Publish matrix artifacts into this benchmark history diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b148eab06d..7589f350ae 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -274,6 +274,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison now fails by default when candidate coverage is missing baseline rows. - Optional `--allow-missing-candidate` flag keeps metric regression checks while allowing partial candidate matrices. +26. **Fixed finalize publish ordering for comparison artifacts (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now executes baseline comparison before publish when both options are enabled. + - Prevents publish step from referencing missing comparison artifact files. + - Added finalize passthrough support for `--allow-missing-candidate`. + --- ## Root Cause Analysis Archive @@ -387,10 +392,12 @@ Decoder Pipeline: 26. Made in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Split prefetch and direct decode in-flight tracking and guarded direct decode frame usage when seek updates are pending. 28. Added missing-candidate-row coverage gating in baseline-vs-candidate comparison script with optional override flag. +29. Fixed finalize compare/publish ordering so comparison artifacts exist before publish attachment and added finalize support for missing-candidate override. 26. Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Added comparison artifact attachment support in publish/finalize matrix summary workflows. 28. Split prefetch and direct decode in-flight tracking to avoid cross-path marker interference. 29. 
Added missing-candidate-row gating to baseline-vs-candidate comparison workflow. +30. Fixed finalize compare/publish ordering and propagated missing-candidate override into finalize compare flow. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -415,6 +422,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. +- `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. @@ -425,6 +433,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: finalize flow now includes comparison artifact when publishing and baseline comparison are both requested. - `crates/editor/src/playback.rs`: prefetch and direct decode now use separate generation-aware in-flight sets, with combined checks in frame wait path. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports and gates missing candidate rows relative to baseline coverage. 
+- `scripts/finalize-playback-matrix.js`: comparison now runs before publish in combined workflows and forwards missing-candidate override to compare step. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 30c1e4c0b3..51f937d307 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -120,6 +120,9 @@ pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-di # include baseline comparison gate during finalization pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 + +# optional: allow missing candidate rows during compare gate +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate ``` Finalize and publish to benchmark history in one command: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 30ebdf5792..130fd6bb34 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -18,6 +18,7 @@ function parseArgs(argv) { allowFpsDrop: 2, allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, + allowMissingCandidate: false, }; for (let i = 2; i < argv.length; i++) { @@ -111,6 +112,10 @@ function parseArgs(argv) { options.allowScrubP95IncreaseMs = value; continue; } + if (arg === "--allow-missing-candidate") { + options.allowMissingCandidate = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -118,7 +123,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] 
--output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -207,26 +212,6 @@ function main() { ); run("node", analyzeArgs); } - if (options.publishTarget) { - const publishArgs = [ - "scripts/publish-playback-matrix-summary.js", - "--aggregate-md", - aggregatePath, - "--status-md", - statusPath, - "--validation-json", - validationPath, - "--target", - options.publishTarget, - ]; - if (options.analyze) { - publishArgs.push("--bottlenecks-md", bottleneckPath); - } - if (options.compareBaselineInputs.length > 0) { - publishArgs.push("--comparison-md", comparisonPath); - } - run("node", publishArgs); - } if (options.compareBaselineInputs.length > 0) { const compareArgs = ["scripts/compare-playback-benchmark-runs.js"]; for (const baselineInput of options.compareBaselineInputs) { @@ -245,8 +230,31 @@ function main() { "--allow-scrub-p95-increase-ms", String(options.allowScrubP95IncreaseMs), ); + if (options.allowMissingCandidate) { + compareArgs.push("--allow-missing-candidate"); + } run("node", compareArgs); } + if (options.publishTarget) { + const publishArgs = [ + "scripts/publish-playback-matrix-summary.js", + "--aggregate-md", + aggregatePath, + "--status-md", + statusPath, + "--validation-json", + validationPath, + 
"--target", + options.publishTarget, + ]; + if (options.analyze) { + publishArgs.push("--bottlenecks-md", bottleneckPath); + } + if (options.compareBaselineInputs.length > 0) { + publishArgs.push("--comparison-md", comparisonPath); + } + run("node", publishArgs); + } console.log(`Aggregate markdown: ${aggregatePath}`); console.log(`Status markdown: ${statusPath}`); From 9f0c10745844940c59bf6bb60715f14e07e7db52 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:22:18 +0000 Subject: [PATCH 097/333] improve: emit structured comparison artifacts for matrix gating Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 11 ++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 ++ scripts/compare-playback-benchmark-runs.js | 46 +++++++++++++++++++++- scripts/finalize-playback-matrix.js | 7 ++++ 5 files changed, 67 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index bc03e8fd90..8f533ecdfe 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -94,6 +94,7 @@ node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-result node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results 
--output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 7589f350ae..4aa9451bc6 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -279,6 +279,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prevents publish step from referencing missing comparison artifact files. - Added finalize passthrough support for `--allow-missing-candidate`. +27. **Added structured JSON output for comparison gating (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--output-json`. + - Emits comparison summary/regression/missing-coverage details for automation. + - `scripts/finalize-playback-matrix.js` now writes comparison markdown and JSON artifacts during baseline comparison runs. + --- ## Root Cause Analysis Archive @@ -393,11 +398,13 @@ Decoder Pipeline: 27. Split prefetch and direct decode in-flight tracking and guarded direct decode frame usage when seek updates are pending. 28. Added missing-candidate-row coverage gating in baseline-vs-candidate comparison script with optional override flag. 29. Fixed finalize compare/publish ordering so comparison artifacts exist before publish attachment and added finalize support for missing-candidate override. +30. Added structured JSON output for baseline-vs-candidate comparison script and wired finalize comparison runs to emit comparison JSON artifacts. 26. Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Added comparison artifact attachment support in publish/finalize matrix summary workflows. 28. Split prefetch and direct decode in-flight tracking to avoid cross-path marker interference. 29. Added missing-candidate-row gating to baseline-vs-candidate comparison workflow. 30. 
Fixed finalize compare/publish ordering and propagated missing-candidate override into finalize compare flow. +31. Added structured JSON artifact emission for baseline-vs-candidate comparison workflows. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -423,6 +430,8 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. +- `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. +- `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. @@ -434,6 +443,8 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch and direct decode now use separate generation-aware in-flight sets, with combined checks in frame wait path. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports and gates missing candidate rows relative to baseline coverage. 
- `scripts/finalize-playback-matrix.js`: comparison now runs before publish in combined workflows and forwards missing-candidate override to compare step. +- `scripts/compare-playback-benchmark-runs.js`: comparison now supports optional structured JSON output for downstream automation. +- `scripts/finalize-playback-matrix.js`: baseline comparison in finalize now writes both markdown and JSON comparison artifacts. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 51f937d307..d88f7e982b 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -164,6 +164,9 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-a --baseline /path/t # optional: allow missing candidate rows while still checking metric regressions pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate + +# emit structured JSON alongside markdown for automation +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --output-json /tmp/playback-matrix-final/playback-comparison.json ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index fc1c101dd1..2f76545b6a 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -8,6 +8,7 @@ function parseArgs(argv) { baselineInputs: [], candidateInputs: [], output: null, + outputJson: null, allowFpsDrop: 2, allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, @@ -37,6 +38,10 @@ function parseArgs(argv) { options.output = path.resolve(argv[++i] ?? ""); continue; } + if (arg === "--output-json") { + options.outputJson = path.resolve(argv[++i] ?? 
""); + continue; + } if (arg === "--allow-fps-drop") { const value = Number.parseFloat(argv[++i] ?? ""); if (!Number.isFinite(value) || value < 0) { @@ -72,7 +77,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } @@ -261,6 +266,32 @@ function toMarkdown(comparisons, missingCandidateRows, options) { return md; } +function buildJsonOutput(comparisons, missingCandidateRows, options) { + const regressions = comparisons.filter( + (entry) => entry.regressions.length > 0, + ); + return { + generatedAt: new Date().toISOString(), + tolerance: { + allowFpsDrop: options.allowFpsDrop, + allowStartupIncreaseMs: options.allowStartupIncreaseMs, + allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, + allowMissingCandidate: options.allowMissingCandidate, + }, + summary: { + comparedRows: comparisons.length, + regressions: regressions.length, + missingCandidateRows: missingCandidateRows.length, + passed: + regressions.length === 0 && + (options.allowMissingCandidate || missingCandidateRows.length === 0), + }, + regressions, + missingCandidateRows, + comparisons, + }; +} + function main() { const options = parseArgs(process.argv); if (options.help) { @@ -294,6 +325,11 @@ function main() { candidateRows, ); const markdown = toMarkdown(comparisons, missingCandidateRows, options); + const outputJson = 
buildJsonOutput( + comparisons, + missingCandidateRows, + options, + ); if (options.output) { fs.writeFileSync(options.output, markdown, "utf8"); @@ -301,6 +337,14 @@ function main() { } else { process.stdout.write(markdown); } + if (options.outputJson) { + fs.writeFileSync( + options.outputJson, + JSON.stringify(outputJson, null, 2), + "utf8", + ); + console.log(`Wrote comparison JSON to ${options.outputJson}`); + } if ( comparisons.some((entry) => entry.regressions.length > 0) || diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 130fd6bb34..4e3699b401 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -169,6 +169,10 @@ function main() { "playback-bottlenecks.json", ); const comparisonPath = path.join(options.outputDir, "playback-comparison.md"); + const comparisonJsonPath = path.join( + options.outputDir, + "playback-comparison.json", + ); const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; const statusArgs = ["scripts/build-playback-matrix-report.js"]; @@ -223,6 +227,8 @@ function main() { compareArgs.push( "--output", comparisonPath, + "--output-json", + comparisonJsonPath, "--allow-fps-drop", String(options.allowFpsDrop), "--allow-startup-increase-ms", @@ -268,6 +274,7 @@ function main() { } if (options.compareBaselineInputs.length > 0) { console.log(`Comparison report: ${comparisonPath}`); + console.log(`Comparison JSON: ${comparisonJsonPath}`); } } From 2a4715afa5888350709317500a4a2d6d0eff03cf Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:22:18 +0000 Subject: [PATCH 098/333] improve: emit structured comparison artifacts for matrix gating Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 11 ++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 ++ scripts/compare-playback-benchmark-runs.js | 46 +++++++++++++++++++++- scripts/finalize-playback-matrix.js | 7 ++++ 5 files 
changed, 67 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index bc03e8fd90..8f533ecdfe 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -94,6 +94,7 @@ node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-result node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 7589f350ae..4aa9451bc6 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -279,6 +279,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prevents publish step from referencing missing comparison artifact files. - Added finalize passthrough support for `--allow-missing-candidate`. +27. **Added structured JSON output for comparison gating (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--output-json`. + - Emits comparison summary/regression/missing-coverage details for automation. 
+ - `scripts/finalize-playback-matrix.js` now writes comparison markdown and JSON artifacts during baseline comparison runs. + --- ## Root Cause Analysis Archive @@ -393,11 +398,13 @@ Decoder Pipeline: 27. Split prefetch and direct decode in-flight tracking and guarded direct decode frame usage when seek updates are pending. 28. Added missing-candidate-row coverage gating in baseline-vs-candidate comparison script with optional override flag. 29. Fixed finalize compare/publish ordering so comparison artifacts exist before publish attachment and added finalize support for missing-candidate override. +30. Added structured JSON output for baseline-vs-candidate comparison script and wired finalize comparison runs to emit comparison JSON artifacts. 26. Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Added comparison artifact attachment support in publish/finalize matrix summary workflows. 28. Split prefetch and direct decode in-flight tracking to avoid cross-path marker interference. 29. Added missing-candidate-row gating to baseline-vs-candidate comparison workflow. 30. Fixed finalize compare/publish ordering and propagated missing-candidate override into finalize compare flow. +31. Added structured JSON artifact emission for baseline-vs-candidate comparison workflows. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -423,6 +430,8 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
+- `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. +- `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. @@ -434,6 +443,8 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch and direct decode now use separate generation-aware in-flight sets, with combined checks in frame wait path. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports and gates missing candidate rows relative to baseline coverage. - `scripts/finalize-playback-matrix.js`: comparison now runs before publish in combined workflows and forwards missing-candidate override to compare step. +- `scripts/compare-playback-benchmark-runs.js`: comparison now supports optional structured JSON output for downstream automation. +- `scripts/finalize-playback-matrix.js`: baseline comparison in finalize now writes both markdown and JSON comparison artifacts. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 51f937d307..d88f7e982b 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -164,6 +164,9 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-a --baseline /path/t # optional: allow missing candidate rows while still checking metric regressions pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate + +# emit structured JSON alongside markdown for automation +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --output-json /tmp/playback-matrix-final/playback-comparison.json ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index fc1c101dd1..2f76545b6a 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -8,6 +8,7 @@ function parseArgs(argv) { baselineInputs: [], candidateInputs: [], output: null, + outputJson: null, allowFpsDrop: 2, allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, @@ -37,6 +38,10 @@ function parseArgs(argv) { options.output = path.resolve(argv[++i] ?? ""); continue; } + if (arg === "--output-json") { + options.outputJson = path.resolve(argv[++i] ?? ""); + continue; + } if (arg === "--allow-fps-drop") { const value = Number.parseFloat(argv[++i] ?? ""); if (!Number.isFinite(value) || value < 0) { @@ -72,7 +77,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] 
[--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } @@ -261,6 +266,32 @@ function toMarkdown(comparisons, missingCandidateRows, options) { return md; } +function buildJsonOutput(comparisons, missingCandidateRows, options) { + const regressions = comparisons.filter( + (entry) => entry.regressions.length > 0, + ); + return { + generatedAt: new Date().toISOString(), + tolerance: { + allowFpsDrop: options.allowFpsDrop, + allowStartupIncreaseMs: options.allowStartupIncreaseMs, + allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, + allowMissingCandidate: options.allowMissingCandidate, + }, + summary: { + comparedRows: comparisons.length, + regressions: regressions.length, + missingCandidateRows: missingCandidateRows.length, + passed: + regressions.length === 0 && + (options.allowMissingCandidate || missingCandidateRows.length === 0), + }, + regressions, + missingCandidateRows, + comparisons, + }; +} + function main() { const options = parseArgs(process.argv); if (options.help) { @@ -294,6 +325,11 @@ function main() { candidateRows, ); const markdown = toMarkdown(comparisons, missingCandidateRows, options); + const outputJson = buildJsonOutput( + comparisons, + missingCandidateRows, + options, + ); if (options.output) { fs.writeFileSync(options.output, markdown, "utf8"); @@ -301,6 +337,14 @@ function main() { } else { process.stdout.write(markdown); } + if (options.outputJson) { + fs.writeFileSync( + options.outputJson, + JSON.stringify(outputJson, null, 2), + "utf8", + ); + 
console.log(`Wrote comparison JSON to ${options.outputJson}`); + } if ( comparisons.some((entry) => entry.regressions.length > 0) || diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 130fd6bb34..4e3699b401 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -169,6 +169,10 @@ function main() { "playback-bottlenecks.json", ); const comparisonPath = path.join(options.outputDir, "playback-comparison.md"); + const comparisonJsonPath = path.join( + options.outputDir, + "playback-comparison.json", + ); const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; const statusArgs = ["scripts/build-playback-matrix-report.js"]; @@ -223,6 +227,8 @@ function main() { compareArgs.push( "--output", comparisonPath, + "--output-json", + comparisonJsonPath, "--allow-fps-drop", String(options.allowFpsDrop), "--allow-startup-increase-ms", @@ -268,6 +274,7 @@ function main() { } if (options.compareBaselineInputs.length > 0) { console.log(`Comparison report: ${comparisonPath}`); + console.log(`Comparison JSON: ${comparisonJsonPath}`); } } From 88bb59a51b88191471894c20b6d7ad53cc9c0983 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:26:13 +0000 Subject: [PATCH 099/333] improve: use keyed prefetch buffer for faster frame lookup Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++ crates/editor/src/playback.rs | 90 ++++++++++++++++-------------- 2 files changed, 55 insertions(+), 42 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 4aa9451bc6..54c5ca97d5 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -284,6 +284,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Emits comparison summary/regression/missing-coverage details for automation. 
- `scripts/finalize-playback-matrix.js` now writes comparison markdown and JSON artifacts during baseline comparison runs. +28. **Switched playback prefetch buffer to keyed map storage (2026-02-13)** + - Playback prefetch buffer now uses `BTreeMap` keyed by frame number. + - Removes repeated linear scans over deque entries for target frame lookup in hot playback path. + - Retains bounded buffer behavior with deterministic far-ahead/oldest eviction. + --- ## Root Cause Analysis Archive @@ -405,6 +410,7 @@ Decoder Pipeline: 29. Added missing-candidate-row gating to baseline-vs-candidate comparison workflow. 30. Fixed finalize compare/publish ordering and propagated missing-candidate override into finalize compare flow. 31. Added structured JSON artifact emission for baseline-vs-candidate comparison workflows. +32. Replaced playback prefetch deque scans with keyed `BTreeMap` buffering for lower lookup overhead in frame acquisition path. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -432,6 +438,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. +- `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. 
- `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 1071231185..0ce521b222 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -16,7 +16,7 @@ use cpal::{ use futures::stream::{FuturesUnordered, StreamExt}; use lru::LruCache; use std::{ - collections::{HashSet, VecDeque}, + collections::{BTreeMap, HashSet}, num::NonZeroUsize, sync::{ Arc, RwLock, @@ -101,6 +101,39 @@ impl FrameCache { } } +fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_frame: u32) { + while buffer.len() > PREFETCH_BUFFER_SIZE { + let far_ahead_frame = buffer + .iter() + .rev() + .find(|(frame, _)| **frame > current_frame + PREFETCH_BUFFER_SIZE as u32) + .map(|(frame, _)| *frame); + + if let Some(frame) = far_ahead_frame { + buffer.remove(&frame); + continue; + } + + let Some(oldest_frame) = buffer.keys().next().copied() else { + break; + }; + buffer.remove(&oldest_frame); + } +} + +fn insert_prefetched_frame( + buffer: &mut BTreeMap, + prefetched: PrefetchedFrame, + current_frame: u32, +) { + if prefetched.frame_number < current_frame { + return; + } + + buffer.entry(prefetched.frame_number).or_insert(prefetched); + trim_prefetch_buffer(buffer, current_frame); +} + impl Playback { pub async fn start( mut self, @@ -436,8 +469,7 @@ impl Playback { .max(Duration::from_millis(1)) .min(Duration::from_millis(4)); let mut frame_number = self.start_frame_number; - let mut prefetch_buffer: VecDeque = - VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); + let mut prefetch_buffer: BTreeMap = BTreeMap::new(); let mut frame_cache = FrameCache::new(FRAME_CACHE_SIZE); let mut seek_generation = 0u64; let base_skip_threshold = (fps / 
6).clamp(6, 16); @@ -486,8 +518,8 @@ impl Playback { tokio::select! { Some(prefetched) = prefetch_rx.recv() => { - if prefetched.generation == seek_generation && prefetched.frame_number >= frame_number { - prefetch_buffer.push_back(prefetched); + if prefetched.generation == seek_generation { + insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); if first_frame_time.is_none() { first_frame_time = Some(Instant::now()); } @@ -503,10 +535,6 @@ impl Playback { } } - prefetch_buffer - .make_contiguous() - .sort_by_key(|p| p.frame_number); - let mut playback_anchor_start = Instant::now(); let mut playback_anchor_frame = frame_number; let mut cached_project = self.project.borrow().clone(); @@ -537,25 +565,8 @@ impl Playback { cached_project = self.project.borrow_and_update().clone(); } while let Ok(prefetched) = prefetch_rx.try_recv() { - if prefetched.generation == seek_generation - && prefetched.frame_number >= frame_number - { - prefetch_buffer.push_back(prefetched); - while prefetch_buffer.len() > PREFETCH_BUFFER_SIZE { - if let Some(idx) = prefetch_buffer - .iter() - .enumerate() - .filter(|(_, p)| { - p.frame_number > frame_number + PREFETCH_BUFFER_SIZE as u32 - }) - .max_by_key(|(_, p)| p.frame_number) - .map(|(i, _)| i) - { - prefetch_buffer.remove(idx); - } else { - prefetch_buffer.pop_front(); - } - } + if prefetched.generation == seek_generation { + insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); } } @@ -603,12 +614,7 @@ impl Playback { was_cached = true; Some(cached) } else { - let prefetched_idx = prefetch_buffer - .iter() - .position(|p| p.frame_number == frame_number); - - if let Some(idx) = prefetched_idx { - let prefetched = prefetch_buffer.remove(idx).unwrap(); + if let Some(prefetched) = prefetch_buffer.remove(&frame_number) { Some(( Arc::new(prefetched.segment_frames), prefetched.segment_index, @@ -640,7 +646,7 @@ impl Playback { found_frame = Some(prefetched); break; } else if prefetched.frame_number >= 
frame_number { - prefetch_buffer.push_back(prefetched); + insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); } } _ = tokio::time::sleep(in_flight_poll_interval) => { @@ -665,11 +671,7 @@ impl Playback { prefetched.segment_index, )) } else { - let prefetched_idx = prefetch_buffer - .iter() - .position(|p| p.frame_number == frame_number); - if let Some(idx) = prefetched_idx { - let prefetched = prefetch_buffer.remove(idx).unwrap(); + if let Some(prefetched) = prefetch_buffer.remove(&frame_number) { Some(( Arc::new(prefetched.segment_frames), prefetched.segment_index, @@ -698,7 +700,11 @@ impl Playback { prefetched.segment_index, )) } else { - prefetch_buffer.push_back(prefetched); + insert_prefetched_frame( + &mut prefetch_buffer, + prefetched, + frame_number, + ); frame_number = frame_number.saturating_add(1); total_frames_skipped += 1; continue; @@ -871,7 +877,7 @@ impl Playback { total_frames_skipped += skipped as u64; skip_events = skip_events.saturating_add(1); - prefetch_buffer.retain(|p| p.frame_number >= frame_number); + prefetch_buffer.retain(|frame, _| *frame >= frame_number); let _ = frame_request_tx.send(frame_number); let _ = playback_position_tx.send(frame_number); if has_audio From e1fb5e49c76bb52e6eff620d0985508bea5f1169 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:26:13 +0000 Subject: [PATCH 100/333] improve: use keyed prefetch buffer for faster frame lookup Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++ crates/editor/src/playback.rs | 90 ++++++++++++++++-------------- 2 files changed, 55 insertions(+), 42 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 4aa9451bc6..54c5ca97d5 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -284,6 +284,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Emits comparison summary/regression/missing-coverage details for 
automation. - `scripts/finalize-playback-matrix.js` now writes comparison markdown and JSON artifacts during baseline comparison runs. +28. **Switched playback prefetch buffer to keyed map storage (2026-02-13)** + - Playback prefetch buffer now uses `BTreeMap` keyed by frame number. + - Removes repeated linear scans over deque entries for target frame lookup in hot playback path. + - Retains bounded buffer behavior with deterministic far-ahead/oldest eviction. + --- ## Root Cause Analysis Archive @@ -405,6 +410,7 @@ Decoder Pipeline: 29. Added missing-candidate-row gating to baseline-vs-candidate comparison workflow. 30. Fixed finalize compare/publish ordering and propagated missing-candidate override into finalize compare flow. 31. Added structured JSON artifact emission for baseline-vs-candidate comparison workflows. +32. Replaced playback prefetch deque scans with keyed `BTreeMap` buffering for lower lookup overhead in frame acquisition path. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -432,6 +438,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. +- `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. 
- `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 1071231185..0ce521b222 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -16,7 +16,7 @@ use cpal::{ use futures::stream::{FuturesUnordered, StreamExt}; use lru::LruCache; use std::{ - collections::{HashSet, VecDeque}, + collections::{BTreeMap, HashSet}, num::NonZeroUsize, sync::{ Arc, RwLock, @@ -101,6 +101,39 @@ impl FrameCache { } } +fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_frame: u32) { + while buffer.len() > PREFETCH_BUFFER_SIZE { + let far_ahead_frame = buffer + .iter() + .rev() + .find(|(frame, _)| **frame > current_frame + PREFETCH_BUFFER_SIZE as u32) + .map(|(frame, _)| *frame); + + if let Some(frame) = far_ahead_frame { + buffer.remove(&frame); + continue; + } + + let Some(oldest_frame) = buffer.keys().next().copied() else { + break; + }; + buffer.remove(&oldest_frame); + } +} + +fn insert_prefetched_frame( + buffer: &mut BTreeMap, + prefetched: PrefetchedFrame, + current_frame: u32, +) { + if prefetched.frame_number < current_frame { + return; + } + + buffer.entry(prefetched.frame_number).or_insert(prefetched); + trim_prefetch_buffer(buffer, current_frame); +} + impl Playback { pub async fn start( mut self, @@ -436,8 +469,7 @@ impl Playback { .max(Duration::from_millis(1)) .min(Duration::from_millis(4)); let mut frame_number = self.start_frame_number; - let mut prefetch_buffer: VecDeque = - VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); + let mut prefetch_buffer: BTreeMap = BTreeMap::new(); let mut frame_cache = FrameCache::new(FRAME_CACHE_SIZE); let mut seek_generation = 0u64; let base_skip_threshold = (fps / 
6).clamp(6, 16); @@ -486,8 +518,8 @@ impl Playback { tokio::select! { Some(prefetched) = prefetch_rx.recv() => { - if prefetched.generation == seek_generation && prefetched.frame_number >= frame_number { - prefetch_buffer.push_back(prefetched); + if prefetched.generation == seek_generation { + insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); if first_frame_time.is_none() { first_frame_time = Some(Instant::now()); } @@ -503,10 +535,6 @@ impl Playback { } } - prefetch_buffer - .make_contiguous() - .sort_by_key(|p| p.frame_number); - let mut playback_anchor_start = Instant::now(); let mut playback_anchor_frame = frame_number; let mut cached_project = self.project.borrow().clone(); @@ -537,25 +565,8 @@ impl Playback { cached_project = self.project.borrow_and_update().clone(); } while let Ok(prefetched) = prefetch_rx.try_recv() { - if prefetched.generation == seek_generation - && prefetched.frame_number >= frame_number - { - prefetch_buffer.push_back(prefetched); - while prefetch_buffer.len() > PREFETCH_BUFFER_SIZE { - if let Some(idx) = prefetch_buffer - .iter() - .enumerate() - .filter(|(_, p)| { - p.frame_number > frame_number + PREFETCH_BUFFER_SIZE as u32 - }) - .max_by_key(|(_, p)| p.frame_number) - .map(|(i, _)| i) - { - prefetch_buffer.remove(idx); - } else { - prefetch_buffer.pop_front(); - } - } + if prefetched.generation == seek_generation { + insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); } } @@ -603,12 +614,7 @@ impl Playback { was_cached = true; Some(cached) } else { - let prefetched_idx = prefetch_buffer - .iter() - .position(|p| p.frame_number == frame_number); - - if let Some(idx) = prefetched_idx { - let prefetched = prefetch_buffer.remove(idx).unwrap(); + if let Some(prefetched) = prefetch_buffer.remove(&frame_number) { Some(( Arc::new(prefetched.segment_frames), prefetched.segment_index, @@ -640,7 +646,7 @@ impl Playback { found_frame = Some(prefetched); break; } else if prefetched.frame_number >= 
frame_number { - prefetch_buffer.push_back(prefetched); + insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); } } _ = tokio::time::sleep(in_flight_poll_interval) => { @@ -665,11 +671,7 @@ impl Playback { prefetched.segment_index, )) } else { - let prefetched_idx = prefetch_buffer - .iter() - .position(|p| p.frame_number == frame_number); - if let Some(idx) = prefetched_idx { - let prefetched = prefetch_buffer.remove(idx).unwrap(); + if let Some(prefetched) = prefetch_buffer.remove(&frame_number) { Some(( Arc::new(prefetched.segment_frames), prefetched.segment_index, @@ -698,7 +700,11 @@ impl Playback { prefetched.segment_index, )) } else { - prefetch_buffer.push_back(prefetched); + insert_prefetched_frame( + &mut prefetch_buffer, + prefetched, + frame_number, + ); frame_number = frame_number.saturating_add(1); total_frames_skipped += 1; continue; @@ -871,7 +877,7 @@ impl Playback { total_frames_skipped += skipped as u64; skip_events = skip_events.saturating_add(1); - prefetch_buffer.retain(|p| p.frame_number >= frame_number); + prefetch_buffer.retain(|frame, _| *frame >= frame_number); let _ = frame_request_tx.send(frame_number); let _ = playback_position_tx.send(frame_number); if has_audio From 3eacadbff8df3e678af374163282ffd2b709ed41 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:27:12 +0000 Subject: [PATCH 101/333] improve: prune stale prefetched frames using ordered map Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 13 +++++++++++++ 2 files changed, 20 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 54c5ca97d5..0fb3dfd712 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -289,6 +289,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Removes repeated linear scans over deque entries for target frame lookup in hot playback path. 
- Retains bounded buffer behavior with deterministic far-ahead/oldest eviction. +29. **Added sorted prefetch stale-frame pruning (2026-02-13)** + - Playback loop now prunes prefetched frames older than current playhead from the keyed buffer. + - Uses ordered map operations to remove outdated frames efficiently. + - Reduces stale-buffer buildup during frame skips and sustained catch-up scenarios. + --- ## Root Cause Analysis Archive @@ -411,6 +416,7 @@ Decoder Pipeline: 30. Fixed finalize compare/publish ordering and propagated missing-candidate override into finalize compare flow. 31. Added structured JSON artifact emission for baseline-vs-candidate comparison workflows. 32. Replaced playback prefetch deque scans with keyed `BTreeMap` buffering for lower lookup overhead in frame acquisition path. +33. Added ordered stale-frame pruning in keyed prefetch buffer to keep playback buffer aligned with current playhead. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -439,6 +445,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. +- `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. 
- `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 0ce521b222..28de5205df 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -134,6 +134,18 @@ fn insert_prefetched_frame( trim_prefetch_buffer(buffer, current_frame); } +fn prune_prefetch_buffer_before_frame( + buffer: &mut BTreeMap, + current_frame: u32, +) { + while let Some((frame, _)) = buffer.first_key_value() { + if *frame >= current_frame { + break; + } + buffer.pop_first(); + } +} + impl Playback { pub async fn start( mut self, @@ -569,6 +581,7 @@ impl Playback { insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); } } + prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); let frame_offset = frame_number.saturating_sub(playback_anchor_frame) as f64; let next_deadline = playback_anchor_start + frame_duration.mul_f64(frame_offset); From 99801691f81241202514118fe3745f430e7b7ce3 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:27:12 +0000 Subject: [PATCH 102/333] improve: prune stale prefetched frames using ordered map Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 13 +++++++++++++ 2 files changed, 20 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 54c5ca97d5..0fb3dfd712 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -289,6 +289,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Removes repeated linear scans over deque entries for target frame lookup in hot playback path. 
- Retains bounded buffer behavior with deterministic far-ahead/oldest eviction. +29. **Added sorted prefetch stale-frame pruning (2026-02-13)** + - Playback loop now prunes prefetched frames older than current playhead from the keyed buffer. + - Uses ordered map operations to remove outdated frames efficiently. + - Reduces stale-buffer buildup during frame skips and sustained catch-up scenarios. + --- ## Root Cause Analysis Archive @@ -411,6 +416,7 @@ Decoder Pipeline: 30. Fixed finalize compare/publish ordering and propagated missing-candidate override into finalize compare flow. 31. Added structured JSON artifact emission for baseline-vs-candidate comparison workflows. 32. Replaced playback prefetch deque scans with keyed `BTreeMap` buffering for lower lookup overhead in frame acquisition path. +33. Added ordered stale-frame pruning in keyed prefetch buffer to keep playback buffer aligned with current playhead. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -439,6 +445,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. +- `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. 
- `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 0ce521b222..28de5205df 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -134,6 +134,18 @@ fn insert_prefetched_frame( trim_prefetch_buffer(buffer, current_frame); } +fn prune_prefetch_buffer_before_frame( + buffer: &mut BTreeMap, + current_frame: u32, +) { + while let Some((frame, _)) = buffer.first_key_value() { + if *frame >= current_frame { + break; + } + buffer.pop_first(); + } +} + impl Playback { pub async fn start( mut self, @@ -569,6 +581,7 @@ impl Playback { insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); } } + prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); let frame_offset = frame_number.saturating_sub(playback_anchor_frame) as f64; let next_deadline = playback_anchor_start + frame_duration.mul_f64(frame_offset); From bde144b1673723130464b6c1624237ae8b892167 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:28:31 +0000 Subject: [PATCH 103/333] improve: include comparison gate status in published matrix summaries Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 +- crates/editor/PLAYBACK-FINDINGS.md | 8 ++++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 ++- scripts/finalize-playback-matrix.js | 7 ++++++- scripts/publish-playback-matrix-summary.js | 22 +++++++++++++++++++++- 5 files changed, 38 insertions(+), 4 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 8f533ecdfe..2ff8e37b63 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md 
@@ -84,7 +84,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md -node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md /tmp/playback-matrix-final/playback-comparison.md +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md /tmp/playback-matrix-final/playback-comparison.md --comparison-json /tmp/playback-matrix-final/playback-comparison.json # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 0fb3dfd712..a72a384855 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -294,6 +294,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Uses ordered map operations to remove outdated frames efficiently. - Reduces stale-buffer buildup during frame skips and sustained catch-up scenarios. +30. 
**Published comparison gate status in matrix summaries (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` now accepts optional `--comparison-json`. + - Published matrix summary now includes comparison gate pass/fail, regression count, and missing-candidate-row count when comparison JSON is provided. + - `scripts/finalize-playback-matrix.js` now forwards both comparison markdown and comparison JSON to publish flow. + --- ## Root Cause Analysis Archive @@ -417,6 +422,7 @@ Decoder Pipeline: 31. Added structured JSON artifact emission for baseline-vs-candidate comparison workflows. 32. Replaced playback prefetch deque scans with keyed `BTreeMap` buffering for lower lookup overhead in frame acquisition path. 33. Added ordered stale-frame pruning in keyed prefetch buffer to keep playback buffer aligned with current playhead. +34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -446,6 +452,8 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. +- `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. +- `scripts/finalize-playback-matrix.js`: finalize publish pass now forwards both comparison markdown and comparison JSON artifacts. 
- `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index d88f7e982b..2a4896fbeb 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -144,7 +144,8 @@ pnpm bench:playback:publish -- \ --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md \ --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ - --comparison-md /tmp/playback-matrix-final/playback-comparison.md + --comparison-md /tmp/playback-matrix-final/playback-comparison.md \ + --comparison-json /tmp/playback-matrix-final/playback-comparison.json ``` Generate bottleneck analysis for optimization backlog: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 4e3699b401..45f9a5838a 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -257,7 +257,12 @@ function main() { publishArgs.push("--bottlenecks-md", bottleneckPath); } if (options.compareBaselineInputs.length > 0) { - publishArgs.push("--comparison-md", comparisonPath); + publishArgs.push( + "--comparison-md", + comparisonPath, + "--comparison-json", + comparisonJsonPath, + ); } run("node", publishArgs); } diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index fab62486f9..ac7adfbbe3 100644 --- a/scripts/publish-playback-matrix-summary.js +++ 
b/scripts/publish-playback-matrix-summary.js @@ -9,6 +9,7 @@ function parseArgs(argv) { statusMd: null, bottlenecksMd: null, comparisonMd: null, + comparisonJson: null, validationJson: null, target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), }; @@ -36,6 +37,10 @@ function parseArgs(argv) { options.comparisonMd = path.resolve(argv[++i] ?? ""); continue; } + if (arg === "--comparison-json") { + options.comparisonJson = path.resolve(argv[++i] ?? ""); + continue; + } if (arg === "--validation-json") { options.validationJson = path.resolve(argv[++i] ?? ""); continue; @@ -51,7 +56,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--target ] + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--comparison-json ] [--target ] Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); } @@ -68,6 +73,7 @@ function buildSummarySection( validationJson, bottlenecksMd, comparisonMd, + comparisonJson, ) { const now = new Date().toISOString(); const validation = JSON.parse(validationJson); @@ -80,6 +86,13 @@ function buildSummarySection( markdown += `- Observed cells: ${validation.observedCells}\n`; markdown += `- Missing cells: ${validation.missingCells?.length ?? 0}\n`; markdown += `- Format failures: ${validation.formatFailures?.length ?? 0}\n\n`; + if (comparisonJson) { + const comparison = JSON.parse(comparisonJson); + const comparisonPassed = comparison.summary?.passed === true; + markdown += `- Comparison gate: ${comparisonPassed ? "✅ PASS" : "❌ FAIL"}\n`; + markdown += `- Comparison regressions: ${comparison.summary?.regressions ?? "n/a"}\n`; + markdown += `- Missing candidate rows: ${comparison.summary?.missingCandidateRows ?? 
"n/a"}\n\n`; + } if ((validation.missingCells?.length ?? 0) > 0) { markdown += "**Missing Cells**\n"; @@ -152,6 +165,9 @@ function main() { if (options.comparisonMd) { ensureFile(options.comparisonMd, "Comparison markdown"); } + if (options.comparisonJson) { + ensureFile(options.comparisonJson, "Comparison JSON"); + } ensureFile(options.target, "Target"); const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8"); @@ -163,12 +179,16 @@ function main() { const comparisonMd = options.comparisonMd ? fs.readFileSync(options.comparisonMd, "utf8") : null; + const comparisonJson = options.comparisonJson + ? fs.readFileSync(options.comparisonJson, "utf8") + : null; const summaryMd = buildSummarySection( aggregateMd, statusMd, validationJson, bottlenecksMd, comparisonMd, + comparisonJson, ); writeToBenchmarkHistory(options.target, summaryMd); console.log(`Published matrix summary into ${options.target}`); From f2efab8139a87939882108a87f0dd9ffb6f6e847 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:28:31 +0000 Subject: [PATCH 104/333] improve: include comparison gate status in published matrix summaries Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 +- crates/editor/PLAYBACK-FINDINGS.md | 8 ++++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 ++- scripts/finalize-playback-matrix.js | 7 ++++++- scripts/publish-playback-matrix-summary.js | 22 +++++++++++++++++++++- 5 files changed, 38 insertions(+), 4 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 8f533ecdfe..2ff8e37b63 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -84,7 +84,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md 
/tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md -node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md /tmp/playback-matrix-final/playback-comparison.md +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md /tmp/playback-matrix-final/playback-comparison.md --comparison-json /tmp/playback-matrix-final/playback-comparison.json # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 0fb3dfd712..a72a384855 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -294,6 +294,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Uses ordered map operations to remove outdated frames efficiently. - Reduces stale-buffer buildup during frame skips and sustained catch-up scenarios. +30. **Published comparison gate status in matrix summaries (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` now accepts optional `--comparison-json`. + - Published matrix summary now includes comparison gate pass/fail, regression count, and missing-candidate-row count when comparison JSON is provided. 
+ - `scripts/finalize-playback-matrix.js` now forwards both comparison markdown and comparison JSON to publish flow. + --- ## Root Cause Analysis Archive @@ -417,6 +422,7 @@ Decoder Pipeline: 31. Added structured JSON artifact emission for baseline-vs-candidate comparison workflows. 32. Replaced playback prefetch deque scans with keyed `BTreeMap` buffering for lower lookup overhead in frame acquisition path. 33. Added ordered stale-frame pruning in keyed prefetch buffer to keep playback buffer aligned with current playhead. +34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -446,6 +452,8 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. +- `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. +- `scripts/finalize-playback-matrix.js`: finalize publish pass now forwards both comparison markdown and comparison JSON artifacts. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. 
- `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index d88f7e982b..2a4896fbeb 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -144,7 +144,8 @@ pnpm bench:playback:publish -- \ --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md \ --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ - --comparison-md /tmp/playback-matrix-final/playback-comparison.md + --comparison-md /tmp/playback-matrix-final/playback-comparison.md \ + --comparison-json /tmp/playback-matrix-final/playback-comparison.json ``` Generate bottleneck analysis for optimization backlog: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 4e3699b401..45f9a5838a 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -257,7 +257,12 @@ function main() { publishArgs.push("--bottlenecks-md", bottleneckPath); } if (options.compareBaselineInputs.length > 0) { - publishArgs.push("--comparison-md", comparisonPath); + publishArgs.push( + "--comparison-md", + comparisonPath, + "--comparison-json", + comparisonJsonPath, + ); } run("node", publishArgs); } diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index fab62486f9..ac7adfbbe3 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -9,6 +9,7 @@ function parseArgs(argv) { statusMd: null, bottlenecksMd: null, comparisonMd: null, + comparisonJson: null, validationJson: null, target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), }; @@ -36,6 +37,10 @@ function parseArgs(argv) { options.comparisonMd = 
path.resolve(argv[++i] ?? ""); continue; } + if (arg === "--comparison-json") { + options.comparisonJson = path.resolve(argv[++i] ?? ""); + continue; + } if (arg === "--validation-json") { options.validationJson = path.resolve(argv[++i] ?? ""); continue; @@ -51,7 +56,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--target ] + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--comparison-json ] [--target ] Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); } @@ -68,6 +73,7 @@ function buildSummarySection( validationJson, bottlenecksMd, comparisonMd, + comparisonJson, ) { const now = new Date().toISOString(); const validation = JSON.parse(validationJson); @@ -80,6 +86,13 @@ function buildSummarySection( markdown += `- Observed cells: ${validation.observedCells}\n`; markdown += `- Missing cells: ${validation.missingCells?.length ?? 0}\n`; markdown += `- Format failures: ${validation.formatFailures?.length ?? 0}\n\n`; + if (comparisonJson) { + const comparison = JSON.parse(comparisonJson); + const comparisonPassed = comparison.summary?.passed === true; + markdown += `- Comparison gate: ${comparisonPassed ? "✅ PASS" : "❌ FAIL"}\n`; + markdown += `- Comparison regressions: ${comparison.summary?.regressions ?? "n/a"}\n`; + markdown += `- Missing candidate rows: ${comparison.summary?.missingCandidateRows ?? "n/a"}\n\n`; + } if ((validation.missingCells?.length ?? 
0) > 0) { markdown += "**Missing Cells**\n"; @@ -152,6 +165,9 @@ function main() { if (options.comparisonMd) { ensureFile(options.comparisonMd, "Comparison markdown"); } + if (options.comparisonJson) { + ensureFile(options.comparisonJson, "Comparison JSON"); + } ensureFile(options.target, "Target"); const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8"); @@ -163,12 +179,16 @@ function main() { const comparisonMd = options.comparisonMd ? fs.readFileSync(options.comparisonMd, "utf8") : null; + const comparisonJson = options.comparisonJson + ? fs.readFileSync(options.comparisonJson, "utf8") + : null; const summaryMd = buildSummarySection( aggregateMd, statusMd, validationJson, bottlenecksMd, comparisonMd, + comparisonJson, ); writeToBenchmarkHistory(options.target, summaryMd); console.log(`Published matrix summary into ${options.target}`); From 576f1fd3f6a7378bdceaa47503c92609d944b2da Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:31:40 +0000 Subject: [PATCH 105/333] improve: tighten warmup timing and skip pruning in keyed prefetch path Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a72a384855..7d33c14e19 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -299,6 +299,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Published matrix summary now includes comparison gate pass/fail, regression count, and missing-candidate-row count when comparison JSON is provided. - `scripts/finalize-playback-matrix.js` now forwards both comparison markdown and comparison JSON to publish flow. +31. 
**Tightened prefetch warmup/skip maintenance with keyed buffer helpers (2026-02-13)** + - Warmup first-frame timer now starts only after at least one eligible prefetched frame is present in the keyed buffer. + - Skip catch-up path now uses ordered stale-frame pruning helper instead of full-map retain filtering. + - Reduces avoidable warmup timing noise and stale-buffer maintenance overhead in high-skip playback paths. + --- ## Root Cause Analysis Archive @@ -423,6 +428,7 @@ Decoder Pipeline: 32. Replaced playback prefetch deque scans with keyed `BTreeMap` buffering for lower lookup overhead in frame acquisition path. 33. Added ordered stale-frame pruning in keyed prefetch buffer to keep playback buffer aligned with current playhead. 34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. +35. Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -450,6 +456,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. +- `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. 
- `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 28de5205df..ae5d1cb128 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -532,7 +532,7 @@ impl Playback { Some(prefetched) = prefetch_rx.recv() => { if prefetched.generation == seek_generation { insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); - if first_frame_time.is_none() { + if first_frame_time.is_none() && !prefetch_buffer.is_empty() { first_frame_time = Some(Instant::now()); } } @@ -890,7 +890,7 @@ impl Playback { total_frames_skipped += skipped as u64; skip_events = skip_events.saturating_add(1); - prefetch_buffer.retain(|frame, _| *frame >= frame_number); + prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); let _ = frame_request_tx.send(frame_number); let _ = playback_position_tx.send(frame_number); if has_audio From 634e18f7a67a9f2994aa9b190fa12049fa85913d Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:31:40 +0000 Subject: [PATCH 106/333] improve: tighten warmup timing and skip pruning in keyed prefetch path Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a72a384855..7d33c14e19 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -299,6 +299,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Published matrix summary now includes comparison gate pass/fail, regression count, and missing-candidate-row count when 
comparison JSON is provided. - `scripts/finalize-playback-matrix.js` now forwards both comparison markdown and comparison JSON to publish flow. +31. **Tightened prefetch warmup/skip maintenance with keyed buffer helpers (2026-02-13)** + - Warmup first-frame timer now starts only after at least one eligible prefetched frame is present in the keyed buffer. + - Skip catch-up path now uses ordered stale-frame pruning helper instead of full-map retain filtering. + - Reduces avoidable warmup timing noise and stale-buffer maintenance overhead in high-skip playback paths. + --- ## Root Cause Analysis Archive @@ -423,6 +428,7 @@ Decoder Pipeline: 32. Replaced playback prefetch deque scans with keyed `BTreeMap` buffering for lower lookup overhead in frame acquisition path. 33. Added ordered stale-frame pruning in keyed prefetch buffer to keep playback buffer aligned with current playhead. 34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. +35. Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -450,6 +456,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. +- `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. 
- `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 28de5205df..ae5d1cb128 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -532,7 +532,7 @@ impl Playback { Some(prefetched) = prefetch_rx.recv() => { if prefetched.generation == seek_generation { insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); - if first_frame_time.is_none() { + if first_frame_time.is_none() && !prefetch_buffer.is_empty() { first_frame_time = Some(Instant::now()); } } @@ -890,7 +890,7 @@ impl Playback { total_frames_skipped += skipped as u64; skip_events = skip_events.saturating_add(1); - prefetch_buffer.retain(|frame, _| *frame >= frame_number); + prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); let _ = frame_request_tx.send(frame_number); let _ = playback_position_tx.send(frame_number); if has_audio From ab756a9ed276761ae859f2341135da7e1e696ca9 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:33:37 +0000 Subject: [PATCH 107/333] improve: report candidate-only coverage in benchmark comparisons Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 + crates/editor/PLAYBACK-FINDINGS.md | 7 +++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 + scripts/compare-playback-benchmark-runs.js | 53 ++++++++++++++++++---- 4 files changed, 55 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 2ff8e37b63..accad50330 100644 --- 
a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -95,6 +95,8 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json + +Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 7d33c14e19..264b553181 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -304,6 +304,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Skip catch-up path now uses ordered stale-frame pruning helper instead of full-map retain filtering. - Reduces avoidable warmup timing noise and stale-buffer maintenance overhead in high-skip playback paths. +32. **Expanded comparison outputs with candidate-only coverage visibility (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now reports candidate-only rows that do not exist in baseline. + - Markdown and JSON comparison outputs now include both missing-candidate and candidate-only coverage summaries. + - Improves matrix diff diagnostics when test coverage differs between baseline and candidate runs. + --- ## Root Cause Analysis Archive @@ -429,6 +434,7 @@ Decoder Pipeline: 33. 
Added ordered stale-frame pruning in keyed prefetch buffer to keep playback buffer aligned with current playhead. 34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. 35. Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. +36. Expanded baseline-vs-candidate comparison outputs with candidate-only row reporting. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -455,6 +461,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. +- `scripts/compare-playback-benchmark-runs.js`: comparison outputs now include candidate-only rows in addition to missing-candidate coverage deltas. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 2a4896fbeb..b3b4184234 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -168,6 +168,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # emit structured JSON alongside markdown for automation pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --output-json /tmp/playback-matrix-final/playback-comparison.json + +# compare output now includes both missing-candidate rows and candidate-only rows ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 2f76545b6a..d3c634b3bc 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -185,6 +185,7 @@ function formatNumber(value, digits = 2) { function compareMetrics(baselineRows, candidateRows) { const comparisons = []; const missingCandidateRows = []; + const candidateOnlyRows = []; for (const [key, baseline] of baselineRows) { const candidate = candidateRows.get(key); @@ -201,7 +202,16 @@ function compareMetrics(baselineRows, candidateRows) { for (const [key, candidate] of candidateRows) { const baseline = baselineRows.get(key); - if (!baseline) continue; + if (!baseline) { + candidateOnlyRows.push({ + platform: candidate.platform, + gpu: candidate.gpu, + scenario: candidate.scenario, + recording: candidate.recording, + format: candidate.format, + }); + continue; + } const fpsDelta = delta(candidate.fpsMin, baseline.fpsMin); const startupDelta = delta(candidate.startupAvg, baseline.startupAvg); @@ -235,10 +245,15 @@ function compareMetrics(baselineRows, candidateRows) { } comparisons.sort((a, b) => b.regressions.length - a.regressions.length); - return { comparisons, missingCandidateRows }; + 
return { comparisons, missingCandidateRows, candidateOnlyRows }; } -function toMarkdown(comparisons, missingCandidateRows, options) { +function toMarkdown( + comparisons, + missingCandidateRows, + candidateOnlyRows, + options, +) { const regressions = comparisons.filter( (entry) => entry.regressions.length > 0, ); @@ -246,7 +261,7 @@ function toMarkdown(comparisons, missingCandidateRows, options) { md += "# Playback Benchmark Comparison\n\n"; md += `Generated: ${new Date().toISOString()}\n\n`; md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; - md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}\n\n`; + md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}\n\n`; if (missingCandidateRows.length > 0) { md += "## Missing Candidate Rows\n\n"; md += "| Platform | GPU | Scenario | Recording | Format |\n"; @@ -256,6 +271,15 @@ function toMarkdown(comparisons, missingCandidateRows, options) { } md += "\n"; } + if (candidateOnlyRows.length > 0) { + md += "## Candidate-Only Rows\n\n"; + md += "| Platform | GPU | Scenario | Recording | Format |\n"; + md += "|---|---|---|---|---|\n"; + for (const row of candidateOnlyRows) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} |\n`; + } + md += "\n"; + } md += "| Platform | GPU | Scenario | Recording | Format | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; md += "|---|---|---|---|---|---:|---:|---:|---|\n"; @@ -266,7 +290,12 @@ function toMarkdown(comparisons, missingCandidateRows, options) { return md; } -function buildJsonOutput(comparisons, missingCandidateRows, options) { +function buildJsonOutput( + comparisons, + missingCandidateRows, + 
candidateOnlyRows, + options, +) { const regressions = comparisons.filter( (entry) => entry.regressions.length > 0, ); @@ -282,12 +311,14 @@ function buildJsonOutput(comparisons, missingCandidateRows, options) { comparedRows: comparisons.length, regressions: regressions.length, missingCandidateRows: missingCandidateRows.length, + candidateOnlyRows: candidateOnlyRows.length, passed: regressions.length === 0 && (options.allowMissingCandidate || missingCandidateRows.length === 0), }, regressions, missingCandidateRows, + candidateOnlyRows, comparisons, }; } @@ -320,14 +351,18 @@ function main() { const baselineRows = collectMetrics(baselineFiles); const candidateRows = collectMetrics(candidateFiles); - const { comparisons, missingCandidateRows } = compareMetrics( - baselineRows, - candidateRows, + const { comparisons, missingCandidateRows, candidateOnlyRows } = + compareMetrics(baselineRows, candidateRows); + const markdown = toMarkdown( + comparisons, + missingCandidateRows, + candidateOnlyRows, + options, ); - const markdown = toMarkdown(comparisons, missingCandidateRows, options); const outputJson = buildJsonOutput( comparisons, missingCandidateRows, + candidateOnlyRows, options, ); From a0e2a17ddb62f6c6ff0c5ff7a6193761bd2626b5 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:33:37 +0000 Subject: [PATCH 108/333] improve: report candidate-only coverage in benchmark comparisons Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 + crates/editor/PLAYBACK-FINDINGS.md | 7 +++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 + scripts/compare-playback-benchmark-runs.js | 53 ++++++++++++++++++---- 4 files changed, 55 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 2ff8e37b63..accad50330 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -95,6 +95,8 @@ node scripts/compare-playback-benchmark-runs.js --baseline 
/path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json + +Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 7d33c14e19..264b553181 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -304,6 +304,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Skip catch-up path now uses ordered stale-frame pruning helper instead of full-map retain filtering. - Reduces avoidable warmup timing noise and stale-buffer maintenance overhead in high-skip playback paths. +32. **Expanded comparison outputs with candidate-only coverage visibility (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now reports candidate-only rows that do not exist in baseline. + - Markdown and JSON comparison outputs now include both missing-candidate and candidate-only coverage summaries. + - Improves matrix diff diagnostics when test coverage differs between baseline and candidate runs. + --- ## Root Cause Analysis Archive @@ -429,6 +434,7 @@ Decoder Pipeline: 33. Added ordered stale-frame pruning in keyed prefetch buffer to keep playback buffer aligned with current playhead. 34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. 
35. Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. +36. Expanded baseline-vs-candidate comparison outputs with candidate-only row reporting. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -455,6 +461,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. +- `scripts/compare-playback-benchmark-runs.js`: comparison outputs now include candidate-only rows in addition to missing-candidate coverage deltas. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 2a4896fbeb..b3b4184234 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -168,6 +168,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # emit structured JSON alongside markdown for automation pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --output-json /tmp/playback-matrix-final/playback-comparison.json + +# compare output now includes both missing-candidate rows and candidate-only rows ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 2f76545b6a..d3c634b3bc 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -185,6 +185,7 @@ function formatNumber(value, digits = 2) { function compareMetrics(baselineRows, candidateRows) { const comparisons = []; const missingCandidateRows = []; + const candidateOnlyRows = []; for (const [key, baseline] of baselineRows) { const candidate = candidateRows.get(key); @@ -201,7 +202,16 @@ function compareMetrics(baselineRows, candidateRows) { for (const [key, candidate] of candidateRows) { const baseline = baselineRows.get(key); - if (!baseline) continue; + if (!baseline) { + candidateOnlyRows.push({ + platform: candidate.platform, + gpu: candidate.gpu, + scenario: candidate.scenario, + recording: candidate.recording, + format: candidate.format, + }); + continue; + } const fpsDelta = delta(candidate.fpsMin, baseline.fpsMin); const startupDelta = delta(candidate.startupAvg, baseline.startupAvg); @@ -235,10 +245,15 @@ function compareMetrics(baselineRows, candidateRows) { } comparisons.sort((a, b) => b.regressions.length - a.regressions.length); - return { comparisons, missingCandidateRows }; + 
return { comparisons, missingCandidateRows, candidateOnlyRows }; } -function toMarkdown(comparisons, missingCandidateRows, options) { +function toMarkdown( + comparisons, + missingCandidateRows, + candidateOnlyRows, + options, +) { const regressions = comparisons.filter( (entry) => entry.regressions.length > 0, ); @@ -246,7 +261,7 @@ function toMarkdown(comparisons, missingCandidateRows, options) { md += "# Playback Benchmark Comparison\n\n"; md += `Generated: ${new Date().toISOString()}\n\n`; md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; - md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}\n\n`; + md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}\n\n`; if (missingCandidateRows.length > 0) { md += "## Missing Candidate Rows\n\n"; md += "| Platform | GPU | Scenario | Recording | Format |\n"; @@ -256,6 +271,15 @@ function toMarkdown(comparisons, missingCandidateRows, options) { } md += "\n"; } + if (candidateOnlyRows.length > 0) { + md += "## Candidate-Only Rows\n\n"; + md += "| Platform | GPU | Scenario | Recording | Format |\n"; + md += "|---|---|---|---|---|\n"; + for (const row of candidateOnlyRows) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} |\n`; + } + md += "\n"; + } md += "| Platform | GPU | Scenario | Recording | Format | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; md += "|---|---|---|---|---|---:|---:|---:|---|\n"; @@ -266,7 +290,12 @@ function toMarkdown(comparisons, missingCandidateRows, options) { return md; } -function buildJsonOutput(comparisons, missingCandidateRows, options) { +function buildJsonOutput( + comparisons, + missingCandidateRows, + 
candidateOnlyRows, + options, +) { const regressions = comparisons.filter( (entry) => entry.regressions.length > 0, ); @@ -282,12 +311,14 @@ function buildJsonOutput(comparisons, missingCandidateRows, options) { comparedRows: comparisons.length, regressions: regressions.length, missingCandidateRows: missingCandidateRows.length, + candidateOnlyRows: candidateOnlyRows.length, passed: regressions.length === 0 && (options.allowMissingCandidate || missingCandidateRows.length === 0), }, regressions, missingCandidateRows, + candidateOnlyRows, comparisons, }; } @@ -320,14 +351,18 @@ function main() { const baselineRows = collectMetrics(baselineFiles); const candidateRows = collectMetrics(candidateFiles); - const { comparisons, missingCandidateRows } = compareMetrics( - baselineRows, - candidateRows, + const { comparisons, missingCandidateRows, candidateOnlyRows } = + compareMetrics(baselineRows, candidateRows); + const markdown = toMarkdown( + comparisons, + missingCandidateRows, + candidateOnlyRows, + options, ); - const markdown = toMarkdown(comparisons, missingCandidateRows, options); const outputJson = buildJsonOutput( comparisons, missingCandidateRows, + candidateOnlyRows, options, ); From 0a19cdb5aca406f1ad0b1052dccd84c9f1d16311 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:34:25 +0000 Subject: [PATCH 109/333] improve: include candidate-only count in published comparison summary Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ scripts/publish-playback-matrix-summary.js | 1 + 2 files changed, 7 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 264b553181..9a6facdfc7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -309,6 +309,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Markdown and JSON comparison outputs now include both missing-candidate and candidate-only coverage summaries. 
- Improves matrix diff diagnostics when test coverage differs between baseline and candidate runs. +33. **Extended published comparison summary fields (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` now includes candidate-only row count from comparison JSON in published matrix summary bullets. + - Keeps published matrix evidence aligned with expanded comparison coverage diagnostics. + --- ## Root Cause Analysis Archive @@ -435,6 +439,7 @@ Decoder Pipeline: 34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. 35. Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. 36. Expanded baseline-vs-candidate comparison outputs with candidate-only row reporting. +37. Added candidate-only row count reporting in published matrix summary comparison status bullets. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -463,6 +468,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. - `scripts/compare-playback-benchmark-runs.js`: comparison outputs now include candidate-only rows in addition to missing-candidate coverage deltas. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. 
- `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index ac7adfbbe3..60d0f6a6c4 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -92,6 +92,7 @@ function buildSummarySection( markdown += `- Comparison gate: ${comparisonPassed ? "✅ PASS" : "❌ FAIL"}\n`; markdown += `- Comparison regressions: ${comparison.summary?.regressions ?? "n/a"}\n`; markdown += `- Missing candidate rows: ${comparison.summary?.missingCandidateRows ?? "n/a"}\n\n`; + markdown += `- Candidate-only rows: ${comparison.summary?.candidateOnlyRows ?? "n/a"}\n\n`; } if ((validation.missingCells?.length ?? 0) > 0) { From 16f71707d4f580fef58dde6c368aeda89ed033c5 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:34:25 +0000 Subject: [PATCH 110/333] improve: include candidate-only count in published comparison summary Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ scripts/publish-playback-matrix-summary.js | 1 + 2 files changed, 7 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 264b553181..9a6facdfc7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -309,6 +309,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Markdown and JSON comparison outputs now include both missing-candidate and candidate-only coverage summaries. - Improves matrix diff diagnostics when test coverage differs between baseline and candidate runs. +33. **Extended published comparison summary fields (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` now includes candidate-only row count from comparison JSON in published matrix summary bullets. 
+ - Keeps published matrix evidence aligned with expanded comparison coverage diagnostics. + --- ## Root Cause Analysis Archive @@ -435,6 +439,7 @@ Decoder Pipeline: 34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. 35. Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. 36. Expanded baseline-vs-candidate comparison outputs with candidate-only row reporting. +37. Added candidate-only row count reporting in published matrix summary comparison status bullets. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -463,6 +468,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. - `scripts/compare-playback-benchmark-runs.js`: comparison outputs now include candidate-only rows in addition to missing-candidate coverage deltas. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. 
diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index ac7adfbbe3..60d0f6a6c4 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -92,6 +92,7 @@ function buildSummarySection( markdown += `- Comparison gate: ${comparisonPassed ? "✅ PASS" : "❌ FAIL"}\n`; markdown += `- Comparison regressions: ${comparison.summary?.regressions ?? "n/a"}\n`; markdown += `- Missing candidate rows: ${comparison.summary?.missingCandidateRows ?? "n/a"}\n\n`; + markdown += `- Candidate-only rows: ${comparison.summary?.candidateOnlyRows ?? "n/a"}\n\n`; } if ((validation.missingCells?.length ?? 0) > 0) { From 53f21e1a4d43726394c3a48e471f846e12a328cb Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:37:48 +0000 Subject: [PATCH 111/333] improve: add strict candidate-only gating for matrix comparisons Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 ++ crates/editor/PLAYBACK-FINDINGS.md | 8 ++++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 5 +++++ scripts/compare-playback-benchmark-runs.js | 15 ++++++++++++--- scripts/finalize-playback-matrix.js | 10 +++++++++- 5 files changed, 36 insertions(+), 4 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index accad50330..f40284d4b8 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -80,6 +80,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 
--allow-scrub-p95-increase-ms 5 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md # Publish matrix artifacts into this benchmark history @@ -94,6 +95,7 @@ node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-result node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. 
diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 9a6facdfc7..57b1dcf58c 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -313,6 +313,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/publish-playback-matrix-summary.js` now includes candidate-only row count from comparison JSON in published matrix summary bullets. - Keeps published matrix evidence aligned with expanded comparison coverage diagnostics. +34. **Added strict candidate-only gating option for comparison workflows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-candidate-only`. + - When enabled, comparison exits non-zero if candidate contains rows not present in baseline. + - `scripts/finalize-playback-matrix.js` now forwards the same strict option in integrated compare flows. + --- ## Root Cause Analysis Archive @@ -439,6 +444,7 @@ Decoder Pipeline: 34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. 35. Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. 36. Expanded baseline-vs-candidate comparison outputs with candidate-only row reporting. +37. Added strict `fail-on-candidate-only` gating option for compare/finalize matrix comparison workflows. 37. Added candidate-only row count reporting in published matrix summary comparison status bullets. **Changes Made**: @@ -468,6 +474,8 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. - `scripts/compare-playback-benchmark-runs.js`: comparison outputs now include candidate-only rows in addition to missing-candidate coverage deltas. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. 
+- `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. +- `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index b3b4184234..23e1689ac7 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -123,6 +123,9 @@ pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir # optional: allow missing candidate rows during compare gate pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate + +# optional: fail finalize compare gate when candidate includes rows absent in baseline +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only ``` Finalize and publish to benchmark history in one command: @@ -170,6 +173,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --output-json 
/tmp/playback-matrix-final/playback-comparison.json # compare output now includes both missing-candidate rows and candidate-only rows +# optional: fail compare gate when candidate includes rows absent in baseline +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index d3c634b3bc..c858e8d396 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -13,6 +13,7 @@ function parseArgs(argv) { allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, allowMissingCandidate: false, + failOnCandidateOnly: false, }; for (let i = 2; i < argv.length; i++) { @@ -70,6 +71,10 @@ function parseArgs(argv) { options.allowMissingCandidate = true; continue; } + if (arg === "--fail-on-candidate-only") { + options.failOnCandidateOnly = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -77,7 +82,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] Compares baseline and candidate playback matrix JSON outputs and flags regressions. 
Multiple --baseline and --candidate inputs are supported.`); } @@ -261,6 +266,7 @@ function toMarkdown( md += "# Playback Benchmark Comparison\n\n"; md += `Generated: ${new Date().toISOString()}\n\n`; md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; + md += `Coverage gate: missing_candidate=${options.allowMissingCandidate ? "allow" : "fail"}, candidate_only=${options.failOnCandidateOnly ? "fail" : "allow"}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}\n\n`; if (missingCandidateRows.length > 0) { md += "## Missing Candidate Rows\n\n"; @@ -306,6 +312,7 @@ function buildJsonOutput( allowStartupIncreaseMs: options.allowStartupIncreaseMs, allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, allowMissingCandidate: options.allowMissingCandidate, + failOnCandidateOnly: options.failOnCandidateOnly, }, summary: { comparedRows: comparisons.length, @@ -314,7 +321,8 @@ function buildJsonOutput( candidateOnlyRows: candidateOnlyRows.length, passed: regressions.length === 0 && - (options.allowMissingCandidate || missingCandidateRows.length === 0), + (options.allowMissingCandidate || missingCandidateRows.length === 0) && + (!options.failOnCandidateOnly || candidateOnlyRows.length === 0), }, regressions, missingCandidateRows, @@ -383,7 +391,8 @@ function main() { if ( comparisons.some((entry) => entry.regressions.length > 0) || - (!options.allowMissingCandidate && missingCandidateRows.length > 0) + (!options.allowMissingCandidate && missingCandidateRows.length > 0) || + (options.failOnCandidateOnly && candidateOnlyRows.length > 0) ) { process.exit(1); } diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 45f9a5838a..a767916571 100644 --- a/scripts/finalize-playback-matrix.js 
+++ b/scripts/finalize-playback-matrix.js @@ -19,6 +19,7 @@ function parseArgs(argv) { allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, allowMissingCandidate: false, + failOnCandidateOnly: false, }; for (let i = 2; i < argv.length; i++) { @@ -116,6 +117,10 @@ function parseArgs(argv) { options.allowMissingCandidate = true; continue; } + if (arg === "--fail-on-candidate-only") { + options.failOnCandidateOnly = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -123,7 +128,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -239,6 +244,9 @@ function main() { if (options.allowMissingCandidate) { compareArgs.push("--allow-missing-candidate"); } + if (options.failOnCandidateOnly) { + compareArgs.push("--fail-on-candidate-only"); + } run("node", compareArgs); } if (options.publishTarget) { From 7b8a79d85e70e6666e710e8495e390cba329f630 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:37:48 +0000 Subject: [PATCH 112/333] improve: add strict candidate-only gating for matrix comparisons Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 ++ crates/editor/PLAYBACK-FINDINGS.md | 8 ++++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 5 +++++ scripts/compare-playback-benchmark-runs.js | 15 ++++++++++++--- scripts/finalize-playback-matrix.js | 10 +++++++++- 5 files changed, 36 insertions(+), 4 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index accad50330..f40284d4b8 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -80,6 +80,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final 
--compare-baseline /path/to/baseline-results --fail-on-candidate-only node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md # Publish matrix artifacts into this benchmark history @@ -94,6 +95,7 @@ node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-result node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. 
diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 9a6facdfc7..57b1dcf58c 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -313,6 +313,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/publish-playback-matrix-summary.js` now includes candidate-only row count from comparison JSON in published matrix summary bullets. - Keeps published matrix evidence aligned with expanded comparison coverage diagnostics. +34. **Added strict candidate-only gating option for comparison workflows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-candidate-only`. + - When enabled, comparison exits non-zero if candidate contains rows not present in baseline. + - `scripts/finalize-playback-matrix.js` now forwards the same strict option in integrated compare flows. + --- ## Root Cause Analysis Archive @@ -439,6 +444,7 @@ Decoder Pipeline: 34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. 35. Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. 36. Expanded baseline-vs-candidate comparison outputs with candidate-only row reporting. +37. Added strict `fail-on-candidate-only` gating option for compare/finalize matrix comparison workflows. 37. Added candidate-only row count reporting in published matrix summary comparison status bullets. **Changes Made**: @@ -468,6 +474,8 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. - `scripts/compare-playback-benchmark-runs.js`: comparison outputs now include candidate-only rows in addition to missing-candidate coverage deltas. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. 
+- `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. +- `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index b3b4184234..23e1689ac7 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -123,6 +123,9 @@ pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir # optional: allow missing candidate rows during compare gate pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate + +# optional: fail finalize compare gate when candidate includes rows absent in baseline +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only ``` Finalize and publish to benchmark history in one command: @@ -170,6 +173,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --output-json 
/tmp/playback-matrix-final/playback-comparison.json # compare output now includes both missing-candidate rows and candidate-only rows +# optional: fail compare gate when candidate includes rows absent in baseline +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index d3c634b3bc..c858e8d396 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -13,6 +13,7 @@ function parseArgs(argv) { allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, allowMissingCandidate: false, + failOnCandidateOnly: false, }; for (let i = 2; i < argv.length; i++) { @@ -70,6 +71,10 @@ function parseArgs(argv) { options.allowMissingCandidate = true; continue; } + if (arg === "--fail-on-candidate-only") { + options.failOnCandidateOnly = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -77,7 +82,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] Compares baseline and candidate playback matrix JSON outputs and flags regressions. 
Multiple --baseline and --candidate inputs are supported.`); } @@ -261,6 +266,7 @@ function toMarkdown( md += "# Playback Benchmark Comparison\n\n"; md += `Generated: ${new Date().toISOString()}\n\n`; md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; + md += `Coverage gate: missing_candidate=${options.allowMissingCandidate ? "allow" : "fail"}, candidate_only=${options.failOnCandidateOnly ? "fail" : "allow"}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}\n\n`; if (missingCandidateRows.length > 0) { md += "## Missing Candidate Rows\n\n"; @@ -306,6 +312,7 @@ function buildJsonOutput( allowStartupIncreaseMs: options.allowStartupIncreaseMs, allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, allowMissingCandidate: options.allowMissingCandidate, + failOnCandidateOnly: options.failOnCandidateOnly, }, summary: { comparedRows: comparisons.length, @@ -314,7 +321,8 @@ function buildJsonOutput( candidateOnlyRows: candidateOnlyRows.length, passed: regressions.length === 0 && - (options.allowMissingCandidate || missingCandidateRows.length === 0), + (options.allowMissingCandidate || missingCandidateRows.length === 0) && + (!options.failOnCandidateOnly || candidateOnlyRows.length === 0), }, regressions, missingCandidateRows, @@ -383,7 +391,8 @@ function main() { if ( comparisons.some((entry) => entry.regressions.length > 0) || - (!options.allowMissingCandidate && missingCandidateRows.length > 0) + (!options.allowMissingCandidate && missingCandidateRows.length > 0) || + (options.failOnCandidateOnly && candidateOnlyRows.length > 0) ) { process.exit(1); } diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 45f9a5838a..a767916571 100644 --- a/scripts/finalize-playback-matrix.js 
+++ b/scripts/finalize-playback-matrix.js @@ -19,6 +19,7 @@ function parseArgs(argv) { allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, allowMissingCandidate: false, + failOnCandidateOnly: false, }; for (let i = 2; i < argv.length; i++) { @@ -116,6 +117,10 @@ function parseArgs(argv) { options.allowMissingCandidate = true; continue; } + if (arg === "--fail-on-candidate-only") { + options.failOnCandidateOnly = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -123,7 +128,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -239,6 +244,9 @@ function main() { if (options.allowMissingCandidate) { compareArgs.push("--allow-missing-candidate"); } + if (options.failOnCandidateOnly) { + compareArgs.push("--fail-on-candidate-only"); + } run("node", compareArgs); } if (options.publishTarget) { From 0202484c9fdbd0c4fcab660454b9946d133ccd09 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:39:09 +0000 Subject: [PATCH 113/333] improve: require contiguous prefetch coverage for warmup readiness Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 9 ++++++++- crates/editor/src/playback.rs | 23 ++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 57b1dcf58c..9913888499 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -318,6 +318,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - When enabled, comparison exits non-zero if candidate contains rows not present in baseline. - `scripts/finalize-playback-matrix.js` now forwards the same strict option in integrated compare flows. +35. **Required contiguous prefetched frames for warmup readiness (2026-02-13)** + - Playback warmup readiness now checks contiguous prefetched frame coverage from current frame. + - Avoids treating sparse/non-contiguous prefetched entries as equivalent to contiguous startup readiness. + - Reduces early playback start jitter risk when warmup buffer is fragmented. + --- ## Root Cause Analysis Archive @@ -445,7 +450,8 @@ Decoder Pipeline: 35. Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. 36. Expanded baseline-vs-candidate comparison outputs with candidate-only row reporting. 37. 
Added strict `fail-on-candidate-only` gating option for compare/finalize matrix comparison workflows. -37. Added candidate-only row count reporting in published matrix summary comparison status bullets. +38. Added candidate-only row count reporting in published matrix summary comparison status bullets. +39. Updated playback warmup start condition to require contiguous prefetched frame coverage from current frame. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -477,6 +483,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. - `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. +- `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index ae5d1cb128..0966be3fe4 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -146,6 +146,22 @@ fn prune_prefetch_buffer_before_frame( } } +fn count_contiguous_prefetched_frames( + buffer: &BTreeMap, + start_frame: u32, + limit: usize, +) -> usize { + let mut contiguous = 0usize; + while contiguous < limit { + let frame = start_frame.saturating_add(contiguous as u32); + if !buffer.contains_key(&frame) { + break; + } + contiguous += 1; + } + contiguous +} + impl Playback { pub async fn start( mut self, @@ -508,8 +524,13 @@ impl Playback { ); while !*stop_rx.borrow() { + let contiguous_prefetched = count_contiguous_prefetched_frames( + &prefetch_buffer, + frame_number, + warmup_target_frames, + ); let should_start = if let Some(first_time) = first_frame_time { - prefetch_buffer.len() >= warmup_target_frames + contiguous_prefetched >= warmup_target_frames || first_time.elapsed() > warmup_after_first_timeout } else { false From 9f94a2672470683fecc818a1ba18322d217ff460 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:39:09 +0000 Subject: [PATCH 114/333] improve: require contiguous prefetch coverage for warmup readiness Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 9 ++++++++- crates/editor/src/playback.rs | 23 ++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 57b1dcf58c..9913888499 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -318,6 +318,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - When enabled, comparison exits non-zero if candidate contains rows not present in baseline. - `scripts/finalize-playback-matrix.js` now forwards the same strict option in integrated compare flows. +35. 
**Required contiguous prefetched frames for warmup readiness (2026-02-13)** + - Playback warmup readiness now checks contiguous prefetched frame coverage from current frame. + - Avoids treating sparse/non-contiguous prefetched entries as equivalent to contiguous startup readiness. + - Reduces early playback start jitter risk when warmup buffer is fragmented. + --- ## Root Cause Analysis Archive @@ -445,7 +450,8 @@ Decoder Pipeline: 35. Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. 36. Expanded baseline-vs-candidate comparison outputs with candidate-only row reporting. 37. Added strict `fail-on-candidate-only` gating option for compare/finalize matrix comparison workflows. -37. Added candidate-only row count reporting in published matrix summary comparison status bullets. +38. Added candidate-only row count reporting in published matrix summary comparison status bullets. +39. Updated playback warmup start condition to require contiguous prefetched frame coverage from current frame. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -477,6 +483,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. - `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. +- `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. 
- `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index ae5d1cb128..0966be3fe4 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -146,6 +146,22 @@ fn prune_prefetch_buffer_before_frame( } } +fn count_contiguous_prefetched_frames( + buffer: &BTreeMap, + start_frame: u32, + limit: usize, +) -> usize { + let mut contiguous = 0usize; + while contiguous < limit { + let frame = start_frame.saturating_add(contiguous as u32); + if !buffer.contains_key(&frame) { + break; + } + contiguous += 1; + } + contiguous +} + impl Playback { pub async fn start( mut self, @@ -508,8 +524,13 @@ impl Playback { ); while !*stop_rx.borrow() { + let contiguous_prefetched = count_contiguous_prefetched_frames( + &prefetch_buffer, + frame_number, + warmup_target_frames, + ); let should_start = if let Some(first_time) = first_frame_time { - prefetch_buffer.len() >= warmup_target_frames + contiguous_prefetched >= warmup_target_frames || first_time.elapsed() > warmup_after_first_timeout } else { false From b7652b7a572b2ec00098c6b168a2bd650e825bd2 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:40:27 +0000 Subject: [PATCH 115/333] improve: surface comparison coverage policies in published summaries Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 10 ++++++++-- scripts/publish-playback-matrix-summary.js | 2 ++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md 
b/crates/editor/PLAYBACK-FINDINGS.md index 9913888499..cd5abaa7db 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -313,12 +313,16 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/publish-playback-matrix-summary.js` now includes candidate-only row count from comparison JSON in published matrix summary bullets. - Keeps published matrix evidence aligned with expanded comparison coverage diagnostics. -34. **Added strict candidate-only gating option for comparison workflows (2026-02-13)** +34. **Published comparison policy mode in summary output (2026-02-13)** + - Published matrix summary now includes comparison policy modes for missing-candidate and candidate-only coverage handling. + - Keeps published evidence explicit about whether coverage gaps were allowed or gated in the comparison run. + +35. **Added strict candidate-only gating option for comparison workflows (2026-02-13)** - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-candidate-only`. - When enabled, comparison exits non-zero if candidate contains rows not present in baseline. - `scripts/finalize-playback-matrix.js` now forwards the same strict option in integrated compare flows. -35. **Required contiguous prefetched frames for warmup readiness (2026-02-13)** +36. **Required contiguous prefetched frames for warmup readiness (2026-02-13)** - Playback warmup readiness now checks contiguous prefetched frame coverage from current frame. - Avoids treating sparse/non-contiguous prefetched entries as equivalent to contiguous startup readiness. - Reduces early playback start jitter risk when warmup buffer is fragmented. @@ -452,6 +456,7 @@ Decoder Pipeline: 37. Added strict `fail-on-candidate-only` gating option for compare/finalize matrix comparison workflows. 38. Added candidate-only row count reporting in published matrix summary comparison status bullets. 39. 
Updated playback warmup start condition to require contiguous prefetched frame coverage from current frame. +40. Added comparison policy mode reporting (allow/fail) for missing-candidate and candidate-only coverage in published matrix summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -483,6 +488,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. - `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes missing-candidate and candidate-only coverage policy modes from comparison JSON tolerance settings. - `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 60d0f6a6c4..663a67480c 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -93,6 +93,8 @@ function buildSummarySection( markdown += `- Comparison regressions: ${comparison.summary?.regressions ?? 
"n/a"}\n`; markdown += `- Missing candidate rows: ${comparison.summary?.missingCandidateRows ?? "n/a"}\n\n`; markdown += `- Candidate-only rows: ${comparison.summary?.candidateOnlyRows ?? "n/a"}\n\n`; + markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; + markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; } if ((validation.missingCells?.length ?? 0) > 0) { From b1b534e070453946bd38fb0da68b82017a45a831 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:40:27 +0000 Subject: [PATCH 116/333] improve: surface comparison coverage policies in published summaries Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 10 ++++++++-- scripts/publish-playback-matrix-summary.js | 2 ++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 9913888499..cd5abaa7db 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -313,12 +313,16 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/publish-playback-matrix-summary.js` now includes candidate-only row count from comparison JSON in published matrix summary bullets. - Keeps published matrix evidence aligned with expanded comparison coverage diagnostics. -34. **Added strict candidate-only gating option for comparison workflows (2026-02-13)** +34. **Published comparison policy mode in summary output (2026-02-13)** + - Published matrix summary now includes comparison policy modes for missing-candidate and candidate-only coverage handling. + - Keeps published evidence explicit about whether coverage gaps were allowed or gated in the comparison run. + +35. **Added strict candidate-only gating option for comparison workflows (2026-02-13)** - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-candidate-only`. 
- When enabled, comparison exits non-zero if candidate contains rows not present in baseline. - `scripts/finalize-playback-matrix.js` now forwards the same strict option in integrated compare flows. -35. **Required contiguous prefetched frames for warmup readiness (2026-02-13)** +36. **Required contiguous prefetched frames for warmup readiness (2026-02-13)** - Playback warmup readiness now checks contiguous prefetched frame coverage from current frame. - Avoids treating sparse/non-contiguous prefetched entries as equivalent to contiguous startup readiness. - Reduces early playback start jitter risk when warmup buffer is fragmented. @@ -452,6 +456,7 @@ Decoder Pipeline: 37. Added strict `fail-on-candidate-only` gating option for compare/finalize matrix comparison workflows. 38. Added candidate-only row count reporting in published matrix summary comparison status bullets. 39. Updated playback warmup start condition to require contiguous prefetched frame coverage from current frame. +40. Added comparison policy mode reporting (allow/fail) for missing-candidate and candidate-only coverage in published matrix summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -483,6 +488,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. - `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes missing-candidate and candidate-only coverage policy modes from comparison JSON tolerance settings. 
- `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 60d0f6a6c4..663a67480c 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -93,6 +93,8 @@ function buildSummarySection( markdown += `- Comparison regressions: ${comparison.summary?.regressions ?? "n/a"}\n`; markdown += `- Missing candidate rows: ${comparison.summary?.missingCandidateRows ?? "n/a"}\n\n`; markdown += `- Candidate-only rows: ${comparison.summary?.candidateOnlyRows ?? "n/a"}\n\n`; + markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; + markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; } if ((validation.missingCells?.length ?? 
0) > 0) { From af068d9041912f3657c3112ccf853e990c68d5cc Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:44:04 +0000 Subject: [PATCH 117/333] improve: emit finalize summary json artifacts for automation Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 ++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/finalize-playback-matrix.js | 53 +++++++++++++++++++++++- 4 files changed, 61 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index f40284d4b8..a0b2017849 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -81,6 +81,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md # Publish matrix artifacts into this benchmark history diff --git a/crates/editor/PLAYBACK-FINDINGS.md 
b/crates/editor/PLAYBACK-FINDINGS.md index cd5abaa7db..564b1de8ef 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -327,6 +327,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Avoids treating sparse/non-contiguous prefetched entries as equivalent to contiguous startup readiness. - Reduces early playback start jitter risk when warmup buffer is fragmented. +37. **Added finalize summary JSON artifact output (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now supports optional `--output-json`. + - Finalize now emits `playback-finalize-summary.json` by default in output directory. + - Summary JSON includes artifact paths, settings, and validation/comparison pass flags for automation. + --- ## Root Cause Analysis Archive @@ -457,6 +462,7 @@ Decoder Pipeline: 38. Added candidate-only row count reporting in published matrix summary comparison status bullets. 39. Updated playback warmup start condition to require contiguous prefetched frame coverage from current frame. 40. Added comparison policy mode reporting (allow/fail) for missing-candidate and candidate-only coverage in published matrix summaries. +41. Added finalize summary JSON artifact output with artifact/settings/result metadata for automation workflows. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -491,6 +497,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes missing-candidate and candidate-only coverage policy modes from comparison JSON tolerance settings. - `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. 
+- `scripts/finalize-playback-matrix.js`: added optional `--output-json` and default finalize summary JSON emission with artifact path and pass/fail metadata. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 23e1689ac7..9f0a37b26a 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -111,6 +111,7 @@ One-shot finalize command: ```bash pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json ``` Include optimization thresholds when finalizing: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index a767916571..bef816d78b 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -15,6 +15,7 @@ function parseArgs(argv) { analyze: true, publishTarget: null, compareBaselineInputs: [], + outputJson: null, allowFpsDrop: 2, allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, @@ -83,6 +84,12 @@ function parseArgs(argv) { options.publishTarget = path.resolve(value); continue; } + if (arg === "--output-json") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-json"); + options.outputJson = path.resolve(value); + continue; + } if (arg === 
"--compare-baseline") { const value = argv[++i]; if (!value) throw new Error("Missing value for --compare-baseline"); @@ -128,7 +135,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -178,6 +185,9 @@ function main() { options.outputDir, "playback-comparison.json", ); + const summaryJsonPath = + options.outputJson ?? + path.join(options.outputDir, "playback-finalize-summary.json"); const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; const statusArgs = ["scripts/build-playback-matrix-report.js"]; @@ -289,6 +299,47 @@ function main() { console.log(`Comparison report: ${comparisonPath}`); console.log(`Comparison JSON: ${comparisonJsonPath}`); } + const validation = JSON.parse(fs.readFileSync(validationPath, "utf8")); + const comparison = + options.compareBaselineInputs.length > 0 + ? 
JSON.parse(fs.readFileSync(comparisonJsonPath, "utf8")) + : null; + const summary = { + generatedAt: new Date().toISOString(), + inputs: options.inputs, + outputDir: options.outputDir, + artifacts: { + aggregatePath, + statusPath, + validationPath, + bottleneckPath: options.analyze ? bottleneckPath : null, + bottleneckJsonPath: options.analyze ? bottleneckJsonPath : null, + comparisonPath: + options.compareBaselineInputs.length > 0 ? comparisonPath : null, + comparisonJsonPath: + options.compareBaselineInputs.length > 0 ? comparisonJsonPath : null, + }, + settings: { + requireFormats: options.requireFormats, + targetFps: options.targetFps, + maxScrubP95Ms: options.maxScrubP95Ms, + maxStartupMs: options.maxStartupMs, + analyze: options.analyze, + publishTarget: options.publishTarget, + compareBaselineInputs: options.compareBaselineInputs, + allowFpsDrop: options.allowFpsDrop, + allowStartupIncreaseMs: options.allowStartupIncreaseMs, + allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, + allowMissingCandidate: options.allowMissingCandidate, + failOnCandidateOnly: options.failOnCandidateOnly, + }, + results: { + validationPassed: validation.passed === true, + comparisonPassed: comparison ? 
comparison.summary?.passed === true : null, + }, + }; + fs.writeFileSync(summaryJsonPath, JSON.stringify(summary, null, 2), "utf8"); + console.log(`Finalize summary JSON: ${summaryJsonPath}`); } try { From a7ad1090892677b3cc0cac011f46676924268ff7 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:44:04 +0000 Subject: [PATCH 118/333] improve: emit finalize summary json artifacts for automation Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 ++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/finalize-playback-matrix.js | 53 +++++++++++++++++++++++- 4 files changed, 61 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index f40284d4b8..a0b2017849 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -81,6 +81,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 
--max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md # Publish matrix artifacts into this benchmark history diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index cd5abaa7db..564b1de8ef 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -327,6 +327,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Avoids treating sparse/non-contiguous prefetched entries as equivalent to contiguous startup readiness. - Reduces early playback start jitter risk when warmup buffer is fragmented. +37. **Added finalize summary JSON artifact output (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now supports optional `--output-json`. + - Finalize now emits `playback-finalize-summary.json` by default in output directory. + - Summary JSON includes artifact paths, settings, and validation/comparison pass flags for automation. + --- ## Root Cause Analysis Archive @@ -457,6 +462,7 @@ Decoder Pipeline: 38. Added candidate-only row count reporting in published matrix summary comparison status bullets. 39. Updated playback warmup start condition to require contiguous prefetched frame coverage from current frame. 40. Added comparison policy mode reporting (allow/fail) for missing-candidate and candidate-only coverage in published matrix summaries. +41. Added finalize summary JSON artifact output with artifact/settings/result metadata for automation workflows. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -491,6 +497,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes missing-candidate and candidate-only coverage policy modes from comparison JSON tolerance settings. 
- `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. +- `scripts/finalize-playback-matrix.js`: added optional `--output-json` and default finalize summary JSON emission with artifact path and pass/fail metadata. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 23e1689ac7..9f0a37b26a 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -111,6 +111,7 @@ One-shot finalize command: ```bash pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json ``` Include optimization thresholds when finalizing: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index a767916571..bef816d78b 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -15,6 +15,7 @@ function parseArgs(argv) { analyze: true, publishTarget: null, compareBaselineInputs: [], + outputJson: null, allowFpsDrop: 2, allowStartupIncreaseMs: 
25, allowScrubP95IncreaseMs: 5, @@ -83,6 +84,12 @@ function parseArgs(argv) { options.publishTarget = path.resolve(value); continue; } + if (arg === "--output-json") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-json"); + options.outputJson = path.resolve(value); + continue; + } if (arg === "--compare-baseline") { const value = argv[++i]; if (!value) throw new Error("Missing value for --compare-baseline"); @@ -128,7 +135,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -178,6 +185,9 @@ function main() { options.outputDir, "playback-comparison.json", ); + const summaryJsonPath = + options.outputJson ?? 
+ path.join(options.outputDir, "playback-finalize-summary.json"); const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; const statusArgs = ["scripts/build-playback-matrix-report.js"]; @@ -289,6 +299,47 @@ function main() { console.log(`Comparison report: ${comparisonPath}`); console.log(`Comparison JSON: ${comparisonJsonPath}`); } + const validation = JSON.parse(fs.readFileSync(validationPath, "utf8")); + const comparison = + options.compareBaselineInputs.length > 0 + ? JSON.parse(fs.readFileSync(comparisonJsonPath, "utf8")) + : null; + const summary = { + generatedAt: new Date().toISOString(), + inputs: options.inputs, + outputDir: options.outputDir, + artifacts: { + aggregatePath, + statusPath, + validationPath, + bottleneckPath: options.analyze ? bottleneckPath : null, + bottleneckJsonPath: options.analyze ? bottleneckJsonPath : null, + comparisonPath: + options.compareBaselineInputs.length > 0 ? comparisonPath : null, + comparisonJsonPath: + options.compareBaselineInputs.length > 0 ? comparisonJsonPath : null, + }, + settings: { + requireFormats: options.requireFormats, + targetFps: options.targetFps, + maxScrubP95Ms: options.maxScrubP95Ms, + maxStartupMs: options.maxStartupMs, + analyze: options.analyze, + publishTarget: options.publishTarget, + compareBaselineInputs: options.compareBaselineInputs, + allowFpsDrop: options.allowFpsDrop, + allowStartupIncreaseMs: options.allowStartupIncreaseMs, + allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, + allowMissingCandidate: options.allowMissingCandidate, + failOnCandidateOnly: options.failOnCandidateOnly, + }, + results: { + validationPassed: validation.passed === true, + comparisonPassed: comparison ? 
comparison.summary?.passed === true : null, + }, + }; + fs.writeFileSync(summaryJsonPath, JSON.stringify(summary, null, 2), "utf8"); + console.log(`Finalize summary JSON: ${summaryJsonPath}`); } try { From d6e0f6dd515a4680af9d2943a6fc435b2da76dba Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:49:24 +0000 Subject: [PATCH 119/333] improve: optimize contiguous prefetch warmup scan Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 10 +++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 564b1de8ef..eb4ce05fa5 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -332,6 +332,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Finalize now emits `playback-finalize-summary.json` by default in output directory. - Summary JSON includes artifact paths, settings, and validation/comparison pass flags for automation. +38. **Optimized contiguous warmup coverage scan on keyed buffer (2026-02-13)** + - Contiguous prefetched-frame counting now walks ordered keys via map range iteration. + - Reduces repeated keyed lookups during warmup readiness checks. + - Preserves contiguous coverage semantics while lowering per-loop lookup overhead. + --- ## Root Cause Analysis Archive @@ -463,6 +468,7 @@ Decoder Pipeline: 39. Updated playback warmup start condition to require contiguous prefetched frame coverage from current frame. 40. Added comparison policy mode reporting (allow/fail) for missing-candidate and candidate-only coverage in published matrix summaries. 41. Added finalize summary JSON artifact output with artifact/settings/result metadata for automation workflows. +42. Optimized contiguous prefetched-frame warmup scan using ordered map range iteration instead of repeated key lookups. 
**Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -496,6 +502,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes missing-candidate and candidate-only coverage policy modes from comparison JSON tolerance settings. - `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. +- `crates/editor/src/playback.rs`: contiguous warmup coverage scan now uses ordered map range iteration to reduce repeated key lookup overhead. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `scripts/finalize-playback-matrix.js`: added optional `--output-json` and default finalize summary JSON emission with artifact path and pass/fail metadata. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 0966be3fe4..7cbf09d6e1 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -152,12 +152,16 @@ fn count_contiguous_prefetched_frames( limit: usize, ) -> usize { let mut contiguous = 0usize; - while contiguous < limit { - let frame = start_frame.saturating_add(contiguous as u32); - if !buffer.contains_key(&frame) { + let mut expected_frame = start_frame; + for (frame, _) in buffer.range(start_frame..) 
{ + if *frame != expected_frame { break; } contiguous += 1; + if contiguous >= limit { + break; + } + expected_frame = expected_frame.saturating_add(1); } contiguous } From e0dc3efc03592cd30584dc005367d7f68e9a2684 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:49:24 +0000 Subject: [PATCH 120/333] improve: optimize contiguous prefetch warmup scan Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 10 +++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 564b1de8ef..eb4ce05fa5 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -332,6 +332,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Finalize now emits `playback-finalize-summary.json` by default in output directory. - Summary JSON includes artifact paths, settings, and validation/comparison pass flags for automation. +38. **Optimized contiguous warmup coverage scan on keyed buffer (2026-02-13)** + - Contiguous prefetched-frame counting now walks ordered keys via map range iteration. + - Reduces repeated keyed lookups during warmup readiness checks. + - Preserves contiguous coverage semantics while lowering per-loop lookup overhead. + --- ## Root Cause Analysis Archive @@ -463,6 +468,7 @@ Decoder Pipeline: 39. Updated playback warmup start condition to require contiguous prefetched frame coverage from current frame. 40. Added comparison policy mode reporting (allow/fail) for missing-candidate and candidate-only coverage in published matrix summaries. 41. Added finalize summary JSON artifact output with artifact/settings/result metadata for automation workflows. +42. Optimized contiguous prefetched-frame warmup scan using ordered map range iteration instead of repeated key lookups. 
**Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -496,6 +502,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes missing-candidate and candidate-only coverage policy modes from comparison JSON tolerance settings. - `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. +- `crates/editor/src/playback.rs`: contiguous warmup coverage scan now uses ordered map range iteration to reduce repeated key lookup overhead. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `scripts/finalize-playback-matrix.js`: added optional `--output-json` and default finalize summary JSON emission with artifact path and pass/fail metadata. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 0966be3fe4..7cbf09d6e1 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -152,12 +152,16 @@ fn count_contiguous_prefetched_frames( limit: usize, ) -> usize { let mut contiguous = 0usize; - while contiguous < limit { - let frame = start_frame.saturating_add(contiguous as u32); - if !buffer.contains_key(&frame) { + let mut expected_frame = start_frame; + for (frame, _) in buffer.range(start_frame..) 
{ + if *frame != expected_frame { break; } contiguous += 1; + if contiguous >= limit { + break; + } + expected_frame = expected_frame.saturating_add(1); } contiguous } From 5fd803b07ecd33dffa727aefac1ade847414a788 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:50:24 +0000 Subject: [PATCH 121/333] improve: include git metadata in finalize summary artifacts Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 ++ crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 ++ scripts/finalize-playback-matrix.js | 18 ++++++++++++++++++ 4 files changed, 28 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index a0b2017849..7c83166054 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -84,6 +84,8 @@ node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --ou node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md +Finalize summary JSON includes generated timestamp, artifact paths, settings, pass/fail flags, and git branch/commit metadata when available. 
+ # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md /tmp/playback-matrix-final/playback-comparison.md --comparison-json /tmp/playback-matrix-final/playback-comparison.json diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index eb4ce05fa5..595cb748c0 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -337,6 +337,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Reduces repeated keyed lookups during warmup readiness checks. - Preserves contiguous coverage semantics while lowering per-loop lookup overhead. +39. **Added git metadata to finalize summary artifacts (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now records git branch and commit SHA in finalize summary JSON output. + - Improves traceability of benchmark artifacts to exact source revision. + --- ## Root Cause Analysis Archive @@ -469,6 +473,7 @@ Decoder Pipeline: 40. Added comparison policy mode reporting (allow/fail) for missing-candidate and candidate-only coverage in published matrix summaries. 41. Added finalize summary JSON artifact output with artifact/settings/result metadata for automation workflows. 42. Optimized contiguous prefetched-frame warmup scan using ordered map range iteration instead of repeated key lookups. +43. 
Added git branch/commit metadata into finalize summary JSON artifacts for source traceability. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -505,6 +510,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: contiguous warmup coverage scan now uses ordered map range iteration to reduce repeated key lookup overhead. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `scripts/finalize-playback-matrix.js`: added optional `--output-json` and default finalize summary JSON emission with artifact path and pass/fail metadata. +- `scripts/finalize-playback-matrix.js`: finalize summary JSON now includes git branch and commit metadata when available. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 9f0a37b26a..45ea7ea43f 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -114,6 +114,8 @@ pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-di pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json ``` +Finalize summary JSON includes artifact paths, gate outcomes, settings, and git branch/commit metadata when available. 
+ Include optimization thresholds when finalizing: ```bash diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index bef816d78b..6296c27ca1 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -147,6 +147,14 @@ function run(command, args) { } } +function readCommandOutput(command, args) { + const result = spawnSync(command, args, { encoding: "utf8" }); + if (result.status !== 0) { + return null; + } + return result.stdout.trim() || null; +} + function main() { const options = parseArgs(process.argv); if (options.help) { @@ -304,10 +312,20 @@ function main() { options.compareBaselineInputs.length > 0 ? JSON.parse(fs.readFileSync(comparisonJsonPath, "utf8")) : null; + const gitBranch = readCommandOutput("git", [ + "rev-parse", + "--abbrev-ref", + "HEAD", + ]); + const gitCommit = readCommandOutput("git", ["rev-parse", "HEAD"]); const summary = { generatedAt: new Date().toISOString(), inputs: options.inputs, outputDir: options.outputDir, + git: { + branch: gitBranch, + commit: gitCommit, + }, artifacts: { aggregatePath, statusPath, From 91055a8e748597b1d57d4581a7cbb8cdffeb3dc9 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:50:24 +0000 Subject: [PATCH 122/333] improve: include git metadata in finalize summary artifacts Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 ++ crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 ++ scripts/finalize-playback-matrix.js | 18 ++++++++++++++++++ 4 files changed, 28 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index a0b2017849..7c83166054 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -84,6 +84,8 @@ node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --ou node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir 
/tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md +Finalize summary JSON includes generated timestamp, artifact paths, settings, pass/fail flags, and git branch/commit metadata when available. + # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md /tmp/playback-matrix-final/playback-comparison.md --comparison-json /tmp/playback-matrix-final/playback-comparison.json diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index eb4ce05fa5..595cb748c0 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -337,6 +337,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Reduces repeated keyed lookups during warmup readiness checks. - Preserves contiguous coverage semantics while lowering per-loop lookup overhead. +39. **Added git metadata to finalize summary artifacts (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now records git branch and commit SHA in finalize summary JSON output. + - Improves traceability of benchmark artifacts to exact source revision. 
+ --- ## Root Cause Analysis Archive @@ -469,6 +473,7 @@ Decoder Pipeline: 40. Added comparison policy mode reporting (allow/fail) for missing-candidate and candidate-only coverage in published matrix summaries. 41. Added finalize summary JSON artifact output with artifact/settings/result metadata for automation workflows. 42. Optimized contiguous prefetched-frame warmup scan using ordered map range iteration instead of repeated key lookups. +43. Added git branch/commit metadata into finalize summary JSON artifacts for source traceability. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -505,6 +510,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: contiguous warmup coverage scan now uses ordered map range iteration to reduce repeated key lookup overhead. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `scripts/finalize-playback-matrix.js`: added optional `--output-json` and default finalize summary JSON emission with artifact path and pass/fail metadata. +- `scripts/finalize-playback-matrix.js`: finalize summary JSON now includes git branch and commit metadata when available. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 9f0a37b26a..45ea7ea43f 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -114,6 +114,8 @@ pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-di pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json ``` +Finalize summary JSON includes artifact paths, gate outcomes, settings, and git branch/commit metadata when available. + Include optimization thresholds when finalizing: ```bash diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index bef816d78b..6296c27ca1 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -147,6 +147,14 @@ function run(command, args) { } } +function readCommandOutput(command, args) { + const result = spawnSync(command, args, { encoding: "utf8" }); + if (result.status !== 0) { + return null; + } + return result.stdout.trim() || null; +} + function main() { const options = parseArgs(process.argv); if (options.help) { @@ -304,10 +312,20 @@ function main() { options.compareBaselineInputs.length > 0 ? 
JSON.parse(fs.readFileSync(comparisonJsonPath, "utf8")) : null; + const gitBranch = readCommandOutput("git", [ + "rev-parse", + "--abbrev-ref", + "HEAD", + ]); + const gitCommit = readCommandOutput("git", ["rev-parse", "HEAD"]); const summary = { generatedAt: new Date().toISOString(), inputs: options.inputs, outputDir: options.outputDir, + git: { + branch: gitBranch, + commit: gitCommit, + }, artifacts: { aggregatePath, statusPath, From ba370613c664423072c2dcf885f76562da2066d3 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:52:12 +0000 Subject: [PATCH 123/333] improve: publish finalize summary metadata in matrix reports Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 8 +++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 +- scripts/finalize-playback-matrix.js | 81 +++++++++++----------- scripts/publish-playback-matrix-summary.js | 25 ++++++- 5 files changed, 76 insertions(+), 42 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 7c83166054..7ec06dbb69 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -89,6 +89,7 @@ Finalize summary JSON includes generated timestamp, artifact paths, settings, pa # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md 
/tmp/playback-matrix-final/playback-comparison.md --comparison-json /tmp/playback-matrix-final/playback-comparison.json +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --finalize-summary-json /tmp/playback-matrix-final/playback-finalize-summary.json # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 595cb748c0..33b178950a 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -341,6 +341,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/finalize-playback-matrix.js` now records git branch and commit SHA in finalize summary JSON output. - Improves traceability of benchmark artifacts to exact source revision. +40. **Wired finalize summary artifact into publish flow (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now generates finalize summary JSON before publish step. + - Finalize now forwards `--finalize-summary-json` to `publish-playback-matrix-summary.js`. + - Published matrix summaries can now include finalize artifact metadata in one-shot finalize runs. + --- ## Root Cause Analysis Archive @@ -474,6 +479,7 @@ Decoder Pipeline: 41. Added finalize summary JSON artifact output with artifact/settings/result metadata for automation workflows. 42. Optimized contiguous prefetched-frame warmup scan using ordered map range iteration instead of repeated key lookups. 43. Added git branch/commit metadata into finalize summary JSON artifacts for source traceability. +44. 
Wired finalize summary JSON into publish flow so one-shot finalize runs can publish summary metadata alongside matrix artifacts. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -511,6 +517,8 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `scripts/finalize-playback-matrix.js`: added optional `--output-json` and default finalize summary JSON emission with artifact path and pass/fail metadata. - `scripts/finalize-playback-matrix.js`: finalize summary JSON now includes git branch and commit metadata when available. +- `scripts/finalize-playback-matrix.js`: finalize now writes summary JSON before publish and passes `--finalize-summary-json` into publish flow. +- `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional finalize summary JSON input and surfaces finalize source/validation metadata. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 45ea7ea43f..284360cfd1 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -151,7 +151,8 @@ pnpm bench:playback:publish -- \ --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ --comparison-md /tmp/playback-matrix-final/playback-comparison.md \ - --comparison-json /tmp/playback-matrix-final/playback-comparison.json + --comparison-json /tmp/playback-matrix-final/playback-comparison.json \ + --finalize-summary-json /tmp/playback-matrix-final/playback-finalize-summary.json ``` Generate bottleneck analysis for optimization backlog: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 6296c27ca1..76d07b1c38 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -267,46 +267,6 @@ function main() { } run("node", compareArgs); } - if (options.publishTarget) { - const publishArgs = [ - "scripts/publish-playback-matrix-summary.js", - "--aggregate-md", - aggregatePath, - "--status-md", - statusPath, - "--validation-json", - validationPath, - "--target", - options.publishTarget, - ]; - if (options.analyze) { - publishArgs.push("--bottlenecks-md", bottleneckPath); - } - if (options.compareBaselineInputs.length > 0) { - publishArgs.push( - "--comparison-md", - comparisonPath, - "--comparison-json", - comparisonJsonPath, - ); - } - run("node", publishArgs); - } - - console.log(`Aggregate markdown: ${aggregatePath}`); - console.log(`Status markdown: ${statusPath}`); - console.log(`Validation JSON: ${validationPath}`); - if (options.analyze) { - console.log(`Bottleneck analysis: ${bottleneckPath}`); - console.log(`Bottleneck analysis JSON: ${bottleneckJsonPath}`); - } - if (options.publishTarget) { - console.log(`Published target: ${options.publishTarget}`); - 
} - if (options.compareBaselineInputs.length > 0) { - console.log(`Comparison report: ${comparisonPath}`); - console.log(`Comparison JSON: ${comparisonJsonPath}`); - } const validation = JSON.parse(fs.readFileSync(validationPath, "utf8")); const comparison = options.compareBaselineInputs.length > 0 @@ -357,6 +317,47 @@ function main() { }, }; fs.writeFileSync(summaryJsonPath, JSON.stringify(summary, null, 2), "utf8"); + if (options.publishTarget) { + const publishArgs = [ + "scripts/publish-playback-matrix-summary.js", + "--aggregate-md", + aggregatePath, + "--status-md", + statusPath, + "--validation-json", + validationPath, + "--target", + options.publishTarget, + ]; + if (options.analyze) { + publishArgs.push("--bottlenecks-md", bottleneckPath); + } + if (options.compareBaselineInputs.length > 0) { + publishArgs.push( + "--comparison-md", + comparisonPath, + "--comparison-json", + comparisonJsonPath, + ); + } + publishArgs.push("--finalize-summary-json", summaryJsonPath); + run("node", publishArgs); + } + + console.log(`Aggregate markdown: ${aggregatePath}`); + console.log(`Status markdown: ${statusPath}`); + console.log(`Validation JSON: ${validationPath}`); + if (options.analyze) { + console.log(`Bottleneck analysis: ${bottleneckPath}`); + console.log(`Bottleneck analysis JSON: ${bottleneckJsonPath}`); + } + if (options.publishTarget) { + console.log(`Published target: ${options.publishTarget}`); + } + if (options.compareBaselineInputs.length > 0) { + console.log(`Comparison report: ${comparisonPath}`); + console.log(`Comparison JSON: ${comparisonJsonPath}`); + } console.log(`Finalize summary JSON: ${summaryJsonPath}`); } diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 663a67480c..128d19cd94 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -10,6 +10,7 @@ function parseArgs(argv) { bottlenecksMd: null, comparisonMd: null, comparisonJson: null, + 
finalizeSummaryJson: null, validationJson: null, target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), }; @@ -41,6 +42,10 @@ function parseArgs(argv) { options.comparisonJson = path.resolve(argv[++i] ?? ""); continue; } + if (arg === "--finalize-summary-json") { + options.finalizeSummaryJson = path.resolve(argv[++i] ?? ""); + continue; + } if (arg === "--validation-json") { options.validationJson = path.resolve(argv[++i] ?? ""); continue; @@ -56,7 +61,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--comparison-json ] [--target ] + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--comparison-json ] [--finalize-summary-json ] [--target ] Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); } @@ -74,6 +79,7 @@ function buildSummarySection( bottlenecksMd, comparisonMd, comparisonJson, + finalizeSummaryJson, ) { const now = new Date().toISOString(); const validation = JSON.parse(validationJson); @@ -96,6 +102,16 @@ function buildSummarySection( markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; } + if (finalizeSummaryJson) { + const finalizeSummary = JSON.parse(finalizeSummaryJson); + markdown += `- Finalize source branch: ${finalizeSummary.git?.branch ?? "n/a"}\n`; + markdown += `- Finalize source commit: ${finalizeSummary.git?.commit ?? "n/a"}\n`; + markdown += `- Finalize validation passed: ${finalizeSummary.results?.validationPassed === true ? 
"true" : "false"}\n`; + if (finalizeSummary.results?.comparisonPassed !== null) { + markdown += `- Finalize comparison passed: ${finalizeSummary.results?.comparisonPassed === true ? "true" : "false"}\n`; + } + markdown += "\n"; + } if ((validation.missingCells?.length ?? 0) > 0) { markdown += "**Missing Cells**\n"; @@ -171,6 +187,9 @@ function main() { if (options.comparisonJson) { ensureFile(options.comparisonJson, "Comparison JSON"); } + if (options.finalizeSummaryJson) { + ensureFile(options.finalizeSummaryJson, "Finalize summary JSON"); + } ensureFile(options.target, "Target"); const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8"); @@ -185,6 +204,9 @@ function main() { const comparisonJson = options.comparisonJson ? fs.readFileSync(options.comparisonJson, "utf8") : null; + const finalizeSummaryJson = options.finalizeSummaryJson + ? fs.readFileSync(options.finalizeSummaryJson, "utf8") + : null; const summaryMd = buildSummarySection( aggregateMd, statusMd, @@ -192,6 +214,7 @@ function main() { bottlenecksMd, comparisonMd, comparisonJson, + finalizeSummaryJson, ); writeToBenchmarkHistory(options.target, summaryMd); console.log(`Published matrix summary into ${options.target}`); From c1e1f8543235a8d65a4cc73e987db1e9a42482b2 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:52:12 +0000 Subject: [PATCH 124/333] improve: publish finalize summary metadata in matrix reports Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 8 +++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 +- scripts/finalize-playback-matrix.js | 81 +++++++++++----------- scripts/publish-playback-matrix-summary.js | 25 ++++++- 5 files changed, 76 insertions(+), 42 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 7c83166054..7ec06dbb69 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -89,6 +89,7 @@ 
Finalize summary JSON includes generated timestamp, artifact paths, settings, pa # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md /tmp/playback-matrix-final/playback-comparison.md --comparison-json /tmp/playback-matrix-final/playback-comparison.json +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --finalize-summary-json /tmp/playback-matrix-final/playback-finalize-summary.json # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 595cb748c0..33b178950a 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -341,6 +341,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/finalize-playback-matrix.js` now records git branch and commit SHA in finalize summary JSON output. - Improves traceability of benchmark artifacts to exact source revision. +40. 
**Wired finalize summary artifact into publish flow (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now generates finalize summary JSON before publish step. + - Finalize now forwards `--finalize-summary-json` to `publish-playback-matrix-summary.js`. + - Published matrix summaries can now include finalize artifact metadata in one-shot finalize runs. + --- ## Root Cause Analysis Archive @@ -474,6 +479,7 @@ Decoder Pipeline: 41. Added finalize summary JSON artifact output with artifact/settings/result metadata for automation workflows. 42. Optimized contiguous prefetched-frame warmup scan using ordered map range iteration instead of repeated key lookups. 43. Added git branch/commit metadata into finalize summary JSON artifacts for source traceability. +44. Wired finalize summary JSON into publish flow so one-shot finalize runs can publish summary metadata alongside matrix artifacts. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -511,6 +517,8 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `scripts/finalize-playback-matrix.js`: added optional `--output-json` and default finalize summary JSON emission with artifact path and pass/fail metadata. - `scripts/finalize-playback-matrix.js`: finalize summary JSON now includes git branch and commit metadata when available. +- `scripts/finalize-playback-matrix.js`: finalize now writes summary JSON before publish and passes `--finalize-summary-json` into publish flow. +- `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional finalize summary JSON input and surfaces finalize source/validation metadata. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. 
- `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 45ea7ea43f..284360cfd1 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -151,7 +151,8 @@ pnpm bench:playback:publish -- \ --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ --comparison-md /tmp/playback-matrix-final/playback-comparison.md \ - --comparison-json /tmp/playback-matrix-final/playback-comparison.json + --comparison-json /tmp/playback-matrix-final/playback-comparison.json \ + --finalize-summary-json /tmp/playback-matrix-final/playback-finalize-summary.json ``` Generate bottleneck analysis for optimization backlog: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 6296c27ca1..76d07b1c38 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -267,46 +267,6 @@ function main() { } run("node", compareArgs); } - if (options.publishTarget) { - const publishArgs = [ - "scripts/publish-playback-matrix-summary.js", - "--aggregate-md", - aggregatePath, - "--status-md", - statusPath, - "--validation-json", - validationPath, - "--target", - options.publishTarget, - ]; - if (options.analyze) { - publishArgs.push("--bottlenecks-md", bottleneckPath); - } - if (options.compareBaselineInputs.length > 0) { - publishArgs.push( - "--comparison-md", - comparisonPath, - "--comparison-json", - comparisonJsonPath, - ); - } - run("node", publishArgs); - } - - console.log(`Aggregate markdown: ${aggregatePath}`); - console.log(`Status markdown: 
${statusPath}`); - console.log(`Validation JSON: ${validationPath}`); - if (options.analyze) { - console.log(`Bottleneck analysis: ${bottleneckPath}`); - console.log(`Bottleneck analysis JSON: ${bottleneckJsonPath}`); - } - if (options.publishTarget) { - console.log(`Published target: ${options.publishTarget}`); - } - if (options.compareBaselineInputs.length > 0) { - console.log(`Comparison report: ${comparisonPath}`); - console.log(`Comparison JSON: ${comparisonJsonPath}`); - } const validation = JSON.parse(fs.readFileSync(validationPath, "utf8")); const comparison = options.compareBaselineInputs.length > 0 @@ -357,6 +317,47 @@ function main() { }, }; fs.writeFileSync(summaryJsonPath, JSON.stringify(summary, null, 2), "utf8"); + if (options.publishTarget) { + const publishArgs = [ + "scripts/publish-playback-matrix-summary.js", + "--aggregate-md", + aggregatePath, + "--status-md", + statusPath, + "--validation-json", + validationPath, + "--target", + options.publishTarget, + ]; + if (options.analyze) { + publishArgs.push("--bottlenecks-md", bottleneckPath); + } + if (options.compareBaselineInputs.length > 0) { + publishArgs.push( + "--comparison-md", + comparisonPath, + "--comparison-json", + comparisonJsonPath, + ); + } + publishArgs.push("--finalize-summary-json", summaryJsonPath); + run("node", publishArgs); + } + + console.log(`Aggregate markdown: ${aggregatePath}`); + console.log(`Status markdown: ${statusPath}`); + console.log(`Validation JSON: ${validationPath}`); + if (options.analyze) { + console.log(`Bottleneck analysis: ${bottleneckPath}`); + console.log(`Bottleneck analysis JSON: ${bottleneckJsonPath}`); + } + if (options.publishTarget) { + console.log(`Published target: ${options.publishTarget}`); + } + if (options.compareBaselineInputs.length > 0) { + console.log(`Comparison report: ${comparisonPath}`); + console.log(`Comparison JSON: ${comparisonJsonPath}`); + } console.log(`Finalize summary JSON: ${summaryJsonPath}`); } diff --git 
a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 663a67480c..128d19cd94 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -10,6 +10,7 @@ function parseArgs(argv) { bottlenecksMd: null, comparisonMd: null, comparisonJson: null, + finalizeSummaryJson: null, validationJson: null, target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), }; @@ -41,6 +42,10 @@ function parseArgs(argv) { options.comparisonJson = path.resolve(argv[++i] ?? ""); continue; } + if (arg === "--finalize-summary-json") { + options.finalizeSummaryJson = path.resolve(argv[++i] ?? ""); + continue; + } if (arg === "--validation-json") { options.validationJson = path.resolve(argv[++i] ?? ""); continue; @@ -56,7 +61,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--comparison-json ] [--target ] + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--comparison-json ] [--finalize-summary-json ] [--target ] Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); } @@ -74,6 +79,7 @@ function buildSummarySection( bottlenecksMd, comparisonMd, comparisonJson, + finalizeSummaryJson, ) { const now = new Date().toISOString(); const validation = JSON.parse(validationJson); @@ -96,6 +102,16 @@ function buildSummarySection( markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; } + if (finalizeSummaryJson) { + const finalizeSummary = JSON.parse(finalizeSummaryJson); + markdown += `- Finalize source branch: ${finalizeSummary.git?.branch ?? 
"n/a"}\n`; + markdown += `- Finalize source commit: ${finalizeSummary.git?.commit ?? "n/a"}\n`; + markdown += `- Finalize validation passed: ${finalizeSummary.results?.validationPassed === true ? "true" : "false"}\n`; + if (finalizeSummary.results?.comparisonPassed !== null) { + markdown += `- Finalize comparison passed: ${finalizeSummary.results?.comparisonPassed === true ? "true" : "false"}\n`; + } + markdown += "\n"; + } if ((validation.missingCells?.length ?? 0) > 0) { markdown += "**Missing Cells**\n"; @@ -171,6 +187,9 @@ function main() { if (options.comparisonJson) { ensureFile(options.comparisonJson, "Comparison JSON"); } + if (options.finalizeSummaryJson) { + ensureFile(options.finalizeSummaryJson, "Finalize summary JSON"); + } ensureFile(options.target, "Target"); const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8"); @@ -185,6 +204,9 @@ function main() { const comparisonJson = options.comparisonJson ? fs.readFileSync(options.comparisonJson, "utf8") : null; + const finalizeSummaryJson = options.finalizeSummaryJson + ? 
fs.readFileSync(options.finalizeSummaryJson, "utf8") + : null; const summaryMd = buildSummarySection( aggregateMd, statusMd, @@ -192,6 +214,7 @@ function main() { bottlenecksMd, comparisonMd, comparisonJson, + finalizeSummaryJson, ); writeToBenchmarkHistory(options.target, summaryMd); console.log(`Published matrix summary into ${options.target}`); From 143455461929fb5411b186f88c906ccd3e3dc316 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:56:04 +0000 Subject: [PATCH 125/333] improve: aggregate multi-run comparison metrics by key Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 ++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 + scripts/compare-playback-benchmark-runs.js | 49 +++++++++++++++++----- 4 files changed, 48 insertions(+), 11 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 7ec06dbb69..fbe575630e 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -103,6 +103,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. +Comparison table also reports baseline/candidate run counts per row when multiple JSON inputs contribute to the same key. 
``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 33b178950a..a5be6ee0b8 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -346,6 +346,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Finalize now forwards `--finalize-summary-json` to `publish-playback-matrix-summary.js`. - Published matrix summaries can now include finalize artifact metadata in one-shot finalize runs. +41. **Improved comparison aggregation across multi-input runs (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now aggregates metrics per comparison key across all contributing input reports instead of last-write-wins replacement. + - Comparison output now includes baseline/candidate run counts per row to surface aggregation depth. + - Fixed comparison regression evaluation to use explicit options parameter wiring inside compare function. + --- ## Root Cause Analysis Archive @@ -480,6 +485,7 @@ Decoder Pipeline: 42. Optimized contiguous prefetched-frame warmup scan using ordered map range iteration instead of repeated key lookups. 43. Added git branch/commit metadata into finalize summary JSON artifacts for source traceability. 44. Wired finalize summary JSON into publish flow so one-shot finalize runs can publish summary metadata alongside matrix artifacts. +45. Improved multi-input comparison aggregation by merging per-key metrics across runs and surfacing baseline/candidate run counts per comparison row. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -501,6 +507,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. 
- `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. +- `scripts/compare-playback-benchmark-runs.js`: fixed options wiring inside comparison regression checks and now aggregates per-key metrics across multi-input runs with run-count reporting. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 284360cfd1..7d07903b34 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -179,6 +179,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # compare output now includes both missing-candidate rows and candidate-only rows # optional: fail compare gate when candidate includes rows absent in baseline pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only + +# when multiple inputs are provided, comparison output includes baseline/candidate run counts per row ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index c858e8d396..e9184c397f 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -131,7 +131,7 @@ function maximum(values) { } function collectMetrics(files) { - const rows = new Map(); + const accumulators = new Map(); for (const filePath of files) { const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); @@ -161,20 
+161,45 @@ function collectMetrics(files) { .map((entry) => entry.p95_seek_time_ms) .filter((entry) => typeof entry === "number"); - rows.set(key, { + const existing = accumulators.get(key) ?? { key, platform, gpu, scenario, recording: report.recording_name ?? "unknown", format: report.is_fragmented ? "fragmented" : "mp4", - fpsMin: fpsValues.length ? Math.min(...fpsValues) : null, - startupAvg: average(startupValues), - scrubP95Max: maximum(scrubP95Values), - }); + reportCount: 0, + fpsSamples: [], + startupSamples: [], + scrubP95Samples: [], + }; + existing.reportCount += 1; + existing.fpsSamples.push(...fpsValues); + existing.startupSamples.push(...startupValues); + existing.scrubP95Samples.push(...scrubP95Values); + accumulators.set(key, existing); } } + const rows = new Map(); + for (const [key, row] of accumulators) { + rows.set(key, { + key, + platform: row.platform, + gpu: row.gpu, + scenario: row.scenario, + recording: row.recording, + format: row.format, + reportCount: row.reportCount, + fpsSampleCount: row.fpsSamples.length, + startupSampleCount: row.startupSamples.length, + scrubSampleCount: row.scrubP95Samples.length, + fpsMin: row.fpsSamples.length ? Math.min(...row.fpsSamples) : null, + startupAvg: average(row.startupSamples), + scrubP95Max: maximum(row.scrubP95Samples), + }); + } + return rows; } @@ -187,7 +212,7 @@ function formatNumber(value, digits = 2) { return value === null ? 
"n/a" : value.toFixed(digits); } -function compareMetrics(baselineRows, candidateRows) { +function compareMetrics(baselineRows, candidateRows, options) { const comparisons = []; const missingCandidateRows = []; const candidateOnlyRows = []; @@ -242,6 +267,8 @@ function compareMetrics(baselineRows, candidateRows) { scenario: candidate.scenario, recording: candidate.recording, format: candidate.format, + baselineReportCount: baseline.reportCount, + candidateReportCount: candidate.reportCount, fpsDelta, startupDelta, scrubDelta, @@ -287,10 +314,10 @@ function toMarkdown( md += "\n"; } md += - "| Platform | GPU | Scenario | Recording | Format | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; - md += "|---|---|---|---|---|---:|---:|---:|---|\n"; + "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; + md += "|---|---|---|---|---|---:|---:|---:|---:|---:|---|\n"; for (const row of comparisons) { - md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? row.regressions.join(", ") : "none"} |\n`; + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.baselineReportCount} | ${row.candidateReportCount} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? 
row.regressions.join(", ") : "none"} |\n`; } md += "\n"; return md; @@ -360,7 +387,7 @@ function main() { const baselineRows = collectMetrics(baselineFiles); const candidateRows = collectMetrics(candidateFiles); const { comparisons, missingCandidateRows, candidateOnlyRows } = - compareMetrics(baselineRows, candidateRows); + compareMetrics(baselineRows, candidateRows, options); const markdown = toMarkdown( comparisons, missingCandidateRows, From c332adab4ec2119541822aa912af66d9a0da68a4 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:56:04 +0000 Subject: [PATCH 126/333] improve: aggregate multi-run comparison metrics by key Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 ++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 + scripts/compare-playback-benchmark-runs.js | 49 +++++++++++++++++----- 4 files changed, 48 insertions(+), 11 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 7ec06dbb69..fbe575630e 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -103,6 +103,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. +Comparison table also reports baseline/candidate run counts per row when multiple JSON inputs contribute to the same key. 
``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 33b178950a..a5be6ee0b8 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -346,6 +346,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Finalize now forwards `--finalize-summary-json` to `publish-playback-matrix-summary.js`. - Published matrix summaries can now include finalize artifact metadata in one-shot finalize runs. +41. **Improved comparison aggregation across multi-input runs (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now aggregates metrics per comparison key across all contributing input reports instead of last-write-wins replacement. + - Comparison output now includes baseline/candidate run counts per row to surface aggregation depth. + - Fixed comparison regression evaluation to use explicit options parameter wiring inside compare function. + --- ## Root Cause Analysis Archive @@ -480,6 +485,7 @@ Decoder Pipeline: 42. Optimized contiguous prefetched-frame warmup scan using ordered map range iteration instead of repeated key lookups. 43. Added git branch/commit metadata into finalize summary JSON artifacts for source traceability. 44. Wired finalize summary JSON into publish flow so one-shot finalize runs can publish summary metadata alongside matrix artifacts. +45. Improved multi-input comparison aggregation by merging per-key metrics across runs and surfacing baseline/candidate run counts per comparison row. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -501,6 +507,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. 
- `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. +- `scripts/compare-playback-benchmark-runs.js`: fixed options wiring inside comparison regression checks and now aggregates per-key metrics across multi-input runs with run-count reporting. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 284360cfd1..7d07903b34 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -179,6 +179,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # compare output now includes both missing-candidate rows and candidate-only rows # optional: fail compare gate when candidate includes rows absent in baseline pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only + +# when multiple inputs are provided, comparison output includes baseline/candidate run counts per row ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index c858e8d396..e9184c397f 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -131,7 +131,7 @@ function maximum(values) { } function collectMetrics(files) { - const rows = new Map(); + const accumulators = new Map(); for (const filePath of files) { const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); @@ -161,20 
+161,45 @@ function collectMetrics(files) { .map((entry) => entry.p95_seek_time_ms) .filter((entry) => typeof entry === "number"); - rows.set(key, { + const existing = accumulators.get(key) ?? { key, platform, gpu, scenario, recording: report.recording_name ?? "unknown", format: report.is_fragmented ? "fragmented" : "mp4", - fpsMin: fpsValues.length ? Math.min(...fpsValues) : null, - startupAvg: average(startupValues), - scrubP95Max: maximum(scrubP95Values), - }); + reportCount: 0, + fpsSamples: [], + startupSamples: [], + scrubP95Samples: [], + }; + existing.reportCount += 1; + existing.fpsSamples.push(...fpsValues); + existing.startupSamples.push(...startupValues); + existing.scrubP95Samples.push(...scrubP95Values); + accumulators.set(key, existing); } } + const rows = new Map(); + for (const [key, row] of accumulators) { + rows.set(key, { + key, + platform: row.platform, + gpu: row.gpu, + scenario: row.scenario, + recording: row.recording, + format: row.format, + reportCount: row.reportCount, + fpsSampleCount: row.fpsSamples.length, + startupSampleCount: row.startupSamples.length, + scrubSampleCount: row.scrubP95Samples.length, + fpsMin: row.fpsSamples.length ? Math.min(...row.fpsSamples) : null, + startupAvg: average(row.startupSamples), + scrubP95Max: maximum(row.scrubP95Samples), + }); + } + return rows; } @@ -187,7 +212,7 @@ function formatNumber(value, digits = 2) { return value === null ? 
"n/a" : value.toFixed(digits); } -function compareMetrics(baselineRows, candidateRows) { +function compareMetrics(baselineRows, candidateRows, options) { const comparisons = []; const missingCandidateRows = []; const candidateOnlyRows = []; @@ -242,6 +267,8 @@ function compareMetrics(baselineRows, candidateRows) { scenario: candidate.scenario, recording: candidate.recording, format: candidate.format, + baselineReportCount: baseline.reportCount, + candidateReportCount: candidate.reportCount, fpsDelta, startupDelta, scrubDelta, @@ -287,10 +314,10 @@ function toMarkdown( md += "\n"; } md += - "| Platform | GPU | Scenario | Recording | Format | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; - md += "|---|---|---|---|---|---:|---:|---:|---|\n"; + "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; + md += "|---|---|---|---|---|---:|---:|---:|---:|---:|---|\n"; for (const row of comparisons) { - md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? row.regressions.join(", ") : "none"} |\n`; + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.baselineReportCount} | ${row.candidateReportCount} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? 
row.regressions.join(", ") : "none"} |\n`; } md += "\n"; return md; @@ -360,7 +387,7 @@ function main() { const baselineRows = collectMetrics(baselineFiles); const candidateRows = collectMetrics(candidateFiles); const { comparisons, missingCandidateRows, candidateOnlyRows } = - compareMetrics(baselineRows, candidateRows); + compareMetrics(baselineRows, candidateRows, options); const markdown = toMarkdown( comparisons, missingCandidateRows, From 19b0083fe79846cb33ad33810b590aa8716bc83f Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 22:57:12 +0000 Subject: [PATCH 127/333] improve: defer contiguous warmup scans until first frame arrival Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 14 +++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a5be6ee0b8..b7137c7c7b 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -351,6 +351,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison output now includes baseline/candidate run counts per row to surface aggregation depth. - Fixed comparison regression evaluation to use explicit options parameter wiring inside compare function. +42. **Skipped contiguous warmup scans before first eligible frame (2026-02-13)** + - Warmup loop now defers contiguous-prefetch counting until first warmup frame arrival is observed. + - Reduces avoidable buffer scan work during pre-frame warmup wait. + --- ## Root Cause Analysis Archive @@ -486,6 +490,7 @@ Decoder Pipeline: 43. Added git branch/commit metadata into finalize summary JSON artifacts for source traceability. 44. Wired finalize summary JSON into publish flow so one-shot finalize runs can publish summary metadata alongside matrix artifacts. 45. 
Improved multi-input comparison aggregation by merging per-key metrics across runs and surfacing baseline/candidate run counts per comparison row. +46. Skipped contiguous warmup coverage scans until first warmup frame observation to reduce pre-frame warmup loop scan overhead. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -526,6 +531,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: finalize summary JSON now includes git branch and commit metadata when available. - `scripts/finalize-playback-matrix.js`: finalize now writes summary JSON before publish and passes `--finalize-summary-json` into publish flow. - `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional finalize summary JSON input and surfaces finalize source/validation metadata. +- `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 7cbf09d6e1..a934954c8d 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -528,11 +528,15 @@ impl Playback { ); while !*stop_rx.borrow() { - let contiguous_prefetched = count_contiguous_prefetched_frames( - &prefetch_buffer, - frame_number, - warmup_target_frames, - ); + let contiguous_prefetched = if first_frame_time.is_some() { + count_contiguous_prefetched_frames( + &prefetch_buffer, + frame_number, + warmup_target_frames, + ) + } else { + 0 + }; let should_start = if let Some(first_time) = first_frame_time { contiguous_prefetched >= warmup_target_frames || first_time.elapsed() > warmup_after_first_timeout From efc29ab9c6f672a8528c59ef1822426d1dab667e Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:57:12 +0000 Subject: [PATCH 128/333] improve: defer contiguous warmup scans until first frame arrival Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 14 +++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a5be6ee0b8..b7137c7c7b 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -351,6 +351,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison output now includes baseline/candidate run counts per row to surface aggregation depth. - Fixed comparison regression evaluation to use explicit options parameter wiring inside compare function. +42. **Skipped contiguous warmup scans before first eligible frame (2026-02-13)** + - Warmup loop now defers contiguous-prefetch counting until first warmup frame arrival is observed. + - Reduces avoidable buffer scan work during pre-frame warmup wait. + --- ## Root Cause Analysis Archive @@ -486,6 +490,7 @@ Decoder Pipeline: 43. 
Added git branch/commit metadata into finalize summary JSON artifacts for source traceability. 44. Wired finalize summary JSON into publish flow so one-shot finalize runs can publish summary metadata alongside matrix artifacts. 45. Improved multi-input comparison aggregation by merging per-key metrics across runs and surfacing baseline/candidate run counts per comparison row. +46. Skipped contiguous warmup coverage scans until first warmup frame observation to reduce pre-frame warmup loop scan overhead. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -526,6 +531,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: finalize summary JSON now includes git branch and commit metadata when available. - `scripts/finalize-playback-matrix.js`: finalize now writes summary JSON before publish and passes `--finalize-summary-json` into publish flow. - `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional finalize summary JSON input and surfaces finalize source/validation metadata. +- `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 7cbf09d6e1..a934954c8d 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -528,11 +528,15 @@ impl Playback { ); while !*stop_rx.borrow() { - let contiguous_prefetched = count_contiguous_prefetched_frames( - &prefetch_buffer, - frame_number, - warmup_target_frames, - ); + let contiguous_prefetched = if first_frame_time.is_some() { + count_contiguous_prefetched_frames( + &prefetch_buffer, + frame_number, + warmup_target_frames, + ) + } else { + 0 + }; let should_start = if let Some(first_time) = first_frame_time { contiguous_prefetched >= warmup_target_frames || first_time.elapsed() > warmup_after_first_timeout From cc1cd3fae71e9285577cf661dc9e134c62b094ac Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:01:34 +0000 Subject: [PATCH 129/333] improve: add minimum sample gating for benchmark comparisons Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 + crates/editor/PLAYBACK-FINDINGS.md | 9 +++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 5 ++ scripts/compare-playback-benchmark-runs.js | 89 ++++++++++++++++++++-- scripts/finalize-playback-matrix.js | 13 +++- scripts/publish-playback-matrix-summary.js | 2 + 6 files changed, 111 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index fbe575630e..32a27d92b6 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -81,6 +81,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final 
--compare-baseline /path/to/baseline-results --allow-missing-candidate node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md @@ -100,6 +101,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in 
baseline. diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b7137c7c7b..eaff5bff35 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -355,6 +355,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup loop now defers contiguous-prefetch counting until first warmup frame arrival is observed. - Reduces avoidable buffer scan work during pre-frame warmup wait. +43. **Added minimum sample-count gating for matrix comparisons (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--min-samples-per-row`. + - Comparison now flags rows with insufficient effective sample counts and reports them in markdown/JSON outputs. + - `scripts/finalize-playback-matrix.js` forwards minimum sample gating settings into compare stage, and publish summary now surfaces sample gating status fields. + --- ## Root Cause Analysis Archive @@ -491,6 +496,7 @@ Decoder Pipeline: 44. Wired finalize summary JSON into publish flow so one-shot finalize runs can publish summary metadata alongside matrix artifacts. 45. Improved multi-input comparison aggregation by merging per-key metrics across runs and surfacing baseline/candidate run counts per comparison row. 46. Skipped contiguous warmup coverage scans until first warmup frame observation to reduce pre-frame warmup loop scan overhead. +47. Added minimum-sample comparison gating with `--min-samples-per-row`, insufficient-sample reporting, and finalize passthrough support. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -531,6 +537,9 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: finalize summary JSON now includes git branch and commit metadata when available. - `scripts/finalize-playback-matrix.js`: finalize now writes summary JSON before publish and passes `--finalize-summary-json` into publish flow. 
- `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional finalize summary JSON input and surfaces finalize source/validation metadata. +- `scripts/compare-playback-benchmark-runs.js`: added `--min-samples-per-row`, insufficient-sample row reporting, and sample gate fields in markdown/JSON outputs. +- `scripts/finalize-playback-matrix.js`: forwards `--min-samples-per-row` into compare stage and captures it in finalize summary settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 7d07903b34..c1f1c7f2f1 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -129,6 +129,9 @@ pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir # optional: fail finalize compare gate when candidate includes rows absent in baseline pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only + +# optional: require minimum sample count per compared row +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 ``` Finalize and publish to benchmark history in one command: @@ -181,6 +184,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only # when multiple inputs are provided, comparison output includes baseline/candidate run counts per row +# optional: require minimum sample count per compared row +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index e9184c397f..9d788ba1ee 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -14,6 +14,7 @@ function parseArgs(argv) { allowScrubP95IncreaseMs: 5, allowMissingCandidate: false, failOnCandidateOnly: false, + minSamplesPerRow: 1, }; for (let i = 2; i < argv.length; i++) { @@ -75,6 +76,14 @@ function parseArgs(argv) { options.failOnCandidateOnly = true; continue; } + if (arg === 
"--min-samples-per-row") { + const value = Number.parseInt(argv[++i] ?? "", 10); + if (!Number.isFinite(value) || value < 1) { + throw new Error("Invalid --min-samples-per-row value"); + } + options.minSamplesPerRow = value; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -82,7 +91,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] Compares baseline and candidate playback matrix JSON outputs and flags regressions. 
Multiple --baseline and --candidate inputs are supported.`); } @@ -216,6 +225,7 @@ function compareMetrics(baselineRows, candidateRows, options) { const comparisons = []; const missingCandidateRows = []; const candidateOnlyRows = []; + const insufficientSampleRows = []; for (const [key, baseline] of baselineRows) { const candidate = candidateRows.get(key); @@ -248,6 +258,38 @@ function compareMetrics(baselineRows, candidateRows, options) { const scrubDelta = delta(candidate.scrubP95Max, baseline.scrubP95Max); const regressions = []; + const fpsMinSamples = Math.min( + baseline.fpsSampleCount, + candidate.fpsSampleCount, + ); + const startupMinSamples = Math.min( + baseline.startupSampleCount, + candidate.startupSampleCount, + ); + const scrubMinSamples = Math.min( + baseline.scrubSampleCount, + candidate.scrubSampleCount, + ); + const effectiveSampleCount = Math.min( + fpsMinSamples, + startupMinSamples, + scrubMinSamples, + ); + if (effectiveSampleCount < options.minSamplesPerRow) { + insufficientSampleRows.push({ + platform: candidate.platform, + gpu: candidate.gpu, + scenario: candidate.scenario, + recording: candidate.recording, + format: candidate.format, + effectiveSampleCount, + requiredSampleCount: options.minSamplesPerRow, + }); + regressions.push( + `insufficient_samples=${effectiveSampleCount}/${options.minSamplesPerRow}`, + ); + } + if (fpsDelta !== null && fpsDelta < -options.allowFpsDrop) { regressions.push(`fps_drop=${formatNumber(fpsDelta)}`); } @@ -269,6 +311,10 @@ function compareMetrics(baselineRows, candidateRows, options) { format: candidate.format, baselineReportCount: baseline.reportCount, candidateReportCount: candidate.reportCount, + fpsMinSamples, + startupMinSamples, + scrubMinSamples, + effectiveSampleCount, fpsDelta, startupDelta, scrubDelta, @@ -277,13 +323,19 @@ function compareMetrics(baselineRows, candidateRows, options) { } comparisons.sort((a, b) => b.regressions.length - a.regressions.length); - return { comparisons, 
missingCandidateRows, candidateOnlyRows }; + return { + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + }; } function toMarkdown( comparisons, missingCandidateRows, candidateOnlyRows, + insufficientSampleRows, options, ) { const regressions = comparisons.filter( @@ -294,7 +346,8 @@ function toMarkdown( md += `Generated: ${new Date().toISOString()}\n\n`; md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; md += `Coverage gate: missing_candidate=${options.allowMissingCandidate ? "allow" : "fail"}, candidate_only=${options.failOnCandidateOnly ? "fail" : "allow"}\n\n`; - md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}\n\n`; + md += `Sample gate: min_samples_per_row>=${options.minSamplesPerRow}\n\n`; + md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; if (missingCandidateRows.length > 0) { md += "## Missing Candidate Rows\n\n"; md += "| Platform | GPU | Scenario | Recording | Format |\n"; @@ -313,11 +366,21 @@ function toMarkdown( } md += "\n"; } + if (insufficientSampleRows.length > 0) { + md += "## Insufficient Sample Rows\n\n"; + md += + "| Platform | GPU | Scenario | Recording | Format | Effective Samples | Required Samples |\n"; + md += "|---|---|---|---|---|---:|---:|\n"; + for (const row of insufficientSampleRows) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.effectiveSampleCount} | ${row.requiredSampleCount} |\n`; + } + md += "\n"; + } md += - "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | FPS Δ 
| Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; - md += "|---|---|---|---|---|---:|---:|---:|---:|---:|---|\n"; + "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | F Samples | S Samples | Q Samples | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; + md += "|---|---|---|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---|\n"; for (const row of comparisons) { - md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.baselineReportCount} | ${row.candidateReportCount} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? row.regressions.join(", ") : "none"} |\n`; + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.baselineReportCount} | ${row.candidateReportCount} | ${row.fpsMinSamples} | ${row.startupMinSamples} | ${row.scrubMinSamples} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? 
row.regressions.join(", ") : "none"} |\n`; } md += "\n"; return md; @@ -327,6 +390,7 @@ function buildJsonOutput( comparisons, missingCandidateRows, candidateOnlyRows, + insufficientSampleRows, options, ) { const regressions = comparisons.filter( @@ -340,12 +404,14 @@ function buildJsonOutput( allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, allowMissingCandidate: options.allowMissingCandidate, failOnCandidateOnly: options.failOnCandidateOnly, + minSamplesPerRow: options.minSamplesPerRow, }, summary: { comparedRows: comparisons.length, regressions: regressions.length, missingCandidateRows: missingCandidateRows.length, candidateOnlyRows: candidateOnlyRows.length, + insufficientSampleRows: insufficientSampleRows.length, passed: regressions.length === 0 && (options.allowMissingCandidate || missingCandidateRows.length === 0) && @@ -354,6 +420,7 @@ function buildJsonOutput( regressions, missingCandidateRows, candidateOnlyRows, + insufficientSampleRows, comparisons, }; } @@ -386,18 +453,24 @@ function main() { const baselineRows = collectMetrics(baselineFiles); const candidateRows = collectMetrics(candidateFiles); - const { comparisons, missingCandidateRows, candidateOnlyRows } = - compareMetrics(baselineRows, candidateRows, options); + const { + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + } = compareMetrics(baselineRows, candidateRows, options); const markdown = toMarkdown( comparisons, missingCandidateRows, candidateOnlyRows, + insufficientSampleRows, options, ); const outputJson = buildJsonOutput( comparisons, missingCandidateRows, candidateOnlyRows, + insufficientSampleRows, options, ); diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 76d07b1c38..71babdd9e2 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -21,6 +21,7 @@ function parseArgs(argv) { allowScrubP95IncreaseMs: 5, allowMissingCandidate: false, failOnCandidateOnly: 
false, + minSamplesPerRow: 1, }; for (let i = 2; i < argv.length; i++) { @@ -128,6 +129,14 @@ function parseArgs(argv) { options.failOnCandidateOnly = true; continue; } + if (arg === "--min-samples-per-row") { + const value = Number.parseInt(argv[++i] ?? "", 10); + if (!Number.isFinite(value) || value < 1) { + throw new Error("Invalid --min-samples-per-row value"); + } + options.minSamplesPerRow = value; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -135,7 +144,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -265,6 +274,7 @@ function main() { if (options.failOnCandidateOnly) { compareArgs.push("--fail-on-candidate-only"); } + compareArgs.push("--min-samples-per-row", String(options.minSamplesPerRow)); run("node", compareArgs); } const validation = JSON.parse(fs.readFileSync(validationPath, "utf8")); @@ -310,6 +320,7 @@ function main() { allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, allowMissingCandidate: options.allowMissingCandidate, failOnCandidateOnly: options.failOnCandidateOnly, + minSamplesPerRow: options.minSamplesPerRow, }, results: { validationPassed: validation.passed === true, diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 128d19cd94..d2718b4efa 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -99,6 +99,8 @@ function buildSummarySection( markdown += `- Comparison regressions: ${comparison.summary?.regressions ?? "n/a"}\n`; markdown += `- Missing candidate rows: ${comparison.summary?.missingCandidateRows ?? "n/a"}\n\n`; markdown += `- Candidate-only rows: ${comparison.summary?.candidateOnlyRows ?? "n/a"}\n\n`; + markdown += `- Insufficient sample rows: ${comparison.summary?.insufficientSampleRows ?? "n/a"}\n`; + markdown += `- Minimum samples per row: ${comparison.tolerance?.minSamplesPerRow ?? "n/a"}\n\n`; markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? 
"fail" : "allow"}\n\n`; } From 7f158439a507145deff23b14ab0ee44d01a9820e Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:01:34 +0000 Subject: [PATCH 130/333] improve: add minimum sample gating for benchmark comparisons Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 + crates/editor/PLAYBACK-FINDINGS.md | 9 +++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 5 ++ scripts/compare-playback-benchmark-runs.js | 89 ++++++++++++++++++++-- scripts/finalize-playback-matrix.js | 13 +++- scripts/publish-playback-matrix-summary.js | 2 + 6 files changed, 111 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index fbe575630e..32a27d92b6 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -81,6 +81,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input /path/to/json-results 
--output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md @@ -100,6 +101,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b7137c7c7b..eaff5bff35 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -355,6 +355,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup loop now defers contiguous-prefetch counting until first warmup frame arrival is observed. - Reduces avoidable buffer scan work during pre-frame warmup wait. +43. **Added minimum sample-count gating for matrix comparisons (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--min-samples-per-row`. 
+ - Comparison now flags rows with insufficient effective sample counts and reports them in markdown/JSON outputs. + - `scripts/finalize-playback-matrix.js` forwards minimum sample gating settings into compare stage, and publish summary now surfaces sample gating status fields. + --- ## Root Cause Analysis Archive @@ -491,6 +496,7 @@ Decoder Pipeline: 44. Wired finalize summary JSON into publish flow so one-shot finalize runs can publish summary metadata alongside matrix artifacts. 45. Improved multi-input comparison aggregation by merging per-key metrics across runs and surfacing baseline/candidate run counts per comparison row. 46. Skipped contiguous warmup coverage scans until first warmup frame observation to reduce pre-frame warmup loop scan overhead. +47. Added minimum-sample comparison gating with `--min-samples-per-row`, insufficient-sample reporting, and finalize passthrough support. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -531,6 +537,9 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: finalize summary JSON now includes git branch and commit metadata when available. - `scripts/finalize-playback-matrix.js`: finalize now writes summary JSON before publish and passes `--finalize-summary-json` into publish flow. - `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional finalize summary JSON input and surfaces finalize source/validation metadata. +- `scripts/compare-playback-benchmark-runs.js`: added `--min-samples-per-row`, insufficient-sample row reporting, and sample gate fields in markdown/JSON outputs. +- `scripts/finalize-playback-matrix.js`: forwards `--min-samples-per-row` into compare stage and captures it in finalize summary settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. 
- `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 7d07903b34..c1f1c7f2f1 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -129,6 +129,9 @@ pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir # optional: fail finalize compare gate when candidate includes rows absent in baseline pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only + +# optional: require minimum sample count per compared row +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 ``` Finalize and publish to benchmark history in one command: @@ -181,6 +184,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only # when multiple inputs are provided, comparison output includes baseline/candidate run counts per row +# optional: require minimum sample count per compared row +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 
e9184c397f..9d788ba1ee 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -14,6 +14,7 @@ function parseArgs(argv) { allowScrubP95IncreaseMs: 5, allowMissingCandidate: false, failOnCandidateOnly: false, + minSamplesPerRow: 1, }; for (let i = 2; i < argv.length; i++) { @@ -75,6 +76,14 @@ function parseArgs(argv) { options.failOnCandidateOnly = true; continue; } + if (arg === "--min-samples-per-row") { + const value = Number.parseInt(argv[++i] ?? "", 10); + if (!Number.isFinite(value) || value < 1) { + throw new Error("Invalid --min-samples-per-row value"); + } + options.minSamplesPerRow = value; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -82,7 +91,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] Compares baseline and candidate playback matrix JSON outputs and flags regressions. 
Multiple --baseline and --candidate inputs are supported.`); } @@ -216,6 +225,7 @@ function compareMetrics(baselineRows, candidateRows, options) { const comparisons = []; const missingCandidateRows = []; const candidateOnlyRows = []; + const insufficientSampleRows = []; for (const [key, baseline] of baselineRows) { const candidate = candidateRows.get(key); @@ -248,6 +258,38 @@ function compareMetrics(baselineRows, candidateRows, options) { const scrubDelta = delta(candidate.scrubP95Max, baseline.scrubP95Max); const regressions = []; + const fpsMinSamples = Math.min( + baseline.fpsSampleCount, + candidate.fpsSampleCount, + ); + const startupMinSamples = Math.min( + baseline.startupSampleCount, + candidate.startupSampleCount, + ); + const scrubMinSamples = Math.min( + baseline.scrubSampleCount, + candidate.scrubSampleCount, + ); + const effectiveSampleCount = Math.min( + fpsMinSamples, + startupMinSamples, + scrubMinSamples, + ); + if (effectiveSampleCount < options.minSamplesPerRow) { + insufficientSampleRows.push({ + platform: candidate.platform, + gpu: candidate.gpu, + scenario: candidate.scenario, + recording: candidate.recording, + format: candidate.format, + effectiveSampleCount, + requiredSampleCount: options.minSamplesPerRow, + }); + regressions.push( + `insufficient_samples=${effectiveSampleCount}/${options.minSamplesPerRow}`, + ); + } + if (fpsDelta !== null && fpsDelta < -options.allowFpsDrop) { regressions.push(`fps_drop=${formatNumber(fpsDelta)}`); } @@ -269,6 +311,10 @@ function compareMetrics(baselineRows, candidateRows, options) { format: candidate.format, baselineReportCount: baseline.reportCount, candidateReportCount: candidate.reportCount, + fpsMinSamples, + startupMinSamples, + scrubMinSamples, + effectiveSampleCount, fpsDelta, startupDelta, scrubDelta, @@ -277,13 +323,19 @@ function compareMetrics(baselineRows, candidateRows, options) { } comparisons.sort((a, b) => b.regressions.length - a.regressions.length); - return { comparisons, 
missingCandidateRows, candidateOnlyRows }; + return { + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + }; } function toMarkdown( comparisons, missingCandidateRows, candidateOnlyRows, + insufficientSampleRows, options, ) { const regressions = comparisons.filter( @@ -294,7 +346,8 @@ function toMarkdown( md += `Generated: ${new Date().toISOString()}\n\n`; md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; md += `Coverage gate: missing_candidate=${options.allowMissingCandidate ? "allow" : "fail"}, candidate_only=${options.failOnCandidateOnly ? "fail" : "allow"}\n\n`; - md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}\n\n`; + md += `Sample gate: min_samples_per_row>=${options.minSamplesPerRow}\n\n`; + md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; if (missingCandidateRows.length > 0) { md += "## Missing Candidate Rows\n\n"; md += "| Platform | GPU | Scenario | Recording | Format |\n"; @@ -313,11 +366,21 @@ function toMarkdown( } md += "\n"; } + if (insufficientSampleRows.length > 0) { + md += "## Insufficient Sample Rows\n\n"; + md += + "| Platform | GPU | Scenario | Recording | Format | Effective Samples | Required Samples |\n"; + md += "|---|---|---|---|---|---:|---:|\n"; + for (const row of insufficientSampleRows) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.effectiveSampleCount} | ${row.requiredSampleCount} |\n`; + } + md += "\n"; + } md += - "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | FPS Δ 
| Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; - md += "|---|---|---|---|---|---:|---:|---:|---:|---:|---|\n"; + "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | F Samples | S Samples | Q Samples | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; + md += "|---|---|---|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---|\n"; for (const row of comparisons) { - md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.baselineReportCount} | ${row.candidateReportCount} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? row.regressions.join(", ") : "none"} |\n`; + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.baselineReportCount} | ${row.candidateReportCount} | ${row.fpsMinSamples} | ${row.startupMinSamples} | ${row.scrubMinSamples} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? 
row.regressions.join(", ") : "none"} |\n`; } md += "\n"; return md; @@ -327,6 +390,7 @@ function buildJsonOutput( comparisons, missingCandidateRows, candidateOnlyRows, + insufficientSampleRows, options, ) { const regressions = comparisons.filter( @@ -340,12 +404,14 @@ function buildJsonOutput( allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, allowMissingCandidate: options.allowMissingCandidate, failOnCandidateOnly: options.failOnCandidateOnly, + minSamplesPerRow: options.minSamplesPerRow, }, summary: { comparedRows: comparisons.length, regressions: regressions.length, missingCandidateRows: missingCandidateRows.length, candidateOnlyRows: candidateOnlyRows.length, + insufficientSampleRows: insufficientSampleRows.length, passed: regressions.length === 0 && (options.allowMissingCandidate || missingCandidateRows.length === 0) && @@ -354,6 +420,7 @@ function buildJsonOutput( regressions, missingCandidateRows, candidateOnlyRows, + insufficientSampleRows, comparisons, }; } @@ -386,18 +453,24 @@ function main() { const baselineRows = collectMetrics(baselineFiles); const candidateRows = collectMetrics(candidateFiles); - const { comparisons, missingCandidateRows, candidateOnlyRows } = - compareMetrics(baselineRows, candidateRows, options); + const { + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + } = compareMetrics(baselineRows, candidateRows, options); const markdown = toMarkdown( comparisons, missingCandidateRows, candidateOnlyRows, + insufficientSampleRows, options, ); const outputJson = buildJsonOutput( comparisons, missingCandidateRows, candidateOnlyRows, + insufficientSampleRows, options, ); diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 76d07b1c38..71babdd9e2 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -21,6 +21,7 @@ function parseArgs(argv) { allowScrubP95IncreaseMs: 5, allowMissingCandidate: false, failOnCandidateOnly: 
false, + minSamplesPerRow: 1, }; for (let i = 2; i < argv.length; i++) { @@ -128,6 +129,14 @@ function parseArgs(argv) { options.failOnCandidateOnly = true; continue; } + if (arg === "--min-samples-per-row") { + const value = Number.parseInt(argv[++i] ?? "", 10); + if (!Number.isFinite(value) || value < 1) { + throw new Error("Invalid --min-samples-per-row value"); + } + options.minSamplesPerRow = value; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -135,7 +144,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -265,6 +274,7 @@ function main() { if (options.failOnCandidateOnly) { compareArgs.push("--fail-on-candidate-only"); } + compareArgs.push("--min-samples-per-row", String(options.minSamplesPerRow)); run("node", compareArgs); } const validation = JSON.parse(fs.readFileSync(validationPath, "utf8")); @@ -310,6 +320,7 @@ function main() { allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, allowMissingCandidate: options.allowMissingCandidate, failOnCandidateOnly: options.failOnCandidateOnly, + minSamplesPerRow: options.minSamplesPerRow, }, results: { validationPassed: validation.passed === true, diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 128d19cd94..d2718b4efa 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -99,6 +99,8 @@ function buildSummarySection( markdown += `- Comparison regressions: ${comparison.summary?.regressions ?? "n/a"}\n`; markdown += `- Missing candidate rows: ${comparison.summary?.missingCandidateRows ?? "n/a"}\n\n`; markdown += `- Candidate-only rows: ${comparison.summary?.candidateOnlyRows ?? "n/a"}\n\n`; + markdown += `- Insufficient sample rows: ${comparison.summary?.insufficientSampleRows ?? "n/a"}\n`; + markdown += `- Minimum samples per row: ${comparison.tolerance?.minSamplesPerRow ?? "n/a"}\n\n`; markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? 
"fail" : "allow"}\n\n`; } From 703e39798356d01d3f2460bb957e4fad80dff09d Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:02:38 +0000 Subject: [PATCH 131/333] improve: cache warmup contiguous coverage between buffer updates Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 16 +++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index eaff5bff35..caf43652f4 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -360,6 +360,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison now flags rows with insufficient effective sample counts and reports them in markdown/JSON outputs. - `scripts/finalize-playback-matrix.js` forwards minimum sample gating settings into compare stage, and publish summary now surfaces sample gating status fields. +44. **Cached warmup contiguous coverage counts during warmup (2026-02-13)** + - Warmup loop now recomputes contiguous prefetched coverage only when warmup buffer content changes. + - Avoids repeated contiguous scans on idle warmup iterations. + --- ## Root Cause Analysis Archive @@ -497,6 +501,7 @@ Decoder Pipeline: 45. Improved multi-input comparison aggregation by merging per-key metrics across runs and surfacing baseline/candidate run counts per comparison row. 46. Skipped contiguous warmup coverage scans until first warmup frame observation to reduce pre-frame warmup loop scan overhead. 47. Added minimum-sample comparison gating with `--min-samples-per-row`, insufficient-sample reporting, and finalize passthrough support. +48. Cached warmup contiguous coverage values and only recomputed contiguous scan when warmup buffer changed. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. 
@@ -541,6 +546,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: forwards `--min-samples-per-row` into compare stage and captures it in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. +- `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index a934954c8d..972824a75d 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -521,6 +521,8 @@ impl Playback { let warmup_no_frames_timeout = Duration::from_secs(5); let warmup_start = Instant::now(); let mut first_frame_time: Option = None; + let mut warmup_contiguous_prefetched = 0usize; + let mut warmup_buffer_changed = false; info!( warmup_target_frames, warmup_after_first_timeout_ms = warmup_after_first_timeout.as_secs_f64() * 1000.0, @@ -528,12 +530,16 @@ impl Playback { ); while !*stop_rx.borrow() { - let contiguous_prefetched = if first_frame_time.is_some() { - count_contiguous_prefetched_frames( + if first_frame_time.is_some() && warmup_buffer_changed { + warmup_contiguous_prefetched = count_contiguous_prefetched_frames( &prefetch_buffer, frame_number, warmup_target_frames, - ) + ); + warmup_buffer_changed = false; + } + let contiguous_prefetched = if first_frame_time.is_some() { + warmup_contiguous_prefetched } else { 0 }; @@ -560,7 +566,11 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { if prefetched.generation == seek_generation { + let pre_insert_len = prefetch_buffer.len(); insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); + if prefetch_buffer.len() != pre_insert_len { + warmup_buffer_changed = true; + } if first_frame_time.is_none() && !prefetch_buffer.is_empty() { first_frame_time = Some(Instant::now()); } From bc1d20f46d47a1afc419963abdb6878408002b3a Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:02:38 +0000 Subject: [PATCH 132/333] improve: cache warmup contiguous coverage between buffer updates Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 16 +++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index eaff5bff35..caf43652f4 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -360,6 +360,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison now flags rows with insufficient effective sample counts and reports them in markdown/JSON outputs. - `scripts/finalize-playback-matrix.js` forwards minimum sample gating settings into compare stage, and publish summary now surfaces sample gating status fields. +44. **Cached warmup contiguous coverage counts during warmup (2026-02-13)** + - Warmup loop now recomputes contiguous prefetched coverage only when warmup buffer content changes. + - Avoids repeated contiguous scans on idle warmup iterations. + --- ## Root Cause Analysis Archive @@ -497,6 +501,7 @@ Decoder Pipeline: 45. Improved multi-input comparison aggregation by merging per-key metrics across runs and surfacing baseline/candidate run counts per comparison row. 46. Skipped contiguous warmup coverage scans until first warmup frame observation to reduce pre-frame warmup loop scan overhead. 47. 
Added minimum-sample comparison gating with `--min-samples-per-row`, insufficient-sample reporting, and finalize passthrough support. +48. Cached warmup contiguous coverage values and only recomputed contiguous scan when warmup buffer changed. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -541,6 +546,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: forwards `--min-samples-per-row` into compare stage and captures it in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. +- `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index a934954c8d..972824a75d 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -521,6 +521,8 @@ impl Playback { let warmup_no_frames_timeout = Duration::from_secs(5); let warmup_start = Instant::now(); let mut first_frame_time: Option = None; + let mut warmup_contiguous_prefetched = 0usize; + let mut warmup_buffer_changed = false; info!( warmup_target_frames, warmup_after_first_timeout_ms = warmup_after_first_timeout.as_secs_f64() * 1000.0, @@ -528,12 +530,16 @@ impl Playback { ); while !*stop_rx.borrow() { - let contiguous_prefetched = if first_frame_time.is_some() { - count_contiguous_prefetched_frames( + if first_frame_time.is_some() && warmup_buffer_changed { + warmup_contiguous_prefetched = count_contiguous_prefetched_frames( &prefetch_buffer, frame_number, warmup_target_frames, - ) + ); + warmup_buffer_changed = false; + } + let contiguous_prefetched = if first_frame_time.is_some() { + warmup_contiguous_prefetched } else { 0 }; @@ -560,7 +566,11 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { if prefetched.generation == seek_generation { + let pre_insert_len = prefetch_buffer.len(); insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); + if prefetch_buffer.len() != pre_insert_len { + warmup_buffer_changed = true; + } if first_frame_time.is_none() && !prefetch_buffer.is_empty() { first_frame_time = Some(Instant::now()); } From 766dabf941f2aacd3209b2abf2110699d6a85f4f Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:04:04 +0000 Subject: [PATCH 133/333] improve: expose comparison gate diagnostics for automation Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 ++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 + scripts/compare-playback-benchmark-runs.js | 45 +++++++++++++++++----- scripts/publish-playback-matrix-summary.js | 6 +++ 5 files changed, 52 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 32a27d92b6..b535eb5464 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -106,6 +106,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. Comparison table also reports baseline/candidate run counts per row when multiple JSON inputs contribute to the same key. +Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for automation-friendly gate diagnostics. 
``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index caf43652f4..da66af7eb7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -364,6 +364,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup loop now recomputes contiguous prefetched coverage only when warmup buffer content changes. - Avoids repeated contiguous scans on idle warmup iterations. +45. **Added explicit comparison gate diagnostics in JSON and published summaries (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now emits `failureReasons` and `gateOutcomes` in summary JSON. + - `scripts/publish-playback-matrix-summary.js` now surfaces comparison failure reasons when present. + --- ## Root Cause Analysis Archive @@ -502,6 +506,7 @@ Decoder Pipeline: 46. Skipped contiguous warmup coverage scans until first warmup frame observation to reduce pre-frame warmup loop scan overhead. 47. Added minimum-sample comparison gating with `--min-samples-per-row`, insufficient-sample reporting, and finalize passthrough support. 48. Cached warmup contiguous coverage values and only recomputed contiguous scan when warmup buffer changed. +49. Added comparison JSON gate diagnostics (`failureReasons`, `gateOutcomes`) and surfaced failure reasons in published summary output. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -545,6 +550,8 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added `--min-samples-per-row`, insufficient-sample row reporting, and sample gate fields in markdown/JSON outputs. - `scripts/finalize-playback-matrix.js`: forwards `--min-samples-per-row` into compare stage and captures it in finalize summary settings. 
- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. +- `scripts/compare-playback-benchmark-runs.js`: comparison JSON summary now includes explicit `failureReasons` and `gateOutcomes` fields. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes comparison failure reasons when present. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index c1f1c7f2f1..9cb6957e3e 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -186,6 +186,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # when multiple inputs are provided, comparison output includes baseline/candidate run counts per row # optional: require minimum sample count per compared row pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 + +# comparison JSON includes failureReasons and gateOutcomes for automation ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 9d788ba1ee..f385b8bf37 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -396,6 +396,31 @@ function buildJsonOutput( const regressions = comparisons.filter( (entry) => entry.regressions.length > 0, ); + const hasMissingCandidateRows = missingCandidateRows.length > 
0; + const hasCandidateOnlyRows = candidateOnlyRows.length > 0; + const hasInsufficientSamples = insufficientSampleRows.length > 0; + const hasMetricRegressions = regressions.some((entry) => + entry.regressions.some( + (issue) => + issue.startsWith("fps_drop=") || + issue.startsWith("startup_increase=") || + issue.startsWith("scrub_p95_increase="), + ), + ); + const failureReasons = []; + if (hasMetricRegressions) { + failureReasons.push("metric_regression"); + } + if (hasInsufficientSamples) { + failureReasons.push("insufficient_samples"); + } + if (!options.allowMissingCandidate && hasMissingCandidateRows) { + failureReasons.push("missing_candidate_rows"); + } + if (options.failOnCandidateOnly && hasCandidateOnlyRows) { + failureReasons.push("candidate_only_rows"); + } + const passed = failureReasons.length === 0; return { generatedAt: new Date().toISOString(), tolerance: { @@ -412,10 +437,16 @@ function buildJsonOutput( missingCandidateRows: missingCandidateRows.length, candidateOnlyRows: candidateOnlyRows.length, insufficientSampleRows: insufficientSampleRows.length, - passed: - regressions.length === 0 && - (options.allowMissingCandidate || missingCandidateRows.length === 0) && - (!options.failOnCandidateOnly || candidateOnlyRows.length === 0), + passed, + failureReasons, + gateOutcomes: { + metricRegressions: !hasMetricRegressions, + insufficientSamples: !hasInsufficientSamples, + missingCandidateRows: + options.allowMissingCandidate || !hasMissingCandidateRows, + candidateOnlyRows: + !options.failOnCandidateOnly || !hasCandidateOnlyRows, + }, }, regressions, missingCandidateRows, @@ -489,11 +520,7 @@ function main() { console.log(`Wrote comparison JSON to ${options.outputJson}`); } - if ( - comparisons.some((entry) => entry.regressions.length > 0) || - (!options.allowMissingCandidate && missingCandidateRows.length > 0) || - (options.failOnCandidateOnly && candidateOnlyRows.length > 0) - ) { + if (!outputJson.summary.passed) { process.exit(1); } } diff --git 
a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index d2718b4efa..d3c254d8c5 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -103,6 +103,12 @@ function buildSummarySection( markdown += `- Minimum samples per row: ${comparison.tolerance?.minSamplesPerRow ?? "n/a"}\n\n`; markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; + const failureReasons = Array.isArray(comparison.summary?.failureReasons) + ? comparison.summary.failureReasons + : []; + if (failureReasons.length > 0) { + markdown += `- Comparison failure reasons: ${failureReasons.join(", ")}\n\n`; + } } if (finalizeSummaryJson) { const finalizeSummary = JSON.parse(finalizeSummaryJson); From 625efa6cdb3a904357fa87e671997a8d577a6adf Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:04:04 +0000 Subject: [PATCH 134/333] improve: expose comparison gate diagnostics for automation Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 ++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 + scripts/compare-playback-benchmark-runs.js | 45 +++++++++++++++++----- scripts/publish-playback-matrix-summary.js | 6 +++ 5 files changed, 52 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 32a27d92b6..b535eb5464 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -106,6 +106,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. 
Comparison table also reports baseline/candidate run counts per row when multiple JSON inputs contribute to the same key. +Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for automation-friendly gate diagnostics. ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index caf43652f4..da66af7eb7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -364,6 +364,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup loop now recomputes contiguous prefetched coverage only when warmup buffer content changes. - Avoids repeated contiguous scans on idle warmup iterations. +45. **Added explicit comparison gate diagnostics in JSON and published summaries (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now emits `failureReasons` and `gateOutcomes` in summary JSON. + - `scripts/publish-playback-matrix-summary.js` now surfaces comparison failure reasons when present. + --- ## Root Cause Analysis Archive @@ -502,6 +506,7 @@ Decoder Pipeline: 46. Skipped contiguous warmup coverage scans until first warmup frame observation to reduce pre-frame warmup loop scan overhead. 47. Added minimum-sample comparison gating with `--min-samples-per-row`, insufficient-sample reporting, and finalize passthrough support. 48. Cached warmup contiguous coverage values and only recomputed contiguous scan when warmup buffer changed. +49. Added comparison JSON gate diagnostics (`failureReasons`, `gateOutcomes`) and surfaced failure reasons in published summary output. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -545,6 +550,8 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added `--min-samples-per-row`, insufficient-sample row reporting, and sample gate fields in markdown/JSON outputs. 
- `scripts/finalize-playback-matrix.js`: forwards `--min-samples-per-row` into compare stage and captures it in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. +- `scripts/compare-playback-benchmark-runs.js`: comparison JSON summary now includes explicit `failureReasons` and `gateOutcomes` fields. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes comparison failure reasons when present. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index c1f1c7f2f1..9cb6957e3e 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -186,6 +186,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # when multiple inputs are provided, comparison output includes baseline/candidate run counts per row # optional: require minimum sample count per compared row pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 + +# comparison JSON includes failureReasons and gateOutcomes for automation ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 9d788ba1ee..f385b8bf37 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -396,6 +396,31 @@ function buildJsonOutput( const 
regressions = comparisons.filter( (entry) => entry.regressions.length > 0, ); + const hasMissingCandidateRows = missingCandidateRows.length > 0; + const hasCandidateOnlyRows = candidateOnlyRows.length > 0; + const hasInsufficientSamples = insufficientSampleRows.length > 0; + const hasMetricRegressions = regressions.some((entry) => + entry.regressions.some( + (issue) => + issue.startsWith("fps_drop=") || + issue.startsWith("startup_increase=") || + issue.startsWith("scrub_p95_increase="), + ), + ); + const failureReasons = []; + if (hasMetricRegressions) { + failureReasons.push("metric_regression"); + } + if (hasInsufficientSamples) { + failureReasons.push("insufficient_samples"); + } + if (!options.allowMissingCandidate && hasMissingCandidateRows) { + failureReasons.push("missing_candidate_rows"); + } + if (options.failOnCandidateOnly && hasCandidateOnlyRows) { + failureReasons.push("candidate_only_rows"); + } + const passed = failureReasons.length === 0; return { generatedAt: new Date().toISOString(), tolerance: { @@ -412,10 +437,16 @@ function buildJsonOutput( missingCandidateRows: missingCandidateRows.length, candidateOnlyRows: candidateOnlyRows.length, insufficientSampleRows: insufficientSampleRows.length, - passed: - regressions.length === 0 && - (options.allowMissingCandidate || missingCandidateRows.length === 0) && - (!options.failOnCandidateOnly || candidateOnlyRows.length === 0), + passed, + failureReasons, + gateOutcomes: { + metricRegressions: !hasMetricRegressions, + insufficientSamples: !hasInsufficientSamples, + missingCandidateRows: + options.allowMissingCandidate || !hasMissingCandidateRows, + candidateOnlyRows: + !options.failOnCandidateOnly || !hasCandidateOnlyRows, + }, }, regressions, missingCandidateRows, @@ -489,11 +520,7 @@ function main() { console.log(`Wrote comparison JSON to ${options.outputJson}`); } - if ( - comparisons.some((entry) => entry.regressions.length > 0) || - (!options.allowMissingCandidate && missingCandidateRows.length > 0) 
|| - (options.failOnCandidateOnly && candidateOnlyRows.length > 0) - ) { + if (!outputJson.summary.passed) { process.exit(1); } } diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index d2718b4efa..d3c254d8c5 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -103,6 +103,12 @@ function buildSummarySection( markdown += `- Minimum samples per row: ${comparison.tolerance?.minSamplesPerRow ?? "n/a"}\n\n`; markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; + const failureReasons = Array.isArray(comparison.summary?.failureReasons) + ? comparison.summary.failureReasons + : []; + if (failureReasons.length > 0) { + markdown += `- Comparison failure reasons: ${failureReasons.join(", ")}\n\n`; + } } if (finalizeSummaryJson) { const finalizeSummary = JSON.parse(finalizeSummaryJson); From 87fa39164bec334bc3f3f37b40dc41da07cec6e1 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:07:15 +0000 Subject: [PATCH 135/333] fix: apply minimum sample gating only to comparable metrics Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 ++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/compare-playback-benchmark-runs.js | 27 +++++++++++++++------- 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index b535eb5464..cb3787efd8 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -107,6 +107,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. 
Comparison table also reports baseline/candidate run counts per row when multiple JSON inputs contribute to the same key. Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for automation-friendly gate diagnostics. +Minimum sample gating is applied against metrics that are actually comparable for the row (for example, scrub samples are not required for rows with no scrub metric comparison). ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index da66af7eb7..c318b0935c 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -360,6 +360,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison now flags rows with insufficient effective sample counts and reports them in markdown/JSON outputs. - `scripts/finalize-playback-matrix.js` forwards minimum sample gating settings into compare stage, and publish summary now surfaces sample gating status fields. +44. **Fixed sample gating semantics for non-comparable metrics (2026-02-13)** + - Minimum sample checks now only consider metrics that are actually comparable for the row. + - Prevents scrub sample requirements from incorrectly failing non-scrub comparison rows. + - Comparison output now includes compared metric count and effective sample count per row. + 44. **Cached warmup contiguous coverage counts during warmup (2026-02-13)** - Warmup loop now recomputes contiguous prefetched coverage only when warmup buffer content changes. - Avoids repeated contiguous scans on idle warmup iterations. @@ -507,6 +512,7 @@ Decoder Pipeline: 47. Added minimum-sample comparison gating with `--min-samples-per-row`, insufficient-sample reporting, and finalize passthrough support. 48. Cached warmup contiguous coverage values and only recomputed contiguous scan when warmup buffer changed. 49. 
Added comparison JSON gate diagnostics (`failureReasons`, `gateOutcomes`) and surfaced failure reasons in published summary output. +50. Corrected minimum sample gating semantics to only count comparable metrics and added compared-metric/effective-sample columns in comparison output. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -548,6 +554,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: finalize now writes summary JSON before publish and passes `--finalize-summary-json` into publish flow. - `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional finalize summary JSON input and surfaces finalize source/validation metadata. - `scripts/compare-playback-benchmark-runs.js`: added `--min-samples-per-row`, insufficient-sample row reporting, and sample gate fields in markdown/JSON outputs. +- `scripts/compare-playback-benchmark-runs.js`: minimum sample checks now apply only to metrics that are comparable for each row; output now includes compared metric count and effective sample count columns. - `scripts/finalize-playback-matrix.js`: forwards `--min-samples-per-row` into compare stage and captures it in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. - `scripts/compare-playback-benchmark-runs.js`: comparison JSON summary now includes explicit `failureReasons` and `gateOutcomes` fields. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 9cb6957e3e..5f5d9cf203 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -188,6 +188,7 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 # comparison JSON includes failureReasons and gateOutcomes for automation +# minimum sample gating uses metrics that are actually comparable for each row ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index f385b8bf37..e102d3f96f 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -270,11 +270,20 @@ function compareMetrics(baselineRows, candidateRows, options) { baseline.scrubSampleCount, candidate.scrubSampleCount, ); - const effectiveSampleCount = Math.min( - fpsMinSamples, - startupMinSamples, - scrubMinSamples, - ); + const comparableSampleCounts = []; + if (fpsDelta !== null) { + comparableSampleCounts.push(fpsMinSamples); + } + if (startupDelta !== null) { + comparableSampleCounts.push(startupMinSamples); + } + if (scrubDelta !== null) { + comparableSampleCounts.push(scrubMinSamples); + } + const effectiveSampleCount = + comparableSampleCounts.length > 0 + ? 
Math.min(...comparableSampleCounts) + : 0; if (effectiveSampleCount < options.minSamplesPerRow) { insufficientSampleRows.push({ platform: candidate.platform, @@ -314,6 +323,7 @@ function compareMetrics(baselineRows, candidateRows, options) { fpsMinSamples, startupMinSamples, scrubMinSamples, + comparedMetricCount: comparableSampleCounts.length, effectiveSampleCount, fpsDelta, startupDelta, @@ -377,10 +387,11 @@ function toMarkdown( md += "\n"; } md += - "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | F Samples | S Samples | Q Samples | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; - md += "|---|---|---|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---|\n"; + "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | F Samples | S Samples | Q Samples | Metrics | Effective Samples | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; + md += + "|---|---|---|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---|\n"; for (const row of comparisons) { - md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.baselineReportCount} | ${row.candidateReportCount} | ${row.fpsMinSamples} | ${row.startupMinSamples} | ${row.scrubMinSamples} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? row.regressions.join(", ") : "none"} |\n`; + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.baselineReportCount} | ${row.candidateReportCount} | ${row.fpsMinSamples} | ${row.startupMinSamples} | ${row.scrubMinSamples} | ${row.comparedMetricCount} | ${row.effectiveSampleCount} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? 
row.regressions.join(", ") : "none"} |\n`; } md += "\n"; return md; From b911d5d3ebcf06bc92d16683a9de6298d05c3821 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:07:15 +0000 Subject: [PATCH 136/333] fix: apply minimum sample gating only to comparable metrics Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 ++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/compare-playback-benchmark-runs.js | 27 +++++++++++++++------- 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index b535eb5464..cb3787efd8 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -107,6 +107,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. Comparison table also reports baseline/candidate run counts per row when multiple JSON inputs contribute to the same key. Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for automation-friendly gate diagnostics. +Minimum sample gating is applied against metrics that are actually comparable for the row (for example, scrub samples are not required for rows with no scrub metric comparison). ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index da66af7eb7..c318b0935c 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -360,6 +360,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison now flags rows with insufficient effective sample counts and reports them in markdown/JSON outputs. 
- `scripts/finalize-playback-matrix.js` forwards minimum sample gating settings into compare stage, and publish summary now surfaces sample gating status fields. +44. **Fixed sample gating semantics for non-comparable metrics (2026-02-13)** + - Minimum sample checks now only consider metrics that are actually comparable for the row. + - Prevents scrub sample requirements from incorrectly failing non-scrub comparison rows. + - Comparison output now includes compared metric count and effective sample count per row. + 44. **Cached warmup contiguous coverage counts during warmup (2026-02-13)** - Warmup loop now recomputes contiguous prefetched coverage only when warmup buffer content changes. - Avoids repeated contiguous scans on idle warmup iterations. @@ -507,6 +512,7 @@ Decoder Pipeline: 47. Added minimum-sample comparison gating with `--min-samples-per-row`, insufficient-sample reporting, and finalize passthrough support. 48. Cached warmup contiguous coverage values and only recomputed contiguous scan when warmup buffer changed. 49. Added comparison JSON gate diagnostics (`failureReasons`, `gateOutcomes`) and surfaced failure reasons in published summary output. +50. Corrected minimum sample gating semantics to only count comparable metrics and added compared-metric/effective-sample columns in comparison output. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -548,6 +554,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: finalize now writes summary JSON before publish and passes `--finalize-summary-json` into publish flow. - `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional finalize summary JSON input and surfaces finalize source/validation metadata. - `scripts/compare-playback-benchmark-runs.js`: added `--min-samples-per-row`, insufficient-sample row reporting, and sample gate fields in markdown/JSON outputs. 
+- `scripts/compare-playback-benchmark-runs.js`: minimum sample checks now apply only to metrics that are comparable for each row; output now includes compared metric count and effective sample count columns. - `scripts/finalize-playback-matrix.js`: forwards `--min-samples-per-row` into compare stage and captures it in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. - `scripts/compare-playback-benchmark-runs.js`: comparison JSON summary now includes explicit `failureReasons` and `gateOutcomes` fields. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 9cb6957e3e..5f5d9cf203 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -188,6 +188,7 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 # comparison JSON includes failureReasons and gateOutcomes for automation +# minimum sample gating uses metrics that are actually comparable for each row ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index f385b8bf37..e102d3f96f 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -270,11 +270,20 @@ function compareMetrics(baselineRows, candidateRows, options) { baseline.scrubSampleCount, candidate.scrubSampleCount, ); - const effectiveSampleCount = Math.min( - fpsMinSamples, - startupMinSamples, - scrubMinSamples, - ); + const comparableSampleCounts = []; + if (fpsDelta !== null) { + comparableSampleCounts.push(fpsMinSamples); + } + if (startupDelta !== null) { + comparableSampleCounts.push(startupMinSamples); + } + if (scrubDelta !== null) { + 
comparableSampleCounts.push(scrubMinSamples); + } + const effectiveSampleCount = + comparableSampleCounts.length > 0 + ? Math.min(...comparableSampleCounts) + : 0; if (effectiveSampleCount < options.minSamplesPerRow) { insufficientSampleRows.push({ platform: candidate.platform, @@ -314,6 +323,7 @@ function compareMetrics(baselineRows, candidateRows, options) { fpsMinSamples, startupMinSamples, scrubMinSamples, + comparedMetricCount: comparableSampleCounts.length, effectiveSampleCount, fpsDelta, startupDelta, @@ -377,10 +387,11 @@ function toMarkdown( md += "\n"; } md += - "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | F Samples | S Samples | Q Samples | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; - md += "|---|---|---|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---|\n"; + "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | F Samples | S Samples | Q Samples | Metrics | Effective Samples | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; + md += + "|---|---|---|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---|\n"; for (const row of comparisons) { - md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.baselineReportCount} | ${row.candidateReportCount} | ${row.fpsMinSamples} | ${row.startupMinSamples} | ${row.scrubMinSamples} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? row.regressions.join(", ") : "none"} |\n`; + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.baselineReportCount} | ${row.candidateReportCount} | ${row.fpsMinSamples} | ${row.startupMinSamples} | ${row.scrubMinSamples} | ${row.comparedMetricCount} | ${row.effectiveSampleCount} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? 
row.regressions.join(", ") : "none"} |\n`; } md += "\n"; return md; From ee2c5a9aed8ba8f5dfaa319d9c0e31debf78eb1f Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:08:58 +0000 Subject: [PATCH 137/333] improve: include comparison diagnostics in finalize and publish summaries Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/finalize-playback-matrix.js | 2 ++ scripts/publish-playback-matrix-summary.js | 8 ++++++++ 5 files changed, 18 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index cb3787efd8..cab9ae9ea8 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -86,6 +86,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md Finalize summary JSON includes generated timestamp, artifact paths, settings, pass/fail flags, and git branch/commit metadata when available. +Finalize summary JSON also carries comparison failure reasons and gate outcomes when comparison is enabled. 
# Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index c318b0935c..5f6a621468 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -365,6 +365,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prevents scrub sample requirements from incorrectly failing non-scrub comparison rows. - Comparison output now includes compared metric count and effective sample count per row. +45. **Extended finalize summary comparison diagnostics (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now includes comparison failure reasons and gate outcomes in summary results. + - `scripts/publish-playback-matrix-summary.js` now surfaces finalize comparison failure reasons when present. + 44. **Cached warmup contiguous coverage counts during warmup (2026-02-13)** - Warmup loop now recomputes contiguous prefetched coverage only when warmup buffer content changes. - Avoids repeated contiguous scans on idle warmup iterations. @@ -513,6 +517,7 @@ Decoder Pipeline: 48. Cached warmup contiguous coverage values and only recomputed contiguous scan when warmup buffer changed. 49. Added comparison JSON gate diagnostics (`failureReasons`, `gateOutcomes`) and surfaced failure reasons in published summary output. 50. Corrected minimum sample gating semantics to only count comparable metrics and added compared-metric/effective-sample columns in comparison output. +51. Extended finalize summary and publish output with comparison failure reasons and gate outcome metadata. 
**Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -559,6 +564,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. - `scripts/compare-playback-benchmark-runs.js`: comparison JSON summary now includes explicit `failureReasons` and `gateOutcomes` fields. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes comparison failure reasons when present. +- `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison failure reasons and gate outcome fields in results metadata. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 5f5d9cf203..001404ac0b 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -115,6 +115,7 @@ pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-di ``` Finalize summary JSON includes artifact paths, gate outcomes, settings, and git branch/commit metadata when available. +When comparison is enabled, finalize summary JSON also includes comparison failure reasons and gate outcome details. 
Include optimization thresholds when finalizing: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 71babdd9e2..f4c3107a3c 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -325,6 +325,8 @@ function main() { results: { validationPassed: validation.passed === true, comparisonPassed: comparison ? comparison.summary?.passed === true : null, + comparisonFailureReasons: comparison?.summary?.failureReasons ?? null, + comparisonGateOutcomes: comparison?.summary?.gateOutcomes ?? null, }, }; fs.writeFileSync(summaryJsonPath, JSON.stringify(summary, null, 2), "utf8"); diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index d3c254d8c5..14f389da15 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -118,6 +118,14 @@ function buildSummarySection( if (finalizeSummary.results?.comparisonPassed !== null) { markdown += `- Finalize comparison passed: ${finalizeSummary.results?.comparisonPassed === true ? "true" : "false"}\n`; } + const finalizeFailureReasons = Array.isArray( + finalizeSummary.results?.comparisonFailureReasons, + ) + ? 
finalizeSummary.results.comparisonFailureReasons + : []; + if (finalizeFailureReasons.length > 0) { + markdown += `- Finalize comparison failure reasons: ${finalizeFailureReasons.join(", ")}\n`; + } markdown += "\n"; } From 1b105c7a00937258df7583fe4cd62ddcd7ea2299 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:08:58 +0000 Subject: [PATCH 138/333] improve: include comparison diagnostics in finalize and publish summaries Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/finalize-playback-matrix.js | 2 ++ scripts/publish-playback-matrix-summary.js | 8 ++++++++ 5 files changed, 18 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index cb3787efd8..cab9ae9ea8 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -86,6 +86,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md Finalize summary JSON includes generated timestamp, artifact paths, settings, pass/fail flags, and git branch/commit metadata when available. +Finalize summary JSON also carries comparison failure reasons and gate outcomes when comparison is enabled. 
# Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index c318b0935c..5f6a621468 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -365,6 +365,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prevents scrub sample requirements from incorrectly failing non-scrub comparison rows. - Comparison output now includes compared metric count and effective sample count per row. +45. **Extended finalize summary comparison diagnostics (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now includes comparison failure reasons and gate outcomes in summary results. + - `scripts/publish-playback-matrix-summary.js` now surfaces finalize comparison failure reasons when present. + 44. **Cached warmup contiguous coverage counts during warmup (2026-02-13)** - Warmup loop now recomputes contiguous prefetched coverage only when warmup buffer content changes. - Avoids repeated contiguous scans on idle warmup iterations. @@ -513,6 +517,7 @@ Decoder Pipeline: 48. Cached warmup contiguous coverage values and only recomputed contiguous scan when warmup buffer changed. 49. Added comparison JSON gate diagnostics (`failureReasons`, `gateOutcomes`) and surfaced failure reasons in published summary output. 50. Corrected minimum sample gating semantics to only count comparable metrics and added compared-metric/effective-sample columns in comparison output. +51. Extended finalize summary and publish output with comparison failure reasons and gate outcome metadata. 
**Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -559,6 +564,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. - `scripts/compare-playback-benchmark-runs.js`: comparison JSON summary now includes explicit `failureReasons` and `gateOutcomes` fields. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes comparison failure reasons when present. +- `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison failure reasons and gate outcome fields in results metadata. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 5f5d9cf203..001404ac0b 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -115,6 +115,7 @@ pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-di ``` Finalize summary JSON includes artifact paths, gate outcomes, settings, and git branch/commit metadata when available. +When comparison is enabled, finalize summary JSON also includes comparison failure reasons and gate outcome details. 
Include optimization thresholds when finalizing: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 71babdd9e2..f4c3107a3c 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -325,6 +325,8 @@ function main() { results: { validationPassed: validation.passed === true, comparisonPassed: comparison ? comparison.summary?.passed === true : null, + comparisonFailureReasons: comparison?.summary?.failureReasons ?? null, + comparisonGateOutcomes: comparison?.summary?.gateOutcomes ?? null, }, }; fs.writeFileSync(summaryJsonPath, JSON.stringify(summary, null, 2), "utf8"); diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index d3c254d8c5..14f389da15 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -118,6 +118,14 @@ function buildSummarySection( if (finalizeSummary.results?.comparisonPassed !== null) { markdown += `- Finalize comparison passed: ${finalizeSummary.results?.comparisonPassed === true ? "true" : "false"}\n`; } + const finalizeFailureReasons = Array.isArray( + finalizeSummary.results?.comparisonFailureReasons, + ) + ? 
finalizeSummary.results.comparisonFailureReasons + : []; + if (finalizeFailureReasons.length > 0) { + markdown += `- Finalize comparison failure reasons: ${finalizeFailureReasons.join(", ")}\n`; + } markdown += "\n"; } From 2fb1b74e4f348641bb39df11d76acd9f34651129 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:13:48 +0000 Subject: [PATCH 139/333] improve: add parse-error gating and parse stats to matrix comparisons Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 9 ++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 5 ++ scripts/compare-playback-benchmark-runs.js | 97 ++++++++++++++++++++-- scripts/finalize-playback-matrix.js | 11 ++- scripts/publish-playback-matrix-summary.js | 3 + 6 files changed, 122 insertions(+), 6 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index cab9ae9ea8..ea1ce8c465 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -82,6 +82,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --ou node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final 
--output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md @@ -103,12 +104,14 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. Comparison table also reports baseline/candidate run counts per row when multiple JSON inputs contribute to the same key. Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for automation-friendly gate diagnostics. Minimum sample gating is applied against metrics that are actually comparable for the row (for example, scrub samples are not required for rows with no scrub metric comparison). +Comparison JSON also includes baseline/candidate file parsing stats and optional parse-error gating support. 
``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 5f6a621468..c973dd166a 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -377,6 +377,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/compare-playback-benchmark-runs.js` now emits `failureReasons` and `gateOutcomes` in summary JSON. - `scripts/publish-playback-matrix-summary.js` now surfaces comparison failure reasons when present. +46. **Added parse-error gating and parse stats to comparison flows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-parse-errors`. + - Comparison JSON now includes baseline/candidate file parsing stats and parse error entries. + - `scripts/finalize-playback-matrix.js` now forwards parse-error gating option to compare stage; published summary surfaces parse policy and parse error counts. + --- ## Root Cause Analysis Archive @@ -518,6 +523,7 @@ Decoder Pipeline: 49. Added comparison JSON gate diagnostics (`failureReasons`, `gateOutcomes`) and surfaced failure reasons in published summary output. 50. Corrected minimum sample gating semantics to only count comparable metrics and added compared-metric/effective-sample columns in comparison output. 51. Extended finalize summary and publish output with comparison failure reasons and gate outcome metadata. +52. Added comparison parse-error gating (`--fail-on-parse-errors`) with parse stats surfaced in comparison JSON, finalize settings, and published summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -565,6 +571,9 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: comparison JSON summary now includes explicit `failureReasons` and `gateOutcomes` fields. 
- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes comparison failure reasons when present. - `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison failure reasons and gate outcome fields in results metadata. +- `scripts/compare-playback-benchmark-runs.js`: added parse-error gating and baseline/candidate file parse stats/parse error entries in JSON output. +- `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 001404ac0b..67e70870f6 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -133,6 +133,9 @@ pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir # optional: require minimum sample count per compared row pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 + +# optional: fail comparison gate when any input JSON fails to parse +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors ``` Finalize and publish to benchmark history in one command: @@ -190,6 +193,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # comparison JSON includes failureReasons and gateOutcomes for automation # minimum sample gating uses metrics that are actually comparable for each row +# optional: fail comparison gate when any input JSON fails to parse +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index e102d3f96f..3beec8cd03 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -15,6 +15,7 @@ function parseArgs(argv) { allowMissingCandidate: false, failOnCandidateOnly: false, minSamplesPerRow: 1, + failOnParseErrors: false, }; for (let i = 2; i < argv.length; i++) { @@ -84,6 +85,10 @@ function parseArgs(argv) { options.minSamplesPerRow = value; continue; } + if (arg === "--fail-on-parse-errors") { + options.failOnParseErrors = true; + continue; + } throw new Error(`Unknown argument: 
${arg}`); } @@ -91,7 +96,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } @@ -141,14 +146,38 @@ function maximum(values) { function collectMetrics(files) { const accumulators = new Map(); + const stats = { + totalFiles: files.length, + parsedFiles: 0, + usableFiles: 0, + skippedFiles: 0, + parseErrors: [], + }; for (const filePath of files) { - const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + let parsed; + try { + parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + stats.parsedFiles += 1; + } catch (error) { + stats.parseErrors.push({ + file: filePath, + error: error instanceof Error ? error.message : String(error), + }); + continue; + } + + if (!Array.isArray(parsed.reports) || parsed.reports.length === 0) { + stats.skippedFiles += 1; + continue; + } + const notes = parseNotes(parsed.notes); const platform = notes.platform ?? "unknown"; const gpu = notes.gpu ?? "unknown"; const scenario = notes.scenario ?? "unspecified"; const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + let fileContributedRows = false; for (const report of reports) { const key = `${platform}|${gpu}|${scenario}|${report.recording_name ?? 
"unknown"}|${report.is_fragmented ? "fragmented" : "mp4"}`; @@ -169,6 +198,13 @@ function collectMetrics(files) { const scrubP95Values = scrub .map((entry) => entry.p95_seek_time_ms) .filter((entry) => typeof entry === "number"); + const hasUsableMetrics = + fpsValues.length > 0 || + startupValues.length > 0 || + scrubP95Values.length > 0; + if (!hasUsableMetrics) { + continue; + } const existing = accumulators.get(key) ?? { key, @@ -187,6 +223,13 @@ function collectMetrics(files) { existing.startupSamples.push(...startupValues); existing.scrubP95Samples.push(...scrubP95Values); accumulators.set(key, existing); + fileContributedRows = true; + } + + if (fileContributedRows) { + stats.usableFiles += 1; + } else { + stats.skippedFiles += 1; } } @@ -209,7 +252,7 @@ function collectMetrics(files) { }); } - return rows; + return { rows, stats }; } function delta(candidate, baseline) { @@ -346,6 +389,8 @@ function toMarkdown( missingCandidateRows, candidateOnlyRows, insufficientSampleRows, + baselineStats, + candidateStats, options, ) { const regressions = comparisons.filter( @@ -357,7 +402,25 @@ function toMarkdown( md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; md += `Coverage gate: missing_candidate=${options.allowMissingCandidate ? "allow" : "fail"}, candidate_only=${options.failOnCandidateOnly ? "fail" : "allow"}\n\n`; md += `Sample gate: min_samples_per_row>=${options.minSamplesPerRow}\n\n`; + md += `Parse gate: parse_errors=${options.failOnParseErrors ? 
"fail" : "allow"}\n\n`; + md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, parse_errors=${baselineStats.parseErrors.length}\n`; + md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; + if ( + baselineStats.parseErrors.length > 0 || + candidateStats.parseErrors.length > 0 + ) { + md += "## Parse Errors\n\n"; + md += "| Side | File | Error |\n"; + md += "|---|---|---|\n"; + for (const entry of baselineStats.parseErrors.slice(0, 20)) { + md += `| baseline | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + } + for (const entry of candidateStats.parseErrors.slice(0, 20)) { + md += `| candidate | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + } + md += "\n"; + } if (missingCandidateRows.length > 0) { md += "## Missing Candidate Rows\n\n"; md += "| Platform | GPU | Scenario | Recording | Format |\n"; @@ -402,6 +465,8 @@ function buildJsonOutput( missingCandidateRows, candidateOnlyRows, insufficientSampleRows, + baselineStats, + candidateStats, options, ) { const regressions = comparisons.filter( @@ -431,6 +496,13 @@ function buildJsonOutput( if (options.failOnCandidateOnly && hasCandidateOnlyRows) { failureReasons.push("candidate_only_rows"); } + if ( + options.failOnParseErrors && + (baselineStats.parseErrors.length > 0 || + candidateStats.parseErrors.length > 0) + ) { + failureReasons.push("parse_errors"); + } const passed = failureReasons.length === 0; return { generatedAt: new Date().toISOString(), 
@@ -441,6 +513,11 @@ function buildJsonOutput( allowMissingCandidate: options.allowMissingCandidate, failOnCandidateOnly: options.failOnCandidateOnly, minSamplesPerRow: options.minSamplesPerRow, + failOnParseErrors: options.failOnParseErrors, + }, + fileStats: { + baseline: baselineStats, + candidate: candidateStats, }, summary: { comparedRows: comparisons.length, @@ -457,6 +534,10 @@ function buildJsonOutput( options.allowMissingCandidate || !hasMissingCandidateRows, candidateOnlyRows: !options.failOnCandidateOnly || !hasCandidateOnlyRows, + parseErrors: + !options.failOnParseErrors || + (baselineStats.parseErrors.length === 0 && + candidateStats.parseErrors.length === 0), }, }, regressions, @@ -493,8 +574,10 @@ function main() { throw new Error("No candidate JSON files found"); } - const baselineRows = collectMetrics(baselineFiles); - const candidateRows = collectMetrics(candidateFiles); + const baselineCollected = collectMetrics(baselineFiles); + const candidateCollected = collectMetrics(candidateFiles); + const baselineRows = baselineCollected.rows; + const candidateRows = candidateCollected.rows; const { comparisons, missingCandidateRows, @@ -506,6 +589,8 @@ function main() { missingCandidateRows, candidateOnlyRows, insufficientSampleRows, + baselineCollected.stats, + candidateCollected.stats, options, ); const outputJson = buildJsonOutput( @@ -513,6 +598,8 @@ function main() { missingCandidateRows, candidateOnlyRows, insufficientSampleRows, + baselineCollected.stats, + candidateCollected.stats, options, ); diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index f4c3107a3c..a2bd3865e8 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -22,6 +22,7 @@ function parseArgs(argv) { allowMissingCandidate: false, failOnCandidateOnly: false, minSamplesPerRow: 1, + failOnParseErrors: false, }; for (let i = 2; i < argv.length; i++) { @@ -137,6 +138,10 @@ function parseArgs(argv) { 
options.minSamplesPerRow = value; continue; } + if (arg === "--fail-on-parse-errors") { + options.failOnParseErrors = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -144,7 +149,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -274,6 +279,9 @@ function main() { if (options.failOnCandidateOnly) { compareArgs.push("--fail-on-candidate-only"); } + if (options.failOnParseErrors) { + compareArgs.push("--fail-on-parse-errors"); + } compareArgs.push("--min-samples-per-row", String(options.minSamplesPerRow)); run("node", compareArgs); } @@ -321,6 +329,7 @@ function main() { allowMissingCandidate: options.allowMissingCandidate, failOnCandidateOnly: options.failOnCandidateOnly, minSamplesPerRow: options.minSamplesPerRow, + failOnParseErrors: options.failOnParseErrors, }, results: { validationPassed: validation.passed === true, diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 14f389da15..218f9a5e1d 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -103,6 +103,9 @@ function buildSummarySection( markdown += `- Minimum samples per row: ${comparison.tolerance?.minSamplesPerRow ?? "n/a"}\n\n`; markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; + markdown += `- Parse error policy: ${comparison.tolerance?.failOnParseErrors ? "fail" : "allow"}\n`; + markdown += `- Baseline parse errors: ${comparison.fileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; + markdown += `- Candidate parse errors: ${comparison.fileStats?.candidate?.parseErrors?.length ?? "n/a"}\n\n`; const failureReasons = Array.isArray(comparison.summary?.failureReasons) ? 
comparison.summary.failureReasons : []; From ffc84f2c49f3e1605f0c579f39659f1dd8fd395e Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:13:48 +0000 Subject: [PATCH 140/333] improve: add parse-error gating and parse stats to matrix comparisons Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 9 ++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 5 ++ scripts/compare-playback-benchmark-runs.js | 97 ++++++++++++++++++++-- scripts/finalize-playback-matrix.js | 11 ++- scripts/publish-playback-matrix-summary.js | 3 + 6 files changed, 122 insertions(+), 6 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index cab9ae9ea8..ea1ce8c465 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -82,6 +82,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --ou node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final 
--require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md @@ -103,12 +104,14 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. Comparison table also reports baseline/candidate run counts per row when multiple JSON inputs contribute to the same key. Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for automation-friendly gate diagnostics. Minimum sample gating is applied against metrics that are actually comparable for the row (for example, scrub samples are not required for rows with no scrub metric comparison). +Comparison JSON also includes baseline/candidate file parsing stats and optional parse-error gating support. 
``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 5f6a621468..c973dd166a 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -377,6 +377,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/compare-playback-benchmark-runs.js` now emits `failureReasons` and `gateOutcomes` in summary JSON. - `scripts/publish-playback-matrix-summary.js` now surfaces comparison failure reasons when present. +46. **Added parse-error gating and parse stats to comparison flows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-parse-errors`. + - Comparison JSON now includes baseline/candidate file parsing stats and parse error entries. + - `scripts/finalize-playback-matrix.js` now forwards parse-error gating option to compare stage; published summary surfaces parse policy and parse error counts. + --- ## Root Cause Analysis Archive @@ -518,6 +523,7 @@ Decoder Pipeline: 49. Added comparison JSON gate diagnostics (`failureReasons`, `gateOutcomes`) and surfaced failure reasons in published summary output. 50. Corrected minimum sample gating semantics to only count comparable metrics and added compared-metric/effective-sample columns in comparison output. 51. Extended finalize summary and publish output with comparison failure reasons and gate outcome metadata. +52. Added comparison parse-error gating (`--fail-on-parse-errors`) with parse stats surfaced in comparison JSON, finalize settings, and published summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -565,6 +571,9 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: comparison JSON summary now includes explicit `failureReasons` and `gateOutcomes` fields. 
- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes comparison failure reasons when present. - `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison failure reasons and gate outcome fields in results metadata. +- `scripts/compare-playback-benchmark-runs.js`: added parse-error gating and baseline/candidate file parse stats/parse error entries in JSON output. +- `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 001404ac0b..67e70870f6 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -133,6 +133,9 @@ pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir # optional: require minimum sample count per compared row pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 + +# optional: fail comparison gate when any input JSON fails to parse +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors ``` Finalize and publish to benchmark history in one command: @@ -190,6 +193,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # comparison JSON includes failureReasons and gateOutcomes for automation # minimum sample gating uses metrics that are actually comparable for each row +# optional: fail comparison gate when any input JSON fails to parse +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index e102d3f96f..3beec8cd03 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -15,6 +15,7 @@ function parseArgs(argv) { allowMissingCandidate: false, failOnCandidateOnly: false, minSamplesPerRow: 1, + failOnParseErrors: false, }; for (let i = 2; i < argv.length; i++) { @@ -84,6 +85,10 @@ function parseArgs(argv) { options.minSamplesPerRow = value; continue; } + if (arg === "--fail-on-parse-errors") { + options.failOnParseErrors = true; + continue; + } throw new Error(`Unknown argument: 
${arg}`); } @@ -91,7 +96,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } @@ -141,14 +146,38 @@ function maximum(values) { function collectMetrics(files) { const accumulators = new Map(); + const stats = { + totalFiles: files.length, + parsedFiles: 0, + usableFiles: 0, + skippedFiles: 0, + parseErrors: [], + }; for (const filePath of files) { - const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + let parsed; + try { + parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + stats.parsedFiles += 1; + } catch (error) { + stats.parseErrors.push({ + file: filePath, + error: error instanceof Error ? error.message : String(error), + }); + continue; + } + + if (!Array.isArray(parsed.reports) || parsed.reports.length === 0) { + stats.skippedFiles += 1; + continue; + } + const notes = parseNotes(parsed.notes); const platform = notes.platform ?? "unknown"; const gpu = notes.gpu ?? "unknown"; const scenario = notes.scenario ?? "unspecified"; const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + let fileContributedRows = false; for (const report of reports) { const key = `${platform}|${gpu}|${scenario}|${report.recording_name ?? 
"unknown"}|${report.is_fragmented ? "fragmented" : "mp4"}`; @@ -169,6 +198,13 @@ function collectMetrics(files) { const scrubP95Values = scrub .map((entry) => entry.p95_seek_time_ms) .filter((entry) => typeof entry === "number"); + const hasUsableMetrics = + fpsValues.length > 0 || + startupValues.length > 0 || + scrubP95Values.length > 0; + if (!hasUsableMetrics) { + continue; + } const existing = accumulators.get(key) ?? { key, @@ -187,6 +223,13 @@ function collectMetrics(files) { existing.startupSamples.push(...startupValues); existing.scrubP95Samples.push(...scrubP95Values); accumulators.set(key, existing); + fileContributedRows = true; + } + + if (fileContributedRows) { + stats.usableFiles += 1; + } else { + stats.skippedFiles += 1; } } @@ -209,7 +252,7 @@ function collectMetrics(files) { }); } - return rows; + return { rows, stats }; } function delta(candidate, baseline) { @@ -346,6 +389,8 @@ function toMarkdown( missingCandidateRows, candidateOnlyRows, insufficientSampleRows, + baselineStats, + candidateStats, options, ) { const regressions = comparisons.filter( @@ -357,7 +402,25 @@ function toMarkdown( md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; md += `Coverage gate: missing_candidate=${options.allowMissingCandidate ? "allow" : "fail"}, candidate_only=${options.failOnCandidateOnly ? "fail" : "allow"}\n\n`; md += `Sample gate: min_samples_per_row>=${options.minSamplesPerRow}\n\n`; + md += `Parse gate: parse_errors=${options.failOnParseErrors ? 
"fail" : "allow"}\n\n`; + md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, parse_errors=${baselineStats.parseErrors.length}\n`; + md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; + if ( + baselineStats.parseErrors.length > 0 || + candidateStats.parseErrors.length > 0 + ) { + md += "## Parse Errors\n\n"; + md += "| Side | File | Error |\n"; + md += "|---|---|---|\n"; + for (const entry of baselineStats.parseErrors.slice(0, 20)) { + md += `| baseline | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + } + for (const entry of candidateStats.parseErrors.slice(0, 20)) { + md += `| candidate | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + } + md += "\n"; + } if (missingCandidateRows.length > 0) { md += "## Missing Candidate Rows\n\n"; md += "| Platform | GPU | Scenario | Recording | Format |\n"; @@ -402,6 +465,8 @@ function buildJsonOutput( missingCandidateRows, candidateOnlyRows, insufficientSampleRows, + baselineStats, + candidateStats, options, ) { const regressions = comparisons.filter( @@ -431,6 +496,13 @@ function buildJsonOutput( if (options.failOnCandidateOnly && hasCandidateOnlyRows) { failureReasons.push("candidate_only_rows"); } + if ( + options.failOnParseErrors && + (baselineStats.parseErrors.length > 0 || + candidateStats.parseErrors.length > 0) + ) { + failureReasons.push("parse_errors"); + } const passed = failureReasons.length === 0; return { generatedAt: new Date().toISOString(), 
@@ -441,6 +513,11 @@ function buildJsonOutput( allowMissingCandidate: options.allowMissingCandidate, failOnCandidateOnly: options.failOnCandidateOnly, minSamplesPerRow: options.minSamplesPerRow, + failOnParseErrors: options.failOnParseErrors, + }, + fileStats: { + baseline: baselineStats, + candidate: candidateStats, }, summary: { comparedRows: comparisons.length, @@ -457,6 +534,10 @@ function buildJsonOutput( options.allowMissingCandidate || !hasMissingCandidateRows, candidateOnlyRows: !options.failOnCandidateOnly || !hasCandidateOnlyRows, + parseErrors: + !options.failOnParseErrors || + (baselineStats.parseErrors.length === 0 && + candidateStats.parseErrors.length === 0), }, }, regressions, @@ -493,8 +574,10 @@ function main() { throw new Error("No candidate JSON files found"); } - const baselineRows = collectMetrics(baselineFiles); - const candidateRows = collectMetrics(candidateFiles); + const baselineCollected = collectMetrics(baselineFiles); + const candidateCollected = collectMetrics(candidateFiles); + const baselineRows = baselineCollected.rows; + const candidateRows = candidateCollected.rows; const { comparisons, missingCandidateRows, @@ -506,6 +589,8 @@ function main() { missingCandidateRows, candidateOnlyRows, insufficientSampleRows, + baselineCollected.stats, + candidateCollected.stats, options, ); const outputJson = buildJsonOutput( @@ -513,6 +598,8 @@ function main() { missingCandidateRows, candidateOnlyRows, insufficientSampleRows, + baselineCollected.stats, + candidateCollected.stats, options, ); diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index f4c3107a3c..a2bd3865e8 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -22,6 +22,7 @@ function parseArgs(argv) { allowMissingCandidate: false, failOnCandidateOnly: false, minSamplesPerRow: 1, + failOnParseErrors: false, }; for (let i = 2; i < argv.length; i++) { @@ -137,6 +138,10 @@ function parseArgs(argv) { 
options.minSamplesPerRow = value; continue; } + if (arg === "--fail-on-parse-errors") { + options.failOnParseErrors = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -144,7 +149,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -274,6 +279,9 @@ function main() { if (options.failOnCandidateOnly) { compareArgs.push("--fail-on-candidate-only"); } + if (options.failOnParseErrors) { + compareArgs.push("--fail-on-parse-errors"); + } compareArgs.push("--min-samples-per-row", String(options.minSamplesPerRow)); run("node", compareArgs); } @@ -321,6 +329,7 @@ function main() { allowMissingCandidate: options.allowMissingCandidate, failOnCandidateOnly: options.failOnCandidateOnly, minSamplesPerRow: options.minSamplesPerRow, + failOnParseErrors: options.failOnParseErrors, }, results: { validationPassed: validation.passed === true, diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 14f389da15..218f9a5e1d 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -103,6 +103,9 @@ function buildSummarySection( markdown += `- Minimum samples per row: ${comparison.tolerance?.minSamplesPerRow ?? "n/a"}\n\n`; markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; + markdown += `- Parse error policy: ${comparison.tolerance?.failOnParseErrors ? "fail" : "allow"}\n`; + markdown += `- Baseline parse errors: ${comparison.fileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; + markdown += `- Candidate parse errors: ${comparison.fileStats?.candidate?.parseErrors?.length ?? "n/a"}\n\n`; const failureReasons = Array.isArray(comparison.summary?.failureReasons) ? 
comparison.summary.failureReasons : []; From d770bc4567c690d0e24f60299ad5a139beb30ce7 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:15:35 +0000 Subject: [PATCH 141/333] improve: invalidate warmup contiguous cache on structural buffer changes Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 ++++++ crates/editor/src/playback.rs | 36 +++++++++++++++++++++--------- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index c973dd166a..a5bf591c5f 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -382,6 +382,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison JSON now includes baseline/candidate file parsing stats and parse error entries. - `scripts/finalize-playback-matrix.js` now forwards parse-error gating option to compare stage; published summary surfaces parse policy and parse error counts. +47. **Made keyed prefetch insert helper report structural changes (2026-02-13)** + - `insert_prefetched_frame` now returns whether keyed prefetch buffer changed (insert and/or trim). + - Warmup loop now uses this direct signal instead of length-only delta checks for contiguous coverage cache invalidation. + - Improves warmup cache correctness when inserts and trims occur with stable overall buffer length. + --- ## Root Cause Analysis Archive @@ -524,6 +529,7 @@ Decoder Pipeline: 50. Corrected minimum sample gating semantics to only count comparable metrics and added compared-metric/effective-sample columns in comparison output. 51. Extended finalize summary and publish output with comparison failure reasons and gate outcome metadata. 52. Added comparison parse-error gating (`--fail-on-parse-errors`) with parse stats surfaced in comparison JSON, finalize settings, and published summaries. +53. 
Updated keyed prefetch insert helper to emit structural-change signals for warmup contiguous coverage cache invalidation. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -574,6 +580,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added parse-error gating and baseline/candidate file parse stats/parse error entries in JSON output. - `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. +- `crates/editor/src/playback.rs`: `insert_prefetched_frame` now returns structural-change signals and warmup cache invalidation uses this signal to avoid stale contiguous counts when insert+trim keeps buffer length unchanged. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 972824a75d..d6bf970227 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -101,7 +101,8 @@ impl FrameCache { } } -fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_frame: u32) { +fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_frame: u32) -> bool { + let mut changed = false; while buffer.len() > PREFETCH_BUFFER_SIZE { let far_ahead_frame = buffer .iter() @@ -111,6 +112,7 @@ fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_fra if let Some(frame) = far_ahead_frame { buffer.remove(&frame); + changed = true; continue; } @@ -118,20 +120,25 @@ fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_fra break; }; buffer.remove(&oldest_frame); + changed = true; } + changed } fn insert_prefetched_frame( buffer: &mut BTreeMap, prefetched: PrefetchedFrame, current_frame: u32, -) { +) -> bool { if prefetched.frame_number < current_frame { - return; + return false; } - buffer.entry(prefetched.frame_number).or_insert(prefetched); - trim_prefetch_buffer(buffer, current_frame); + let frame_number = prefetched.frame_number; + let inserted_new = !buffer.contains_key(&frame_number); + buffer.entry(frame_number).or_insert(prefetched); + let trimmed = trim_prefetch_buffer(buffer, current_frame); + inserted_new || trimmed } fn prune_prefetch_buffer_before_frame( @@ -566,9 +573,11 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { if prefetched.generation == seek_generation { - let pre_insert_len = prefetch_buffer.len(); - insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); - if prefetch_buffer.len() != pre_insert_len { + if insert_prefetched_frame( + &mut prefetch_buffer, + prefetched, + frame_number, + ) { warmup_buffer_changed = true; } if first_frame_time.is_none() && !prefetch_buffer.is_empty() { @@ -617,7 +626,8 @@ impl Playback { } while let Ok(prefetched) = prefetch_rx.try_recv() { if prefetched.generation == seek_generation { - insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); + let _ = + insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); } } prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); @@ -698,7 +708,11 @@ impl Playback { found_frame = Some(prefetched); break; } else if prefetched.frame_number >= frame_number { - insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); + let _ = insert_prefetched_frame( + &mut prefetch_buffer, + prefetched, + frame_number, + ); } } _ = tokio::time::sleep(in_flight_poll_interval) => { @@ -752,7 +766,7 @@ impl Playback { prefetched.segment_index, )) } else { - insert_prefetched_frame( + let _ = insert_prefetched_frame( &mut prefetch_buffer, prefetched, frame_number, From 51f98323647247a0e32447acf406c45d100ef498 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:15:35 +0000 Subject: [PATCH 142/333] improve: invalidate warmup contiguous cache on structural buffer changes Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 ++++++ crates/editor/src/playback.rs | 36 +++++++++++++++++++++--------- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index c973dd166a..a5bf591c5f 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -382,6 
+382,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison JSON now includes baseline/candidate file parsing stats and parse error entries. - `scripts/finalize-playback-matrix.js` now forwards parse-error gating option to compare stage; published summary surfaces parse policy and parse error counts. +47. **Made keyed prefetch insert helper report structural changes (2026-02-13)** + - `insert_prefetched_frame` now returns whether keyed prefetch buffer changed (insert and/or trim). + - Warmup loop now uses this direct signal instead of length-only delta checks for contiguous coverage cache invalidation. + - Improves warmup cache correctness when inserts and trims occur with stable overall buffer length. + --- ## Root Cause Analysis Archive @@ -524,6 +529,7 @@ Decoder Pipeline: 50. Corrected minimum sample gating semantics to only count comparable metrics and added compared-metric/effective-sample columns in comparison output. 51. Extended finalize summary and publish output with comparison failure reasons and gate outcome metadata. 52. Added comparison parse-error gating (`--fail-on-parse-errors`) with parse stats surfaced in comparison JSON, finalize settings, and published summaries. +53. Updated keyed prefetch insert helper to emit structural-change signals for warmup contiguous coverage cache invalidation. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -574,6 +580,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added parse-error gating and baseline/candidate file parse stats/parse error entries in JSON output. - `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. 
+- `crates/editor/src/playback.rs`: `insert_prefetched_frame` now returns structural-change signals and warmup cache invalidation uses this signal to avoid stale contiguous counts when insert+trim keeps buffer length unchanged. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 972824a75d..d6bf970227 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -101,7 +101,8 @@ impl FrameCache { } } -fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_frame: u32) { +fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_frame: u32) -> bool { + let mut changed = false; while buffer.len() > PREFETCH_BUFFER_SIZE { let far_ahead_frame = buffer .iter() @@ -111,6 +112,7 @@ fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_fra if let Some(frame) = far_ahead_frame { buffer.remove(&frame); + changed = true; continue; } @@ -118,20 +120,25 @@ fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_fra break; }; buffer.remove(&oldest_frame); + changed = true; } + changed } fn insert_prefetched_frame( buffer: &mut BTreeMap, prefetched: PrefetchedFrame, current_frame: u32, -) { +) -> bool { if prefetched.frame_number < current_frame { - return; + return false; } - buffer.entry(prefetched.frame_number).or_insert(prefetched); - trim_prefetch_buffer(buffer, current_frame); + let frame_number = prefetched.frame_number; + let inserted_new = !buffer.contains_key(&frame_number); + buffer.entry(frame_number).or_insert(prefetched); + let trimmed = trim_prefetch_buffer(buffer, current_frame); + inserted_new || trimmed 
} fn prune_prefetch_buffer_before_frame( @@ -566,9 +573,11 @@ impl Playback { tokio::select! { Some(prefetched) = prefetch_rx.recv() => { if prefetched.generation == seek_generation { - let pre_insert_len = prefetch_buffer.len(); - insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); - if prefetch_buffer.len() != pre_insert_len { + if insert_prefetched_frame( + &mut prefetch_buffer, + prefetched, + frame_number, + ) { warmup_buffer_changed = true; } if first_frame_time.is_none() && !prefetch_buffer.is_empty() { @@ -617,7 +626,8 @@ impl Playback { } while let Ok(prefetched) = prefetch_rx.try_recv() { if prefetched.generation == seek_generation { - insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); + let _ = + insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); } } prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); @@ -698,7 +708,11 @@ impl Playback { found_frame = Some(prefetched); break; } else if prefetched.frame_number >= frame_number { - insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); + let _ = insert_prefetched_frame( + &mut prefetch_buffer, + prefetched, + frame_number, + ); } } _ = tokio::time::sleep(in_flight_poll_interval) => { @@ -752,7 +766,7 @@ impl Playback { prefetched.segment_index, )) } else { - insert_prefetched_frame( + let _ = insert_prefetched_frame( &mut prefetch_buffer, prefetched, frame_number, From 35d5a5ee2d98ba271294c472e146417a68946cef Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:16:55 +0000 Subject: [PATCH 143/333] improve: include comparison file stats in finalize summary output Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/finalize-playback-matrix.js | 1 + scripts/publish-playback-matrix-summary.js | 5 +++++ 5 files changed, 15 insertions(+) diff --git 
a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index ea1ce8c465..9b58bc2f80 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -88,6 +88,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- Finalize summary JSON includes generated timestamp, artifact paths, settings, pass/fail flags, and git branch/commit metadata when available. Finalize summary JSON also carries comparison failure reasons and gate outcomes when comparison is enabled. +Finalize summary JSON includes comparison file stats (including parse error counts) when comparison is enabled. # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a5bf591c5f..9ebbd6dab2 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -387,6 +387,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup loop now uses this direct signal instead of length-only delta checks for contiguous coverage cache invalidation. - Improves warmup cache correctness when inserts and trims occur with stable overall buffer length. +48. **Extended finalize summary with comparison file stats (2026-02-13)** + - Finalize summary JSON now includes comparison file stats payload when comparison is enabled. + - Publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. + --- ## Root Cause Analysis Archive @@ -530,6 +534,7 @@ Decoder Pipeline: 51. 
Extended finalize summary and publish output with comparison failure reasons and gate outcome metadata. 52. Added comparison parse-error gating (`--fail-on-parse-errors`) with parse stats surfaced in comparison JSON, finalize settings, and published summaries. 53. Updated keyed prefetch insert helper to emit structural-change signals for warmup contiguous coverage cache invalidation. +54. Extended finalize summary and publish output with comparison file stats (including parse error counts). **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -581,6 +586,8 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. - `crates/editor/src/playback.rs`: `insert_prefetched_frame` now returns structural-change signals and warmup cache invalidation uses this signal to avoid stale contiguous counts when insert+trim keeps buffer length unchanged. +- `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison file stats payload when comparison runs are enabled. +- `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 67e70870f6..62e9b92582 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -116,6 +116,7 @@ pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-di Finalize summary JSON includes artifact paths, gate outcomes, settings, and git branch/commit metadata when available. When comparison is enabled, finalize summary JSON also includes comparison failure reasons and gate outcome details. +When comparison is enabled, finalize summary JSON also includes comparison file stats such as baseline/candidate parse error counts. Include optimization thresholds when finalizing: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index a2bd3865e8..fb72687670 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -336,6 +336,7 @@ function main() { comparisonPassed: comparison ? comparison.summary?.passed === true : null, comparisonFailureReasons: comparison?.summary?.failureReasons ?? null, comparisonGateOutcomes: comparison?.summary?.gateOutcomes ?? null, + comparisonFileStats: comparison?.fileStats ?? null, }, }; fs.writeFileSync(summaryJsonPath, JSON.stringify(summary, null, 2), "utf8"); diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 218f9a5e1d..4837382634 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -121,6 +121,11 @@ function buildSummarySection( if (finalizeSummary.results?.comparisonPassed !== null) { markdown += `- Finalize comparison passed: ${finalizeSummary.results?.comparisonPassed === true ? 
"true" : "false"}\n`; } + const finalizeFileStats = finalizeSummary.results?.comparisonFileStats; + if (finalizeFileStats?.baseline || finalizeFileStats?.candidate) { + markdown += `- Finalize baseline parse errors: ${finalizeFileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; + markdown += `- Finalize candidate parse errors: ${finalizeFileStats?.candidate?.parseErrors?.length ?? "n/a"}\n`; + } const finalizeFailureReasons = Array.isArray( finalizeSummary.results?.comparisonFailureReasons, ) From 3f99687252e0d204df52891b685f72ed4ee1a96b Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:16:55 +0000 Subject: [PATCH 144/333] improve: include comparison file stats in finalize summary output Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/finalize-playback-matrix.js | 1 + scripts/publish-playback-matrix-summary.js | 5 +++++ 5 files changed, 15 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index ea1ce8c465..9b58bc2f80 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -88,6 +88,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- Finalize summary JSON includes generated timestamp, artifact paths, settings, pass/fail flags, and git branch/commit metadata when available. Finalize summary JSON also carries comparison failure reasons and gate outcomes when comparison is enabled. +Finalize summary JSON includes comparison file stats (including parse error counts) when comparison is enabled. 
# Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a5bf591c5f..9ebbd6dab2 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -387,6 +387,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup loop now uses this direct signal instead of length-only delta checks for contiguous coverage cache invalidation. - Improves warmup cache correctness when inserts and trims occur with stable overall buffer length. +48. **Extended finalize summary with comparison file stats (2026-02-13)** + - Finalize summary JSON now includes comparison file stats payload when comparison is enabled. + - Publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. + --- ## Root Cause Analysis Archive @@ -530,6 +534,7 @@ Decoder Pipeline: 51. Extended finalize summary and publish output with comparison failure reasons and gate outcome metadata. 52. Added comparison parse-error gating (`--fail-on-parse-errors`) with parse stats surfaced in comparison JSON, finalize settings, and published summaries. 53. Updated keyed prefetch insert helper to emit structural-change signals for warmup contiguous coverage cache invalidation. +54. Extended finalize summary and publish output with comparison file stats (including parse error counts). **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. 
@@ -581,6 +586,8 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. - `crates/editor/src/playback.rs`: `insert_prefetched_frame` now returns structural-change signals and warmup cache invalidation uses this signal to avoid stale contiguous counts when insert+trim keeps buffer length unchanged. +- `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison file stats payload when comparison runs are enabled. +- `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 67e70870f6..62e9b92582 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -116,6 +116,7 @@ pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-di Finalize summary JSON includes artifact paths, gate outcomes, settings, and git branch/commit metadata when available. When comparison is enabled, finalize summary JSON also includes comparison failure reasons and gate outcome details. +When comparison is enabled, finalize summary JSON also includes comparison file stats such as baseline/candidate parse error counts. 
Include optimization thresholds when finalizing: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index a2bd3865e8..fb72687670 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -336,6 +336,7 @@ function main() { comparisonPassed: comparison ? comparison.summary?.passed === true : null, comparisonFailureReasons: comparison?.summary?.failureReasons ?? null, comparisonGateOutcomes: comparison?.summary?.gateOutcomes ?? null, + comparisonFileStats: comparison?.fileStats ?? null, }, }; fs.writeFileSync(summaryJsonPath, JSON.stringify(summary, null, 2), "utf8"); diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 218f9a5e1d..4837382634 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -121,6 +121,11 @@ function buildSummarySection( if (finalizeSummary.results?.comparisonPassed !== null) { markdown += `- Finalize comparison passed: ${finalizeSummary.results?.comparisonPassed === true ? "true" : "false"}\n`; } + const finalizeFileStats = finalizeSummary.results?.comparisonFileStats; + if (finalizeFileStats?.baseline || finalizeFileStats?.candidate) { + markdown += `- Finalize baseline parse errors: ${finalizeFileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; + markdown += `- Finalize candidate parse errors: ${finalizeFileStats?.candidate?.parseErrors?.length ?? 
"n/a"}\n`; + } const finalizeFailureReasons = Array.isArray( finalizeSummary.results?.comparisonFailureReasons, ) From 33f15fe638896e1c2556c13095886dd2fbb8d264 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:20:23 +0000 Subject: [PATCH 145/333] improve: stabilize comparison artifact ordering for reproducibility Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ scripts/compare-playback-benchmark-runs.js | 18 +++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 9ebbd6dab2..89093a5d8e 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -391,6 +391,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Finalize summary JSON now includes comparison file stats payload when comparison is enabled. - Publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. +49. **Stabilized comparison report ordering for reproducibility (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now deterministically sorts comparison, missing, candidate-only, and insufficient-sample rows. + - Keeps markdown/JSON outputs stable across repeated runs with identical inputs. + --- ## Root Cause Analysis Archive @@ -535,6 +539,7 @@ Decoder Pipeline: 52. Added comparison parse-error gating (`--fail-on-parse-errors`) with parse stats surfaced in comparison JSON, finalize settings, and published summaries. 53. Updated keyed prefetch insert helper to emit structural-change signals for warmup contiguous coverage cache invalidation. 54. Extended finalize summary and publish output with comparison file stats (including parse error counts). +55. Stabilized comparison output ordering with deterministic sorting for comparison rows and coverage-delta sections. 
**Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -557,6 +562,7 @@ Decoder Pipeline: - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. - `scripts/compare-playback-benchmark-runs.js`: fixed options wiring inside comparison regression checks and now aggregates per-key metrics across multi-input runs with run-count reporting. +- `scripts/compare-playback-benchmark-runs.js`: comparison row sets are now deterministically sorted for stable markdown/json artifact diffs. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 3beec8cd03..39809c17b3 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -264,6 +264,16 @@ function formatNumber(value, digits = 2) { return value === null ? 
"n/a" : value.toFixed(digits); } +function compareCoverageRows(a, b) { + return ( + a.platform.localeCompare(b.platform) || + a.gpu.localeCompare(b.gpu) || + a.scenario.localeCompare(b.scenario) || + a.recording.localeCompare(b.recording) || + a.format.localeCompare(b.format) + ); +} + function compareMetrics(baselineRows, candidateRows, options) { const comparisons = []; const missingCandidateRows = []; @@ -375,7 +385,13 @@ function compareMetrics(baselineRows, candidateRows, options) { }); } - comparisons.sort((a, b) => b.regressions.length - a.regressions.length); + comparisons.sort( + (a, b) => + b.regressions.length - a.regressions.length || compareCoverageRows(a, b), + ); + missingCandidateRows.sort(compareCoverageRows); + candidateOnlyRows.sort(compareCoverageRows); + insufficientSampleRows.sort(compareCoverageRows); return { comparisons, missingCandidateRows, From 59ef24717ad652b900acaab38ae1159f9f7ec1b3 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:20:23 +0000 Subject: [PATCH 146/333] improve: stabilize comparison artifact ordering for reproducibility Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ scripts/compare-playback-benchmark-runs.js | 18 +++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 9ebbd6dab2..89093a5d8e 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -391,6 +391,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Finalize summary JSON now includes comparison file stats payload when comparison is enabled. - Publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. +49. 
**Stabilized comparison report ordering for reproducibility (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now deterministically sorts comparison, missing, candidate-only, and insufficient-sample rows. + - Keeps markdown/JSON outputs stable across repeated runs with identical inputs. + --- ## Root Cause Analysis Archive @@ -535,6 +539,7 @@ Decoder Pipeline: 52. Added comparison parse-error gating (`--fail-on-parse-errors`) with parse stats surfaced in comparison JSON, finalize settings, and published summaries. 53. Updated keyed prefetch insert helper to emit structural-change signals for warmup contiguous coverage cache invalidation. 54. Extended finalize summary and publish output with comparison file stats (including parse error counts). +55. Stabilized comparison output ordering with deterministic sorting for comparison rows and coverage-delta sections. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -557,6 +562,7 @@ Decoder Pipeline: - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. - `scripts/compare-playback-benchmark-runs.js`: fixed options wiring inside comparison regression checks and now aggregates per-key metrics across multi-input runs with run-count reporting. +- `scripts/compare-playback-benchmark-runs.js`: comparison row sets are now deterministically sorted for stable markdown/json artifact diffs. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. 
- `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 3beec8cd03..39809c17b3 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -264,6 +264,16 @@ function formatNumber(value, digits = 2) { return value === null ? "n/a" : value.toFixed(digits); } +function compareCoverageRows(a, b) { + return ( + a.platform.localeCompare(b.platform) || + a.gpu.localeCompare(b.gpu) || + a.scenario.localeCompare(b.scenario) || + a.recording.localeCompare(b.recording) || + a.format.localeCompare(b.format) + ); +} + function compareMetrics(baselineRows, candidateRows, options) { const comparisons = []; const missingCandidateRows = []; @@ -375,7 +385,13 @@ function compareMetrics(baselineRows, candidateRows, options) { }); } - comparisons.sort((a, b) => b.regressions.length - a.regressions.length); + comparisons.sort( + (a, b) => + b.regressions.length - a.regressions.length || compareCoverageRows(a, b), + ); + missingCandidateRows.sort(compareCoverageRows); + candidateOnlyRows.sort(compareCoverageRows); + insufficientSampleRows.sort(compareCoverageRows); return { comparisons, missingCandidateRows, From 9fadd0fb498c896b94300eb31f335b768d1618f9 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:22:11 +0000 Subject: [PATCH 147/333] improve: add comparison count rollups to finalize summaries Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/finalize-playback-matrix.js | 8 ++++++++ scripts/publish-playback-matrix-summary.js | 7 +++++++ 5 files changed, 24 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 9b58bc2f80..f2a797d83a 100644 
--- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -89,6 +89,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- Finalize summary JSON includes generated timestamp, artifact paths, settings, pass/fail flags, and git branch/commit metadata when available. Finalize summary JSON also carries comparison failure reasons and gate outcomes when comparison is enabled. Finalize summary JSON includes comparison file stats (including parse error counts) when comparison is enabled. +Finalize summary JSON includes comparison summary counts (compared rows, regressions, missing/candidate-only/insufficient-sample counts) when comparison is enabled. # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 89093a5d8e..fb1e1cb290 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -395,6 +395,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/compare-playback-benchmark-runs.js` now deterministically sorts comparison, missing, candidate-only, and insufficient-sample rows. - Keeps markdown/JSON outputs stable across repeated runs with identical inputs. +50. **Extended finalize summary with comparison count rollups (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now captures comparison count rollups in summary results (compared rows, regressions, missing/candidate-only/insufficient-sample counts). + - `scripts/publish-playback-matrix-summary.js` now surfaces these finalize comparison counts in published summaries. 
+ --- ## Root Cause Analysis Archive @@ -540,6 +544,7 @@ Decoder Pipeline: 53. Updated keyed prefetch insert helper to emit structural-change signals for warmup contiguous coverage cache invalidation. 54. Extended finalize summary and publish output with comparison file stats (including parse error counts). 55. Stabilized comparison output ordering with deterministic sorting for comparison rows and coverage-delta sections. +56. Extended finalize and publish summaries with comparison count rollups (compared rows, regressions, missing/candidate-only/insufficient-sample counts). **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -563,6 +568,8 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. - `scripts/compare-playback-benchmark-runs.js`: fixed options wiring inside comparison regression checks and now aggregates per-key metrics across multi-input runs with run-count reporting. - `scripts/compare-playback-benchmark-runs.js`: comparison row sets are now deterministically sorted for stable markdown/json artifact diffs. +- `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison count rollup fields for compared rows, regressions, and coverage deltas. +- `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize comparison count rollups when finalize summary metadata is attached. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 62e9b92582..303b010ad1 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -117,6 +117,7 @@ pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-di Finalize summary JSON includes artifact paths, gate outcomes, settings, and git branch/commit metadata when available. When comparison is enabled, finalize summary JSON also includes comparison failure reasons and gate outcome details. When comparison is enabled, finalize summary JSON also includes comparison file stats such as baseline/candidate parse error counts. +When comparison is enabled, finalize summary JSON also includes comparison summary counts for compared rows, regressions, and coverage deltas. Include optimization thresholds when finalizing: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index fb72687670..6341c59523 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -334,6 +334,14 @@ function main() { results: { validationPassed: validation.passed === true, comparisonPassed: comparison ? comparison.summary?.passed === true : null, + comparisonComparedRows: comparison?.summary?.comparedRows ?? null, + comparisonRegressions: comparison?.summary?.regressions ?? null, + comparisonMissingCandidateRows: + comparison?.summary?.missingCandidateRows ?? null, + comparisonCandidateOnlyRows: + comparison?.summary?.candidateOnlyRows ?? null, + comparisonInsufficientSampleRows: + comparison?.summary?.insufficientSampleRows ?? null, comparisonFailureReasons: comparison?.summary?.failureReasons ?? null, comparisonGateOutcomes: comparison?.summary?.gateOutcomes ?? null, comparisonFileStats: comparison?.fileStats ?? 
null, diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 4837382634..f73c447c1e 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -121,6 +121,13 @@ function buildSummarySection( if (finalizeSummary.results?.comparisonPassed !== null) { markdown += `- Finalize comparison passed: ${finalizeSummary.results?.comparisonPassed === true ? "true" : "false"}\n`; } + if (finalizeSummary.results?.comparisonComparedRows !== null) { + markdown += `- Finalize compared rows: ${finalizeSummary.results?.comparisonComparedRows}\n`; + markdown += `- Finalize comparison regressions: ${finalizeSummary.results?.comparisonRegressions ?? "n/a"}\n`; + markdown += `- Finalize missing candidate rows: ${finalizeSummary.results?.comparisonMissingCandidateRows ?? "n/a"}\n`; + markdown += `- Finalize candidate-only rows: ${finalizeSummary.results?.comparisonCandidateOnlyRows ?? "n/a"}\n`; + markdown += `- Finalize insufficient sample rows: ${finalizeSummary.results?.comparisonInsufficientSampleRows ?? "n/a"}\n`; + } const finalizeFileStats = finalizeSummary.results?.comparisonFileStats; if (finalizeFileStats?.baseline || finalizeFileStats?.candidate) { markdown += `- Finalize baseline parse errors: ${finalizeFileStats?.baseline?.parseErrors?.length ?? 
"n/a"}\n`; From c87ca9e81674b21e751ab8f620bf29d816573ef6 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:22:11 +0000 Subject: [PATCH 148/333] improve: add comparison count rollups to finalize summaries Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/finalize-playback-matrix.js | 8 ++++++++ scripts/publish-playback-matrix-summary.js | 7 +++++++ 5 files changed, 24 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 9b58bc2f80..f2a797d83a 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -89,6 +89,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- Finalize summary JSON includes generated timestamp, artifact paths, settings, pass/fail flags, and git branch/commit metadata when available. Finalize summary JSON also carries comparison failure reasons and gate outcomes when comparison is enabled. Finalize summary JSON includes comparison file stats (including parse error counts) when comparison is enabled. +Finalize summary JSON includes comparison summary counts (compared rows, regressions, missing/candidate-only/insufficient-sample counts) when comparison is enabled. 
# Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 89093a5d8e..fb1e1cb290 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -395,6 +395,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/compare-playback-benchmark-runs.js` now deterministically sorts comparison, missing, candidate-only, and insufficient-sample rows. - Keeps markdown/JSON outputs stable across repeated runs with identical inputs. +50. **Extended finalize summary with comparison count rollups (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now captures comparison count rollups in summary results (compared rows, regressions, missing/candidate-only/insufficient-sample counts). + - `scripts/publish-playback-matrix-summary.js` now surfaces these finalize comparison counts in published summaries. + --- ## Root Cause Analysis Archive @@ -540,6 +544,7 @@ Decoder Pipeline: 53. Updated keyed prefetch insert helper to emit structural-change signals for warmup contiguous coverage cache invalidation. 54. Extended finalize summary and publish output with comparison file stats (including parse error counts). 55. Stabilized comparison output ordering with deterministic sorting for comparison rows and coverage-delta sections. +56. Extended finalize and publish summaries with comparison count rollups (compared rows, regressions, missing/candidate-only/insufficient-sample counts). **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. 
@@ -563,6 +568,8 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. - `scripts/compare-playback-benchmark-runs.js`: fixed options wiring inside comparison regression checks and now aggregates per-key metrics across multi-input runs with run-count reporting. - `scripts/compare-playback-benchmark-runs.js`: comparison row sets are now deterministically sorted for stable markdown/json artifact diffs. +- `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison count rollup fields for compared rows, regressions, and coverage deltas. +- `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize comparison count rollups when finalize summary metadata is attached. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 62e9b92582..303b010ad1 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -117,6 +117,7 @@ pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-di Finalize summary JSON includes artifact paths, gate outcomes, settings, and git branch/commit metadata when available. When comparison is enabled, finalize summary JSON also includes comparison failure reasons and gate outcome details. When comparison is enabled, finalize summary JSON also includes comparison file stats such as baseline/candidate parse error counts. 
+When comparison is enabled, finalize summary JSON also includes comparison summary counts for compared rows, regressions, and coverage deltas. Include optimization thresholds when finalizing: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index fb72687670..6341c59523 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -334,6 +334,14 @@ function main() { results: { validationPassed: validation.passed === true, comparisonPassed: comparison ? comparison.summary?.passed === true : null, + comparisonComparedRows: comparison?.summary?.comparedRows ?? null, + comparisonRegressions: comparison?.summary?.regressions ?? null, + comparisonMissingCandidateRows: + comparison?.summary?.missingCandidateRows ?? null, + comparisonCandidateOnlyRows: + comparison?.summary?.candidateOnlyRows ?? null, + comparisonInsufficientSampleRows: + comparison?.summary?.insufficientSampleRows ?? null, comparisonFailureReasons: comparison?.summary?.failureReasons ?? null, comparisonGateOutcomes: comparison?.summary?.gateOutcomes ?? null, comparisonFileStats: comparison?.fileStats ?? null, diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 4837382634..f73c447c1e 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -121,6 +121,13 @@ function buildSummarySection( if (finalizeSummary.results?.comparisonPassed !== null) { markdown += `- Finalize comparison passed: ${finalizeSummary.results?.comparisonPassed === true ? "true" : "false"}\n`; } + if (finalizeSummary.results?.comparisonComparedRows !== null) { + markdown += `- Finalize compared rows: ${finalizeSummary.results?.comparisonComparedRows}\n`; + markdown += `- Finalize comparison regressions: ${finalizeSummary.results?.comparisonRegressions ?? 
"n/a"}\n`; + markdown += `- Finalize missing candidate rows: ${finalizeSummary.results?.comparisonMissingCandidateRows ?? "n/a"}\n`; + markdown += `- Finalize candidate-only rows: ${finalizeSummary.results?.comparisonCandidateOnlyRows ?? "n/a"}\n`; + markdown += `- Finalize insufficient sample rows: ${finalizeSummary.results?.comparisonInsufficientSampleRows ?? "n/a"}\n`; + } const finalizeFileStats = finalizeSummary.results?.comparisonFileStats; if (finalizeFileStats?.baseline || finalizeFileStats?.candidate) { markdown += `- Finalize baseline parse errors: ${finalizeFileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; From 05bbd0abfe0b4a05269c104a4fe09d7d4207ac84 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:25:13 +0000 Subject: [PATCH 149/333] improve: add optional zero-comparison gating for matrix compare flows Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 ++ crates/editor/PLAYBACK-FINDINGS.md | 8 ++++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 ++++++ scripts/compare-playback-benchmark-runs.js | 13 ++++++++++++- scripts/finalize-playback-matrix.js | 11 ++++++++++- scripts/publish-playback-matrix-summary.js | 1 + 6 files changed, 39 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index f2a797d83a..43e7d2a355 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -83,6 +83,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --ou node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 node scripts/finalize-playback-matrix.js --input 
/path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-zero-compared node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md @@ -107,6 +108,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-zero-compared node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. 
diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index fb1e1cb290..5ff649e6e6 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -399,6 +399,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/finalize-playback-matrix.js` now captures comparison count rollups in summary results (compared rows, regressions, missing/candidate-only/insufficient-sample counts). - `scripts/publish-playback-matrix-summary.js` now surfaces these finalize comparison counts in published summaries. +51. **Added optional zero-comparison gating (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-zero-compared`. + - Enables strict failure when comparison processing yields zero comparable rows. + - `scripts/finalize-playback-matrix.js` forwards zero-comparison gating option in integrated compare flows. + --- ## Root Cause Analysis Archive @@ -545,6 +550,7 @@ Decoder Pipeline: 54. Extended finalize summary and publish output with comparison file stats (including parse error counts). 55. Stabilized comparison output ordering with deterministic sorting for comparison rows and coverage-delta sections. 56. Extended finalize and publish summaries with comparison count rollups (compared rows, regressions, missing/candidate-only/insufficient-sample counts). +57. Added optional zero-comparison gating (`--fail-on-zero-compared`) for compare/finalize flows and surfaced zero-compare policy in comparison/published summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -568,6 +574,8 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. 
- `scripts/compare-playback-benchmark-runs.js`: fixed options wiring inside comparison regression checks and now aggregates per-key metrics across multi-input runs with run-count reporting. - `scripts/compare-playback-benchmark-runs.js`: comparison row sets are now deterministically sorted for stable markdown/json artifact diffs. +- `scripts/compare-playback-benchmark-runs.js`: added optional `--fail-on-zero-compared` and zero-compare gate diagnostics in markdown/json outputs. +- `scripts/finalize-playback-matrix.js`: forwards `--fail-on-zero-compared` into compare stage and records policy in finalize summary settings. - `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison count rollup fields for compared rows, regressions, and coverage deltas. - `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize comparison count rollups when finalize summary metadata is attached. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 303b010ad1..686ca5efe5 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -138,6 +138,9 @@ pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir # optional: fail comparison gate when any input JSON fails to parse pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors + +# optional: fail comparison gate when no comparable rows remain +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-zero-compared ``` Finalize and publish to benchmark history in one command: @@ -197,6 +200,9 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # minimum sample gating uses metrics that are actually comparable for each row # optional: fail comparison gate when any input JSON fails to parse pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors + +# optional: fail comparison gate when no comparable rows remain after filtering +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-zero-compared ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 39809c17b3..2aae10ca84 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -16,6 +16,7 @@ function parseArgs(argv) { failOnCandidateOnly: false, minSamplesPerRow: 1, failOnParseErrors: false, + failOnZeroCompared: false, }; for (let i = 2; i < argv.length; i++) { @@ -89,6 +90,10 @@ function parseArgs(argv) { options.failOnParseErrors = 
true; continue; } + if (arg === "--fail-on-zero-compared") { + options.failOnZeroCompared = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -96,7 +101,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } @@ -419,6 +424,7 @@ function toMarkdown( md += `Coverage gate: missing_candidate=${options.allowMissingCandidate ? "allow" : "fail"}, candidate_only=${options.failOnCandidateOnly ? "fail" : "allow"}\n\n`; md += `Sample gate: min_samples_per_row>=${options.minSamplesPerRow}\n\n`; md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; + md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? 
"fail_if_zero" : "allow"}\n\n`; md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, parse_errors=${baselineStats.parseErrors.length}\n`; md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; @@ -519,6 +525,9 @@ function buildJsonOutput( ) { failureReasons.push("parse_errors"); } + if (options.failOnZeroCompared && comparisons.length === 0) { + failureReasons.push("zero_compared_rows"); + } const passed = failureReasons.length === 0; return { generatedAt: new Date().toISOString(), @@ -530,6 +539,7 @@ function buildJsonOutput( failOnCandidateOnly: options.failOnCandidateOnly, minSamplesPerRow: options.minSamplesPerRow, failOnParseErrors: options.failOnParseErrors, + failOnZeroCompared: options.failOnZeroCompared, }, fileStats: { baseline: baselineStats, @@ -554,6 +564,7 @@ function buildJsonOutput( !options.failOnParseErrors || (baselineStats.parseErrors.length === 0 && candidateStats.parseErrors.length === 0), + zeroComparedRows: !options.failOnZeroCompared || comparisons.length > 0, }, }, regressions, diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 6341c59523..74f50d1254 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -23,6 +23,7 @@ function parseArgs(argv) { failOnCandidateOnly: false, minSamplesPerRow: 1, failOnParseErrors: false, + failOnZeroCompared: false, }; for (let i = 2; i < argv.length; i++) { @@ -142,6 +143,10 @@ function 
parseArgs(argv) { options.failOnParseErrors = true; continue; } + if (arg === "--fail-on-zero-compared") { + options.failOnZeroCompared = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -149,7 +154,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -282,6 +287,9 @@ function main() { if (options.failOnParseErrors) { compareArgs.push("--fail-on-parse-errors"); } + if (options.failOnZeroCompared) { + compareArgs.push("--fail-on-zero-compared"); + } compareArgs.push("--min-samples-per-row", String(options.minSamplesPerRow)); run("node", compareArgs); } @@ -330,6 +338,7 @@ function main() { failOnCandidateOnly: options.failOnCandidateOnly, minSamplesPerRow: options.minSamplesPerRow, failOnParseErrors: options.failOnParseErrors, + failOnZeroCompared: options.failOnZeroCompared, }, results: { validationPassed: validation.passed === true, diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index f73c447c1e..94231e6070 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -104,6 +104,7 @@ function buildSummarySection( markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; markdown += `- Parse error policy: ${comparison.tolerance?.failOnParseErrors ? "fail" : "allow"}\n`; + markdown += `- Zero-compare policy: ${comparison.tolerance?.failOnZeroCompared ? "fail" : "allow"}\n`; markdown += `- Baseline parse errors: ${comparison.fileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; markdown += `- Candidate parse errors: ${comparison.fileStats?.candidate?.parseErrors?.length ?? 
"n/a"}\n\n`; const failureReasons = Array.isArray(comparison.summary?.failureReasons) From 8048eb1ee0647a72f4f74d87022c1d0b6b4a69f6 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:25:13 +0000 Subject: [PATCH 150/333] improve: add optional zero-comparison gating for matrix compare flows Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 ++ crates/editor/PLAYBACK-FINDINGS.md | 8 ++++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 ++++++ scripts/compare-playback-benchmark-runs.js | 13 ++++++++++++- scripts/finalize-playback-matrix.js | 11 ++++++++++- scripts/publish-playback-matrix-summary.js | 1 + 6 files changed, 39 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index f2a797d83a..43e7d2a355 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -83,6 +83,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --ou node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-zero-compared node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input 
/path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md @@ -107,6 +108,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-zero-compared node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index fb1e1cb290..5ff649e6e6 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -399,6 +399,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/finalize-playback-matrix.js` now captures comparison count rollups in summary results (compared rows, regressions, missing/candidate-only/insufficient-sample counts). - `scripts/publish-playback-matrix-summary.js` now surfaces these finalize comparison counts in published summaries. +51. **Added optional zero-comparison gating (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-zero-compared`. 
+ - Enables strict failure when comparison processing yields zero comparable rows. + - `scripts/finalize-playback-matrix.js` forwards zero-comparison gating option in integrated compare flows. + --- ## Root Cause Analysis Archive @@ -545,6 +550,7 @@ Decoder Pipeline: 54. Extended finalize summary and publish output with comparison file stats (including parse error counts). 55. Stabilized comparison output ordering with deterministic sorting for comparison rows and coverage-delta sections. 56. Extended finalize and publish summaries with comparison count rollups (compared rows, regressions, missing/candidate-only/insufficient-sample counts). +57. Added optional zero-comparison gating (`--fail-on-zero-compared`) for compare/finalize flows and surfaced zero-compare policy in comparison/published summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -568,6 +574,8 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. - `scripts/compare-playback-benchmark-runs.js`: fixed options wiring inside comparison regression checks and now aggregates per-key metrics across multi-input runs with run-count reporting. - `scripts/compare-playback-benchmark-runs.js`: comparison row sets are now deterministically sorted for stable markdown/json artifact diffs. +- `scripts/compare-playback-benchmark-runs.js`: added optional `--fail-on-zero-compared` and zero-compare gate diagnostics in markdown/json outputs. +- `scripts/finalize-playback-matrix.js`: forwards `--fail-on-zero-compared` into compare stage and records policy in finalize summary settings. - `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison count rollup fields for compared rows, regressions, and coverage deltas. 
- `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize comparison count rollups when finalize summary metadata is attached. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 303b010ad1..686ca5efe5 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -138,6 +138,9 @@ pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir # optional: fail comparison gate when any input JSON fails to parse pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors + +# optional: fail comparison gate when no comparable rows remain +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-zero-compared ``` Finalize and publish to benchmark history in one command: @@ -197,6 +200,9 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # minimum sample gating uses metrics that are actually comparable for each row # optional: fail comparison gate when any input JSON fails to parse pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors + +# optional: fail comparison gate when no comparable rows remain after filtering +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-zero-compared ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 39809c17b3..2aae10ca84 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ 
b/scripts/compare-playback-benchmark-runs.js @@ -16,6 +16,7 @@ function parseArgs(argv) { failOnCandidateOnly: false, minSamplesPerRow: 1, failOnParseErrors: false, + failOnZeroCompared: false, }; for (let i = 2; i < argv.length; i++) { @@ -89,6 +90,10 @@ function parseArgs(argv) { options.failOnParseErrors = true; continue; } + if (arg === "--fail-on-zero-compared") { + options.failOnZeroCompared = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -96,7 +101,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } @@ -419,6 +424,7 @@ function toMarkdown( md += `Coverage gate: missing_candidate=${options.allowMissingCandidate ? "allow" : "fail"}, candidate_only=${options.failOnCandidateOnly ? "fail" : "allow"}\n\n`; md += `Sample gate: min_samples_per_row>=${options.minSamplesPerRow}\n\n`; md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; + md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? 
"fail_if_zero" : "allow"}\n\n`; md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, parse_errors=${baselineStats.parseErrors.length}\n`; md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; @@ -519,6 +525,9 @@ function buildJsonOutput( ) { failureReasons.push("parse_errors"); } + if (options.failOnZeroCompared && comparisons.length === 0) { + failureReasons.push("zero_compared_rows"); + } const passed = failureReasons.length === 0; return { generatedAt: new Date().toISOString(), @@ -530,6 +539,7 @@ function buildJsonOutput( failOnCandidateOnly: options.failOnCandidateOnly, minSamplesPerRow: options.minSamplesPerRow, failOnParseErrors: options.failOnParseErrors, + failOnZeroCompared: options.failOnZeroCompared, }, fileStats: { baseline: baselineStats, @@ -554,6 +564,7 @@ function buildJsonOutput( !options.failOnParseErrors || (baselineStats.parseErrors.length === 0 && candidateStats.parseErrors.length === 0), + zeroComparedRows: !options.failOnZeroCompared || comparisons.length > 0, }, }, regressions, diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 6341c59523..74f50d1254 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -23,6 +23,7 @@ function parseArgs(argv) { failOnCandidateOnly: false, minSamplesPerRow: 1, failOnParseErrors: false, + failOnZeroCompared: false, }; for (let i = 2; i < argv.length; i++) { @@ -142,6 +143,10 @@ function 
parseArgs(argv) { options.failOnParseErrors = true; continue; } + if (arg === "--fail-on-zero-compared") { + options.failOnZeroCompared = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -149,7 +154,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -282,6 +287,9 @@ function main() { if (options.failOnParseErrors) { compareArgs.push("--fail-on-parse-errors"); } + if (options.failOnZeroCompared) { + compareArgs.push("--fail-on-zero-compared"); + } compareArgs.push("--min-samples-per-row", String(options.minSamplesPerRow)); run("node", compareArgs); } @@ -330,6 +338,7 @@ function main() { failOnCandidateOnly: options.failOnCandidateOnly, minSamplesPerRow: options.minSamplesPerRow, failOnParseErrors: options.failOnParseErrors, + failOnZeroCompared: options.failOnZeroCompared, }, results: { validationPassed: validation.passed === true, diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index f73c447c1e..94231e6070 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -104,6 +104,7 @@ function buildSummarySection( markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; markdown += `- Parse error policy: ${comparison.tolerance?.failOnParseErrors ? "fail" : "allow"}\n`; + markdown += `- Zero-compare policy: ${comparison.tolerance?.failOnZeroCompared ? "fail" : "allow"}\n`; markdown += `- Baseline parse errors: ${comparison.fileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; markdown += `- Candidate parse errors: ${comparison.fileStats?.candidate?.parseErrors?.length ?? 
"n/a"}\n\n`; const failureReasons = Array.isArray(comparison.summary?.failureReasons) From 6bc816e4d224ef53eaddeb843ac05472ca1f61e5 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:28:59 +0000 Subject: [PATCH 151/333] improve: handle seek updates during playback warmup Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 23 ++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 5ff649e6e6..437ee37631 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -404,6 +404,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Enables strict failure when comparison processing yields zero comparable rows. - `scripts/finalize-playback-matrix.js` forwards zero-comparison gating option in integrated compare flows. +52. **Added warmup-stage seek handling before playback loop entry (2026-02-13)** + - Warmup loop now consumes seek updates immediately instead of waiting for playback loop start. + - Seek during warmup now resets warmup timers/buffer state and updates frame/audio playhead targets immediately. + - Improves responsiveness when users seek while playback is still warming up. + --- ## Root Cause Analysis Archive @@ -551,6 +556,7 @@ Decoder Pipeline: 55. Stabilized comparison output ordering with deterministic sorting for comparison rows and coverage-delta sections. 56. Extended finalize and publish summaries with comparison count rollups (compared rows, regressions, missing/candidate-only/insufficient-sample counts). 57. Added optional zero-comparison gating (`--fail-on-zero-compared`) for compare/finalize flows and surfaced zero-compare policy in comparison/published summaries. +58. Added warmup-stage seek handling to apply seeks immediately while playback warmup is in progress. 
**Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -579,6 +585,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison count rollup fields for compared rows, regressions, and coverage deltas. - `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize comparison count rollups when finalize summary metadata is attached. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. +- `crates/editor/src/playback.rs`: warmup loop now handles seek updates immediately, resetting warmup state and updating frame/audio targets before playback loop entry. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index d6bf970227..5735831682 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -526,7 +526,7 @@ impl Playback { .max(Duration::from_millis(200)) .min(Duration::from_millis(700)); let warmup_no_frames_timeout = Duration::from_secs(5); - let warmup_start = Instant::now(); + let mut warmup_start = Instant::now(); let mut first_frame_time: Option = None; let mut warmup_contiguous_prefetched = 0usize; let mut warmup_buffer_changed = false; @@ -585,6 +585,27 @@ impl Playback { } } } + _ = seek_rx.changed() => { + let seek_frame = *seek_rx.borrow_and_update(); + seek_generation = seek_generation.saturating_add(1); + frame_number = seek_frame; + prefetch_buffer.clear(); + frame_cache.cache.clear(); + warmup_contiguous_prefetched = 0; + warmup_buffer_changed = false; + first_frame_time = None; + warmup_start = Instant::now(); + let _ = seek_generation_tx.send(seek_generation); + let _ = frame_request_tx.send(frame_number); + let _ = playback_position_tx.send(frame_number); + if has_audio + && audio_playhead_tx + .send(frame_number as f64 / fps_f64) + .is_err() + { + break; + } + } _ = stop_rx.changed() => { if *stop_rx.borrow() { break; From 46413abbe6abe279be47b6ae287f86d72e63c26f Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:28:59 +0000 Subject: [PATCH 152/333] improve: handle seek updates during playback warmup Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 23 ++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 5ff649e6e6..437ee37631 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -404,6 +404,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Enables strict failure when comparison processing yields 
zero comparable rows. - `scripts/finalize-playback-matrix.js` forwards zero-comparison gating option in integrated compare flows. +52. **Added warmup-stage seek handling before playback loop entry (2026-02-13)** + - Warmup loop now consumes seek updates immediately instead of waiting for playback loop start. + - Seek during warmup now resets warmup timers/buffer state and updates frame/audio playhead targets immediately. + - Improves responsiveness when users seek while playback is still warming up. + --- ## Root Cause Analysis Archive @@ -551,6 +556,7 @@ Decoder Pipeline: 55. Stabilized comparison output ordering with deterministic sorting for comparison rows and coverage-delta sections. 56. Extended finalize and publish summaries with comparison count rollups (compared rows, regressions, missing/candidate-only/insufficient-sample counts). 57. Added optional zero-comparison gating (`--fail-on-zero-compared`) for compare/finalize flows and surfaced zero-compare policy in comparison/published summaries. +58. Added warmup-stage seek handling to apply seeks immediately while playback warmup is in progress. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -579,6 +585,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison count rollup fields for compared rows, regressions, and coverage deltas. - `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize comparison count rollups when finalize summary metadata is attached. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. +- `crates/editor/src/playback.rs`: warmup loop now handles seek updates immediately, resetting warmup state and updating frame/audio targets before playback loop entry. 
- `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index d6bf970227..5735831682 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -526,7 +526,7 @@ impl Playback { .max(Duration::from_millis(200)) .min(Duration::from_millis(700)); let warmup_no_frames_timeout = Duration::from_secs(5); - let warmup_start = Instant::now(); + let mut warmup_start = Instant::now(); let mut first_frame_time: Option = None; let mut warmup_contiguous_prefetched = 0usize; let mut warmup_buffer_changed = false; @@ -585,6 +585,27 @@ impl Playback { } } } + _ = seek_rx.changed() => { + let seek_frame = *seek_rx.borrow_and_update(); + seek_generation = seek_generation.saturating_add(1); + frame_number = seek_frame; + prefetch_buffer.clear(); + frame_cache.cache.clear(); + warmup_contiguous_prefetched = 0; + warmup_buffer_changed = false; + first_frame_time = None; + warmup_start = Instant::now(); + let _ = seek_generation_tx.send(seek_generation); + let _ = frame_request_tx.send(frame_number); + let _ = playback_position_tx.send(frame_number); + if has_audio + && audio_playhead_tx + .send(frame_number as f64 / fps_f64) + .is_err() + { + break; + } + } _ = stop_rx.changed() => { if *stop_rx.borrow() { break; From 1b7a2a5da478ad71b98a52b49fc9cb9479a6a952 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:33:02 +0000 Subject: [PATCH 153/333] improve: add skipped-file gating for comparison workflows Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 +++ 
crates/editor/PLAYBACK-FINDINGS.md | 9 +++++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 ++++++ scripts/compare-playback-benchmark-runs.js | 19 ++++++++++++++++++- scripts/finalize-playback-matrix.js | 11 ++++++++++- scripts/publish-playback-matrix-summary.js | 1 + 6 files changed, 47 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 43e7d2a355..6b3737b4e7 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -84,6 +84,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --ou node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-zero-compared +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-skipped-files node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md @@ -109,6 +110,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline 
/path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-zero-compared +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-skipped-files node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. @@ -116,6 +118,7 @@ Comparison table also reports baseline/candidate run counts per row when multipl Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for automation-friendly gate diagnostics. Minimum sample gating is applied against metrics that are actually comparable for the row (for example, scrub samples are not required for rows with no scrub metric comparison). Comparison JSON also includes baseline/candidate file parsing stats and optional parse-error gating support. +Comparison can optionally gate on skipped input files via `--fail-on-skipped-files`. ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 437ee37631..be3ceae240 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -409,6 +409,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Seek during warmup now resets warmup timers/buffer state and updates frame/audio playhead targets immediately. - Improves responsiveness when users seek while playback is still warming up. +53. 
**Added optional skipped-file gating for comparison workflows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-skipped-files`. + - Enables strict failure when baseline/candidate inputs include skipped JSON files without usable benchmark payloads. + - `scripts/finalize-playback-matrix.js` forwards skipped-file gating option in integrated compare flows. + --- ## Root Cause Analysis Archive @@ -557,6 +562,7 @@ Decoder Pipeline: 56. Extended finalize and publish summaries with comparison count rollups (compared rows, regressions, missing/candidate-only/insufficient-sample counts). 57. Added optional zero-comparison gating (`--fail-on-zero-compared`) for compare/finalize flows and surfaced zero-compare policy in comparison/published summaries. 58. Added warmup-stage seek handling to apply seeks immediately while playback warmup is in progress. +59. Added optional skipped-file gating (`--fail-on-skipped-files`) for compare/finalize flows and surfaced skipped-file policy in comparison/published summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -593,7 +599,9 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: comparison outputs now include candidate-only rows in addition to missing-candidate coverage deltas. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. +- `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-skipped-files` gate and parse/skip policy reporting in comparison markdown/json outputs. 
- `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. +- `scripts/finalize-playback-matrix.js`: forwards `--fail-on-skipped-files` into compare stage and records skipped-file policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes missing-candidate and candidate-only coverage policy modes from comparison JSON tolerance settings. - `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. @@ -613,6 +621,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added parse-error gating and baseline/candidate file parse stats/parse error entries in JSON output. - `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes skipped-file policy mode from comparison tolerance settings. - `crates/editor/src/playback.rs`: `insert_prefetched_frame` now returns structural-change signals and warmup cache invalidation uses this signal to avoid stale contiguous counts when insert+trim keeps buffer length unchanged. - `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison file stats payload when comparison runs are enabled. - `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 686ca5efe5..a2fd851e59 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -141,6 +141,9 @@ pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir # optional: fail comparison gate when no comparable rows remain pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-zero-compared + +# optional: fail comparison gate when any input JSON is skipped +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-skipped-files ``` Finalize and publish to benchmark history in one command: @@ -203,6 +206,9 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # optional: fail comparison gate when no comparable rows remain after filtering pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-zero-compared + +# optional: fail comparison gate when any input JSON is skipped (no reports/usable metrics) +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-skipped-files ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 2aae10ca84..dcc263bbbb 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -17,6 +17,7 @@ function parseArgs(argv) { minSamplesPerRow: 1, failOnParseErrors: false, failOnZeroCompared: false, + failOnSkippedFiles: false, }; for (let i = 2; i < argv.length; i++) { @@ -94,6 +95,10 @@ function parseArgs(argv) { options.failOnZeroCompared = true; continue; } + if (arg === "--fail-on-skipped-files") { 
+ options.failOnSkippedFiles = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -101,7 +106,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] [--fail-on-skipped-files] Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } @@ -425,6 +430,7 @@ function toMarkdown( md += `Sample gate: min_samples_per_row>=${options.minSamplesPerRow}\n\n`; md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? "fail_if_zero" : "allow"}\n\n`; + md += `Skipped-file gate: skipped_files=${options.failOnSkippedFiles ? 
"fail" : "allow"}\n\n`; md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, parse_errors=${baselineStats.parseErrors.length}\n`; md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; @@ -528,6 +534,12 @@ function buildJsonOutput( if (options.failOnZeroCompared && comparisons.length === 0) { failureReasons.push("zero_compared_rows"); } + if ( + options.failOnSkippedFiles && + (baselineStats.skippedFiles > 0 || candidateStats.skippedFiles > 0) + ) { + failureReasons.push("skipped_files"); + } const passed = failureReasons.length === 0; return { generatedAt: new Date().toISOString(), @@ -540,6 +552,7 @@ function buildJsonOutput( minSamplesPerRow: options.minSamplesPerRow, failOnParseErrors: options.failOnParseErrors, failOnZeroCompared: options.failOnZeroCompared, + failOnSkippedFiles: options.failOnSkippedFiles, }, fileStats: { baseline: baselineStats, @@ -565,6 +578,10 @@ function buildJsonOutput( (baselineStats.parseErrors.length === 0 && candidateStats.parseErrors.length === 0), zeroComparedRows: !options.failOnZeroCompared || comparisons.length > 0, + skippedFiles: + !options.failOnSkippedFiles || + (baselineStats.skippedFiles === 0 && + candidateStats.skippedFiles === 0), }, }, regressions, diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 74f50d1254..3c5dc0eb0b 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -24,6 +24,7 @@ function 
parseArgs(argv) { minSamplesPerRow: 1, failOnParseErrors: false, failOnZeroCompared: false, + failOnSkippedFiles: false, }; for (let i = 2; i < argv.length; i++) { @@ -147,6 +148,10 @@ function parseArgs(argv) { options.failOnZeroCompared = true; continue; } + if (arg === "--fail-on-skipped-files") { + options.failOnSkippedFiles = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -154,7 +159,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] [--fail-on-skipped-files] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -290,6 +295,9 @@ function main() { if (options.failOnZeroCompared) { compareArgs.push("--fail-on-zero-compared"); } + if (options.failOnSkippedFiles) { + compareArgs.push("--fail-on-skipped-files"); + } compareArgs.push("--min-samples-per-row", String(options.minSamplesPerRow)); run("node", compareArgs); } @@ -339,6 +347,7 @@ function main() { minSamplesPerRow: options.minSamplesPerRow, failOnParseErrors: options.failOnParseErrors, failOnZeroCompared: options.failOnZeroCompared, + failOnSkippedFiles: options.failOnSkippedFiles, }, results: { validationPassed: validation.passed === true, diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 94231e6070..47b6d12148 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -105,6 +105,7 @@ function buildSummarySection( markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; markdown += `- Parse error policy: ${comparison.tolerance?.failOnParseErrors ? "fail" : "allow"}\n`; markdown += `- Zero-compare policy: ${comparison.tolerance?.failOnZeroCompared ? "fail" : "allow"}\n`; + markdown += `- Skipped-file policy: ${comparison.tolerance?.failOnSkippedFiles ? "fail" : "allow"}\n`; markdown += `- Baseline parse errors: ${comparison.fileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; markdown += `- Candidate parse errors: ${comparison.fileStats?.candidate?.parseErrors?.length ?? 
"n/a"}\n\n`; const failureReasons = Array.isArray(comparison.summary?.failureReasons) From 7567a1f351e515fe934bce238a76d5dd507b4c97 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:33:02 +0000 Subject: [PATCH 154/333] improve: add skipped-file gating for comparison workflows Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 +++ crates/editor/PLAYBACK-FINDINGS.md | 9 +++++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 ++++++ scripts/compare-playback-benchmark-runs.js | 19 ++++++++++++++++++- scripts/finalize-playback-matrix.js | 11 ++++++++++- scripts/publish-playback-matrix-summary.js | 1 + 6 files changed, 47 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 43e7d2a355..6b3737b4e7 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -84,6 +84,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --ou node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-zero-compared +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-skipped-files node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input 
/path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md @@ -109,6 +110,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-zero-compared +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-skipped-files node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. @@ -116,6 +118,7 @@ Comparison table also reports baseline/candidate run counts per row when multipl Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for automation-friendly gate diagnostics. Minimum sample gating is applied against metrics that are actually comparable for the row (for example, scrub samples are not required for rows with no scrub metric comparison). Comparison JSON also includes baseline/candidate file parsing stats and optional parse-error gating support. +Comparison can optionally gate on skipped input files via `--fail-on-skipped-files`. 
``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 437ee37631..be3ceae240 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -409,6 +409,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Seek during warmup now resets warmup timers/buffer state and updates frame/audio playhead targets immediately. - Improves responsiveness when users seek while playback is still warming up. +53. **Added optional skipped-file gating for comparison workflows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-skipped-files`. + - Enables strict failure when baseline/candidate inputs include skipped JSON files without usable benchmark payloads. + - `scripts/finalize-playback-matrix.js` forwards skipped-file gating option in integrated compare flows. + --- ## Root Cause Analysis Archive @@ -557,6 +562,7 @@ Decoder Pipeline: 56. Extended finalize and publish summaries with comparison count rollups (compared rows, regressions, missing/candidate-only/insufficient-sample counts). 57. Added optional zero-comparison gating (`--fail-on-zero-compared`) for compare/finalize flows and surfaced zero-compare policy in comparison/published summaries. 58. Added warmup-stage seek handling to apply seeks immediately while playback warmup is in progress. +59. Added optional skipped-file gating (`--fail-on-skipped-files`) for compare/finalize flows and surfaced skipped-file policy in comparison/published summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -593,7 +599,9 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: comparison outputs now include candidate-only rows in addition to missing-candidate coverage deltas. 
- `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. +- `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-skipped-files` gate and parse/skip policy reporting in comparison markdown/json outputs. - `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. +- `scripts/finalize-playback-matrix.js`: forwards `--fail-on-skipped-files` into compare stage and records skipped-file policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes missing-candidate and candidate-only coverage policy modes from comparison JSON tolerance settings. - `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. @@ -613,6 +621,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added parse-error gating and baseline/candidate file parse stats/parse error entries in JSON output. - `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes skipped-file policy mode from comparison tolerance settings. 
- `crates/editor/src/playback.rs`: `insert_prefetched_frame` now returns structural-change signals and warmup cache invalidation uses this signal to avoid stale contiguous counts when insert+trim keeps buffer length unchanged. - `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison file stats payload when comparison runs are enabled. - `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 686ca5efe5..a2fd851e59 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -141,6 +141,9 @@ pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir # optional: fail comparison gate when no comparable rows remain pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-zero-compared + +# optional: fail comparison gate when any input JSON is skipped +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-skipped-files ``` Finalize and publish to benchmark history in one command: @@ -203,6 +206,9 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # optional: fail comparison gate when no comparable rows remain after filtering pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-zero-compared + +# optional: fail comparison gate when any input JSON is skipped (no reports/usable metrics) +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-skipped-files ``` ## Evidence checklist diff --git 
a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 2aae10ca84..dcc263bbbb 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -17,6 +17,7 @@ function parseArgs(argv) { minSamplesPerRow: 1, failOnParseErrors: false, failOnZeroCompared: false, + failOnSkippedFiles: false, }; for (let i = 2; i < argv.length; i++) { @@ -94,6 +95,10 @@ function parseArgs(argv) { options.failOnZeroCompared = true; continue; } + if (arg === "--fail-on-skipped-files") { + options.failOnSkippedFiles = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -101,7 +106,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] [--fail-on-skipped-files] Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } @@ -425,6 +430,7 @@ function toMarkdown( md += `Sample gate: min_samples_per_row>=${options.minSamplesPerRow}\n\n`; md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? 
"fail_if_zero" : "allow"}\n\n`; + md += `Skipped-file gate: skipped_files=${options.failOnSkippedFiles ? "fail" : "allow"}\n\n`; md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, parse_errors=${baselineStats.parseErrors.length}\n`; md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; @@ -528,6 +534,12 @@ function buildJsonOutput( if (options.failOnZeroCompared && comparisons.length === 0) { failureReasons.push("zero_compared_rows"); } + if ( + options.failOnSkippedFiles && + (baselineStats.skippedFiles > 0 || candidateStats.skippedFiles > 0) + ) { + failureReasons.push("skipped_files"); + } const passed = failureReasons.length === 0; return { generatedAt: new Date().toISOString(), @@ -540,6 +552,7 @@ function buildJsonOutput( minSamplesPerRow: options.minSamplesPerRow, failOnParseErrors: options.failOnParseErrors, failOnZeroCompared: options.failOnZeroCompared, + failOnSkippedFiles: options.failOnSkippedFiles, }, fileStats: { baseline: baselineStats, @@ -565,6 +578,10 @@ function buildJsonOutput( (baselineStats.parseErrors.length === 0 && candidateStats.parseErrors.length === 0), zeroComparedRows: !options.failOnZeroCompared || comparisons.length > 0, + skippedFiles: + !options.failOnSkippedFiles || + (baselineStats.skippedFiles === 0 && + candidateStats.skippedFiles === 0), }, }, regressions, diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 74f50d1254..3c5dc0eb0b 100644 --- 
a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -24,6 +24,7 @@ function parseArgs(argv) { minSamplesPerRow: 1, failOnParseErrors: false, failOnZeroCompared: false, + failOnSkippedFiles: false, }; for (let i = 2; i < argv.length; i++) { @@ -147,6 +148,10 @@ function parseArgs(argv) { options.failOnZeroCompared = true; continue; } + if (arg === "--fail-on-skipped-files") { + options.failOnSkippedFiles = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -154,7 +159,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] [--fail-on-skipped-files] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -290,6 +295,9 @@ function main() { if (options.failOnZeroCompared) { compareArgs.push("--fail-on-zero-compared"); } + if (options.failOnSkippedFiles) { + compareArgs.push("--fail-on-skipped-files"); + } compareArgs.push("--min-samples-per-row", String(options.minSamplesPerRow)); run("node", compareArgs); } @@ -339,6 +347,7 @@ function main() { minSamplesPerRow: options.minSamplesPerRow, failOnParseErrors: options.failOnParseErrors, failOnZeroCompared: options.failOnZeroCompared, + failOnSkippedFiles: options.failOnSkippedFiles, }, results: { validationPassed: validation.passed === true, diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 94231e6070..47b6d12148 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -105,6 +105,7 @@ function buildSummarySection( markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; markdown += `- Parse error policy: ${comparison.tolerance?.failOnParseErrors ? "fail" : "allow"}\n`; markdown += `- Zero-compare policy: ${comparison.tolerance?.failOnZeroCompared ? "fail" : "allow"}\n`; + markdown += `- Skipped-file policy: ${comparison.tolerance?.failOnSkippedFiles ? "fail" : "allow"}\n`; markdown += `- Baseline parse errors: ${comparison.fileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; markdown += `- Candidate parse errors: ${comparison.fileStats?.candidate?.parseErrors?.length ?? 
"n/a"}\n\n`; const failureReasons = Array.isArray(comparison.summary?.failureReasons) From baace4f2e1657c10d14bf1cd667db00a4c093251 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:35:09 +0000 Subject: [PATCH 155/333] improve: add skipped-file reason breakdown to comparison diagnostics Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 ++ scripts/compare-playback-benchmark-runs.js | 8 ++++++-- scripts/publish-playback-matrix-summary.js | 6 ++++++ 5 files changed, 22 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 6b3737b4e7..c06f822c5d 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -119,6 +119,7 @@ Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for Minimum sample gating is applied against metrics that are actually comparable for the row (for example, scrub samples are not required for rows with no scrub metric comparison). Comparison JSON also includes baseline/candidate file parsing stats and optional parse-error gating support. Comparison can optionally gate on skipped input files via `--fail-on-skipped-files`. +Comparison file stats now include skipped-file breakdown for `no_reports` and `no_usable_metrics`. ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index be3ceae240..674d267922 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -414,6 +414,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Enables strict failure when baseline/candidate inputs include skipped JSON files without usable benchmark payloads. - `scripts/finalize-playback-matrix.js` forwards skipped-file gating option in integrated compare flows. +54. 
**Added skipped-file reason breakdown in comparison file stats (2026-02-13)** + - Comparison file stats now report skipped-file reasons as `skippedNoReports` and `skippedNoUsableMetrics`. + - Published summary now surfaces skipped-file breakdown for baseline and candidate inputs. + --- ## Root Cause Analysis Archive @@ -563,6 +567,7 @@ Decoder Pipeline: 57. Added optional zero-comparison gating (`--fail-on-zero-compared`) for compare/finalize flows and surfaced zero-compare policy in comparison/published summaries. 58. Added warmup-stage seek handling to apply seeks immediately while playback warmup is in progress. 59. Added optional skipped-file gating (`--fail-on-skipped-files`) for compare/finalize flows and surfaced skipped-file policy in comparison/published summaries. +60. Added skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`) into comparison file stats and published summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -600,6 +605,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. - `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-skipped-files` gate and parse/skip policy reporting in comparison markdown/json outputs. +- `scripts/compare-playback-benchmark-runs.js`: comparison file stats now include skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`). - `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. 
- `scripts/finalize-playback-matrix.js`: forwards `--fail-on-skipped-files` into compare stage and records skipped-file policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. @@ -622,6 +628,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes skipped-file policy mode from comparison tolerance settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes skipped-file breakdown counts for no-reports and no-usable-metrics cases. - `crates/editor/src/playback.rs`: `insert_prefetched_frame` now returns structural-change signals and warmup cache invalidation uses this signal to avoid stale contiguous counts when insert+trim keeps buffer length unchanged. - `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison file stats payload when comparison runs are enabled. - `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index a2fd851e59..b4302f8808 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -209,6 +209,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # optional: fail comparison gate when any input JSON is skipped (no reports/usable metrics) pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-skipped-files + +# comparison file stats include skipped-file breakdown (no_reports / no_usable_metrics) ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index dcc263bbbb..96b5e9432e 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -161,6 +161,8 @@ function collectMetrics(files) { parsedFiles: 0, usableFiles: 0, skippedFiles: 0, + skippedNoReports: 0, + skippedNoUsableMetrics: 0, parseErrors: [], }; @@ -179,6 +181,7 @@ function collectMetrics(files) { if (!Array.isArray(parsed.reports) || parsed.reports.length === 0) { stats.skippedFiles += 1; + stats.skippedNoReports += 1; continue; } @@ -240,6 +243,7 @@ function collectMetrics(files) { stats.usableFiles += 1; } else { stats.skippedFiles += 1; + stats.skippedNoUsableMetrics += 1; } } @@ -431,8 +435,8 @@ function toMarkdown( md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? "fail_if_zero" : "allow"}\n\n`; md += `Skipped-file gate: skipped_files=${options.failOnSkippedFiles ? 
"fail" : "allow"}\n\n`; - md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, parse_errors=${baselineStats.parseErrors.length}\n`; - md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, parse_errors=${candidateStats.parseErrors.length}\n\n`; + md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; + md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_metrics=${candidateStats.skippedNoUsableMetrics}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; if ( baselineStats.parseErrors.length > 0 || diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 47b6d12148..55f6b5b4cc 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -108,6 +108,12 @@ function buildSummarySection( markdown += `- Skipped-file policy: ${comparison.tolerance?.failOnSkippedFiles ? "fail" : "allow"}\n`; markdown += `- Baseline parse errors: ${comparison.fileStats?.baseline?.parseErrors?.length ?? 
"n/a"}\n`; markdown += `- Candidate parse errors: ${comparison.fileStats?.candidate?.parseErrors?.length ?? "n/a"}\n\n`; + markdown += `- Baseline skipped files: ${comparison.fileStats?.baseline?.skippedFiles ?? "n/a"}\n`; + markdown += `- Candidate skipped files: ${comparison.fileStats?.candidate?.skippedFiles ?? "n/a"}\n`; + markdown += `- Baseline skipped (no reports): ${comparison.fileStats?.baseline?.skippedNoReports ?? "n/a"}\n`; + markdown += `- Candidate skipped (no reports): ${comparison.fileStats?.candidate?.skippedNoReports ?? "n/a"}\n`; + markdown += `- Baseline skipped (no usable metrics): ${comparison.fileStats?.baseline?.skippedNoUsableMetrics ?? "n/a"}\n`; + markdown += `- Candidate skipped (no usable metrics): ${comparison.fileStats?.candidate?.skippedNoUsableMetrics ?? "n/a"}\n\n`; const failureReasons = Array.isArray(comparison.summary?.failureReasons) ? comparison.summary.failureReasons : []; From e98a64ef43d7a4a142edc12b927b4cb2cd8490e5 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:35:09 +0000 Subject: [PATCH 156/333] improve: add skipped-file reason breakdown to comparison diagnostics Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 ++ scripts/compare-playback-benchmark-runs.js | 8 ++++++-- scripts/publish-playback-matrix-summary.js | 6 ++++++ 5 files changed, 22 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 6b3737b4e7..c06f822c5d 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -119,6 +119,7 @@ Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for Minimum sample gating is applied against metrics that are actually comparable for the row (for example, scrub samples are not required for rows with no scrub metric comparison). 
Comparison JSON also includes baseline/candidate file parsing stats and optional parse-error gating support. Comparison can optionally gate on skipped input files via `--fail-on-skipped-files`. +Comparison file stats now include skipped-file breakdown for `no_reports` and `no_usable_metrics`. ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index be3ceae240..674d267922 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -414,6 +414,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Enables strict failure when baseline/candidate inputs include skipped JSON files without usable benchmark payloads. - `scripts/finalize-playback-matrix.js` forwards skipped-file gating option in integrated compare flows. +54. **Added skipped-file reason breakdown in comparison file stats (2026-02-13)** + - Comparison file stats now report skipped-file reasons as `skippedNoReports` and `skippedNoUsableMetrics`. + - Published summary now surfaces skipped-file breakdown for baseline and candidate inputs. + --- ## Root Cause Analysis Archive @@ -563,6 +567,7 @@ Decoder Pipeline: 57. Added optional zero-comparison gating (`--fail-on-zero-compared`) for compare/finalize flows and surfaced zero-compare policy in comparison/published summaries. 58. Added warmup-stage seek handling to apply seeks immediately while playback warmup is in progress. 59. Added optional skipped-file gating (`--fail-on-skipped-files`) for compare/finalize flows and surfaced skipped-file policy in comparison/published summaries. +60. Added skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`) into comparison file stats and published summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. 
@@ -600,6 +605,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. - `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-skipped-files` gate and parse/skip policy reporting in comparison markdown/json outputs. +- `scripts/compare-playback-benchmark-runs.js`: comparison file stats now include skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`). - `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. - `scripts/finalize-playback-matrix.js`: forwards `--fail-on-skipped-files` into compare stage and records skipped-file policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. @@ -622,6 +628,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes skipped-file policy mode from comparison tolerance settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes skipped-file breakdown counts for no-reports and no-usable-metrics cases. - `crates/editor/src/playback.rs`: `insert_prefetched_frame` now returns structural-change signals and warmup cache invalidation uses this signal to avoid stale contiguous counts when insert+trim keeps buffer length unchanged. 
- `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison file stats payload when comparison runs are enabled. - `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index a2fd851e59..b4302f8808 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -209,6 +209,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # optional: fail comparison gate when any input JSON is skipped (no reports/usable metrics) pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-skipped-files + +# comparison file stats include skipped-file breakdown (no_reports / no_usable_metrics) ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index dcc263bbbb..96b5e9432e 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -161,6 +161,8 @@ function collectMetrics(files) { parsedFiles: 0, usableFiles: 0, skippedFiles: 0, + skippedNoReports: 0, + skippedNoUsableMetrics: 0, parseErrors: [], }; @@ -179,6 +181,7 @@ function collectMetrics(files) { if (!Array.isArray(parsed.reports) || parsed.reports.length === 0) { stats.skippedFiles += 1; + stats.skippedNoReports += 1; continue; } @@ -240,6 +243,7 @@ function collectMetrics(files) { stats.usableFiles += 1; } else { stats.skippedFiles += 1; + stats.skippedNoUsableMetrics += 1; } } @@ -431,8 +435,8 @@ function toMarkdown( md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? 
"fail_if_zero" : "allow"}\n\n`; md += `Skipped-file gate: skipped_files=${options.failOnSkippedFiles ? "fail" : "allow"}\n\n`; - md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, parse_errors=${baselineStats.parseErrors.length}\n`; - md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, parse_errors=${candidateStats.parseErrors.length}\n\n`; + md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; + md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_metrics=${candidateStats.skippedNoUsableMetrics}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; if ( baselineStats.parseErrors.length > 0 || diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 47b6d12148..55f6b5b4cc 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -108,6 +108,12 @@ function buildSummarySection( markdown += `- Skipped-file policy: ${comparison.tolerance?.failOnSkippedFiles ? 
"fail" : "allow"}\n`; markdown += `- Baseline parse errors: ${comparison.fileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; markdown += `- Candidate parse errors: ${comparison.fileStats?.candidate?.parseErrors?.length ?? "n/a"}\n\n`; + markdown += `- Baseline skipped files: ${comparison.fileStats?.baseline?.skippedFiles ?? "n/a"}\n`; + markdown += `- Candidate skipped files: ${comparison.fileStats?.candidate?.skippedFiles ?? "n/a"}\n`; + markdown += `- Baseline skipped (no reports): ${comparison.fileStats?.baseline?.skippedNoReports ?? "n/a"}\n`; + markdown += `- Candidate skipped (no reports): ${comparison.fileStats?.candidate?.skippedNoReports ?? "n/a"}\n`; + markdown += `- Baseline skipped (no usable metrics): ${comparison.fileStats?.baseline?.skippedNoUsableMetrics ?? "n/a"}\n`; + markdown += `- Candidate skipped (no usable metrics): ${comparison.fileStats?.candidate?.skippedNoUsableMetrics ?? "n/a"}\n\n`; const failureReasons = Array.isArray(comparison.summary?.failureReasons) ? comparison.summary.failureReasons : []; From fa6d23ac78a4f7674aaebc9293362c86b8dde327 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:40:42 +0000 Subject: [PATCH 157/333] improve: scale warmup idle polling with frame budget Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 674d267922..f8f779f126 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -418,6 +418,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison file stats now report skipped-file reasons as `skippedNoReports` and `skippedNoUsableMetrics`. - Published summary now surfaces skipped-file breakdown for baseline and candidate inputs. +55. 
**Scaled warmup idle poll interval by frame budget (2026-02-13)** + - Warmup loop fallback poll now scales with frame duration and stays in bounded low-latency range. + - Reduces fixed 100ms idle poll delay during warmup while avoiding high-frequency busy polling. + --- ## Root Cause Analysis Archive @@ -568,6 +572,7 @@ Decoder Pipeline: 58. Added warmup-stage seek handling to apply seeks immediately while playback warmup is in progress. 59. Added optional skipped-file gating (`--fail-on-skipped-files`) for compare/finalize flows and surfaced skipped-file policy in comparison/published summaries. 60. Added skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`) into comparison file stats and published summaries. +61. Scaled warmup idle poll interval with frame budget to reduce warmup fallback latency under sparse frame arrival. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -597,6 +602,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize comparison count rollups when finalize summary metadata is attached. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: warmup loop now handles seek updates immediately, resetting warmup state and updating frame/audio targets before playback loop entry. +- `crates/editor/src/playback.rs`: warmup loop fallback polling now scales with frame budget instead of fixed 100ms sleep to improve responsiveness without busy waiting. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. 
- `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 5735831682..2aabb51453 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -526,6 +526,10 @@ impl Playback { .max(Duration::from_millis(200)) .min(Duration::from_millis(700)); let warmup_no_frames_timeout = Duration::from_secs(5); + let warmup_idle_poll_interval = frame_duration + .mul_f64(0.5) + .max(Duration::from_millis(8)) + .min(Duration::from_millis(25)); let mut warmup_start = Instant::now(); let mut first_frame_time: Option = None; let mut warmup_contiguous_prefetched = 0usize; @@ -533,6 +537,7 @@ impl Playback { info!( warmup_target_frames, warmup_after_first_timeout_ms = warmup_after_first_timeout.as_secs_f64() * 1000.0, + warmup_idle_poll_interval_ms = warmup_idle_poll_interval.as_secs_f64() * 1000.0, "Playback warmup configuration" ); @@ -611,7 +616,7 @@ impl Playback { break; } } - _ = tokio::time::sleep(Duration::from_millis(100)) => { + _ = tokio::time::sleep(warmup_idle_poll_interval) => { } } } From b2c8a7d72436f11e12979ed830b25d22c93f065a Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:40:42 +0000 Subject: [PATCH 158/333] improve: scale warmup idle polling with frame budget Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 674d267922..f8f779f126 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -418,6 +418,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison file stats now report skipped-file reasons as `skippedNoReports` and `skippedNoUsableMetrics`. 
- Published summary now surfaces skipped-file breakdown for baseline and candidate inputs. +55. **Scaled warmup idle poll interval by frame budget (2026-02-13)** + - Warmup loop fallback poll now scales with frame duration and stays in bounded low-latency range. + - Reduces fixed 100ms idle poll delay during warmup while avoiding high-frequency busy polling. + --- ## Root Cause Analysis Archive @@ -568,6 +572,7 @@ Decoder Pipeline: 58. Added warmup-stage seek handling to apply seeks immediately while playback warmup is in progress. 59. Added optional skipped-file gating (`--fail-on-skipped-files`) for compare/finalize flows and surfaced skipped-file policy in comparison/published summaries. 60. Added skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`) into comparison file stats and published summaries. +61. Scaled warmup idle poll interval with frame budget to reduce warmup fallback latency under sparse frame arrival. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -597,6 +602,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize comparison count rollups when finalize summary metadata is attached. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: warmup loop now handles seek updates immediately, resetting warmup state and updating frame/audio targets before playback loop entry. +- `crates/editor/src/playback.rs`: warmup loop fallback polling now scales with frame budget instead of fixed 100ms sleep to improve responsiveness without busy waiting. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. 
- `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 5735831682..2aabb51453 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -526,6 +526,10 @@ impl Playback { .max(Duration::from_millis(200)) .min(Duration::from_millis(700)); let warmup_no_frames_timeout = Duration::from_secs(5); + let warmup_idle_poll_interval = frame_duration + .mul_f64(0.5) + .max(Duration::from_millis(8)) + .min(Duration::from_millis(25)); let mut warmup_start = Instant::now(); let mut first_frame_time: Option = None; let mut warmup_contiguous_prefetched = 0usize; @@ -533,6 +537,7 @@ impl Playback { info!( warmup_target_frames, warmup_after_first_timeout_ms = warmup_after_first_timeout.as_secs_f64() * 1000.0, + warmup_idle_poll_interval_ms = warmup_idle_poll_interval.as_secs_f64() * 1000.0, "Playback warmup configuration" ); @@ -611,7 +616,7 @@ impl Playback { break; } } - _ = tokio::time::sleep(Duration::from_millis(100)) => { + _ = tokio::time::sleep(warmup_idle_poll_interval) => { } } } From 297b9b4cc0cc2a8cccfce6fe9f5f069fa0a6a58f Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:42:13 +0000 Subject: [PATCH 159/333] improve: avoid duplicate prefetch decodes on small rebases Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 7 +++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index f8f779f126..e3f47b1423 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -422,6 +422,10 @@ cargo run -p cap-recording --example playback-test-runner 
-- full - Warmup loop fallback poll now scales with frame duration and stays in bounded low-latency range. - Reduces fixed 100ms idle poll delay during warmup while avoiding high-frequency busy polling. +56. **Retained in-flight prefetch markers for small frame-request shifts (2026-02-13)** + - Frame-request rebases now clear in-flight marker sets only for backward or large-distance seeks that also reset in-flight futures. + - Prevents duplicate decode scheduling caused by clearing marker sets while earlier in-flight futures are still active. + --- ## Root Cause Analysis Archive @@ -573,6 +577,7 @@ Decoder Pipeline: 59. Added optional skipped-file gating (`--fail-on-skipped-files`) for compare/finalize flows and surfaced skipped-file policy in comparison/published summaries. 60. Added skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`) into comparison file stats and published summaries. 61. Scaled warmup idle poll interval with frame budget to reduce warmup fallback latency under sparse frame arrival. +62. Retained in-flight prefetch markers for small frame-request shifts to avoid duplicate decode scheduling during active prefetch execution. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -603,6 +608,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: warmup loop now handles seek updates immediately, resetting warmup state and updating frame/audio targets before playback loop entry. - `crates/editor/src/playback.rs`: warmup loop fallback polling now scales with frame budget instead of fixed 100ms sleep to improve responsiveness without busy waiting. 
+- `crates/editor/src/playback.rs`: frame-request rebases now preserve in-flight marker sets unless in-flight futures are explicitly reset for backward/large seek changes. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 2aabb51453..c1a974d6e7 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -302,11 +302,10 @@ impl Playback { frames_decoded = 0; prefetched_behind.clear(); - if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.clear(); - } - if is_backward_seek || seek_distance > dynamic_prefetch_ahead / 2 { + if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { + in_flight_guard.clear(); + } in_flight = FuturesUnordered::new(); } } From ff55bcfefc2ae01b7a4726e7b3b46791b734bdf5 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:42:13 +0000 Subject: [PATCH 160/333] improve: avoid duplicate prefetch decodes on small rebases Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 7 +++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index f8f779f126..e3f47b1423 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -422,6 +422,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup loop fallback poll now scales with frame duration and stays in bounded low-latency range. 
- Reduces fixed 100ms idle poll delay during warmup while avoiding high-frequency busy polling. +56. **Retained in-flight prefetch markers for small frame-request shifts (2026-02-13)** + - Frame-request rebases now clear in-flight marker sets only for backward or large-distance seeks that also reset in-flight futures. + - Prevents duplicate decode scheduling caused by clearing marker sets while earlier in-flight futures are still active. + --- ## Root Cause Analysis Archive @@ -573,6 +577,7 @@ Decoder Pipeline: 59. Added optional skipped-file gating (`--fail-on-skipped-files`) for compare/finalize flows and surfaced skipped-file policy in comparison/published summaries. 60. Added skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`) into comparison file stats and published summaries. 61. Scaled warmup idle poll interval with frame budget to reduce warmup fallback latency under sparse frame arrival. +62. Retained in-flight prefetch markers for small frame-request shifts to avoid duplicate decode scheduling during active prefetch execution. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -603,6 +608,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: warmup loop now handles seek updates immediately, resetting warmup state and updating frame/audio targets before playback loop entry. - `crates/editor/src/playback.rs`: warmup loop fallback polling now scales with frame budget instead of fixed 100ms sleep to improve responsiveness without busy waiting. +- `crates/editor/src/playback.rs`: frame-request rebases now preserve in-flight marker sets unless in-flight futures are explicitly reset for backward/large seek changes. 
- `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 2aabb51453..c1a974d6e7 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -302,11 +302,10 @@ impl Playback { frames_decoded = 0; prefetched_behind.clear(); - if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.clear(); - } - if is_backward_seek || seek_distance > dynamic_prefetch_ahead / 2 { + if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { + in_flight_guard.clear(); + } in_flight = FuturesUnordered::new(); } } From 68a0128a34bbd438b62e36c41da941d648f19133 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:46:28 +0000 Subject: [PATCH 161/333] improve: reduce prefetch lock contention with local inflight tracking Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 18 ++++++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index e3f47b1423..d6eef91f8f 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -426,6 +426,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Frame-request rebases now clear in-flight marker sets only for backward or large-distance seeks that also reset in-flight futures. - Prevents duplicate decode scheduling caused by clearing marker sets while earlier in-flight futures are still active. +57. 
**Added local in-flight frame tracking inside prefetch scheduler (2026-02-13)** + - Prefetch scheduler now tracks active frame numbers locally and uses this set for duplicate scheduling checks. + - Reduces repeated shared lock reads in prefetch hot-path while preserving cross-thread in-flight visibility. + --- ## Root Cause Analysis Archive @@ -578,6 +582,7 @@ Decoder Pipeline: 60. Added skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`) into comparison file stats and published summaries. 61. Scaled warmup idle poll interval with frame budget to reduce warmup fallback latency under sparse frame arrival. 62. Retained in-flight prefetch markers for small frame-request shifts to avoid duplicate decode scheduling during active prefetch execution. +63. Added local in-flight frame tracking in prefetch scheduler to reduce lock-heavy duplicate-check lookups on scheduling hot-path. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -609,6 +614,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: warmup loop now handles seek updates immediately, resetting warmup state and updating frame/audio targets before playback loop entry. - `crates/editor/src/playback.rs`: warmup loop fallback polling now scales with frame budget instead of fixed 100ms sleep to improve responsiveness without busy waiting. - `crates/editor/src/playback.rs`: frame-request rebases now preserve in-flight marker sets unless in-flight futures are explicitly reset for backward/large seek changes. +- `crates/editor/src/playback.rs`: prefetch scheduler now uses a local in-flight frame set for duplicate scheduling checks and mirrors it into shared generation-keyed in-flight markers for playback coordination. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. 
- `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index c1a974d6e7..9d46270e60 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -243,6 +243,7 @@ impl Playback { let mut in_flight: FuturesUnordered = FuturesUnordered::new(); let mut frames_decoded: u32 = 0; let mut prefetched_behind: HashSet = HashSet::new(); + let mut scheduled_in_flight_frames: HashSet = HashSet::new(); const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); @@ -282,6 +283,7 @@ impl Playback { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } + scheduled_in_flight_frames.clear(); in_flight = FuturesUnordered::new(); } @@ -306,6 +308,7 @@ impl Playback { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } + scheduled_in_flight_frames.clear(); in_flight = FuturesUnordered::new(); } } @@ -333,11 +336,7 @@ impl Playback { break; } - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&(active_generation, frame_num))) - .unwrap_or(false); - if already_in_flight { + if scheduled_in_flight_frames.contains(&frame_num) { next_prefetch_frame += 1; continue; } @@ -360,6 +359,7 @@ impl Playback { let is_initial = frames_decoded < 10; let generation = active_generation; + scheduled_in_flight_frames.insert(frame_num); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.insert((generation, frame_num)); } @@ -400,11 +400,7 @@ impl Playback { continue; } - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| 
guard.contains(&(active_generation, behind_frame))) - .unwrap_or(false); - if already_in_flight { + if scheduled_in_flight_frames.contains(&behind_frame) { continue; } @@ -425,6 +421,7 @@ impl Playback { let segment_index = segment.recording_clip; let generation = active_generation; + scheduled_in_flight_frames.insert(behind_frame); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.insert((generation, behind_frame)); } @@ -444,6 +441,7 @@ impl Playback { biased; Some((frame_num, segment_index, generation, result)) = in_flight.next() => { + scheduled_in_flight_frames.remove(&frame_num); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.remove(&(generation, frame_num)); } From f19a98500e2463519e6e005c5d20a072d724c077 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:46:28 +0000 Subject: [PATCH 162/333] improve: reduce prefetch lock contention with local inflight tracking Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 18 ++++++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index e3f47b1423..d6eef91f8f 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -426,6 +426,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Frame-request rebases now clear in-flight marker sets only for backward or large-distance seeks that also reset in-flight futures. - Prevents duplicate decode scheduling caused by clearing marker sets while earlier in-flight futures are still active. +57. **Added local in-flight frame tracking inside prefetch scheduler (2026-02-13)** + - Prefetch scheduler now tracks active frame numbers locally and uses this set for duplicate scheduling checks. + - Reduces repeated shared lock reads in prefetch hot-path while preserving cross-thread in-flight visibility. 
+ --- ## Root Cause Analysis Archive @@ -578,6 +582,7 @@ Decoder Pipeline: 60. Added skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`) into comparison file stats and published summaries. 61. Scaled warmup idle poll interval with frame budget to reduce warmup fallback latency under sparse frame arrival. 62. Retained in-flight prefetch markers for small frame-request shifts to avoid duplicate decode scheduling during active prefetch execution. +63. Added local in-flight frame tracking in prefetch scheduler to reduce lock-heavy duplicate-check lookups on scheduling hot-path. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -609,6 +614,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: warmup loop now handles seek updates immediately, resetting warmup state and updating frame/audio targets before playback loop entry. - `crates/editor/src/playback.rs`: warmup loop fallback polling now scales with frame budget instead of fixed 100ms sleep to improve responsiveness without busy waiting. - `crates/editor/src/playback.rs`: frame-request rebases now preserve in-flight marker sets unless in-flight futures are explicitly reset for backward/large seek changes. +- `crates/editor/src/playback.rs`: prefetch scheduler now uses a local in-flight frame set for duplicate scheduling checks and mirrors it into shared generation-keyed in-flight markers for playback coordination. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index c1a974d6e7..9d46270e60 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -243,6 +243,7 @@ impl Playback { let mut in_flight: FuturesUnordered = FuturesUnordered::new(); let mut frames_decoded: u32 = 0; let mut prefetched_behind: HashSet = HashSet::new(); + let mut scheduled_in_flight_frames: HashSet = HashSet::new(); const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); @@ -282,6 +283,7 @@ impl Playback { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } + scheduled_in_flight_frames.clear(); in_flight = FuturesUnordered::new(); } @@ -306,6 +308,7 @@ impl Playback { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } + scheduled_in_flight_frames.clear(); in_flight = FuturesUnordered::new(); } } @@ -333,11 +336,7 @@ impl Playback { break; } - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&(active_generation, frame_num))) - .unwrap_or(false); - if already_in_flight { + if scheduled_in_flight_frames.contains(&frame_num) { next_prefetch_frame += 1; continue; } @@ -360,6 +359,7 @@ impl Playback { let is_initial = frames_decoded < 10; let generation = active_generation; + scheduled_in_flight_frames.insert(frame_num); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.insert((generation, frame_num)); } @@ -400,11 +400,7 @@ impl Playback { continue; } - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&(active_generation, behind_frame))) - .unwrap_or(false); - if already_in_flight { + if scheduled_in_flight_frames.contains(&behind_frame) { continue; } @@ -425,6 +421,7 @@ impl Playback { let segment_index = segment.recording_clip; let generation = active_generation; + 
scheduled_in_flight_frames.insert(behind_frame); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.insert((generation, behind_frame)); } @@ -444,6 +441,7 @@ impl Playback { biased; Some((frame_num, segment_index, generation, result)) = in_flight.next() => { + scheduled_in_flight_frames.remove(&frame_num); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.remove(&(generation, frame_num)); } From 2b2ea793522852a4094be611ea029a864588a24d Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:47:12 +0000 Subject: [PATCH 163/333] improve: batch warmup prefetch queue consumption Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 29 ++++++++++++++++++++--------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index d6eef91f8f..88d0049364 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -430,6 +430,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prefetch scheduler now tracks active frame numbers locally and uses this set for duplicate scheduling checks. - Reduces repeated shared lock reads in prefetch hot-path while preserving cross-thread in-flight visibility. +58. **Batched warmup prefetch queue consumption (2026-02-13)** + - Warmup stage now drains all immediately available prefetched frames on each receive wake-up. + - Improves warmup readiness convergence by reducing one-frame-per-iteration queue handling overhead. + --- ## Root Cause Analysis Archive @@ -583,6 +587,7 @@ Decoder Pipeline: 61. Scaled warmup idle poll interval with frame budget to reduce warmup fallback latency under sparse frame arrival. 62. Retained in-flight prefetch markers for small frame-request shifts to avoid duplicate decode scheduling during active prefetch execution. 63. 
Added local in-flight frame tracking in prefetch scheduler to reduce lock-heavy duplicate-check lookups on scheduling hot-path. +64. Batched warmup prefetch queue consumption to reduce warmup staging overhead and improve contiguous warmup-fill responsiveness. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -615,6 +620,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: warmup loop fallback polling now scales with frame budget instead of fixed 100ms sleep to improve responsiveness without busy waiting. - `crates/editor/src/playback.rs`: frame-request rebases now preserve in-flight marker sets unless in-flight futures are explicitly reset for backward/large seek changes. - `crates/editor/src/playback.rs`: prefetch scheduler now uses a local in-flight frame set for duplicate scheduling checks and mirrors it into shared generation-keyed in-flight markers for playback coordination. +- `crates/editor/src/playback.rs`: warmup prefetch receive path now drains immediately queued prefetched frames in batches to accelerate warmup buffer population. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 9d46270e60..f8acc46091 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -574,17 +574,28 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { - if prefetched.generation == seek_generation { - if insert_prefetched_frame( - &mut prefetch_buffer, - prefetched, - frame_number, - ) { + let mut next_prefetched = Some(prefetched); + + loop { + let Some(prefetched) = next_prefetched.take() else { + break; + }; + + if prefetched.generation == seek_generation + && insert_prefetched_frame( + &mut prefetch_buffer, + prefetched, + frame_number, + ) + { warmup_buffer_changed = true; } - if first_frame_time.is_none() && !prefetch_buffer.is_empty() { - first_frame_time = Some(Instant::now()); - } + + next_prefetched = prefetch_rx.try_recv().ok(); + } + + if first_frame_time.is_none() && !prefetch_buffer.is_empty() { + first_frame_time = Some(Instant::now()); } } _ = seek_rx.changed() => { From ed3efe5013da3fe78adbbf3727a9ef8bd6bd6e71 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:47:12 +0000 Subject: [PATCH 164/333] improve: batch warmup prefetch queue consumption Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 29 ++++++++++++++++++++--------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index d6eef91f8f..88d0049364 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -430,6 +430,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prefetch scheduler now tracks active frame numbers locally and uses this set for duplicate scheduling checks. - Reduces repeated shared lock reads in prefetch hot-path while preserving cross-thread in-flight visibility. +58. **Batched warmup prefetch queue consumption (2026-02-13)** + - Warmup stage now drains all immediately available prefetched frames on each receive wake-up. + - Improves warmup readiness convergence by reducing one-frame-per-iteration queue handling overhead. 
+ --- ## Root Cause Analysis Archive @@ -583,6 +587,7 @@ Decoder Pipeline: 61. Scaled warmup idle poll interval with frame budget to reduce warmup fallback latency under sparse frame arrival. 62. Retained in-flight prefetch markers for small frame-request shifts to avoid duplicate decode scheduling during active prefetch execution. 63. Added local in-flight frame tracking in prefetch scheduler to reduce lock-heavy duplicate-check lookups on scheduling hot-path. +64. Batched warmup prefetch queue consumption to reduce warmup staging overhead and improve contiguous warmup-fill responsiveness. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -615,6 +620,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: warmup loop fallback polling now scales with frame budget instead of fixed 100ms sleep to improve responsiveness without busy waiting. - `crates/editor/src/playback.rs`: frame-request rebases now preserve in-flight marker sets unless in-flight futures are explicitly reset for backward/large seek changes. - `crates/editor/src/playback.rs`: prefetch scheduler now uses a local in-flight frame set for duplicate scheduling checks and mirrors it into shared generation-keyed in-flight markers for playback coordination. +- `crates/editor/src/playback.rs`: warmup prefetch receive path now drains immediately queued prefetched frames in batches to accelerate warmup buffer population. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 9d46270e60..f8acc46091 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -574,17 +574,28 @@ impl Playback { tokio::select! { Some(prefetched) = prefetch_rx.recv() => { - if prefetched.generation == seek_generation { - if insert_prefetched_frame( - &mut prefetch_buffer, - prefetched, - frame_number, - ) { + let mut next_prefetched = Some(prefetched); + + loop { + let Some(prefetched) = next_prefetched.take() else { + break; + }; + + if prefetched.generation == seek_generation + && insert_prefetched_frame( + &mut prefetch_buffer, + prefetched, + frame_number, + ) + { warmup_buffer_changed = true; } - if first_frame_time.is_none() && !prefetch_buffer.is_empty() { - first_frame_time = Some(Instant::now()); - } + + next_prefetched = prefetch_rx.try_recv().ok(); + } + + if first_frame_time.is_none() && !prefetch_buffer.is_empty() { + first_frame_time = Some(Instant::now()); } } _ = seek_rx.changed() => { From 0965dec090b6d22025f860c590b19756f5d7c1bf Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:47:56 +0000 Subject: [PATCH 165/333] improve: scale prefetch idle polling with frame budget Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 11 +++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 88d0049364..db43a45c4e 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -434,6 +434,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup stage now drains all immediately available prefetched frames on each receive wake-up. - Improves warmup readiness convergence by reducing one-frame-per-iteration queue handling overhead. +59. 
**Scaled prefetch idle polling by frame budget (2026-02-13)** + - Prefetch scheduler idle-yield interval now scales with target frame duration in a bounded low-latency range. + - Reduces fixed 1ms wakeup churn in empty in-flight periods while keeping prefetch request responsiveness high. + --- ## Root Cause Analysis Archive @@ -588,6 +592,7 @@ Decoder Pipeline: 62. Retained in-flight prefetch markers for small frame-request shifts to avoid duplicate decode scheduling during active prefetch execution. 63. Added local in-flight frame tracking in prefetch scheduler to reduce lock-heavy duplicate-check lookups on scheduling hot-path. 64. Batched warmup prefetch queue consumption to reduce warmup staging overhead and improve contiguous warmup-fill responsiveness. +65. Scaled prefetch idle polling with frame budget to reduce scheduler wakeup churn during empty in-flight periods. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -621,6 +626,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: frame-request rebases now preserve in-flight marker sets unless in-flight futures are explicitly reset for backward/large seek changes. - `crates/editor/src/playback.rs`: prefetch scheduler now uses a local in-flight frame set for duplicate scheduling checks and mirrors it into shared generation-keyed in-flight markers for playback coordination. - `crates/editor/src/playback.rs`: warmup prefetch receive path now drains immediately queued prefetched frames in batches to accelerate warmup buffer population. +- `crates/editor/src/playback.rs`: prefetch scheduler idle polling now scales with frame budget (bounded) instead of fixed 1ms delay, reducing idle wakeup overhead. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. 
- `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index f8acc46091..f83448472d 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -255,12 +255,19 @@ impl Playback { PARALLEL_DECODE_TASKS }; let initial_parallel_tasks = dynamic_parallel_tasks.min(4); + let prefetch_idle_poll_interval = Duration::from_secs_f64(1.0 / fps_f64) + .mul_f64(0.25) + .max(Duration::from_millis(2)) + .min(Duration::from_millis(8)); let mut active_generation = *prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); info!( dynamic_prefetch_ahead, - dynamic_prefetch_behind, dynamic_parallel_tasks, "Prefetch window configuration" + dynamic_prefetch_behind, + dynamic_parallel_tasks, + prefetch_idle_poll_interval_ms = prefetch_idle_poll_interval.as_secs_f64() * 1000.0, + "Prefetch window configuration" ); loop { @@ -468,7 +475,7 @@ impl Playback { } } - _ = tokio::time::sleep(Duration::from_millis(1)), if in_flight.is_empty() => {} + _ = tokio::time::sleep(prefetch_idle_poll_interval), if in_flight.is_empty() => {} } } }); From ba334da1327dfe2f035b9b8fff696e8d9f8b41e9 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:47:56 +0000 Subject: [PATCH 166/333] improve: scale prefetch idle polling with frame budget Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 11 +++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 88d0049364..db43a45c4e 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -434,6 
+434,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup stage now drains all immediately available prefetched frames on each receive wake-up. - Improves warmup readiness convergence by reducing one-frame-per-iteration queue handling overhead. +59. **Scaled prefetch idle polling by frame budget (2026-02-13)** + - Prefetch scheduler idle-yield interval now scales with target frame duration in a bounded low-latency range. + - Reduces fixed 1ms wakeup churn in empty in-flight periods while keeping prefetch request responsiveness high. + --- ## Root Cause Analysis Archive @@ -588,6 +592,7 @@ Decoder Pipeline: 62. Retained in-flight prefetch markers for small frame-request shifts to avoid duplicate decode scheduling during active prefetch execution. 63. Added local in-flight frame tracking in prefetch scheduler to reduce lock-heavy duplicate-check lookups on scheduling hot-path. 64. Batched warmup prefetch queue consumption to reduce warmup staging overhead and improve contiguous warmup-fill responsiveness. +65. Scaled prefetch idle polling with frame budget to reduce scheduler wakeup churn during empty in-flight periods. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -621,6 +626,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: frame-request rebases now preserve in-flight marker sets unless in-flight futures are explicitly reset for backward/large seek changes. - `crates/editor/src/playback.rs`: prefetch scheduler now uses a local in-flight frame set for duplicate scheduling checks and mirrors it into shared generation-keyed in-flight markers for playback coordination. - `crates/editor/src/playback.rs`: warmup prefetch receive path now drains immediately queued prefetched frames in batches to accelerate warmup buffer population. 
+- `crates/editor/src/playback.rs`: prefetch scheduler idle polling now scales with frame budget (bounded) instead of fixed 1ms delay, reducing idle wakeup overhead. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index f8acc46091..f83448472d 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -255,12 +255,19 @@ impl Playback { PARALLEL_DECODE_TASKS }; let initial_parallel_tasks = dynamic_parallel_tasks.min(4); + let prefetch_idle_poll_interval = Duration::from_secs_f64(1.0 / fps_f64) + .mul_f64(0.25) + .max(Duration::from_millis(2)) + .min(Duration::from_millis(8)); let mut active_generation = *prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); info!( dynamic_prefetch_ahead, - dynamic_prefetch_behind, dynamic_parallel_tasks, "Prefetch window configuration" + dynamic_prefetch_behind, + dynamic_parallel_tasks, + prefetch_idle_poll_interval_ms = prefetch_idle_poll_interval.as_secs_f64() * 1000.0, + "Prefetch window configuration" ); loop { @@ -468,7 +475,7 @@ impl Playback { } } - _ = tokio::time::sleep(Duration::from_millis(1)), if in_flight.is_empty() => {} + _ = tokio::time::sleep(prefetch_idle_poll_interval), if in_flight.is_empty() => {} } } }); From c801652efd85d0cf6bde578ebe963bc9d7221587 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:48:47 +0000 Subject: [PATCH 167/333] improve: bound behind-prefetch dedupe tracking window Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ 
crates/editor/src/playback.rs | 15 +++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index db43a45c4e..5153575812 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -438,6 +438,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prefetch scheduler idle-yield interval now scales with target frame duration in a bounded low-latency range. - Reduces fixed 1ms wakeup churn in empty in-flight periods while keeping prefetch request responsiveness high. +60. **Bounded behind-prefetch dedupe memory growth (2026-02-13)** + - Behind-prefetch dedupe tracking now keeps a bounded eviction-ordered window instead of unbounded growth over long playback sessions. + - Prevents long-session hash-set expansion from degrading behind-prefetch lookup efficiency. + --- ## Root Cause Analysis Archive @@ -593,6 +597,7 @@ Decoder Pipeline: 63. Added local in-flight frame tracking in prefetch scheduler to reduce lock-heavy duplicate-check lookups on scheduling hot-path. 64. Batched warmup prefetch queue consumption to reduce warmup staging overhead and improve contiguous warmup-fill responsiveness. 65. Scaled prefetch idle polling with frame budget to reduce scheduler wakeup churn during empty in-flight periods. +66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -627,6 +632,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch scheduler now uses a local in-flight frame set for duplicate scheduling checks and mirrors it into shared generation-keyed in-flight markers for playback coordination. 
- `crates/editor/src/playback.rs`: warmup prefetch receive path now drains immediately queued prefetched frames in batches to accelerate warmup buffer population. - `crates/editor/src/playback.rs`: prefetch scheduler idle polling now scales with frame budget (bounded) instead of fixed 1ms delay, reducing idle wakeup overhead. +- `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index f83448472d..2d6bd6ca97 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -16,7 +16,7 @@ use cpal::{ use futures::stream::{FuturesUnordered, StreamExt}; use lru::LruCache; use std::{ - collections::{BTreeMap, HashSet}, + collections::{BTreeMap, HashSet, VecDeque}, num::NonZeroUsize, sync::{ Arc, RwLock, @@ -243,6 +243,7 @@ impl Playback { let mut in_flight: FuturesUnordered = FuturesUnordered::new(); let mut frames_decoded: u32 = 0; let mut prefetched_behind: HashSet = HashSet::new(); + let mut prefetched_behind_order: VecDeque = VecDeque::new(); let mut scheduled_in_flight_frames: HashSet = HashSet::new(); const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90); @@ -259,6 +260,7 @@ impl Playback { .mul_f64(0.25) .max(Duration::from_millis(2)) .min(Duration::from_millis(8)); + let prefetched_behind_capacity = (dynamic_prefetch_behind as usize).saturating_mul(8); let mut active_generation = 
*prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); @@ -286,6 +288,7 @@ impl Playback { next_prefetch_frame = *frame_request_rx.borrow(); frames_decoded = 0; prefetched_behind.clear(); + prefetched_behind_order.clear(); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); @@ -310,6 +313,7 @@ impl Playback { next_prefetch_frame = requested; frames_decoded = 0; prefetched_behind.clear(); + prefetched_behind_order.clear(); if is_backward_seek || seek_distance > dynamic_prefetch_ahead / 2 { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { @@ -433,7 +437,14 @@ impl Playback { in_flight_guard.insert((generation, behind_frame)); } - prefetched_behind.insert(behind_frame); + if prefetched_behind.insert(behind_frame) { + prefetched_behind_order.push_back(behind_frame); + while prefetched_behind_order.len() > prefetched_behind_capacity { + if let Some(evicted) = prefetched_behind_order.pop_front() { + prefetched_behind.remove(&evicted); + } + } + } in_flight.push(Box::pin(async move { let result = decoders .get_frames(segment_time as f32, !hide_camera, clip_offsets) From e4a53192ee7fa7b86473b6e783a8ec45a1d3e39a Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:48:47 +0000 Subject: [PATCH 168/333] improve: bound behind-prefetch dedupe tracking window Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 15 +++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index db43a45c4e..5153575812 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -438,6 +438,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prefetch scheduler idle-yield interval now scales with target frame duration in a bounded low-latency range. 
- Reduces fixed 1ms wakeup churn in empty in-flight periods while keeping prefetch request responsiveness high. +60. **Bounded behind-prefetch dedupe memory growth (2026-02-13)** + - Behind-prefetch dedupe tracking now keeps a bounded eviction-ordered window instead of unbounded growth over long playback sessions. + - Prevents long-session hash-set expansion from degrading behind-prefetch lookup efficiency. + --- ## Root Cause Analysis Archive @@ -593,6 +597,7 @@ Decoder Pipeline: 63. Added local in-flight frame tracking in prefetch scheduler to reduce lock-heavy duplicate-check lookups on scheduling hot-path. 64. Batched warmup prefetch queue consumption to reduce warmup staging overhead and improve contiguous warmup-fill responsiveness. 65. Scaled prefetch idle polling with frame budget to reduce scheduler wakeup churn during empty in-flight periods. +66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -627,6 +632,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch scheduler now uses a local in-flight frame set for duplicate scheduling checks and mirrors it into shared generation-keyed in-flight markers for playback coordination. - `crates/editor/src/playback.rs`: warmup prefetch receive path now drains immediately queued prefetched frames in batches to accelerate warmup buffer population. - `crates/editor/src/playback.rs`: prefetch scheduler idle polling now scales with frame budget (bounded) instead of fixed 1ms delay, reducing idle wakeup overhead. +- `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. 
- `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index f83448472d..2d6bd6ca97 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -16,7 +16,7 @@ use cpal::{ use futures::stream::{FuturesUnordered, StreamExt}; use lru::LruCache; use std::{ - collections::{BTreeMap, HashSet}, + collections::{BTreeMap, HashSet, VecDeque}, num::NonZeroUsize, sync::{ Arc, RwLock, @@ -243,6 +243,7 @@ impl Playback { let mut in_flight: FuturesUnordered = FuturesUnordered::new(); let mut frames_decoded: u32 = 0; let mut prefetched_behind: HashSet = HashSet::new(); + let mut prefetched_behind_order: VecDeque = VecDeque::new(); let mut scheduled_in_flight_frames: HashSet = HashSet::new(); const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90); @@ -259,6 +260,7 @@ impl Playback { .mul_f64(0.25) .max(Duration::from_millis(2)) .min(Duration::from_millis(8)); + let prefetched_behind_capacity = (dynamic_prefetch_behind as usize).saturating_mul(8); let mut active_generation = *prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); @@ -286,6 +288,7 @@ impl Playback { next_prefetch_frame = *frame_request_rx.borrow(); frames_decoded = 0; prefetched_behind.clear(); + prefetched_behind_order.clear(); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); @@ -310,6 +313,7 @@ impl Playback { next_prefetch_frame = requested; frames_decoded = 0; prefetched_behind.clear(); + prefetched_behind_order.clear(); if is_backward_seek 
|| seek_distance > dynamic_prefetch_ahead / 2 { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { @@ -433,7 +437,14 @@ impl Playback { in_flight_guard.insert((generation, behind_frame)); } - prefetched_behind.insert(behind_frame); + if prefetched_behind.insert(behind_frame) { + prefetched_behind_order.push_back(behind_frame); + while prefetched_behind_order.len() > prefetched_behind_capacity { + if let Some(evicted) = prefetched_behind_order.pop_front() { + prefetched_behind.remove(&evicted); + } + } + } in_flight.push(Box::pin(async move { let result = decoders .get_frames(segment_time as f32, !hide_camera, clip_offsets) From 3fcb14e654b7e67fda9bb3cd79b73e766b93670e Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 20:22:05 +0000 Subject: [PATCH 169/333] improve: switch editor playback to streaming audio and live seek --- apps/desktop/src-tauri/src/lib.rs | 18 +++ .../src/routes/editor/Timeline/index.tsx | 27 +--- crates/editor/src/audio.rs | 17 ++- crates/editor/src/playback.rs | 138 +++++++++++++++--- 4 files changed, 157 insertions(+), 43 deletions(-) diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs index 00dc6903b1..c00d465ced 100644 --- a/apps/desktop/src-tauri/src/lib.rs +++ b/apps/desktop/src-tauri/src/lib.rs @@ -1921,6 +1921,15 @@ async fn set_playhead_position( }) .await; + let playback_handle = { + let state = editor_instance.state.lock().await; + state.playback_task.clone() + }; + + if let Some(handle) = playback_handle { + handle.seek(frame_number); + } + Ok(()) } @@ -2545,6 +2554,15 @@ async fn seek_to(editor_instance: WindowEditorInstance, frame_number: u32) -> Re }) .await; + let playback_handle = { + let state = editor_instance.state.lock().await; + state.playback_task.clone() + }; + + if let Some(handle) = playback_handle { + handle.seek(frame_number); + } + Ok(()) } diff --git a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx 
index 8aacea955f..da839931ad 100644 --- a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -89,7 +89,6 @@ export function Timeline() { editorState, projectActions, meta, - previewResolutionBase, } = useEditorContext(); const duration = () => editorInstance.recordingDuration; @@ -274,32 +273,16 @@ export function Timeline() { maskSegmentDragState.type !== "moving" && textSegmentDragState.type !== "moving" ) { - // Guard against missing bounds and clamp computed time to [0, totalDuration()] if (left == null) return; const rawTime = secsPerPixel() * (e.clientX - left) + transform().position; const newTime = Math.min(Math.max(0, rawTime), totalDuration()); + const targetFrame = Math.round(newTime * FPS); - // If playing, some backends require restart to seek reliably - if (editorState.playing) { - try { - await commands.stopPlayback(); - - // Round to nearest frame to prevent off-by-one drift - const targetFrame = Math.round(newTime * FPS); - await commands.seekTo(targetFrame); - - // If the user paused during these async ops, bail out without restarting - if (!editorState.playing) { - setEditorState("playbackTime", newTime); - return; - } - - await commands.startPlayback(FPS, previewResolutionBase()); - setEditorState("playing", true); - } catch (err) { - console.error("Failed to seek during playback:", err); - } + try { + await commands.seekTo(targetFrame); + } catch (err) { + console.error("Failed to seek timeline playhead:", err); } setEditorState("playbackTime", newTime); diff --git a/crates/editor/src/audio.rs b/crates/editor/src/audio.rs index d2cdbf8df4..7051ce5549 100644 --- a/crates/editor/src/audio.rs +++ b/crates/editor/src/audio.rs @@ -7,7 +7,6 @@ use cap_project::{AudioConfiguration, ClipOffsets, ProjectConfiguration, Timelin use ffmpeg::{ ChannelLayout, Dictionary, format as avformat, frame::Audio as FFAudio, software::resampling, }; -#[cfg(not(target_os = "windows"))] use ringbuf::{ HeapRb, 
traits::{Consumer, Observer, Producer}, @@ -248,14 +247,12 @@ impl AudioRenderer { } } -#[cfg(not(target_os = "windows"))] pub struct AudioPlaybackBuffer { frame_buffer: AudioRenderer, resampler: AudioResampler, resampled_buffer: HeapRb, } -#[cfg(not(target_os = "windows"))] impl AudioPlaybackBuffer { pub const PLAYBACK_SAMPLES_COUNT: u32 = 512; @@ -296,6 +293,19 @@ impl AudioPlaybackBuffer { self.frame_buffer.set_playhead(playhead, project); } + #[cfg(target_os = "windows")] + pub fn set_playhead_smooth(&mut self, playhead: f64, project: &ProjectConfiguration) { + let current_playhead = self.frame_buffer.elapsed_samples_to_playhead(); + let drift = (playhead - current_playhead).abs(); + + if drift > 0.2 { + self.set_playhead(playhead, project); + return; + } + + self.frame_buffer.set_playhead(playhead, project); + } + #[allow(dead_code)] pub fn current_playhead(&self) -> f64 { self.frame_buffer.elapsed_samples_to_playhead() @@ -426,7 +436,6 @@ impl AudioResampler { }) } - #[cfg(not(target_os = "windows"))] pub fn reset(&mut self) { *self = Self::new(self.output).unwrap(); } diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 000f209c6b..3672590369 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -1,6 +1,6 @@ -use cap_audio::FromSampleBytes; -#[cfg(not(target_os = "windows"))] -use cap_audio::{LatencyCorrectionConfig, LatencyCorrector, default_output_latency_hint}; +use cap_audio::{ + FromSampleBytes, LatencyCorrectionConfig, LatencyCorrector, default_output_latency_hint, +}; use cap_media::MediaError; use cap_media_info::AudioInfo; use cap_project::{ProjectConfiguration, XY}; @@ -8,7 +8,6 @@ use cap_rendering::{ DecodedSegmentFrames, ProjectUniforms, RenderVideoConstants, ZoomFocusInterpolator, spring_mass_damper::SpringMassDamperSimulationConfig, }; -#[cfg(not(target_os = "windows"))] use cpal::{BufferSize, SupportedBufferSize}; use cpal::{ SampleFormat, @@ -28,7 +27,6 @@ use tokio::{ }; use 
tracing::{error, info, warn}; -#[cfg(not(target_os = "windows"))] use crate::audio::AudioPlaybackBuffer; use crate::{ audio::AudioSegment, editor, editor_instance::SegmentMedia, segments::get_audio_segments, @@ -64,6 +62,7 @@ pub enum PlaybackEvent { pub struct PlaybackHandle { stop_tx: watch::Sender, event_rx: watch::Receiver, + seek_tx: tokio_mpsc::UnboundedSender, } struct PrefetchedFrame { @@ -118,10 +117,12 @@ impl Playback { let (event_tx, mut event_rx) = watch::channel(PlaybackEvent::Start); event_rx.borrow_and_update(); + let (seek_tx, mut seek_rx) = tokio_mpsc::unbounded_channel::(); let handle = PlaybackHandle { stop_tx: stop_tx.clone(), event_rx, + seek_tx, }; let (prefetch_tx, mut prefetch_rx) = @@ -437,10 +438,33 @@ impl Playback { .make_contiguous() .sort_by_key(|p| p.frame_number); - let start = Instant::now(); + let mut playback_anchor_start = Instant::now(); + let mut playback_anchor_frame = frame_number; let mut cached_project = self.project.borrow().clone(); 'playback: loop { + let mut pending_seek = None; + while let Ok(next_seek_frame) = seek_rx.try_recv() { + pending_seek = Some(next_seek_frame); + } + + if let Some(seek_frame) = pending_seek { + frame_number = seek_frame; + playback_anchor_start = Instant::now(); + playback_anchor_frame = seek_frame; + prefetch_buffer.retain(|p| p.frame_number >= frame_number); + frame_cache.cache.clear(); + let _ = frame_request_tx.send(frame_number); + let _ = playback_position_tx.send(frame_number); + if has_audio + && audio_playhead_tx + .send(frame_number as f64 / fps_f64) + .is_err() + { + break 'playback; + } + } + if self.project.has_changed().unwrap_or(false) { cached_project = self.project.borrow_and_update().clone(); } @@ -465,11 +489,28 @@ impl Playback { } } - let frame_offset = frame_number.saturating_sub(self.start_frame_number) as f64; - let next_deadline = start + frame_duration.mul_f64(frame_offset); + let frame_offset = frame_number.saturating_sub(playback_anchor_frame) as f64; + let 
next_deadline = playback_anchor_start + frame_duration.mul_f64(frame_offset); tokio::select! { _ = stop_rx.changed() => break 'playback, + Some(seek_frame) = seek_rx.recv() => { + frame_number = seek_frame; + playback_anchor_start = Instant::now(); + playback_anchor_frame = seek_frame; + prefetch_buffer.retain(|p| p.frame_number >= frame_number); + frame_cache.cache.clear(); + let _ = frame_request_tx.send(frame_number); + let _ = playback_position_tx.send(frame_number); + if has_audio + && audio_playhead_tx + .send(frame_number as f64 / fps_f64) + .is_err() + { + break 'playback; + } + continue; + } _ = tokio::time::sleep_until(next_deadline) => {} } @@ -699,8 +740,8 @@ impl Playback { break 'playback; } - let expected_frame = self.start_frame_number - + (start.elapsed().as_secs_f64() * fps_f64).floor() as u32; + let expected_frame = playback_anchor_frame + + (playback_anchor_start.elapsed().as_secs_f64() * fps_f64).floor() as u32; if frame_number < expected_frame { let frames_behind = expected_frame - frame_number; @@ -742,6 +783,10 @@ impl PlaybackHandle { self.stop_tx.send(true).ok(); } + pub fn seek(&self, frame_number: u32) { + let _ = self.seek_tx.send(frame_number); + } + pub async fn receive_event(&mut self) -> watch::Ref<'_, PlaybackEvent> { self.event_rx.changed().await.ok(); self.event_rx.borrow_and_update() @@ -759,6 +804,12 @@ struct AudioPlayback { } impl AudioPlayback { + fn use_prerendered_audio() -> bool { + std::env::var("CAP_AUDIO_PRERENDER_PLAYBACK") + .map(|value| value == "1" || value.eq_ignore_ascii_case("true")) + .unwrap_or(false) + } + fn spawn(self) -> bool { let handle = tokio::runtime::Handle::current(); @@ -787,26 +838,80 @@ impl AudioPlayback { } }; + let use_prerendered_audio = Self::use_prerendered_audio(); let duration_secs = self.duration_secs; + if use_prerendered_audio { + info!("Using pre-rendered audio playback mode"); + } else { + info!("Using low-latency streaming audio playback mode"); + } let result = match 
supported_config.sample_format() { SampleFormat::I16 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::I32 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::F32 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::I64 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::U8 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } SampleFormat::F64 => { - self.create_stream_prerendered::(device, supported_config, duration_secs) + if use_prerendered_audio { + self.create_stream_prerendered::( + device, + supported_config, + duration_secs, + ) + } else { + self.create_stream::(device, supported_config) + } } format => { error!( @@ -843,7 +948,6 @@ impl AudioPlayback { true } - #[cfg(not(target_os = "windows"))] #[allow(dead_code)] fn create_stream( self, From 796e6177e94a2e6be55dc6a09fed7a7f2aa25e7e Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 20:22:14 +0000 
Subject: [PATCH 170/333] improve: lazy-initialize avassetreader pool and refresh findings --- crates/editor/PLAYBACK-FINDINGS.md | 71 +++++++++++++++++-- crates/rendering/src/decoder/avassetreader.rs | 63 +++++++++++++++- 2 files changed, 127 insertions(+), 7 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index d30b940f21..e97796063a 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -35,7 +35,7 @@ ## Current Status -**Last Updated**: 2026-01-30 +**Last Updated**: 2026-02-13 ### Performance Summary @@ -60,10 +60,12 @@ - ✅ Multi-position decoder pool for smooth scrubbing - ✅ Mic audio sync within tolerance - ✅ Camera-display sync perfect (0ms drift) +- ✅ Editor playback now keeps a live seek channel during playback instead of stop/start restart loops +- ✅ Audio playback defaults to low-latency streaming buffer path with bounded prefill ### Known Issues (Lower Priority) 1. **System audio timing**: ~162ms difference inherited from recording-side timing issue -2. **Display decoder init time**: 337ms due to multi-position pool (creates 3 decoders) +2. 
**Display decoder init time**: baseline was 337ms from eager multi-decoder setup; now reduced by lazy decoder warmup but needs benchmark confirmation --- @@ -73,12 +75,17 @@ *(Update this section as you work)* - [ ] **Test fragmented mode** - Run playback tests on fragmented recordings -- [ ] **Investigate display decoder init time** - 337ms may be optimizable +- [ ] **Collect cross-platform benchmark evidence** - macOS 13+ and Windows GPU matrix for FPS, scrub settle, audio start latency, and A/V drift +- [ ] **Validate lazy decoder warmup impact** - measure display decoder init and scrub settle before/after on real recordings +- [ ] **Validate streaming audio startup/sync** - benchmark low-latency path vs legacy pre-render path across long timelines ### Completed - [x] **Run initial baseline** - Established current playback performance metrics (2026-01-28) - [x] **Profile decoder init time** - Hardware acceleration confirmed (AVAssetReader) (2026-01-28) - [x] **Identify latency hotspots** - No issues found, p95=3.1ms (2026-01-28) +- [x] **Remove seek restart churn in timeline path** - in-playback seeks now route through live playback handle (2026-02-13) +- [x] **Switch default audio mode to low-latency streaming** - full prerender now opt-in by env flag (2026-02-13) +- [x] **Reduce eager AVAssetReader decoder warmup** - pool now initializes lazily beyond first warm decoders (2026-02-13) --- @@ -127,13 +134,36 @@ cargo run -p cap-recording --example playback-test-runner -- full ## Completed Fixes -*(Document fixes here as they are implemented)* +1. **Low-latency audio startup enabled by default (2026-02-13)** + - `AudioPlayback::spawn()` now selects streaming `create_stream()` path by default. + - Legacy full-timeline prerender path is still available via `CAP_AUDIO_PRERENDER_PLAYBACK=1`. + - `AudioPlaybackBuffer` is available on all platforms so Windows can use streaming sync logic. + +2. 
**In-playback seek path without stop/start (2026-02-13)** + - Added seek channel to `PlaybackHandle` and playback loop. + - `seek_to` and `set_playhead_position` commands now forward seek requests to active playback. + - Timeline seek no longer tears down and recreates playback while playing. + +3. **Lazy decoder pool warmup on macOS AVAssetReader (2026-02-13)** + - Initial warmup now creates only a small subset of decoder instances. + - Additional decoder instances are initialized lazily when scrub patterns request them. + - Failed lazy init falls back safely to currently available decoders. --- ## Root Cause Analysis Archive -*(Document investigated issues here)* +1. **Audio start delay from full-track prerender** + - Root cause: playback startup used `create_stream_prerendered()` for all sample formats, forcing full timeline audio render before output stream started. + - Fix direction: switch default to incremental `AudioPlaybackBuffer` path with bounded prefill and live playhead correction. + +2. **Scrub lag from playback restart loop** + - Root cause: timeline seek while playing called stop → seek → start, rebuilding playback/audio state on every interactive seek. + - Fix direction: add live seek channel into running playback loop and route frontend seeks to it. + +3. **Display decoder init inflation on macOS** + - Root cause: AVAssetReader decoder pool eagerly initialized multiple decoders during startup. + - Fix direction: reduce eager warmup and lazily instantiate additional pool decoders when scrub behavior actually needs them. --- @@ -199,6 +229,37 @@ Decoder Pipeline: --- +### Session 2026-02-13 (Audio Startup + Live Seek + Lazy Decoder Warmup) + +**Goal**: Remove major editor playback bottlenecks affecting startup latency, scrub responsiveness, and decoder init overhead. + +**What was done**: +1. Switched playback audio startup default to streaming buffer path. +2. Kept prerender audio path behind `CAP_AUDIO_PRERENDER_PLAYBACK` as explicit fallback. +3. 
Enabled `AudioPlaybackBuffer` for all platforms so Windows uses live buffering/sync path. +4. Added a seek channel to `PlaybackHandle` and integrated seek handling into the main playback loop. +5. Updated Tauri seek/playhead commands to forward seeks into active playback handle. +6. Removed frontend timeline stop/start cycle when seeking while playing. +7. Reduced AVAssetReader eager pool warmup and added lazy decoder instantiation for additional pool slots. + +**Changes Made**: +- `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. +- `crates/editor/src/audio.rs`: cross-platform `AudioPlaybackBuffer`, windows-only smooth seek helper. +- `apps/desktop/src-tauri/src/lib.rs`: forward `seek_to` and `set_playhead_position` into active playback handle. +- `apps/desktop/src/routes/editor/Timeline/index.tsx`: seek while playing now sends direct `seekTo` without playback restart. +- `crates/rendering/src/decoder/avassetreader.rs`: lower eager decoder warmup and lazy pool growth. + +**Results**: +- ✅ `cargo +stable check -p cap-editor` passes after changes. +- ✅ `cargo +stable check -p cap-rendering` passes after changes. +- ✅ `pnpm --dir apps/desktop exec tsc --noEmit` passes after frontend seek changes. +- ⚠️ `cargo +stable check -p cap-desktop` and `cargo +stable run -p cap-recording --example playback-test-runner -- list` fail in this Linux environment because `scap-targets` does not currently compile on this target (`DisplayIdImpl`/`WindowImpl` unresolved), preventing local benchmark execution here. +- ⚠️ Cross-platform FPS/scrub/A-V benchmark evidence still pending on macOS and Windows devices with real recordings. + +**Stopping point**: Core playback code-path optimizations are implemented and compiling in touched crates; next step is benchmark execution on macOS 13+ and Windows GPU matrix to quantify gains. 
+ +--- + ### Session 2026-01-28 (Initial Baseline - MP4) **Goal**: Establish initial playback performance baseline diff --git a/crates/rendering/src/decoder/avassetreader.rs b/crates/rendering/src/decoder/avassetreader.rs index 73f9c62a4a..62582efe67 100644 --- a/crates/rendering/src/decoder/avassetreader.rs +++ b/crates/rendering/src/decoder/avassetreader.rs @@ -417,6 +417,8 @@ pub struct AVAssetReaderDecoder { } impl AVAssetReaderDecoder { + const INITIAL_WARM_DECODER_COUNT: usize = 2; + fn new(path: PathBuf, tokio_handle: TokioHandle) -> Result { let keyframe_index = cap_video_decode::avassetreader::KeyframeIndex::build(&path).ok(); let fps = keyframe_index @@ -449,7 +451,10 @@ impl AVAssetReaderDecoder { let mut decoders = vec![primary_instance]; let initial_positions = pool_manager.positions(); - for pos in initial_positions.iter().skip(1) { + let warm_decoder_count = Self::INITIAL_WARM_DECODER_COUNT + .max(1) + .min(initial_positions.len()); + for pos in initial_positions.iter().take(warm_decoder_count).skip(1) { let start_time = pos.position_secs; match DecoderInstance::new( path.clone(), @@ -496,11 +501,65 @@ impl AVAssetReaderDecoder { }) } + fn ensure_decoder_available(&mut self, decoder_id: usize) -> usize { + if decoder_id < self.decoders.len() { + return decoder_id; + } + + let Some(template) = self.decoders.first() else { + return 0; + }; + let template_path = template.path.clone(); + let template_tokio_handle = template.tokio_handle.clone(); + let template_keyframe_index = template.keyframe_index.clone(); + + while self.decoders.len() <= decoder_id { + let next_id = self.decoders.len(); + let Some(position) = self + .pool_manager + .positions() + .iter() + .find(|p| p.id == next_id) + .map(|p| p.position_secs) + else { + break; + }; + + match DecoderInstance::new( + template_path.clone(), + template_tokio_handle.clone(), + position, + template_keyframe_index.clone(), + ) { + Ok(instance) => { + self.decoders.push(instance); + tracing::info!( + 
decoder_id = next_id, + position_secs = position, + total_decoders = self.decoders.len(), + "Lazily initialized decoder instance" + ); + } + Err(e) => { + tracing::warn!( + decoder_id = next_id, + position_secs = position, + error = %e, + "Failed to lazily initialize decoder instance" + ); + break; + } + } + } + + decoder_id.min(self.decoders.len().saturating_sub(1)) + } + fn select_best_decoder(&mut self, requested_time: f32) -> (usize, bool) { let (best_id, _distance, needs_reset) = self.pool_manager.find_best_decoder_for_time(requested_time); - let decoder_idx = best_id.min(self.decoders.len().saturating_sub(1)); + let decoder_idx = self.ensure_decoder_available(best_id); if needs_reset && decoder_idx < self.decoders.len() { self.decoders[decoder_idx].reset(requested_time); From b2b4804ee92b01279415eeb96cd92715dcc62871 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 20:26:12 +0000 Subject: [PATCH 171/333] improve: add scrub and startup metrics to playback benchmark --- crates/editor/PLAYBACK-BENCHMARKS.md | 16 ++ crates/editor/PLAYBACK-FINDINGS.md | 9 + .../examples/playback-test-runner.rs | 217 +++++++++++++++++- 3 files changed, 235 insertions(+), 7 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index a2253d580f..dd2f3fe853 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -12,6 +12,7 @@ This document tracks performance benchmarks for Cap's playback and decoding syst | Decode Latency (p95) | <50ms | - | | Effective FPS | ≥30 fps | ±2 fps | | Decode Jitter | <10ms | - | +| Scrub Seek Latency (p95) | <40ms | - | | A/V Sync (mic↔video) | <100ms | - | | A/V Sync (system↔video) | <100ms | - | | Camera-Display Drift | <100ms | - | @@ -20,6 +21,7 @@ This document tracks performance benchmarks for Cap's playback and decoding syst - **Decoder Tests**: Init time, hardware acceleration detection, fallback handling - **Playback Tests**: Sequential 
decode, frame retrieval, latency percentiles +- **Scrub Tests**: Random access seek decode latency and seek failure rate - **Audio Sync Tests**: Mic-video sync, system audio-video sync - **Camera Sync Tests**: Camera-display drift, frame count alignment - **Decode Benchmark**: Creation, sequential, seek, and random access performance @@ -44,6 +46,7 @@ cargo run -p cap-recording --example playback-test-runner -- full # Run specific test categories cargo run -p cap-recording --example playback-test-runner -- decoder cargo run -p cap-recording --example playback-test-runner -- playback +cargo run -p cap-recording --example playback-test-runner -- scrub cargo run -p cap-recording --example playback-test-runner -- audio-sync cargo run -p cap-recording --example playback-test-runner -- camera-sync @@ -106,6 +109,19 @@ cargo run -p cap-recording --example playback-test-runner -- full | **P50/P95/P99** | Latency percentiles | Sorted distribution | | **Effective FPS** | Actual decode throughput | frames / elapsed_time | | **Jitter** | Decode time variance (std dev) | sqrt(variance) | +| **First Decode** | Decode latency for first successful frame | elapsed from first frame request | +| **Startup to First** | Time from playback test start to first decoded frame | elapsed since playback test start | + +### Scrub Metrics + +| Metric | Description | How Measured | +|--------|-------------|--------------| +| **Seek Operations** | Total random seek attempts | Fixed operation count per segment | +| **Successful Seeks** | Seeks returning a decoded frame | Count of non-None seek decodes | +| **Failed Seeks** | Seeks returning no frame | Count of None seek decodes | +| **Avg Seek Time** | Mean random seek decode latency | Avg of seek decode times | +| **P50/P95/P99 Seek** | Seek latency percentiles | Sorted seek time distribution | +| **Max Seek Time** | Worst seek decode latency | Max of seek decode times | ### Audio Sync Metrics diff --git a/crates/editor/PLAYBACK-FINDINGS.md 
b/crates/editor/PLAYBACK-FINDINGS.md index e97796063a..0b693b65f9 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -98,6 +98,7 @@ cargo run -p cap-recording --example playback-test-runner -- full # Test specific categories cargo run -p cap-recording --example playback-test-runner -- decoder cargo run -p cap-recording --example playback-test-runner -- playback +cargo run -p cap-recording --example playback-test-runner -- scrub cargo run -p cap-recording --example playback-test-runner -- audio-sync cargo run -p cap-recording --example playback-test-runner -- camera-sync @@ -149,6 +150,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Additional decoder instances are initialized lazily when scrub patterns request them. - Failed lazy init falls back safely to currently available decoders. +4. **Playback benchmark runner now captures scrub and startup metrics (2026-02-13)** + - Added `scrub` benchmark mode to `playback-test-runner`. + - Playback result now includes first-frame decode and startup-to-first-frame latency. + - Scrub result now reports seek p50/p95/p99 and seek failure counts. + --- ## Root Cause Analysis Archive @@ -241,6 +247,7 @@ Decoder Pipeline: 5. Updated Tauri seek/playhead commands to forward seeks into active playback handle. 6. Removed frontend timeline stop/start cycle when seeking while playing. 7. Reduced AVAssetReader eager pool warmup and added lazy decoder instantiation for additional pool slots. +8. Extended playback benchmark tooling with scrub mode and startup latency metrics. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -248,6 +255,8 @@ Decoder Pipeline: - `apps/desktop/src-tauri/src/lib.rs`: forward `seek_to` and `set_playhead_position` into active playback handle. 
- `apps/desktop/src/routes/editor/Timeline/index.tsx`: seek while playing now sends direct `seekTo` without playback restart. - `crates/rendering/src/decoder/avassetreader.rs`: lower eager decoder warmup and lazy pool growth. +- `crates/recording/examples/playback-test-runner.rs`: added scrub command and startup/scrub latency metrics. +- `crates/editor/PLAYBACK-BENCHMARKS.md`: updated benchmark reference and metric definitions. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/recording/examples/playback-test-runner.rs b/crates/recording/examples/playback-test-runner.rs index 437b3844a3..9c7bff6110 100644 --- a/crates/recording/examples/playback-test-runner.rs +++ b/crates/recording/examples/playback-test-runner.rs @@ -53,6 +53,7 @@ enum Commands { Full, Decoder, Playback, + Scrub, AudioSync, CameraSync, List, @@ -60,6 +61,7 @@ enum Commands { const FPS_TOLERANCE: f64 = 2.0; const DECODE_LATENCY_WARNING_MS: f64 = 50.0; +const SCRUB_SEEK_WARNING_MS: f64 = 40.0; const AUDIO_VIDEO_SYNC_TOLERANCE_MS: f64 = 100.0; const CAMERA_SYNC_TOLERANCE_MS: f64 = 100.0; @@ -82,6 +84,8 @@ struct PlaybackTestResult { total_frames: usize, decoded_frames: usize, failed_frames: usize, + first_frame_decode_time_ms: f64, + startup_to_first_frame_ms: f64, avg_decode_time_ms: f64, min_decode_time_ms: f64, max_decode_time_ms: f64, @@ -96,6 +100,22 @@ struct PlaybackTestResult { errors: Vec, } +#[derive(Debug, Clone, Default)] +struct ScrubTestResult { + passed: bool, + segment_index: usize, + seek_operations: usize, + successful_seeks: usize, + failed_seeks: usize, + avg_seek_time_ms: f64, + p50_seek_time_ms: f64, + p95_seek_time_ms: f64, + p99_seek_time_ms: f64, + max_seek_time_ms: f64, + seek_latency_ok: bool, + errors: Vec, +} + #[derive(Debug, Clone, Default)] struct AudioSyncTestResult { passed: bool, @@ -141,6 +161,7 @@ struct RecordingTestReport { has_system_audio: bool, decoder_results: Vec, playback_results: Vec, + scrub_results: Vec, 
audio_sync_results: Vec, camera_sync_results: Vec, overall_passed: bool, @@ -208,6 +229,10 @@ impl RecordingTestReport { result.p95_decode_time_ms, result.p99_decode_time_ms ); + println!( + " Startup: first_decode={:.1}ms startup_to_first={:.1}ms", + result.first_frame_decode_time_ms, result.startup_to_first_frame_ms + ); if !result.fps_ok { println!(" WARN: FPS outside tolerance!"); } @@ -219,6 +244,28 @@ impl RecordingTestReport { } } + if !self.scrub_results.is_empty() { + println!("\n SCRUB TESTS:"); + for result in &self.scrub_results { + let status = if result.passed { "OK" } else { "FAIL" }; + println!( + " Segment {}: [{}] seeks={}/{} avg={:.1}ms p95={:.1}ms", + result.segment_index, + status, + result.successful_seeks, + result.seek_operations, + result.avg_seek_time_ms, + result.p95_seek_time_ms + ); + if !result.seek_latency_ok { + println!(" WARN: Scrub seek latency exceeds {SCRUB_SEEK_WARNING_MS}ms!"); + } + for err in &result.errors { + println!(" ERROR: {err}"); + } + } + } + if !self.audio_sync_results.is_empty() { println!("\n AUDIO SYNC TESTS:"); for result in &self.audio_sync_results { @@ -341,6 +388,7 @@ async fn test_playback( fps: u32, verbose: bool, ) -> PlaybackTestResult { + let playback_start = Instant::now(); let mut result = PlaybackTestResult { segment_index, expected_fps: fps as f64, @@ -384,6 +432,11 @@ async fn test_playback( let decode_time_ms = start.elapsed().as_secs_f64() * 1000.0; decode_times.push(decode_time_ms); decoded_count += 1; + if decoded_count == 1 { + result.first_frame_decode_time_ms = decode_time_ms; + result.startup_to_first_frame_ms = + playback_start.elapsed().as_secs_f64() * 1000.0; + } if frame.width() == 0 || frame.height() == 0 { result @@ -448,6 +501,96 @@ async fn test_playback( result } +async fn test_scrub( + recording_meta: &RecordingMeta, + meta: &StudioRecordingMeta, + segment_index: usize, + fps: u32, + verbose: bool, +) -> ScrubTestResult { + let mut result = ScrubTestResult { + segment_index, + 
seek_operations: 120, + ..Default::default() + }; + + let display_path = match meta { + StudioRecordingMeta::SingleSegment { segment } => { + recording_meta.path(&segment.display.path) + } + StudioRecordingMeta::MultipleSegments { inner } => { + recording_meta.path(&inner.segments[segment_index].display.path) + } + }; + + let decoder = match spawn_decoder("display", display_path.clone(), fps, 0.0, false).await { + Ok(d) => d, + Err(e) => { + result.errors.push(format!("Failed to create decoder: {e}")); + return result; + } + }; + + let duration_secs = get_video_duration(&display_path); + let total_frames = (duration_secs * fps as f64).ceil() as usize; + if total_frames < 2 { + result + .errors + .push("Video duration too short for scrub benchmark".to_string()); + return result; + } + + let mut seek_times = Vec::with_capacity(result.seek_operations); + + for operation in 0..result.seek_operations { + let target_frame = ((operation * 7919) % total_frames).max(1); + let target_time = target_frame as f32 / fps as f32; + let seek_start = Instant::now(); + match decoder.get_frame(target_time).await { + Some(_) => { + let seek_time_ms = seek_start.elapsed().as_secs_f64() * 1000.0; + seek_times.push(seek_time_ms); + result.successful_seeks += 1; + if verbose && operation % 20 == 0 { + println!( + " Scrub {} / {}: frame={} time={:.3}s seek={:.1}ms", + operation + 1, + result.seek_operations, + target_frame, + target_time, + seek_time_ms + ); + } + } + None => { + result.failed_seeks += 1; + if verbose { + println!( + " Scrub {} / {}: frame={} FAILED", + operation + 1, + result.seek_operations, + target_frame + ); + } + } + } + } + + if !seek_times.is_empty() { + result.avg_seek_time_ms = seek_times.iter().sum::() / seek_times.len() as f64; + result.p50_seek_time_ms = percentile(&seek_times, 50.0); + result.p95_seek_time_ms = percentile(&seek_times, 95.0); + result.p99_seek_time_ms = percentile(&seek_times, 99.0); + result.max_seek_time_ms = 
seek_times.iter().copied().fold(f64::NEG_INFINITY, f64::max); + } + + result.seek_latency_ok = result.p95_seek_time_ms <= SCRUB_SEEK_WARNING_MS; + result.passed = + result.seek_latency_ok && result.failed_seeks == 0 && result.successful_seeks > 0; + + result +} + async fn test_audio_sync( recording_meta: &RecordingMeta, meta: &StudioRecordingMeta, @@ -735,6 +878,7 @@ async fn run_tests_on_recording( fps: u32, run_decoder: bool, run_playback: bool, + run_scrub: bool, run_audio_sync: bool, run_camera_sync: bool, verbose: bool, @@ -826,6 +970,15 @@ async fn run_tests_on_recording( report.playback_results.push(playback_result); } + if run_scrub { + if verbose { + println!(" Testing scrub performance for segment {segment_idx}..."); + } + let scrub_result = + test_scrub(&meta, studio_meta.as_ref(), segment_idx, fps, verbose).await; + report.scrub_results.push(scrub_result); + } + if run_audio_sync { if verbose { println!(" Testing audio sync for segment {segment_idx}..."); @@ -848,10 +1001,11 @@ async fn run_tests_on_recording( let decoder_ok = report.decoder_results.iter().all(|r| r.passed); let playback_ok = report.playback_results.iter().all(|r| r.passed); + let scrub_ok = report.scrub_results.iter().all(|r| r.passed); let audio_ok = report.audio_sync_results.iter().all(|r| r.passed); let camera_ok = report.camera_sync_results.iter().all(|r| r.passed); - report.overall_passed = decoder_ok && playback_ok && audio_ok && camera_ok; + report.overall_passed = decoder_ok && playback_ok && scrub_ok && audio_ok && camera_ok; Ok(report) } @@ -906,6 +1060,12 @@ fn get_failure_tags(report: &RecordingTestReport) -> Vec { if report.playback_results.iter().any(|r| !r.decode_latency_ok) { tags.push("LATENCY".to_string()); } + if report.scrub_results.iter().any(|r| !r.seek_latency_ok) { + tags.push("SCRUB_LATENCY".to_string()); + } + if report.scrub_results.iter().any(|r| r.failed_seeks > 0) { + tags.push("SCRUB_ERRORS".to_string()); + } if report.playback_results.iter().any(|r| 
r.failed_frames > 0) { tags.push("DECODE_ERRORS".to_string()); } @@ -1007,6 +1167,16 @@ fn report_to_markdown(report: &RecordingTestReport) -> String { result.p99_decode_time_ms, result.max_decode_time_ms )); + md.push_str(&format!( + "| ↳ Startup | {} | first_decode={:.1}ms startup_to_first={:.1}ms |\n", + if result.startup_to_first_frame_ms > 0.0 { + "✅" + } else { + "❌" + }, + result.first_frame_decode_time_ms, + result.startup_to_first_frame_ms + )); if result.failed_frames > 0 { md.push_str(&format!( "| ↳ Failed Frames | ⚠️ | {} |\n", @@ -1015,6 +1185,31 @@ fn report_to_markdown(report: &RecordingTestReport) -> String { } } + for result in &report.scrub_results { + md.push_str(&format!( + "| Scrub Seg {} | {} | seeks={}/{} avg={:.1}ms p95={:.1}ms p99={:.1}ms |\n", + result.segment_index, + if result.passed { "✅" } else { "❌" }, + result.successful_seeks, + result.seek_operations, + result.avg_seek_time_ms, + result.p95_seek_time_ms, + result.p99_seek_time_ms + )); + md.push_str(&format!( + "| ↳ Scrub Latency | {} | max={:.1}ms threshold={:.1}ms |\n", + if result.seek_latency_ok { "✅" } else { "❌" }, + result.max_seek_time_ms, + SCRUB_SEEK_WARNING_MS + )); + if result.failed_seeks > 0 { + md.push_str(&format!( + "| ↳ Scrub Failures | ⚠️ | {} |\n", + result.failed_seeks + )); + } + } + for result in &report.audio_sync_results { if result.has_mic_audio { let status = if result.mic_sync_ok { "✅" } else { "❌" }; @@ -1202,6 +1397,7 @@ fn print_summary(reports: &[RecordingTestReport]) { let decoder_failed = report.decoder_results.iter().any(|r| !r.passed); let playback_failed = report.playback_results.iter().any(|r| !r.passed); + let scrub_failed = report.scrub_results.iter().any(|r| !r.passed); let audio_failed = report.audio_sync_results.iter().any(|r| !r.passed); let camera_failed = report.camera_sync_results.iter().any(|r| !r.passed); @@ -1211,6 +1407,9 @@ fn print_summary(reports: &[RecordingTestReport]) { if playback_failed { print!(" [PLAYBACK]"); } + if 
scrub_failed { + print!(" [SCRUB]"); + } if audio_failed { print!(" [AUDIO SYNC]"); } @@ -1269,12 +1468,14 @@ async fn main() -> anyhow::Result<()> { return Ok(()); } - let (run_decoder, run_playback, run_audio_sync, run_camera_sync) = match cli.command { - Some(Commands::Decoder) => (true, false, false, false), - Some(Commands::Playback) => (false, true, false, false), - Some(Commands::AudioSync) => (false, false, true, false), - Some(Commands::CameraSync) => (false, false, false, true), - Some(Commands::Full) | None => (true, true, true, true), + let (run_decoder, run_playback, run_scrub, run_audio_sync, run_camera_sync) = match cli.command + { + Some(Commands::Decoder) => (true, false, false, false, false), + Some(Commands::Playback) => (false, true, false, false, false), + Some(Commands::Scrub) => (false, false, true, false, false), + Some(Commands::AudioSync) => (false, false, false, true, false), + Some(Commands::CameraSync) => (false, false, false, false, true), + Some(Commands::Full) | None => (true, true, true, true, true), Some(Commands::List) => unreachable!(), }; @@ -1297,6 +1498,7 @@ async fn main() -> anyhow::Result<()> { cli.fps, run_decoder, run_playback, + run_scrub, run_audio_sync, run_camera_sync, cli.verbose, @@ -1321,6 +1523,7 @@ async fn main() -> anyhow::Result<()> { match cli.command { Some(Commands::Decoder) => "decoder", Some(Commands::Playback) => "playback", + Some(Commands::Scrub) => "scrub", Some(Commands::AudioSync) => "audio-sync", Some(Commands::CameraSync) => "camera-sync", Some(Commands::Full) | None => "full", From eea5e6970d6aff12e83c1bef4bb463faf920960f Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 20:26:44 +0000 Subject: [PATCH 172/333] docs: add cross-platform playback benchmark matrix --- crates/editor/PLAYBACK-BENCHMARKS.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index dd2f3fe853..8b23767a92 100644 
--- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -77,6 +77,22 @@ cargo run -p cap-recording --example real-device-test-runner -- full --keep-outp cargo run -p cap-recording --example playback-test-runner -- full ``` +### Cross-Platform Validation Matrix + +Run these scenarios on each required hardware class and append outputs via `--benchmark-output`. + +```bash +cargo run -p cap-recording --example playback-test-runner -- full --fps 60 --benchmark-output --notes "platform= gpu= scenario=full" +cargo run -p cap-recording --example playback-test-runner -- scrub --fps 60 --benchmark-output --notes "platform= gpu= scenario=scrub" +``` + +| Platform | GPU Class | MP4 Full | Fragmented Full | MP4 Scrub | Fragmented Scrub | Notes | +|----------|-----------|----------|-----------------|-----------|------------------|-------| +| macOS 13+ | Apple Silicon | ☐ | ☐ | ☐ | ☐ | | +| Windows 11 | NVIDIA discrete | ☐ | ☐ | ☐ | ☐ | | +| Windows 11 | AMD discrete | ☐ | ☐ | ☐ | ☐ | | +| Windows 11 | Integrated baseline | ☐ | ☐ | ☐ | ☐ | | + --- ## Benchmark History From 97bd11c0d4166a819b9348100140ecad35440d7b Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 20:28:37 +0000 Subject: [PATCH 173/333] improve: add playback startup latency telemetry logs --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 30 +++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 0b693b65f9..b4d6bf57c1 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -155,6 +155,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Playback result now includes first-frame decode and startup-to-first-frame latency. - Scrub result now reports seek p50/p95/p99 and seek failure counts. +5. 
**Playback runtime emits startup latency signals (2026-02-13)** + - Playback loop now logs first rendered frame latency. + - Audio stream setup now logs startup preparation time and first callback latency. + --- ## Root Cause Analysis Archive @@ -248,6 +252,7 @@ Decoder Pipeline: 6. Removed frontend timeline stop/start cycle when seeking while playing. 7. Reduced AVAssetReader eager pool warmup and added lazy decoder instantiation for additional pool slots. 8. Extended playback benchmark tooling with scrub mode and startup latency metrics. +9. Added playback runtime startup telemetry logs for first frame and audio callback bring-up. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -257,6 +262,7 @@ Decoder Pipeline: - `crates/rendering/src/decoder/avassetreader.rs`: lower eager decoder warmup and lazy pool growth. - `crates/recording/examples/playback-test-runner.rs`: added scrub command and startup/scrub latency metrics. - `crates/editor/PLAYBACK-BENCHMARKS.md`: updated benchmark reference and metric definitions. +- `crates/editor/src/playback.rs`: added first-render and audio-callback startup latency logging. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 3672590369..70e019ef5d 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -18,7 +18,10 @@ use lru::LruCache; use std::{ collections::{HashSet, VecDeque}, num::NonZeroUsize, - sync::{Arc, RwLock}, + sync::{ + Arc, RwLock, + atomic::{AtomicBool, Ordering}, + }, time::Duration, }; use tokio::{ @@ -358,6 +361,7 @@ impl Playback { }); tokio::spawn(async move { + let playback_task_start = Instant::now(); let duration = if let Some(timeline) = &self.project.borrow().timeline { timeline.duration() } else { @@ -387,6 +391,7 @@ impl Playback { let mut total_frames_rendered = 0u64; let mut _total_frames_skipped = 0u64; + let mut first_render_logged = false; let warmup_target_frames = 20usize; let warmup_after_first_timeout = Duration::from_millis(1000); @@ -726,6 +731,14 @@ impl Playback { .await; total_frames_rendered += 1; + if !first_render_logged { + first_render_logged = true; + info!( + first_render_latency_ms = + playback_task_start.elapsed().as_secs_f64() * 1000.0, + "Playback rendered first frame" + ); + } } event_tx.send(PlaybackEvent::Frame(frame_number)).ok(); @@ -819,6 +832,7 @@ impl AudioPlayback { } std::thread::spawn(move || { + let audio_thread_start = Instant::now(); let host = cpal::default_host(); let device = match host.default_output_device() { Some(d) => d, @@ -933,6 +947,10 @@ impl AudioPlayback { } }; + info!( + startup_prepare_ms = audio_thread_start.elapsed().as_secs_f64() * 1000.0, + "Audio stream prepared, starting playback stream" + ); if let Err(e) = stream.play() { error!( "Failed to play audio stream: {}. 
Skipping audio playback.", @@ -1103,6 +1121,8 @@ impl AudioPlayback { let mut latency_corrector = LatencyCorrector::new(static_latency_hint, latency_config); let initial_compensation_secs = latency_corrector.initial_compensation_secs(); let device_sample_rate = sample_rate; + let stream_build_start = Instant::now(); + let callback_started = Arc::new(AtomicBool::new(false)); { let project_snapshot = project.borrow(); @@ -1140,6 +1160,7 @@ impl AudioPlayback { let headroom_for_stream = headroom_samples; let mut playhead_rx_for_stream = playhead_rx.clone(); let mut last_video_playhead = playhead; + let callback_started_for_stream = callback_started.clone(); #[cfg(target_os = "windows")] const FIXED_LATENCY_SECS: f64 = 0.08; @@ -1159,6 +1180,13 @@ impl AudioPlayback { let stream_result = device.build_output_stream( &config, move |buffer: &mut [T], info| { + if !callback_started_for_stream.swap(true, Ordering::Relaxed) { + info!( + startup_to_callback_ms = + stream_build_start.elapsed().as_secs_f64() * 1000.0, + "Audio output callback started" + ); + } #[cfg(not(target_os = "windows"))] let latency_secs = latency_corrector.update_from_callback(info); #[cfg(target_os = "windows")] From 127f727e5a3aa5942ccba7b5cff059a533932b48 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 20:42:59 +0000 Subject: [PATCH 174/333] improve: extend decode benchmark with json startup metrics --- Cargo.lock | 1 + crates/editor/Cargo.toml | 1 + crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 6 + crates/editor/examples/decode-benchmark.rs | 169 ++++++++++++++++++--- 5 files changed, 158 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c4f8665e90..6076dc378a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1311,6 +1311,7 @@ dependencies = [ "ringbuf", "sentry", "serde", + "serde_json", "specta", "tokio", "tokio-util", diff --git a/crates/editor/Cargo.toml b/crates/editor/Cargo.toml index c612d1e33f..0cc16b4111 100644 --- 
a/crates/editor/Cargo.toml +++ b/crates/editor/Cargo.toml @@ -23,6 +23,7 @@ axum = { version = "0.7.5", features = ["ws"] } ffmpeg.workspace = true specta.workspace = true serde = { workspace = true } +serde_json = "1" sentry.workspace = true futures = { workspace = true } tracing.workspace = true diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 8b23767a92..da84ed7b3a 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -65,6 +65,9 @@ cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 # With custom FPS and iterations cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --iterations 50 + +# Emit machine-readable JSON with startup/scrub metrics +cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --sequential-frames 180 --random-samples 120 --output-json /tmp/decode-benchmark.json ``` #### Combined Workflow (Recording → Playback) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b4d6bf57c1..fa28454366 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -159,6 +159,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Playback loop now logs first rendered frame latency. - Audio stream setup now logs startup preparation time and first callback latency. +6. **Decode benchmark now supports machine-readable evidence output (2026-02-13)** + - `decode-benchmark` supports `--output-json` for structured metric capture. + - Added sequential frame and random sample count controls to scale benchmark depth per hardware class. + --- ## Root Cause Analysis Archive @@ -253,6 +257,7 @@ Decoder Pipeline: 7. Reduced AVAssetReader eager pool warmup and added lazy decoder instantiation for additional pool slots. 8. Extended playback benchmark tooling with scrub mode and startup latency metrics. 9. 
Added playback runtime startup telemetry logs for first frame and audio callback bring-up. +10. Enhanced decode benchmark example with structured JSON output and configurable sample depth. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -263,6 +268,7 @@ Decoder Pipeline: - `crates/recording/examples/playback-test-runner.rs`: added scrub command and startup/scrub latency metrics. - `crates/editor/PLAYBACK-BENCHMARKS.md`: updated benchmark reference and metric definitions. - `crates/editor/src/playback.rs`: added first-render and audio-callback startup latency logging. +- `crates/editor/examples/decode-benchmark.rs`: added `--output-json`, startup metrics, and configurable sequential/random sampling. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/examples/decode-benchmark.rs b/crates/editor/examples/decode-benchmark.rs index d29ab2dda0..2ae11f207b 100644 --- a/crates/editor/examples/decode-benchmark.rs +++ b/crates/editor/examples/decode-benchmark.rs @@ -1,4 +1,6 @@ use cap_rendering::decoder::{AsyncVideoDecoderHandle, spawn_decoder}; +use serde::Serialize; +use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; use std::time::Instant; @@ -38,23 +40,44 @@ struct BenchmarkConfig { video_path: PathBuf, fps: u32, iterations: usize, + sequential_frames: usize, + random_samples: usize, + output_json: Option, } -#[derive(Debug, Default)] +#[derive(Debug, Default, Serialize)] struct BenchmarkResults { decoder_creation_ms: f64, sequential_decode_times_ms: Vec, + first_frame_decode_ms: f64, + startup_to_first_frame_ms: f64, + sequential_p50_ms: f64, + sequential_p95_ms: f64, + sequential_p99_ms: f64, sequential_fps: f64, sequential_failures: usize, seek_times_by_distance: Vec<(f32, f64)>, seek_failures: usize, random_access_times_ms: Vec, random_access_avg_ms: f64, + random_access_p50_ms: f64, + random_access_p95_ms: 
f64, + random_access_p99_ms: f64, random_access_failures: usize, cache_hits: usize, cache_misses: usize, } +#[derive(Debug, Serialize)] +struct BenchmarkOutput { + video_path: PathBuf, + fps: u32, + iterations: usize, + sequential_frames: usize, + random_samples: usize, + results: BenchmarkResults, +} + impl BenchmarkResults { fn print_report(&self) { println!("\n{}", "=".repeat(60)); @@ -96,6 +119,14 @@ impl BenchmarkResults { println!(" Avg decode time: {avg:.2}ms"); println!(" Min decode time: {min:.2}ms"); println!(" Max decode time: {max:.2}ms"); + println!(" P50 decode time: {:.2}ms", self.sequential_p50_ms); + println!(" P95 decode time: {:.2}ms", self.sequential_p95_ms); + println!(" P99 decode time: {:.2}ms", self.sequential_p99_ms); + println!(" First frame decode: {:.2}ms", self.first_frame_decode_ms); + println!( + " Startup to first frame: {:.2}ms", + self.startup_to_first_frame_ms + ); println!(" Effective FPS: {:.1}", self.sequential_fps); } println!(); @@ -138,18 +169,9 @@ impl BenchmarkResults { println!(" Avg access time: {avg:.2}ms"); println!(" Min access time: {min:.2}ms"); println!(" Max access time: {max:.2}ms"); - println!( - " P50: {:.2}ms", - percentile(&self.random_access_times_ms, 50.0) - ); - println!( - " P95: {:.2}ms", - percentile(&self.random_access_times_ms, 95.0) - ); - println!( - " P99: {:.2}ms", - percentile(&self.random_access_times_ms, 99.0) - ); + println!(" P50: {:.2}ms", self.random_access_p50_ms); + println!(" P95: {:.2}ms", self.random_access_p95_ms); + println!(" P99: {:.2}ms", self.random_access_p99_ms); } println!(); @@ -215,10 +237,13 @@ async fn benchmark_sequential_decode( fps: u32, frame_count: usize, start_time: f32, -) -> (Vec, f64, usize) { +) -> (Vec, f64, usize, f64, f64) { let mut times = Vec::with_capacity(frame_count); let mut failures = 0; let overall_start = Instant::now(); + let mut first_frame_decode_ms = 0.0; + let mut startup_to_first_frame_ms = 0.0; + let mut first_frame_captured = false; for i in 
0..frame_count { let time = start_time + (i as f32 / fps as f32); @@ -227,6 +252,11 @@ async fn benchmark_sequential_decode( Some(_frame) => { let elapsed = start.elapsed(); times.push(elapsed.as_secs_f64() * 1000.0); + if !first_frame_captured { + first_frame_captured = true; + first_frame_decode_ms = elapsed.as_secs_f64() * 1000.0; + startup_to_first_frame_ms = overall_start.elapsed().as_secs_f64() * 1000.0; + } } None => { failures += 1; @@ -243,7 +273,13 @@ async fn benchmark_sequential_decode( 0.0 }; - (times, effective_fps, failures) + ( + times, + effective_fps, + failures, + first_frame_decode_ms, + startup_to_first_frame_ms, + ) } async fn benchmark_seek( @@ -308,6 +344,10 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { config.video_path.display() ); println!("FPS: {}, Iterations: {}", config.fps, config.iterations); + println!( + "Sequential frames: {}, Random samples: {}", + config.sequential_frames, config.random_samples + ); println!(); println!("[1/5] Benchmarking decoder creation..."); @@ -341,12 +381,20 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { println!("Detected video duration: {video_duration:.2}s"); println!(); - println!("[3/5] Benchmarking sequential decode (100 frames from start)..."); - let (seq_times, seq_fps, seq_failures) = - benchmark_sequential_decode(&decoder, config.fps, 100, 0.0).await; + println!( + "[3/5] Benchmarking sequential decode ({} frames from start)...", + config.sequential_frames + ); + let (seq_times, seq_fps, seq_failures, first_frame_decode_ms, startup_to_first_frame_ms) = + benchmark_sequential_decode(&decoder, config.fps, config.sequential_frames, 0.0).await; results.sequential_decode_times_ms = seq_times; results.sequential_fps = seq_fps; results.sequential_failures = seq_failures; + results.first_frame_decode_ms = first_frame_decode_ms; + results.startup_to_first_frame_ms = startup_to_first_frame_ms; + results.sequential_p50_ms = 
percentile(&results.sequential_decode_times_ms, 50.0); + results.sequential_p95_ms = percentile(&results.sequential_decode_times_ms, 95.0); + results.sequential_p99_ms = percentile(&results.sequential_decode_times_ms, 99.0); println!(" Done: {seq_fps:.1} effective FPS"); if seq_failures > 0 { println!(" Warning: {seq_failures} frames failed to decode"); @@ -370,9 +418,12 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { } } - println!("[5/5] Benchmarking random access (50 samples)..."); + println!( + "[5/5] Benchmarking random access ({} samples)...", + config.random_samples + ); let (random_times, random_failures) = - benchmark_random_access(&decoder, config.fps, video_duration, 50).await; + benchmark_random_access(&decoder, config.fps, video_duration, config.random_samples).await; results.random_access_times_ms = random_times; results.random_access_failures = random_failures; results.random_access_avg_ms = if results.random_access_times_ms.is_empty() { @@ -381,6 +432,9 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { results.random_access_times_ms.iter().sum::() / results.random_access_times_ms.len() as f64 }; + results.random_access_p50_ms = percentile(&results.random_access_times_ms, 50.0); + results.random_access_p95_ms = percentile(&results.random_access_times_ms, 95.0); + results.random_access_p99_ms = percentile(&results.random_access_times_ms, 99.0); println!(" Done: {:.2}ms avg", results.random_access_avg_ms); if random_failures > 0 { println!(" Warning: {random_failures} random accesses failed"); @@ -389,6 +443,53 @@ async fn run_full_benchmark(config: BenchmarkConfig) -> BenchmarkResults { results } +fn write_json_output(config: &BenchmarkConfig, results: &BenchmarkResults) { + let Some(output_path) = &config.output_json else { + return; + }; + + let output = BenchmarkOutput { + video_path: config.video_path.clone(), + fps: config.fps, + iterations: config.iterations, + sequential_frames: 
config.sequential_frames, + random_samples: config.random_samples, + results: BenchmarkResults { + decoder_creation_ms: results.decoder_creation_ms, + sequential_decode_times_ms: results.sequential_decode_times_ms.clone(), + first_frame_decode_ms: results.first_frame_decode_ms, + startup_to_first_frame_ms: results.startup_to_first_frame_ms, + sequential_p50_ms: results.sequential_p50_ms, + sequential_p95_ms: results.sequential_p95_ms, + sequential_p99_ms: results.sequential_p99_ms, + sequential_fps: results.sequential_fps, + sequential_failures: results.sequential_failures, + seek_times_by_distance: results.seek_times_by_distance.clone(), + seek_failures: results.seek_failures, + random_access_times_ms: results.random_access_times_ms.clone(), + random_access_avg_ms: results.random_access_avg_ms, + random_access_p50_ms: results.random_access_p50_ms, + random_access_p95_ms: results.random_access_p95_ms, + random_access_p99_ms: results.random_access_p99_ms, + random_access_failures: results.random_access_failures, + cache_hits: results.cache_hits, + cache_misses: results.cache_misses, + }, + }; + + match serde_json::to_string_pretty(&output) { + Ok(json) => match fs::write(output_path, json) { + Ok(()) => println!("Wrote benchmark JSON to {}", output_path.display()), + Err(error) => eprintln!( + "Failed to write benchmark JSON to {}: {}", + output_path.display(), + error + ), + }, + Err(error) => eprintln!("Failed to serialize benchmark JSON output: {}", error), + } +} + fn main() { let args: Vec = std::env::args().collect(); @@ -397,7 +498,7 @@ fn main() { .position(|a| a == "--video") .and_then(|i| args.get(i + 1)) .map(PathBuf::from) - .expect("Usage: decode-benchmark --video [--fps ] [--iterations ]"); + .expect("Usage: decode-benchmark --video [--fps ] [--iterations ] [--sequential-frames ] [--random-samples ] [--output-json ]"); let fps = args .iter() @@ -413,14 +514,38 @@ fn main() { .and_then(|s| s.parse().ok()) .unwrap_or(100); + let sequential_frames = args 
+ .iter() + .position(|a| a == "--sequential-frames") + .and_then(|i| args.get(i + 1)) + .and_then(|s| s.parse().ok()) + .unwrap_or(100); + + let random_samples = args + .iter() + .position(|a| a == "--random-samples") + .and_then(|i| args.get(i + 1)) + .and_then(|s| s.parse().ok()) + .unwrap_or(50); + + let output_json = args + .iter() + .position(|a| a == "--output-json") + .and_then(|i| args.get(i + 1)) + .map(PathBuf::from); + let config = BenchmarkConfig { video_path, fps, iterations, + sequential_frames, + random_samples, + output_json, }; let rt = Runtime::new().expect("Failed to create Tokio runtime"); - let results = rt.block_on(run_full_benchmark(config)); + let results = rt.block_on(run_full_benchmark(config.clone())); results.print_report(); + write_json_output(&config, &results); } From a848479aab8cc3a3cec89ad1a252d57988c7314e Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 20:43:59 +0000 Subject: [PATCH 175/333] improve: support fragmented inputs in decode benchmark --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/examples/decode-benchmark.rs | 93 ++++++++++++++++++++++ 3 files changed, 97 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index da84ed7b3a..a3fc87dad6 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -68,6 +68,9 @@ cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 # Emit machine-readable JSON with startup/scrub metrics cargo run -p cap-editor --example decode-benchmark -- --video /path/to/video.mp4 --fps 60 --sequential-frames 180 --random-samples 120 --output-json /tmp/decode-benchmark.json + +# Fragmented segment input is supported by passing the display directory +cargo run -p cap-editor --example decode-benchmark -- --video /path/to/segment/display --fps 60 --output-json /tmp/decode-benchmark-fragmented.json ``` #### Combined 
Workflow (Recording → Playback) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index fa28454366..2a73ea5726 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -162,6 +162,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 6. **Decode benchmark now supports machine-readable evidence output (2026-02-13)** - `decode-benchmark` supports `--output-json` for structured metric capture. - Added sequential frame and random sample count controls to scale benchmark depth per hardware class. + - Supports fragmented segment directories for duration-aware benchmarking. --- diff --git a/crates/editor/examples/decode-benchmark.rs b/crates/editor/examples/decode-benchmark.rs index 2ae11f207b..b7e74ce4c3 100644 --- a/crates/editor/examples/decode-benchmark.rs +++ b/crates/editor/examples/decode-benchmark.rs @@ -4,11 +4,16 @@ use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; use std::time::Instant; +use std::time::{SystemTime, UNIX_EPOCH}; use tokio::runtime::Runtime; const DEFAULT_DURATION_SECS: f32 = 60.0; fn get_video_duration(path: &Path) -> f32 { + if path.is_dir() { + return get_fragmented_video_duration(path); + } + let output = Command::new("ffprobe") .args([ "-v", @@ -35,6 +40,94 @@ fn get_video_duration(path: &Path) -> f32 { } } +fn get_fragmented_video_duration(path: &Path) -> f32 { + let init_segment = path.join("init.mp4"); + if !init_segment.exists() { + eprintln!( + "Warning: Fragmented input {} missing init.mp4", + path.display() + ); + return DEFAULT_DURATION_SECS; + } + + let mut fragments: Vec = match fs::read_dir(path) { + Ok(entries) => entries + .filter_map(|entry| entry.ok()) + .map(|entry| entry.path()) + .filter(|entry| entry.extension().is_some_and(|ext| ext == "m4s")) + .collect(), + Err(error) => { + eprintln!( + "Warning: Failed to read fragmented directory {}: {}", + path.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + }; 
+ fragments.sort(); + + if fragments.is_empty() { + eprintln!( + "Warning: Fragmented input {} has no .m4s segments", + path.display() + ); + return DEFAULT_DURATION_SECS; + } + + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|value| value.as_nanos()) + .unwrap_or(0); + let combined_path = + std::env::temp_dir().join(format!("cap-decode-benchmark-combined-{timestamp}.mp4")); + + let mut combined_data = match fs::read(&init_segment) { + Ok(data) => data, + Err(error) => { + eprintln!( + "Warning: Failed to read init segment {}: {}", + init_segment.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + }; + + for fragment in fragments { + match fs::read(&fragment) { + Ok(data) => combined_data.extend(data), + Err(error) => { + eprintln!( + "Warning: Failed to read segment {}: {}", + fragment.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + } + } + + if let Err(error) = fs::write(&combined_path, &combined_data) { + eprintln!( + "Warning: Failed to write combined fragmented video {}: {}", + combined_path.display(), + error + ); + return DEFAULT_DURATION_SECS; + } + + let duration = get_video_duration(&combined_path); + if let Err(error) = fs::remove_file(&combined_path) { + eprintln!( + "Warning: Failed to remove temporary combined file {}: {}", + combined_path.display(), + error + ); + } + duration +} + #[derive(Debug, Clone)] struct BenchmarkConfig { video_path: PathBuf, From 232608d0234ecd5c4b1b7fdc30cf0ded86aa5b7d Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 20:46:11 +0000 Subject: [PATCH 176/333] improve: coalesce timeline seek commands during scrubbing --- .../src/routes/editor/Timeline/index.tsx | 45 ++++++++++++++++--- crates/editor/PLAYBACK-FINDINGS.md | 6 +++ 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx index da839931ad..465ff5c596 100644 --- 
a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -220,6 +220,9 @@ export function Timeline() { let pendingScrollDelta = 0; let scrollRafId: number | null = null; + let pendingSeekFrame: number | null = null; + let seekRafId: number | null = null; + let seekInFlight = false; function flushPendingZoom() { if (pendingZoomDelta === 0 || pendingZoomOrigin === null) { @@ -265,7 +268,40 @@ export function Timeline() { } } - async function handleUpdatePlayhead(e: MouseEvent) { + function scheduleSeek(frameNumber: number) { + pendingSeekFrame = frameNumber; + if (seekRafId === null) { + seekRafId = requestAnimationFrame(flushPendingSeek); + } + } + + async function flushPendingSeek() { + seekRafId = null; + + if (seekInFlight || pendingSeekFrame === null) { + if (pendingSeekFrame !== null && seekRafId === null) { + seekRafId = requestAnimationFrame(flushPendingSeek); + } + return; + } + + const frameNumber = pendingSeekFrame; + pendingSeekFrame = null; + seekInFlight = true; + + try { + await commands.seekTo(frameNumber); + } catch (err) { + console.error("Failed to seek timeline playhead:", err); + } finally { + seekInFlight = false; + if (pendingSeekFrame !== null && seekRafId === null) { + seekRafId = requestAnimationFrame(flushPendingSeek); + } + } + } + + function handleUpdatePlayhead(e: MouseEvent) { const { left } = timelineBounds; if ( zoomSegmentDragState.type !== "moving" && @@ -278,12 +314,7 @@ export function Timeline() { secsPerPixel() * (e.clientX - left) + transform().position; const newTime = Math.min(Math.max(0, rawTime), totalDuration()); const targetFrame = Math.round(newTime * FPS); - - try { - await commands.seekTo(targetFrame); - } catch (err) { - console.error("Failed to seek timeline playhead:", err); - } + scheduleSeek(targetFrame); setEditorState("playbackTime", newTime); } diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 2a73ea5726..5d27c2c58a 
100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -164,6 +164,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Added sequential frame and random sample count controls to scale benchmark depth per hardware class. - Supports fragmented segment directories for duration-aware benchmarking. +7. **Timeline seek dispatch now coalesces during drag (2026-02-13)** + - Frontend seek calls are requestAnimationFrame-batched. + - Only the latest pending seek frame is sent while an async seek is in-flight. + --- ## Root Cause Analysis Archive @@ -259,6 +263,7 @@ Decoder Pipeline: 8. Extended playback benchmark tooling with scrub mode and startup latency metrics. 9. Added playback runtime startup telemetry logs for first frame and audio callback bring-up. 10. Enhanced decode benchmark example with structured JSON output and configurable sample depth. +11. Added timeline seek dispatch coalescing to reduce seek command storms during drag. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -270,6 +275,7 @@ Decoder Pipeline: - `crates/editor/PLAYBACK-BENCHMARKS.md`: updated benchmark reference and metric definitions. - `crates/editor/src/playback.rs`: added first-render and audio-callback startup latency logging. - `crates/editor/examples/decode-benchmark.rs`: added `--output-json`, startup metrics, and configurable sequential/random sampling. +- `apps/desktop/src/routes/editor/Timeline/index.tsx`: added requestAnimationFrame-based seek coalescing with in-flight protection. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
From 0f6795e58900a3b3b798eedd3b6bc0416ca13cf6 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:00:59 +0000 Subject: [PATCH 177/333] improve: clean up scheduled timeline raf tasks on unmount --- apps/desktop/src/routes/editor/Timeline/index.tsx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx index 465ff5c596..7726bb4258 100644 --- a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -7,6 +7,7 @@ import { createSignal, Index, type JSX, + onCleanup, onMount, Show, } from "solid-js"; @@ -224,6 +225,12 @@ export function Timeline() { let seekRafId: number | null = null; let seekInFlight = false; + onCleanup(() => { + if (zoomRafId !== null) cancelAnimationFrame(zoomRafId); + if (scrollRafId !== null) cancelAnimationFrame(scrollRafId); + if (seekRafId !== null) cancelAnimationFrame(seekRafId); + }); + function flushPendingZoom() { if (pendingZoomDelta === 0 || pendingZoomOrigin === null) { zoomRafId = null; From 42017c537afbb30d3929d0340dabf726370e34c4 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:02:56 +0000 Subject: [PATCH 178/333] improve: add json export support to playback benchmark runner --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 6 + .../examples/playback-test-runner.rs | 141 +++++++++++++----- 3 files changed, 114 insertions(+), 36 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index a3fc87dad6..6c7126b72b 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -55,6 +55,9 @@ cargo run -p cap-recording --example playback-test-runner -- --recording-path /p # List available recordings cargo run -p cap-recording --example playback-test-runner -- list + +# Emit machine-readable JSON report +cargo run -p cap-recording 
--example playback-test-runner -- full --json-output /tmp/playback-benchmark.json ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 5d27c2c58a..abc88dd919 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -168,6 +168,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Frontend seek calls are requestAnimationFrame-batched. - Only the latest pending seek frame is sent while an async seek is in-flight. +8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** + - `playback-test-runner` supports `--json-output` for structured report emission. + - JSON output includes command metadata, system info, summary, and per-recording test detail. + --- ## Root Cause Analysis Archive @@ -264,6 +268,7 @@ Decoder Pipeline: 9. Added playback runtime startup telemetry logs for first frame and audio callback bring-up. 10. Enhanced decode benchmark example with structured JSON output and configurable sample depth. 11. Added timeline seek dispatch coalescing to reduce seek command storms during drag. +12. Added JSON report output support to playback-test-runner for benchmark evidence collection. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -276,6 +281,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: added first-render and audio-callback startup latency logging. - `crates/editor/examples/decode-benchmark.rs`: added `--output-json`, startup metrics, and configurable sequential/random sampling. - `apps/desktop/src/routes/editor/Timeline/index.tsx`: added requestAnimationFrame-based seek coalescing with in-flight protection. +- `crates/recording/examples/playback-test-runner.rs`: added `--json-output` to emit structured benchmark reports. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/recording/examples/playback-test-runner.rs b/crates/recording/examples/playback-test-runner.rs index 9c7bff6110..abebc6cd14 100644 --- a/crates/recording/examples/playback-test-runner.rs +++ b/crates/recording/examples/playback-test-runner.rs @@ -4,6 +4,7 @@ use cap_project::{RecordingMeta, RecordingMetaInner, StudioRecordingMeta}; use cap_rendering::decoder::spawn_decoder; use chrono::{Local, Utc}; use clap::{Parser, Subcommand}; +use serde::Serialize; use std::{ fs, path::{Path, PathBuf}, @@ -44,6 +45,9 @@ struct Cli { #[arg(long, global = true)] benchmark_output: bool, + #[arg(long, global = true)] + json_output: Option, + #[arg(long, global = true)] notes: Option, } @@ -65,7 +69,7 @@ const SCRUB_SEEK_WARNING_MS: f64 = 40.0; const AUDIO_VIDEO_SYNC_TOLERANCE_MS: f64 = 100.0; const CAMERA_SYNC_TOLERANCE_MS: f64 = 100.0; -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct DecoderTestResult { passed: bool, decoder_type: String, @@ -77,7 +81,7 @@ struct DecoderTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct PlaybackTestResult { passed: bool, segment_index: usize, @@ -100,7 +104,7 @@ struct PlaybackTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct ScrubTestResult { passed: bool, segment_index: usize, @@ -116,7 +120,7 @@ struct ScrubTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct AudioSyncTestResult { passed: bool, segment_index: usize, @@ -134,7 +138,7 @@ struct AudioSyncTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct CameraSyncTestResult { passed: bool, segment_index: usize, @@ -150,7 +154,7 @@ struct CameraSyncTestResult { errors: Vec, } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Default, Serialize)] struct RecordingTestReport { 
recording_path: PathBuf, recording_name: String, @@ -1010,13 +1014,31 @@ async fn run_tests_on_recording( Ok(report) } -#[derive(Debug)] +#[derive(Debug, Clone, Serialize)] struct SystemInfo { os: String, arch: String, cpu: String, } +#[derive(Debug, Serialize)] +struct JsonBenchmarkSummary { + total_recordings: usize, + passed_recordings: usize, + failed_recordings: usize, +} + +#[derive(Debug, Serialize)] +struct JsonBenchmarkOutput { + generated_at_utc: String, + local_time: String, + command: String, + notes: Option, + system: SystemInfo, + summary: JsonBenchmarkSummary, + reports: Vec, +} + impl SystemInfo { fn collect() -> Self { let mut sys = System::new_all(); @@ -1380,6 +1402,41 @@ fn write_benchmark_to_file(benchmark_md: &str) -> anyhow::Result<()> { Ok(()) } +fn write_json_output_to_file( + output_path: &Path, + reports: &[RecordingTestReport], + notes: Option<&str>, + command: &str, +) -> anyhow::Result<()> { + let passed = reports.iter().filter(|r| r.overall_passed).count(); + let total = reports.len(); + let failed = total.saturating_sub(passed); + + let output = JsonBenchmarkOutput { + generated_at_utc: Utc::now().format("%Y-%m-%d %H:%M:%S UTC").to_string(), + local_time: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(), + command: command.to_string(), + notes: notes.map(ToString::to_string), + system: SystemInfo::collect(), + summary: JsonBenchmarkSummary { + total_recordings: total, + passed_recordings: passed, + failed_recordings: failed, + }, + reports: reports.to_vec(), + }; + + let json = serde_json::to_string_pretty(&output)?; + fs::write(output_path, json)?; + + println!( + "\n✅ JSON benchmark results written to {}", + output_path.display() + ); + + Ok(()) +} + fn print_summary(reports: &[RecordingTestReport]) { println!("\n{}", "=".repeat(70)); println!("PLAYBACK TEST SUMMARY"); @@ -1423,6 +1480,18 @@ fn print_summary(reports: &[RecordingTestReport]) { println!(); } +fn command_name(command: Option<&Commands>) -> &'static str { + match 
command { + Some(Commands::Decoder) => "decoder", + Some(Commands::Playback) => "playback", + Some(Commands::Scrub) => "scrub", + Some(Commands::AudioSync) => "audio-sync", + Some(Commands::CameraSync) => "camera-sync", + Some(Commands::Full) | None => "full", + Some(Commands::List) => "list", + } +} + #[tokio::main] async fn main() -> anyhow::Result<()> { tracing_subscriber::registry() @@ -1434,7 +1503,7 @@ async fn main() -> anyhow::Result<()> { let cli = Cli::parse(); - if let Some(Commands::List) = cli.command { + if matches!(cli.command.as_ref(), Some(Commands::List)) { let recordings = discover_recordings(&cli.input_dir); if recordings.is_empty() { println!("No recordings found in {}", cli.input_dir.display()); @@ -1468,16 +1537,16 @@ async fn main() -> anyhow::Result<()> { return Ok(()); } - let (run_decoder, run_playback, run_scrub, run_audio_sync, run_camera_sync) = match cli.command - { - Some(Commands::Decoder) => (true, false, false, false, false), - Some(Commands::Playback) => (false, true, false, false, false), - Some(Commands::Scrub) => (false, false, true, false, false), - Some(Commands::AudioSync) => (false, false, false, true, false), - Some(Commands::CameraSync) => (false, false, false, false, true), - Some(Commands::Full) | None => (true, true, true, true, true), - Some(Commands::List) => unreachable!(), - }; + let (run_decoder, run_playback, run_scrub, run_audio_sync, run_camera_sync) = + match cli.command.as_ref() { + Some(Commands::Decoder) => (true, false, false, false, false), + Some(Commands::Playback) => (false, true, false, false, false), + Some(Commands::Scrub) => (false, false, true, false, false), + Some(Commands::AudioSync) => (false, false, false, true, false), + Some(Commands::CameraSync) => (false, false, false, false, true), + Some(Commands::Full) | None => (true, true, true, true, true), + Some(Commands::List) => unreachable!(), + }; println!("\nCap Playback Test Runner"); println!("{}", "=".repeat(40)); @@ -1517,25 +1586,17 @@ 
async fn main() -> anyhow::Result<()> { print_summary(&reports); - if cli.benchmark_output { - let command = format!( - "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}{}", - match cli.command { - Some(Commands::Decoder) => "decoder", - Some(Commands::Playback) => "playback", - Some(Commands::Scrub) => "scrub", - Some(Commands::AudioSync) => "audio-sync", - Some(Commands::CameraSync) => "camera-sync", - Some(Commands::Full) | None => "full", - Some(Commands::List) => "list", - }, - cli.fps, - cli.recording_path - .as_ref() - .map(|p| format!(" --recording-path {}", p.display())) - .unwrap_or_default(), - ); + let command = format!( + "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}{}", + command_name(cli.command.as_ref()), + cli.fps, + cli.recording_path + .as_ref() + .map(|p| format!(" --recording-path {}", p.display())) + .unwrap_or_default(), + ); + if cli.benchmark_output { let benchmark_md = generate_benchmark_markdown(&reports, cli.notes.as_deref(), command.trim()); @@ -1544,6 +1605,14 @@ async fn main() -> anyhow::Result<()> { } } + if let Some(output_path) = &cli.json_output { + if let Err(e) = + write_json_output_to_file(output_path, &reports, cli.notes.as_deref(), command.trim()) + { + tracing::error!("Failed to write JSON benchmark results: {}", e); + } + } + let failed = reports.iter().filter(|r| !r.overall_passed).count(); std::process::exit(if failed > 0 { 1 } else { 0 }); } From b459535e4727d6c1e2b6a86d9f1f146866b88aa3 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:04:31 +0000 Subject: [PATCH 179/333] improve: add playback benchmark json aggregation script --- crates/editor/PLAYBACK-BENCHMARKS.md | 6 + crates/editor/PLAYBACK-FINDINGS.md | 6 + scripts/aggregate-playback-benchmarks.js | 248 +++++++++++++++++++++++ 3 files changed, 260 insertions(+) create mode 100644 scripts/aggregate-playback-benchmarks.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md 
b/crates/editor/PLAYBACK-BENCHMARKS.md index 6c7126b72b..a46e6ab281 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -60,6 +60,12 @@ cargo run -p cap-recording --example playback-test-runner -- list cargo run -p cap-recording --example playback-test-runner -- full --json-output /tmp/playback-benchmark.json ``` +Aggregate JSON outputs from multiple machines: + +```bash +node scripts/aggregate-playback-benchmarks.js --input /path/to/json-results --output /tmp/playback-benchmark-aggregate.md +``` + #### Decode Performance Benchmark ```bash diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index abc88dd919..b519641ecf 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -172,6 +172,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `playback-test-runner` supports `--json-output` for structured report emission. - JSON output includes command metadata, system info, summary, and per-recording test detail. +9. **Added JSON aggregate utility for cross-platform benchmark collation (2026-02-13)** + - `scripts/aggregate-playback-benchmarks.js` builds a markdown table from multiple JSON outputs. + - Aggregates platform/gpu/scenario-tagged runs for matrix reporting. + --- ## Root Cause Analysis Archive @@ -269,6 +273,7 @@ Decoder Pipeline: 10. Enhanced decode benchmark example with structured JSON output and configurable sample depth. 11. Added timeline seek dispatch coalescing to reduce seek command storms during drag. 12. Added JSON report output support to playback-test-runner for benchmark evidence collection. +13. Added cross-platform benchmark JSON aggregation utility script. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. 
@@ -282,6 +287,7 @@ Decoder Pipeline: - `crates/editor/examples/decode-benchmark.rs`: added `--output-json`, startup metrics, and configurable sequential/random sampling. - `apps/desktop/src/routes/editor/Timeline/index.tsx`: added requestAnimationFrame-based seek coalescing with in-flight protection. - `crates/recording/examples/playback-test-runner.rs`: added `--json-output` to emit structured benchmark reports. +- `scripts/aggregate-playback-benchmarks.js`: added markdown aggregation for multiple playback benchmark JSON artifacts. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/scripts/aggregate-playback-benchmarks.js b/scripts/aggregate-playback-benchmarks.js new file mode 100644 index 0000000000..d5ee7fe7ec --- /dev/null +++ b/scripts/aggregate-playback-benchmarks.js @@ -0,0 +1,248 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const inputs = []; + let output = null; + let help = false; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--help" || arg === "-h") { + help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[i + 1]; + if (!value) { + throw new Error("Missing value for --input"); + } + inputs.push(path.resolve(value)); + i += 1; + continue; + } + if (arg === "--output" || arg === "-o") { + const value = argv[i + 1]; + if (!value) { + throw new Error("Missing value for --output"); + } + output = path.resolve(value); + i += 1; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return { inputs, output, help }; +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? 
[targetPath] : []; + } + + const files = []; + const entries = fs.readdirSync(targetPath, { withFileTypes: true }); + for (const entry of entries) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) { + return {}; + } + const parsed = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) { + continue; + } + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) { + continue; + } + parsed[key.trim()] = value.trim(); + } + return parsed; +} + +function numberOrNull(value) { + if (typeof value !== "number" || Number.isNaN(value)) { + return null; + } + return value; +} + +function maxOrNull(values) { + const numeric = values + .map(numberOrNull) + .filter((value) => value !== null); + if (numeric.length === 0) { + return null; + } + return Math.max(...numeric); +} + +function avgOrNull(values) { + const numeric = values + .map(numberOrNull) + .filter((value) => value !== null); + if (numeric.length === 0) { + return null; + } + return numeric.reduce((acc, value) => acc + value, 0) / numeric.length; +} + +function formatMetric(value, digits = 1) { + return value === null ? "n/a" : value.toFixed(digits); +} + +function extractRows(jsonPath, data) { + if (!Array.isArray(data.reports)) { + return []; + } + + const notes = parseNotes(data.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const runTime = data.generated_at_utc ?? "unknown"; + + const rows = []; + for (const report of data.reports) { + const playbackResults = Array.isArray(report.playback_results) + ? report.playback_results + : []; + const scrubResults = Array.isArray(report.scrub_results) + ? 
report.scrub_results + : []; + const audioResults = Array.isArray(report.audio_sync_results) + ? report.audio_sync_results + : []; + + const effectiveFpsMin = playbackResults.length + ? Math.min( + ...playbackResults + .map((result) => numberOrNull(result.effective_fps)) + .filter((value) => value !== null), + ) + : null; + const scrubP95Max = maxOrNull( + scrubResults.map((result) => result.p95_seek_time_ms), + ); + const startupAvg = avgOrNull( + playbackResults.map((result) => result.startup_to_first_frame_ms), + ); + const micDiffMax = maxOrNull( + audioResults + .filter((result) => result.has_mic_audio) + .map((result) => result.mic_video_diff_ms), + ); + const sysDiffMax = maxOrNull( + audioResults + .filter((result) => result.has_system_audio) + .map((result) => result.system_audio_video_diff_ms), + ); + + rows.push({ + runTime, + platform, + gpu, + scenario, + recording: report.recording_name ?? path.basename(jsonPath), + format: report.is_fragmented ? "fragmented" : "mp4", + status: report.overall_passed ? "PASS" : "FAIL", + effectiveFpsMin, + scrubP95Max, + startupAvg, + micDiffMax, + sysDiffMax, + command: data.command ?? "unknown", + source: jsonPath, + }); + } + + return rows; +} + +function buildMarkdown(rows) { + const sorted = [...rows].sort((a, b) => (a.runTime < b.runTime ? 
1 : -1)); + const passed = sorted.filter((row) => row.status === "PASS").length; + const failed = sorted.length - passed; + + let md = ""; + md += `# Playback Benchmark Aggregate\n\n`; + md += `Generated: ${new Date().toISOString()}\n\n`; + md += `Total rows: ${sorted.length}, Passed: ${passed}, Failed: ${failed}\n\n`; + md += "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; + md += "|---|---|---|---|---|---|---|---:|---:|---:|---:|---:|\n"; + for (const row of sorted) { + md += `| ${row.runTime} | ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.status} | ${formatMetric(row.effectiveFpsMin)} | ${formatMetric(row.scrubP95Max)} | ${formatMetric(row.startupAvg)} | ${formatMetric(row.micDiffMax)} | ${formatMetric(row.sysDiffMax)} |\n`; + } + md += "\n"; + return md; +} + +function printUsage() { + console.log(`Usage: node scripts/aggregate-playback-benchmarks.js --input [--input ...] 
[--output ] + +Aggregates playback-test-runner JSON outputs into a markdown summary table.`); +} + +function main() { + const args = parseArgs(process.argv); + if (args.help) { + printUsage(); + return; + } + if (args.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const inputPath of args.inputs) { + for (const filePath of collectJsonFiles(inputPath)) { + files.add(filePath); + } + } + + if (files.size === 0) { + throw new Error("No JSON files found for aggregation"); + } + + const rows = []; + for (const filePath of files) { + const raw = fs.readFileSync(filePath, "utf8"); + const parsed = JSON.parse(raw); + rows.push(...extractRows(filePath, parsed)); + } + + const markdown = buildMarkdown(rows); + if (args.output) { + fs.writeFileSync(args.output, markdown, "utf8"); + console.log(`Wrote aggregate markdown to ${args.output}`); + } else { + process.stdout.write(markdown); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} From fd2b6c70a8046bc7e1d8943810b2a47d861d35b0 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:05:22 +0000 Subject: [PATCH 180/333] improve: use latest-only watch channel for playback seeks --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 16 +++++++--------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b519641ecf..fac917b34e 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -144,6 +144,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - Added seek channel to `PlaybackHandle` and playback loop. - `seek_to` and `set_playhead_position` commands now forward seek requests to active playback. - Timeline seek no longer tears down and recreates playback while playing. 
+ - Seek signaling now uses watch semantics so only latest frame target is consumed under heavy scrub load. 3. **Lazy decoder pool warmup on macOS AVAssetReader (2026-02-13)** - Initial warmup now creates only a small subset of decoder instances. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 70e019ef5d..c9533ab63b 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -65,7 +65,7 @@ pub enum PlaybackEvent { pub struct PlaybackHandle { stop_tx: watch::Sender, event_rx: watch::Receiver, - seek_tx: tokio_mpsc::UnboundedSender, + seek_tx: watch::Sender, } struct PrefetchedFrame { @@ -120,7 +120,8 @@ impl Playback { let (event_tx, mut event_rx) = watch::channel(PlaybackEvent::Start); event_rx.borrow_and_update(); - let (seek_tx, mut seek_rx) = tokio_mpsc::unbounded_channel::(); + let (seek_tx, mut seek_rx) = watch::channel(self.start_frame_number); + seek_rx.borrow_and_update(); let handle = PlaybackHandle { stop_tx: stop_tx.clone(), @@ -448,12 +449,8 @@ impl Playback { let mut cached_project = self.project.borrow().clone(); 'playback: loop { - let mut pending_seek = None; - while let Ok(next_seek_frame) = seek_rx.try_recv() { - pending_seek = Some(next_seek_frame); - } - - if let Some(seek_frame) = pending_seek { + if seek_rx.has_changed().unwrap_or(false) { + let seek_frame = *seek_rx.borrow_and_update(); frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; @@ -499,7 +496,8 @@ impl Playback { tokio::select! 
{ _ = stop_rx.changed() => break 'playback, - Some(seek_frame) = seek_rx.recv() => { + _ = seek_rx.changed() => { + let seek_frame = *seek_rx.borrow_and_update(); frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; From bf7aa9895b71b74655603625b1b943f6d49314d2 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:06:21 +0000 Subject: [PATCH 181/333] improve: add matrix helper for playback benchmark runs --- crates/editor/PLAYBACK-BENCHMARKS.md | 6 + crates/editor/PLAYBACK-FINDINGS.md | 6 + scripts/run-playback-benchmark-matrix.js | 163 +++++++++++++++++++++++ 3 files changed, 175 insertions(+) create mode 100644 scripts/run-playback-benchmark-matrix.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index a46e6ab281..79a75e2052 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -101,6 +101,12 @@ cargo run -p cap-recording --example playback-test-runner -- full --fps 60 --ben cargo run -p cap-recording --example playback-test-runner -- scrub --fps 60 --benchmark-output --notes "platform= gpu= scenario=scrub" ``` +Automated helper for machine runs: + +```bash +node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --input-dir /tmp/cap-real-device-tests +``` + | Platform | GPU Class | MP4 Full | Fragmented Full | MP4 Scrub | Fragmented Scrub | Notes | |----------|-----------|----------|-----------------|-----------|------------------|-------| | macOS 13+ | Apple Silicon | ☐ | ☐ | ☐ | ☐ | | diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index fac917b34e..065c659a6d 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -177,6 +177,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/aggregate-playback-benchmarks.js` builds a markdown table 
from multiple JSON outputs. - Aggregates platform/gpu/scenario-tagged runs for matrix reporting. +10. **Added matrix run helper for platform/GPU benchmark execution (2026-02-13)** + - `scripts/run-playback-benchmark-matrix.js` runs `full` and `scrub` scenarios with tagged notes and JSON output. + - Automatically generates aggregate markdown for each machine run directory. + --- ## Root Cause Analysis Archive @@ -275,6 +279,7 @@ Decoder Pipeline: 11. Added timeline seek dispatch coalescing to reduce seek command storms during drag. 12. Added JSON report output support to playback-test-runner for benchmark evidence collection. 13. Added cross-platform benchmark JSON aggregation utility script. +14. Added matrix execution helper script for full + scrub benchmark runs per platform/GPU. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -289,6 +294,7 @@ Decoder Pipeline: - `apps/desktop/src/routes/editor/Timeline/index.tsx`: added requestAnimationFrame-based seek coalescing with in-flight protection. - `crates/recording/examples/playback-test-runner.rs`: added `--json-output` to emit structured benchmark reports. - `scripts/aggregate-playback-benchmarks.js`: added markdown aggregation for multiple playback benchmark JSON artifacts. +- `scripts/run-playback-benchmark-matrix.js`: added orchestrated full/scrub benchmark runner with per-machine aggregate generation. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js new file mode 100644 index 0000000000..d5793550f5 --- /dev/null +++ b/scripts/run-playback-benchmark-matrix.js @@ -0,0 +1,163 @@ +#!/usr/bin/env node + +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + platform: null, + gpu: null, + outputDir: null, + fps: 60, + recordingPath: null, + inputDir: null, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--platform") { + options.platform = argv[++i] ?? null; + continue; + } + if (arg === "--gpu") { + options.gpu = argv[++i] ?? null; + continue; + } + if (arg === "--output-dir") { + options.outputDir = argv[++i] ?? null; + continue; + } + if (arg === "--fps") { + const value = Number.parseInt(argv[++i] ?? "", 10); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --fps value"); + } + options.fps = value; + continue; + } + if (arg === "--recording-path") { + options.recordingPath = argv[++i] ?? null; + continue; + } + if (arg === "--input-dir") { + options.inputDir = argv[++i] ?? null; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] + +Runs playback benchmark matrix scenarios and writes JSON outputs. 
+ +Required: + --platform Platform label (for notes metadata) + --gpu GPU label (for notes metadata) + --output-dir Directory for benchmark JSON outputs + +Optional: + --fps FPS for benchmark runs (default: 60) + --recording-path Specific recording path + --input-dir Recording discovery directory`); +} + +function run(command, args) { + const result = spawnSync(command, args, { stdio: "inherit" }); + if (result.status !== 0) { + throw new Error(`Command failed: ${command} ${args.join(" ")}`); + } +} + +function scenarioOutputPath(outputDir, platform, gpu, scenario) { + const stamp = new Date().toISOString().replace(/[:.]/g, "-"); + return path.join(outputDir, `${stamp}-${platform}-${gpu}-${scenario}.json`); +} + +function scenarioArgs(options, scenario) { + const jsonOutput = scenarioOutputPath( + options.outputDir, + options.platform, + options.gpu, + scenario, + ); + const notes = `platform=${options.platform} gpu=${options.gpu} scenario=${scenario}`; + + const args = [ + "run", + "-p", + "cap-recording", + "--example", + "playback-test-runner", + "--", + scenario, + "--fps", + String(options.fps), + "--json-output", + jsonOutput, + "--notes", + notes, + ]; + + if (options.recordingPath) { + args.push("--recording-path", options.recordingPath); + } else if (options.inputDir) { + args.push("--input-dir", options.inputDir); + } + + return args; +} + +function validateOptions(options) { + if (!options.platform || !options.gpu || !options.outputDir) { + throw new Error("Missing required options: --platform, --gpu, --output-dir"); + } + + const absoluteOutputDir = path.resolve(options.outputDir); + options.outputDir = absoluteOutputDir; + if (!fs.existsSync(absoluteOutputDir)) { + fs.mkdirSync(absoluteOutputDir, { recursive: true }); + } +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + + validateOptions(options); + + console.log(`Running matrix for platform=${options.platform} gpu=${options.gpu}`); + 
run("cargo", scenarioArgs(options, "full")); + run("cargo", scenarioArgs(options, "scrub")); + + const aggregatePath = path.join( + options.outputDir, + `${options.platform}-${options.gpu}-aggregate.md`, + ); + run("node", [ + "scripts/aggregate-playback-benchmarks.js", + "--input", + options.outputDir, + "--output", + aggregatePath, + ]); + console.log(`Aggregate markdown: ${aggregatePath}`); +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} From 599562627251b5f6bb2b4cb1c73bf9ed250b2192 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:07:26 +0000 Subject: [PATCH 182/333] docs: add playback matrix runbook for hardware validation --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 79 ++++++++++++++++++++++++ 3 files changed, 81 insertions(+) create mode 100644 crates/editor/PLAYBACK-MATRIX-RUNBOOK.md diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 79a75e2052..4d9ff41649 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -241,5 +241,6 @@ When analyzing benchmark results, focus on: ## Related Documentation - [Recording Benchmarks](../recording/BENCHMARKS.md) - Recording performance tracking +- [Playback Matrix Runbook](./PLAYBACK-MATRIX-RUNBOOK.md) - Cross-platform evidence collection workflow - [cap-rendering/decoder](../rendering/src/decoder.rs) - Decoder implementation - [cap-video-decode](../video-decode/) - Platform-specific decoders diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 065c659a6d..4a42a3e5fc 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -435,6 +435,7 @@ The CPU RGBA→NV12 conversion was taking 15-25ms per frame for 3024x1964 resolu ## References - `PLAYBACK-BENCHMARKS.md` - Raw performance 
test data (auto-updated by test runner) +- `PLAYBACK-MATRIX-RUNBOOK.md` - Cross-platform playback evidence collection process - `../recording/FINDINGS.md` - Recording performance findings (source of test files) - `../recording/BENCHMARKS.md` - Recording benchmark data - `examples/playback-test-runner.rs` - Playback test implementation diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md new file mode 100644 index 0000000000..281dcbdef8 --- /dev/null +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -0,0 +1,79 @@ +# Playback Matrix Runbook + +This runbook defines how to collect benchmark evidence for desktop playback performance and sync validation across required hardware classes. + +## Target matrix + +| Platform | GPU class | Required scenarios | +|---|---|---| +| macOS 13+ | Apple Silicon | full, scrub | +| Windows 11 | NVIDIA discrete | full, scrub | +| Windows 11 | AMD discrete | full, scrub | +| Windows 11 | Integrated baseline | full, scrub | + +## Preconditions + +1. Build can run on target machine. +2. Real-device recording outputs are available. +3. Recordings include both MP4 and fragmented samples. +4. Node and Rust toolchains are installed. 
+ +## Inputs and output directories + +Set these per machine: + +- `INPUT_DIR`: recording root (default `/tmp/cap-real-device-tests`) +- `OUT_DIR`: machine-local output folder for JSON and aggregate markdown + +Example: + +```bash +export INPUT_DIR="/tmp/cap-real-device-tests" +export OUT_DIR="/tmp/cap-playback-matrix/macos-apple-silicon" +mkdir -p "$OUT_DIR" +``` + +## Machine run command + +Run this once per platform/GPU class: + +```bash +node scripts/run-playback-benchmark-matrix.js \ + --platform "" \ + --gpu "" \ + --output-dir "$OUT_DIR" \ + --fps 60 \ + --input-dir "$INPUT_DIR" +``` + +Examples: + +```bash +node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/cap-playback-matrix/macos-apple-silicon --fps 60 --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir C:\temp\cap-playback-matrix\windows-nvidia --fps 60 --input-dir C:\temp\cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu amd-discrete --output-dir C:\temp\cap-playback-matrix\windows-amd --fps 60 --input-dir C:\temp\cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu integrated --output-dir C:\temp\cap-playback-matrix\windows-integrated --fps 60 --input-dir C:\temp\cap-real-device-tests +``` + +## Outputs produced per machine + +Each run directory contains: + +- timestamped `full` scenario JSON +- timestamped `scrub` scenario JSON +- `--aggregate.md` summary table + +## Cross-machine aggregation + +After collecting all machine folders into a shared root: + +```bash +node scripts/aggregate-playback-benchmarks.js --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md +``` + +## Evidence checklist + +1. Confirm all matrix rows exist. +2. Confirm each row has both `full` and `scrub` scenarios. +3. Capture aggregate markdown and raw JSON artifacts. +4. 
Attach outputs to playback findings update. From 1370bd69614befb7c873a8f8af60fdb27158abaa Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:10:15 +0000 Subject: [PATCH 183/333] improve: add playback matrix coverage validation script --- crates/editor/PLAYBACK-BENCHMARKS.md | 6 + crates/editor/PLAYBACK-FINDINGS.md | 6 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 + scripts/validate-playback-matrix.js | 231 +++++++++++++++++++++++ 4 files changed, 249 insertions(+) create mode 100644 scripts/validate-playback-matrix.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 4d9ff41649..6da7192bb6 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -66,6 +66,12 @@ Aggregate JSON outputs from multiple machines: node scripts/aggregate-playback-benchmarks.js --input /path/to/json-results --output /tmp/playback-benchmark-aggregate.md ``` +Validate matrix coverage and required formats: + +```bash +node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented +``` + #### Decode Performance Benchmark ```bash diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 4a42a3e5fc..dcb954a5d8 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -181,6 +181,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/run-playback-benchmark-matrix.js` runs `full` and `scrub` scenarios with tagged notes and JSON output. - Automatically generates aggregate markdown for each machine run directory. +11. **Added matrix completeness validator (2026-02-13)** + - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. + - Supports required format checks per cell (mp4 + fragmented). + --- ## Root Cause Analysis Archive @@ -280,6 +284,7 @@ Decoder Pipeline: 12. 
Added JSON report output support to playback-test-runner for benchmark evidence collection. 13. Added cross-platform benchmark JSON aggregation utility script. 14. Added matrix execution helper script for full + scrub benchmark runs per platform/GPU. +15. Added matrix validation script for required cell and format coverage checks. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -295,6 +300,7 @@ Decoder Pipeline: - `crates/recording/examples/playback-test-runner.rs`: added `--json-output` to emit structured benchmark reports. - `scripts/aggregate-playback-benchmarks.js`: added markdown aggregation for multiple playback benchmark JSON artifacts. - `scripts/run-playback-benchmark-matrix.js`: added orchestrated full/scrub benchmark runner with per-machine aggregate generation. +- `scripts/validate-playback-matrix.js`: added required matrix cell/format validation for aggregated evidence. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 281dcbdef8..5d633c8cc2 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -71,6 +71,12 @@ After collecting all machine folders into a shared root: node scripts/aggregate-playback-benchmarks.js --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md ``` +Validate matrix completeness: + +```bash +node scripts/validate-playback-matrix.js --input /path/to/all-machine-results --require-formats mp4,fragmented +``` + ## Evidence checklist 1. Confirm all matrix rows exist. 
diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js new file mode 100644 index 0000000000..a95ae023cb --- /dev/null +++ b/scripts/validate-playback-matrix.js @@ -0,0 +1,231 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +const DEFAULT_REQUIRED_CELLS = [ + { platform: "macos-13", gpu: "apple-silicon", scenario: "full" }, + { platform: "macos-13", gpu: "apple-silicon", scenario: "scrub" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "integrated", scenario: "full" }, + { platform: "windows-11", gpu: "integrated", scenario: "scrub" }, +]; + +function parseArgs(argv) { + const options = { + inputs: [], + requiredCells: [], + requiredFormats: [], + useDefaultMatrix: true, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--require-cell") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --require-cell"); + options.requiredCells.push(parseCell(value)); + options.useDefaultMatrix = false; + continue; + } + if (arg === "--require-formats") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --require-formats"); + options.requiredFormats = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + continue; + } + if (arg === "--no-default-matrix") { + options.useDefaultMatrix = false; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } 
+ + return options; +} + +function parseCell(value) { + const [platform, gpu, scenario] = value.split(":"); + if (!platform || !gpu || !scenario) { + throw new Error( + `Invalid --require-cell format: ${value}. Expected platform:gpu:scenario`, + ); + } + return { platform, gpu, scenario }; +} + +function printUsage() { + console.log(`Usage: node scripts/validate-playback-matrix.js --input [--input ...] [--require-cell platform:gpu:scenario ...] [--require-formats mp4,fragmented] + +Validates that required benchmark matrix cells are present in playback benchmark JSON results. + +Options: + --input, -i JSON file or directory containing JSON files (repeatable) + --require-cell Required cell as platform:gpu:scenario (repeatable) + --require-formats Comma-separated required formats per cell + --no-default-matrix Disable built-in required matrix + --help, -h Show help`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? 
[targetPath] : []; + } + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const result = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + result[key.trim()] = value.trim(); + } + return result; +} + +function keyForCell(cell) { + return `${cell.platform}|${cell.gpu}|${cell.scenario}`; +} + +function collectObservedCells(files) { + const observed = new Map(); + for (const filePath of files) { + const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const key = keyForCell({ platform, gpu, scenario }); + if (!observed.has(key)) { + observed.set(key, { + platform, + gpu, + scenario, + formats: new Set(), + files: new Set(), + }); + } + const entry = observed.get(key); + entry.files.add(filePath); + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + for (const report of reports) { + entry.formats.add(report.is_fragmented ? 
"fragmented" : "mp4"); + } + } + return observed; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + printUsage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const input of options.inputs) { + for (const filePath of collectJsonFiles(input)) { + files.add(filePath); + } + } + if (files.size === 0) { + throw new Error("No JSON files found"); + } + + const requiredCells = options.useDefaultMatrix + ? [...DEFAULT_REQUIRED_CELLS, ...options.requiredCells] + : options.requiredCells; + if (requiredCells.length === 0) { + throw new Error("No required matrix cells configured"); + } + + const observed = collectObservedCells([...files]); + const missingCells = []; + const formatFailures = []; + + for (const cell of requiredCells) { + const key = keyForCell(cell); + const observedCell = observed.get(key); + if (!observedCell) { + missingCells.push(cell); + continue; + } + for (const requiredFormat of options.requiredFormats) { + if (!observedCell.formats.has(requiredFormat)) { + formatFailures.push({ + ...cell, + requiredFormat, + observedFormats: [...observedCell.formats], + }); + } + } + } + + console.log(`Validated ${requiredCells.length} required cells`); + console.log(`Observed ${observed.size} unique cells`); + + if (missingCells.length > 0) { + console.log("Missing required cells:"); + for (const cell of missingCells) { + console.log(` - ${cell.platform}:${cell.gpu}:${cell.scenario}`); + } + } + + if (formatFailures.length > 0) { + console.log("Missing required formats:"); + for (const failure of formatFailures) { + console.log( + ` - ${failure.platform}:${failure.gpu}:${failure.scenario} missing ${failure.requiredFormat} (observed: ${failure.observedFormats.join(", ") || "none"})`, + ); + } + } + + if (missingCells.length > 0 || formatFailures.length > 0) { + process.exit(1); + } + + console.log("Matrix validation passed"); +} + 
+try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} From 59d1e6f68d497996c05e29ccbbe309f0962d1f0f Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:11:16 +0000 Subject: [PATCH 184/333] improve: include full runner context in benchmark command metadata --- crates/editor/PLAYBACK-FINDINGS.md | 1 + .../examples/playback-test-runner.rs | 57 ++++++++++++++++--- 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index dcb954a5d8..f584b0f2b7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -172,6 +172,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. - JSON output includes command metadata, system info, summary, and per-recording test detail. + - Command metadata now includes input scope and output flags for reproducibility. 9. **Added JSON aggregate utility for cross-platform benchmark collation (2026-02-13)** - `scripts/aggregate-playback-benchmarks.js` builds a markdown table from multiple JSON outputs. 
diff --git a/crates/recording/examples/playback-test-runner.rs b/crates/recording/examples/playback-test-runner.rs index abebc6cd14..b930a2cb41 100644 --- a/crates/recording/examples/playback-test-runner.rs +++ b/crates/recording/examples/playback-test-runner.rs @@ -1492,6 +1492,53 @@ fn command_name(command: Option<&Commands>) -> &'static str { } } +fn shell_quote(value: &str) -> String { + let is_safe = value + .chars() + .all(|char| char.is_ascii_alphanumeric() || "-_./:=,".contains(char)); + if is_safe { + value.to_string() + } else { + format!("'{}'", value.replace('\'', "'\"'\"'")) + } +} + +fn build_command_string(cli: &Cli) -> String { + let mut command = format!( + "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}", + command_name(cli.command.as_ref()), + cli.fps + ); + + if let Some(path) = &cli.recording_path { + command.push_str(" --recording-path "); + command.push_str(&shell_quote(path.to_string_lossy().as_ref())); + } else { + command.push_str(" --input-dir "); + command.push_str(&shell_quote(cli.input_dir.to_string_lossy().as_ref())); + } + + if cli.verbose { + command.push_str(" --verbose"); + } + + if cli.benchmark_output { + command.push_str(" --benchmark-output"); + } + + if let Some(path) = &cli.json_output { + command.push_str(" --json-output "); + command.push_str(&shell_quote(path.to_string_lossy().as_ref())); + } + + if let Some(notes) = &cli.notes { + command.push_str(" --notes "); + command.push_str(&shell_quote(notes)); + } + + command +} + #[tokio::main] async fn main() -> anyhow::Result<()> { tracing_subscriber::registry() @@ -1586,15 +1633,7 @@ async fn main() -> anyhow::Result<()> { print_summary(&reports); - let command = format!( - "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}{}", - command_name(cli.command.as_ref()), - cli.fps, - cli.recording_path - .as_ref() - .map(|p| format!(" --recording-path {}", p.display())) - .unwrap_or_default(), - ); + let command = 
build_command_string(&cli); if cli.benchmark_output { let benchmark_md = From e30027fe08b9ea8d97501a58ba900c8545f8ddc0 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:12:47 +0000 Subject: [PATCH 185/333] improve: add playback benchmark npm aliases and cli passthrough --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 18 ++++++++++++++++++ package.json | 3 +++ scripts/aggregate-playback-benchmarks.js | 3 +++ scripts/run-playback-benchmark-matrix.js | 3 +++ scripts/validate-playback-matrix.js | 3 +++ 6 files changed, 31 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index f584b0f2b7..5cc463428d 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -185,6 +185,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 11. **Added matrix completeness validator (2026-02-13)** - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. - Supports required format checks per cell (mp4 + fragmented). + - Root `package.json` now exposes `bench:playback:*` script aliases for matrix, aggregate, and validate flows. 
--- diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 5d633c8cc2..38fd659ddc 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -46,6 +46,12 @@ node scripts/run-playback-benchmark-matrix.js \ --input-dir "$INPUT_DIR" ``` +Equivalent shortcut: + +```bash +pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --input-dir "$INPUT_DIR" +``` + Examples: ```bash @@ -71,12 +77,24 @@ After collecting all machine folders into a shared root: node scripts/aggregate-playback-benchmarks.js --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md ``` +Equivalent shortcut: + +```bash +pnpm bench:playback:aggregate -- --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md +``` + Validate matrix completeness: ```bash node scripts/validate-playback-matrix.js --input /path/to/all-machine-results --require-formats mp4,fragmented ``` +Equivalent shortcut: + +```bash +pnpm bench:playback:validate -- --input /path/to/all-machine-results --require-formats mp4,fragmented +``` + ## Evidence checklist 1. Confirm all matrix rows exist. diff --git a/package.json b/package.json index 47f69790ef..dc4ad744af 100644 --- a/package.json +++ b/package.json @@ -41,6 +41,9 @@ "web": "pnpm --filter=@cap/web", "env-setup": "node scripts/env-cli.js", "check-tauri-versions": "node scripts/check-tauri-plugin-versions.js", + "bench:playback:matrix": "node scripts/run-playback-benchmark-matrix.js", + "bench:playback:aggregate": "node scripts/aggregate-playback-benchmarks.js", + "bench:playback:validate": "node scripts/validate-playback-matrix.js", "clean": "find . 
-name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/aggregate-playback-benchmarks.js b/scripts/aggregate-playback-benchmarks.js index d5ee7fe7ec..75275c944f 100644 --- a/scripts/aggregate-playback-benchmarks.js +++ b/scripts/aggregate-playback-benchmarks.js @@ -10,6 +10,9 @@ function parseArgs(argv) { for (let i = 2; i < argv.length; i++) { const arg = argv[i]; + if (arg === "--") { + continue; + } if (arg === "--help" || arg === "-h") { help = true; continue; diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index d5793550f5..f53c16a102 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -16,6 +16,9 @@ function parseArgs(argv) { for (let i = 2; i < argv.length; i++) { const arg = argv[i]; + if (arg === "--") { + continue; + } if (arg === "--help" || arg === "-h") { options.help = true; continue; diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js index a95ae023cb..4fc85dc61d 100644 --- a/scripts/validate-playback-matrix.js +++ b/scripts/validate-playback-matrix.js @@ -24,6 +24,9 @@ function parseArgs(argv) { for (let i = 2; i < argv.length; i++) { const arg = argv[i]; + if (arg === "--") { + continue; + } if (arg === "--help" || arg === "-h") { options.help = true; continue; From bcbb5f486b1a8286e5599712bf65a441a072b516 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:15:52 +0000 Subject: [PATCH 186/333] improve: drop duplicate same-frame seek updates --- apps/desktop/src/routes/editor/Timeline/index.tsx | 12 ++++++++++++ crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 9 ++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git 
a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx index 7726bb4258..c475fa1d27 100644 --- a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -224,6 +224,8 @@ export function Timeline() { let pendingSeekFrame: number | null = null; let seekRafId: number | null = null; let seekInFlight = false; + let inFlightSeekFrame: number | null = null; + let lastCompletedSeekFrame: number | null = null; onCleanup(() => { if (zoomRafId !== null) cancelAnimationFrame(zoomRafId); @@ -276,6 +278,13 @@ export function Timeline() { } function scheduleSeek(frameNumber: number) { + if ( + frameNumber === pendingSeekFrame || + frameNumber === inFlightSeekFrame || + frameNumber === lastCompletedSeekFrame + ) { + return; + } pendingSeekFrame = frameNumber; if (seekRafId === null) { seekRafId = requestAnimationFrame(flushPendingSeek); @@ -295,13 +304,16 @@ export function Timeline() { const frameNumber = pendingSeekFrame; pendingSeekFrame = null; seekInFlight = true; + inFlightSeekFrame = frameNumber; try { await commands.seekTo(frameNumber); + lastCompletedSeekFrame = frameNumber; } catch (err) { console.error("Failed to seek timeline playhead:", err); } finally { seekInFlight = false; + inFlightSeekFrame = null; if (pendingSeekFrame !== null && seekRafId === null) { seekRafId = requestAnimationFrame(flushPendingSeek); } diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 5cc463428d..09d0f85376 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -168,6 +168,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 7. **Timeline seek dispatch now coalesces during drag (2026-02-13)** - Frontend seek calls are requestAnimationFrame-batched. - Only the latest pending seek frame is sent while an async seek is in-flight. 
+ - Duplicate same-frame seeks are dropped in both frontend dispatch and playback seek signaling. 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index c9533ab63b..b25939cd14 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -795,7 +795,14 @@ impl PlaybackHandle { } pub fn seek(&self, frame_number: u32) { - let _ = self.seek_tx.send(frame_number); + let _ = self.seek_tx.send_if_modified(|current_frame| { + if *current_frame == frame_number { + false + } else { + *current_frame = frame_number; + true + } + }); } pub async fn receive_event(&mut self) -> watch::Ref<'_, PlaybackEvent> { From bed092529b52a5d3b0019d646bc9bd4779fb825e Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:16:57 +0000 Subject: [PATCH 187/333] improve: validate per-machine matrix runs in helper script --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 +- scripts/run-playback-benchmark-matrix.js | 40 ++++++++++++++++++++++-- 4 files changed, 42 insertions(+), 3 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 6da7192bb6..684161ff4d 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -111,6 +111,7 @@ Automated helper for machine runs: ```bash node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir /tmp/playback-matrix-windows-nvidia --fps 60 --require-formats mp4,fragmented --input-dir /tmp/cap-real-device-tests ``` | Platform | GPU Class | MP4 Full | Fragmented 
Full | MP4 Scrub | Fragmented Scrub | Notes | diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 09d0f85376..8431346cc3 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -182,6 +182,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 10. **Added matrix run helper for platform/GPU benchmark execution (2026-02-13)** - `scripts/run-playback-benchmark-matrix.js` runs `full` and `scrub` scenarios with tagged notes and JSON output. - Automatically generates aggregate markdown for each machine run directory. + - Performs per-machine post-run validation for required scenarios and optional format requirements. 11. **Added matrix completeness validator (2026-02-13)** - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 38fd659ddc..1c84c1b95c 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -43,13 +43,14 @@ node scripts/run-playback-benchmark-matrix.js \ --gpu "" \ --output-dir "$OUT_DIR" \ --fps 60 \ + --require-formats mp4,fragmented \ --input-dir "$INPUT_DIR" ``` Equivalent shortcut: ```bash -pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --input-dir "$INPUT_DIR" +pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --require-formats mp4,fragmented --input-dir "$INPUT_DIR" ``` Examples: diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index f53c16a102..f5a404e487 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -12,6 +12,8 @@ function parseArgs(argv) { fps: 60, recordingPath: null, inputDir: null, + validate: true, + requireFormats: [], }; for (let i = 2; i < argv.length; i++) { @@ -51,6 +53,18 @@ function 
parseArgs(argv) { options.inputDir = argv[++i] ?? null; continue; } + if (arg === "--skip-validate") { + options.validate = false; + continue; + } + if (arg === "--require-formats") { + const value = argv[++i] ?? ""; + options.requireFormats = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -58,7 +72,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] + console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] Runs playback benchmark matrix scenarios and writes JSON outputs. @@ -70,7 +84,9 @@ Required: Optional: --fps FPS for benchmark runs (default: 60) --recording-path Specific recording path - --input-dir Recording discovery directory`); + --input-dir Recording discovery directory + --require-formats Required formats for local validation (comma-separated) + --skip-validate Skip post-run validation`); } function run(command, args) { @@ -156,6 +172,26 @@ function main() { aggregatePath, ]); console.log(`Aggregate markdown: ${aggregatePath}`); + + if (options.validate) { + const validateArgs = [ + "scripts/validate-playback-matrix.js", + "--input", + options.outputDir, + "--no-default-matrix", + "--require-cell", + `${options.platform}:${options.gpu}:full`, + "--require-cell", + `${options.platform}:${options.gpu}:scrub`, + ]; + + if (options.requireFormats.length > 0) { + validateArgs.push("--require-formats", options.requireFormats.join(",")); + } + + run("node", validateArgs); + console.log("Matrix run validation passed"); + } } try { From 3ca8913b50007e53e0fbb60015dbf31dd7cc9796 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:18:11 +0000 Subject: [PATCH 188/333] improve: emit 
matrix validation json artifacts --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/run-playback-benchmark-matrix.js | 7 ++++++ scripts/validate-playback-matrix.js | 30 ++++++++++++++++++++---- 5 files changed, 36 insertions(+), 4 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 684161ff4d..63011bb7a9 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -70,6 +70,7 @@ Validate matrix coverage and required formats: ```bash node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented +node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented --output-json /tmp/playback-matrix-validation.json ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 8431346cc3..a048af1c95 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -188,6 +188,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. - Supports required format checks per cell (mp4 + fragmented). - Root `package.json` now exposes `bench:playback:*` script aliases for matrix, aggregate, and validate flows. + - Can emit structured validation JSON for artifact upload and automation. 
--- diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 1c84c1b95c..ad778012b3 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -69,6 +69,7 @@ Each run directory contains: - timestamped `full` scenario JSON - timestamped `scrub` scenario JSON - `--aggregate.md` summary table +- `--validation.json` matrix validation result ## Cross-machine aggregation diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index f5a404e487..98b08def7f 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -174,11 +174,17 @@ function main() { console.log(`Aggregate markdown: ${aggregatePath}`); if (options.validate) { + const validationJsonPath = path.join( + options.outputDir, + `${options.platform}-${options.gpu}-validation.json`, + ); const validateArgs = [ "scripts/validate-playback-matrix.js", "--input", options.outputDir, "--no-default-matrix", + "--output-json", + validationJsonPath, "--require-cell", `${options.platform}:${options.gpu}:full`, "--require-cell", @@ -191,6 +197,7 @@ function main() { run("node", validateArgs); console.log("Matrix run validation passed"); + console.log(`Validation JSON: ${validationJsonPath}`); } } diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js index 4fc85dc61d..c919369c42 100644 --- a/scripts/validate-playback-matrix.js +++ b/scripts/validate-playback-matrix.js @@ -20,6 +20,7 @@ function parseArgs(argv) { requiredCells: [], requiredFormats: [], useDefaultMatrix: true, + outputJson: null, }; for (let i = 2; i < argv.length; i++) { @@ -57,6 +58,12 @@ function parseArgs(argv) { options.useDefaultMatrix = false; continue; } + if (arg === "--output-json") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-json"); + options.outputJson = path.resolve(value); + continue; + } throw new 
Error(`Unknown argument: ${arg}`); } @@ -74,7 +81,7 @@ function parseCell(value) { } function printUsage() { - console.log(`Usage: node scripts/validate-playback-matrix.js --input [--input ...] [--require-cell platform:gpu:scenario ...] [--require-formats mp4,fragmented] + console.log(`Usage: node scripts/validate-playback-matrix.js --input [--input ...] [--require-cell platform:gpu:scenario ...] [--require-formats mp4,fragmented] [--output-json ] Validates that required benchmark matrix cells are present in playback benchmark JSON results. @@ -83,6 +90,7 @@ Options: --require-cell Required cell as platform:gpu:scenario (repeatable) --require-formats Comma-separated required formats per cell --no-default-matrix Disable built-in required matrix + --output-json Write validation result JSON file --help, -h Show help`); } @@ -200,8 +208,22 @@ function main() { } } - console.log(`Validated ${requiredCells.length} required cells`); - console.log(`Observed ${observed.size} unique cells`); + const validationResult = { + validatedCells: requiredCells.length, + observedCells: observed.size, + requiredFormats: options.requiredFormats, + missingCells, + formatFailures, + passed: missingCells.length === 0 && formatFailures.length === 0, + }; + + if (options.outputJson) { + fs.writeFileSync(options.outputJson, JSON.stringify(validationResult, null, 2)); + console.log(`Validation JSON: ${options.outputJson}`); + } + + console.log(`Validated ${validationResult.validatedCells} required cells`); + console.log(`Observed ${validationResult.observedCells} unique cells`); if (missingCells.length > 0) { console.log("Missing required cells:"); @@ -219,7 +241,7 @@ function main() { } } - if (missingCells.length > 0 || formatFailures.length > 0) { + if (!validationResult.passed) { process.exit(1); } From 43db4995bbe8f2bd6634b0b5ccc5ec77d5a70bd5 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:20:37 +0000 Subject: [PATCH 189/333] improve: emit seek settle telemetry in 
playback loop --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a048af1c95..a1fb2349e7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -159,6 +159,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 5. **Playback runtime emits startup latency signals (2026-02-13)** - Playback loop now logs first rendered frame latency. - Audio stream setup now logs startup preparation time and first callback latency. + - Playback loop now logs seek settle latency (`seek_target_frame` to rendered frame). 6. **Decode benchmark now supports machine-readable evidence output (2026-02-13)** - `decode-benchmark` supports `--output-json` for structured metric capture. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b25939cd14..3b2c72411f 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -393,6 +393,7 @@ impl Playback { let mut total_frames_rendered = 0u64; let mut _total_frames_skipped = 0u64; let mut first_render_logged = false; + let mut pending_seek_observation: Option<(u32, Instant)> = None; let warmup_target_frames = 20usize; let warmup_after_first_timeout = Duration::from_millis(1000); @@ -454,6 +455,7 @@ impl Playback { frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; + pending_seek_observation = Some((seek_frame, Instant::now())); prefetch_buffer.retain(|p| p.frame_number >= frame_number); frame_cache.cache.clear(); let _ = frame_request_tx.send(frame_number); @@ -501,6 +503,7 @@ impl Playback { frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; + pending_seek_observation = Some((seek_frame, Instant::now())); prefetch_buffer.retain(|p| p.frame_number >= frame_number); 
frame_cache.cache.clear(); let _ = frame_request_tx.send(frame_number); @@ -737,6 +740,17 @@ impl Playback { "Playback rendered first frame" ); } + if let Some((seek_target_frame, seek_started_at)) = pending_seek_observation + && frame_number >= seek_target_frame + { + info!( + seek_target_frame, + rendered_frame = frame_number, + seek_settle_ms = seek_started_at.elapsed().as_secs_f64() * 1000.0, + "Playback seek settled" + ); + pending_seek_observation = None; + } } event_tx.send(PlaybackEvent::Frame(frame_number)).ok(); From 225de247793d429fea4ff0c5a75641463d7efc26 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:22:20 +0000 Subject: [PATCH 190/333] improve: add playback matrix status report generator --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 6 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 + package.json | 1 + scripts/build-playback-matrix-report.js | 289 +++++++++++++++++++++++ 5 files changed, 299 insertions(+) create mode 100644 scripts/build-playback-matrix-report.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 63011bb7a9..c9d1cd95b5 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -64,6 +64,7 @@ Aggregate JSON outputs from multiple machines: ```bash node scripts/aggregate-playback-benchmarks.js --input /path/to/json-results --output /tmp/playback-benchmark-aggregate.md +node scripts/build-playback-matrix-report.js --input /path/to/json-results --output /tmp/playback-matrix-status.md ``` Validate matrix coverage and required formats: diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a1fb2349e7..8636b73cda 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -191,6 +191,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Root `package.json` now exposes `bench:playback:*` script aliases for matrix, 
aggregate, and validate flows. - Can emit structured validation JSON for artifact upload and automation. +12. **Added matrix status report generator (2026-02-13)** + - `scripts/build-playback-matrix-report.js` generates concise matrix markdown from JSON results. + - Highlights missing cells, scenario pass/fail, and format coverage per platform/GPU row. + --- ## Root Cause Analysis Archive @@ -291,6 +295,7 @@ Decoder Pipeline: 13. Added cross-platform benchmark JSON aggregation utility script. 14. Added matrix execution helper script for full + scrub benchmark runs per platform/GPU. 15. Added matrix validation script for required cell and format coverage checks. +16. Added matrix status report generator for concise artifact summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -307,6 +312,7 @@ Decoder Pipeline: - `scripts/aggregate-playback-benchmarks.js`: added markdown aggregation for multiple playback benchmark JSON artifacts. - `scripts/run-playback-benchmark-matrix.js`: added orchestrated full/scrub benchmark runner with per-machine aggregate generation. - `scripts/validate-playback-matrix.js`: added required matrix cell/format validation for aggregated evidence. +- `scripts/build-playback-matrix-report.js`: added concise matrix status report generation from JSON benchmark outputs. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index ad778012b3..b0f5b78ea6 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -77,12 +77,14 @@ After collecting all machine folders into a shared root: ```bash node scripts/aggregate-playback-benchmarks.js --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md +node scripts/build-playback-matrix-report.js --input /path/to/all-machine-results --output /tmp/playback-matrix-status.md ``` Equivalent shortcut: ```bash pnpm bench:playback:aggregate -- --input /path/to/all-machine-results --output /tmp/playback-matrix-aggregate.md +pnpm bench:playback:report -- --input /path/to/all-machine-results --output /tmp/playback-matrix-status.md ``` Validate matrix completeness: diff --git a/package.json b/package.json index dc4ad744af..f8346043a0 100644 --- a/package.json +++ b/package.json @@ -44,6 +44,7 @@ "bench:playback:matrix": "node scripts/run-playback-benchmark-matrix.js", "bench:playback:aggregate": "node scripts/aggregate-playback-benchmarks.js", "bench:playback:validate": "node scripts/validate-playback-matrix.js", + "bench:playback:report": "node scripts/build-playback-matrix-report.js", "clean": "find . 
-name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/build-playback-matrix-report.js b/scripts/build-playback-matrix-report.js new file mode 100644 index 0000000000..a07b1876c5 --- /dev/null +++ b/scripts/build-playback-matrix-report.js @@ -0,0 +1,289 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +const DEFAULT_REQUIRED_CELLS = [ + { platform: "macos-13", gpu: "apple-silicon", scenario: "full" }, + { platform: "macos-13", gpu: "apple-silicon", scenario: "scrub" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "nvidia-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "full" }, + { platform: "windows-11", gpu: "amd-discrete", scenario: "scrub" }, + { platform: "windows-11", gpu: "integrated", scenario: "full" }, + { platform: "windows-11", gpu: "integrated", scenario: "scrub" }, +]; + +function parseArgs(argv) { + const options = { + inputs: [], + output: null, + useDefaultMatrix: true, + requiredCells: [], + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--output" || arg === "-o") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output"); + options.output = path.resolve(value); + continue; + } + if (arg === "--no-default-matrix") { + options.useDefaultMatrix = false; + continue; + } + if (arg === "--require-cell") { + const value = argv[++i]; + if 
(!value) throw new Error("Missing value for --require-cell"); + options.requiredCells.push(parseCell(value)); + options.useDefaultMatrix = false; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function parseCell(value) { + const [platform, gpu, scenario] = value.split(":"); + if (!platform || !gpu || !scenario) { + throw new Error( + `Invalid --require-cell format: ${value}. Expected platform:gpu:scenario`, + ); + } + return { platform, gpu, scenario }; +} + +function usage() { + console.log(`Usage: node scripts/build-playback-matrix-report.js --input [--input ...] [--output ] + +Builds a concise playback matrix markdown report from playback benchmark JSON outputs.`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const result = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + result[key.trim()] = value.trim(); + } + return result; +} + +function cellKey(platform, gpu, scenario) { + return `${platform}|${gpu}|${scenario}`; +} + +function platformGpuKey(platform, gpu) { + return `${platform}|${gpu}`; +} + +function timestampOrEpoch(value) { + const parsed = Date.parse(value ?? ""); + return Number.isNaN(parsed) ? 
0 : parsed; +} + +function upsertLatestCell(cells, candidate) { + const key = cellKey(candidate.platform, candidate.gpu, candidate.scenario); + const existing = cells.get(key); + if (!existing || timestampOrEpoch(candidate.generatedAt) >= timestampOrEpoch(existing.generatedAt)) { + cells.set(key, candidate); + } +} + +function collectData(files) { + const latestCells = new Map(); + const formatCoverage = new Map(); + + for (const filePath of files) { + const raw = fs.readFileSync(filePath, "utf8"); + const parsed = JSON.parse(raw); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + const pass = reports.every((report) => report.overall_passed === true); + const generatedAt = parsed.generated_at_utc ?? ""; + + const formats = new Set(); + for (const report of reports) { + formats.add(report.is_fragmented ? "fragmented" : "mp4"); + } + + upsertLatestCell(latestCells, { + platform, + gpu, + scenario, + pass, + generatedAt, + filePath, + formats, + }); + + const pgKey = platformGpuKey(platform, gpu); + if (!formatCoverage.has(pgKey)) { + formatCoverage.set(pgKey, new Set()); + } + for (const format of formats) { + formatCoverage.get(pgKey).add(format); + } + } + + return { latestCells, formatCoverage }; +} + +function formatStatus(entry) { + if (!entry) return "MISSING"; + return entry.pass ? 
"PASS" : "FAIL"; +} + +function formatCoverageStatus(formats, target) { + if (!formats || !formats.has(target)) return "NO"; + return "YES"; +} + +function buildReport(requiredCells, latestCells, formatCoverage) { + const platformGpuPairs = new Map(); + for (const cell of requiredCells) { + const key = platformGpuKey(cell.platform, cell.gpu); + if (!platformGpuPairs.has(key)) { + platformGpuPairs.set(key, { platform: cell.platform, gpu: cell.gpu }); + } + } + + const rows = []; + let missingCount = 0; + let failCount = 0; + for (const { platform, gpu } of platformGpuPairs.values()) { + const full = latestCells.get(cellKey(platform, gpu, "full")); + const scrub = latestCells.get(cellKey(platform, gpu, "scrub")); + const formats = formatCoverage.get(platformGpuKey(platform, gpu)); + const fullStatus = formatStatus(full); + const scrubStatus = formatStatus(scrub); + if (fullStatus === "MISSING" || scrubStatus === "MISSING") { + missingCount += 1; + } + if (fullStatus === "FAIL" || scrubStatus === "FAIL") { + failCount += 1; + } + rows.push({ + platform, + gpu, + fullStatus, + scrubStatus, + mp4: formatCoverageStatus(formats, "mp4"), + fragmented: formatCoverageStatus(formats, "fragmented"), + fullTime: full?.generatedAt ?? "n/a", + scrubTime: scrub?.generatedAt ?? 
"n/a", + }); + } + + let markdown = ""; + markdown += "# Playback Matrix Status Report\n\n"; + markdown += `Generated: ${new Date().toISOString()}\n\n`; + markdown += `Rows: ${rows.length}, Missing rows: ${missingCount}, Rows with failures: ${failCount}\n\n`; + markdown += "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; + markdown += "|---|---|---|---|---|---|---|---|\n"; + for (const row of rows) { + markdown += `| ${row.platform} | ${row.gpu} | ${row.fullStatus} | ${row.scrubStatus} | ${row.mp4} | ${row.fragmented} | ${row.fullTime} | ${row.scrubTime} |\n`; + } + markdown += "\n"; + + const missingCells = requiredCells.filter((cell) => { + return !latestCells.has(cellKey(cell.platform, cell.gpu, cell.scenario)); + }); + if (missingCells.length > 0) { + markdown += "## Missing Cells\n\n"; + for (const cell of missingCells) { + markdown += `- ${cell.platform}:${cell.gpu}:${cell.scenario}\n`; + } + markdown += "\n"; + } + + return markdown; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const inputPath of options.inputs) { + for (const filePath of collectJsonFiles(inputPath)) { + files.add(filePath); + } + } + if (files.size === 0) { + throw new Error("No JSON files found"); + } + + const requiredCells = options.useDefaultMatrix + ? 
[...DEFAULT_REQUIRED_CELLS, ...options.requiredCells] + : options.requiredCells; + if (requiredCells.length === 0) { + throw new Error("No required cells configured"); + } + + const { latestCells, formatCoverage } = collectData([...files]); + const report = buildReport(requiredCells, latestCells, formatCoverage); + + if (options.output) { + fs.writeFileSync(options.output, report, "utf8"); + console.log(`Wrote matrix report to ${options.output}`); + } else { + process.stdout.write(report); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} From b7d07378e431072572adc9edab70aab050280eb6 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:23:31 +0000 Subject: [PATCH 191/333] improve: support scenario subset runs in matrix helper --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 9 +++++- scripts/run-playback-benchmark-matrix.js | 37 +++++++++++++++++++----- 4 files changed, 40 insertions(+), 8 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index c9d1cd95b5..81819e2e5f 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -114,6 +114,7 @@ Automated helper for machine runs: ```bash node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --input-dir /tmp/cap-real-device-tests node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir /tmp/playback-matrix-windows-nvidia --fps 60 --require-formats mp4,fragmented --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu integrated --output-dir /tmp/playback-matrix-windows-integrated --fps 60 --scenarios scrub --input-dir /tmp/cap-real-device-tests ``` | Platform | GPU Class 
| MP4 Full | Fragmented Full | MP4 Scrub | Fragmented Scrub | Notes | diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 8636b73cda..88dc25adf2 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -184,6 +184,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/run-playback-benchmark-matrix.js` runs `full` and `scrub` scenarios with tagged notes and JSON output. - Automatically generates aggregate markdown for each machine run directory. - Performs per-machine post-run validation for required scenarios and optional format requirements. + - Supports scenario subset reruns via `--scenarios` for faster targeted validation. 11. **Added matrix completeness validator (2026-02-13)** - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index b0f5b78ea6..a7644dc888 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -44,13 +44,20 @@ node scripts/run-playback-benchmark-matrix.js \ --output-dir "$OUT_DIR" \ --fps 60 \ --require-formats mp4,fragmented \ + --scenarios full,scrub \ --input-dir "$INPUT_DIR" ``` Equivalent shortcut: ```bash -pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --require-formats mp4,fragmented --input-dir "$INPUT_DIR" +pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --require-formats mp4,fragmented --scenarios full,scrub --input-dir "$INPUT_DIR" +``` + +Rerun only scrub scenario for a machine: + +```bash +pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --scenarios scrub --input-dir "$INPUT_DIR" ``` Examples: diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index 98b08def7f..89f0a26e90 100644 --- 
a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -14,6 +14,7 @@ function parseArgs(argv) { inputDir: null, validate: true, requireFormats: [], + scenarios: ["full", "scrub"], }; for (let i = 2; i < argv.length; i++) { @@ -65,6 +66,18 @@ function parseArgs(argv) { .filter(Boolean); continue; } + if (arg === "--scenarios") { + const value = argv[++i] ?? ""; + const scenarios = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + if (scenarios.length === 0) { + throw new Error("Invalid --scenarios value"); + } + options.scenarios = scenarios; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -72,7 +85,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] + console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] [--scenarios full,scrub] Runs playback benchmark matrix scenarios and writes JSON outputs. 
@@ -86,6 +99,7 @@ Optional: --recording-path Specific recording path --input-dir Recording discovery directory --require-formats Required formats for local validation (comma-separated) + --scenarios Scenarios to run (comma-separated; default: full,scrub) --skip-validate Skip post-run validation`); } @@ -139,6 +153,12 @@ function validateOptions(options) { if (!options.platform || !options.gpu || !options.outputDir) { throw new Error("Missing required options: --platform, --gpu, --output-dir"); } + const validScenarios = new Set(["full", "scrub", "decoder", "playback", "audio-sync", "camera-sync"]); + for (const scenario of options.scenarios) { + if (!validScenarios.has(scenario)) { + throw new Error(`Unsupported scenario: ${scenario}`); + } + } const absoluteOutputDir = path.resolve(options.outputDir); options.outputDir = absoluteOutputDir; @@ -157,8 +177,9 @@ function main() { validateOptions(options); console.log(`Running matrix for platform=${options.platform} gpu=${options.gpu}`); - run("cargo", scenarioArgs(options, "full")); - run("cargo", scenarioArgs(options, "scrub")); + for (const scenario of options.scenarios) { + run("cargo", scenarioArgs(options, scenario)); + } const aggregatePath = path.join( options.outputDir, @@ -185,11 +206,13 @@ function main() { "--no-default-matrix", "--output-json", validationJsonPath, - "--require-cell", - `${options.platform}:${options.gpu}:full`, - "--require-cell", - `${options.platform}:${options.gpu}:scrub`, ]; + for (const scenario of options.scenarios) { + validateArgs.push( + "--require-cell", + `${options.platform}:${options.gpu}:${scenario}`, + ); + } if (options.requireFormats.length > 0) { validateArgs.push("--require-formats", options.requireFormats.join(",")); From 8fe2d5ac34356a6fbd0bb0721ff6691e33952c83 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:24:50 +0000 Subject: [PATCH 192/333] improve: add one-shot playback matrix finalization command --- crates/editor/PLAYBACK-BENCHMARKS.md | 
3 + crates/editor/PLAYBACK-FINDINGS.md | 6 ++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 ++ package.json | 1 + scripts/finalize-playback-matrix.js | 114 +++++++++++++++++++++++ 5 files changed, 130 insertions(+) create mode 100644 scripts/finalize-playback-matrix.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 81819e2e5f..fec61c3ad6 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -72,6 +72,9 @@ Validate matrix coverage and required formats: ```bash node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented node scripts/validate-playback-matrix.js --input /path/to/json-results --require-formats mp4,fragmented --output-json /tmp/playback-matrix-validation.json + +# Finalize aggregate + status + validation artifacts +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 88dc25adf2..4a0d81e88b 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -196,6 +196,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/build-playback-matrix-report.js` generates concise matrix markdown from JSON results. - Highlights missing cells, scenario pass/fail, and format coverage per platform/GPU row. +13. **Added matrix finalization helper (2026-02-13)** + - `scripts/finalize-playback-matrix.js` generates aggregate markdown, status markdown, and validation JSON in one command. + - Supports optional required format enforcement during finalization. + --- ## Root Cause Analysis Archive @@ -297,6 +301,7 @@ Decoder Pipeline: 14. Added matrix execution helper script for full + scrub benchmark runs per platform/GPU. 15. 
Added matrix validation script for required cell and format coverage checks. 16. Added matrix status report generator for concise artifact summaries. +17. Added one-shot finalization script for aggregate + status + validation outputs. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -314,6 +319,7 @@ Decoder Pipeline: - `scripts/run-playback-benchmark-matrix.js`: added orchestrated full/scrub benchmark runner with per-machine aggregate generation. - `scripts/validate-playback-matrix.js`: added required matrix cell/format validation for aggregated evidence. - `scripts/build-playback-matrix-report.js`: added concise matrix status report generation from JSON benchmark outputs. +- `scripts/finalize-playback-matrix.js`: added one-shot matrix artifact finalization workflow. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index a7644dc888..de3db31808 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -106,6 +106,12 @@ Equivalent shortcut: pnpm bench:playback:validate -- --input /path/to/all-machine-results --require-formats mp4,fragmented ``` +One-shot finalize command: + +```bash +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented +``` + ## Evidence checklist 1. Confirm all matrix rows exist. 
diff --git a/package.json b/package.json index f8346043a0..c8bdea3b17 100644 --- a/package.json +++ b/package.json @@ -45,6 +45,7 @@ "bench:playback:aggregate": "node scripts/aggregate-playback-benchmarks.js", "bench:playback:validate": "node scripts/validate-playback-matrix.js", "bench:playback:report": "node scripts/build-playback-matrix-report.js", + "bench:playback:finalize": "node scripts/finalize-playback-matrix.js", "clean": "find . -name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js new file mode 100644 index 0000000000..886f2fc52b --- /dev/null +++ b/scripts/finalize-playback-matrix.js @@ -0,0 +1,114 @@ +#!/usr/bin/env node + +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + inputs: [], + outputDir: null, + requireFormats: [], + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--output-dir" || arg === "-o") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-dir"); + options.outputDir = path.resolve(value); + continue; + } + if (arg === "--require-formats") { + const value = argv[++i] ?? 
""; + options.requireFormats = value + .split(",") + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] + +Generates aggregate markdown, status markdown, and validation JSON for collected playback matrix outputs.`); +} + +function run(command, args) { + const result = spawnSync(command, args, { stdio: "inherit" }); + if (result.status !== 0) { + throw new Error(`Command failed: ${command} ${args.join(" ")}`); + } +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if (options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + if (!options.outputDir) { + throw new Error("--output-dir is required"); + } + if (!fs.existsSync(options.outputDir)) { + fs.mkdirSync(options.outputDir, { recursive: true }); + } + + const aggregatePath = path.join(options.outputDir, "playback-benchmark-aggregate.md"); + const statusPath = path.join(options.outputDir, "playback-matrix-status.md"); + const validationPath = path.join(options.outputDir, "playback-matrix-validation.json"); + + const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; + const statusArgs = ["scripts/build-playback-matrix-report.js"]; + const validateArgs = [ + "scripts/validate-playback-matrix.js", + "--output-json", + validationPath, + ]; + + for (const input of options.inputs) { + aggregateArgs.push("--input", input); + statusArgs.push("--input", input); + validateArgs.push("--input", input); + } + + aggregateArgs.push("--output", aggregatePath); + statusArgs.push("--output", statusPath); + if (options.requireFormats.length > 0) { + validateArgs.push("--require-formats", options.requireFormats.join(",")); + } + + run("node", aggregateArgs); + run("node", 
statusArgs); + run("node", validateArgs); + + console.log(`Aggregate markdown: ${aggregatePath}`); + console.log(`Status markdown: ${statusPath}`); + console.log(`Validation JSON: ${validationPath}`); +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} From 055cf427b995f75a64042ab3ed419dfe5b6f2278 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:26:23 +0000 Subject: [PATCH 193/333] improve: add matrix artifact publisher into benchmark history --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 6 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 9 ++ package.json | 1 + scripts/publish-playback-matrix-summary.js | 139 +++++++++++++++++++++ 5 files changed, 158 insertions(+) create mode 100644 scripts/publish-playback-matrix-summary.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index fec61c3ad6..b30b5dce56 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -75,6 +75,9 @@ node scripts/validate-playback-matrix.js --input /path/to/json-results --require # Finalize aggregate + status + validation artifacts node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented + +# Publish matrix artifacts into this benchmark history +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 4a0d81e88b..9aefaf01e9 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -200,6 +200,10 @@ cargo run -p cap-recording 
--example playback-test-runner -- full - `scripts/finalize-playback-matrix.js` generates aggregate markdown, status markdown, and validation JSON in one command. - Supports optional required format enforcement during finalization. +14. **Added matrix summary publisher (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. + - Keeps matrix evidence updates consistent and repeatable. + --- ## Root Cause Analysis Archive @@ -302,6 +306,7 @@ Decoder Pipeline: 15. Added matrix validation script for required cell and format coverage checks. 16. Added matrix status report generator for concise artifact summaries. 17. Added one-shot finalization script for aggregate + status + validation outputs. +18. Added benchmark history publisher script for finalized matrix artifacts. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -320,6 +325,7 @@ Decoder Pipeline: - `scripts/validate-playback-matrix.js`: added required matrix cell/format validation for aggregated evidence. - `scripts/build-playback-matrix-report.js`: added concise matrix status report generation from JSON benchmark outputs. - `scripts/finalize-playback-matrix.js`: added one-shot matrix artifact finalization workflow. +- `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index de3db31808..8399e401ea 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -112,6 +112,15 @@ One-shot finalize command: pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented ``` +Publish finalized artifacts into benchmark history: + +```bash +pnpm bench:playback:publish -- \ + --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md \ + --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ + --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json +``` + ## Evidence checklist 1. Confirm all matrix rows exist. diff --git a/package.json b/package.json index c8bdea3b17..51abc7ce0a 100644 --- a/package.json +++ b/package.json @@ -46,6 +46,7 @@ "bench:playback:validate": "node scripts/validate-playback-matrix.js", "bench:playback:report": "node scripts/build-playback-matrix-report.js", "bench:playback:finalize": "node scripts/finalize-playback-matrix.js", + "bench:playback:publish": "node scripts/publish-playback-matrix-summary.js", "clean": "find . 
-name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js new file mode 100644 index 0000000000..0ee89fc8fe --- /dev/null +++ b/scripts/publish-playback-matrix-summary.js @@ -0,0 +1,139 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + aggregateMd: null, + statusMd: null, + validationJson: null, + target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--aggregate-md") { + options.aggregateMd = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--status-md") { + options.statusMd = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--validation-json") { + options.validationJson = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--target") { + options.target = path.resolve(argv[++i] ?? ""); + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--target ] + +Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); +} + +function ensureFile(filePath, label) { + if (!filePath || !fs.existsSync(filePath)) { + throw new Error(`${label} file not found: ${filePath ?? 
"undefined"}`); + } +} + +function buildSummarySection(aggregateMd, statusMd, validationJson) { + const now = new Date().toISOString(); + const validation = JSON.parse(validationJson); + const status = validation.passed ? "✅ MATRIX PASS" : "❌ MATRIX FAIL"; + + let markdown = ""; + markdown += `### Matrix Summary Run: ${now}\n\n`; + markdown += `**Validation:** ${status}\n\n`; + markdown += `- Validated cells: ${validation.validatedCells}\n`; + markdown += `- Observed cells: ${validation.observedCells}\n`; + markdown += `- Missing cells: ${validation.missingCells?.length ?? 0}\n`; + markdown += `- Format failures: ${validation.formatFailures?.length ?? 0}\n\n`; + + if ((validation.missingCells?.length ?? 0) > 0) { + markdown += "**Missing Cells**\n"; + for (const cell of validation.missingCells) { + markdown += `- ${cell.platform}:${cell.gpu}:${cell.scenario}\n`; + } + markdown += "\n"; + } + + if ((validation.formatFailures?.length ?? 0) > 0) { + markdown += "**Format Failures**\n"; + for (const failure of validation.formatFailures) { + markdown += `- ${failure.platform}:${failure.gpu}:${failure.scenario} missing ${failure.requiredFormat}\n`; + } + markdown += "\n"; + } + + markdown += "
<details>\n<summary>Matrix Status Report</summary>\n\n"; + markdown += `${statusMd.trim()}\n\n`; + markdown += "</details>\n\n"; + + markdown += "<details>\n<summary>Aggregate Benchmark Report</summary>\n\n"; + markdown += `${aggregateMd.trim()}\n\n`; + markdown += "</details>
\n\n"; + + return markdown; +} + +function writeToBenchmarkHistory(targetFile, summaryMd) { + const markerStart = ""; + const markerEnd = ""; + const current = fs.readFileSync(targetFile, "utf8"); + const start = current.indexOf(markerStart); + const end = current.indexOf(markerEnd); + if (start === -1 || end === -1 || start >= end) { + throw new Error(`Could not find benchmark result markers in ${targetFile}`); + } + + const insertPos = start + markerStart.length; + const updated = + current.slice(0, insertPos) + + "\n\n" + + summaryMd + + current.slice(end); + fs.writeFileSync(targetFile, updated, "utf8"); +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + + ensureFile(options.aggregateMd, "Aggregate markdown"); + ensureFile(options.statusMd, "Status markdown"); + ensureFile(options.validationJson, "Validation JSON"); + ensureFile(options.target, "Target"); + + const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8"); + const statusMd = fs.readFileSync(options.statusMd, "utf8"); + const validationJson = fs.readFileSync(options.validationJson, "utf8"); + const summaryMd = buildSummarySection(aggregateMd, statusMd, validationJson); + writeToBenchmarkHistory(options.target, summaryMd); + console.log(`Published matrix summary into ${options.target}`); +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} From ed405185fb9877eece8605a7c3d904310fbb2a91 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:32:55 +0000 Subject: [PATCH 194/333] improve: tune frame wait and add startup threshold checks --- crates/editor/PLAYBACK-BENCHMARKS.md | 8 ++-- crates/editor/PLAYBACK-FINDINGS.md | 6 +++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 +- crates/editor/src/playback.rs | 15 +++--- .../examples/playback-test-runner.rs | 48 ++++++++++++++++--- scripts/run-playback-benchmark-matrix.js | 14 +++++- 6 files changed, 75 insertions(+), 19 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index b30b5dce56..aaef9c71fc 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -10,6 +10,7 @@ This document tracks performance benchmarks for Cap's playback and decoding syst |--------|--------|-----------| | Decoder Init | <200ms | - | | Decode Latency (p95) | <50ms | - | +| Startup to First Frame | <250ms | configurable | | Effective FPS | ≥30 fps | ±2 fps | | Decode Jitter | <10ms | - | | Scrub Seek Latency (p95) | <40ms | - | @@ -42,6 +43,7 @@ This document tracks performance benchmarks for Cap's playback and decoding syst ```bash # Run full playback validation on recordings from real-device-test-runner cargo run -p cap-recording --example playback-test-runner -- full +cargo run -p cap-recording --example playback-test-runner -- full --startup-threshold-ms 250 # Run specific test categories cargo run -p cap-recording --example playback-test-runner -- decoder @@ -118,9 +120,9 @@ cargo run -p cap-recording --example playback-test-runner -- scrub --fps 60 --be Automated helper for machine runs: ```bash -node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --input-dir /tmp/cap-real-device-tests -node scripts/run-playback-benchmark-matrix.js --platform 
windows-11 --gpu nvidia-discrete --output-dir /tmp/playback-matrix-windows-nvidia --fps 60 --require-formats mp4,fragmented --input-dir /tmp/cap-real-device-tests -node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu integrated --output-dir /tmp/playback-matrix-windows-integrated --fps 60 --scenarios scrub --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform macos-13 --gpu apple-silicon --output-dir /tmp/playback-matrix --fps 60 --startup-threshold-ms 250 --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu nvidia-discrete --output-dir /tmp/playback-matrix-windows-nvidia --fps 60 --startup-threshold-ms 250 --require-formats mp4,fragmented --input-dir /tmp/cap-real-device-tests +node scripts/run-playback-benchmark-matrix.js --platform windows-11 --gpu integrated --output-dir /tmp/playback-matrix-windows-integrated --fps 60 --startup-threshold-ms 250 --scenarios scrub --input-dir /tmp/cap-real-device-tests ``` | Platform | GPU Class | MP4 Full | Fragmented Full | MP4 Scrub | Fragmented Scrub | Notes | diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 9aefaf01e9..3c4dae83dc 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -171,10 +171,15 @@ cargo run -p cap-recording --example playback-test-runner -- full - Only the latest pending seek frame is sent while an async seek is in-flight. - Duplicate same-frame seeks are dropped in both frontend dispatch and playback seek signaling. +8. **Playback frame wait timeout now scales with target FPS (2026-02-13)** + - Replaced fixed 200ms frame fetch waits with FPS-derived bounded timeout. + - Reduces long stall windows on 60fps playback and improves real-time catch-up behavior. + 8. 
**Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. - JSON output includes command metadata, system info, summary, and per-recording test detail. - Command metadata now includes input scope and output flags for reproducibility. + - Startup-to-first-frame threshold is configurable with `--startup-threshold-ms` and tracked as pass/fail signal. 9. **Added JSON aggregate utility for cross-platform benchmark collation (2026-02-13)** - `scripts/aggregate-playback-benchmarks.js` builds a markdown table from multiple JSON outputs. @@ -185,6 +190,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - Automatically generates aggregate markdown for each machine run directory. - Performs per-machine post-run validation for required scenarios and optional format requirements. - Supports scenario subset reruns via `--scenarios` for faster targeted validation. + - Supports startup threshold tuning via `--startup-threshold-ms`. 11. **Added matrix completeness validator (2026-02-13)** - `scripts/validate-playback-matrix.js` validates required platform/gpu/scenario cells. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 8399e401ea..4997f57f64 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -43,6 +43,7 @@ node scripts/run-playback-benchmark-matrix.js \ --gpu "" \ --output-dir "$OUT_DIR" \ --fps 60 \ + --startup-threshold-ms 250 \ --require-formats mp4,fragmented \ --scenarios full,scrub \ --input-dir "$INPUT_DIR" @@ -51,7 +52,7 @@ node scripts/run-playback-benchmark-matrix.js \ Equivalent shortcut: ```bash -pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --require-formats mp4,fragmented --scenarios full,scrub --input-dir "$INPUT_DIR" +pnpm bench:playback:matrix -- --platform "" --gpu "" --output-dir "$OUT_DIR" --fps 60 --startup-threshold-ms 250 --require-formats mp4,fragmented --scenarios full,scrub --input-dir "$INPUT_DIR" ``` Rerun only scrub scenario for a machine: diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 3b2c72411f..dacede787f 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -384,6 +384,10 @@ impl Playback { .spawn(); let frame_duration = Duration::from_secs_f64(1.0 / fps_f64); + let frame_fetch_timeout = frame_duration + .mul_f64(4.0) + .max(Duration::from_millis(20)) + .min(Duration::from_millis(80)); let mut frame_number = self.start_frame_number; let mut prefetch_buffer: VecDeque = VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); @@ -553,7 +557,7 @@ impl Playback { if is_in_flight { let wait_start = Instant::now(); - let max_wait = Duration::from_millis(200); + let max_wait = frame_fetch_timeout; let mut found_frame = None; while wait_start.elapsed() < max_wait { @@ -603,11 +607,8 @@ impl Playback { } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { let _ = frame_request_tx.send(frame_number); - let wait_result = tokio::time::timeout( - Duration::from_millis(200), - prefetch_rx.recv(), - ) - 
.await; + let wait_result = + tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; if let Ok(Some(prefetched)) = wait_result { if prefetched.frame_number == frame_number { @@ -651,7 +652,7 @@ impl Playback { guard.insert(frame_number); } - let max_wait = Duration::from_millis(200); + let max_wait = frame_fetch_timeout; let data = tokio::select! { _ = stop_rx.changed() => { if let Ok(mut guard) = main_in_flight.write() { diff --git a/crates/recording/examples/playback-test-runner.rs b/crates/recording/examples/playback-test-runner.rs index b930a2cb41..16865ae654 100644 --- a/crates/recording/examples/playback-test-runner.rs +++ b/crates/recording/examples/playback-test-runner.rs @@ -39,6 +39,9 @@ struct Cli { #[arg(long, global = true, default_value = "30")] fps: u32, + #[arg(long, global = true, default_value_t = STARTUP_TO_FIRST_FRAME_WARNING_MS)] + startup_threshold_ms: f64, + #[arg(long, global = true)] verbose: bool, @@ -66,6 +69,7 @@ enum Commands { const FPS_TOLERANCE: f64 = 2.0; const DECODE_LATENCY_WARNING_MS: f64 = 50.0; const SCRUB_SEEK_WARNING_MS: f64 = 40.0; +const STARTUP_TO_FIRST_FRAME_WARNING_MS: f64 = 250.0; const AUDIO_VIDEO_SYNC_TOLERANCE_MS: f64 = 100.0; const CAMERA_SYNC_TOLERANCE_MS: f64 = 100.0; @@ -101,6 +105,8 @@ struct PlaybackTestResult { fps_ok: bool, jitter_ms: f64, decode_latency_ok: bool, + startup_latency_ok: bool, + startup_threshold_ms: f64, errors: Vec, } @@ -243,6 +249,12 @@ impl RecordingTestReport { if !result.decode_latency_ok { println!(" WARN: Decode latency exceeds {DECODE_LATENCY_WARNING_MS}ms!"); } + if !result.startup_latency_ok { + println!( + " WARN: Startup-to-first-frame exceeds {:.1}ms!", + result.startup_threshold_ms + ); + } for err in &result.errors { println!(" ERROR: {err}"); } @@ -390,12 +402,14 @@ async fn test_playback( meta: &StudioRecordingMeta, segment_index: usize, fps: u32, + startup_threshold_ms: f64, verbose: bool, ) -> PlaybackTestResult { let playback_start = Instant::now(); let mut 
result = PlaybackTestResult { segment_index, expected_fps: fps as f64, + startup_threshold_ms, ..Default::default() }; @@ -496,9 +510,11 @@ async fn test_playback( result.fps_ok = (result.effective_fps - result.expected_fps).abs() <= FPS_TOLERANCE || result.effective_fps >= result.expected_fps; result.decode_latency_ok = result.p95_decode_time_ms <= DECODE_LATENCY_WARNING_MS; + result.startup_latency_ok = result.startup_to_first_frame_ms <= startup_threshold_ms; result.passed = result.fps_ok && result.decode_latency_ok + && result.startup_latency_ok && result.failed_frames == 0 && result.decoded_frames > 0; @@ -880,6 +896,7 @@ fn discover_recordings(input_dir: &Path) -> Vec { async fn run_tests_on_recording( recording_path: &Path, fps: u32, + startup_threshold_ms: f64, run_decoder: bool, run_playback: bool, run_scrub: bool, @@ -969,8 +986,15 @@ async fn run_tests_on_recording( if verbose { println!(" Testing playback for segment {segment_idx}..."); } - let playback_result = - test_playback(&meta, studio_meta.as_ref(), segment_idx, fps, verbose).await; + let playback_result = test_playback( + &meta, + studio_meta.as_ref(), + segment_idx, + fps, + startup_threshold_ms, + verbose, + ) + .await; report.playback_results.push(playback_result); } @@ -1082,6 +1106,13 @@ fn get_failure_tags(report: &RecordingTestReport) -> Vec { if report.playback_results.iter().any(|r| !r.decode_latency_ok) { tags.push("LATENCY".to_string()); } + if report + .playback_results + .iter() + .any(|r| !r.startup_latency_ok) + { + tags.push("STARTUP".to_string()); + } if report.scrub_results.iter().any(|r| !r.seek_latency_ok) { tags.push("SCRUB_LATENCY".to_string()); } @@ -1191,7 +1222,7 @@ fn report_to_markdown(report: &RecordingTestReport) -> String { )); md.push_str(&format!( "| ↳ Startup | {} | first_decode={:.1}ms startup_to_first={:.1}ms |\n", - if result.startup_to_first_frame_ms > 0.0 { + if result.startup_latency_ok { "✅" } else { "❌" @@ -1505,9 +1536,10 @@ fn shell_quote(value: &str) 
-> String { fn build_command_string(cli: &Cli) -> String { let mut command = format!( - "cargo run -p cap-recording --example playback-test-runner -- {} --fps {}", + "cargo run -p cap-recording --example playback-test-runner -- {} --fps {} --startup-threshold-ms {:.1}", command_name(cli.command.as_ref()), - cli.fps + cli.fps, + cli.startup_threshold_ms ); if let Some(path) = &cli.recording_path { @@ -1598,9 +1630,10 @@ async fn main() -> anyhow::Result<()> { println!("\nCap Playback Test Runner"); println!("{}", "=".repeat(40)); println!( - "Testing {} recording(s) at {} FPS", + "Testing {} recording(s) at {} FPS (startup threshold: {:.1}ms)", recordings.len(), - cli.fps + cli.fps, + cli.startup_threshold_ms ); println!(); @@ -1612,6 +1645,7 @@ async fn main() -> anyhow::Result<()> { match run_tests_on_recording( recording_path, cli.fps, + cli.startup_threshold_ms, run_decoder, run_playback, run_scrub, diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index 89f0a26e90..d49fec6024 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -10,6 +10,7 @@ function parseArgs(argv) { gpu: null, outputDir: null, fps: 60, + startupThresholdMs: 250, recordingPath: null, inputDir: null, validate: true, @@ -46,6 +47,14 @@ function parseArgs(argv) { options.fps = value; continue; } + if (arg === "--startup-threshold-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --startup-threshold-ms value"); + } + options.startupThresholdMs = value; + continue; + } if (arg === "--recording-path") { options.recordingPath = argv[++i] ?? 
null; continue; @@ -85,7 +94,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] [--scenarios full,scrub] + console.log(`Usage: node scripts/run-playback-benchmark-matrix.js --platform --gpu --output-dir [--fps 60] [--startup-threshold-ms 250] [--recording-path ] [--input-dir ] [--require-formats mp4,fragmented] [--scenarios full,scrub] Runs playback benchmark matrix scenarios and writes JSON outputs. @@ -96,6 +105,7 @@ Required: Optional: --fps FPS for benchmark runs (default: 60) + --startup-threshold-ms Startup-to-first-frame threshold in ms (default: 250) --recording-path Specific recording path --input-dir Recording discovery directory --require-formats Required formats for local validation (comma-separated) @@ -134,6 +144,8 @@ function scenarioArgs(options, scenario) { scenario, "--fps", String(options.fps), + "--startup-threshold-ms", + String(options.startupThresholdMs), "--json-output", jsonOutput, "--notes", From afa9ea57a0938c32b5b3802e2df1292458ff045f Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:34:55 +0000 Subject: [PATCH 195/333] improve: add matrix bottleneck analysis for fps optimization --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 6 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 + package.json | 1 + .../analyze-playback-matrix-bottlenecks.js | 249 ++++++++++++++++++ 5 files changed, 265 insertions(+) create mode 100644 scripts/analyze-playback-matrix-bottlenecks.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index aaef9c71fc..fde0860b97 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -80,6 +80,9 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- # Publish matrix artifacts into this 
benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json + +# Analyze bottlenecks from matrix results +node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 3c4dae83dc..b535a28b50 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -210,6 +210,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. - Keeps matrix evidence updates consistent and repeatable. +15. **Added bottleneck analyzer for continuous FPS optimization (2026-02-13)** + - `scripts/analyze-playback-matrix-bottlenecks.js` ranks matrix cells by FPS, startup, and scrub threshold breaches. + - Produces prioritized optimization backlog from real matrix evidence. + --- ## Root Cause Analysis Archive @@ -313,6 +317,7 @@ Decoder Pipeline: 16. Added matrix status report generator for concise artifact summaries. 17. Added one-shot finalization script for aggregate + status + validation outputs. 18. Added benchmark history publisher script for finalized matrix artifacts. +19. Added matrix bottleneck analysis script for prioritized FPS optimization follow-up. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -332,6 +337,7 @@ Decoder Pipeline: - `scripts/build-playback-matrix-report.js`: added concise matrix status report generation from JSON benchmark outputs. 
- `scripts/finalize-playback-matrix.js`: added one-shot matrix artifact finalization workflow. - `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. +- `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 4997f57f64..46ff772df8 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -122,6 +122,12 @@ pnpm bench:playback:publish -- \ --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json ``` +Generate bottleneck analysis for optimization backlog: + +```bash +pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tmp/playback-matrix-final/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +``` + ## Evidence checklist 1. Confirm all matrix rows exist. diff --git a/package.json b/package.json index 51abc7ce0a..93f760e48f 100644 --- a/package.json +++ b/package.json @@ -47,6 +47,7 @@ "bench:playback:report": "node scripts/build-playback-matrix-report.js", "bench:playback:finalize": "node scripts/finalize-playback-matrix.js", "bench:playback:publish": "node scripts/publish-playback-matrix-summary.js", + "bench:playback:analyze": "node scripts/analyze-playback-matrix-bottlenecks.js", "clean": "find . 
-name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/analyze-playback-matrix-bottlenecks.js b/scripts/analyze-playback-matrix-bottlenecks.js new file mode 100644 index 0000000000..74ca0b3294 --- /dev/null +++ b/scripts/analyze-playback-matrix-bottlenecks.js @@ -0,0 +1,249 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + inputs: [], + output: null, + targetFps: 60, + maxScrubP95Ms: 40, + maxStartupMs: 250, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--input" || arg === "-i") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --input"); + options.inputs.push(path.resolve(value)); + continue; + } + if (arg === "--output" || arg === "-o") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output"); + options.output = path.resolve(value); + continue; + } + if (arg === "--target-fps") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --target-fps value"); + } + options.targetFps = value; + continue; + } + if (arg === "--max-scrub-p95-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-scrub-p95-ms value"); + } + options.maxScrubP95Ms = value; + continue; + } + if (arg === "--max-startup-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-startup-ms value"); + } + options.maxStartupMs = value; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/analyze-playback-matrix-bottlenecks.js --input [--input ...] [--output ] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] + +Analyzes playback matrix JSON outputs and highlights prioritized bottlenecks.`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Input path does not exist: ${targetPath}`); + } + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const result = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + result[key.trim()] = value.trim(); + } + return result; +} + +function average(values) { + if (values.length === 0) return null; + return values.reduce((acc, value) => acc + value, 0) / values.length; +} + +function max(values) { + if (values.length === 0) return null; + return Math.max(...values); +} + +function scoreIssue(issue, options) { + let score = 0; + if (issue.fpsMin !== null && issue.fpsMin < options.targetFps) { + score += (options.targetFps - issue.fpsMin) * 5; + } + if (issue.scrubP95 !== null && issue.scrubP95 > options.maxScrubP95Ms) { + score += issue.scrubP95 - 
options.maxScrubP95Ms; + } + if (issue.startupAvg !== null && issue.startupAvg > options.maxStartupMs) { + score += (issue.startupAvg - options.maxStartupMs) / 2; + } + return score; +} + +function formatValue(value, digits = 1) { + return value === null ? "n/a" : value.toFixed(digits); +} + +function collectIssues(files, options) { + const issues = []; + + for (const filePath of files) { + const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? "unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + + for (const report of reports) { + const playback = Array.isArray(report.playback_results) + ? report.playback_results + : []; + const scrub = Array.isArray(report.scrub_results) ? report.scrub_results : []; + + const fpsValues = playback + .map((entry) => entry.effective_fps) + .filter((entry) => typeof entry === "number"); + const startupValues = playback + .map((entry) => entry.startup_to_first_frame_ms) + .filter((entry) => typeof entry === "number"); + const scrubP95Values = scrub + .map((entry) => entry.p95_seek_time_ms) + .filter((entry) => typeof entry === "number"); + + const issue = { + platform, + gpu, + scenario, + recording: report.recording_name ?? path.basename(filePath), + format: report.is_fragmented ? "fragmented" : "mp4", + fpsMin: fpsValues.length ? 
Math.min(...fpsValues) : null, + startupAvg: average(startupValues), + scrubP95: max(scrubP95Values), + filePath, + }; + issue.score = scoreIssue(issue, options); + if (issue.score > 0) { + issues.push(issue); + } + } + } + + issues.sort((a, b) => b.score - a.score); + return issues; +} + +function recommendation(issue, options) { + const recommendations = []; + if (issue.fpsMin !== null && issue.fpsMin < options.targetFps) { + recommendations.push("inspect decode/render path and frame wait behavior"); + } + if (issue.scrubP95 !== null && issue.scrubP95 > options.maxScrubP95Ms) { + recommendations.push("investigate seek dispatch pressure and decoder reposition cost"); + } + if (issue.startupAvg !== null && issue.startupAvg > options.maxStartupMs) { + recommendations.push("optimize startup warmup and first-frame path"); + } + return recommendations.join("; "); +} + +function buildMarkdown(issues, options) { + let md = ""; + md += "# Playback Matrix Bottleneck Analysis\n\n"; + md += `Generated: ${new Date().toISOString()}\n\n`; + md += `Thresholds: target_fps=${options.targetFps}, max_scrub_p95_ms=${options.maxScrubP95Ms}, max_startup_ms=${options.maxStartupMs}\n\n`; + + if (issues.length === 0) { + md += "No bottlenecks detected for configured thresholds.\n"; + return md; + } + + md += "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; + md += "|---:|---|---|---|---|---|---:|---:|---:|---:|---|\n"; + issues.forEach((issue, index) => { + md += `| ${index + 1} | ${issue.platform} | ${issue.gpu} | ${issue.scenario} | ${issue.recording} | ${issue.format} | ${formatValue(issue.fpsMin)} | ${formatValue(issue.startupAvg)} | ${formatValue(issue.scrubP95)} | ${formatValue(issue.score, 2)} | ${recommendation(issue, options)} |\n`; + }); + md += "\n"; + return md; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if 
(options.inputs.length === 0) { + throw new Error("At least one --input is required"); + } + + const files = new Set(); + for (const input of options.inputs) { + for (const filePath of collectJsonFiles(input)) { + files.add(filePath); + } + } + if (files.size === 0) { + throw new Error("No JSON files found"); + } + + const issues = collectIssues([...files], options); + const markdown = buildMarkdown(issues, options); + if (options.output) { + fs.writeFileSync(options.output, markdown, "utf8"); + console.log(`Wrote bottleneck analysis to ${options.output}`); + } else { + process.stdout.write(markdown); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +} From d83dc8675a1ae2ebb2ee49db1a4218fbfa2cb807 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:36:03 +0000 Subject: [PATCH 196/333] improve: scale playback frame polling with frame budget --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b535a28b50..b2dbfb1e86 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -174,6 +174,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 8. **Playback frame wait timeout now scales with target FPS (2026-02-13)** - Replaced fixed 200ms frame fetch waits with FPS-derived bounded timeout. - Reduces long stall windows on 60fps playback and improves real-time catch-up behavior. + - In-flight polling interval now scales with frame budget instead of fixed 5ms. 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index dacede787f..626b942f7b 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -388,6 +388,10 @@ impl Playback { .mul_f64(4.0) .max(Duration::from_millis(20)) .min(Duration::from_millis(80)); + let in_flight_poll_interval = frame_duration + .mul_f64(0.25) + .max(Duration::from_millis(1)) + .min(Duration::from_millis(4)); let mut frame_number = self.start_frame_number; let mut prefetch_buffer: VecDeque = VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); @@ -571,7 +575,7 @@ impl Playback { prefetch_buffer.push_back(prefetched); } } - _ = tokio::time::sleep(Duration::from_millis(5)) => { + _ = tokio::time::sleep(in_flight_poll_interval) => { let still_in_flight = main_in_flight .read() .map(|guard| guard.contains(&frame_number)) From 79dd6d6c323e1602ba5b5fd43bf08012a21669fb Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:37:25 +0000 Subject: [PATCH 197/333] improve: include bottleneck analysis in matrix finalization --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 +++ scripts/finalize-playback-matrix.js | 57 +++++++++++++++++++++++- 4 files changed, 63 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index fde0860b97..6fd60faced 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -77,6 +77,7 @@ node scripts/validate-playback-matrix.js --input /path/to/json-results --require # Finalize aggregate + status + validation artifacts node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 
--max-startup-ms 250 # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b2dbfb1e86..7e91671450 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -206,6 +206,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 13. **Added matrix finalization helper (2026-02-13)** - `scripts/finalize-playback-matrix.js` generates aggregate markdown, status markdown, and validation JSON in one command. - Supports optional required format enforcement during finalization. + - Also emits bottleneck analysis markdown using configurable FPS/scrub/startup thresholds. 14. **Added matrix summary publisher (2026-02-13)** - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 46ff772df8..8c0569be65 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -113,6 +113,12 @@ One-shot finalize command: pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented ``` +Include optimization thresholds when finalizing: + +```bash +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +``` + Publish finalized artifacts into benchmark history: ```bash diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 886f2fc52b..9f281de9c4 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -9,6 +9,10 @@ function parseArgs(argv) { inputs: [], outputDir: null, requireFormats: [], + targetFps: 60, + maxScrubP95Ms: 40, + maxStartupMs: 250, + analyze: true, }; for (let i = 2; i < argv.length; i++) { @@ -38,6 +42,34 @@ function parseArgs(argv) { .filter(Boolean); continue; } + if (arg === "--target-fps") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --target-fps value"); + } + options.targetFps = value; + continue; + } + if (arg === "--max-scrub-p95-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-scrub-p95-ms value"); + } + options.maxScrubP95Ms = value; + continue; + } + if (arg === "--max-startup-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value <= 0) { + throw new Error("Invalid --max-startup-ms value"); + } + options.maxStartupMs = value; + continue; + } + if (arg === "--skip-analyze") { + options.analyze = false; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -45,9 +77,9 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] -Generates aggregate markdown, status markdown, and validation JSON for collected playback matrix outputs.`); +Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs.`); } function run(command, args) { @@ -76,6 +108,7 @@ function main() { const aggregatePath = path.join(options.outputDir, "playback-benchmark-aggregate.md"); const statusPath = path.join(options.outputDir, "playback-matrix-status.md"); const validationPath = path.join(options.outputDir, "playback-matrix-validation.json"); + const bottleneckPath = path.join(options.outputDir, "playback-bottlenecks.md"); const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; const statusArgs = ["scripts/build-playback-matrix-report.js"]; @@ -100,10 +133,30 @@ function main() { run("node", aggregateArgs); run("node", statusArgs); run("node", validateArgs); + if (options.analyze) { + const analyzeArgs = ["scripts/analyze-playback-matrix-bottlenecks.js"]; + for (const input of options.inputs) { + analyzeArgs.push("--input", input); + } + analyzeArgs.push( + "--output", + bottleneckPath, + "--target-fps", + String(options.targetFps), + "--max-scrub-p95-ms", + String(options.maxScrubP95Ms), + "--max-startup-ms", + String(options.maxStartupMs), + ); + run("node", analyzeArgs); 
+ } console.log(`Aggregate markdown: ${aggregatePath}`); console.log(`Status markdown: ${statusPath}`); console.log(`Validation JSON: ${validationPath}`); + if (options.analyze) { + console.log(`Bottleneck analysis: ${bottleneckPath}`); + } } try { From a5ef5aa27579dc0bd3ce4c796b54d017fd0e7e91 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:41:14 +0000 Subject: [PATCH 198/333] improve: adapt playback catch-up skipping to sustained lag --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 42 ++++++++++++++++++++++++------ 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 7e91671450..a12b2bae9e 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -175,6 +175,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - Replaced fixed 200ms frame fetch waits with FPS-derived bounded timeout. - Reduces long stall windows on 60fps playback and improves real-time catch-up behavior. - In-flight polling interval now scales with frame budget instead of fixed 5ms. + - Catch-up skip threshold now adapts with late streak depth and logs skip event telemetry. 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 626b942f7b..b15e80603a 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -396,10 +396,12 @@ impl Playback { let mut prefetch_buffer: VecDeque = VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); let mut frame_cache = FrameCache::new(FRAME_CACHE_SIZE); - let aggressive_skip_threshold = 10u32; + let base_skip_threshold = (fps / 6).clamp(6, 16); + let mut late_streak = 0u32; + let mut skip_events = 0u64; let mut total_frames_rendered = 0u64; - let mut _total_frames_skipped = 0u64; + let mut total_frames_skipped = 0u64; let mut first_render_logged = false; let mut pending_seek_observation: Option<(u32, Instant)> = None; @@ -604,7 +606,7 @@ impl Playback { )) } else { frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; } } @@ -623,12 +625,12 @@ impl Playback { } else { prefetch_buffer.push_back(prefetched); frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; } } else { frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; } } else { @@ -669,7 +671,7 @@ impl Playback { guard.remove(&frame_number); } frame_number = frame_number.saturating_add(1); - _total_frames_skipped += 1; + total_frames_skipped += 1; continue; }, data = segment_media @@ -775,15 +777,20 @@ impl Playback { if frame_number < expected_frame { let frames_behind = expected_frame - frame_number; + late_streak = late_streak.saturating_add(1); + let threshold_reduction = (late_streak / 12).min(base_skip_threshold); + let dynamic_skip_threshold = + base_skip_threshold.saturating_sub(threshold_reduction); - if frames_behind <= aggressive_skip_threshold { + if frames_behind <= dynamic_skip_threshold { continue; } let skipped = frames_behind.saturating_sub(1); if skipped > 0 { frame_number += skipped; - 
_total_frames_skipped += skipped as u64; + total_frames_skipped += skipped as u64; + skip_events = skip_events.saturating_add(1); prefetch_buffer.retain(|p| p.frame_number >= frame_number); let _ = frame_request_tx.send(frame_number); @@ -795,10 +802,29 @@ impl Playback { { break 'playback; } + + if skipped >= fps.saturating_div(2) || skip_events % 120 == 0 { + info!( + skipped_frames = skipped, + frames_behind, + dynamic_skip_threshold, + late_streak, + total_frames_skipped, + skip_events, + "Playback applied frame skip catch-up" + ); + } } + } else { + late_streak = 0; } } + info!( + total_frames_rendered, + total_frames_skipped, skip_events, "Playback loop completed" + ); + stop_tx.send(true).ok(); event_tx.send(PlaybackEvent::Stop).ok(); From 34f45b5c5d82234b38c1ab4ec3f1cc1868f75e23 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:42:39 +0000 Subject: [PATCH 199/333] improve: allow bottleneck attachment in matrix summary publishing --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 +- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 ++- scripts/publish-playback-matrix-summary.js | 28 +++++++++++++++++++--- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 6fd60faced..9a0067d5a6 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -80,7 +80,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 # Publish matrix artifacts into this benchmark history -node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md 
--validation-json /tmp/playback-matrix-final/playback-matrix-validation.json +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a12b2bae9e..eb23355cc1 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -212,6 +212,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 14. **Added matrix summary publisher (2026-02-13)** - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. - Keeps matrix evidence updates consistent and repeatable. + - Supports optional bottleneck analysis attachment in published summary. 15. **Added bottleneck analyzer for continuous FPS optimization (2026-02-13)** - `scripts/analyze-playback-matrix-bottlenecks.js` ranks matrix cells by FPS, startup, and scrub threshold breaches. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 8c0569be65..4daf71ab12 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -125,7 +125,8 @@ Publish finalized artifacts into benchmark history: pnpm bench:playback:publish -- \ --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md \ --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ - --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json + --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ + --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md ``` Generate bottleneck analysis for optimization backlog: diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 0ee89fc8fe..45fb7fe107 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -7,6 +7,7 @@ function parseArgs(argv) { const options = { aggregateMd: null, statusMd: null, + bottlenecksMd: null, validationJson: null, target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), }; @@ -26,6 +27,10 @@ function parseArgs(argv) { options.statusMd = path.resolve(argv[++i] ?? ""); continue; } + if (arg === "--bottlenecks-md") { + options.bottlenecksMd = path.resolve(argv[++i] ?? ""); + continue; + } if (arg === "--validation-json") { options.validationJson = path.resolve(argv[++i] ?? 
""); continue; @@ -41,7 +46,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--target ] + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--target ] Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); } @@ -52,7 +57,7 @@ function ensureFile(filePath, label) { } } -function buildSummarySection(aggregateMd, statusMd, validationJson) { +function buildSummarySection(aggregateMd, statusMd, validationJson, bottlenecksMd) { const now = new Date().toISOString(); const validation = JSON.parse(validationJson); const status = validation.passed ? "✅ MATRIX PASS" : "❌ MATRIX FAIL"; @@ -89,6 +94,12 @@ function buildSummarySection(aggregateMd, statusMd, validationJson) { markdown += `${aggregateMd.trim()}\n\n`; markdown += "\n\n"; + if (bottlenecksMd) { + markdown += "
\nBottleneck Analysis\n\n"; + markdown += `${bottlenecksMd.trim()}\n\n`; + markdown += "
\n\n"; + } + return markdown; } @@ -121,12 +132,23 @@ function main() { ensureFile(options.aggregateMd, "Aggregate markdown"); ensureFile(options.statusMd, "Status markdown"); ensureFile(options.validationJson, "Validation JSON"); + if (options.bottlenecksMd) { + ensureFile(options.bottlenecksMd, "Bottlenecks markdown"); + } ensureFile(options.target, "Target"); const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8"); const statusMd = fs.readFileSync(options.statusMd, "utf8"); const validationJson = fs.readFileSync(options.validationJson, "utf8"); - const summaryMd = buildSummarySection(aggregateMd, statusMd, validationJson); + const bottlenecksMd = options.bottlenecksMd + ? fs.readFileSync(options.bottlenecksMd, "utf8") + : null; + const summaryMd = buildSummarySection( + aggregateMd, + statusMd, + validationJson, + bottlenecksMd, + ); writeToBenchmarkHistory(options.target, summaryMd); console.log(`Published matrix summary into ${options.target}`); } From f9e371c7b8ab89004015b204b80cea97424b28eb Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:43:52 +0000 Subject: [PATCH 200/333] improve: scale playback warmup buffering with fps --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index eb23355cc1..03adc437d3 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -176,6 +176,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - Reduces long stall windows on 60fps playback and improves real-time catch-up behavior. - In-flight polling interval now scales with frame budget instead of fixed 5ms. - Catch-up skip threshold now adapts with late streak depth and logs skip event telemetry. + - Warmup target and warmup timeout now scale with FPS, reducing startup buffering overhead. 8. 
**Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b15e80603a..80596cbe7f 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -405,11 +405,19 @@ impl Playback { let mut first_render_logged = false; let mut pending_seek_observation: Option<(u32, Instant)> = None; - let warmup_target_frames = 20usize; - let warmup_after_first_timeout = Duration::from_millis(1000); + let warmup_target_frames = (fps.saturating_div(4)).clamp(8, 16) as usize; + let warmup_after_first_timeout = frame_duration + .mul_f64((warmup_target_frames as f64) * 2.0) + .max(Duration::from_millis(200)) + .min(Duration::from_millis(700)); let warmup_no_frames_timeout = Duration::from_secs(5); let warmup_start = Instant::now(); let mut first_frame_time: Option = None; + info!( + warmup_target_frames, + warmup_after_first_timeout_ms = warmup_after_first_timeout.as_secs_f64() * 1000.0, + "Playback warmup configuration" + ); while !*stop_rx.borrow() { let should_start = if let Some(first_time) = first_frame_time { From 56235f57616b99877668184f891a0e8a16ce189e Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:45:09 +0000 Subject: [PATCH 201/333] improve: support publish-target in matrix finalization --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 +++++ scripts/finalize-playback-matrix.js | 29 +++++++++++++++++++++++- 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 9a0067d5a6..b3d485801d 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -78,6 +78,7 @@ node scripts/validate-playback-matrix.js --input /path/to/json-results --require # Finalize 
aggregate + status + validation artifacts node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 03adc437d3..4ad22c5d7a 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -209,6 +209,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/finalize-playback-matrix.js` generates aggregate markdown, status markdown, and validation JSON in one command. - Supports optional required format enforcement during finalization. - Also emits bottleneck analysis markdown using configurable FPS/scrub/startup thresholds. + - Can optionally publish finalized artifacts directly into benchmark history target. 14. **Added matrix summary publisher (2026-02-13)** - `scripts/publish-playback-matrix-summary.js` injects finalized matrix artifacts into playback benchmark history. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 4daf71ab12..abd991a5f2 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -119,6 +119,12 @@ Include optimization thresholds when finalizing: pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 ``` +Finalize and publish to benchmark history in one command: + +```bash +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md +``` + Publish finalized artifacts into benchmark history: ```bash diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 9f281de9c4..14ea83a436 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -13,6 +13,7 @@ function parseArgs(argv) { maxScrubP95Ms: 40, maxStartupMs: 250, analyze: true, + publishTarget: null, }; for (let i = 2; i < argv.length; i++) { @@ -70,6 +71,12 @@ function parseArgs(argv) { options.analyze = false; continue; } + if (arg === "--publish-target") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --publish-target"); + options.publishTarget = path.resolve(value); + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -77,7 +84,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] 
--output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs.`); } @@ -150,6 +157,23 @@ function main() { ); run("node", analyzeArgs); } + if (options.publishTarget) { + const publishArgs = [ + "scripts/publish-playback-matrix-summary.js", + "--aggregate-md", + aggregatePath, + "--status-md", + statusPath, + "--validation-json", + validationPath, + "--target", + options.publishTarget, + ]; + if (options.analyze) { + publishArgs.push("--bottlenecks-md", bottleneckPath); + } + run("node", publishArgs); + } console.log(`Aggregate markdown: ${aggregatePath}`); console.log(`Status markdown: ${statusPath}`); @@ -157,6 +181,9 @@ function main() { if (options.analyze) { console.log(`Bottleneck analysis: ${bottleneckPath}`); } + if (options.publishTarget) { + console.log(`Published target: ${options.publishTarget}`); + } } try { From 3c4e00406a4d4124db880795b62984f748ebb776 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:46:27 +0000 Subject: [PATCH 202/333] improve: scale prefetch windows with playback fps --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 14 +++++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 4ad22c5d7a..2354fad87b 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -177,6 +177,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - In-flight polling interval now scales with frame budget instead of fixed 5ms. - Catch-up skip threshold now adapts with late streak depth and logs skip event telemetry. - Warmup target and warmup timeout now scale with FPS, reducing startup buffering overhead. 
+ - Prefetch ahead/behind windows now scale with FPS to reduce unnecessary decode pressure at lower targets. 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 80596cbe7f..b50c11be3a 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -37,8 +37,6 @@ use crate::{ const PREFETCH_BUFFER_SIZE: usize = 60; const PARALLEL_DECODE_TASKS: usize = 4; -const MAX_PREFETCH_AHEAD: u32 = 60; -const PREFETCH_BEHIND: u32 = 15; const FRAME_CACHE_SIZE: usize = 60; #[derive(Debug)] @@ -168,8 +166,14 @@ impl Playback { let mut prefetched_behind: HashSet = HashSet::new(); const INITIAL_PARALLEL_TASKS: usize = 4; const RAMP_UP_AFTER_FRAMES: u32 = 5; + let dynamic_prefetch_ahead = fps.clamp(30, 90); + let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); let mut cached_project = prefetch_project.borrow().clone(); + info!( + dynamic_prefetch_ahead, + dynamic_prefetch_behind, "Prefetch window configuration" + ); loop { if *prefetch_stop_rx.borrow() { @@ -199,14 +203,14 @@ impl Playback { in_flight_guard.clear(); } - if is_backward_seek || seek_distance > MAX_PREFETCH_AHEAD / 2 { + if is_backward_seek || seek_distance > dynamic_prefetch_ahead / 2 { in_flight = FuturesUnordered::new(); } } } let current_playback_frame = *playback_position_rx.borrow(); - let max_prefetch_frame = current_playback_frame + MAX_PREFETCH_AHEAD; + let max_prefetch_frame = current_playback_frame + dynamic_prefetch_ahead; let effective_parallel = if frames_decoded < RAMP_UP_AFTER_FRAMES { INITIAL_PARALLEL_TASKS @@ -279,7 +283,7 @@ impl Playback { } if in_flight.len() < effective_parallel { - for behind_offset in 1..=PREFETCH_BEHIND { + for behind_offset in 1..=dynamic_prefetch_behind { if in_flight.len() >= effective_parallel { break; } From 78b126681125cc5b81e4d4241a309e4a051d2c7d Mon Sep 17 
00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:48:07 +0000 Subject: [PATCH 203/333] improve: emit structured bottleneck artifacts in matrix analysis --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + .../analyze-playback-matrix-bottlenecks.js | 34 ++++++++++++++++++- scripts/finalize-playback-matrix.js | 4 +++ 5 files changed, 40 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index b3d485801d..819746eec8 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -85,6 +85,7 @@ node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-mat # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --output-json /tmp/playback-bottlenecks.json --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 2354fad87b..dec6cf81b7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -220,6 +220,7 @@ cargo run -p cap-recording --example playback-test-runner -- full 15. **Added bottleneck analyzer for continuous FPS optimization (2026-02-13)** - `scripts/analyze-playback-matrix-bottlenecks.js` ranks matrix cells by FPS, startup, and scrub threshold breaches. - Produces prioritized optimization backlog from real matrix evidence. + - Supports structured JSON output for automation and regression tracking. 
--- diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index abd991a5f2..e1503dd5f0 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -139,6 +139,7 @@ Generate bottleneck analysis for optimization backlog: ```bash pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tmp/playback-matrix-final/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tmp/playback-matrix-final/playback-bottlenecks.md --output-json /tmp/playback-matrix-final/playback-bottlenecks.json --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 ``` ## Evidence checklist diff --git a/scripts/analyze-playback-matrix-bottlenecks.js b/scripts/analyze-playback-matrix-bottlenecks.js index 74ca0b3294..ac91d0c8ce 100644 --- a/scripts/analyze-playback-matrix-bottlenecks.js +++ b/scripts/analyze-playback-matrix-bottlenecks.js @@ -7,6 +7,7 @@ function parseArgs(argv) { const options = { inputs: [], output: null, + outputJson: null, targetFps: 60, maxScrubP95Ms: 40, maxStartupMs: 250, @@ -31,6 +32,12 @@ function parseArgs(argv) { options.output = path.resolve(value); continue; } + if (arg === "--output-json") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-json"); + options.outputJson = path.resolve(value); + continue; + } if (arg === "--target-fps") { const value = Number.parseFloat(argv[++i] ?? ""); if (!Number.isFinite(value) || value <= 0) { @@ -62,7 +69,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/analyze-playback-matrix-bottlenecks.js --input [--input ...] [--output ] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] + console.log(`Usage: node scripts/analyze-playback-matrix-bottlenecks.js --input [--input ...] 
[--output ] [--output-json ] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] Analyzes playback matrix JSON outputs and highlights prioritized bottlenecks.`); } @@ -211,6 +218,23 @@ function buildMarkdown(issues, options) { return md; } +function buildJson(issues, options) { + return { + generatedAt: new Date().toISOString(), + thresholds: { + targetFps: options.targetFps, + maxScrubP95Ms: options.maxScrubP95Ms, + maxStartupMs: options.maxStartupMs, + }, + issueCount: issues.length, + issues: issues.map((issue, index) => ({ + rank: index + 1, + ...issue, + recommendation: recommendation(issue, options), + })), + }; +} + function main() { const options = parseArgs(process.argv); if (options.help) { @@ -239,6 +263,14 @@ function main() { } else { process.stdout.write(markdown); } + if (options.outputJson) { + fs.writeFileSync( + options.outputJson, + JSON.stringify(buildJson(issues, options), null, 2), + "utf8", + ); + console.log(`Wrote bottleneck analysis JSON to ${options.outputJson}`); + } } try { diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 14ea83a436..d542d6f1b5 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -116,6 +116,7 @@ function main() { const statusPath = path.join(options.outputDir, "playback-matrix-status.md"); const validationPath = path.join(options.outputDir, "playback-matrix-validation.json"); const bottleneckPath = path.join(options.outputDir, "playback-bottlenecks.md"); + const bottleneckJsonPath = path.join(options.outputDir, "playback-bottlenecks.json"); const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; const statusArgs = ["scripts/build-playback-matrix-report.js"]; @@ -148,6 +149,8 @@ function main() { analyzeArgs.push( "--output", bottleneckPath, + "--output-json", + bottleneckJsonPath, "--target-fps", String(options.targetFps), "--max-scrub-p95-ms", @@ -180,6 +183,7 @@ function main() { console.log(`Validation JSON: 
${validationPath}`); if (options.analyze) { console.log(`Bottleneck analysis: ${bottleneckPath}`); + console.log(`Bottleneck analysis JSON: ${bottleneckJsonPath}`); } if (options.publishTarget) { console.log(`Published target: ${options.publishTarget}`); From 2b7ae23ce7013d5cc5247b7f56a1e988f6f09747 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:49:33 +0000 Subject: [PATCH 204/333] improve: scale prefetch decode parallelism for high-fps playback --- crates/editor/PLAYBACK-FINDINGS.md | 1 + crates/editor/src/playback.rs | 15 +++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index dec6cf81b7..b49d91a9ce 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -178,6 +178,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - Catch-up skip threshold now adapts with late streak depth and logs skip event telemetry. - Warmup target and warmup timeout now scale with FPS, reducing startup buffering overhead. - Prefetch ahead/behind windows now scale with FPS to reduce unnecessary decode pressure at lower targets. + - Prefetch parallelism now scales with FPS target to increase decode throughput under 60fps workloads. 8. **Playback benchmark runner now supports JSON evidence export (2026-02-13)** - `playback-test-runner` supports `--json-output` for structured report emission. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b50c11be3a..06a01210dd 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -164,15 +164,22 @@ impl Playback { let mut in_flight: FuturesUnordered = FuturesUnordered::new(); let mut frames_decoded: u32 = 0; let mut prefetched_behind: HashSet = HashSet::new(); - const INITIAL_PARALLEL_TASKS: usize = 4; const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); + let dynamic_parallel_tasks = if fps >= 60 { + 6 + } else if fps >= 45 { + 5 + } else { + PARALLEL_DECODE_TASKS + }; + let initial_parallel_tasks = dynamic_parallel_tasks.min(4); let mut cached_project = prefetch_project.borrow().clone(); info!( dynamic_prefetch_ahead, - dynamic_prefetch_behind, "Prefetch window configuration" + dynamic_prefetch_behind, dynamic_parallel_tasks, "Prefetch window configuration" ); loop { @@ -213,9 +220,9 @@ impl Playback { let max_prefetch_frame = current_playback_frame + dynamic_prefetch_ahead; let effective_parallel = if frames_decoded < RAMP_UP_AFTER_FRAMES { - INITIAL_PARALLEL_TASKS + initial_parallel_tasks } else { - PARALLEL_DECODE_TASKS + dynamic_parallel_tasks }; while in_flight.len() < effective_parallel { From df142fced0965793d69851d4cd7a8b3c7ededfb6 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 21:53:45 +0000 Subject: [PATCH 205/333] improve: skip no-op playhead state updates in tauri commands --- apps/desktop/src-tauri/src/lib.rs | 42 +++++++++++++++++++++--------- crates/editor/PLAYBACK-FINDINGS.md | 1 + 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs index c00d465ced..795fbe7710 100644 --- a/apps/desktop/src-tauri/src/lib.rs +++ b/apps/desktop/src-tauri/src/lib.rs @@ -1915,15 +1915,24 @@ async fn set_playhead_position( editor_instance: WindowEditorInstance, frame_number: 
u32, ) -> Result<(), String> { - editor_instance - .modify_and_emit_state(|state| { - state.playhead_position = frame_number; - }) - .await; + let state_changed = { + let state = editor_instance.state.lock().await; + state.playhead_position != frame_number + }; - let playback_handle = { + if state_changed { + editor_instance + .modify_and_emit_state(|state| { + state.playhead_position = frame_number; + }) + .await; + } + + let playback_handle = if state_changed { let state = editor_instance.state.lock().await; state.playback_task.clone() + } else { + None }; if let Some(handle) = playback_handle { @@ -2548,15 +2557,24 @@ async fn is_camera_window_open(app: AppHandle) -> bool { #[specta::specta] #[instrument(skip(editor_instance))] async fn seek_to(editor_instance: WindowEditorInstance, frame_number: u32) -> Result<(), String> { - editor_instance - .modify_and_emit_state(|state| { - state.playhead_position = frame_number; - }) - .await; + let state_changed = { + let state = editor_instance.state.lock().await; + state.playhead_position != frame_number + }; - let playback_handle = { + if state_changed { + editor_instance + .modify_and_emit_state(|state| { + state.playhead_position = frame_number; + }) + .await; + } + + let playback_handle = if state_changed { let state = editor_instance.state.lock().await; state.playback_task.clone() + } else { + None }; if let Some(handle) = playback_handle { diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b49d91a9ce..1a0c4ff43c 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -145,6 +145,7 @@ cargo run -p cap-recording --example playback-test-runner -- full - `seek_to` and `set_playhead_position` commands now forward seek requests to active playback. - Timeline seek no longer tears down and recreates playback while playing. - Seek signaling now uses watch semantics so only latest frame target is consumed under heavy scrub load. 
+ - Tauri playhead/seek commands now skip no-op same-frame state emission to reduce state/event churn. 3. **Lazy decoder pool warmup on macOS AVAssetReader (2026-02-13)** - Initial warmup now creates only a small subset of decoder instances. From 2afdbd6864228a7ff3f02c022384c65defeab977 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:00:15 +0000 Subject: [PATCH 206/333] improve: add baseline comparison gating for playback benchmarks --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 7 + crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 + package.json | 1 + scripts/compare-playback-benchmark-runs.js | 273 +++++++++++++++++++++ 5 files changed, 290 insertions(+) create mode 100644 scripts/compare-playback-benchmark-runs.js diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 819746eec8..62434b52b2 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -86,6 +86,9 @@ node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-mat # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --output-json /tmp/playback-bottlenecks.json --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 + +# Compare candidate against baseline and flag regressions +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 1a0c4ff43c..84f3072ba4 
100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -224,6 +224,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Produces prioritized optimization backlog from real matrix evidence. - Supports structured JSON output for automation and regression tracking. +16. **Added baseline-vs-candidate comparator for regression gating (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` compares candidate matrix outputs against baseline outputs. + - Flags regressions when FPS drops or startup/scrub latency increase beyond configured tolerance. + - Exits non-zero on regressions so matrix-driven optimization loops can be gated automatically. + --- ## Root Cause Analysis Archive @@ -328,6 +333,7 @@ Decoder Pipeline: 17. Added one-shot finalization script for aggregate + status + validation outputs. 18. Added benchmark history publisher script for finalized matrix artifacts. 19. Added matrix bottleneck analysis script for prioritized FPS optimization follow-up. +20. Added baseline-vs-candidate comparison script to gate regressions in optimization loops. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -348,6 +354,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: added one-shot matrix artifact finalization workflow. - `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. +- `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index e1503dd5f0..069f2f0a53 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -142,6 +142,12 @@ pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tm pnpm bench:playback:analyze -- --input /path/to/all-machine-results --output /tmp/playback-matrix-final/playback-bottlenecks.md --output-json /tmp/playback-matrix-final/playback-bottlenecks.json --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 ``` +Compare candidate run against baseline and fail on regressions: + +```bash +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 +``` + ## Evidence checklist 1. Confirm all matrix rows exist. diff --git a/package.json b/package.json index 93f760e48f..4f81b478cc 100644 --- a/package.json +++ b/package.json @@ -48,6 +48,7 @@ "bench:playback:finalize": "node scripts/finalize-playback-matrix.js", "bench:playback:publish": "node scripts/publish-playback-matrix-summary.js", "bench:playback:analyze": "node scripts/analyze-playback-matrix-bottlenecks.js", + "bench:playback:compare": "node scripts/compare-playback-benchmark-runs.js", "clean": "find . 
-name node_modules -o -name .next -o -name .output -o -name .turbo -o -name dist -type d -prune | xargs rm -rf", "lgtm-otel": "docker run -p 3010:3000 -p 4317:4317 -p 4318:4318 --rm -it docker.io/grafana/otel-lgtm", "with-env": "dotenv -e .env --" diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js new file mode 100644 index 0000000000..a7a7eaca22 --- /dev/null +++ b/scripts/compare-playback-benchmark-runs.js @@ -0,0 +1,273 @@ +#!/usr/bin/env node + +import fs from "node:fs"; +import path from "node:path"; + +function parseArgs(argv) { + const options = { + baseline: null, + candidate: null, + output: null, + allowFpsDrop: 2, + allowStartupIncreaseMs: 25, + allowScrubP95IncreaseMs: 5, + }; + + for (let i = 2; i < argv.length; i++) { + const arg = argv[i]; + if (arg === "--") continue; + if (arg === "--help" || arg === "-h") { + options.help = true; + continue; + } + if (arg === "--baseline") { + options.baseline = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--candidate") { + options.candidate = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--output" || arg === "-o") { + options.output = path.resolve(argv[++i] ?? ""); + continue; + } + if (arg === "--allow-fps-drop") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-fps-drop value"); + } + options.allowFpsDrop = value; + continue; + } + if (arg === "--allow-startup-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-startup-increase-ms value"); + } + options.allowStartupIncreaseMs = value; + continue; + } + if (arg === "--allow-scrub-p95-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-scrub-p95-increase-ms value"); + } + options.allowScrubP95IncreaseMs = value; + continue; + } + throw new Error(`Unknown argument: ${arg}`); + } + + return options; +} + +function usage() { + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline --candidate [--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] + +Compares baseline and candidate playback matrix JSON outputs and flags regressions.`); +} + +function collectJsonFiles(targetPath) { + if (!fs.existsSync(targetPath)) { + throw new Error(`Path does not exist: ${targetPath}`); + } + const stats = fs.statSync(targetPath); + if (stats.isFile()) { + return targetPath.endsWith(".json") ? [targetPath] : []; + } + const files = []; + for (const entry of fs.readdirSync(targetPath, { withFileTypes: true })) { + const fullPath = path.join(targetPath, entry.name); + if (entry.isDirectory()) { + files.push(...collectJsonFiles(fullPath)); + } else if (entry.isFile() && entry.name.endsWith(".json")) { + files.push(fullPath); + } + } + return files; +} + +function parseNotes(notes) { + if (!notes) return {}; + const parsed = {}; + for (const token of notes.split(/\s+/)) { + if (!token.includes("=")) continue; + const [key, ...rest] = token.split("="); + const value = rest.join("="); + if (!key || !value) continue; + parsed[key.trim()] = value.trim(); + } + return parsed; +} + +function average(values) { + if (values.length === 0) return null; + return values.reduce((acc, value) => acc + value, 0) / values.length; +} + +function maximum(values) { + if (values.length === 0) return null; + return Math.max(...values); +} + +function collectMetrics(files) { + const rows = new Map(); + + for (const filePath of files) { + const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + const notes = parseNotes(parsed.notes); + const platform = notes.platform ?? 
"unknown"; + const gpu = notes.gpu ?? "unknown"; + const scenario = notes.scenario ?? "unspecified"; + const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + + for (const report of reports) { + const key = `${platform}|${gpu}|${scenario}|${report.recording_name ?? "unknown"}|${report.is_fragmented ? "fragmented" : "mp4"}`; + + const playback = Array.isArray(report.playback_results) + ? report.playback_results + : []; + const scrub = Array.isArray(report.scrub_results) + ? report.scrub_results + : []; + + const fpsValues = playback + .map((entry) => entry.effective_fps) + .filter((entry) => typeof entry === "number"); + const startupValues = playback + .map((entry) => entry.startup_to_first_frame_ms) + .filter((entry) => typeof entry === "number"); + const scrubP95Values = scrub + .map((entry) => entry.p95_seek_time_ms) + .filter((entry) => typeof entry === "number"); + + rows.set(key, { + key, + platform, + gpu, + scenario, + recording: report.recording_name ?? "unknown", + format: report.is_fragmented ? "fragmented" : "mp4", + fpsMin: fpsValues.length ? Math.min(...fpsValues) : null, + startupAvg: average(startupValues), + scrubP95Max: maximum(scrubP95Values), + }); + } + } + + return rows; +} + +function delta(candidate, baseline) { + if (candidate === null || baseline === null) return null; + return candidate - baseline; +} + +function formatNumber(value, digits = 2) { + return value === null ? 
"n/a" : value.toFixed(digits); +} + +function compareMetrics(baselineRows, candidateRows, options) { + const comparisons = []; + + for (const [key, candidate] of candidateRows) { + const baseline = baselineRows.get(key); + if (!baseline) continue; + + const fpsDelta = delta(candidate.fpsMin, baseline.fpsMin); + const startupDelta = delta(candidate.startupAvg, baseline.startupAvg); + const scrubDelta = delta(candidate.scrubP95Max, baseline.scrubP95Max); + + const regressions = []; + if (fpsDelta !== null && fpsDelta < -options.allowFpsDrop) { + regressions.push(`fps_drop=${formatNumber(fpsDelta)}`); + } + if ( + startupDelta !== null && + startupDelta > options.allowStartupIncreaseMs + ) { + regressions.push(`startup_increase=${formatNumber(startupDelta)}`); + } + if (scrubDelta !== null && scrubDelta > options.allowScrubP95IncreaseMs) { + regressions.push(`scrub_p95_increase=${formatNumber(scrubDelta)}`); + } + + comparisons.push({ + platform: candidate.platform, + gpu: candidate.gpu, + scenario: candidate.scenario, + recording: candidate.recording, + format: candidate.format, + fpsDelta, + startupDelta, + scrubDelta, + regressions, + }); + } + + comparisons.sort((a, b) => b.regressions.length - a.regressions.length); + return comparisons; +} + +function toMarkdown(comparisons, options) { + const regressions = comparisons.filter( + (entry) => entry.regressions.length > 0, + ); + let md = ""; + md += "# Playback Benchmark Comparison\n\n"; + md += `Generated: ${new Date().toISOString()}\n\n`; + md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; + md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}\n\n`; + md += + "| Platform | GPU | Scenario | Recording | Format | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; + md += "|---|---|---|---|---|---:|---:|---:|---|\n"; + for (const row of comparisons) { + md += `| 
${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? row.regressions.join(", ") : "none"} |\n`; + } + md += "\n"; + return md; +} + +function main() { + const options = parseArgs(process.argv); + if (options.help) { + usage(); + return; + } + if (!options.baseline || !options.candidate) { + throw new Error("--baseline and --candidate are required"); + } + + const baselineFiles = collectJsonFiles(options.baseline); + const candidateFiles = collectJsonFiles(options.candidate); + if (baselineFiles.length === 0) { + throw new Error("No baseline JSON files found"); + } + if (candidateFiles.length === 0) { + throw new Error("No candidate JSON files found"); + } + + const baselineRows = collectMetrics(baselineFiles); + const candidateRows = collectMetrics(candidateFiles); + const comparisons = compareMetrics(baselineRows, candidateRows, options); + const markdown = toMarkdown(comparisons, options); + + if (options.output) { + fs.writeFileSync(options.output, markdown, "utf8"); + console.log(`Wrote comparison report to ${options.output}`); + } else { + process.stdout.write(markdown); + } + + if (comparisons.some((entry) => entry.regressions.length > 0)) { + process.exit(1); + } +} + +try { + main(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +} From 5b1807cba565ebe2bab4c248bf82060a18b7272c Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:02:01 +0000 Subject: [PATCH 207/333] improve: discard stale prefetched frames after live seeks --- crates/editor/PLAYBACK-FINDINGS.md | 7 ++++ crates/editor/src/playback.rs | 56 ++++++++++++++++++++++++++---- 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 84f3072ba4..c88dde9c05 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -229,6 +229,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Flags regressions when FPS drops or startup/scrub latency increase beyond configured tolerance. - Exits non-zero on regressions so matrix-driven optimization loops can be gated automatically. +17. **Added prefetch generation gating for live seek correctness and latency (2026-02-13)** + - Prefetch outputs are tagged with seek-generation IDs and stale generation frames are dropped. + - Seek events now advance generation and flush prefetch consumption to prevent old in-flight decode outputs from polluting post-seek playback. + - Reduces redundant decode/render work during aggressive scrub and improves settle reliability. + --- ## Root Cause Analysis Archive @@ -334,6 +339,7 @@ Decoder Pipeline: 18. Added benchmark history publisher script for finalized matrix artifacts. 19. Added matrix bottleneck analysis script for prioritized FPS optimization follow-up. 20. Added baseline-vs-candidate comparison script to gate regressions in optimization loops. +21. Added seek-generation prefetch gating to drop stale decode outputs after live seek updates. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. 
@@ -355,6 +361,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. +- `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 06a01210dd..7e9671554a 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -70,6 +70,7 @@ struct PrefetchedFrame { frame_number: u32, segment_frames: DecodedSegmentFrames, segment_index: u32, + generation: u64, } struct FrameCache { @@ -131,6 +132,8 @@ impl Playback { tokio_mpsc::channel::(PREFETCH_BUFFER_SIZE * 2); let (frame_request_tx, mut frame_request_rx) = watch::channel(self.start_frame_number); let (playback_position_tx, playback_position_rx) = watch::channel(self.start_frame_number); + let (seek_generation_tx, mut seek_generation_rx) = watch::channel(0u64); + seek_generation_rx.borrow_and_update(); let in_flight_frames: Arc>> = Arc::new(RwLock::new(HashSet::new())); let prefetch_in_flight = in_flight_frames.clone(); @@ -138,6 +141,7 @@ impl Playback { let prefetch_stop_rx = stop_rx.clone(); let mut prefetch_project = self.project.clone(); + let mut prefetch_seek_generation = seek_generation_rx.clone(); let prefetch_segment_medias = self.segment_medias.clone(); let (prefetch_duration, has_timeline) = if let Some(timeline) = &self.project.borrow().timeline { @@ -156,7 +160,7 @@ impl Playback { } type PrefetchFuture = std::pin::Pin< Box< - dyn std::future::Future)> + dyn 
std::future::Future)> + Send, >, >; @@ -175,6 +179,7 @@ impl Playback { PARALLEL_DECODE_TASKS }; let initial_parallel_tasks = dynamic_parallel_tasks.min(4); + let mut active_generation = *prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); info!( @@ -191,6 +196,22 @@ impl Playback { cached_project = prefetch_project.borrow_and_update().clone(); } + if prefetch_seek_generation.has_changed().unwrap_or(false) { + let generation = *prefetch_seek_generation.borrow_and_update(); + if generation != active_generation { + active_generation = generation; + next_prefetch_frame = *frame_request_rx.borrow(); + frames_decoded = 0; + prefetched_behind.clear(); + + if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { + in_flight_guard.clear(); + } + + in_flight = FuturesUnordered::new(); + } + } + if let Ok(true) = frame_request_rx.has_changed() { let requested = *frame_request_rx.borrow_and_update(); if requested != next_prefetch_frame { @@ -263,6 +284,7 @@ impl Playback { let hide_camera = cached_project.camera.hide; let segment_index = segment.recording_clip; let is_initial = frames_decoded < 10; + let generation = active_generation; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.insert(frame_num); @@ -282,7 +304,7 @@ impl Playback { .get_frames(segment_time as f32, !hide_camera, clip_offsets) .await }; - (frame_num, segment_index, result) + (frame_num, segment_index, generation, result) })); } @@ -327,6 +349,7 @@ impl Playback { let decoders = segment_media.decoders.clone(); let hide_camera = cached_project.camera.hide; let segment_index = segment.recording_clip; + let generation = active_generation; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.insert(behind_frame); @@ -337,7 +360,7 @@ impl Playback { let result = decoders .get_frames(segment_time as f32, !hide_camera, clip_offsets) .await; - (behind_frame, segment_index, result) + (behind_frame, segment_index, 
generation, result) })); } } @@ -346,10 +369,15 @@ impl Playback { tokio::select! { biased; - Some((frame_num, segment_index, result)) = in_flight.next() => { + Some((frame_num, segment_index, generation, result)) = in_flight.next() => { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.remove(&frame_num); } + + if generation != active_generation { + continue; + } + frames_decoded = frames_decoded.saturating_add(1); if let Some(segment_frames) = result { @@ -357,6 +385,7 @@ impl Playback { frame_number: frame_num, segment_frames, segment_index, + generation, }).await; } else if frames_decoded <= 5 { warn!( @@ -407,6 +436,7 @@ impl Playback { let mut prefetch_buffer: VecDeque = VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); let mut frame_cache = FrameCache::new(FRAME_CACHE_SIZE); + let mut seek_generation = 0u64; let base_skip_threshold = (fps / 6).clamp(6, 16); let mut late_streak = 0u32; let mut skip_events = 0u64; @@ -453,7 +483,7 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { - if prefetched.frame_number >= frame_number { + if prefetched.generation == seek_generation && prefetched.frame_number >= frame_number { prefetch_buffer.push_back(prefetched); if first_frame_time.is_none() { first_frame_time = Some(Instant::now()); @@ -481,12 +511,14 @@ impl Playback { 'playback: loop { if seek_rx.has_changed().unwrap_or(false) { let seek_frame = *seek_rx.borrow_and_update(); + seek_generation = seek_generation.saturating_add(1); frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; pending_seek_observation = Some((seek_frame, Instant::now())); prefetch_buffer.retain(|p| p.frame_number >= frame_number); frame_cache.cache.clear(); + let _ = seek_generation_tx.send(seek_generation); let _ = frame_request_tx.send(frame_number); let _ = playback_position_tx.send(frame_number); if has_audio @@ -502,7 +534,9 @@ impl Playback { cached_project = self.project.borrow_and_update().clone(); } while let Ok(prefetched) = prefetch_rx.try_recv() { - if prefetched.frame_number >= frame_number { + if prefetched.generation == seek_generation + && prefetched.frame_number >= frame_number + { prefetch_buffer.push_back(prefetched); while prefetch_buffer.len() > PREFETCH_BUFFER_SIZE { if let Some(idx) = prefetch_buffer @@ -529,12 +563,14 @@ impl Playback { _ = stop_rx.changed() => break 'playback, _ = seek_rx.changed() => { let seek_frame = *seek_rx.borrow_and_update(); + seek_generation = seek_generation.saturating_add(1); frame_number = seek_frame; playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; pending_seek_observation = Some((seek_frame, Instant::now())); prefetch_buffer.retain(|p| p.frame_number >= frame_number); frame_cache.cache.clear(); + let _ = seek_generation_tx.send(seek_generation); let _ = frame_request_tx.send(frame_number); let _ = playback_position_tx.send(frame_number); if has_audio @@ -589,6 +625,9 @@ impl Playback { tokio::select! 
{ _ = stop_rx.changed() => break 'playback, Some(prefetched) = prefetch_rx.recv() => { + if prefetched.generation != seek_generation { + continue; + } if prefetched.frame_number == frame_number { found_frame = Some(prefetched); break; @@ -636,6 +675,11 @@ impl Playback { tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; if let Ok(Some(prefetched)) = wait_result { + if prefetched.generation != seek_generation { + frame_number = frame_number.saturating_add(1); + total_frames_skipped += 1; + continue; + } if prefetched.frame_number == frame_number { Some(( Arc::new(prefetched.segment_frames), From baba6e61f8d47e4d9f75b57e88c0f7f220278d6c Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:05:22 +0000 Subject: [PATCH 208/333] improve: clear prefetched buffer immediately on live seek --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index c88dde9c05..6d7ff24f03 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -234,6 +234,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Seek events now advance generation and flush prefetch consumption to prevent old in-flight decode outputs from polluting post-seek playback. - Reduces redundant decode/render work during aggressive scrub and improves settle reliability. +18. **Flushed prefetched-frame buffer on seek generation changes (2026-02-13)** + - Live seek handling now clears prefetch buffer immediately on seek events. + - Prevents stale buffered frames from prior playback position from being reused after seek jumps. + - Reduces unnecessary post-seek frame scans and improves settle determinism. + --- ## Root Cause Analysis Archive @@ -340,6 +345,7 @@ Decoder Pipeline: 19. Added matrix bottleneck analysis script for prioritized FPS optimization follow-up. 
20. Added baseline-vs-candidate comparison script to gate regressions in optimization loops. 21. Added seek-generation prefetch gating to drop stale decode outputs after live seek updates. +22. Cleared prefetched-frame buffer on live seek handling to avoid stale buffered frame reuse. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -362,6 +368,7 @@ Decoder Pipeline: - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. +- `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 7e9671554a..de686b71a5 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -516,7 +516,7 @@ impl Playback { playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; pending_seek_observation = Some((seek_frame, Instant::now())); - prefetch_buffer.retain(|p| p.frame_number >= frame_number); + prefetch_buffer.clear(); frame_cache.cache.clear(); let _ = seek_generation_tx.send(seek_generation); let _ = frame_request_tx.send(frame_number); @@ -568,7 +568,7 @@ impl Playback { playback_anchor_start = Instant::now(); playback_anchor_frame = seek_frame; pending_seek_observation = Some((seek_frame, Instant::now())); - prefetch_buffer.retain(|p| p.frame_number >= frame_number); + prefetch_buffer.clear(); frame_cache.cache.clear(); let _ = seek_generation_tx.send(seek_generation); let _ = frame_request_tx.send(frame_number); From 43f298d840d8cb003eae738199db264fcad4fc16 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:06:13 +0000 Subject: [PATCH 209/333] improve: avoid buffering stale prefetched frames behind playhead --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 6d7ff24f03..a87c66dfc6 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -239,6 +239,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prevents stale buffered frames from prior playback position from being reused after seek jumps. - Reduces unnecessary post-seek frame scans and improves settle determinism. +19. **Tightened in-flight prefetch buffering to current playhead (2026-02-13)** + - In-flight wait path now buffers only frames at or ahead of current frame. 
+ - Avoids re-queueing older frames from initial start position baseline. + - Reduces avoidable prefetch buffer churn during late playback and aggressive seek scenarios. + --- ## Root Cause Analysis Archive @@ -346,6 +351,7 @@ Decoder Pipeline: 20. Added baseline-vs-candidate comparison script to gate regressions in optimization loops. 21. Added seek-generation prefetch gating to drop stale decode outputs after live seek updates. 22. Cleared prefetched-frame buffer on live seek handling to avoid stale buffered frame reuse. +23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -369,6 +375,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. +- `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index de686b71a5..ee294b267e 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -631,7 +631,7 @@ impl Playback { if prefetched.frame_number == frame_number { found_frame = Some(prefetched); break; - } else if prefetched.frame_number >= self.start_frame_number { + } else if prefetched.frame_number >= frame_number { prefetch_buffer.push_back(prefetched); } } From 4becc57d6bc21e5cf49cb9f9701246f5a3b9b655 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:07:26 +0000 Subject: [PATCH 210/333] improve: support multi-input benchmark comparison gating --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 +++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 +++ scripts/compare-playback-benchmark-runs.js | 31 +++++++++++++++------- 4 files changed, 32 insertions(+), 10 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 62434b52b2..c7c5716cb9 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -89,6 +89,7 @@ node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-result # Compare candidate against baseline and flag regressions node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a87c66dfc6..c93dcaf164 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md 
+++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -244,6 +244,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Avoids re-queueing older frames from initial start position baseline. - Reduces avoidable prefetch buffer churn during late playback and aggressive seek scenarios. +20. **Expanded comparison gating for multi-run matrix diffs (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports multiple baseline and candidate inputs. + - Enables aggregate regression gating across batched machine runs instead of one directory at a time. + - Improves reliability of continuous optimization loops when matrix outputs are split across multiple sources. + --- ## Root Cause Analysis Archive @@ -352,6 +357,7 @@ Decoder Pipeline: 21. Added seek-generation prefetch gating to drop stale decode outputs after live seek updates. 22. Cleared prefetched-frame buffer on live seek handling to avoid stale buffered frame reuse. 23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. +24. Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -376,6 +382,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. +- `scripts/compare-playback-benchmark-runs.js`: comparison gating now accepts multiple baseline and candidate inputs for aggregated matrix regression checks. 
**Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 069f2f0a53..132d1bf440 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -146,6 +146,9 @@ Compare candidate run against baseline and fail on regressions: ```bash pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 + +# multiple baseline/candidate directories can be provided +pnpm bench:playback:compare -- --baseline /path/to/baseline-a --baseline /path/to/baseline-b --candidate /path/to/candidate-a --candidate /path/to/candidate-b --output /tmp/playback-matrix-final/playback-comparison.md ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index a7a7eaca22..73fe67253b 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -5,8 +5,8 @@ import path from "node:path"; function parseArgs(argv) { const options = { - baseline: null, - candidate: null, + baselineInputs: [], + candidateInputs: [], output: null, allowFpsDrop: 2, allowStartupIncreaseMs: 25, @@ -21,11 +21,15 @@ function parseArgs(argv) { continue; } if (arg === "--baseline") { - options.baseline = path.resolve(argv[++i] ?? ""); + const value = argv[++i]; + if (!value) throw new Error("Missing value for --baseline"); + options.baselineInputs.push(path.resolve(value)); continue; } if (arg === "--candidate") { - options.candidate = path.resolve(argv[++i] ?? 
""); + const value = argv[++i]; + if (!value) throw new Error("Missing value for --candidate"); + options.candidateInputs.push(path.resolve(value)); continue; } if (arg === "--output" || arg === "-o") { @@ -63,9 +67,9 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline --candidate [--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] -Compares baseline and candidate playback matrix JSON outputs and flags regressions.`); +Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } function collectJsonFiles(targetPath) { @@ -235,12 +239,19 @@ function main() { usage(); return; } - if (!options.baseline || !options.candidate) { - throw new Error("--baseline and --candidate are required"); + if ( + options.baselineInputs.length === 0 || + options.candidateInputs.length === 0 + ) { + throw new Error("At least one --baseline and one --candidate are required"); } - const baselineFiles = collectJsonFiles(options.baseline); - const candidateFiles = collectJsonFiles(options.candidate); + const baselineFiles = [ + ...new Set(options.baselineInputs.flatMap(collectJsonFiles)), + ]; + const candidateFiles = [ + ...new Set(options.candidateInputs.flatMap(collectJsonFiles)), + ]; if (baselineFiles.length === 0) { throw new Error("No baseline JSON files found"); } From bee50f55b365a018b218b87a0787e167ef59f99a Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:08:47 +0000 Subject: [PATCH 211/333] improve: add baseline compare gate to matrix finalization --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + 
crates/editor/PLAYBACK-FINDINGS.md | 7 ++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 + scripts/finalize-playback-matrix.js | 82 ++++++++++++++++++++++-- 4 files changed, 87 insertions(+), 6 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index c7c5716cb9..bbd376d28d 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -78,6 +78,7 @@ node scripts/validate-playback-matrix.js --input /path/to/json-results --require # Finalize aggregate + status + validation artifacts node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md # Publish matrix artifacts into this benchmark history diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index c93dcaf164..c0a7f5f8ab 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -249,6 +249,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Enables aggregate regression gating across batched machine runs instead of one directory at a time. - Improves reliability of continuous optimization loops when matrix outputs are split across multiple sources. +21. 
**Added finalization-integrated regression gate support (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now supports `--compare-baseline` and threshold args. + - Finalization can now produce aggregate/status/validation/bottleneck artifacts and run baseline-vs-candidate gating in one command. + - Keeps optimization loops strict by failing finalize runs when regression tolerances are exceeded. + --- ## Root Cause Analysis Archive @@ -358,6 +363,7 @@ Decoder Pipeline: 22. Cleared prefetched-frame buffer on live seek handling to avoid stale buffered frame reuse. 23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. 24. Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. +25. Added optional baseline comparison gating inside matrix finalization workflow. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -383,6 +389,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. - `scripts/compare-playback-benchmark-runs.js`: comparison gating now accepts multiple baseline and candidate inputs for aggregated matrix regression checks. +- `scripts/finalize-playback-matrix.js`: finalization now supports optional baseline comparison gating and threshold controls in the same pass. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 132d1bf440..736ff538d5 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -117,6 +117,9 @@ Include optimization thresholds when finalizing: ```bash pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 + +# include baseline comparison gate during finalization +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 ``` Finalize and publish to benchmark history in one command: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index d542d6f1b5..257e4c11c6 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -14,6 +14,10 @@ function parseArgs(argv) { maxStartupMs: 250, analyze: true, publishTarget: null, + compareBaselineInputs: [], + allowFpsDrop: 2, + allowStartupIncreaseMs: 25, + allowScrubP95IncreaseMs: 5, }; for (let i = 2; i < argv.length; i++) { @@ -77,6 +81,36 @@ function parseArgs(argv) { options.publishTarget = path.resolve(value); continue; } + if (arg === "--compare-baseline") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --compare-baseline"); + options.compareBaselineInputs.push(path.resolve(value)); + continue; + } + if (arg === "--allow-fps-drop") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-fps-drop value"); + } + options.allowFpsDrop = value; + continue; + } + if (arg === "--allow-startup-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? 
""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-startup-increase-ms value"); + } + options.allowStartupIncreaseMs = value; + continue; + } + if (arg === "--allow-scrub-p95-increase-ms") { + const value = Number.parseFloat(argv[++i] ?? ""); + if (!Number.isFinite(value) || value < 0) { + throw new Error("Invalid --allow-scrub-p95-increase-ms value"); + } + options.allowScrubP95IncreaseMs = value; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -84,9 +118,9 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--publish-target ] -Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs.`); +Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } function run(command, args) { @@ -112,11 +146,24 @@ function main() { fs.mkdirSync(options.outputDir, { recursive: true }); } - const aggregatePath = path.join(options.outputDir, "playback-benchmark-aggregate.md"); + const aggregatePath = path.join( + options.outputDir, + "playback-benchmark-aggregate.md", + ); const statusPath = path.join(options.outputDir, "playback-matrix-status.md"); - const validationPath = path.join(options.outputDir, "playback-matrix-validation.json"); - const bottleneckPath = path.join(options.outputDir, "playback-bottlenecks.md"); - const bottleneckJsonPath = path.join(options.outputDir, "playback-bottlenecks.json"); + const validationPath = path.join( + options.outputDir, + "playback-matrix-validation.json", + ); + const bottleneckPath = path.join( + options.outputDir, + "playback-bottlenecks.md", + ); + const bottleneckJsonPath = path.join( + options.outputDir, + "playback-bottlenecks.json", + ); + const comparisonPath = path.join(options.outputDir, "playback-comparison.md"); const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; const statusArgs = ["scripts/build-playback-matrix-report.js"]; @@ -177,6 +224,26 @@ function main() { } run("node", publishArgs); } + if (options.compareBaselineInputs.length > 0) { + const compareArgs = ["scripts/compare-playback-benchmark-runs.js"]; + for (const baselineInput of options.compareBaselineInputs) { + compareArgs.push("--baseline", baselineInput); + } + for (const candidateInput of options.inputs) { + compareArgs.push("--candidate", candidateInput); + } + compareArgs.push( + "--output", + comparisonPath, + "--allow-fps-drop", + String(options.allowFpsDrop), + "--allow-startup-increase-ms", + String(options.allowStartupIncreaseMs), + "--allow-scrub-p95-increase-ms", + String(options.allowScrubP95IncreaseMs), + ); + run("node", compareArgs); + } console.log(`Aggregate markdown: 
${aggregatePath}`); console.log(`Status markdown: ${statusPath}`); @@ -188,6 +255,9 @@ function main() { if (options.publishTarget) { console.log(`Published target: ${options.publishTarget}`); } + if (options.compareBaselineInputs.length > 0) { + console.log(`Comparison report: ${comparisonPath}`); + } } try { From 61d10d7494ea3b9cd1d0a81fa9e4a74b9ab37e26 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:12:03 +0000 Subject: [PATCH 212/333] improve: key in-flight frame tracking by seek generation --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 26 ++++++++++++++------------ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index c0a7f5f8ab..ed07b623b7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -254,6 +254,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Finalization can now produce aggregate/status/validation/bottleneck artifacts and run baseline-vs-candidate gating in one command. - Keeps optimization loops strict by failing finalize runs when regression tolerances are exceeded. +22. **Made in-flight tracking generation-aware to avoid seek races (2026-02-13)** + - Shared in-flight frame tracking now keys entries by `(seek_generation, frame_number)`. + - Prevents old-generation decode completions from removing new-generation in-flight markers for the same frame number. + - Improves seek correctness under rapid repeated seeks to nearby frame ranges. + --- ## Root Cause Analysis Archive @@ -364,6 +369,7 @@ Decoder Pipeline: 23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. 24. Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. 25. Added optional baseline comparison gating inside matrix finalization workflow. +26. 
Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -390,6 +396,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. - `scripts/compare-playback-benchmark-runs.js`: comparison gating now accepts multiple baseline and candidate inputs for aggregated matrix regression checks. - `scripts/finalize-playback-matrix.js`: finalization now supports optional baseline comparison gating and threshold controls in the same pass. +- `crates/editor/src/playback.rs`: in-flight frame markers now include seek generation to prevent old decode paths from clearing current-generation markers. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index ee294b267e..10963c31f2 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -135,7 +135,8 @@ impl Playback { let (seek_generation_tx, mut seek_generation_rx) = watch::channel(0u64); seek_generation_rx.borrow_and_update(); - let in_flight_frames: Arc>> = Arc::new(RwLock::new(HashSet::new())); + let in_flight_frames: Arc>> = + Arc::new(RwLock::new(HashSet::new())); let prefetch_in_flight = in_flight_frames.clone(); let main_in_flight = in_flight_frames; @@ -261,7 +262,7 @@ impl Playback { let already_in_flight = prefetch_in_flight .read() - .map(|guard| guard.contains(&frame_num)) + .map(|guard| guard.contains(&(active_generation, frame_num))) .unwrap_or(false); if already_in_flight { next_prefetch_frame += 1; @@ -287,7 +288,7 @@ impl Playback { let generation = active_generation; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.insert(frame_num); + in_flight_guard.insert((generation, 
frame_num)); } in_flight.push(Box::pin(async move { @@ -328,7 +329,7 @@ impl Playback { let already_in_flight = prefetch_in_flight .read() - .map(|guard| guard.contains(&behind_frame)) + .map(|guard| guard.contains(&(active_generation, behind_frame))) .unwrap_or(false); if already_in_flight { continue; @@ -352,7 +353,7 @@ impl Playback { let generation = active_generation; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.insert(behind_frame); + in_flight_guard.insert((generation, behind_frame)); } prefetched_behind.insert(behind_frame); @@ -371,7 +372,7 @@ impl Playback { Some((frame_num, segment_index, generation, result)) = in_flight.next() => { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.remove(&frame_num); + in_flight_guard.remove(&(generation, frame_num)); } if generation != active_generation { @@ -613,7 +614,7 @@ impl Playback { } else { let is_in_flight = main_in_flight .read() - .map(|guard| guard.contains(&frame_number)) + .map(|guard| guard.contains(&(seek_generation, frame_number))) .unwrap_or(false); if is_in_flight { @@ -638,7 +639,7 @@ impl Playback { _ = tokio::time::sleep(in_flight_poll_interval) => { let still_in_flight = main_in_flight .read() - .map(|guard| guard.contains(&frame_number)) + .map(|guard| guard.contains(&(seek_generation, frame_number))) .unwrap_or(false); if !still_in_flight { break; @@ -717,21 +718,22 @@ impl Playback { .map(|v| v.offsets) .unwrap_or_default(); + let in_flight_key = (seek_generation, frame_number); if let Ok(mut guard) = main_in_flight.write() { - guard.insert(frame_number); + guard.insert(in_flight_key); } let max_wait = frame_fetch_timeout; let data = tokio::select! 
{ _ = stop_rx.changed() => { if let Ok(mut guard) = main_in_flight.write() { - guard.remove(&frame_number); + guard.remove(&in_flight_key); } break 'playback }, _ = tokio::time::sleep(max_wait) => { if let Ok(mut guard) = main_in_flight.write() { - guard.remove(&frame_number); + guard.remove(&in_flight_key); } frame_number = frame_number.saturating_add(1); total_frames_skipped += 1; @@ -741,7 +743,7 @@ impl Playback { .decoders .get_frames(segment_time as f32, !cached_project.camera.hide, clip_offsets) => { if let Ok(mut guard) = main_in_flight.write() { - guard.remove(&frame_number); + guard.remove(&in_flight_key); } data }, From d140e69a14dfa9535db4b5b2d55a3971150addfb Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:13:46 +0000 Subject: [PATCH 213/333] improve: publish baseline comparison artifacts in matrix summaries --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 8 ++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 ++++ scripts/finalize-playback-matrix.js | 3 ++ scripts/publish-playback-matrix-summary.js | 33 ++++++++++++++++++---- 5 files changed, 45 insertions(+), 6 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index bbd376d28d..94350ea549 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -83,6 +83,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md 
/tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md /tmp/playback-matrix-final/playback-comparison.md # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index ed07b623b7..ea6a0edf5c 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -259,6 +259,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prevents old-generation decode completions from removing new-generation in-flight markers for the same frame number. - Improves seek correctness under rapid repeated seeks to nearby frame ranges. +23. **Added comparison artifact publishing in finalize workflows (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` now accepts optional `--comparison-md`. + - `scripts/finalize-playback-matrix.js` now forwards generated comparison artifact to publishing when both compare and publish options are enabled. + - Keeps benchmark history entries self-contained with regression gate evidence. + --- ## Root Cause Analysis Archive @@ -370,6 +375,7 @@ Decoder Pipeline: 24. Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. 25. Added optional baseline comparison gating inside matrix finalization workflow. 26. Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. +27. Added comparison artifact attachment support in publish/finalize matrix summary workflows. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. 
@@ -397,6 +403,8 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: comparison gating now accepts multiple baseline and candidate inputs for aggregated matrix regression checks. - `scripts/finalize-playback-matrix.js`: finalization now supports optional baseline comparison gating and threshold controls in the same pass. - `crates/editor/src/playback.rs`: in-flight frame markers now include seek generation to prevent old decode paths from clearing current-generation markers. +- `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional comparison artifact attachment. +- `scripts/finalize-playback-matrix.js`: finalize flow now includes comparison artifact when publishing and baseline comparison are both requested. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 736ff538d5..bba2e07b46 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -136,6 +136,12 @@ pnpm bench:playback:publish -- \ --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md + +pnpm bench:playback:publish -- \ + --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md \ + --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ + --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ + --comparison-md /tmp/playback-matrix-final/playback-comparison.md ``` Generate bottleneck analysis for optimization backlog: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 257e4c11c6..30ebdf5792 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -222,6 +222,9 @@ function main() { if (options.analyze) { 
publishArgs.push("--bottlenecks-md", bottleneckPath); } + if (options.compareBaselineInputs.length > 0) { + publishArgs.push("--comparison-md", comparisonPath); + } run("node", publishArgs); } if (options.compareBaselineInputs.length > 0) { diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 45fb7fe107..fab62486f9 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -8,6 +8,7 @@ function parseArgs(argv) { aggregateMd: null, statusMd: null, bottlenecksMd: null, + comparisonMd: null, validationJson: null, target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), }; @@ -31,6 +32,10 @@ function parseArgs(argv) { options.bottlenecksMd = path.resolve(argv[++i] ?? ""); continue; } + if (arg === "--comparison-md") { + options.comparisonMd = path.resolve(argv[++i] ?? ""); + continue; + } if (arg === "--validation-json") { options.validationJson = path.resolve(argv[++i] ?? ""); continue; @@ -46,7 +51,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--target ] + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--target ] Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); } @@ -57,7 +62,13 @@ function ensureFile(filePath, label) { } } -function buildSummarySection(aggregateMd, statusMd, validationJson, bottlenecksMd) { +function buildSummarySection( + aggregateMd, + statusMd, + validationJson, + bottlenecksMd, + comparisonMd, +) { const now = new Date().toISOString(); const validation = JSON.parse(validationJson); const status = validation.passed ? 
"✅ MATRIX PASS" : "❌ MATRIX FAIL"; @@ -99,6 +110,12 @@ function buildSummarySection(aggregateMd, statusMd, validationJson, bottlenecksM markdown += `${bottlenecksMd.trim()}\n\n`; markdown += "\n\n"; } + if (comparisonMd) { + markdown += + "
<details>\n<summary>Baseline vs Candidate Comparison</summary>\n\n"; + markdown += `${comparisonMd.trim()}\n\n`; + markdown += "</details>
\n\n"; + } return markdown; } @@ -115,10 +132,7 @@ function writeToBenchmarkHistory(targetFile, summaryMd) { const insertPos = start + markerStart.length; const updated = - current.slice(0, insertPos) + - "\n\n" + - summaryMd + - current.slice(end); + current.slice(0, insertPos) + "\n\n" + summaryMd + current.slice(end); fs.writeFileSync(targetFile, updated, "utf8"); } @@ -135,6 +149,9 @@ function main() { if (options.bottlenecksMd) { ensureFile(options.bottlenecksMd, "Bottlenecks markdown"); } + if (options.comparisonMd) { + ensureFile(options.comparisonMd, "Comparison markdown"); + } ensureFile(options.target, "Target"); const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8"); @@ -143,11 +160,15 @@ function main() { const bottlenecksMd = options.bottlenecksMd ? fs.readFileSync(options.bottlenecksMd, "utf8") : null; + const comparisonMd = options.comparisonMd + ? fs.readFileSync(options.comparisonMd, "utf8") + : null; const summaryMd = buildSummarySection( aggregateMd, statusMd, validationJson, bottlenecksMd, + comparisonMd, ); writeToBenchmarkHistory(options.target, summaryMd); console.log(`Published matrix summary into ${options.target}`); From 9b4e9b9f82fb0d10d0c49e4f5c94d24778fdb89a Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:16:54 +0000 Subject: [PATCH 214/333] improve: separate prefetch and decode in-flight tracking --- crates/editor/PLAYBACK-FINDINGS.md | 11 ++++++++ crates/editor/src/playback.rs | 42 ++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index ea6a0edf5c..cb077ed982 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -264,6 +264,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/finalize-playback-matrix.js` now forwards generated comparison artifact to publishing when both compare and publish options are enabled. 
- Keeps benchmark history entries self-contained with regression gate evidence. +24. **Separated prefetch/direct decode in-flight tracking (2026-02-13)** + - Playback now tracks prefetch in-flight frames and direct decode in-flight frames in separate generation-aware sets. + - Prevents prefetch-side clear/reset paths from clearing direct decode in-flight markers. + - In-flight wait logic now checks both sets and direct decode outputs are dropped when a pending seek is detected before frame use. + --- ## Root Cause Analysis Archive @@ -374,8 +379,11 @@ Decoder Pipeline: 23. Restricted in-flight prefetch buffering to current frame or newer frames during frame wait path. 24. Expanded benchmark comparison gating to support multi-input baseline/candidate matrix sets. 25. Added optional baseline comparison gating inside matrix finalization workflow. +26. Made in-flight frame tracking generation-aware to prevent cross-seek marker collisions. +27. Split prefetch and direct decode in-flight tracking and guarded direct decode frame usage when seek updates are pending. 26. Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Added comparison artifact attachment support in publish/finalize matrix summary workflows. +28. Split prefetch and direct decode in-flight tracking to avoid cross-path marker interference. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -397,6 +405,8 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. 
+- `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. +- `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. @@ -405,6 +415,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: in-flight frame markers now include seek generation to prevent old decode paths from clearing current-generation markers. - `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional comparison artifact attachment. - `scripts/finalize-playback-matrix.js`: finalize flow now includes comparison artifact when publishing and baseline comparison are both requested. +- `crates/editor/src/playback.rs`: prefetch and direct decode now use separate generation-aware in-flight sets, with combined checks in frame wait path. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 10963c31f2..1071231185 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -135,10 +135,12 @@ impl Playback { let (seek_generation_tx, mut seek_generation_rx) = watch::channel(0u64); seek_generation_rx.borrow_and_update(); - let in_flight_frames: Arc>> = + let prefetch_in_flight_frames: Arc>> = + Arc::new(RwLock::new(HashSet::new())); + let prefetch_in_flight = prefetch_in_flight_frames.clone(); + let playback_prefetch_in_flight = prefetch_in_flight_frames; + let playback_decode_in_flight: Arc>> = Arc::new(RwLock::new(HashSet::new())); - let prefetch_in_flight = in_flight_frames.clone(); - let main_in_flight = in_flight_frames; let prefetch_stop_rx = stop_rx.clone(); let mut prefetch_project = self.project.clone(); @@ -612,10 +614,15 @@ impl Playback { prefetched.segment_index, )) } else { - let is_in_flight = main_in_flight + let in_flight_key = (seek_generation, frame_number); + let is_in_flight = playback_prefetch_in_flight .read() - .map(|guard| guard.contains(&(seek_generation, frame_number))) - .unwrap_or(false); + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false) + || playback_decode_in_flight + .read() + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false); if is_in_flight { let wait_start = Instant::now(); @@ -637,10 +644,14 @@ impl Playback { } } _ = tokio::time::sleep(in_flight_poll_interval) => { - let still_in_flight = main_in_flight + let still_in_flight = playback_prefetch_in_flight .read() - .map(|guard| guard.contains(&(seek_generation, frame_number))) - .unwrap_or(false); + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false) + || playback_decode_in_flight + .read() + .map(|guard| guard.contains(&in_flight_key)) + .unwrap_or(false); if !still_in_flight { break; } @@ -718,21 +729,20 @@ impl Playback { .map(|v| v.offsets) .unwrap_or_default(); - let in_flight_key = (seek_generation, frame_number); - if 
let Ok(mut guard) = main_in_flight.write() { + if let Ok(mut guard) = playback_decode_in_flight.write() { guard.insert(in_flight_key); } let max_wait = frame_fetch_timeout; let data = tokio::select! { _ = stop_rx.changed() => { - if let Ok(mut guard) = main_in_flight.write() { + if let Ok(mut guard) = playback_decode_in_flight.write() { guard.remove(&in_flight_key); } break 'playback }, _ = tokio::time::sleep(max_wait) => { - if let Ok(mut guard) = main_in_flight.write() { + if let Ok(mut guard) = playback_decode_in_flight.write() { guard.remove(&in_flight_key); } frame_number = frame_number.saturating_add(1); @@ -742,13 +752,17 @@ impl Playback { data = segment_media .decoders .get_frames(segment_time as f32, !cached_project.camera.hide, clip_offsets) => { - if let Ok(mut guard) = main_in_flight.write() { + if let Ok(mut guard) = playback_decode_in_flight.write() { guard.remove(&in_flight_key); } data }, }; + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + data.map(|frames| (Arc::new(frames), segment.recording_clip)) } } From 7b26c215c96aacc138464ba50c07329d2b5b66f5 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:18:34 +0000 Subject: [PATCH 215/333] improve: gate benchmark comparisons on candidate coverage gaps --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 9 ++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 ++ scripts/compare-playback-benchmark-runs.js | 50 ++++++++++++++++++---- 4 files changed, 55 insertions(+), 8 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 94350ea549..44a210ebfa 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -92,6 +92,7 @@ node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-result # Compare candidate against baseline and flag regressions node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate 
/path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index cb077ed982..b148eab06d 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -269,6 +269,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prevents prefetch-side clear/reset paths from clearing direct decode in-flight markers. - In-flight wait logic now checks both sets and direct decode outputs are dropped when a pending seek is detected before frame use. +25. **Added comparison coverage gating for missing candidate rows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now reports baseline rows that are missing in candidate runs. + - Comparison now fails by default when candidate coverage is missing baseline rows. + - Optional `--allow-missing-candidate` flag keeps metric regression checks while allowing partial candidate matrices. + --- ## Root Cause Analysis Archive @@ -381,9 +386,11 @@ Decoder Pipeline: 25. Added optional baseline comparison gating inside matrix finalization workflow. 26. Made in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Split prefetch and direct decode in-flight tracking and guarded direct decode frame usage when seek updates are pending. +28. Added missing-candidate-row coverage gating in baseline-vs-candidate comparison script with optional override flag. 26. 
Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Added comparison artifact attachment support in publish/finalize matrix summary workflows. 28. Split prefetch and direct decode in-flight tracking to avoid cross-path marker interference. +29. Added missing-candidate-row gating to baseline-vs-candidate comparison workflow. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -407,6 +414,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. +- `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. @@ -416,6 +424,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional comparison artifact attachment. - `scripts/finalize-playback-matrix.js`: finalize flow now includes comparison artifact when publishing and baseline comparison are both requested. 
- `crates/editor/src/playback.rs`: prefetch and direct decode now use separate generation-aware in-flight sets, with combined checks in frame wait path. +- `scripts/compare-playback-benchmark-runs.js`: comparison now reports and gates missing candidate rows relative to baseline coverage. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index bba2e07b46..30c1e4c0b3 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -158,6 +158,9 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # multiple baseline/candidate directories can be provided pnpm bench:playback:compare -- --baseline /path/to/baseline-a --baseline /path/to/baseline-b --candidate /path/to/candidate-a --candidate /path/to/candidate-b --output /tmp/playback-matrix-final/playback-comparison.md + +# optional: allow missing candidate rows while still checking metric regressions +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 73fe67253b..fc1c101dd1 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -11,6 +11,7 @@ function parseArgs(argv) { allowFpsDrop: 2, allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, + allowMissingCandidate: false, }; for (let i = 2; i < argv.length; i++) { @@ -60,6 +61,10 @@ function parseArgs(argv) { options.allowScrubP95IncreaseMs = value; continue; } + if (arg === "--allow-missing-candidate") { + options.allowMissingCandidate = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -67,7 +72,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node 
scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } @@ -172,8 +177,22 @@ function formatNumber(value, digits = 2) { return value === null ? "n/a" : value.toFixed(digits); } -function compareMetrics(baselineRows, candidateRows, options) { +function compareMetrics(baselineRows, candidateRows) { const comparisons = []; + const missingCandidateRows = []; + + for (const [key, baseline] of baselineRows) { + const candidate = candidateRows.get(key); + if (!candidate) { + missingCandidateRows.push({ + platform: baseline.platform, + gpu: baseline.gpu, + scenario: baseline.scenario, + recording: baseline.recording, + format: baseline.format, + }); + } + } for (const [key, candidate] of candidateRows) { const baseline = baselineRows.get(key); @@ -211,10 +230,10 @@ function compareMetrics(baselineRows, candidateRows, options) { } comparisons.sort((a, b) => b.regressions.length - a.regressions.length); - return comparisons; + return { comparisons, missingCandidateRows }; } -function toMarkdown(comparisons, options) { +function toMarkdown(comparisons, missingCandidateRows, options) { const regressions = comparisons.filter( (entry) => entry.regressions.length > 0, ); @@ -222,7 +241,16 @@ function toMarkdown(comparisons, options) { md += "# Playback Benchmark Comparison\n\n"; md += `Generated: ${new Date().toISOString()}\n\n`; md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, 
scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; - md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}\n\n`; + md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}\n\n`; + if (missingCandidateRows.length > 0) { + md += "## Missing Candidate Rows\n\n"; + md += "| Platform | GPU | Scenario | Recording | Format |\n"; + md += "|---|---|---|---|---|\n"; + for (const row of missingCandidateRows) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} |\n`; + } + md += "\n"; + } md += "| Platform | GPU | Scenario | Recording | Format | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; md += "|---|---|---|---|---|---:|---:|---:|---|\n"; @@ -261,8 +289,11 @@ function main() { const baselineRows = collectMetrics(baselineFiles); const candidateRows = collectMetrics(candidateFiles); - const comparisons = compareMetrics(baselineRows, candidateRows, options); - const markdown = toMarkdown(comparisons, options); + const { comparisons, missingCandidateRows } = compareMetrics( + baselineRows, + candidateRows, + ); + const markdown = toMarkdown(comparisons, missingCandidateRows, options); if (options.output) { fs.writeFileSync(options.output, markdown, "utf8"); @@ -271,7 +302,10 @@ function main() { process.stdout.write(markdown); } - if (comparisons.some((entry) => entry.regressions.length > 0)) { + if ( + comparisons.some((entry) => entry.regressions.length > 0) || + (!options.allowMissingCandidate && missingCandidateRows.length > 0) + ) { process.exit(1); } } From 65148e5cc9dd0b6a316ca41ddd3cfe8d6b0e0f21 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:20:33 +0000 Subject: [PATCH 216/333] improve: run finalize comparison before publish attachment --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 9 +++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 ++ 
scripts/finalize-playback-matrix.js | 50 ++++++++++++++---------- 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 44a210ebfa..bc03e8fd90 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -79,6 +79,7 @@ node scripts/validate-playback-matrix.js --input /path/to/json-results --require node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md # Publish matrix artifacts into this benchmark history diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b148eab06d..7589f350ae 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -274,6 +274,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison now fails by default when candidate coverage is missing baseline rows. 
- Optional `--allow-missing-candidate` flag keeps metric regression checks while allowing partial candidate matrices. +26. **Fixed finalize publish ordering for comparison artifacts (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now executes baseline comparison before publish when both options are enabled. + - Prevents publish step from referencing missing comparison artifact files. + - Added finalize passthrough support for `--allow-missing-candidate`. + --- ## Root Cause Analysis Archive @@ -387,10 +392,12 @@ Decoder Pipeline: 26. Made in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Split prefetch and direct decode in-flight tracking and guarded direct decode frame usage when seek updates are pending. 28. Added missing-candidate-row coverage gating in baseline-vs-candidate comparison script with optional override flag. +29. Fixed finalize compare/publish ordering so comparison artifacts exist before publish attachment and added finalize support for missing-candidate override. 26. Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Added comparison artifact attachment support in publish/finalize matrix summary workflows. 28. Split prefetch and direct decode in-flight tracking to avoid cross-path marker interference. 29. Added missing-candidate-row gating to baseline-vs-candidate comparison workflow. +30. Fixed finalize compare/publish ordering and propagated missing-candidate override into finalize compare flow. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -415,6 +422,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. 
- `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. +- `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. @@ -425,6 +433,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: finalize flow now includes comparison artifact when publishing and baseline comparison are both requested. - `crates/editor/src/playback.rs`: prefetch and direct decode now use separate generation-aware in-flight sets, with combined checks in frame wait path. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports and gates missing candidate rows relative to baseline coverage. +- `scripts/finalize-playback-matrix.js`: comparison now runs before publish in combined workflows and forwards missing-candidate override to compare step. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 30c1e4c0b3..51f937d307 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -120,6 +120,9 @@ pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-di # include baseline comparison gate during finalization pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 + +# optional: allow missing candidate rows during compare gate +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate ``` Finalize and publish to benchmark history in one command: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 30ebdf5792..130fd6bb34 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -18,6 +18,7 @@ function parseArgs(argv) { allowFpsDrop: 2, allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, + allowMissingCandidate: false, }; for (let i = 2; i < argv.length; i++) { @@ -111,6 +112,10 @@ function parseArgs(argv) { options.allowScrubP95IncreaseMs = value; continue; } + if (arg === "--allow-missing-candidate") { + options.allowMissingCandidate = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -118,7 +123,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] 
--output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -207,26 +212,6 @@ function main() { ); run("node", analyzeArgs); } - if (options.publishTarget) { - const publishArgs = [ - "scripts/publish-playback-matrix-summary.js", - "--aggregate-md", - aggregatePath, - "--status-md", - statusPath, - "--validation-json", - validationPath, - "--target", - options.publishTarget, - ]; - if (options.analyze) { - publishArgs.push("--bottlenecks-md", bottleneckPath); - } - if (options.compareBaselineInputs.length > 0) { - publishArgs.push("--comparison-md", comparisonPath); - } - run("node", publishArgs); - } if (options.compareBaselineInputs.length > 0) { const compareArgs = ["scripts/compare-playback-benchmark-runs.js"]; for (const baselineInput of options.compareBaselineInputs) { @@ -245,8 +230,31 @@ function main() { "--allow-scrub-p95-increase-ms", String(options.allowScrubP95IncreaseMs), ); + if (options.allowMissingCandidate) { + compareArgs.push("--allow-missing-candidate"); + } run("node", compareArgs); } + if (options.publishTarget) { + const publishArgs = [ + "scripts/publish-playback-matrix-summary.js", + "--aggregate-md", + aggregatePath, + "--status-md", + statusPath, + "--validation-json", + validationPath, + 
"--target", + options.publishTarget, + ]; + if (options.analyze) { + publishArgs.push("--bottlenecks-md", bottleneckPath); + } + if (options.compareBaselineInputs.length > 0) { + publishArgs.push("--comparison-md", comparisonPath); + } + run("node", publishArgs); + } console.log(`Aggregate markdown: ${aggregatePath}`); console.log(`Status markdown: ${statusPath}`); From e6a5548de9ad5bb3335678c17dc263c665bddc34 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:22:18 +0000 Subject: [PATCH 217/333] improve: emit structured comparison artifacts for matrix gating --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 11 ++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 ++ scripts/compare-playback-benchmark-runs.js | 46 +++++++++++++++++++++- scripts/finalize-playback-matrix.js | 7 ++++ 5 files changed, 67 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index bc03e8fd90..8f533ecdfe 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -94,6 +94,7 @@ node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-result node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output 
/tmp/playback-comparison.md --output-json /tmp/playback-comparison.json ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 7589f350ae..4aa9451bc6 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -279,6 +279,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prevents publish step from referencing missing comparison artifact files. - Added finalize passthrough support for `--allow-missing-candidate`. +27. **Added structured JSON output for comparison gating (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--output-json`. + - Emits comparison summary/regression/missing-coverage details for automation. + - `scripts/finalize-playback-matrix.js` now writes comparison markdown and JSON artifacts during baseline comparison runs. + --- ## Root Cause Analysis Archive @@ -393,11 +398,13 @@ Decoder Pipeline: 27. Split prefetch and direct decode in-flight tracking and guarded direct decode frame usage when seek updates are pending. 28. Added missing-candidate-row coverage gating in baseline-vs-candidate comparison script with optional override flag. 29. Fixed finalize compare/publish ordering so comparison artifacts exist before publish attachment and added finalize support for missing-candidate override. +30. Added structured JSON output for baseline-vs-candidate comparison script and wired finalize comparison runs to emit comparison JSON artifacts. 26. Made shared in-flight frame tracking generation-aware to prevent cross-seek marker collisions. 27. Added comparison artifact attachment support in publish/finalize matrix summary workflows. 28. Split prefetch and direct decode in-flight tracking to avoid cross-path marker interference. 29. Added missing-candidate-row gating to baseline-vs-candidate comparison workflow. 30. 
Fixed finalize compare/publish ordering and propagated missing-candidate override into finalize compare flow. +31. Added structured JSON artifact emission for baseline-vs-candidate comparison workflows. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -423,6 +430,8 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. +- `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. +- `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. @@ -434,6 +443,8 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch and direct decode now use separate generation-aware in-flight sets, with combined checks in frame wait path. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports and gates missing candidate rows relative to baseline coverage. 
- `scripts/finalize-playback-matrix.js`: comparison now runs before publish in combined workflows and forwards missing-candidate override to compare step. +- `scripts/compare-playback-benchmark-runs.js`: comparison now supports optional structured JSON output for downstream automation. +- `scripts/finalize-playback-matrix.js`: baseline comparison in finalize now writes both markdown and JSON comparison artifacts. **Results**: - ✅ `cargo +stable check -p cap-editor` passes after changes. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 51f937d307..d88f7e982b 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -164,6 +164,9 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-a --baseline /path/t # optional: allow missing candidate rows while still checking metric regressions pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate + +# emit structured JSON alongside markdown for automation +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --output-json /tmp/playback-matrix-final/playback-comparison.json ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index fc1c101dd1..2f76545b6a 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -8,6 +8,7 @@ function parseArgs(argv) { baselineInputs: [], candidateInputs: [], output: null, + outputJson: null, allowFpsDrop: 2, allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, @@ -37,6 +38,10 @@ function parseArgs(argv) { options.output = path.resolve(argv[++i] ?? ""); continue; } + if (arg === "--output-json") { + options.outputJson = path.resolve(argv[++i] ?? 
""); + continue; + } if (arg === "--allow-fps-drop") { const value = Number.parseFloat(argv[++i] ?? ""); if (!Number.isFinite(value) || value < 0) { @@ -72,7 +77,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } @@ -261,6 +266,32 @@ function toMarkdown(comparisons, missingCandidateRows, options) { return md; } +function buildJsonOutput(comparisons, missingCandidateRows, options) { + const regressions = comparisons.filter( + (entry) => entry.regressions.length > 0, + ); + return { + generatedAt: new Date().toISOString(), + tolerance: { + allowFpsDrop: options.allowFpsDrop, + allowStartupIncreaseMs: options.allowStartupIncreaseMs, + allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, + allowMissingCandidate: options.allowMissingCandidate, + }, + summary: { + comparedRows: comparisons.length, + regressions: regressions.length, + missingCandidateRows: missingCandidateRows.length, + passed: + regressions.length === 0 && + (options.allowMissingCandidate || missingCandidateRows.length === 0), + }, + regressions, + missingCandidateRows, + comparisons, + }; +} + function main() { const options = parseArgs(process.argv); if (options.help) { @@ -294,6 +325,11 @@ function main() { candidateRows, ); const markdown = toMarkdown(comparisons, missingCandidateRows, options); + const outputJson = 
buildJsonOutput( + comparisons, + missingCandidateRows, + options, + ); if (options.output) { fs.writeFileSync(options.output, markdown, "utf8"); @@ -301,6 +337,14 @@ function main() { } else { process.stdout.write(markdown); } + if (options.outputJson) { + fs.writeFileSync( + options.outputJson, + JSON.stringify(outputJson, null, 2), + "utf8", + ); + console.log(`Wrote comparison JSON to ${options.outputJson}`); + } if ( comparisons.some((entry) => entry.regressions.length > 0) || diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 130fd6bb34..4e3699b401 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -169,6 +169,10 @@ function main() { "playback-bottlenecks.json", ); const comparisonPath = path.join(options.outputDir, "playback-comparison.md"); + const comparisonJsonPath = path.join( + options.outputDir, + "playback-comparison.json", + ); const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; const statusArgs = ["scripts/build-playback-matrix-report.js"]; @@ -223,6 +227,8 @@ function main() { compareArgs.push( "--output", comparisonPath, + "--output-json", + comparisonJsonPath, "--allow-fps-drop", String(options.allowFpsDrop), "--allow-startup-increase-ms", @@ -268,6 +274,7 @@ function main() { } if (options.compareBaselineInputs.length > 0) { console.log(`Comparison report: ${comparisonPath}`); + console.log(`Comparison JSON: ${comparisonJsonPath}`); } } From 6924c3832296dfeb930551d10cc48a739babefed Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:26:13 +0000 Subject: [PATCH 218/333] improve: use keyed prefetch buffer for faster frame lookup --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++ crates/editor/src/playback.rs | 90 ++++++++++++++++-------------- 2 files changed, 55 insertions(+), 42 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 4aa9451bc6..54c5ca97d5 100644 --- 
a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -284,6 +284,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Emits comparison summary/regression/missing-coverage details for automation. - `scripts/finalize-playback-matrix.js` now writes comparison markdown and JSON artifacts during baseline comparison runs. +28. **Switched playback prefetch buffer to keyed map storage (2026-02-13)** + - Playback prefetch buffer now uses `BTreeMap` keyed by frame number. + - Removes repeated linear scans over deque entries for target frame lookup in hot playback path. + - Retains bounded buffer behavior with deterministic far-ahead/oldest eviction. + --- ## Root Cause Analysis Archive @@ -405,6 +410,7 @@ Decoder Pipeline: 29. Added missing-candidate-row gating to baseline-vs-candidate comparison workflow. 30. Fixed finalize compare/publish ordering and propagated missing-candidate override into finalize compare flow. 31. Added structured JSON artifact emission for baseline-vs-candidate comparison workflows. +32. Replaced playback prefetch deque scans with keyed `BTreeMap` buffering for lower lookup overhead in frame acquisition path. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -432,6 +438,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. +- `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. 
- `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 1071231185..0ce521b222 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -16,7 +16,7 @@ use cpal::{ use futures::stream::{FuturesUnordered, StreamExt}; use lru::LruCache; use std::{ - collections::{HashSet, VecDeque}, + collections::{BTreeMap, HashSet}, num::NonZeroUsize, sync::{ Arc, RwLock, @@ -101,6 +101,39 @@ impl FrameCache { } } +fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_frame: u32) { + while buffer.len() > PREFETCH_BUFFER_SIZE { + let far_ahead_frame = buffer + .iter() + .rev() + .find(|(frame, _)| **frame > current_frame + PREFETCH_BUFFER_SIZE as u32) + .map(|(frame, _)| *frame); + + if let Some(frame) = far_ahead_frame { + buffer.remove(&frame); + continue; + } + + let Some(oldest_frame) = buffer.keys().next().copied() else { + break; + }; + buffer.remove(&oldest_frame); + } +} + +fn insert_prefetched_frame( + buffer: &mut BTreeMap, + prefetched: PrefetchedFrame, + current_frame: u32, +) { + if prefetched.frame_number < current_frame { + return; + } + + buffer.entry(prefetched.frame_number).or_insert(prefetched); + trim_prefetch_buffer(buffer, current_frame); +} + impl Playback { pub async fn start( mut self, @@ -436,8 +469,7 @@ impl Playback { .max(Duration::from_millis(1)) .min(Duration::from_millis(4)); let mut frame_number = self.start_frame_number; - let mut prefetch_buffer: VecDeque = - VecDeque::with_capacity(PREFETCH_BUFFER_SIZE); + let mut 
prefetch_buffer: BTreeMap = BTreeMap::new(); let mut frame_cache = FrameCache::new(FRAME_CACHE_SIZE); let mut seek_generation = 0u64; let base_skip_threshold = (fps / 6).clamp(6, 16); @@ -486,8 +518,8 @@ impl Playback { tokio::select! { Some(prefetched) = prefetch_rx.recv() => { - if prefetched.generation == seek_generation && prefetched.frame_number >= frame_number { - prefetch_buffer.push_back(prefetched); + if prefetched.generation == seek_generation { + insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); if first_frame_time.is_none() { first_frame_time = Some(Instant::now()); } @@ -503,10 +535,6 @@ impl Playback { } } - prefetch_buffer - .make_contiguous() - .sort_by_key(|p| p.frame_number); - let mut playback_anchor_start = Instant::now(); let mut playback_anchor_frame = frame_number; let mut cached_project = self.project.borrow().clone(); @@ -537,25 +565,8 @@ impl Playback { cached_project = self.project.borrow_and_update().clone(); } while let Ok(prefetched) = prefetch_rx.try_recv() { - if prefetched.generation == seek_generation - && prefetched.frame_number >= frame_number - { - prefetch_buffer.push_back(prefetched); - while prefetch_buffer.len() > PREFETCH_BUFFER_SIZE { - if let Some(idx) = prefetch_buffer - .iter() - .enumerate() - .filter(|(_, p)| { - p.frame_number > frame_number + PREFETCH_BUFFER_SIZE as u32 - }) - .max_by_key(|(_, p)| p.frame_number) - .map(|(i, _)| i) - { - prefetch_buffer.remove(idx); - } else { - prefetch_buffer.pop_front(); - } - } + if prefetched.generation == seek_generation { + insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); } } @@ -603,12 +614,7 @@ impl Playback { was_cached = true; Some(cached) } else { - let prefetched_idx = prefetch_buffer - .iter() - .position(|p| p.frame_number == frame_number); - - if let Some(idx) = prefetched_idx { - let prefetched = prefetch_buffer.remove(idx).unwrap(); + if let Some(prefetched) = prefetch_buffer.remove(&frame_number) { Some(( 
Arc::new(prefetched.segment_frames), prefetched.segment_index, @@ -640,7 +646,7 @@ impl Playback { found_frame = Some(prefetched); break; } else if prefetched.frame_number >= frame_number { - prefetch_buffer.push_back(prefetched); + insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); } } _ = tokio::time::sleep(in_flight_poll_interval) => { @@ -665,11 +671,7 @@ impl Playback { prefetched.segment_index, )) } else { - let prefetched_idx = prefetch_buffer - .iter() - .position(|p| p.frame_number == frame_number); - if let Some(idx) = prefetched_idx { - let prefetched = prefetch_buffer.remove(idx).unwrap(); + if let Some(prefetched) = prefetch_buffer.remove(&frame_number) { Some(( Arc::new(prefetched.segment_frames), prefetched.segment_index, @@ -698,7 +700,11 @@ impl Playback { prefetched.segment_index, )) } else { - prefetch_buffer.push_back(prefetched); + insert_prefetched_frame( + &mut prefetch_buffer, + prefetched, + frame_number, + ); frame_number = frame_number.saturating_add(1); total_frames_skipped += 1; continue; @@ -871,7 +877,7 @@ impl Playback { total_frames_skipped += skipped as u64; skip_events = skip_events.saturating_add(1); - prefetch_buffer.retain(|p| p.frame_number >= frame_number); + prefetch_buffer.retain(|frame, _| *frame >= frame_number); let _ = frame_request_tx.send(frame_number); let _ = playback_position_tx.send(frame_number); if has_audio From 5498a511e80c028f5b6628aa69adf83eeea921e7 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:27:12 +0000 Subject: [PATCH 219/333] improve: prune stale prefetched frames using ordered map --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 13 +++++++++++++ 2 files changed, 20 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 54c5ca97d5..0fb3dfd712 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -289,6 +289,11 @@ cargo run -p 
cap-recording --example playback-test-runner -- full - Removes repeated linear scans over deque entries for target frame lookup in hot playback path. - Retains bounded buffer behavior with deterministic far-ahead/oldest eviction. +29. **Added sorted prefetch stale-frame pruning (2026-02-13)** + - Playback loop now prunes prefetched frames older than current playhead from the keyed buffer. + - Uses ordered map operations to remove outdated frames efficiently. + - Reduces stale-buffer buildup during frame skips and sustained catch-up scenarios. + --- ## Root Cause Analysis Archive @@ -411,6 +416,7 @@ Decoder Pipeline: 30. Fixed finalize compare/publish ordering and propagated missing-candidate override into finalize compare flow. 31. Added structured JSON artifact emission for baseline-vs-candidate comparison workflows. 32. Replaced playback prefetch deque scans with keyed `BTreeMap` buffering for lower lookup overhead in frame acquisition path. +33. Added ordered stale-frame pruning in keyed prefetch buffer to keep playback buffer aligned with current playhead. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -439,6 +445,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. +- `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. 
- `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 0ce521b222..28de5205df 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -134,6 +134,18 @@ fn insert_prefetched_frame( trim_prefetch_buffer(buffer, current_frame); } +fn prune_prefetch_buffer_before_frame( + buffer: &mut BTreeMap, + current_frame: u32, +) { + while let Some((frame, _)) = buffer.first_key_value() { + if *frame >= current_frame { + break; + } + buffer.pop_first(); + } +} + impl Playback { pub async fn start( mut self, @@ -569,6 +581,7 @@ impl Playback { insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); } } + prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); let frame_offset = frame_number.saturating_sub(playback_anchor_frame) as f64; let next_deadline = playback_anchor_start + frame_duration.mul_f64(frame_offset); From 75add0bf66a93c2dc440f7e3b54ea24f41c718f4 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:28:31 +0000 Subject: [PATCH 220/333] improve: include comparison gate status in published matrix summaries --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 +- crates/editor/PLAYBACK-FINDINGS.md | 8 ++++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 ++- scripts/finalize-playback-matrix.js | 7 ++++++- scripts/publish-playback-matrix-summary.js | 22 +++++++++++++++++++++- 5 files changed, 38 insertions(+), 4 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md 
b/crates/editor/PLAYBACK-BENCHMARKS.md index 8f533ecdfe..2ff8e37b63 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -84,7 +84,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md -node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md /tmp/playback-matrix-final/playback-comparison.md +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md /tmp/playback-matrix-final/playback-comparison.md --comparison-json /tmp/playback-matrix-final/playback-comparison.json # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 0fb3dfd712..a72a384855 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -294,6 +294,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Uses ordered map operations to remove outdated frames efficiently. 
- Reduces stale-buffer buildup during frame skips and sustained catch-up scenarios. +30. **Published comparison gate status in matrix summaries (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` now accepts optional `--comparison-json`. + - Published matrix summary now includes comparison gate pass/fail, regression count, and missing-candidate-row count when comparison JSON is provided. + - `scripts/finalize-playback-matrix.js` now forwards both comparison markdown and comparison JSON to publish flow. + --- ## Root Cause Analysis Archive @@ -417,6 +422,7 @@ Decoder Pipeline: 31. Added structured JSON artifact emission for baseline-vs-candidate comparison workflows. 32. Replaced playback prefetch deque scans with keyed `BTreeMap` buffering for lower lookup overhead in frame acquisition path. 33. Added ordered stale-frame pruning in keyed prefetch buffer to keep playback buffer aligned with current playhead. +34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -446,6 +452,8 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. +- `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. +- `scripts/finalize-playback-matrix.js`: finalize publish pass now forwards both comparison markdown and comparison JSON artifacts. 
- `crates/editor/src/playback.rs`: added seek-generation tagging for prefetched frames so stale in-flight decode results are ignored after seek generation advances. - `crates/editor/src/playback.rs`: seek handling now clears prefetched frame buffer on generation changes to guarantee stale buffered frames are discarded immediately. - `crates/editor/src/playback.rs`: in-flight prefetch wait path now only buffers frames at or ahead of current frame to reduce stale buffer accumulation. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index d88f7e982b..2a4896fbeb 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -144,7 +144,8 @@ pnpm bench:playback:publish -- \ --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md \ --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ - --comparison-md /tmp/playback-matrix-final/playback-comparison.md + --comparison-md /tmp/playback-matrix-final/playback-comparison.md \ + --comparison-json /tmp/playback-matrix-final/playback-comparison.json ``` Generate bottleneck analysis for optimization backlog: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 4e3699b401..45f9a5838a 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -257,7 +257,12 @@ function main() { publishArgs.push("--bottlenecks-md", bottleneckPath); } if (options.compareBaselineInputs.length > 0) { - publishArgs.push("--comparison-md", comparisonPath); + publishArgs.push( + "--comparison-md", + comparisonPath, + "--comparison-json", + comparisonJsonPath, + ); } run("node", publishArgs); } diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index fab62486f9..ac7adfbbe3 100644 --- a/scripts/publish-playback-matrix-summary.js +++ 
b/scripts/publish-playback-matrix-summary.js @@ -9,6 +9,7 @@ function parseArgs(argv) { statusMd: null, bottlenecksMd: null, comparisonMd: null, + comparisonJson: null, validationJson: null, target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), }; @@ -36,6 +37,10 @@ function parseArgs(argv) { options.comparisonMd = path.resolve(argv[++i] ?? ""); continue; } + if (arg === "--comparison-json") { + options.comparisonJson = path.resolve(argv[++i] ?? ""); + continue; + } if (arg === "--validation-json") { options.validationJson = path.resolve(argv[++i] ?? ""); continue; @@ -51,7 +56,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--target ] + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--comparison-json ] [--target ] Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); } @@ -68,6 +73,7 @@ function buildSummarySection( validationJson, bottlenecksMd, comparisonMd, + comparisonJson, ) { const now = new Date().toISOString(); const validation = JSON.parse(validationJson); @@ -80,6 +86,13 @@ function buildSummarySection( markdown += `- Observed cells: ${validation.observedCells}\n`; markdown += `- Missing cells: ${validation.missingCells?.length ?? 0}\n`; markdown += `- Format failures: ${validation.formatFailures?.length ?? 0}\n\n`; + if (comparisonJson) { + const comparison = JSON.parse(comparisonJson); + const comparisonPassed = comparison.summary?.passed === true; + markdown += `- Comparison gate: ${comparisonPassed ? "✅ PASS" : "❌ FAIL"}\n`; + markdown += `- Comparison regressions: ${comparison.summary?.regressions ?? "n/a"}\n`; + markdown += `- Missing candidate rows: ${comparison.summary?.missingCandidateRows ?? 
"n/a"}\n\n`; + } if ((validation.missingCells?.length ?? 0) > 0) { markdown += "**Missing Cells**\n"; @@ -152,6 +165,9 @@ function main() { if (options.comparisonMd) { ensureFile(options.comparisonMd, "Comparison markdown"); } + if (options.comparisonJson) { + ensureFile(options.comparisonJson, "Comparison JSON"); + } ensureFile(options.target, "Target"); const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8"); @@ -163,12 +179,16 @@ function main() { const comparisonMd = options.comparisonMd ? fs.readFileSync(options.comparisonMd, "utf8") : null; + const comparisonJson = options.comparisonJson + ? fs.readFileSync(options.comparisonJson, "utf8") + : null; const summaryMd = buildSummarySection( aggregateMd, statusMd, validationJson, bottlenecksMd, comparisonMd, + comparisonJson, ); writeToBenchmarkHistory(options.target, summaryMd); console.log(`Published matrix summary into ${options.target}`); From b13aa6c138f9b7491342f5ec1a9cc126e0fd79a2 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:31:40 +0000 Subject: [PATCH 221/333] improve: tighten warmup timing and skip pruning in keyed prefetch path --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a72a384855..7d33c14e19 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -299,6 +299,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Published matrix summary now includes comparison gate pass/fail, regression count, and missing-candidate-row count when comparison JSON is provided. - `scripts/finalize-playback-matrix.js` now forwards both comparison markdown and comparison JSON to publish flow. +31. 
**Tightened prefetch warmup/skip maintenance with keyed buffer helpers (2026-02-13)** + - Warmup first-frame timer now starts only after at least one eligible prefetched frame is present in the keyed buffer. + - Skip catch-up path now uses ordered stale-frame pruning helper instead of full-map retain filtering. + - Reduces avoidable warmup timing noise and stale-buffer maintenance overhead in high-skip playback paths. + --- ## Root Cause Analysis Archive @@ -423,6 +428,7 @@ Decoder Pipeline: 32. Replaced playback prefetch deque scans with keyed `BTreeMap` buffering for lower lookup overhead in frame acquisition path. 33. Added ordered stale-frame pruning in keyed prefetch buffer to keep playback buffer aligned with current playhead. 34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. +35. Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -450,6 +456,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. +- `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. 
- `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 28de5205df..ae5d1cb128 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -532,7 +532,7 @@ impl Playback { Some(prefetched) = prefetch_rx.recv() => { if prefetched.generation == seek_generation { insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); - if first_frame_time.is_none() { + if first_frame_time.is_none() && !prefetch_buffer.is_empty() { first_frame_time = Some(Instant::now()); } } @@ -890,7 +890,7 @@ impl Playback { total_frames_skipped += skipped as u64; skip_events = skip_events.saturating_add(1); - prefetch_buffer.retain(|frame, _| *frame >= frame_number); + prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); let _ = frame_request_tx.send(frame_number); let _ = playback_position_tx.send(frame_number); if has_audio From 56557c2bf4d8ae821c4c3a963ef58ed93659e0ec Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:33:37 +0000 Subject: [PATCH 222/333] improve: report candidate-only coverage in benchmark comparisons --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 + crates/editor/PLAYBACK-FINDINGS.md | 7 +++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 + scripts/compare-playback-benchmark-runs.js | 53 ++++++++++++++++++---- 4 files changed, 55 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 2ff8e37b63..accad50330 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -95,6 +95,8 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node 
scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json + +Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 7d33c14e19..264b553181 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -304,6 +304,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Skip catch-up path now uses ordered stale-frame pruning helper instead of full-map retain filtering. - Reduces avoidable warmup timing noise and stale-buffer maintenance overhead in high-skip playback paths. +32. **Expanded comparison outputs with candidate-only coverage visibility (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now reports candidate-only rows that do not exist in baseline. + - Markdown and JSON comparison outputs now include both missing-candidate and candidate-only coverage summaries. + - Improves matrix diff diagnostics when test coverage differs between baseline and candidate runs. + --- ## Root Cause Analysis Archive @@ -429,6 +434,7 @@ Decoder Pipeline: 33. Added ordered stale-frame pruning in keyed prefetch buffer to keep playback buffer aligned with current playhead. 34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. 35. 
Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. +36. Expanded baseline-vs-candidate comparison outputs with candidate-only row reporting. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -455,6 +461,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. +- `scripts/compare-playback-benchmark-runs.js`: comparison outputs now include candidate-only rows in addition to missing-candidate coverage deltas. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 2a4896fbeb..b3b4184234 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -168,6 +168,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # emit structured JSON alongside markdown for automation pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --output-json /tmp/playback-matrix-final/playback-comparison.json + +# compare output now includes both missing-candidate rows and candidate-only rows ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 2f76545b6a..d3c634b3bc 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -185,6 +185,7 @@ function formatNumber(value, digits = 2) { function compareMetrics(baselineRows, candidateRows) { const comparisons = []; const missingCandidateRows = []; + const candidateOnlyRows = []; for (const [key, baseline] of baselineRows) { const candidate = candidateRows.get(key); @@ -201,7 +202,16 @@ function compareMetrics(baselineRows, candidateRows) { for (const [key, candidate] of candidateRows) { const baseline = baselineRows.get(key); - if (!baseline) continue; + if (!baseline) { + candidateOnlyRows.push({ + platform: candidate.platform, + gpu: candidate.gpu, + scenario: candidate.scenario, + recording: candidate.recording, + format: candidate.format, + }); + continue; + } const fpsDelta = delta(candidate.fpsMin, baseline.fpsMin); const startupDelta = delta(candidate.startupAvg, baseline.startupAvg); @@ -235,10 +245,15 @@ function compareMetrics(baselineRows, candidateRows) { } comparisons.sort((a, b) => b.regressions.length - a.regressions.length); - return { comparisons, missingCandidateRows }; + 
return { comparisons, missingCandidateRows, candidateOnlyRows }; } -function toMarkdown(comparisons, missingCandidateRows, options) { +function toMarkdown( + comparisons, + missingCandidateRows, + candidateOnlyRows, + options, +) { const regressions = comparisons.filter( (entry) => entry.regressions.length > 0, ); @@ -246,7 +261,7 @@ function toMarkdown(comparisons, missingCandidateRows, options) { md += "# Playback Benchmark Comparison\n\n"; md += `Generated: ${new Date().toISOString()}\n\n`; md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; - md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}\n\n`; + md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}\n\n`; if (missingCandidateRows.length > 0) { md += "## Missing Candidate Rows\n\n"; md += "| Platform | GPU | Scenario | Recording | Format |\n"; @@ -256,6 +271,15 @@ function toMarkdown(comparisons, missingCandidateRows, options) { } md += "\n"; } + if (candidateOnlyRows.length > 0) { + md += "## Candidate-Only Rows\n\n"; + md += "| Platform | GPU | Scenario | Recording | Format |\n"; + md += "|---|---|---|---|---|\n"; + for (const row of candidateOnlyRows) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} |\n`; + } + md += "\n"; + } md += "| Platform | GPU | Scenario | Recording | Format | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; md += "|---|---|---|---|---|---:|---:|---:|---|\n"; @@ -266,7 +290,12 @@ function toMarkdown(comparisons, missingCandidateRows, options) { return md; } -function buildJsonOutput(comparisons, missingCandidateRows, options) { +function buildJsonOutput( + comparisons, + missingCandidateRows, + 
candidateOnlyRows, + options, +) { const regressions = comparisons.filter( (entry) => entry.regressions.length > 0, ); @@ -282,12 +311,14 @@ function buildJsonOutput(comparisons, missingCandidateRows, options) { comparedRows: comparisons.length, regressions: regressions.length, missingCandidateRows: missingCandidateRows.length, + candidateOnlyRows: candidateOnlyRows.length, passed: regressions.length === 0 && (options.allowMissingCandidate || missingCandidateRows.length === 0), }, regressions, missingCandidateRows, + candidateOnlyRows, comparisons, }; } @@ -320,14 +351,18 @@ function main() { const baselineRows = collectMetrics(baselineFiles); const candidateRows = collectMetrics(candidateFiles); - const { comparisons, missingCandidateRows } = compareMetrics( - baselineRows, - candidateRows, + const { comparisons, missingCandidateRows, candidateOnlyRows } = + compareMetrics(baselineRows, candidateRows); + const markdown = toMarkdown( + comparisons, + missingCandidateRows, + candidateOnlyRows, + options, ); - const markdown = toMarkdown(comparisons, missingCandidateRows, options); const outputJson = buildJsonOutput( comparisons, missingCandidateRows, + candidateOnlyRows, options, ); From 1cd997c0e66c42007450af7c970505ec169fc237 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:34:25 +0000 Subject: [PATCH 223/333] improve: include candidate-only count in published comparison summary --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ scripts/publish-playback-matrix-summary.js | 1 + 2 files changed, 7 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 264b553181..9a6facdfc7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -309,6 +309,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Markdown and JSON comparison outputs now include both missing-candidate and candidate-only coverage summaries. 
- Improves matrix diff diagnostics when test coverage differs between baseline and candidate runs. +33. **Extended published comparison summary fields (2026-02-13)** + - `scripts/publish-playback-matrix-summary.js` now includes candidate-only row count from comparison JSON in published matrix summary bullets. + - Keeps published matrix evidence aligned with expanded comparison coverage diagnostics. + --- ## Root Cause Analysis Archive @@ -435,6 +439,7 @@ Decoder Pipeline: 34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. 35. Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. 36. Expanded baseline-vs-candidate comparison outputs with candidate-only row reporting. +37. Added candidate-only row count reporting in published matrix summary comparison status bullets. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -463,6 +468,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. - `scripts/compare-playback-benchmark-runs.js`: comparison outputs now include candidate-only rows in addition to missing-candidate coverage deltas. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. 
- `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index ac7adfbbe3..60d0f6a6c4 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -92,6 +92,7 @@ function buildSummarySection( markdown += `- Comparison gate: ${comparisonPassed ? "✅ PASS" : "❌ FAIL"}\n`; markdown += `- Comparison regressions: ${comparison.summary?.regressions ?? "n/a"}\n`; markdown += `- Missing candidate rows: ${comparison.summary?.missingCandidateRows ?? "n/a"}\n\n`; + markdown += `- Candidate-only rows: ${comparison.summary?.candidateOnlyRows ?? "n/a"}\n\n`; } if ((validation.missingCells?.length ?? 0) > 0) { From ae1356200fec346ec7ca3e49e6782a71c28651f5 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:37:48 +0000 Subject: [PATCH 224/333] improve: add strict candidate-only gating for matrix comparisons --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 ++ crates/editor/PLAYBACK-FINDINGS.md | 8 ++++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 5 +++++ scripts/compare-playback-benchmark-runs.js | 15 ++++++++++++--- scripts/finalize-playback-matrix.js | 10 +++++++++- 5 files changed, 36 insertions(+), 4 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index accad50330..f40284d4b8 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -80,6 +80,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir 
/tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md # Publish matrix artifacts into this benchmark history @@ -94,6 +95,7 @@ node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-result node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in 
candidate and candidate-only rows not present in baseline. diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 9a6facdfc7..57b1dcf58c 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -313,6 +313,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/publish-playback-matrix-summary.js` now includes candidate-only row count from comparison JSON in published matrix summary bullets. - Keeps published matrix evidence aligned with expanded comparison coverage diagnostics. +34. **Added strict candidate-only gating option for comparison workflows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-candidate-only`. + - When enabled, comparison exits non-zero if candidate contains rows not present in baseline. + - `scripts/finalize-playback-matrix.js` now forwards the same strict option in integrated compare flows. + --- ## Root Cause Analysis Archive @@ -439,6 +444,7 @@ Decoder Pipeline: 34. Added comparison gate status fields to published matrix summary entries via comparison JSON attachment. 35. Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. 36. Expanded baseline-vs-candidate comparison outputs with candidate-only row reporting. +37. Added strict `fail-on-candidate-only` gating option for compare/finalize matrix comparison workflows. 37. Added candidate-only row count reporting in published matrix summary comparison status bullets. **Changes Made**: @@ -468,6 +474,8 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added structured comparison JSON output with pass/fail summary and regression detail payload. - `scripts/compare-playback-benchmark-runs.js`: comparison outputs now include candidate-only rows in addition to missing-candidate coverage deltas. 
- `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. +- `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. +- `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index b3b4184234..23e1689ac7 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -123,6 +123,9 @@ pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir # optional: allow missing candidate rows during compare gate pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate + +# optional: fail finalize compare gate when candidate includes rows absent in baseline +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only ``` Finalize and publish to benchmark history in one command: @@ -170,6 +173,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate pnpm bench:playback:compare -- --baseline 
/path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-matrix-final/playback-comparison.md --output-json /tmp/playback-matrix-final/playback-comparison.json # compare output now includes both missing-candidate rows and candidate-only rows +# optional: fail compare gate when candidate includes rows absent in baseline +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index d3c634b3bc..c858e8d396 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -13,6 +13,7 @@ function parseArgs(argv) { allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, allowMissingCandidate: false, + failOnCandidateOnly: false, }; for (let i = 2; i < argv.length; i++) { @@ -70,6 +71,10 @@ function parseArgs(argv) { options.allowMissingCandidate = true; continue; } + if (arg === "--fail-on-candidate-only") { + options.failOnCandidateOnly = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -77,7 +82,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] Compares baseline and candidate playback matrix JSON outputs and flags regressions. 
Multiple --baseline and --candidate inputs are supported.`); } @@ -261,6 +266,7 @@ function toMarkdown( md += "# Playback Benchmark Comparison\n\n"; md += `Generated: ${new Date().toISOString()}\n\n`; md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; + md += `Coverage gate: missing_candidate=${options.allowMissingCandidate ? "allow" : "fail"}, candidate_only=${options.failOnCandidateOnly ? "fail" : "allow"}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}\n\n`; if (missingCandidateRows.length > 0) { md += "## Missing Candidate Rows\n\n"; @@ -306,6 +312,7 @@ function buildJsonOutput( allowStartupIncreaseMs: options.allowStartupIncreaseMs, allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, allowMissingCandidate: options.allowMissingCandidate, + failOnCandidateOnly: options.failOnCandidateOnly, }, summary: { comparedRows: comparisons.length, @@ -314,7 +321,8 @@ function buildJsonOutput( candidateOnlyRows: candidateOnlyRows.length, passed: regressions.length === 0 && - (options.allowMissingCandidate || missingCandidateRows.length === 0), + (options.allowMissingCandidate || missingCandidateRows.length === 0) && + (!options.failOnCandidateOnly || candidateOnlyRows.length === 0), }, regressions, missingCandidateRows, @@ -383,7 +391,8 @@ function main() { if ( comparisons.some((entry) => entry.regressions.length > 0) || - (!options.allowMissingCandidate && missingCandidateRows.length > 0) + (!options.allowMissingCandidate && missingCandidateRows.length > 0) || + (options.failOnCandidateOnly && candidateOnlyRows.length > 0) ) { process.exit(1); } diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 45f9a5838a..a767916571 100644 --- a/scripts/finalize-playback-matrix.js 
+++ b/scripts/finalize-playback-matrix.js @@ -19,6 +19,7 @@ function parseArgs(argv) { allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, allowMissingCandidate: false, + failOnCandidateOnly: false, }; for (let i = 2; i < argv.length; i++) { @@ -116,6 +117,10 @@ function parseArgs(argv) { options.allowMissingCandidate = true; continue; } + if (arg === "--fail-on-candidate-only") { + options.failOnCandidateOnly = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -123,7 +128,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -239,6 +244,9 @@ function main() { if (options.allowMissingCandidate) { compareArgs.push("--allow-missing-candidate"); } + if (options.failOnCandidateOnly) { + compareArgs.push("--fail-on-candidate-only"); + } run("node", compareArgs); } if (options.publishTarget) { From dc516ba9388559ec278024bcaed1598ed9c09953 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:39:09 +0000 Subject: [PATCH 225/333] improve: require contiguous prefetch coverage for warmup readiness --- crates/editor/PLAYBACK-FINDINGS.md | 9 ++++++++- crates/editor/src/playback.rs | 23 ++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 57b1dcf58c..9913888499 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -318,6 +318,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - When enabled, comparison exits non-zero if candidate contains rows not present in baseline. - `scripts/finalize-playback-matrix.js` now forwards the same strict option in integrated compare flows. +35. **Required contiguous prefetched frames for warmup readiness (2026-02-13)** + - Playback warmup readiness now checks contiguous prefetched frame coverage from current frame. + - Avoids treating sparse/non-contiguous prefetched entries as equivalent to contiguous startup readiness. + - Reduces early playback start jitter risk when warmup buffer is fragmented. + --- ## Root Cause Analysis Archive @@ -445,7 +450,8 @@ Decoder Pipeline: 35. Tightened keyed prefetch buffer warmup timing and skip-path pruning behavior using map-aware helper usage in playback loop. 36. Expanded baseline-vs-candidate comparison outputs with candidate-only row reporting. 37. Added strict `fail-on-candidate-only` gating option for compare/finalize matrix comparison workflows. 
-37. Added candidate-only row count reporting in published matrix summary comparison status bullets. +38. Added candidate-only row count reporting in published matrix summary comparison status bullets. +39. Updated playback warmup start condition to require contiguous prefetched frame coverage from current frame. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -477,6 +483,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. - `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. +- `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index ae5d1cb128..0966be3fe4 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -146,6 +146,22 @@ fn prune_prefetch_buffer_before_frame( } } +fn count_contiguous_prefetched_frames( + buffer: &BTreeMap, + start_frame: u32, + limit: usize, +) -> usize { + let mut contiguous = 0usize; + while contiguous < limit { + let frame = start_frame.saturating_add(contiguous as u32); + if !buffer.contains_key(&frame) { + break; + } + contiguous += 1; + } + contiguous +} + impl Playback { pub async fn start( mut self, @@ -508,8 +524,13 @@ impl Playback { ); while !*stop_rx.borrow() { + let contiguous_prefetched = count_contiguous_prefetched_frames( + &prefetch_buffer, + frame_number, + warmup_target_frames, + ); let should_start = if let Some(first_time) = first_frame_time { - prefetch_buffer.len() >= warmup_target_frames + contiguous_prefetched >= warmup_target_frames || first_time.elapsed() > warmup_after_first_timeout } else { false From 47d794780f8f4205318cc9d777abe7cdd3e7e5d9 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:40:27 +0000 Subject: [PATCH 226/333] improve: surface comparison coverage policies in published summaries --- crates/editor/PLAYBACK-FINDINGS.md | 10 ++++++++-- scripts/publish-playback-matrix-summary.js | 2 ++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 9913888499..cd5abaa7db 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -313,12 +313,16 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/publish-playback-matrix-summary.js` now includes candidate-only row count from comparison JSON in published matrix summary bullets. - Keeps published matrix evidence aligned with expanded comparison coverage diagnostics. -34. 
**Added strict candidate-only gating option for comparison workflows (2026-02-13)** +34. **Published comparison policy mode in summary output (2026-02-13)** + - Published matrix summary now includes comparison policy modes for missing-candidate and candidate-only coverage handling. + - Keeps published evidence explicit about whether coverage gaps were allowed or gated in the comparison run. + +35. **Added strict candidate-only gating option for comparison workflows (2026-02-13)** - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-candidate-only`. - When enabled, comparison exits non-zero if candidate contains rows not present in baseline. - `scripts/finalize-playback-matrix.js` now forwards the same strict option in integrated compare flows. -35. **Required contiguous prefetched frames for warmup readiness (2026-02-13)** +36. **Required contiguous prefetched frames for warmup readiness (2026-02-13)** - Playback warmup readiness now checks contiguous prefetched frame coverage from current frame. - Avoids treating sparse/non-contiguous prefetched entries as equivalent to contiguous startup readiness. - Reduces early playback start jitter risk when warmup buffer is fragmented. @@ -452,6 +456,7 @@ Decoder Pipeline: 37. Added strict `fail-on-candidate-only` gating option for compare/finalize matrix comparison workflows. 38. Added candidate-only row count reporting in published matrix summary comparison status bullets. 39. Updated playback warmup start condition to require contiguous prefetched frame coverage from current frame. +40. Added comparison policy mode reporting (allow/fail) for missing-candidate and candidate-only coverage in published matrix summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. 
@@ -483,6 +488,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. - `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes missing-candidate and candidate-only coverage policy modes from comparison JSON tolerance settings. - `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 60d0f6a6c4..663a67480c 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -93,6 +93,8 @@ function buildSummarySection( markdown += `- Comparison regressions: ${comparison.summary?.regressions ?? "n/a"}\n`; markdown += `- Missing candidate rows: ${comparison.summary?.missingCandidateRows ?? "n/a"}\n\n`; markdown += `- Candidate-only rows: ${comparison.summary?.candidateOnlyRows ?? "n/a"}\n\n`; + markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; + markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? 
"fail" : "allow"}\n\n`; } if ((validation.missingCells?.length ?? 0) > 0) { From 25435cff982e090edf58afa728d2416d79d36b9d Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:44:04 +0000 Subject: [PATCH 227/333] improve: emit finalize summary json artifacts for automation --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 ++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/finalize-playback-matrix.js | 53 +++++++++++++++++++++++- 4 files changed, 61 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index f40284d4b8..a0b2017849 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -81,6 +81,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only +node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md # Publish matrix artifacts into this benchmark history diff --git 
a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index cd5abaa7db..564b1de8ef 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -327,6 +327,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Avoids treating sparse/non-contiguous prefetched entries as equivalent to contiguous startup readiness. - Reduces early playback start jitter risk when warmup buffer is fragmented. +37. **Added finalize summary JSON artifact output (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now supports optional `--output-json`. + - Finalize now emits `playback-finalize-summary.json` by default in output directory. + - Summary JSON includes artifact paths, settings, and validation/comparison pass flags for automation. + --- ## Root Cause Analysis Archive @@ -457,6 +462,7 @@ Decoder Pipeline: 38. Added candidate-only row count reporting in published matrix summary comparison status bullets. 39. Updated playback warmup start condition to require contiguous prefetched frame coverage from current frame. 40. Added comparison policy mode reporting (allow/fail) for missing-candidate and candidate-only coverage in published matrix summaries. +41. Added finalize summary JSON artifact output with artifact/settings/result metadata for automation workflows. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -491,6 +497,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes missing-candidate and candidate-only coverage policy modes from comparison JSON tolerance settings. - `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. 
- `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. +- `scripts/finalize-playback-matrix.js`: added optional `--output-json` and default finalize summary JSON emission with artifact path and pass/fail metadata. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 23e1689ac7..9f0a37b26a 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -111,6 +111,7 @@ One-shot finalize command: ```bash pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented +pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json ``` Include optimization thresholds when finalizing: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index a767916571..bef816d78b 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -15,6 +15,7 @@ function parseArgs(argv) { analyze: true, publishTarget: null, compareBaselineInputs: [], + outputJson: null, allowFpsDrop: 2, allowStartupIncreaseMs: 25, allowScrubP95IncreaseMs: 5, @@ -83,6 +84,12 @@ function parseArgs(argv) { options.publishTarget = path.resolve(value); continue; } + if (arg === 
"--output-json") { + const value = argv[++i]; + if (!value) throw new Error("Missing value for --output-json"); + options.outputJson = path.resolve(value); + continue; + } if (arg === "--compare-baseline") { const value = argv[++i]; if (!value) throw new Error("Missing value for --compare-baseline"); @@ -128,7 +135,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -178,6 +185,9 @@ function main() { options.outputDir, "playback-comparison.json", ); + const summaryJsonPath = + options.outputJson ?? + path.join(options.outputDir, "playback-finalize-summary.json"); const aggregateArgs = ["scripts/aggregate-playback-benchmarks.js"]; const statusArgs = ["scripts/build-playback-matrix-report.js"]; @@ -289,6 +299,47 @@ function main() { console.log(`Comparison report: ${comparisonPath}`); console.log(`Comparison JSON: ${comparisonJsonPath}`); } + const validation = JSON.parse(fs.readFileSync(validationPath, "utf8")); + const comparison = + options.compareBaselineInputs.length > 0 + ? 
JSON.parse(fs.readFileSync(comparisonJsonPath, "utf8")) + : null; + const summary = { + generatedAt: new Date().toISOString(), + inputs: options.inputs, + outputDir: options.outputDir, + artifacts: { + aggregatePath, + statusPath, + validationPath, + bottleneckPath: options.analyze ? bottleneckPath : null, + bottleneckJsonPath: options.analyze ? bottleneckJsonPath : null, + comparisonPath: + options.compareBaselineInputs.length > 0 ? comparisonPath : null, + comparisonJsonPath: + options.compareBaselineInputs.length > 0 ? comparisonJsonPath : null, + }, + settings: { + requireFormats: options.requireFormats, + targetFps: options.targetFps, + maxScrubP95Ms: options.maxScrubP95Ms, + maxStartupMs: options.maxStartupMs, + analyze: options.analyze, + publishTarget: options.publishTarget, + compareBaselineInputs: options.compareBaselineInputs, + allowFpsDrop: options.allowFpsDrop, + allowStartupIncreaseMs: options.allowStartupIncreaseMs, + allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, + allowMissingCandidate: options.allowMissingCandidate, + failOnCandidateOnly: options.failOnCandidateOnly, + }, + results: { + validationPassed: validation.passed === true, + comparisonPassed: comparison ? 
comparison.summary?.passed === true : null, + }, + }; + fs.writeFileSync(summaryJsonPath, JSON.stringify(summary, null, 2), "utf8"); + console.log(`Finalize summary JSON: ${summaryJsonPath}`); } try { From afbd5d1b1f296a651c922193be1cb80d21a49157 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:49:24 +0000 Subject: [PATCH 228/333] improve: optimize contiguous prefetch warmup scan --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 10 +++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 564b1de8ef..eb4ce05fa5 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -332,6 +332,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Finalize now emits `playback-finalize-summary.json` by default in output directory. - Summary JSON includes artifact paths, settings, and validation/comparison pass flags for automation. +38. **Optimized contiguous warmup coverage scan on keyed buffer (2026-02-13)** + - Contiguous prefetched-frame counting now walks ordered keys via map range iteration. + - Reduces repeated keyed lookups during warmup readiness checks. + - Preserves contiguous coverage semantics while lowering per-loop lookup overhead. + --- ## Root Cause Analysis Archive @@ -463,6 +468,7 @@ Decoder Pipeline: 39. Updated playback warmup start condition to require contiguous prefetched frame coverage from current frame. 40. Added comparison policy mode reporting (allow/fail) for missing-candidate and candidate-only coverage in published matrix summaries. 41. Added finalize summary JSON artifact output with artifact/settings/result metadata for automation workflows. +42. Optimized contiguous prefetched-frame warmup scan using ordered map range iteration instead of repeated key lookups. 
**Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -496,6 +502,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes missing-candidate and candidate-only coverage policy modes from comparison JSON tolerance settings. - `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. +- `crates/editor/src/playback.rs`: contiguous warmup coverage scan now uses ordered map range iteration to reduce repeated key lookup overhead. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `scripts/finalize-playback-matrix.js`: added optional `--output-json` and default finalize summary JSON emission with artifact path and pass/fail metadata. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 0966be3fe4..7cbf09d6e1 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -152,12 +152,16 @@ fn count_contiguous_prefetched_frames( limit: usize, ) -> usize { let mut contiguous = 0usize; - while contiguous < limit { - let frame = start_frame.saturating_add(contiguous as u32); - if !buffer.contains_key(&frame) { + let mut expected_frame = start_frame; + for (frame, _) in buffer.range(start_frame..) 
{ + if *frame != expected_frame { break; } contiguous += 1; + if contiguous >= limit { + break; + } + expected_frame = expected_frame.saturating_add(1); } contiguous } From 3effc17fa0ea28c6978b2b1e9473fc90fc1cee50 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:50:24 +0000 Subject: [PATCH 229/333] improve: include git metadata in finalize summary artifacts --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 ++ crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 ++ scripts/finalize-playback-matrix.js | 18 ++++++++++++++++++ 4 files changed, 28 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index a0b2017849..7c83166054 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -84,6 +84,8 @@ node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --ou node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md +Finalize summary JSON includes generated timestamp, artifact paths, settings, pass/fail flags, and git branch/commit metadata when available. 
+ # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md /tmp/playback-matrix-final/playback-comparison.md --comparison-json /tmp/playback-matrix-final/playback-comparison.json diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index eb4ce05fa5..595cb748c0 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -337,6 +337,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Reduces repeated keyed lookups during warmup readiness checks. - Preserves contiguous coverage semantics while lowering per-loop lookup overhead. +39. **Added git metadata to finalize summary artifacts (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now records git branch and commit SHA in finalize summary JSON output. + - Improves traceability of benchmark artifacts to exact source revision. + --- ## Root Cause Analysis Archive @@ -469,6 +473,7 @@ Decoder Pipeline: 40. Added comparison policy mode reporting (allow/fail) for missing-candidate and candidate-only coverage in published matrix summaries. 41. Added finalize summary JSON artifact output with artifact/settings/result metadata for automation workflows. 42. Optimized contiguous prefetched-frame warmup scan using ordered map range iteration instead of repeated key lookups. +43. 
Added git branch/commit metadata into finalize summary JSON artifacts for source traceability. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -505,6 +510,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: contiguous warmup coverage scan now uses ordered map range iteration to reduce repeated key lookup overhead. - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `scripts/finalize-playback-matrix.js`: added optional `--output-json` and default finalize summary JSON emission with artifact path and pass/fail metadata. +- `scripts/finalize-playback-matrix.js`: finalize summary JSON now includes git branch and commit metadata when available. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 9f0a37b26a..45ea7ea43f 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -114,6 +114,8 @@ pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-di pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json ``` +Finalize summary JSON includes artifact paths, gate outcomes, settings, and git branch/commit metadata when available. 
+ Include optimization thresholds when finalizing: ```bash diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index bef816d78b..6296c27ca1 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -147,6 +147,14 @@ function run(command, args) { } } +function readCommandOutput(command, args) { + const result = spawnSync(command, args, { encoding: "utf8" }); + if (result.status !== 0) { + return null; + } + return result.stdout.trim() || null; +} + function main() { const options = parseArgs(process.argv); if (options.help) { @@ -304,10 +312,20 @@ function main() { options.compareBaselineInputs.length > 0 ? JSON.parse(fs.readFileSync(comparisonJsonPath, "utf8")) : null; + const gitBranch = readCommandOutput("git", [ + "rev-parse", + "--abbrev-ref", + "HEAD", + ]); + const gitCommit = readCommandOutput("git", ["rev-parse", "HEAD"]); const summary = { generatedAt: new Date().toISOString(), inputs: options.inputs, outputDir: options.outputDir, + git: { + branch: gitBranch, + commit: gitCommit, + }, artifacts: { aggregatePath, statusPath, From 9c465a6be7d707875ecc222c143a368a21c7735c Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:52:12 +0000 Subject: [PATCH 230/333] improve: publish finalize summary metadata in matrix reports --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 8 +++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 3 +- scripts/finalize-playback-matrix.js | 81 +++++++++++----------- scripts/publish-playback-matrix-summary.js | 25 ++++++- 5 files changed, 76 insertions(+), 42 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 7c83166054..7ec06dbb69 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -89,6 +89,7 @@ Finalize summary JSON includes generated timestamp, artifact paths, settings, pa # Publish matrix artifacts into this 
benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --comparison-md /tmp/playback-matrix-final/playback-comparison.md --comparison-json /tmp/playback-matrix-final/playback-comparison.json +node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --finalize-summary-json /tmp/playback-matrix-final/playback-finalize-summary.json # Analyze bottlenecks from matrix results node scripts/analyze-playback-matrix-bottlenecks.js --input /path/to/json-results --output /tmp/playback-bottlenecks.md --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 595cb748c0..33b178950a 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -341,6 +341,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/finalize-playback-matrix.js` now records git branch and commit SHA in finalize summary JSON output. - Improves traceability of benchmark artifacts to exact source revision. +40. **Wired finalize summary artifact into publish flow (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now generates finalize summary JSON before publish step. 
+ - Finalize now forwards `--finalize-summary-json` to `publish-playback-matrix-summary.js`. + - Published matrix summaries can now include finalize artifact metadata in one-shot finalize runs. + --- ## Root Cause Analysis Archive @@ -474,6 +479,7 @@ Decoder Pipeline: 41. Added finalize summary JSON artifact output with artifact/settings/result metadata for automation workflows. 42. Optimized contiguous prefetched-frame warmup scan using ordered map range iteration instead of repeated key lookups. 43. Added git branch/commit metadata into finalize summary JSON artifacts for source traceability. +44. Wired finalize summary JSON into publish flow so one-shot finalize runs can publish summary metadata alongside matrix artifacts. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -511,6 +517,8 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: warmup first-frame timing now only starts after eligible prefetched frame insertion, and skip catch-up now reuses ordered stale-prune helper. - `scripts/finalize-playback-matrix.js`: added optional `--output-json` and default finalize summary JSON emission with artifact path and pass/fail metadata. - `scripts/finalize-playback-matrix.js`: finalize summary JSON now includes git branch and commit metadata when available. +- `scripts/finalize-playback-matrix.js`: finalize now writes summary JSON before publish and passes `--finalize-summary-json` into publish flow. +- `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional finalize summary JSON input and surfaces finalize source/validation metadata. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. 
- `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 45ea7ea43f..284360cfd1 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -151,7 +151,8 @@ pnpm bench:playback:publish -- \ --status-md /tmp/playback-matrix-final/playback-matrix-status.md \ --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json \ --comparison-md /tmp/playback-matrix-final/playback-comparison.md \ - --comparison-json /tmp/playback-matrix-final/playback-comparison.json + --comparison-json /tmp/playback-matrix-final/playback-comparison.json \ + --finalize-summary-json /tmp/playback-matrix-final/playback-finalize-summary.json ``` Generate bottleneck analysis for optimization backlog: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 6296c27ca1..76d07b1c38 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -267,46 +267,6 @@ function main() { } run("node", compareArgs); } - if (options.publishTarget) { - const publishArgs = [ - "scripts/publish-playback-matrix-summary.js", - "--aggregate-md", - aggregatePath, - "--status-md", - statusPath, - "--validation-json", - validationPath, - "--target", - options.publishTarget, - ]; - if (options.analyze) { - publishArgs.push("--bottlenecks-md", bottleneckPath); - } - if (options.compareBaselineInputs.length > 0) { - publishArgs.push( - "--comparison-md", - comparisonPath, - "--comparison-json", - comparisonJsonPath, - ); - } - run("node", publishArgs); - } - - console.log(`Aggregate markdown: ${aggregatePath}`); - console.log(`Status markdown: ${statusPath}`); - console.log(`Validation JSON: ${validationPath}`); - if (options.analyze) { - console.log(`Bottleneck analysis: ${bottleneckPath}`); - 
console.log(`Bottleneck analysis JSON: ${bottleneckJsonPath}`); - } - if (options.publishTarget) { - console.log(`Published target: ${options.publishTarget}`); - } - if (options.compareBaselineInputs.length > 0) { - console.log(`Comparison report: ${comparisonPath}`); - console.log(`Comparison JSON: ${comparisonJsonPath}`); - } const validation = JSON.parse(fs.readFileSync(validationPath, "utf8")); const comparison = options.compareBaselineInputs.length > 0 @@ -357,6 +317,47 @@ function main() { }, }; fs.writeFileSync(summaryJsonPath, JSON.stringify(summary, null, 2), "utf8"); + if (options.publishTarget) { + const publishArgs = [ + "scripts/publish-playback-matrix-summary.js", + "--aggregate-md", + aggregatePath, + "--status-md", + statusPath, + "--validation-json", + validationPath, + "--target", + options.publishTarget, + ]; + if (options.analyze) { + publishArgs.push("--bottlenecks-md", bottleneckPath); + } + if (options.compareBaselineInputs.length > 0) { + publishArgs.push( + "--comparison-md", + comparisonPath, + "--comparison-json", + comparisonJsonPath, + ); + } + publishArgs.push("--finalize-summary-json", summaryJsonPath); + run("node", publishArgs); + } + + console.log(`Aggregate markdown: ${aggregatePath}`); + console.log(`Status markdown: ${statusPath}`); + console.log(`Validation JSON: ${validationPath}`); + if (options.analyze) { + console.log(`Bottleneck analysis: ${bottleneckPath}`); + console.log(`Bottleneck analysis JSON: ${bottleneckJsonPath}`); + } + if (options.publishTarget) { + console.log(`Published target: ${options.publishTarget}`); + } + if (options.compareBaselineInputs.length > 0) { + console.log(`Comparison report: ${comparisonPath}`); + console.log(`Comparison JSON: ${comparisonJsonPath}`); + } console.log(`Finalize summary JSON: ${summaryJsonPath}`); } diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 663a67480c..128d19cd94 100644 --- 
a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -10,6 +10,7 @@ function parseArgs(argv) { bottlenecksMd: null, comparisonMd: null, comparisonJson: null, + finalizeSummaryJson: null, validationJson: null, target: path.resolve("crates/editor/PLAYBACK-BENCHMARKS.md"), }; @@ -41,6 +42,10 @@ function parseArgs(argv) { options.comparisonJson = path.resolve(argv[++i] ?? ""); continue; } + if (arg === "--finalize-summary-json") { + options.finalizeSummaryJson = path.resolve(argv[++i] ?? ""); + continue; + } if (arg === "--validation-json") { options.validationJson = path.resolve(argv[++i] ?? ""); continue; @@ -56,7 +61,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--comparison-json ] [--target ] + console.log(`Usage: node scripts/publish-playback-matrix-summary.js --aggregate-md --status-md --validation-json [--bottlenecks-md ] [--comparison-md ] [--comparison-json ] [--finalize-summary-json ] [--target ] Prepends a matrix summary section into PLAYBACK-BENCHMARKS.md benchmark history region.`); } @@ -74,6 +79,7 @@ function buildSummarySection( bottlenecksMd, comparisonMd, comparisonJson, + finalizeSummaryJson, ) { const now = new Date().toISOString(); const validation = JSON.parse(validationJson); @@ -96,6 +102,16 @@ function buildSummarySection( markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; } + if (finalizeSummaryJson) { + const finalizeSummary = JSON.parse(finalizeSummaryJson); + markdown += `- Finalize source branch: ${finalizeSummary.git?.branch ?? "n/a"}\n`; + markdown += `- Finalize source commit: ${finalizeSummary.git?.commit ?? 
"n/a"}\n`; + markdown += `- Finalize validation passed: ${finalizeSummary.results?.validationPassed === true ? "true" : "false"}\n`; + if (finalizeSummary.results?.comparisonPassed !== null) { + markdown += `- Finalize comparison passed: ${finalizeSummary.results?.comparisonPassed === true ? "true" : "false"}\n`; + } + markdown += "\n"; + } if ((validation.missingCells?.length ?? 0) > 0) { markdown += "**Missing Cells**\n"; @@ -171,6 +187,9 @@ function main() { if (options.comparisonJson) { ensureFile(options.comparisonJson, "Comparison JSON"); } + if (options.finalizeSummaryJson) { + ensureFile(options.finalizeSummaryJson, "Finalize summary JSON"); + } ensureFile(options.target, "Target"); const aggregateMd = fs.readFileSync(options.aggregateMd, "utf8"); @@ -185,6 +204,9 @@ function main() { const comparisonJson = options.comparisonJson ? fs.readFileSync(options.comparisonJson, "utf8") : null; + const finalizeSummaryJson = options.finalizeSummaryJson + ? fs.readFileSync(options.finalizeSummaryJson, "utf8") + : null; const summaryMd = buildSummarySection( aggregateMd, statusMd, @@ -192,6 +214,7 @@ function main() { bottlenecksMd, comparisonMd, comparisonJson, + finalizeSummaryJson, ); writeToBenchmarkHistory(options.target, summaryMd); console.log(`Published matrix summary into ${options.target}`); From 4455c5fefcffcefda64275531c2d1d20aef7a406 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:56:04 +0000 Subject: [PATCH 231/333] improve: aggregate multi-run comparison metrics by key --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 ++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 + scripts/compare-playback-benchmark-runs.js | 49 +++++++++++++++++----- 4 files changed, 48 insertions(+), 11 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 7ec06dbb69..fbe575630e 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ 
b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -103,6 +103,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. +Comparison table also reports baseline/candidate run counts per row when multiple JSON inputs contribute to the same key. ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 33b178950a..a5be6ee0b8 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -346,6 +346,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Finalize now forwards `--finalize-summary-json` to `publish-playback-matrix-summary.js`. - Published matrix summaries can now include finalize artifact metadata in one-shot finalize runs. +41. **Improved comparison aggregation across multi-input runs (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now aggregates metrics per comparison key across all contributing input reports instead of last-write-wins replacement. + - Comparison output now includes baseline/candidate run counts per row to surface aggregation depth. + - Fixed comparison regression evaluation to use explicit options parameter wiring inside compare function. + --- ## Root Cause Analysis Archive @@ -480,6 +485,7 @@ Decoder Pipeline: 42. Optimized contiguous prefetched-frame warmup scan using ordered map range iteration instead of repeated key lookups. 43. Added git branch/commit metadata into finalize summary JSON artifacts for source traceability. 44. Wired finalize summary JSON into publish flow so one-shot finalize runs can publish summary metadata alongside matrix artifacts. +45. 
Improved multi-input comparison aggregation by merging per-key metrics across runs and surfacing baseline/candidate run counts per comparison row. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -501,6 +507,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: added matrix artifact publisher into PLAYBACK-BENCHMARKS history region. - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. +- `scripts/compare-playback-benchmark-runs.js`: fixed options wiring inside comparison regression checks and now aggregates per-key metrics across multi-input runs with run-count reporting. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 284360cfd1..7d07903b34 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -179,6 +179,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # compare output now includes both missing-candidate rows and candidate-only rows # optional: fail compare gate when candidate includes rows absent in baseline pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only + +# when multiple inputs are provided, comparison output includes baseline/candidate run counts per row ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index c858e8d396..e9184c397f 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -131,7 +131,7 @@ function maximum(values) { } function collectMetrics(files) { - const rows = new Map(); + const accumulators = new Map(); for (const filePath of files) { const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); @@ -161,20 +161,45 @@ function collectMetrics(files) { .map((entry) => entry.p95_seek_time_ms) .filter((entry) => typeof entry === "number"); - rows.set(key, { + const existing = accumulators.get(key) ?? { key, platform, gpu, scenario, recording: report.recording_name ?? "unknown", format: report.is_fragmented ? "fragmented" : "mp4", - fpsMin: fpsValues.length ? 
Math.min(...fpsValues) : null, - startupAvg: average(startupValues), - scrubP95Max: maximum(scrubP95Values), - }); + reportCount: 0, + fpsSamples: [], + startupSamples: [], + scrubP95Samples: [], + }; + existing.reportCount += 1; + existing.fpsSamples.push(...fpsValues); + existing.startupSamples.push(...startupValues); + existing.scrubP95Samples.push(...scrubP95Values); + accumulators.set(key, existing); } } + const rows = new Map(); + for (const [key, row] of accumulators) { + rows.set(key, { + key, + platform: row.platform, + gpu: row.gpu, + scenario: row.scenario, + recording: row.recording, + format: row.format, + reportCount: row.reportCount, + fpsSampleCount: row.fpsSamples.length, + startupSampleCount: row.startupSamples.length, + scrubSampleCount: row.scrubP95Samples.length, + fpsMin: row.fpsSamples.length ? Math.min(...row.fpsSamples) : null, + startupAvg: average(row.startupSamples), + scrubP95Max: maximum(row.scrubP95Samples), + }); + } + return rows; } @@ -187,7 +212,7 @@ function formatNumber(value, digits = 2) { return value === null ? 
"n/a" : value.toFixed(digits); } -function compareMetrics(baselineRows, candidateRows) { +function compareMetrics(baselineRows, candidateRows, options) { const comparisons = []; const missingCandidateRows = []; const candidateOnlyRows = []; @@ -242,6 +267,8 @@ function compareMetrics(baselineRows, candidateRows) { scenario: candidate.scenario, recording: candidate.recording, format: candidate.format, + baselineReportCount: baseline.reportCount, + candidateReportCount: candidate.reportCount, fpsDelta, startupDelta, scrubDelta, @@ -287,10 +314,10 @@ function toMarkdown( md += "\n"; } md += - "| Platform | GPU | Scenario | Recording | Format | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; - md += "|---|---|---|---|---|---:|---:|---:|---|\n"; + "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; + md += "|---|---|---|---|---|---:|---:|---:|---:|---:|---|\n"; for (const row of comparisons) { - md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? row.regressions.join(", ") : "none"} |\n`; + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.baselineReportCount} | ${row.candidateReportCount} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? 
row.regressions.join(", ") : "none"} |\n`; } md += "\n"; return md; @@ -360,7 +387,7 @@ function main() { const baselineRows = collectMetrics(baselineFiles); const candidateRows = collectMetrics(candidateFiles); const { comparisons, missingCandidateRows, candidateOnlyRows } = - compareMetrics(baselineRows, candidateRows); + compareMetrics(baselineRows, candidateRows, options); const markdown = toMarkdown( comparisons, missingCandidateRows, From 48968728083598d61e7d7c2981f47701ca02bf1a Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 22:57:12 +0000 Subject: [PATCH 232/333] improve: defer contiguous warmup scans until first frame arrival --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 14 +++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a5be6ee0b8..b7137c7c7b 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -351,6 +351,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison output now includes baseline/candidate run counts per row to surface aggregation depth. - Fixed comparison regression evaluation to use explicit options parameter wiring inside compare function. +42. **Skipped contiguous warmup scans before first eligible frame (2026-02-13)** + - Warmup loop now defers contiguous-prefetch counting until first warmup frame arrival is observed. + - Reduces avoidable buffer scan work during pre-frame warmup wait. + --- ## Root Cause Analysis Archive @@ -486,6 +490,7 @@ Decoder Pipeline: 43. Added git branch/commit metadata into finalize summary JSON artifacts for source traceability. 44. Wired finalize summary JSON into publish flow so one-shot finalize runs can publish summary metadata alongside matrix artifacts. 45. 
Improved multi-input comparison aggregation by merging per-key metrics across runs and surfacing baseline/candidate run counts per comparison row. +46. Skipped contiguous warmup coverage scans until first warmup frame observation to reduce pre-frame warmup loop scan overhead. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -526,6 +531,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: finalize summary JSON now includes git branch and commit metadata when available. - `scripts/finalize-playback-matrix.js`: finalize now writes summary JSON before publish and passes `--finalize-summary-json` into publish flow. - `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional finalize summary JSON input and surfaces finalize source/validation metadata. +- `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 7cbf09d6e1..a934954c8d 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -528,11 +528,15 @@ impl Playback { ); while !*stop_rx.borrow() { - let contiguous_prefetched = count_contiguous_prefetched_frames( - &prefetch_buffer, - frame_number, - warmup_target_frames, - ); + let contiguous_prefetched = if first_frame_time.is_some() { + count_contiguous_prefetched_frames( + &prefetch_buffer, + frame_number, + warmup_target_frames, + ) + } else { + 0 + }; let should_start = if let Some(first_time) = first_frame_time { contiguous_prefetched >= warmup_target_frames || first_time.elapsed() > warmup_after_first_timeout From 4860d7447c430b6f84bc5d16524795cb30bf2c26 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:01:34 +0000 Subject: [PATCH 233/333] improve: add minimum sample gating for benchmark comparisons --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 + crates/editor/PLAYBACK-FINDINGS.md | 9 +++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 5 ++ scripts/compare-playback-benchmark-runs.js | 89 ++++++++++++++++++++-- scripts/finalize-playback-matrix.js | 13 +++- scripts/publish-playback-matrix-summary.js | 2 + 6 files changed, 111 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index fbe575630e..32a27d92b6 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -81,6 +81,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-fps-drop 2 --allow-startup-increase-ms 25 --allow-scrub-p95-increase-ms 5 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline 
/path/to/baseline-results --allow-missing-candidate node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md @@ -100,6 +101,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results-a --baseline /path/to/baseline-results-b --candidate /path/to/candidate-results-a --candidate /path/to/candidate-results-b --output /tmp/playback-comparison.md node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. 
diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index b7137c7c7b..eaff5bff35 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -355,6 +355,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup loop now defers contiguous-prefetch counting until first warmup frame arrival is observed. - Reduces avoidable buffer scan work during pre-frame warmup wait. +43. **Added minimum sample-count gating for matrix comparisons (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--min-samples-per-row`. + - Comparison now flags rows with insufficient effective sample counts and reports them in markdown/JSON outputs. + - `scripts/finalize-playback-matrix.js` forwards minimum sample gating settings into compare stage, and publish summary now surfaces sample gating status fields. + --- ## Root Cause Analysis Archive @@ -491,6 +496,7 @@ Decoder Pipeline: 44. Wired finalize summary JSON into publish flow so one-shot finalize runs can publish summary metadata alongside matrix artifacts. 45. Improved multi-input comparison aggregation by merging per-key metrics across runs and surfacing baseline/candidate run counts per comparison row. 46. Skipped contiguous warmup coverage scans until first warmup frame observation to reduce pre-frame warmup loop scan overhead. +47. Added minimum-sample comparison gating with `--min-samples-per-row`, insufficient-sample reporting, and finalize passthrough support. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -531,6 +537,9 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: finalize summary JSON now includes git branch and commit metadata when available. - `scripts/finalize-playback-matrix.js`: finalize now writes summary JSON before publish and passes `--finalize-summary-json` into publish flow. 
- `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional finalize summary JSON input and surfaces finalize source/validation metadata. +- `scripts/compare-playback-benchmark-runs.js`: added `--min-samples-per-row`, insufficient-sample row reporting, and sample gate fields in markdown/JSON outputs. +- `scripts/finalize-playback-matrix.js`: forwards `--min-samples-per-row` into compare stage and captures it in finalize summary settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 7d07903b34..c1f1c7f2f1 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -129,6 +129,9 @@ pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir # optional: fail finalize compare gate when candidate includes rows absent in baseline pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only + +# optional: require minimum sample count per compared row +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 ``` Finalize and publish to benchmark history in one command: @@ -181,6 +184,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only # when multiple inputs are provided, comparison output includes baseline/candidate run counts per row +# optional: require minimum sample count per compared row +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index e9184c397f..9d788ba1ee 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -14,6 +14,7 @@ function parseArgs(argv) { allowScrubP95IncreaseMs: 5, allowMissingCandidate: false, failOnCandidateOnly: false, + minSamplesPerRow: 1, }; for (let i = 2; i < argv.length; i++) { @@ -75,6 +76,14 @@ function parseArgs(argv) { options.failOnCandidateOnly = true; continue; } + if (arg === 
"--min-samples-per-row") { + const value = Number.parseInt(argv[++i] ?? "", 10); + if (!Number.isFinite(value) || value < 1) { + throw new Error("Invalid --min-samples-per-row value"); + } + options.minSamplesPerRow = value; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -82,7 +91,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] Compares baseline and candidate playback matrix JSON outputs and flags regressions. 
Multiple --baseline and --candidate inputs are supported.`); } @@ -216,6 +225,7 @@ function compareMetrics(baselineRows, candidateRows, options) { const comparisons = []; const missingCandidateRows = []; const candidateOnlyRows = []; + const insufficientSampleRows = []; for (const [key, baseline] of baselineRows) { const candidate = candidateRows.get(key); @@ -248,6 +258,38 @@ function compareMetrics(baselineRows, candidateRows, options) { const scrubDelta = delta(candidate.scrubP95Max, baseline.scrubP95Max); const regressions = []; + const fpsMinSamples = Math.min( + baseline.fpsSampleCount, + candidate.fpsSampleCount, + ); + const startupMinSamples = Math.min( + baseline.startupSampleCount, + candidate.startupSampleCount, + ); + const scrubMinSamples = Math.min( + baseline.scrubSampleCount, + candidate.scrubSampleCount, + ); + const effectiveSampleCount = Math.min( + fpsMinSamples, + startupMinSamples, + scrubMinSamples, + ); + if (effectiveSampleCount < options.minSamplesPerRow) { + insufficientSampleRows.push({ + platform: candidate.platform, + gpu: candidate.gpu, + scenario: candidate.scenario, + recording: candidate.recording, + format: candidate.format, + effectiveSampleCount, + requiredSampleCount: options.minSamplesPerRow, + }); + regressions.push( + `insufficient_samples=${effectiveSampleCount}/${options.minSamplesPerRow}`, + ); + } + if (fpsDelta !== null && fpsDelta < -options.allowFpsDrop) { regressions.push(`fps_drop=${formatNumber(fpsDelta)}`); } @@ -269,6 +311,10 @@ function compareMetrics(baselineRows, candidateRows, options) { format: candidate.format, baselineReportCount: baseline.reportCount, candidateReportCount: candidate.reportCount, + fpsMinSamples, + startupMinSamples, + scrubMinSamples, + effectiveSampleCount, fpsDelta, startupDelta, scrubDelta, @@ -277,13 +323,19 @@ function compareMetrics(baselineRows, candidateRows, options) { } comparisons.sort((a, b) => b.regressions.length - a.regressions.length); - return { comparisons, 
missingCandidateRows, candidateOnlyRows }; + return { + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + }; } function toMarkdown( comparisons, missingCandidateRows, candidateOnlyRows, + insufficientSampleRows, options, ) { const regressions = comparisons.filter( @@ -294,7 +346,8 @@ function toMarkdown( md += `Generated: ${new Date().toISOString()}\n\n`; md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; md += `Coverage gate: missing_candidate=${options.allowMissingCandidate ? "allow" : "fail"}, candidate_only=${options.failOnCandidateOnly ? "fail" : "allow"}\n\n`; - md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}\n\n`; + md += `Sample gate: min_samples_per_row>=${options.minSamplesPerRow}\n\n`; + md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; if (missingCandidateRows.length > 0) { md += "## Missing Candidate Rows\n\n"; md += "| Platform | GPU | Scenario | Recording | Format |\n"; @@ -313,11 +366,21 @@ function toMarkdown( } md += "\n"; } + if (insufficientSampleRows.length > 0) { + md += "## Insufficient Sample Rows\n\n"; + md += + "| Platform | GPU | Scenario | Recording | Format | Effective Samples | Required Samples |\n"; + md += "|---|---|---|---|---|---:|---:|\n"; + for (const row of insufficientSampleRows) { + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.effectiveSampleCount} | ${row.requiredSampleCount} |\n`; + } + md += "\n"; + } md += - "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | FPS Δ 
| Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; - md += "|---|---|---|---|---|---:|---:|---:|---:|---:|---|\n"; + "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | F Samples | S Samples | Q Samples | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; + md += "|---|---|---|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---|\n"; for (const row of comparisons) { - md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.baselineReportCount} | ${row.candidateReportCount} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? row.regressions.join(", ") : "none"} |\n`; + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.baselineReportCount} | ${row.candidateReportCount} | ${row.fpsMinSamples} | ${row.startupMinSamples} | ${row.scrubMinSamples} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? 
row.regressions.join(", ") : "none"} |\n`; } md += "\n"; return md; @@ -327,6 +390,7 @@ function buildJsonOutput( comparisons, missingCandidateRows, candidateOnlyRows, + insufficientSampleRows, options, ) { const regressions = comparisons.filter( @@ -340,12 +404,14 @@ function buildJsonOutput( allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, allowMissingCandidate: options.allowMissingCandidate, failOnCandidateOnly: options.failOnCandidateOnly, + minSamplesPerRow: options.minSamplesPerRow, }, summary: { comparedRows: comparisons.length, regressions: regressions.length, missingCandidateRows: missingCandidateRows.length, candidateOnlyRows: candidateOnlyRows.length, + insufficientSampleRows: insufficientSampleRows.length, passed: regressions.length === 0 && (options.allowMissingCandidate || missingCandidateRows.length === 0) && @@ -354,6 +420,7 @@ function buildJsonOutput( regressions, missingCandidateRows, candidateOnlyRows, + insufficientSampleRows, comparisons, }; } @@ -386,18 +453,24 @@ function main() { const baselineRows = collectMetrics(baselineFiles); const candidateRows = collectMetrics(candidateFiles); - const { comparisons, missingCandidateRows, candidateOnlyRows } = - compareMetrics(baselineRows, candidateRows, options); + const { + comparisons, + missingCandidateRows, + candidateOnlyRows, + insufficientSampleRows, + } = compareMetrics(baselineRows, candidateRows, options); const markdown = toMarkdown( comparisons, missingCandidateRows, candidateOnlyRows, + insufficientSampleRows, options, ); const outputJson = buildJsonOutput( comparisons, missingCandidateRows, candidateOnlyRows, + insufficientSampleRows, options, ); diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 76d07b1c38..71babdd9e2 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -21,6 +21,7 @@ function parseArgs(argv) { allowScrubP95IncreaseMs: 5, allowMissingCandidate: false, failOnCandidateOnly: 
false, + minSamplesPerRow: 1, }; for (let i = 2; i < argv.length; i++) { @@ -128,6 +129,14 @@ function parseArgs(argv) { options.failOnCandidateOnly = true; continue; } + if (arg === "--min-samples-per-row") { + const value = Number.parseInt(argv[++i] ?? "", 10); + if (!Number.isFinite(value) || value < 1) { + throw new Error("Invalid --min-samples-per-row value"); + } + options.minSamplesPerRow = value; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -135,7 +144,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -265,6 +274,7 @@ function main() { if (options.failOnCandidateOnly) { compareArgs.push("--fail-on-candidate-only"); } + compareArgs.push("--min-samples-per-row", String(options.minSamplesPerRow)); run("node", compareArgs); } const validation = JSON.parse(fs.readFileSync(validationPath, "utf8")); @@ -310,6 +320,7 @@ function main() { allowScrubP95IncreaseMs: options.allowScrubP95IncreaseMs, allowMissingCandidate: options.allowMissingCandidate, failOnCandidateOnly: options.failOnCandidateOnly, + minSamplesPerRow: options.minSamplesPerRow, }, results: { validationPassed: validation.passed === true, diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 128d19cd94..d2718b4efa 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -99,6 +99,8 @@ function buildSummarySection( markdown += `- Comparison regressions: ${comparison.summary?.regressions ?? "n/a"}\n`; markdown += `- Missing candidate rows: ${comparison.summary?.missingCandidateRows ?? "n/a"}\n\n`; markdown += `- Candidate-only rows: ${comparison.summary?.candidateOnlyRows ?? "n/a"}\n\n`; + markdown += `- Insufficient sample rows: ${comparison.summary?.insufficientSampleRows ?? "n/a"}\n`; + markdown += `- Minimum samples per row: ${comparison.tolerance?.minSamplesPerRow ?? "n/a"}\n\n`; markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? 
"fail" : "allow"}\n\n`; } From f4a3cb8aa2b013112ea17928a9bb779e7253606d Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:02:38 +0000 Subject: [PATCH 234/333] improve: cache warmup contiguous coverage between buffer updates --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 16 +++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index eaff5bff35..caf43652f4 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -360,6 +360,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison now flags rows with insufficient effective sample counts and reports them in markdown/JSON outputs. - `scripts/finalize-playback-matrix.js` forwards minimum sample gating settings into compare stage, and publish summary now surfaces sample gating status fields. +44. **Cached warmup contiguous coverage counts during warmup (2026-02-13)** + - Warmup loop now recomputes contiguous prefetched coverage only when warmup buffer content changes. + - Avoids repeated contiguous scans on idle warmup iterations. + --- ## Root Cause Analysis Archive @@ -497,6 +501,7 @@ Decoder Pipeline: 45. Improved multi-input comparison aggregation by merging per-key metrics across runs and surfacing baseline/candidate run counts per comparison row. 46. Skipped contiguous warmup coverage scans until first warmup frame observation to reduce pre-frame warmup loop scan overhead. 47. Added minimum-sample comparison gating with `--min-samples-per-row`, insufficient-sample reporting, and finalize passthrough support. +48. Cached warmup contiguous coverage values and only recomputed contiguous scan when warmup buffer changed. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. 
@@ -541,6 +546,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: forwards `--min-samples-per-row` into compare stage and captures it in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. +- `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. - `crates/editor/src/playback.rs`: added ordered pruning of stale prefetched frames below current playhead to reduce stale buffer overhead during catch-up. - `scripts/publish-playback-matrix-summary.js`: publish flow now surfaces comparison gate status/summary metrics when comparison JSON is provided. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index a934954c8d..972824a75d 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -521,6 +521,8 @@ impl Playback { let warmup_no_frames_timeout = Duration::from_secs(5); let warmup_start = Instant::now(); let mut first_frame_time: Option = None; + let mut warmup_contiguous_prefetched = 0usize; + let mut warmup_buffer_changed = false; info!( warmup_target_frames, warmup_after_first_timeout_ms = warmup_after_first_timeout.as_secs_f64() * 1000.0, @@ -528,12 +530,16 @@ impl Playback { ); while !*stop_rx.borrow() { - let contiguous_prefetched = if first_frame_time.is_some() { - count_contiguous_prefetched_frames( + if first_frame_time.is_some() && warmup_buffer_changed { + warmup_contiguous_prefetched = count_contiguous_prefetched_frames( &prefetch_buffer, frame_number, warmup_target_frames, - ) + ); + warmup_buffer_changed = false; + } + let contiguous_prefetched = if first_frame_time.is_some() { + warmup_contiguous_prefetched } else { 0 }; @@ -560,7 +566,11 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { if prefetched.generation == seek_generation { + let pre_insert_len = prefetch_buffer.len(); insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); + if prefetch_buffer.len() != pre_insert_len { + warmup_buffer_changed = true; + } if first_frame_time.is_none() && !prefetch_buffer.is_empty() { first_frame_time = Some(Instant::now()); } From 4bbfa59e64ab1c3e2150d3cc80591a8c1a842eb1 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:04:04 +0000 Subject: [PATCH 235/333] improve: expose comparison gate diagnostics for automation --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 ++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 + scripts/compare-playback-benchmark-runs.js | 45 +++++++++++++++++----- scripts/publish-playback-matrix-summary.js | 6 +++ 5 files changed, 52 insertions(+), 9 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 32a27d92b6..b535eb5464 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -106,6 +106,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. Comparison table also reports baseline/candidate run counts per row when multiple JSON inputs contribute to the same key. +Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for automation-friendly gate diagnostics. 
``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index caf43652f4..da66af7eb7 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -364,6 +364,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup loop now recomputes contiguous prefetched coverage only when warmup buffer content changes. - Avoids repeated contiguous scans on idle warmup iterations. +45. **Added explicit comparison gate diagnostics in JSON and published summaries (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now emits `failureReasons` and `gateOutcomes` in summary JSON. + - `scripts/publish-playback-matrix-summary.js` now surfaces comparison failure reasons when present. + --- ## Root Cause Analysis Archive @@ -502,6 +506,7 @@ Decoder Pipeline: 46. Skipped contiguous warmup coverage scans until first warmup frame observation to reduce pre-frame warmup loop scan overhead. 47. Added minimum-sample comparison gating with `--min-samples-per-row`, insufficient-sample reporting, and finalize passthrough support. 48. Cached warmup contiguous coverage values and only recomputed contiguous scan when warmup buffer changed. +49. Added comparison JSON gate diagnostics (`failureReasons`, `gateOutcomes`) and surfaced failure reasons in published summary output. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -545,6 +550,8 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added `--min-samples-per-row`, insufficient-sample row reporting, and sample gate fields in markdown/JSON outputs. - `scripts/finalize-playback-matrix.js`: forwards `--min-samples-per-row` into compare stage and captures it in finalize summary settings. 
- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. +- `scripts/compare-playback-benchmark-runs.js`: comparison JSON summary now includes explicit `failureReasons` and `gateOutcomes` fields. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes comparison failure reasons when present. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index c1f1c7f2f1..9cb6957e3e 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -186,6 +186,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # when multiple inputs are provided, comparison output includes baseline/candidate run counts per row # optional: require minimum sample count per compared row pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 + +# comparison JSON includes failureReasons and gateOutcomes for automation ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 9d788ba1ee..f385b8bf37 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -396,6 +396,31 @@ function buildJsonOutput( const regressions = comparisons.filter( (entry) => entry.regressions.length > 0, ); + const hasMissingCandidateRows = missingCandidateRows.length > 
0; + const hasCandidateOnlyRows = candidateOnlyRows.length > 0; + const hasInsufficientSamples = insufficientSampleRows.length > 0; + const hasMetricRegressions = regressions.some((entry) => + entry.regressions.some( + (issue) => + issue.startsWith("fps_drop=") || + issue.startsWith("startup_increase=") || + issue.startsWith("scrub_p95_increase="), + ), + ); + const failureReasons = []; + if (hasMetricRegressions) { + failureReasons.push("metric_regression"); + } + if (hasInsufficientSamples) { + failureReasons.push("insufficient_samples"); + } + if (!options.allowMissingCandidate && hasMissingCandidateRows) { + failureReasons.push("missing_candidate_rows"); + } + if (options.failOnCandidateOnly && hasCandidateOnlyRows) { + failureReasons.push("candidate_only_rows"); + } + const passed = failureReasons.length === 0; return { generatedAt: new Date().toISOString(), tolerance: { @@ -412,10 +437,16 @@ function buildJsonOutput( missingCandidateRows: missingCandidateRows.length, candidateOnlyRows: candidateOnlyRows.length, insufficientSampleRows: insufficientSampleRows.length, - passed: - regressions.length === 0 && - (options.allowMissingCandidate || missingCandidateRows.length === 0) && - (!options.failOnCandidateOnly || candidateOnlyRows.length === 0), + passed, + failureReasons, + gateOutcomes: { + metricRegressions: !hasMetricRegressions, + insufficientSamples: !hasInsufficientSamples, + missingCandidateRows: + options.allowMissingCandidate || !hasMissingCandidateRows, + candidateOnlyRows: + !options.failOnCandidateOnly || !hasCandidateOnlyRows, + }, }, regressions, missingCandidateRows, @@ -489,11 +520,7 @@ function main() { console.log(`Wrote comparison JSON to ${options.outputJson}`); } - if ( - comparisons.some((entry) => entry.regressions.length > 0) || - (!options.allowMissingCandidate && missingCandidateRows.length > 0) || - (options.failOnCandidateOnly && candidateOnlyRows.length > 0) - ) { + if (!outputJson.summary.passed) { process.exit(1); } } diff --git 
a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index d2718b4efa..d3c254d8c5 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -103,6 +103,12 @@ function buildSummarySection( markdown += `- Minimum samples per row: ${comparison.tolerance?.minSamplesPerRow ?? "n/a"}\n\n`; markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; + const failureReasons = Array.isArray(comparison.summary?.failureReasons) + ? comparison.summary.failureReasons + : []; + if (failureReasons.length > 0) { + markdown += `- Comparison failure reasons: ${failureReasons.join(", ")}\n\n`; + } } if (finalizeSummaryJson) { const finalizeSummary = JSON.parse(finalizeSummaryJson); From 0d6f995d2061170f248b4139eb9a438d88d02f6c Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:07:15 +0000 Subject: [PATCH 236/333] fix: apply minimum sample gating only to comparable metrics --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 ++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/compare-playback-benchmark-runs.js | 27 +++++++++++++++------- 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index b535eb5464..cb3787efd8 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -107,6 +107,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. Comparison table also reports baseline/candidate run counts per row when multiple JSON inputs contribute to the same key. 
Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for automation-friendly gate diagnostics. +Minimum sample gating is applied against metrics that are actually comparable for the row (for example, scrub samples are not required for rows with no scrub metric comparison). ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index da66af7eb7..c318b0935c 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -360,6 +360,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison now flags rows with insufficient effective sample counts and reports them in markdown/JSON outputs. - `scripts/finalize-playback-matrix.js` forwards minimum sample gating settings into compare stage, and publish summary now surfaces sample gating status fields. +44. **Fixed sample gating semantics for non-comparable metrics (2026-02-13)** + - Minimum sample checks now only consider metrics that are actually comparable for the row. + - Prevents scrub sample requirements from incorrectly failing non-scrub comparison rows. + - Comparison output now includes compared metric count and effective sample count per row. + 44. **Cached warmup contiguous coverage counts during warmup (2026-02-13)** - Warmup loop now recomputes contiguous prefetched coverage only when warmup buffer content changes. - Avoids repeated contiguous scans on idle warmup iterations. @@ -507,6 +512,7 @@ Decoder Pipeline: 47. Added minimum-sample comparison gating with `--min-samples-per-row`, insufficient-sample reporting, and finalize passthrough support. 48. Cached warmup contiguous coverage values and only recomputed contiguous scan when warmup buffer changed. 49. Added comparison JSON gate diagnostics (`failureReasons`, `gateOutcomes`) and surfaced failure reasons in published summary output. +50. 
Corrected minimum sample gating semantics to only count comparable metrics and added compared-metric/effective-sample columns in comparison output. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -548,6 +554,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: finalize now writes summary JSON before publish and passes `--finalize-summary-json` into publish flow. - `scripts/publish-playback-matrix-summary.js`: publish flow now supports optional finalize summary JSON input and surfaces finalize source/validation metadata. - `scripts/compare-playback-benchmark-runs.js`: added `--min-samples-per-row`, insufficient-sample row reporting, and sample gate fields in markdown/JSON outputs. +- `scripts/compare-playback-benchmark-runs.js`: minimum sample checks now apply only to metrics that are comparable for each row; output now includes compared metric count and effective sample count columns. - `scripts/finalize-playback-matrix.js`: forwards `--min-samples-per-row` into compare stage and captures it in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. - `scripts/compare-playback-benchmark-runs.js`: comparison JSON summary now includes explicit `failureReasons` and `gateOutcomes` fields. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 9cb6957e3e..5f5d9cf203 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -188,6 +188,7 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 # comparison JSON includes failureReasons and gateOutcomes for automation +# minimum sample gating uses metrics that are actually comparable for each row ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index f385b8bf37..e102d3f96f 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -270,11 +270,20 @@ function compareMetrics(baselineRows, candidateRows, options) { baseline.scrubSampleCount, candidate.scrubSampleCount, ); - const effectiveSampleCount = Math.min( - fpsMinSamples, - startupMinSamples, - scrubMinSamples, - ); + const comparableSampleCounts = []; + if (fpsDelta !== null) { + comparableSampleCounts.push(fpsMinSamples); + } + if (startupDelta !== null) { + comparableSampleCounts.push(startupMinSamples); + } + if (scrubDelta !== null) { + comparableSampleCounts.push(scrubMinSamples); + } + const effectiveSampleCount = + comparableSampleCounts.length > 0 + ? 
Math.min(...comparableSampleCounts) + : 0; if (effectiveSampleCount < options.minSamplesPerRow) { insufficientSampleRows.push({ platform: candidate.platform, @@ -314,6 +323,7 @@ function compareMetrics(baselineRows, candidateRows, options) { fpsMinSamples, startupMinSamples, scrubMinSamples, + comparedMetricCount: comparableSampleCounts.length, effectiveSampleCount, fpsDelta, startupDelta, @@ -377,10 +387,11 @@ function toMarkdown( md += "\n"; } md += - "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | F Samples | S Samples | Q Samples | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; - md += "|---|---|---|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---|\n"; + "| Platform | GPU | Scenario | Recording | Format | B Runs | C Runs | F Samples | S Samples | Q Samples | Metrics | Effective Samples | FPS Δ | Startup Δ (ms) | Scrub p95 Δ (ms) | Regression |\n"; + md += + "|---|---|---|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---|\n"; for (const row of comparisons) { - md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.baselineReportCount} | ${row.candidateReportCount} | ${row.fpsMinSamples} | ${row.startupMinSamples} | ${row.scrubMinSamples} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? row.regressions.join(", ") : "none"} |\n`; + md += `| ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.baselineReportCount} | ${row.candidateReportCount} | ${row.fpsMinSamples} | ${row.startupMinSamples} | ${row.scrubMinSamples} | ${row.comparedMetricCount} | ${row.effectiveSampleCount} | ${formatNumber(row.fpsDelta)} | ${formatNumber(row.startupDelta)} | ${formatNumber(row.scrubDelta)} | ${row.regressions.length > 0 ? 
row.regressions.join(", ") : "none"} |\n`; } md += "\n"; return md; From 913e6fd5597dce0440e87debf4f33268fc23cfee Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:08:58 +0000 Subject: [PATCH 237/333] improve: include comparison diagnostics in finalize and publish summaries --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/finalize-playback-matrix.js | 2 ++ scripts/publish-playback-matrix-summary.js | 8 ++++++++ 5 files changed, 18 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index cb3787efd8..cab9ae9ea8 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -86,6 +86,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md Finalize summary JSON includes generated timestamp, artifact paths, settings, pass/fail flags, and git branch/commit metadata when available. +Finalize summary JSON also carries comparison failure reasons and gate outcomes when comparison is enabled. 
# Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index c318b0935c..5f6a621468 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -365,6 +365,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prevents scrub sample requirements from incorrectly failing non-scrub comparison rows. - Comparison output now includes compared metric count and effective sample count per row. +45. **Extended finalize summary comparison diagnostics (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now includes comparison failure reasons and gate outcomes in summary results. + - `scripts/publish-playback-matrix-summary.js` now surfaces finalize comparison failure reasons when present. + 44. **Cached warmup contiguous coverage counts during warmup (2026-02-13)** - Warmup loop now recomputes contiguous prefetched coverage only when warmup buffer content changes. - Avoids repeated contiguous scans on idle warmup iterations. @@ -513,6 +517,7 @@ Decoder Pipeline: 48. Cached warmup contiguous coverage values and only recomputed contiguous scan when warmup buffer changed. 49. Added comparison JSON gate diagnostics (`failureReasons`, `gateOutcomes`) and surfaced failure reasons in published summary output. 50. Corrected minimum sample gating semantics to only count comparable metrics and added compared-metric/effective-sample columns in comparison output. +51. Extended finalize summary and publish output with comparison failure reasons and gate outcome metadata. 
**Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -559,6 +564,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes insufficient sample row count and minimum sample threshold fields. - `scripts/compare-playback-benchmark-runs.js`: comparison JSON summary now includes explicit `failureReasons` and `gateOutcomes` fields. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes comparison failure reasons when present. +- `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison failure reasons and gate outcome fields in results metadata. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 5f5d9cf203..001404ac0b 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -115,6 +115,7 @@ pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-di ``` Finalize summary JSON includes artifact paths, gate outcomes, settings, and git branch/commit metadata when available. +When comparison is enabled, finalize summary JSON also includes comparison failure reasons and gate outcome details. 
Include optimization thresholds when finalizing: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 71babdd9e2..f4c3107a3c 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -325,6 +325,8 @@ function main() { results: { validationPassed: validation.passed === true, comparisonPassed: comparison ? comparison.summary?.passed === true : null, + comparisonFailureReasons: comparison?.summary?.failureReasons ?? null, + comparisonGateOutcomes: comparison?.summary?.gateOutcomes ?? null, }, }; fs.writeFileSync(summaryJsonPath, JSON.stringify(summary, null, 2), "utf8"); diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index d3c254d8c5..14f389da15 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -118,6 +118,14 @@ function buildSummarySection( if (finalizeSummary.results?.comparisonPassed !== null) { markdown += `- Finalize comparison passed: ${finalizeSummary.results?.comparisonPassed === true ? "true" : "false"}\n`; } + const finalizeFailureReasons = Array.isArray( + finalizeSummary.results?.comparisonFailureReasons, + ) + ? 
finalizeSummary.results.comparisonFailureReasons + : []; + if (finalizeFailureReasons.length > 0) { + markdown += `- Finalize comparison failure reasons: ${finalizeFailureReasons.join(", ")}\n`; + } markdown += "\n"; } From 2219362e5b600015f271499fc58e1f9f340a67cb Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:13:48 +0000 Subject: [PATCH 238/333] improve: add parse-error gating and parse stats to matrix comparisons --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 + crates/editor/PLAYBACK-FINDINGS.md | 9 ++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 5 ++ scripts/compare-playback-benchmark-runs.js | 97 ++++++++++++++++++++-- scripts/finalize-playback-matrix.js | 11 ++- scripts/publish-playback-matrix-summary.js | 3 + 6 files changed, 122 insertions(+), 6 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index cab9ae9ea8..ea1ce8c465 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -82,6 +82,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --ou node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --allow-missing-candidate node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-candidate-only node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json 
/tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md @@ -103,12 +104,14 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --allow-missing-candidate node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. Comparison table also reports baseline/candidate run counts per row when multiple JSON inputs contribute to the same key. Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for automation-friendly gate diagnostics. Minimum sample gating is applied against metrics that are actually comparable for the row (for example, scrub samples are not required for rows with no scrub metric comparison). +Comparison JSON also includes baseline/candidate file parsing stats and optional parse-error gating support. 
``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 5f6a621468..c973dd166a 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -377,6 +377,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/compare-playback-benchmark-runs.js` now emits `failureReasons` and `gateOutcomes` in summary JSON. - `scripts/publish-playback-matrix-summary.js` now surfaces comparison failure reasons when present. +46. **Added parse-error gating and parse stats to comparison flows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-parse-errors`. + - Comparison JSON now includes baseline/candidate file parsing stats and parse error entries. + - `scripts/finalize-playback-matrix.js` now forwards parse-error gating option to compare stage; published summary surfaces parse policy and parse error counts. + --- ## Root Cause Analysis Archive @@ -518,6 +523,7 @@ Decoder Pipeline: 49. Added comparison JSON gate diagnostics (`failureReasons`, `gateOutcomes`) and surfaced failure reasons in published summary output. 50. Corrected minimum sample gating semantics to only count comparable metrics and added compared-metric/effective-sample columns in comparison output. 51. Extended finalize summary and publish output with comparison failure reasons and gate outcome metadata. +52. Added comparison parse-error gating (`--fail-on-parse-errors`) with parse stats surfaced in comparison JSON, finalize settings, and published summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -565,6 +571,9 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: comparison JSON summary now includes explicit `failureReasons` and `gateOutcomes` fields. 
- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes comparison failure reasons when present. - `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison failure reasons and gate outcome fields in results metadata. +- `scripts/compare-playback-benchmark-runs.js`: added parse-error gating and baseline/candidate file parse stats/parse error entries in JSON output. +- `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 001404ac0b..67e70870f6 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -133,6 +133,9 @@ pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir # optional: require minimum sample count per compared row pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 + +# optional: fail comparison gate when any input JSON fails to parse +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors ``` Finalize and publish to benchmark history in one command: @@ -190,6 +193,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # comparison JSON includes failureReasons and gateOutcomes for automation # minimum sample gating uses metrics that are actually comparable for each row +# optional: fail comparison gate when any input JSON fails to parse +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index e102d3f96f..3beec8cd03 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -15,6 +15,7 @@ function parseArgs(argv) { allowMissingCandidate: false, failOnCandidateOnly: false, minSamplesPerRow: 1, + failOnParseErrors: false, }; for (let i = 2; i < argv.length; i++) { @@ -84,6 +85,10 @@ function parseArgs(argv) { options.minSamplesPerRow = value; continue; } + if (arg === "--fail-on-parse-errors") { + options.failOnParseErrors = true; + continue; + } throw new Error(`Unknown argument: 
${arg}`); } @@ -91,7 +96,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } @@ -141,14 +146,38 @@ function maximum(values) { function collectMetrics(files) { const accumulators = new Map(); + const stats = { + totalFiles: files.length, + parsedFiles: 0, + usableFiles: 0, + skippedFiles: 0, + parseErrors: [], + }; for (const filePath of files) { - const parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + let parsed; + try { + parsed = JSON.parse(fs.readFileSync(filePath, "utf8")); + stats.parsedFiles += 1; + } catch (error) { + stats.parseErrors.push({ + file: filePath, + error: error instanceof Error ? error.message : String(error), + }); + continue; + } + + if (!Array.isArray(parsed.reports) || parsed.reports.length === 0) { + stats.skippedFiles += 1; + continue; + } + const notes = parseNotes(parsed.notes); const platform = notes.platform ?? "unknown"; const gpu = notes.gpu ?? "unknown"; const scenario = notes.scenario ?? "unspecified"; const reports = Array.isArray(parsed.reports) ? parsed.reports : []; + let fileContributedRows = false; for (const report of reports) { const key = `${platform}|${gpu}|${scenario}|${report.recording_name ?? 
"unknown"}|${report.is_fragmented ? "fragmented" : "mp4"}`; @@ -169,6 +198,13 @@ function collectMetrics(files) { const scrubP95Values = scrub .map((entry) => entry.p95_seek_time_ms) .filter((entry) => typeof entry === "number"); + const hasUsableMetrics = + fpsValues.length > 0 || + startupValues.length > 0 || + scrubP95Values.length > 0; + if (!hasUsableMetrics) { + continue; + } const existing = accumulators.get(key) ?? { key, @@ -187,6 +223,13 @@ function collectMetrics(files) { existing.startupSamples.push(...startupValues); existing.scrubP95Samples.push(...scrubP95Values); accumulators.set(key, existing); + fileContributedRows = true; + } + + if (fileContributedRows) { + stats.usableFiles += 1; + } else { + stats.skippedFiles += 1; } } @@ -209,7 +252,7 @@ function collectMetrics(files) { }); } - return rows; + return { rows, stats }; } function delta(candidate, baseline) { @@ -346,6 +389,8 @@ function toMarkdown( missingCandidateRows, candidateOnlyRows, insufficientSampleRows, + baselineStats, + candidateStats, options, ) { const regressions = comparisons.filter( @@ -357,7 +402,25 @@ function toMarkdown( md += `Tolerance: fps_drop<=${options.allowFpsDrop}, startup_increase<=${options.allowStartupIncreaseMs}ms, scrub_p95_increase<=${options.allowScrubP95IncreaseMs}ms\n\n`; md += `Coverage gate: missing_candidate=${options.allowMissingCandidate ? "allow" : "fail"}, candidate_only=${options.failOnCandidateOnly ? "fail" : "allow"}\n\n`; md += `Sample gate: min_samples_per_row>=${options.minSamplesPerRow}\n\n`; + md += `Parse gate: parse_errors=${options.failOnParseErrors ? 
"fail" : "allow"}\n\n`; + md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, parse_errors=${baselineStats.parseErrors.length}\n`; + md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; + if ( + baselineStats.parseErrors.length > 0 || + candidateStats.parseErrors.length > 0 + ) { + md += "## Parse Errors\n\n"; + md += "| Side | File | Error |\n"; + md += "|---|---|---|\n"; + for (const entry of baselineStats.parseErrors.slice(0, 20)) { + md += `| baseline | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + } + for (const entry of candidateStats.parseErrors.slice(0, 20)) { + md += `| candidate | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + } + md += "\n"; + } if (missingCandidateRows.length > 0) { md += "## Missing Candidate Rows\n\n"; md += "| Platform | GPU | Scenario | Recording | Format |\n"; @@ -402,6 +465,8 @@ function buildJsonOutput( missingCandidateRows, candidateOnlyRows, insufficientSampleRows, + baselineStats, + candidateStats, options, ) { const regressions = comparisons.filter( @@ -431,6 +496,13 @@ function buildJsonOutput( if (options.failOnCandidateOnly && hasCandidateOnlyRows) { failureReasons.push("candidate_only_rows"); } + if ( + options.failOnParseErrors && + (baselineStats.parseErrors.length > 0 || + candidateStats.parseErrors.length > 0) + ) { + failureReasons.push("parse_errors"); + } const passed = failureReasons.length === 0; return { generatedAt: new Date().toISOString(), 
@@ -441,6 +513,11 @@ function buildJsonOutput( allowMissingCandidate: options.allowMissingCandidate, failOnCandidateOnly: options.failOnCandidateOnly, minSamplesPerRow: options.minSamplesPerRow, + failOnParseErrors: options.failOnParseErrors, + }, + fileStats: { + baseline: baselineStats, + candidate: candidateStats, }, summary: { comparedRows: comparisons.length, @@ -457,6 +534,10 @@ function buildJsonOutput( options.allowMissingCandidate || !hasMissingCandidateRows, candidateOnlyRows: !options.failOnCandidateOnly || !hasCandidateOnlyRows, + parseErrors: + !options.failOnParseErrors || + (baselineStats.parseErrors.length === 0 && + candidateStats.parseErrors.length === 0), }, }, regressions, @@ -493,8 +574,10 @@ function main() { throw new Error("No candidate JSON files found"); } - const baselineRows = collectMetrics(baselineFiles); - const candidateRows = collectMetrics(candidateFiles); + const baselineCollected = collectMetrics(baselineFiles); + const candidateCollected = collectMetrics(candidateFiles); + const baselineRows = baselineCollected.rows; + const candidateRows = candidateCollected.rows; const { comparisons, missingCandidateRows, @@ -506,6 +589,8 @@ function main() { missingCandidateRows, candidateOnlyRows, insufficientSampleRows, + baselineCollected.stats, + candidateCollected.stats, options, ); const outputJson = buildJsonOutput( @@ -513,6 +598,8 @@ function main() { missingCandidateRows, candidateOnlyRows, insufficientSampleRows, + baselineCollected.stats, + candidateCollected.stats, options, ); diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index f4c3107a3c..a2bd3865e8 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -22,6 +22,7 @@ function parseArgs(argv) { allowMissingCandidate: false, failOnCandidateOnly: false, minSamplesPerRow: 1, + failOnParseErrors: false, }; for (let i = 2; i < argv.length; i++) { @@ -137,6 +138,10 @@ function parseArgs(argv) { 
options.minSamplesPerRow = value; continue; } + if (arg === "--fail-on-parse-errors") { + options.failOnParseErrors = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -144,7 +149,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -274,6 +279,9 @@ function main() { if (options.failOnCandidateOnly) { compareArgs.push("--fail-on-candidate-only"); } + if (options.failOnParseErrors) { + compareArgs.push("--fail-on-parse-errors"); + } compareArgs.push("--min-samples-per-row", String(options.minSamplesPerRow)); run("node", compareArgs); } @@ -321,6 +329,7 @@ function main() { allowMissingCandidate: options.allowMissingCandidate, failOnCandidateOnly: options.failOnCandidateOnly, minSamplesPerRow: options.minSamplesPerRow, + failOnParseErrors: options.failOnParseErrors, }, results: { validationPassed: validation.passed === true, diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 14f389da15..218f9a5e1d 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -103,6 +103,9 @@ function buildSummarySection( markdown += `- Minimum samples per row: ${comparison.tolerance?.minSamplesPerRow ?? "n/a"}\n\n`; markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; + markdown += `- Parse error policy: ${comparison.tolerance?.failOnParseErrors ? "fail" : "allow"}\n`; + markdown += `- Baseline parse errors: ${comparison.fileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; + markdown += `- Candidate parse errors: ${comparison.fileStats?.candidate?.parseErrors?.length ?? "n/a"}\n\n`; const failureReasons = Array.isArray(comparison.summary?.failureReasons) ? 
comparison.summary.failureReasons : []; From b05da47d66af235deda582899d10f76aa493eb82 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:15:35 +0000 Subject: [PATCH 239/333] improve: invalidate warmup contiguous cache on structural buffer changes --- crates/editor/PLAYBACK-FINDINGS.md | 7 ++++++ crates/editor/src/playback.rs | 36 +++++++++++++++++++++--------- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index c973dd166a..a5bf591c5f 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -382,6 +382,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison JSON now includes baseline/candidate file parsing stats and parse error entries. - `scripts/finalize-playback-matrix.js` now forwards parse-error gating option to compare stage; published summary surfaces parse policy and parse error counts. +47. **Made keyed prefetch insert helper report structural changes (2026-02-13)** + - `insert_prefetched_frame` now returns whether keyed prefetch buffer changed (insert and/or trim). + - Warmup loop now uses this direct signal instead of length-only delta checks for contiguous coverage cache invalidation. + - Improves warmup cache correctness when inserts and trims occur with stable overall buffer length. + --- ## Root Cause Analysis Archive @@ -524,6 +529,7 @@ Decoder Pipeline: 50. Corrected minimum sample gating semantics to only count comparable metrics and added compared-metric/effective-sample columns in comparison output. 51. Extended finalize summary and publish output with comparison failure reasons and gate outcome metadata. 52. Added comparison parse-error gating (`--fail-on-parse-errors`) with parse stats surfaced in comparison JSON, finalize settings, and published summaries. +53. 
Updated keyed prefetch insert helper to emit structural-change signals for warmup contiguous coverage cache invalidation. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -574,6 +580,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added parse-error gating and baseline/candidate file parse stats/parse error entries in JSON output. - `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. +- `crates/editor/src/playback.rs`: `insert_prefetched_frame` now returns structural-change signals and warmup cache invalidation uses this signal to avoid stale contiguous counts when insert+trim keeps buffer length unchanged. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 972824a75d..d6bf970227 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -101,7 +101,8 @@ impl FrameCache { } } -fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_frame: u32) { +fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_frame: u32) -> bool { + let mut changed = false; while buffer.len() > PREFETCH_BUFFER_SIZE { let far_ahead_frame = buffer .iter() @@ -111,6 +112,7 @@ fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_fra if let Some(frame) = far_ahead_frame { buffer.remove(&frame); + changed = true; continue; } @@ -118,20 +120,25 @@ fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_fra break; }; buffer.remove(&oldest_frame); + changed = true; } + changed } fn insert_prefetched_frame( buffer: &mut BTreeMap, prefetched: PrefetchedFrame, current_frame: u32, -) { +) -> bool { if prefetched.frame_number < current_frame { - return; + return false; } - buffer.entry(prefetched.frame_number).or_insert(prefetched); - trim_prefetch_buffer(buffer, current_frame); + let frame_number = prefetched.frame_number; + let inserted_new = !buffer.contains_key(&frame_number); + buffer.entry(frame_number).or_insert(prefetched); + let trimmed = trim_prefetch_buffer(buffer, current_frame); + inserted_new || trimmed } fn prune_prefetch_buffer_before_frame( @@ -566,9 +573,11 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { if prefetched.generation == seek_generation { - let pre_insert_len = prefetch_buffer.len(); - insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); - if prefetch_buffer.len() != pre_insert_len { + if insert_prefetched_frame( + &mut prefetch_buffer, + prefetched, + frame_number, + ) { warmup_buffer_changed = true; } if first_frame_time.is_none() && !prefetch_buffer.is_empty() { @@ -617,7 +626,8 @@ impl Playback { } while let Ok(prefetched) = prefetch_rx.try_recv() { if prefetched.generation == seek_generation { - insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); + let _ = + insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); } } prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); @@ -698,7 +708,11 @@ impl Playback { found_frame = Some(prefetched); break; } else if prefetched.frame_number >= frame_number { - insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); + let _ = insert_prefetched_frame( + &mut prefetch_buffer, + prefetched, + frame_number, + ); } } _ = tokio::time::sleep(in_flight_poll_interval) => { @@ -752,7 +766,7 @@ impl Playback { prefetched.segment_index, )) } else { - insert_prefetched_frame( + let _ = insert_prefetched_frame( &mut prefetch_buffer, prefetched, frame_number, From 63ba3f6b5fb9a509a6f1ae5bcc14930a170344c8 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:16:55 +0000 Subject: [PATCH 240/333] improve: include comparison file stats in finalize summary output --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/finalize-playback-matrix.js | 1 + scripts/publish-playback-matrix-summary.js | 5 +++++ 5 files changed, 15 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index ea1ce8c465..9b58bc2f80 100644 --- 
a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -88,6 +88,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- Finalize summary JSON includes generated timestamp, artifact paths, settings, pass/fail flags, and git branch/commit metadata when available. Finalize summary JSON also carries comparison failure reasons and gate outcomes when comparison is enabled. +Finalize summary JSON includes comparison file stats (including parse error counts) when comparison is enabled. # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index a5bf591c5f..9ebbd6dab2 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -387,6 +387,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup loop now uses this direct signal instead of length-only delta checks for contiguous coverage cache invalidation. - Improves warmup cache correctness when inserts and trims occur with stable overall buffer length. +48. **Extended finalize summary with comparison file stats (2026-02-13)** + - Finalize summary JSON now includes comparison file stats payload when comparison is enabled. + - Publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. + --- ## Root Cause Analysis Archive @@ -530,6 +534,7 @@ Decoder Pipeline: 51. Extended finalize summary and publish output with comparison failure reasons and gate outcome metadata. 52. 
Added comparison parse-error gating (`--fail-on-parse-errors`) with parse stats surfaced in comparison JSON, finalize settings, and published summaries. 53. Updated keyed prefetch insert helper to emit structural-change signals for warmup contiguous coverage cache invalidation. +54. Extended finalize summary and publish output with comparison file stats (including parse error counts). **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -581,6 +586,8 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. - `crates/editor/src/playback.rs`: `insert_prefetched_frame` now returns structural-change signals and warmup cache invalidation uses this signal to avoid stale contiguous counts when insert+trim keeps buffer length unchanged. +- `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison file stats payload when comparison runs are enabled. +- `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. - `crates/editor/src/playback.rs`: warmup loop now skips contiguous coverage scanning until first warmup frame has been observed. - `crates/editor/src/playback.rs`: warmup contiguous coverage counts are now cached and recomputed only on warmup buffer changes. - `crates/editor/src/playback.rs`: replaced deque-based prefetch buffering with keyed `BTreeMap` buffering and bounded eviction for faster target frame retrieval. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 67e70870f6..62e9b92582 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -116,6 +116,7 @@ pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-di Finalize summary JSON includes artifact paths, gate outcomes, settings, and git branch/commit metadata when available. When comparison is enabled, finalize summary JSON also includes comparison failure reasons and gate outcome details. +When comparison is enabled, finalize summary JSON also includes comparison file stats such as baseline/candidate parse error counts. Include optimization thresholds when finalizing: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index a2bd3865e8..fb72687670 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -336,6 +336,7 @@ function main() { comparisonPassed: comparison ? comparison.summary?.passed === true : null, comparisonFailureReasons: comparison?.summary?.failureReasons ?? null, comparisonGateOutcomes: comparison?.summary?.gateOutcomes ?? null, + comparisonFileStats: comparison?.fileStats ?? null, }, }; fs.writeFileSync(summaryJsonPath, JSON.stringify(summary, null, 2), "utf8"); diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 218f9a5e1d..4837382634 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -121,6 +121,11 @@ function buildSummarySection( if (finalizeSummary.results?.comparisonPassed !== null) { markdown += `- Finalize comparison passed: ${finalizeSummary.results?.comparisonPassed === true ? 
"true" : "false"}\n`; } + const finalizeFileStats = finalizeSummary.results?.comparisonFileStats; + if (finalizeFileStats?.baseline || finalizeFileStats?.candidate) { + markdown += `- Finalize baseline parse errors: ${finalizeFileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; + markdown += `- Finalize candidate parse errors: ${finalizeFileStats?.candidate?.parseErrors?.length ?? "n/a"}\n`; + } const finalizeFailureReasons = Array.isArray( finalizeSummary.results?.comparisonFailureReasons, ) From 7f0778b1043f41aa25b2d2b99197587fd5b63757 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:20:23 +0000 Subject: [PATCH 241/333] improve: stabilize comparison artifact ordering for reproducibility --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ scripts/compare-playback-benchmark-runs.js | 18 +++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 9ebbd6dab2..89093a5d8e 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -391,6 +391,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Finalize summary JSON now includes comparison file stats payload when comparison is enabled. - Publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. +49. **Stabilized comparison report ordering for reproducibility (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now deterministically sorts comparison, missing, candidate-only, and insufficient-sample rows. + - Keeps markdown/JSON outputs stable across repeated runs with identical inputs. + --- ## Root Cause Analysis Archive @@ -535,6 +539,7 @@ Decoder Pipeline: 52. Added comparison parse-error gating (`--fail-on-parse-errors`) with parse stats surfaced in comparison JSON, finalize settings, and published summaries. 53. 
Updated keyed prefetch insert helper to emit structural-change signals for warmup contiguous coverage cache invalidation. 54. Extended finalize summary and publish output with comparison file stats (including parse error counts). +55. Stabilized comparison output ordering with deterministic sorting for comparison rows and coverage-delta sections. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -557,6 +562,7 @@ Decoder Pipeline: - `scripts/analyze-playback-matrix-bottlenecks.js`: added prioritized bottleneck analysis output from matrix JSON evidence. - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. - `scripts/compare-playback-benchmark-runs.js`: fixed options wiring inside comparison regression checks and now aggregates per-key metrics across multi-input runs with run-count reporting. +- `scripts/compare-playback-benchmark-runs.js`: comparison row sets are now deterministically sorted for stable markdown/json artifact diffs. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 3beec8cd03..39809c17b3 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -264,6 +264,16 @@ function formatNumber(value, digits = 2) { return value === null ? 
"n/a" : value.toFixed(digits); } +function compareCoverageRows(a, b) { + return ( + a.platform.localeCompare(b.platform) || + a.gpu.localeCompare(b.gpu) || + a.scenario.localeCompare(b.scenario) || + a.recording.localeCompare(b.recording) || + a.format.localeCompare(b.format) + ); +} + function compareMetrics(baselineRows, candidateRows, options) { const comparisons = []; const missingCandidateRows = []; @@ -375,7 +385,13 @@ function compareMetrics(baselineRows, candidateRows, options) { }); } - comparisons.sort((a, b) => b.regressions.length - a.regressions.length); + comparisons.sort( + (a, b) => + b.regressions.length - a.regressions.length || compareCoverageRows(a, b), + ); + missingCandidateRows.sort(compareCoverageRows); + candidateOnlyRows.sort(compareCoverageRows); + insufficientSampleRows.sort(compareCoverageRows); return { comparisons, missingCandidateRows, From f12e5c500714f7581a7ea3b96e689666712d80a2 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:22:11 +0000 Subject: [PATCH 242/333] improve: add comparison count rollups to finalize summaries --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 1 + scripts/finalize-playback-matrix.js | 8 ++++++++ scripts/publish-playback-matrix-summary.js | 7 +++++++ 5 files changed, 24 insertions(+) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 9b58bc2f80..f2a797d83a 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -89,6 +89,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/json-results --output- Finalize summary JSON includes generated timestamp, artifact paths, settings, pass/fail flags, and git branch/commit metadata when available. Finalize summary JSON also carries comparison failure reasons and gate outcomes when comparison is enabled. 
Finalize summary JSON includes comparison file stats (including parse error counts) when comparison is enabled. +Finalize summary JSON includes comparison summary counts (compared rows, regressions, missing/candidate-only/insufficient-sample counts) when comparison is enabled. # Publish matrix artifacts into this benchmark history node scripts/publish-playback-matrix-summary.js --aggregate-md /tmp/playback-matrix-final/playback-benchmark-aggregate.md --status-md /tmp/playback-matrix-final/playback-matrix-status.md --validation-json /tmp/playback-matrix-final/playback-matrix-validation.json --bottlenecks-md /tmp/playback-matrix-final/playback-bottlenecks.md diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 89093a5d8e..fb1e1cb290 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -395,6 +395,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/compare-playback-benchmark-runs.js` now deterministically sorts comparison, missing, candidate-only, and insufficient-sample rows. - Keeps markdown/JSON outputs stable across repeated runs with identical inputs. +50. **Extended finalize summary with comparison count rollups (2026-02-13)** + - `scripts/finalize-playback-matrix.js` now captures comparison count rollups in summary results (compared rows, regressions, missing/candidate-only/insufficient-sample counts). + - `scripts/publish-playback-matrix-summary.js` now surfaces these finalize comparison counts in published summaries. + --- ## Root Cause Analysis Archive @@ -540,6 +544,7 @@ Decoder Pipeline: 53. Updated keyed prefetch insert helper to emit structural-change signals for warmup contiguous coverage cache invalidation. 54. Extended finalize summary and publish output with comparison file stats (including parse error counts). 55. Stabilized comparison output ordering with deterministic sorting for comparison rows and coverage-delta sections. +56. 
Extended finalize and publish summaries with comparison count rollups (compared rows, regressions, missing/candidate-only/insufficient-sample counts). **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -563,6 +568,8 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. - `scripts/compare-playback-benchmark-runs.js`: fixed options wiring inside comparison regression checks and now aggregates per-key metrics across multi-input runs with run-count reporting. - `scripts/compare-playback-benchmark-runs.js`: comparison row sets are now deterministically sorted for stable markdown/json artifact diffs. +- `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison count rollup fields for compared rows, regressions, and coverage deltas. +- `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize comparison count rollups when finalize summary metadata is attached. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 62e9b92582..303b010ad1 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -117,6 +117,7 @@ pnpm bench:playback:finalize -- --input /path/to/all-machine-results --output-di Finalize summary JSON includes artifact paths, gate outcomes, settings, and git branch/commit metadata when available. 
When comparison is enabled, finalize summary JSON also includes comparison failure reasons and gate outcome details. When comparison is enabled, finalize summary JSON also includes comparison file stats such as baseline/candidate parse error counts. +When comparison is enabled, finalize summary JSON also includes comparison summary counts for compared rows, regressions, and coverage deltas. Include optimization thresholds when finalizing: diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index fb72687670..6341c59523 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -334,6 +334,14 @@ function main() { results: { validationPassed: validation.passed === true, comparisonPassed: comparison ? comparison.summary?.passed === true : null, + comparisonComparedRows: comparison?.summary?.comparedRows ?? null, + comparisonRegressions: comparison?.summary?.regressions ?? null, + comparisonMissingCandidateRows: + comparison?.summary?.missingCandidateRows ?? null, + comparisonCandidateOnlyRows: + comparison?.summary?.candidateOnlyRows ?? null, + comparisonInsufficientSampleRows: + comparison?.summary?.insufficientSampleRows ?? null, comparisonFailureReasons: comparison?.summary?.failureReasons ?? null, comparisonGateOutcomes: comparison?.summary?.gateOutcomes ?? null, comparisonFileStats: comparison?.fileStats ?? null, diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 4837382634..f73c447c1e 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -121,6 +121,13 @@ function buildSummarySection( if (finalizeSummary.results?.comparisonPassed !== null) { markdown += `- Finalize comparison passed: ${finalizeSummary.results?.comparisonPassed === true ? 
"true" : "false"}\n`; } + if (finalizeSummary.results?.comparisonComparedRows !== null) { + markdown += `- Finalize compared rows: ${finalizeSummary.results?.comparisonComparedRows}\n`; + markdown += `- Finalize comparison regressions: ${finalizeSummary.results?.comparisonRegressions ?? "n/a"}\n`; + markdown += `- Finalize missing candidate rows: ${finalizeSummary.results?.comparisonMissingCandidateRows ?? "n/a"}\n`; + markdown += `- Finalize candidate-only rows: ${finalizeSummary.results?.comparisonCandidateOnlyRows ?? "n/a"}\n`; + markdown += `- Finalize insufficient sample rows: ${finalizeSummary.results?.comparisonInsufficientSampleRows ?? "n/a"}\n`; + } const finalizeFileStats = finalizeSummary.results?.comparisonFileStats; if (finalizeFileStats?.baseline || finalizeFileStats?.candidate) { markdown += `- Finalize baseline parse errors: ${finalizeFileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; From 5d56b008211efbc485635576d04e042b3694d801 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:25:13 +0000 Subject: [PATCH 243/333] improve: add optional zero-comparison gating for matrix compare flows --- crates/editor/PLAYBACK-BENCHMARKS.md | 2 ++ crates/editor/PLAYBACK-FINDINGS.md | 8 ++++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 ++++++ scripts/compare-playback-benchmark-runs.js | 13 ++++++++++++- scripts/finalize-playback-matrix.js | 11 ++++++++++- scripts/publish-playback-matrix-summary.js | 1 + 6 files changed, 39 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index f2a797d83a..43e7d2a355 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -83,6 +83,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --ou node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results 
--fail-on-candidate-only node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-zero-compared node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md @@ -107,6 +108,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-candidate-only node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors +node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-zero-compared node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and 
candidate-only rows not present in baseline. diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index fb1e1cb290..5ff649e6e6 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -399,6 +399,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - `scripts/finalize-playback-matrix.js` now captures comparison count rollups in summary results (compared rows, regressions, missing/candidate-only/insufficient-sample counts). - `scripts/publish-playback-matrix-summary.js` now surfaces these finalize comparison counts in published summaries. +51. **Added optional zero-comparison gating (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-zero-compared`. + - Enables strict failure when comparison processing yields zero comparable rows. + - `scripts/finalize-playback-matrix.js` forwards zero-comparison gating option in integrated compare flows. + --- ## Root Cause Analysis Archive @@ -545,6 +550,7 @@ Decoder Pipeline: 54. Extended finalize summary and publish output with comparison file stats (including parse error counts). 55. Stabilized comparison output ordering with deterministic sorting for comparison rows and coverage-delta sections. 56. Extended finalize and publish summaries with comparison count rollups (compared rows, regressions, missing/candidate-only/insufficient-sample counts). +57. Added optional zero-comparison gating (`--fail-on-zero-compared`) for compare/finalize flows and surfaced zero-compare policy in comparison/published summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -568,6 +574,8 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added regression-aware baseline/candidate comparison with configurable FPS/startup/scrub tolerances. 
- `scripts/compare-playback-benchmark-runs.js`: fixed options wiring inside comparison regression checks and now aggregates per-key metrics across multi-input runs with run-count reporting. - `scripts/compare-playback-benchmark-runs.js`: comparison row sets are now deterministically sorted for stable markdown/json artifact diffs. +- `scripts/compare-playback-benchmark-runs.js`: added optional `--fail-on-zero-compared` and zero-compare gate diagnostics in markdown/json outputs. +- `scripts/finalize-playback-matrix.js`: forwards `--fail-on-zero-compared` into compare stage and records policy in finalize summary settings. - `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison count rollup fields for compared rows, regressions, and coverage deltas. - `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize comparison count rollups when finalize summary metadata is attached. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 303b010ad1..686ca5efe5 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -138,6 +138,9 @@ pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir # optional: fail comparison gate when any input JSON fails to parse pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors + +# optional: fail comparison gate when no comparable rows remain +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-zero-compared ``` Finalize and publish to benchmark history in one command: @@ -197,6 +200,9 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # minimum sample gating uses metrics that are actually comparable for each row # optional: fail comparison gate when any input JSON fails to parse pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors + +# optional: fail comparison gate when no comparable rows remain after filtering +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-zero-compared ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 39809c17b3..2aae10ca84 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -16,6 +16,7 @@ function parseArgs(argv) { failOnCandidateOnly: false, minSamplesPerRow: 1, failOnParseErrors: false, + failOnZeroCompared: false, }; for (let i = 2; i < argv.length; i++) { @@ -89,6 +90,10 @@ function parseArgs(argv) { options.failOnParseErrors = 
true; continue; } + if (arg === "--fail-on-zero-compared") { + options.failOnZeroCompared = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -96,7 +101,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } @@ -419,6 +424,7 @@ function toMarkdown( md += `Coverage gate: missing_candidate=${options.allowMissingCandidate ? "allow" : "fail"}, candidate_only=${options.failOnCandidateOnly ? "fail" : "allow"}\n\n`; md += `Sample gate: min_samples_per_row>=${options.minSamplesPerRow}\n\n`; md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; + md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? 
"fail_if_zero" : "allow"}\n\n`; md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, parse_errors=${baselineStats.parseErrors.length}\n`; md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; @@ -519,6 +525,9 @@ function buildJsonOutput( ) { failureReasons.push("parse_errors"); } + if (options.failOnZeroCompared && comparisons.length === 0) { + failureReasons.push("zero_compared_rows"); + } const passed = failureReasons.length === 0; return { generatedAt: new Date().toISOString(), @@ -530,6 +539,7 @@ function buildJsonOutput( failOnCandidateOnly: options.failOnCandidateOnly, minSamplesPerRow: options.minSamplesPerRow, failOnParseErrors: options.failOnParseErrors, + failOnZeroCompared: options.failOnZeroCompared, }, fileStats: { baseline: baselineStats, @@ -554,6 +564,7 @@ function buildJsonOutput( !options.failOnParseErrors || (baselineStats.parseErrors.length === 0 && candidateStats.parseErrors.length === 0), + zeroComparedRows: !options.failOnZeroCompared || comparisons.length > 0, }, }, regressions, diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 6341c59523..74f50d1254 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -23,6 +23,7 @@ function parseArgs(argv) { failOnCandidateOnly: false, minSamplesPerRow: 1, failOnParseErrors: false, + failOnZeroCompared: false, }; for (let i = 2; i < argv.length; i++) { @@ -142,6 +143,10 @@ function 
parseArgs(argv) { options.failOnParseErrors = true; continue; } + if (arg === "--fail-on-zero-compared") { + options.failOnZeroCompared = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -149,7 +154,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -282,6 +287,9 @@ function main() { if (options.failOnParseErrors) { compareArgs.push("--fail-on-parse-errors"); } + if (options.failOnZeroCompared) { + compareArgs.push("--fail-on-zero-compared"); + } compareArgs.push("--min-samples-per-row", String(options.minSamplesPerRow)); run("node", compareArgs); } @@ -330,6 +338,7 @@ function main() { failOnCandidateOnly: options.failOnCandidateOnly, minSamplesPerRow: options.minSamplesPerRow, failOnParseErrors: options.failOnParseErrors, + failOnZeroCompared: options.failOnZeroCompared, }, results: { validationPassed: validation.passed === true, diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index f73c447c1e..94231e6070 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -104,6 +104,7 @@ function buildSummarySection( markdown += `- Missing candidate policy: ${comparison.tolerance?.allowMissingCandidate ? "allow" : "fail"}\n`; markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; markdown += `- Parse error policy: ${comparison.tolerance?.failOnParseErrors ? "fail" : "allow"}\n`; + markdown += `- Zero-compare policy: ${comparison.tolerance?.failOnZeroCompared ? "fail" : "allow"}\n`; markdown += `- Baseline parse errors: ${comparison.fileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; markdown += `- Candidate parse errors: ${comparison.fileStats?.candidate?.parseErrors?.length ?? 
"n/a"}\n\n`; const failureReasons = Array.isArray(comparison.summary?.failureReasons) From 3d202583f4d3e2a3353289b7fdb247c56ffd56eb Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:28:59 +0000 Subject: [PATCH 244/333] improve: handle seek updates during playback warmup --- crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/src/playback.rs | 23 ++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 5ff649e6e6..437ee37631 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -404,6 +404,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Enables strict failure when comparison processing yields zero comparable rows. - `scripts/finalize-playback-matrix.js` forwards zero-comparison gating option in integrated compare flows. +52. **Added warmup-stage seek handling before playback loop entry (2026-02-13)** + - Warmup loop now consumes seek updates immediately instead of waiting for playback loop start. + - Seek during warmup now resets warmup timers/buffer state and updates frame/audio playhead targets immediately. + - Improves responsiveness when users seek while playback is still warming up. + --- ## Root Cause Analysis Archive @@ -551,6 +556,7 @@ Decoder Pipeline: 55. Stabilized comparison output ordering with deterministic sorting for comparison rows and coverage-delta sections. 56. Extended finalize and publish summaries with comparison count rollups (compared rows, regressions, missing/candidate-only/insufficient-sample counts). 57. Added optional zero-comparison gating (`--fail-on-zero-compared`) for compare/finalize flows and surfaced zero-compare policy in comparison/published summaries. +58. Added warmup-stage seek handling to apply seeks immediately while playback warmup is in progress. 
**Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -579,6 +585,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison count rollup fields for compared rows, regressions, and coverage deltas. - `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize comparison count rollups when finalize summary metadata is attached. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. +- `crates/editor/src/playback.rs`: warmup loop now handles seek updates immediately, resetting warmup state and updating frame/audio targets before playback loop entry. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index d6bf970227..5735831682 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -526,7 +526,7 @@ impl Playback { .max(Duration::from_millis(200)) .min(Duration::from_millis(700)); let warmup_no_frames_timeout = Duration::from_secs(5); - let warmup_start = Instant::now(); + let mut warmup_start = Instant::now(); let mut first_frame_time: Option = None; let mut warmup_contiguous_prefetched = 0usize; let mut warmup_buffer_changed = false; @@ -585,6 +585,27 @@ impl Playback { } } } + _ = seek_rx.changed() => { + let seek_frame = *seek_rx.borrow_and_update(); + seek_generation = seek_generation.saturating_add(1); + frame_number = seek_frame; + prefetch_buffer.clear(); + frame_cache.cache.clear(); + warmup_contiguous_prefetched = 0; + warmup_buffer_changed = false; + first_frame_time = None; + warmup_start = Instant::now(); + let _ = seek_generation_tx.send(seek_generation); + let _ = frame_request_tx.send(frame_number); + let _ = playback_position_tx.send(frame_number); + if has_audio + && audio_playhead_tx + .send(frame_number as f64 / fps_f64) + .is_err() + { + break; + } + } _ = stop_rx.changed() => { if *stop_rx.borrow() { break; From cd2f465b34f3312a915b3e5b9f7acfa8b0c880d8 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:33:02 +0000 Subject: [PATCH 245/333] improve: add skipped-file gating for comparison workflows --- crates/editor/PLAYBACK-BENCHMARKS.md | 3 +++ crates/editor/PLAYBACK-FINDINGS.md | 9 +++++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 6 ++++++ scripts/compare-playback-benchmark-runs.js | 19 ++++++++++++++++++- scripts/finalize-playback-matrix.js | 11 ++++++++++- scripts/publish-playback-matrix-summary.js | 1 + 6 files changed, 47 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 43e7d2a355..6b3737b4e7 100644 --- 
a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -84,6 +84,7 @@ node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --ou node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --min-samples-per-row 3 node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-parse-errors node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-zero-compared +node scripts/finalize-playback-matrix.js --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-skipped-files node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --output-json /tmp/playback-matrix-final/playback-finalize-summary.json node scripts/finalize-playback-matrix.js --input /path/to/json-results --output-dir /tmp/playback-matrix-final --require-formats mp4,fragmented --target-fps 60 --max-scrub-p95-ms 40 --max-startup-ms 250 --publish-target /workspace/crates/editor/PLAYBACK-BENCHMARKS.md @@ -109,6 +110,7 @@ node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-res node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --min-samples-per-row 3 node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-parse-errors node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-zero-compared +node scripts/compare-playback-benchmark-runs.js --baseline 
/path/to/baseline-results --candidate /path/to/candidate-results --fail-on-skipped-files node scripts/compare-playback-benchmark-runs.js --baseline /path/to/baseline-results --candidate /path/to/candidate-results --output /tmp/playback-comparison.md --output-json /tmp/playback-comparison.json Comparison output reports both baseline rows missing in candidate and candidate-only rows not present in baseline. @@ -116,6 +118,7 @@ Comparison table also reports baseline/candidate run counts per row when multipl Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for automation-friendly gate diagnostics. Minimum sample gating is applied against metrics that are actually comparable for the row (for example, scrub samples are not required for rows with no scrub metric comparison). Comparison JSON also includes baseline/candidate file parsing stats and optional parse-error gating support. +Comparison can optionally gate on skipped input files via `--fail-on-skipped-files`. ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 437ee37631..be3ceae240 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -409,6 +409,11 @@ cargo run -p cap-recording --example playback-test-runner -- full - Seek during warmup now resets warmup timers/buffer state and updates frame/audio playhead targets immediately. - Improves responsiveness when users seek while playback is still warming up. +53. **Added optional skipped-file gating for comparison workflows (2026-02-13)** + - `scripts/compare-playback-benchmark-runs.js` now supports `--fail-on-skipped-files`. + - Enables strict failure when baseline/candidate inputs include skipped JSON files without usable benchmark payloads. + - `scripts/finalize-playback-matrix.js` forwards skipped-file gating option in integrated compare flows. + --- ## Root Cause Analysis Archive @@ -557,6 +562,7 @@ Decoder Pipeline: 56. 
Extended finalize and publish summaries with comparison count rollups (compared rows, regressions, missing/candidate-only/insufficient-sample counts). 57. Added optional zero-comparison gating (`--fail-on-zero-compared`) for compare/finalize flows and surfaced zero-compare policy in comparison/published summaries. 58. Added warmup-stage seek handling to apply seeks immediately while playback warmup is in progress. +59. Added optional skipped-file gating (`--fail-on-skipped-files`) for compare/finalize flows and surfaced skipped-file policy in comparison/published summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -593,7 +599,9 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: comparison outputs now include candidate-only rows in addition to missing-candidate coverage deltas. - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. +- `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-skipped-files` gate and parse/skip policy reporting in comparison markdown/json outputs. - `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. +- `scripts/finalize-playback-matrix.js`: forwards `--fail-on-skipped-files` into compare stage and records skipped-file policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. 
- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes missing-candidate and candidate-only coverage policy modes from comparison JSON tolerance settings. - `crates/editor/src/playback.rs`: warmup readiness now requires contiguous prefetched frame coverage from current frame instead of raw buffer length threshold. @@ -613,6 +621,7 @@ Decoder Pipeline: - `scripts/compare-playback-benchmark-runs.js`: added parse-error gating and baseline/candidate file parse stats/parse error entries in JSON output. - `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes skipped-file policy mode from comparison tolerance settings. - `crates/editor/src/playback.rs`: `insert_prefetched_frame` now returns structural-change signals and warmup cache invalidation uses this signal to avoid stale contiguous counts when insert+trim keeps buffer length unchanged. - `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison file stats payload when comparison runs are enabled. - `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index 686ca5efe5..a2fd851e59 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -141,6 +141,9 @@ pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir # optional: fail comparison gate when no comparable rows remain pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-zero-compared + +# optional: fail comparison gate when any input JSON is skipped +pnpm bench:playback:finalize -- --input /path/to/candidate-results --output-dir /tmp/playback-matrix-final --compare-baseline /path/to/baseline-results --fail-on-skipped-files ``` Finalize and publish to benchmark history in one command: @@ -203,6 +206,9 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # optional: fail comparison gate when no comparable rows remain after filtering pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-zero-compared + +# optional: fail comparison gate when any input JSON is skipped (no reports/usable metrics) +pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-skipped-files ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 2aae10ca84..dcc263bbbb 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -17,6 +17,7 @@ function parseArgs(argv) { minSamplesPerRow: 1, failOnParseErrors: false, failOnZeroCompared: false, + failOnSkippedFiles: false, }; for (let i = 2; i < argv.length; i++) { @@ -94,6 +95,10 @@ function parseArgs(argv) { options.failOnZeroCompared = true; continue; } + if (arg === "--fail-on-skipped-files") { 
+ options.failOnSkippedFiles = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -101,7 +106,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] + console.log(`Usage: node scripts/compare-playback-benchmark-runs.js --baseline [--baseline ...] --candidate [--candidate ...] [--output ] [--output-json ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] [--fail-on-skipped-files] Compares baseline and candidate playback matrix JSON outputs and flags regressions. Multiple --baseline and --candidate inputs are supported.`); } @@ -425,6 +430,7 @@ function toMarkdown( md += `Sample gate: min_samples_per_row>=${options.minSamplesPerRow}\n\n`; md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? "fail_if_zero" : "allow"}\n\n`; + md += `Skipped-file gate: skipped_files=${options.failOnSkippedFiles ? 
"fail" : "allow"}\n\n`; md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, parse_errors=${baselineStats.parseErrors.length}\n`; md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; @@ -528,6 +534,12 @@ function buildJsonOutput( if (options.failOnZeroCompared && comparisons.length === 0) { failureReasons.push("zero_compared_rows"); } + if ( + options.failOnSkippedFiles && + (baselineStats.skippedFiles > 0 || candidateStats.skippedFiles > 0) + ) { + failureReasons.push("skipped_files"); + } const passed = failureReasons.length === 0; return { generatedAt: new Date().toISOString(), @@ -540,6 +552,7 @@ function buildJsonOutput( minSamplesPerRow: options.minSamplesPerRow, failOnParseErrors: options.failOnParseErrors, failOnZeroCompared: options.failOnZeroCompared, + failOnSkippedFiles: options.failOnSkippedFiles, }, fileStats: { baseline: baselineStats, @@ -565,6 +578,10 @@ function buildJsonOutput( (baselineStats.parseErrors.length === 0 && candidateStats.parseErrors.length === 0), zeroComparedRows: !options.failOnZeroCompared || comparisons.length > 0, + skippedFiles: + !options.failOnSkippedFiles || + (baselineStats.skippedFiles === 0 && + candidateStats.skippedFiles === 0), }, }, regressions, diff --git a/scripts/finalize-playback-matrix.js b/scripts/finalize-playback-matrix.js index 74f50d1254..3c5dc0eb0b 100644 --- a/scripts/finalize-playback-matrix.js +++ b/scripts/finalize-playback-matrix.js @@ -24,6 +24,7 @@ function 
parseArgs(argv) { minSamplesPerRow: 1, failOnParseErrors: false, failOnZeroCompared: false, + failOnSkippedFiles: false, }; for (let i = 2; i < argv.length; i++) { @@ -147,6 +148,10 @@ function parseArgs(argv) { options.failOnZeroCompared = true; continue; } + if (arg === "--fail-on-skipped-files") { + options.failOnSkippedFiles = true; + continue; + } throw new Error(`Unknown argument: ${arg}`); } @@ -154,7 +159,7 @@ function parseArgs(argv) { } function usage() { - console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] [--publish-target ] + console.log(`Usage: node scripts/finalize-playback-matrix.js --input [--input ...] --output-dir [--output-json ] [--require-formats mp4,fragmented] [--target-fps 60] [--max-scrub-p95-ms 40] [--max-startup-ms 250] [--compare-baseline ] [--allow-fps-drop 2] [--allow-startup-increase-ms 25] [--allow-scrub-p95-increase-ms 5] [--allow-missing-candidate] [--fail-on-candidate-only] [--min-samples-per-row 1] [--fail-on-parse-errors] [--fail-on-zero-compared] [--fail-on-skipped-files] [--publish-target ] Generates aggregate markdown, status markdown, validation JSON, and bottleneck analysis for collected playback matrix outputs. 
Optionally compares candidate inputs against baseline inputs and fails on regressions.`); } @@ -290,6 +295,9 @@ function main() { if (options.failOnZeroCompared) { compareArgs.push("--fail-on-zero-compared"); } + if (options.failOnSkippedFiles) { + compareArgs.push("--fail-on-skipped-files"); + } compareArgs.push("--min-samples-per-row", String(options.minSamplesPerRow)); run("node", compareArgs); } @@ -339,6 +347,7 @@ function main() { minSamplesPerRow: options.minSamplesPerRow, failOnParseErrors: options.failOnParseErrors, failOnZeroCompared: options.failOnZeroCompared, + failOnSkippedFiles: options.failOnSkippedFiles, }, results: { validationPassed: validation.passed === true, diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 94231e6070..47b6d12148 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -105,6 +105,7 @@ function buildSummarySection( markdown += `- Candidate-only policy: ${comparison.tolerance?.failOnCandidateOnly ? "fail" : "allow"}\n\n`; markdown += `- Parse error policy: ${comparison.tolerance?.failOnParseErrors ? "fail" : "allow"}\n`; markdown += `- Zero-compare policy: ${comparison.tolerance?.failOnZeroCompared ? "fail" : "allow"}\n`; + markdown += `- Skipped-file policy: ${comparison.tolerance?.failOnSkippedFiles ? "fail" : "allow"}\n`; markdown += `- Baseline parse errors: ${comparison.fileStats?.baseline?.parseErrors?.length ?? "n/a"}\n`; markdown += `- Candidate parse errors: ${comparison.fileStats?.candidate?.parseErrors?.length ?? 
"n/a"}\n\n`; const failureReasons = Array.isArray(comparison.summary?.failureReasons) From fbc5fe4d176ef9c50572daeee1533b869627c47b Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:35:09 +0000 Subject: [PATCH 246/333] improve: add skipped-file reason breakdown to comparison diagnostics --- crates/editor/PLAYBACK-BENCHMARKS.md | 1 + crates/editor/PLAYBACK-FINDINGS.md | 7 +++++++ crates/editor/PLAYBACK-MATRIX-RUNBOOK.md | 2 ++ scripts/compare-playback-benchmark-runs.js | 8 ++++++-- scripts/publish-playback-matrix-summary.js | 6 ++++++ 5 files changed, 22 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-BENCHMARKS.md b/crates/editor/PLAYBACK-BENCHMARKS.md index 6b3737b4e7..c06f822c5d 100644 --- a/crates/editor/PLAYBACK-BENCHMARKS.md +++ b/crates/editor/PLAYBACK-BENCHMARKS.md @@ -119,6 +119,7 @@ Comparison JSON summary includes `failureReasons` and `gateOutcomes` fields for Minimum sample gating is applied against metrics that are actually comparable for the row (for example, scrub samples are not required for rows with no scrub metric comparison). Comparison JSON also includes baseline/candidate file parsing stats and optional parse-error gating support. Comparison can optionally gate on skipped input files via `--fail-on-skipped-files`. +Comparison file stats now include skipped-file breakdown for `no_reports` and `no_usable_metrics`. ``` #### Decode Performance Benchmark diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index be3ceae240..674d267922 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -414,6 +414,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Enables strict failure when baseline/candidate inputs include skipped JSON files without usable benchmark payloads. - `scripts/finalize-playback-matrix.js` forwards skipped-file gating option in integrated compare flows. +54. 
**Added skipped-file reason breakdown in comparison file stats (2026-02-13)** + - Comparison file stats now report skipped-file reasons as `skippedNoReports` and `skippedNoUsableMetrics`. + - Published summary now surfaces skipped-file breakdown for baseline and candidate inputs. + --- ## Root Cause Analysis Archive @@ -563,6 +567,7 @@ Decoder Pipeline: 57. Added optional zero-comparison gating (`--fail-on-zero-compared`) for compare/finalize flows and surfaced zero-compare policy in comparison/published summaries. 58. Added warmup-stage seek handling to apply seeks immediately while playback warmup is in progress. 59. Added optional skipped-file gating (`--fail-on-skipped-files`) for compare/finalize flows and surfaced skipped-file policy in comparison/published summaries. +60. Added skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`) into comparison file stats and published summaries. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -600,6 +605,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: baseline comparison flow now writes both `playback-comparison.md` and `playback-comparison.json`. - `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-candidate-only` coverage gate and surfaced coverage gate mode in comparison markdown output. - `scripts/compare-playback-benchmark-runs.js`: added optional strict `--fail-on-skipped-files` gate and parse/skip policy reporting in comparison markdown/json outputs. +- `scripts/compare-playback-benchmark-runs.js`: comparison file stats now include skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`). - `scripts/finalize-playback-matrix.js`: added passthrough support for strict `--fail-on-candidate-only` compare mode in one-shot finalize workflows. 
- `scripts/finalize-playback-matrix.js`: forwards `--fail-on-skipped-files` into compare stage and records skipped-file policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes candidate-only row count from comparison JSON summary. @@ -622,6 +628,7 @@ Decoder Pipeline: - `scripts/finalize-playback-matrix.js`: forwards parse-error gating and records parse-error policy in finalize summary settings. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes parse policy and baseline/candidate parse error counts. - `scripts/publish-playback-matrix-summary.js`: published comparison status now includes skipped-file policy mode from comparison tolerance settings. +- `scripts/publish-playback-matrix-summary.js`: published comparison status now includes skipped-file breakdown counts for no-reports and no-usable-metrics cases. - `crates/editor/src/playback.rs`: `insert_prefetched_frame` now returns structural-change signals and warmup cache invalidation uses this signal to avoid stale contiguous counts when insert+trim keeps buffer length unchanged. - `scripts/finalize-playback-matrix.js`: finalize summary now includes comparison file stats payload when comparison runs are enabled. - `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize baseline/candidate parse error counts from finalize summary metadata. 
diff --git a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md index a2fd851e59..b4302f8808 100644 --- a/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md +++ b/crates/editor/PLAYBACK-MATRIX-RUNBOOK.md @@ -209,6 +209,8 @@ pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate # optional: fail comparison gate when any input JSON is skipped (no reports/usable metrics) pnpm bench:playback:compare -- --baseline /path/to/baseline-results --candidate /path/to/candidate-results --fail-on-skipped-files + +# comparison file stats include skipped-file breakdown (no_reports / no_usable_metrics) ``` ## Evidence checklist diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index dcc263bbbb..96b5e9432e 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -161,6 +161,8 @@ function collectMetrics(files) { parsedFiles: 0, usableFiles: 0, skippedFiles: 0, + skippedNoReports: 0, + skippedNoUsableMetrics: 0, parseErrors: [], }; @@ -179,6 +181,7 @@ function collectMetrics(files) { if (!Array.isArray(parsed.reports) || parsed.reports.length === 0) { stats.skippedFiles += 1; + stats.skippedNoReports += 1; continue; } @@ -240,6 +243,7 @@ function collectMetrics(files) { stats.usableFiles += 1; } else { stats.skippedFiles += 1; + stats.skippedNoUsableMetrics += 1; } } @@ -431,8 +435,8 @@ function toMarkdown( md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? "fail_if_zero" : "allow"}\n\n`; md += `Skipped-file gate: skipped_files=${options.failOnSkippedFiles ? 
"fail" : "allow"}\n\n`; - md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, parse_errors=${baselineStats.parseErrors.length}\n`; - md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, parse_errors=${candidateStats.parseErrors.length}\n\n`; + md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; + md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_metrics=${candidateStats.skippedNoUsableMetrics}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; if ( baselineStats.parseErrors.length > 0 || diff --git a/scripts/publish-playback-matrix-summary.js b/scripts/publish-playback-matrix-summary.js index 47b6d12148..55f6b5b4cc 100644 --- a/scripts/publish-playback-matrix-summary.js +++ b/scripts/publish-playback-matrix-summary.js @@ -108,6 +108,12 @@ function buildSummarySection( markdown += `- Skipped-file policy: ${comparison.tolerance?.failOnSkippedFiles ? "fail" : "allow"}\n`; markdown += `- Baseline parse errors: ${comparison.fileStats?.baseline?.parseErrors?.length ?? 
"n/a"}\n`; markdown += `- Candidate parse errors: ${comparison.fileStats?.candidate?.parseErrors?.length ?? "n/a"}\n\n`; + markdown += `- Baseline skipped files: ${comparison.fileStats?.baseline?.skippedFiles ?? "n/a"}\n`; + markdown += `- Candidate skipped files: ${comparison.fileStats?.candidate?.skippedFiles ?? "n/a"}\n`; + markdown += `- Baseline skipped (no reports): ${comparison.fileStats?.baseline?.skippedNoReports ?? "n/a"}\n`; + markdown += `- Candidate skipped (no reports): ${comparison.fileStats?.candidate?.skippedNoReports ?? "n/a"}\n`; + markdown += `- Baseline skipped (no usable metrics): ${comparison.fileStats?.baseline?.skippedNoUsableMetrics ?? "n/a"}\n`; + markdown += `- Candidate skipped (no usable metrics): ${comparison.fileStats?.candidate?.skippedNoUsableMetrics ?? "n/a"}\n\n`; const failureReasons = Array.isArray(comparison.summary?.failureReasons) ? comparison.summary.failureReasons : []; From 16550d2354283bd9bd0d4413b2ed3aa3fae1909e Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:40:42 +0000 Subject: [PATCH 247/333] improve: scale warmup idle polling with frame budget --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 674d267922..f8f779f126 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -418,6 +418,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Comparison file stats now report skipped-file reasons as `skippedNoReports` and `skippedNoUsableMetrics`. - Published summary now surfaces skipped-file breakdown for baseline and candidate inputs. +55. **Scaled warmup idle poll interval by frame budget (2026-02-13)** + - Warmup loop fallback poll now scales with frame duration and stays in bounded low-latency range. 
+ - Reduces fixed 100ms idle poll delay during warmup while avoiding high-frequency busy polling. + --- ## Root Cause Analysis Archive @@ -568,6 +572,7 @@ Decoder Pipeline: 58. Added warmup-stage seek handling to apply seeks immediately while playback warmup is in progress. 59. Added optional skipped-file gating (`--fail-on-skipped-files`) for compare/finalize flows and surfaced skipped-file policy in comparison/published summaries. 60. Added skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`) into comparison file stats and published summaries. +61. Scaled warmup idle poll interval with frame budget to reduce warmup fallback latency under sparse frame arrival. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -597,6 +602,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: publish summary now surfaces finalize comparison count rollups when finalize summary metadata is attached. - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: warmup loop now handles seek updates immediately, resetting warmup state and updating frame/audio targets before playback loop entry. +- `crates/editor/src/playback.rs`: warmup loop fallback polling now scales with frame budget instead of fixed 100ms sleep to improve responsiveness without busy waiting. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 5735831682..2aabb51453 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -526,6 +526,10 @@ impl Playback { .max(Duration::from_millis(200)) .min(Duration::from_millis(700)); let warmup_no_frames_timeout = Duration::from_secs(5); + let warmup_idle_poll_interval = frame_duration + .mul_f64(0.5) + .max(Duration::from_millis(8)) + .min(Duration::from_millis(25)); let mut warmup_start = Instant::now(); let mut first_frame_time: Option = None; let mut warmup_contiguous_prefetched = 0usize; @@ -533,6 +537,7 @@ impl Playback { info!( warmup_target_frames, warmup_after_first_timeout_ms = warmup_after_first_timeout.as_secs_f64() * 1000.0, + warmup_idle_poll_interval_ms = warmup_idle_poll_interval.as_secs_f64() * 1000.0, "Playback warmup configuration" ); @@ -611,7 +616,7 @@ impl Playback { break; } } - _ = tokio::time::sleep(Duration::from_millis(100)) => { + _ = tokio::time::sleep(warmup_idle_poll_interval) => { } } } From eae49303f27244edb24b277b82c90f1ad790793a Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:42:13 +0000 Subject: [PATCH 248/333] improve: avoid duplicate prefetch decodes on small rebases --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 7 +++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index f8f779f126..e3f47b1423 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -422,6 +422,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup loop fallback poll now scales with frame duration and stays in bounded low-latency range. - Reduces fixed 100ms idle poll delay during warmup while avoiding high-frequency busy polling. +56. 
**Retained in-flight prefetch markers for small frame-request shifts (2026-02-13)** + - Frame-request rebases now clear in-flight marker sets only for backward or large-distance seeks that also reset in-flight futures. + - Prevents duplicate decode scheduling caused by clearing marker sets while earlier in-flight futures are still active. + --- ## Root Cause Analysis Archive @@ -573,6 +577,7 @@ Decoder Pipeline: 59. Added optional skipped-file gating (`--fail-on-skipped-files`) for compare/finalize flows and surfaced skipped-file policy in comparison/published summaries. 60. Added skipped-file reason breakdown (`skippedNoReports`, `skippedNoUsableMetrics`) into comparison file stats and published summaries. 61. Scaled warmup idle poll interval with frame budget to reduce warmup fallback latency under sparse frame arrival. +62. Retained in-flight prefetch markers for small frame-request shifts to avoid duplicate decode scheduling during active prefetch execution. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -603,6 +608,7 @@ Decoder Pipeline: - `scripts/publish-playback-matrix-summary.js`: added optional baseline-vs-candidate comparison artifact attachment in published summaries. - `crates/editor/src/playback.rs`: warmup loop now handles seek updates immediately, resetting warmup state and updating frame/audio targets before playback loop entry. - `crates/editor/src/playback.rs`: warmup loop fallback polling now scales with frame budget instead of fixed 100ms sleep to improve responsiveness without busy waiting. +- `crates/editor/src/playback.rs`: frame-request rebases now preserve in-flight marker sets unless in-flight futures are explicitly reset for backward/large seek changes. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. 
- `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 2aabb51453..c1a974d6e7 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -302,11 +302,10 @@ impl Playback { frames_decoded = 0; prefetched_behind.clear(); - if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { - in_flight_guard.clear(); - } - if is_backward_seek || seek_distance > dynamic_prefetch_ahead / 2 { + if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { + in_flight_guard.clear(); + } in_flight = FuturesUnordered::new(); } } From 22159386327c9e52c701bfb85760dff775449689 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:52:31 +0000 Subject: [PATCH 249/333] improve: cache clip-offset lookups in decode scheduling Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 +++++ crates/editor/src/playback.rs | 40 +++++++++++++++++------------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 5153575812..72759ab115 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -442,6 +442,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Behind-prefetch dedupe tracking now keeps a bounded eviction-ordered window instead of unbounded growth over long playback sessions. - Prevents long-session hash-set expansion from degrading behind-prefetch lookup efficiency. +61. **Cached clip-offset lookups for decode scheduling paths (2026-02-13)** + - Prefetch and direct-decode paths now use cached clip-index-to-offset maps instead of repeated linear clip scans. 
+ - Reduces per-frame scheduling overhead in playback and prefetch loops when projects contain many clips. + --- ## Root Cause Analysis Archive @@ -598,6 +602,7 @@ Decoder Pipeline: 64. Batched warmup prefetch queue consumption to reduce warmup staging overhead and improve contiguous warmup-fill responsiveness. 65. Scaled prefetch idle polling with frame budget to reduce scheduler wakeup churn during empty in-flight periods. 66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. +67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -633,6 +638,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: warmup prefetch receive path now drains immediately queued prefetched frames in batches to accelerate warmup buffer population. - `crates/editor/src/playback.rs`: prefetch scheduler idle polling now scales with frame budget (bounded) instead of fixed 1ms delay, reducing idle wakeup overhead. - `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. +- `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 1f02298528..d2682a7490 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -3,7 +3,7 @@ use cap_audio::{ }; use cap_media::MediaError; use cap_media_info::AudioInfo; -use cap_project::{ProjectConfiguration, XY}; +use cap_project::{ClipOffsets, ProjectConfiguration, XY}; use cap_rendering::{ DecodedSegmentFrames, ProjectUniforms, RenderVideoConstants, ZoomFocusInterpolator, spring_mass_damper::SpringMassDamperSimulationConfig, @@ -16,7 +16,7 @@ use cpal::{ use futures::stream::{FuturesUnordered, StreamExt}; use lru::LruCache; use std::{ - collections::{BTreeMap, HashSet, VecDeque}, + collections::{BTreeMap, HashMap, HashSet, VecDeque}, num::NonZeroUsize, sync::{ Arc, RwLock, @@ -173,6 +173,14 @@ fn count_contiguous_prefetched_frames( contiguous } +fn build_clip_offsets_lookup(project: &ProjectConfiguration) -> HashMap { + project + .clips + .iter() + .map(|clip| (clip.index, clip.offsets)) + .collect() +} + impl Playback { pub async fn start( mut self, @@ -270,6 +278,7 @@ impl Playback { let mut active_generation = *prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); + let mut prefetch_clip_offsets = build_clip_offsets_lookup(&cached_project); info!( dynamic_prefetch_ahead, <<<<<<< HEAD @@ -289,6 +298,7 @@ impl Playback { if prefetch_project.has_changed().unwrap_or(false) { cached_project = prefetch_project.borrow_and_update().clone(); + prefetch_clip_offsets = build_clip_offsets_lookup(&cached_project); } if prefetch_seek_generation.has_changed().unwrap_or(false) { @@ -384,11 +394,9 @@ impl Playback { && let Some(segment_media) = prefetch_segment_medias.get(segment.recording_clip as usize) { - let clip_offsets = cached_project - .clips - .iter() - .find(|v| v.index == segment.recording_clip) - .map(|v| v.offsets) + let clip_offsets = prefetch_clip_offsets + .get(&segment.recording_clip) + .copied() 
.unwrap_or_default(); let decoders = segment_media.decoders.clone(); @@ -455,11 +463,9 @@ impl Playback { && let Some(segment_media) = prefetch_segment_medias.get(segment.recording_clip as usize) { - let clip_offsets = cached_project - .clips - .iter() - .find(|v| v.index == segment.recording_clip) - .map(|v| v.offsets) + let clip_offsets = prefetch_clip_offsets + .get(&segment.recording_clip) + .copied() .unwrap_or_default(); let decoders = segment_media.decoders.clone(); @@ -722,6 +728,7 @@ impl Playback { let mut playback_anchor_start = Instant::now(); let mut playback_anchor_frame = frame_number; let mut cached_project = self.project.borrow().clone(); + let mut playback_clip_offsets = build_clip_offsets_lookup(&cached_project); 'playback: loop { if seek_rx.has_changed().unwrap_or(false) { @@ -747,6 +754,7 @@ impl Playback { if self.project.has_changed().unwrap_or(false) { cached_project = self.project.borrow_and_update().clone(); + playback_clip_offsets = build_clip_offsets_lookup(&cached_project); } while let Ok(prefetched) = prefetch_rx.try_recv() { if prefetched.generation == seek_generation { @@ -918,11 +926,9 @@ impl Playback { continue; }; - let clip_offsets = cached_project - .clips - .iter() - .find(|v| v.index == segment.recording_clip) - .map(|v| v.offsets) + let clip_offsets = playback_clip_offsets + .get(&segment.recording_clip) + .copied() .unwrap_or_default(); if let Ok(mut guard) = playback_decode_in_flight.write() { From 7ca0f42408f24c3c7efb04137c60e5ba3d6d21ed Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:52:31 +0000 Subject: [PATCH 250/333] improve: cache clip-offset lookups in decode scheduling Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 +++++ crates/editor/src/playback.rs | 40 +++++++++++++++++------------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 5153575812..72759ab115 100644 --- 
a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -442,6 +442,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Behind-prefetch dedupe tracking now keeps a bounded eviction-ordered window instead of unbounded growth over long playback sessions. - Prevents long-session hash-set expansion from degrading behind-prefetch lookup efficiency. +61. **Cached clip-offset lookups for decode scheduling paths (2026-02-13)** + - Prefetch and direct-decode paths now use cached clip-index-to-offset maps instead of repeated linear clip scans. + - Reduces per-frame scheduling overhead in playback and prefetch loops when projects contain many clips. + --- ## Root Cause Analysis Archive @@ -598,6 +602,7 @@ Decoder Pipeline: 64. Batched warmup prefetch queue consumption to reduce warmup staging overhead and improve contiguous warmup-fill responsiveness. 65. Scaled prefetch idle polling with frame budget to reduce scheduler wakeup churn during empty in-flight periods. 66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. +67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -633,6 +638,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: warmup prefetch receive path now drains immediately queued prefetched frames in batches to accelerate warmup buffer population. - `crates/editor/src/playback.rs`: prefetch scheduler idle polling now scales with frame budget (bounded) instead of fixed 1ms delay, reducing idle wakeup overhead. - `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. 
+- `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 1f02298528..d2682a7490 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -3,7 +3,7 @@ use cap_audio::{ }; use cap_media::MediaError; use cap_media_info::AudioInfo; -use cap_project::{ProjectConfiguration, XY}; +use cap_project::{ClipOffsets, ProjectConfiguration, XY}; use cap_rendering::{ DecodedSegmentFrames, ProjectUniforms, RenderVideoConstants, ZoomFocusInterpolator, spring_mass_damper::SpringMassDamperSimulationConfig, @@ -16,7 +16,7 @@ use cpal::{ use futures::stream::{FuturesUnordered, StreamExt}; use lru::LruCache; use std::{ - collections::{BTreeMap, HashSet, VecDeque}, + collections::{BTreeMap, HashMap, HashSet, VecDeque}, num::NonZeroUsize, sync::{ Arc, RwLock, @@ -173,6 +173,14 @@ fn count_contiguous_prefetched_frames( contiguous } +fn build_clip_offsets_lookup(project: &ProjectConfiguration) -> HashMap { + project + .clips + .iter() + .map(|clip| (clip.index, clip.offsets)) + .collect() +} + impl Playback { pub async fn start( mut self, @@ -270,6 +278,7 @@ impl Playback { let mut active_generation = *prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); + let mut prefetch_clip_offsets = build_clip_offsets_lookup(&cached_project); info!( dynamic_prefetch_ahead, <<<<<<< HEAD @@ -289,6 +298,7 @@ 
impl Playback { if prefetch_project.has_changed().unwrap_or(false) { cached_project = prefetch_project.borrow_and_update().clone(); + prefetch_clip_offsets = build_clip_offsets_lookup(&cached_project); } if prefetch_seek_generation.has_changed().unwrap_or(false) { @@ -384,11 +394,9 @@ impl Playback { && let Some(segment_media) = prefetch_segment_medias.get(segment.recording_clip as usize) { - let clip_offsets = cached_project - .clips - .iter() - .find(|v| v.index == segment.recording_clip) - .map(|v| v.offsets) + let clip_offsets = prefetch_clip_offsets + .get(&segment.recording_clip) + .copied() .unwrap_or_default(); let decoders = segment_media.decoders.clone(); @@ -455,11 +463,9 @@ impl Playback { && let Some(segment_media) = prefetch_segment_medias.get(segment.recording_clip as usize) { - let clip_offsets = cached_project - .clips - .iter() - .find(|v| v.index == segment.recording_clip) - .map(|v| v.offsets) + let clip_offsets = prefetch_clip_offsets + .get(&segment.recording_clip) + .copied() .unwrap_or_default(); let decoders = segment_media.decoders.clone(); @@ -722,6 +728,7 @@ impl Playback { let mut playback_anchor_start = Instant::now(); let mut playback_anchor_frame = frame_number; let mut cached_project = self.project.borrow().clone(); + let mut playback_clip_offsets = build_clip_offsets_lookup(&cached_project); 'playback: loop { if seek_rx.has_changed().unwrap_or(false) { @@ -747,6 +754,7 @@ impl Playback { if self.project.has_changed().unwrap_or(false) { cached_project = self.project.borrow_and_update().clone(); + playback_clip_offsets = build_clip_offsets_lookup(&cached_project); } while let Ok(prefetched) = prefetch_rx.try_recv() { if prefetched.generation == seek_generation { @@ -918,11 +926,9 @@ impl Playback { continue; }; - let clip_offsets = cached_project - .clips - .iter() - .find(|v| v.index == segment.recording_clip) - .map(|v| v.offsets) + let clip_offsets = playback_clip_offsets + .get(&segment.recording_clip) + .copied() 
.unwrap_or_default(); if let Ok(mut guard) = playback_decode_in_flight.write() { From 8ebe1716c48aeab365b315aabe68b08b42beed05 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:52:31 +0000 Subject: [PATCH 251/333] improve: cache clip-offset lookups in decode scheduling Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 +++++ crates/editor/src/playback.rs | 40 +++++++++++++++++------------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 5153575812..72759ab115 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -442,6 +442,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Behind-prefetch dedupe tracking now keeps a bounded eviction-ordered window instead of unbounded growth over long playback sessions. - Prevents long-session hash-set expansion from degrading behind-prefetch lookup efficiency. +61. **Cached clip-offset lookups for decode scheduling paths (2026-02-13)** + - Prefetch and direct-decode paths now use cached clip-index-to-offset maps instead of repeated linear clip scans. + - Reduces per-frame scheduling overhead in playback and prefetch loops when projects contain many clips. + --- ## Root Cause Analysis Archive @@ -598,6 +602,7 @@ Decoder Pipeline: 64. Batched warmup prefetch queue consumption to reduce warmup staging overhead and improve contiguous warmup-fill responsiveness. 65. Scaled prefetch idle polling with frame budget to reduce scheduler wakeup churn during empty in-flight periods. 66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. +67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. 
**Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -633,6 +638,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: warmup prefetch receive path now drains immediately queued prefetched frames in batches to accelerate warmup buffer population. - `crates/editor/src/playback.rs`: prefetch scheduler idle polling now scales with frame budget (bounded) instead of fixed 1ms delay, reducing idle wakeup overhead. - `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. +- `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 1f02298528..d2682a7490 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -3,7 +3,7 @@ use cap_audio::{ }; use cap_media::MediaError; use cap_media_info::AudioInfo; -use cap_project::{ProjectConfiguration, XY}; +use cap_project::{ClipOffsets, ProjectConfiguration, XY}; use cap_rendering::{ DecodedSegmentFrames, ProjectUniforms, RenderVideoConstants, ZoomFocusInterpolator, spring_mass_damper::SpringMassDamperSimulationConfig, @@ -16,7 +16,7 @@ use cpal::{ use futures::stream::{FuturesUnordered, StreamExt}; use lru::LruCache; use std::{ - collections::{BTreeMap, HashSet, VecDeque}, + collections::{BTreeMap, HashMap, HashSet, VecDeque}, num::NonZeroUsize, sync::{ Arc, RwLock, @@ -173,6 +173,14 @@ fn count_contiguous_prefetched_frames( contiguous } +fn build_clip_offsets_lookup(project: &ProjectConfiguration) -> HashMap { + project + .clips + .iter() + .map(|clip| (clip.index, clip.offsets)) + .collect() +} + impl Playback { pub async fn start( mut self, @@ -270,6 +278,7 @@ impl Playback { let mut active_generation = *prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); + let mut prefetch_clip_offsets = build_clip_offsets_lookup(&cached_project); info!( dynamic_prefetch_ahead, <<<<<<< HEAD @@ -289,6 +298,7 @@ impl Playback { if prefetch_project.has_changed().unwrap_or(false) { cached_project = prefetch_project.borrow_and_update().clone(); + prefetch_clip_offsets = build_clip_offsets_lookup(&cached_project); } if prefetch_seek_generation.has_changed().unwrap_or(false) { @@ -384,11 +394,9 @@ impl Playback { && let Some(segment_media) = prefetch_segment_medias.get(segment.recording_clip as usize) { - let clip_offsets = cached_project - .clips - .iter() - .find(|v| v.index == segment.recording_clip) - .map(|v| v.offsets) + let clip_offsets = prefetch_clip_offsets + .get(&segment.recording_clip) + .copied() 
.unwrap_or_default(); let decoders = segment_media.decoders.clone(); @@ -455,11 +463,9 @@ impl Playback { && let Some(segment_media) = prefetch_segment_medias.get(segment.recording_clip as usize) { - let clip_offsets = cached_project - .clips - .iter() - .find(|v| v.index == segment.recording_clip) - .map(|v| v.offsets) + let clip_offsets = prefetch_clip_offsets + .get(&segment.recording_clip) + .copied() .unwrap_or_default(); let decoders = segment_media.decoders.clone(); @@ -722,6 +728,7 @@ impl Playback { let mut playback_anchor_start = Instant::now(); let mut playback_anchor_frame = frame_number; let mut cached_project = self.project.borrow().clone(); + let mut playback_clip_offsets = build_clip_offsets_lookup(&cached_project); 'playback: loop { if seek_rx.has_changed().unwrap_or(false) { @@ -747,6 +754,7 @@ impl Playback { if self.project.has_changed().unwrap_or(false) { cached_project = self.project.borrow_and_update().clone(); + playback_clip_offsets = build_clip_offsets_lookup(&cached_project); } while let Ok(prefetched) = prefetch_rx.try_recv() { if prefetched.generation == seek_generation { @@ -918,11 +926,9 @@ impl Playback { continue; }; - let clip_offsets = cached_project - .clips - .iter() - .find(|v| v.index == segment.recording_clip) - .map(|v| v.offsets) + let clip_offsets = playback_clip_offsets + .get(&segment.recording_clip) + .copied() .unwrap_or_default(); if let Ok(mut guard) = playback_decode_in_flight.write() { From e9f609f0ec659b4a0c3ac4bed218122f56af99fe Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:53:14 +0000 Subject: [PATCH 252/333] improve: add clear() method to FrameCache for better encapsulation --- crates/editor/src/playback.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 1f02298528..bb41ec1885 100644 --- a/crates/editor/src/playback.rs +++ 
b/crates/editor/src/playback.rs @@ -99,6 +99,10 @@ impl FrameCache { self.cache .put(frame_number, (segment_frames, segment_index)); } + + fn clear(&mut self) { + self.cache.clear(); + } } fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_frame: u32) -> bool { @@ -672,7 +676,7 @@ impl Playback { seek_generation = seek_generation.saturating_add(1); frame_number = seek_frame; prefetch_buffer.clear(); - frame_cache.cache.clear(); + frame_cache.clear(); warmup_contiguous_prefetched = 0; warmup_buffer_changed = false; first_frame_time = None; @@ -693,7 +697,7 @@ impl Playback { seek_generation = seek_generation.saturating_add(1); frame_number = seek_frame; prefetch_buffer.clear(); - frame_cache.cache.clear(); + frame_cache.clear(); warmup_contiguous_prefetched = 0; warmup_buffer_changed = false; first_frame_time = None; @@ -732,7 +736,7 @@ impl Playback { playback_anchor_frame = seek_frame; pending_seek_observation = Some((seek_frame, Instant::now())); prefetch_buffer.clear(); - frame_cache.cache.clear(); + frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); let _ = frame_request_tx.send(frame_number); let _ = playback_position_tx.send(frame_number); @@ -769,7 +773,7 @@ impl Playback { playback_anchor_frame = seek_frame; pending_seek_observation = Some((seek_frame, Instant::now())); prefetch_buffer.clear(); - frame_cache.cache.clear(); + frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); let _ = frame_request_tx.send(frame_number); let _ = playback_position_tx.send(frame_number); From 39f9465d277964691e60b78b1d3c608256bcd269 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:53:14 +0000 Subject: [PATCH 253/333] improve: add clear() method to FrameCache for better encapsulation --- crates/editor/src/playback.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 
1f02298528..bb41ec1885 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -99,6 +99,10 @@ impl FrameCache { self.cache .put(frame_number, (segment_frames, segment_index)); } + + fn clear(&mut self) { + self.cache.clear(); + } } fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_frame: u32) -> bool { @@ -672,7 +676,7 @@ impl Playback { seek_generation = seek_generation.saturating_add(1); frame_number = seek_frame; prefetch_buffer.clear(); - frame_cache.cache.clear(); + frame_cache.clear(); warmup_contiguous_prefetched = 0; warmup_buffer_changed = false; first_frame_time = None; @@ -693,7 +697,7 @@ impl Playback { seek_generation = seek_generation.saturating_add(1); frame_number = seek_frame; prefetch_buffer.clear(); - frame_cache.cache.clear(); + frame_cache.clear(); warmup_contiguous_prefetched = 0; warmup_buffer_changed = false; first_frame_time = None; @@ -732,7 +736,7 @@ impl Playback { playback_anchor_frame = seek_frame; pending_seek_observation = Some((seek_frame, Instant::now())); prefetch_buffer.clear(); - frame_cache.cache.clear(); + frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); let _ = frame_request_tx.send(frame_number); let _ = playback_position_tx.send(frame_number); @@ -769,7 +773,7 @@ impl Playback { playback_anchor_frame = seek_frame; pending_seek_observation = Some((seek_frame, Instant::now())); prefetch_buffer.clear(); - frame_cache.cache.clear(); + frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); let _ = frame_request_tx.send(frame_number); let _ = playback_position_tx.send(frame_number); From 51df1caeeb679525b8c8e743844a7faf4c8a2cc5 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:53:32 +0000 Subject: [PATCH 254/333] fix: use saturating_add in trim_prefetch_buffer to prevent overflow --- crates/editor/src/playback.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index bb41ec1885..35def97a30 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -111,7 +111,7 @@ fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_fra let far_ahead_frame = buffer .iter() .rev() - .find(|(frame, _)| **frame > current_frame + PREFETCH_BUFFER_SIZE as u32) + .find(|(frame, _)| **frame > current_frame.saturating_add(PREFETCH_BUFFER_SIZE as u32)) .map(|(frame, _)| *frame); if let Some(frame) = far_ahead_frame { From 2b600a3edf4b5f277e46023d90df34951327b152 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:53:32 +0000 Subject: [PATCH 255/333] fix: use saturating_add in trim_prefetch_buffer to prevent overflow --- crates/editor/src/playback.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index bb41ec1885..35def97a30 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -111,7 +111,7 @@ fn trim_prefetch_buffer(buffer: &mut BTreeMap, current_fra let far_ahead_frame = buffer .iter() .rev() - .find(|(frame, _)| **frame > current_frame + PREFETCH_BUFFER_SIZE as u32) + .find(|(frame, _)| **frame > current_frame.saturating_add(PREFETCH_BUFFER_SIZE as u32)) .map(|(frame, _)| *frame); if let Some(frame) = far_ahead_frame { From caa87717432088061c20b05a1c84ba9479190308 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:53:47 +0000 Subject: [PATCH 256/333] fix: make AudioResampler::reset() non-panicking --- crates/editor/src/audio.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/editor/src/audio.rs b/crates/editor/src/audio.rs index 7051ce5549..d631b0f9a1 100644 --- a/crates/editor/src/audio.rs +++ b/crates/editor/src/audio.rs @@ -437,7 +437,9 @@ impl AudioResampler { } pub fn reset(&mut 
self) { - *self = Self::new(self.output).unwrap(); + if let Ok(resampler) = Self::new(self.output) { + *self = resampler; + } } fn current_frame_data(&self) -> &[u8] { From 9f110e4c66bf0a242f0a920b74f2149327669625 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:53:47 +0000 Subject: [PATCH 257/333] fix: make AudioResampler::reset() non-panicking --- crates/editor/src/audio.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/editor/src/audio.rs b/crates/editor/src/audio.rs index 7051ce5549..d631b0f9a1 100644 --- a/crates/editor/src/audio.rs +++ b/crates/editor/src/audio.rs @@ -437,7 +437,9 @@ impl AudioResampler { } pub fn reset(&mut self) { - *self = Self::new(self.output).unwrap(); + if let Ok(resampler) = Self::new(self.output) { + *self = resampler; + } } fn current_frame_data(&self) -> &[u8] { From d03cf490e48df277882cbbb7fb6047e1e6322ddb Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:53:47 +0000 Subject: [PATCH 258/333] fix: make AudioResampler::reset() non-panicking --- crates/editor/src/audio.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/editor/src/audio.rs b/crates/editor/src/audio.rs index 7051ce5549..d631b0f9a1 100644 --- a/crates/editor/src/audio.rs +++ b/crates/editor/src/audio.rs @@ -437,7 +437,9 @@ impl AudioResampler { } pub fn reset(&mut self) { - *self = Self::new(self.output).unwrap(); + if let Ok(resampler) = Self::new(self.output) { + *self = resampler; + } } fn current_frame_data(&self) -> &[u8] { From af1de593d45d23a78bd69ae8e8e25bab6a42ac23 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:54:00 +0000 Subject: [PATCH 259/333] fix: clamp targetFrame to prevent out-of-range seekTo during timeline drags --- apps/desktop/src/routes/editor/Timeline/index.tsx | 4 +++- 1 
file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx index c475fa1d27..a1e32c6c04 100644 --- a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -332,7 +332,9 @@ export function Timeline() { const rawTime = secsPerPixel() * (e.clientX - left) + transform().position; const newTime = Math.min(Math.max(0, rawTime), totalDuration()); - const targetFrame = Math.round(newTime * FPS); + const total = totalDuration(); + const maxFrame = Math.max(0, Math.ceil(total * FPS) - 1); + const targetFrame = Math.min(Math.round(newTime * FPS), maxFrame); scheduleSeek(targetFrame); setEditorState("playbackTime", newTime); From 684b0ede91a2f7ee3cdf6e492c4c90ce53f3a876 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:54:00 +0000 Subject: [PATCH 260/333] fix: clamp targetFrame to prevent out-of-range seekTo during timeline drags --- apps/desktop/src/routes/editor/Timeline/index.tsx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx index c475fa1d27..a1e32c6c04 100644 --- a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -332,7 +332,9 @@ export function Timeline() { const rawTime = secsPerPixel() * (e.clientX - left) + transform().position; const newTime = Math.min(Math.max(0, rawTime), totalDuration()); - const targetFrame = Math.round(newTime * FPS); + const total = totalDuration(); + const maxFrame = Math.max(0, Math.ceil(total * FPS) - 1); + const targetFrame = Math.min(Math.round(newTime * FPS), maxFrame); scheduleSeek(targetFrame); setEditorState("playbackTime", newTime); From 78eb47db81f303985d3d29bbda60a2d60a36f8b9 Mon Sep 17 00:00:00 2001 From: Richie McIlroy 
<33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:54:00 +0000 Subject: [PATCH 261/333] fix: clamp targetFrame to prevent out-of-range seekTo during timeline drags --- apps/desktop/src/routes/editor/Timeline/index.tsx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/desktop/src/routes/editor/Timeline/index.tsx b/apps/desktop/src/routes/editor/Timeline/index.tsx index c475fa1d27..a1e32c6c04 100644 --- a/apps/desktop/src/routes/editor/Timeline/index.tsx +++ b/apps/desktop/src/routes/editor/Timeline/index.tsx @@ -332,7 +332,9 @@ export function Timeline() { const rawTime = secsPerPixel() * (e.clientX - left) + transform().position; const newTime = Math.min(Math.max(0, rawTime), totalDuration()); - const targetFrame = Math.round(newTime * FPS); + const total = totalDuration(); + const maxFrame = Math.max(0, Math.ceil(total * FPS) - 1); + const targetFrame = Math.min(Math.round(newTime * FPS), maxFrame); scheduleSeek(targetFrame); setEditorState("playbackTime", newTime); From 0ab9cdaefb64d79113df9f3cb7114b7a858f13a9 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:54:28 +0000 Subject: [PATCH 262/333] fix: cap dynamic_prefetch_ahead to PREFETCH_BUFFER_SIZE to avoid wasted decode work --- crates/editor/src/playback.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 580f3a3151..ffb539e882 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -261,7 +261,7 @@ impl Playback { let mut scheduled_in_flight_frames: HashSet = HashSet::new(); >>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 const RAMP_UP_AFTER_FRAMES: u32 = 5; - let dynamic_prefetch_ahead = fps.clamp(30, 90); + let dynamic_prefetch_ahead = fps.clamp(30, 90).min(PREFETCH_BUFFER_SIZE as u32); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); let dynamic_parallel_tasks = if fps >= 
60 { 6 From 3149a667ceb5f1c0f14604f5473bb306632a0166 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:54:28 +0000 Subject: [PATCH 263/333] fix: cap dynamic_prefetch_ahead to PREFETCH_BUFFER_SIZE to avoid wasted decode work --- crates/editor/src/playback.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 580f3a3151..ffb539e882 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -261,7 +261,7 @@ impl Playback { let mut scheduled_in_flight_frames: HashSet = HashSet::new(); >>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 const RAMP_UP_AFTER_FRAMES: u32 = 5; - let dynamic_prefetch_ahead = fps.clamp(30, 90); + let dynamic_prefetch_ahead = fps.clamp(30, 90).min(PREFETCH_BUFFER_SIZE as u32); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); let dynamic_parallel_tasks = if fps >= 60 { 6 From 9f4d60707d5535505e297d4cca6fa4e8c69d6ac0 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:54:28 +0000 Subject: [PATCH 264/333] fix: cap dynamic_prefetch_ahead to PREFETCH_BUFFER_SIZE to avoid wasted decode work --- crates/editor/src/playback.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 580f3a3151..ffb539e882 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -261,7 +261,7 @@ impl Playback { let mut scheduled_in_flight_frames: HashSet = HashSet::new(); >>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 const RAMP_UP_AFTER_FRAMES: u32 = 5; - let dynamic_prefetch_ahead = fps.clamp(30, 90); + let dynamic_prefetch_ahead = fps.clamp(30, 90).min(PREFETCH_BUFFER_SIZE as u32); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); let dynamic_parallel_tasks = if fps >= 60 { 6 From 
18a0d7d91d1b00039127f1b69c7244fde7dd7701 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:54:53 +0000 Subject: [PATCH 265/333] fix: escape backslashes and pipes in markdown table cells for CI reporting --- scripts/compare-playback-benchmark-runs.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 96b5e9432e..464d6a5159 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -414,6 +414,10 @@ function compareMetrics(baselineRows, candidateRows, options) { }; } +function escapeTableCell(value) { + return String(value).replace(/\\/g, "\\\\").replace(/\|/g, "\\|"); +} + function toMarkdown( comparisons, missingCandidateRows, @@ -446,10 +450,10 @@ function toMarkdown( md += "| Side | File | Error |\n"; md += "|---|---|---|\n"; for (const entry of baselineStats.parseErrors.slice(0, 20)) { - md += `| baseline | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + md += `| baseline | ${escapeTableCell(entry.file)} | ${escapeTableCell(entry.error)} |\n`; } for (const entry of candidateStats.parseErrors.slice(0, 20)) { - md += `| candidate | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + md += `| candidate | ${escapeTableCell(entry.file)} | ${escapeTableCell(entry.error)} |\n`; } md += "\n"; } From cc9c79ff482d12e26eeabab6695ae67db811a964 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:54:53 +0000 Subject: [PATCH 266/333] fix: escape backslashes and pipes in markdown table cells for CI reporting --- scripts/compare-playback-benchmark-runs.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 96b5e9432e..464d6a5159 100644 --- 
a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -414,6 +414,10 @@ function compareMetrics(baselineRows, candidateRows, options) { }; } +function escapeTableCell(value) { + return String(value).replace(/\\/g, "\\\\").replace(/\|/g, "\\|"); +} + function toMarkdown( comparisons, missingCandidateRows, @@ -446,10 +450,10 @@ function toMarkdown( md += "| Side | File | Error |\n"; md += "|---|---|---|\n"; for (const entry of baselineStats.parseErrors.slice(0, 20)) { - md += `| baseline | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + md += `| baseline | ${escapeTableCell(entry.file)} | ${escapeTableCell(entry.error)} |\n`; } for (const entry of candidateStats.parseErrors.slice(0, 20)) { - md += `| candidate | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + md += `| candidate | ${escapeTableCell(entry.file)} | ${escapeTableCell(entry.error)} |\n`; } md += "\n"; } From 9b9335cd4e193d6f794ddcbc91528fe3a6faeab6 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:54:53 +0000 Subject: [PATCH 267/333] fix: escape backslashes and pipes in markdown table cells for CI reporting --- scripts/compare-playback-benchmark-runs.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 96b5e9432e..464d6a5159 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -414,6 +414,10 @@ function compareMetrics(baselineRows, candidateRows, options) { }; } +function escapeTableCell(value) { + return String(value).replace(/\\/g, "\\\\").replace(/\|/g, "\\|"); +} + function toMarkdown( comparisons, missingCandidateRows, @@ -446,10 +450,10 @@ function toMarkdown( md += "| Side | File | Error |\n"; md += "|---|---|---|\n"; for (const entry of baselineStats.parseErrors.slice(0, 20)) { - md += `| 
baseline | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + md += `| baseline | ${escapeTableCell(entry.file)} | ${escapeTableCell(entry.error)} |\n`; } for (const entry of candidateStats.parseErrors.slice(0, 20)) { - md += `| candidate | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + md += `| candidate | ${escapeTableCell(entry.file)} | ${escapeTableCell(entry.error)} |\n`; } md += "\n"; } From c62eec668a6a324f4a1aec617367c616d318613d Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:55:09 +0000 Subject: [PATCH 268/333] fix: rename skipped_no_metrics to skipped_no_usable_metrics for consistency --- scripts/compare-playback-benchmark-runs.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 464d6a5159..e34c386eb6 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -439,8 +439,8 @@ function toMarkdown( md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? "fail_if_zero" : "allow"}\n\n`; md += `Skipped-file gate: skipped_files=${options.failOnSkippedFiles ? 
"fail" : "allow"}\n\n`; - md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; - md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_metrics=${candidateStats.skippedNoUsableMetrics}, parse_errors=${candidateStats.parseErrors.length}\n\n`; + md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_usable_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; + md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_usable_metrics=${candidateStats.skippedNoUsableMetrics}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; if ( baselineStats.parseErrors.length > 0 || From fea600fd1d8bf021ba14d31d21b7acd080c99bd1 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:55:09 +0000 Subject: [PATCH 269/333] fix: rename skipped_no_metrics to skipped_no_usable_metrics for consistency --- 
scripts/compare-playback-benchmark-runs.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 464d6a5159..e34c386eb6 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -439,8 +439,8 @@ function toMarkdown( md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? "fail_if_zero" : "allow"}\n\n`; md += `Skipped-file gate: skipped_files=${options.failOnSkippedFiles ? "fail" : "allow"}\n\n`; - md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; - md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_metrics=${candidateStats.skippedNoUsableMetrics}, parse_errors=${candidateStats.parseErrors.length}\n\n`; + md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_usable_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; + md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_usable_metrics=${candidateStats.skippedNoUsableMetrics}, 
parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; if ( baselineStats.parseErrors.length > 0 || From 67f58a2e114215313c62ce285a17e89c2c5f64e4 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Fri, 13 Feb 2026 23:55:09 +0000 Subject: [PATCH 270/333] fix: rename skipped_no_metrics to skipped_no_usable_metrics for consistency --- scripts/compare-playback-benchmark-runs.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 464d6a5159..e34c386eb6 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -439,8 +439,8 @@ function toMarkdown( md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? "fail_if_zero" : "allow"}\n\n`; md += `Skipped-file gate: skipped_files=${options.failOnSkippedFiles ? 
"fail" : "allow"}\n\n`; - md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; - md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_metrics=${candidateStats.skippedNoUsableMetrics}, parse_errors=${candidateStats.parseErrors.length}\n\n`; + md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_usable_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; + md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_usable_metrics=${candidateStats.skippedNoUsableMetrics}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; if ( baselineStats.parseErrors.length > 0 || From 59892cf4b1ead67b5e58a6877c34a2ff01cf846c Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:55:38 +0000 Subject: [PATCH 271/333] improve: dedupe playback frame-request watch updates Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++ 
crates/editor/src/playback.rs | 117 ++++++++++------------------- 2 files changed, 46 insertions(+), 77 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 72759ab115..cb3065d9f4 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -446,6 +446,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prefetch and direct-decode paths now use cached clip-index-to-offset maps instead of repeated linear clip scans. - Reduces per-frame scheduling overhead in playback and prefetch loops when projects contain many clips. +62. **Deduplicated frame-request watch updates (2026-02-13)** + - Playback loop now uses change-aware frame-request signaling instead of unconditional watch broadcasts. + - Reduces redundant prefetch wakeups and channel churn when requested frame value does not change. + --- ## Root Cause Analysis Archive @@ -603,6 +607,7 @@ Decoder Pipeline: 65. Scaled prefetch idle polling with frame budget to reduce scheduler wakeup churn during empty in-flight periods. 66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. 67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. +68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -639,6 +644,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch scheduler idle polling now scales with frame budget (bounded) instead of fixed 1ms delay, reducing idle wakeup overhead. - `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. 
- `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. +- `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 580f3a3151..7f0c8ab47e 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -255,11 +255,8 @@ impl Playback { let mut in_flight: FuturesUnordered = FuturesUnordered::new(); let mut frames_decoded: u32 = 0; let mut prefetched_behind: HashSet = HashSet::new(); -<<<<<<< HEAD -======= let mut prefetched_behind_order: VecDeque = VecDeque::new(); let mut scheduled_in_flight_frames: HashSet = HashSet::new(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); @@ -271,28 +268,21 @@ impl Playback { PARALLEL_DECODE_TASKS }; let initial_parallel_tasks = dynamic_parallel_tasks.min(4); -<<<<<<< HEAD -======= let prefetch_idle_poll_interval = Duration::from_secs_f64(1.0 / fps_f64) .mul_f64(0.25) .max(Duration::from_millis(2)) .min(Duration::from_millis(8)); let prefetched_behind_capacity = (dynamic_prefetch_behind as usize).saturating_mul(8); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 let mut active_generation = 
*prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); let mut prefetch_clip_offsets = build_clip_offsets_lookup(&cached_project); info!( dynamic_prefetch_ahead, -<<<<<<< HEAD - dynamic_prefetch_behind, dynamic_parallel_tasks, "Prefetch window configuration" -======= dynamic_prefetch_behind, dynamic_parallel_tasks, prefetch_idle_poll_interval_ms = prefetch_idle_poll_interval.as_secs_f64() * 1000.0, "Prefetch window configuration" ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 ); loop { @@ -312,18 +302,12 @@ impl Playback { next_prefetch_frame = *frame_request_rx.borrow(); frames_decoded = 0; prefetched_behind.clear(); -<<<<<<< HEAD -======= prefetched_behind_order.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } -<<<<<<< HEAD -======= scheduled_in_flight_frames.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 in_flight = FuturesUnordered::new(); } @@ -349,10 +333,7 @@ impl Playback { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } -<<<<<<< HEAD -======= scheduled_in_flight_frames.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 in_flight = FuturesUnordered::new(); } } @@ -380,15 +361,7 @@ impl Playback { break; } -<<<<<<< HEAD - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&(active_generation, frame_num))) - .unwrap_or(false); - if already_in_flight { -======= if scheduled_in_flight_frames.contains(&frame_num) { ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 next_prefetch_frame += 1; continue; } @@ -450,15 +423,7 @@ impl Playback { continue; } -<<<<<<< HEAD - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&(active_generation, behind_frame))) - .unwrap_or(false); - if already_in_flight { -======= if scheduled_in_flight_frames.contains(&behind_frame) { ->>>>>>> 
c801652efd85d0cf6bde578ebe963bc9d7221587 continue; } @@ -504,10 +469,7 @@ impl Playback { biased; Some((frame_num, segment_index, generation, result)) = in_flight.next() => { -<<<<<<< HEAD -======= scheduled_in_flight_frames.remove(&frame_num); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.remove(&(generation, frame_num)); } @@ -640,18 +602,6 @@ impl Playback { tokio::select! { Some(prefetched) = prefetch_rx.recv() => { -<<<<<<< HEAD - if prefetched.generation == seek_generation { - if insert_prefetched_frame( - &mut prefetch_buffer, - prefetched, - frame_number, - ) { - warmup_buffer_changed = true; - } - if first_frame_time.is_none() && !prefetch_buffer.is_empty() { - first_frame_time = Some(Instant::now()); -======= let mut next_prefetched = Some(prefetched); loop { @@ -667,7 +617,6 @@ impl Playback { ) { warmup_buffer_changed = true; ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 } next_prefetched = prefetch_rx.try_recv().ok(); @@ -688,28 +637,14 @@ impl Playback { first_frame_time = None; warmup_start = Instant::now(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); - let _ = playback_position_tx.send(frame_number); - if has_audio - && audio_playhead_tx - .send(frame_number as f64 / fps_f64) - .is_err() - { - break; - } - } - _ = seek_rx.changed() => { - let seek_frame = *seek_rx.borrow_and_update(); - seek_generation = seek_generation.saturating_add(1); - frame_number = seek_frame; - prefetch_buffer.clear(); - frame_cache.clear(); - warmup_contiguous_prefetched = 0; - warmup_buffer_changed = false; - first_frame_time = None; - warmup_start = Instant::now(); - let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = 
playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -745,7 +680,14 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -783,7 +725,14 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -885,7 +834,14 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; @@ -1078,7 +1034,14 @@ impl Playback { skip_events = skip_events.saturating_add(1); prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx From ba37eb34cb172fcd422ac9ab7ce032aa11726031 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:55:38 +0000 Subject: [PATCH 272/333] improve: dedupe playback frame-request watch updates Co-authored-by: Richie McIlroy 
--- crates/editor/PLAYBACK-FINDINGS.md | 6 ++ crates/editor/src/playback.rs | 117 ++++++++++------------------- 2 files changed, 46 insertions(+), 77 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 72759ab115..cb3065d9f4 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -446,6 +446,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prefetch and direct-decode paths now use cached clip-index-to-offset maps instead of repeated linear clip scans. - Reduces per-frame scheduling overhead in playback and prefetch loops when projects contain many clips. +62. **Deduplicated frame-request watch updates (2026-02-13)** + - Playback loop now uses change-aware frame-request signaling instead of unconditional watch broadcasts. + - Reduces redundant prefetch wakeups and channel churn when requested frame value does not change. + --- ## Root Cause Analysis Archive @@ -603,6 +607,7 @@ Decoder Pipeline: 65. Scaled prefetch idle polling with frame budget to reduce scheduler wakeup churn during empty in-flight periods. 66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. 67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. +68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -639,6 +644,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch scheduler idle polling now scales with frame budget (bounded) instead of fixed 1ms delay, reducing idle wakeup overhead. 
- `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. - `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. +- `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 580f3a3151..7f0c8ab47e 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -255,11 +255,8 @@ impl Playback { let mut in_flight: FuturesUnordered = FuturesUnordered::new(); let mut frames_decoded: u32 = 0; let mut prefetched_behind: HashSet = HashSet::new(); -<<<<<<< HEAD -======= let mut prefetched_behind_order: VecDeque = VecDeque::new(); let mut scheduled_in_flight_frames: HashSet = HashSet::new(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); @@ -271,28 +268,21 @@ impl Playback { PARALLEL_DECODE_TASKS }; let initial_parallel_tasks = dynamic_parallel_tasks.min(4); -<<<<<<< HEAD -======= let prefetch_idle_poll_interval = Duration::from_secs_f64(1.0 / fps_f64) .mul_f64(0.25) .max(Duration::from_millis(2)) .min(Duration::from_millis(8)); let prefetched_behind_capacity = 
(dynamic_prefetch_behind as usize).saturating_mul(8); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 let mut active_generation = *prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); let mut prefetch_clip_offsets = build_clip_offsets_lookup(&cached_project); info!( dynamic_prefetch_ahead, -<<<<<<< HEAD - dynamic_prefetch_behind, dynamic_parallel_tasks, "Prefetch window configuration" -======= dynamic_prefetch_behind, dynamic_parallel_tasks, prefetch_idle_poll_interval_ms = prefetch_idle_poll_interval.as_secs_f64() * 1000.0, "Prefetch window configuration" ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 ); loop { @@ -312,18 +302,12 @@ impl Playback { next_prefetch_frame = *frame_request_rx.borrow(); frames_decoded = 0; prefetched_behind.clear(); -<<<<<<< HEAD -======= prefetched_behind_order.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } -<<<<<<< HEAD -======= scheduled_in_flight_frames.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 in_flight = FuturesUnordered::new(); } @@ -349,10 +333,7 @@ impl Playback { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } -<<<<<<< HEAD -======= scheduled_in_flight_frames.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 in_flight = FuturesUnordered::new(); } } @@ -380,15 +361,7 @@ impl Playback { break; } -<<<<<<< HEAD - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&(active_generation, frame_num))) - .unwrap_or(false); - if already_in_flight { -======= if scheduled_in_flight_frames.contains(&frame_num) { ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 next_prefetch_frame += 1; continue; } @@ -450,15 +423,7 @@ impl Playback { continue; } -<<<<<<< HEAD - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&(active_generation, behind_frame))) - 
.unwrap_or(false); - if already_in_flight { -======= if scheduled_in_flight_frames.contains(&behind_frame) { ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 continue; } @@ -504,10 +469,7 @@ impl Playback { biased; Some((frame_num, segment_index, generation, result)) = in_flight.next() => { -<<<<<<< HEAD -======= scheduled_in_flight_frames.remove(&frame_num); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.remove(&(generation, frame_num)); } @@ -640,18 +602,6 @@ impl Playback { tokio::select! { Some(prefetched) = prefetch_rx.recv() => { -<<<<<<< HEAD - if prefetched.generation == seek_generation { - if insert_prefetched_frame( - &mut prefetch_buffer, - prefetched, - frame_number, - ) { - warmup_buffer_changed = true; - } - if first_frame_time.is_none() && !prefetch_buffer.is_empty() { - first_frame_time = Some(Instant::now()); -======= let mut next_prefetched = Some(prefetched); loop { @@ -667,7 +617,6 @@ impl Playback { ) { warmup_buffer_changed = true; ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 } next_prefetched = prefetch_rx.try_recv().ok(); @@ -688,28 +637,14 @@ impl Playback { first_frame_time = None; warmup_start = Instant::now(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); - let _ = playback_position_tx.send(frame_number); - if has_audio - && audio_playhead_tx - .send(frame_number as f64 / fps_f64) - .is_err() - { - break; - } - } - _ = seek_rx.changed() => { - let seek_frame = *seek_rx.borrow_and_update(); - seek_generation = seek_generation.saturating_add(1); - frame_number = seek_frame; - prefetch_buffer.clear(); - frame_cache.clear(); - warmup_contiguous_prefetched = 0; - warmup_buffer_changed = false; - first_frame_time = None; - warmup_start = Instant::now(); - let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { 
+ if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -745,7 +680,14 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -783,7 +725,14 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -885,7 +834,14 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; @@ -1078,7 +1034,14 @@ impl Playback { skip_events = skip_events.saturating_add(1); prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx From 8da4cbf0d8d092f9602494b78daf3a26a1cbe3bf Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:55:38 +0000 
Subject: [PATCH 273/333] improve: dedupe playback frame-request watch updates Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++ crates/editor/src/playback.rs | 117 ++++++++++------------------- 2 files changed, 46 insertions(+), 77 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 72759ab115..cb3065d9f4 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -446,6 +446,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prefetch and direct-decode paths now use cached clip-index-to-offset maps instead of repeated linear clip scans. - Reduces per-frame scheduling overhead in playback and prefetch loops when projects contain many clips. +62. **Deduplicated frame-request watch updates (2026-02-13)** + - Playback loop now uses change-aware frame-request signaling instead of unconditional watch broadcasts. + - Reduces redundant prefetch wakeups and channel churn when requested frame value does not change. + --- ## Root Cause Analysis Archive @@ -603,6 +607,7 @@ Decoder Pipeline: 65. Scaled prefetch idle polling with frame budget to reduce scheduler wakeup churn during empty in-flight periods. 66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. 67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. +68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -639,6 +644,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch scheduler idle polling now scales with frame budget (bounded) instead of fixed 1ms delay, reducing idle wakeup overhead. 
- `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. - `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. +- `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 580f3a3151..7f0c8ab47e 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -255,11 +255,8 @@ impl Playback { let mut in_flight: FuturesUnordered = FuturesUnordered::new(); let mut frames_decoded: u32 = 0; let mut prefetched_behind: HashSet = HashSet::new(); -<<<<<<< HEAD -======= let mut prefetched_behind_order: VecDeque = VecDeque::new(); let mut scheduled_in_flight_frames: HashSet = HashSet::new(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); @@ -271,28 +268,21 @@ impl Playback { PARALLEL_DECODE_TASKS }; let initial_parallel_tasks = dynamic_parallel_tasks.min(4); -<<<<<<< HEAD -======= let prefetch_idle_poll_interval = Duration::from_secs_f64(1.0 / fps_f64) .mul_f64(0.25) .max(Duration::from_millis(2)) .min(Duration::from_millis(8)); let prefetched_behind_capacity = 
(dynamic_prefetch_behind as usize).saturating_mul(8); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 let mut active_generation = *prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); let mut prefetch_clip_offsets = build_clip_offsets_lookup(&cached_project); info!( dynamic_prefetch_ahead, -<<<<<<< HEAD - dynamic_prefetch_behind, dynamic_parallel_tasks, "Prefetch window configuration" -======= dynamic_prefetch_behind, dynamic_parallel_tasks, prefetch_idle_poll_interval_ms = prefetch_idle_poll_interval.as_secs_f64() * 1000.0, "Prefetch window configuration" ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 ); loop { @@ -312,18 +302,12 @@ impl Playback { next_prefetch_frame = *frame_request_rx.borrow(); frames_decoded = 0; prefetched_behind.clear(); -<<<<<<< HEAD -======= prefetched_behind_order.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } -<<<<<<< HEAD -======= scheduled_in_flight_frames.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 in_flight = FuturesUnordered::new(); } @@ -349,10 +333,7 @@ impl Playback { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } -<<<<<<< HEAD -======= scheduled_in_flight_frames.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 in_flight = FuturesUnordered::new(); } } @@ -380,15 +361,7 @@ impl Playback { break; } -<<<<<<< HEAD - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&(active_generation, frame_num))) - .unwrap_or(false); - if already_in_flight { -======= if scheduled_in_flight_frames.contains(&frame_num) { ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 next_prefetch_frame += 1; continue; } @@ -450,15 +423,7 @@ impl Playback { continue; } -<<<<<<< HEAD - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&(active_generation, behind_frame))) - 
.unwrap_or(false); - if already_in_flight { -======= if scheduled_in_flight_frames.contains(&behind_frame) { ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 continue; } @@ -504,10 +469,7 @@ impl Playback { biased; Some((frame_num, segment_index, generation, result)) = in_flight.next() => { -<<<<<<< HEAD -======= scheduled_in_flight_frames.remove(&frame_num); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.remove(&(generation, frame_num)); } @@ -640,18 +602,6 @@ impl Playback { tokio::select! { Some(prefetched) = prefetch_rx.recv() => { -<<<<<<< HEAD - if prefetched.generation == seek_generation { - if insert_prefetched_frame( - &mut prefetch_buffer, - prefetched, - frame_number, - ) { - warmup_buffer_changed = true; - } - if first_frame_time.is_none() && !prefetch_buffer.is_empty() { - first_frame_time = Some(Instant::now()); -======= let mut next_prefetched = Some(prefetched); loop { @@ -667,7 +617,6 @@ impl Playback { ) { warmup_buffer_changed = true; ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 } next_prefetched = prefetch_rx.try_recv().ok(); @@ -688,28 +637,14 @@ impl Playback { first_frame_time = None; warmup_start = Instant::now(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); - let _ = playback_position_tx.send(frame_number); - if has_audio - && audio_playhead_tx - .send(frame_number as f64 / fps_f64) - .is_err() - { - break; - } - } - _ = seek_rx.changed() => { - let seek_frame = *seek_rx.borrow_and_update(); - seek_generation = seek_generation.saturating_add(1); - frame_number = seek_frame; - prefetch_buffer.clear(); - frame_cache.clear(); - warmup_contiguous_prefetched = 0; - warmup_buffer_changed = false; - first_frame_time = None; - warmup_start = Instant::now(); - let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { 
+ if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -745,7 +680,14 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -783,7 +725,14 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -885,7 +834,14 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; @@ -1078,7 +1034,14 @@ impl Playback { skip_events = skip_events.saturating_add(1); prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx From f0e776031662e3fbe02d1d9ab1fba80de302f607 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 13 Feb 2026 23:57:26 +0000 
Subject: [PATCH 274/333] improve: avoid duplicate keyed lookups on prefetch insert Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 9 +++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index cb3065d9f4..7382aea874 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -450,6 +450,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Playback loop now uses change-aware frame-request signaling instead of unconditional watch broadcasts. - Reduces redundant prefetch wakeups and channel churn when requested frame value does not change. +63. **Removed duplicate keyed-buffer lookups during prefetch insert (2026-02-13)** + - Prefetch buffer insertion now uses a single `BTreeMap::entry` match to detect insertion and store new frames. + - Eliminates the prior contains-check plus entry-insert double lookup in prefetch hot path. + --- ## Root Cause Analysis Archive @@ -608,6 +612,7 @@ Decoder Pipeline: 66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. 67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. +69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -645,6 +650,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. 
- `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. - `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. +- `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 7f0c8ab47e..d241940d51 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -139,8 +139,13 @@ fn insert_prefetched_frame( } let frame_number = prefetched.frame_number; - let inserted_new = !buffer.contains_key(&frame_number); - buffer.entry(frame_number).or_insert(prefetched); + let inserted_new = match buffer.entry(frame_number) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(prefetched); + true + } + std::collections::btree_map::Entry::Occupied(_) => false, + }; let trimmed = trim_prefetch_buffer(buffer, current_frame); inserted_new || trimmed } From 35c5a41f97e95e653ffaee4b42382ebfdb9dc42d Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:57:26 +0000 Subject: [PATCH 275/333] improve: avoid duplicate keyed lookups on prefetch insert Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 9 +++++++-- 2 files changed, 
13 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index cb3065d9f4..7382aea874 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -450,6 +450,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Playback loop now uses change-aware frame-request signaling instead of unconditional watch broadcasts. - Reduces redundant prefetch wakeups and channel churn when requested frame value does not change. +63. **Removed duplicate keyed-buffer lookups during prefetch insert (2026-02-13)** + - Prefetch buffer insertion now uses a single `BTreeMap::entry` match to detect insertion and store new frames. + - Eliminates the prior contains-check plus entry-insert double lookup in prefetch hot path. + --- ## Root Cause Analysis Archive @@ -608,6 +612,7 @@ Decoder Pipeline: 66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. 67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. +69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -645,6 +650,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. - `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. 
- `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. +- `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 7f0c8ab47e..d241940d51 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -139,8 +139,13 @@ fn insert_prefetched_frame( } let frame_number = prefetched.frame_number; - let inserted_new = !buffer.contains_key(&frame_number); - buffer.entry(frame_number).or_insert(prefetched); + let inserted_new = match buffer.entry(frame_number) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(prefetched); + true + } + std::collections::btree_map::Entry::Occupied(_) => false, + }; let trimmed = trim_prefetch_buffer(buffer, current_frame); inserted_new || trimmed } From 64612626b14e8d33b59829183bce539ad6629e8c Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:57:26 +0000 Subject: [PATCH 276/333] improve: avoid duplicate keyed lookups on prefetch insert Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 9 +++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index cb3065d9f4..7382aea874 100644 --- 
a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -450,6 +450,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Playback loop now uses change-aware frame-request signaling instead of unconditional watch broadcasts. - Reduces redundant prefetch wakeups and channel churn when requested frame value does not change. +63. **Removed duplicate keyed-buffer lookups during prefetch insert (2026-02-13)** + - Prefetch buffer insertion now uses a single `BTreeMap::entry` match to detect insertion and store new frames. + - Eliminates the prior contains-check plus entry-insert double lookup in prefetch hot path. + --- ## Root Cause Analysis Archive @@ -608,6 +612,7 @@ Decoder Pipeline: 66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. 67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. +69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -645,6 +650,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. - `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. - `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. 
+- `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 7f0c8ab47e..d241940d51 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -139,8 +139,13 @@ fn insert_prefetched_frame( } let frame_number = prefetched.frame_number; - let inserted_new = !buffer.contains_key(&frame_number); - buffer.entry(frame_number).or_insert(prefetched); + let inserted_new = match buffer.entry(frame_number) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(prefetched); + true + } + std::collections::btree_map::Entry::Occupied(_) => false, + }; let trimmed = trim_prefetch_buffer(buffer, current_frame); inserted_new || trimmed } From 99a1063724b47cb2dcb193d0971430c4d9d22ec6 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sat, 14 Feb 2026 00:01:26 +0000 Subject: [PATCH 277/333] fmt --- scripts/aggregate-playback-benchmarks.js | 11 ++++------- scripts/analyze-playback-matrix-bottlenecks.js | 11 ++++++++--- scripts/build-playback-matrix-report.js | 9 +++++++-- scripts/run-playback-benchmark-matrix.js | 17 ++++++++++++++--- scripts/validate-playback-matrix.js | 5 ++++- 5 files changed, 37 insertions(+), 16 deletions(-) diff --git a/scripts/aggregate-playback-benchmarks.js b/scripts/aggregate-playback-benchmarks.js index 75275c944f..b1e89d0e10 100644 --- 
a/scripts/aggregate-playback-benchmarks.js +++ b/scripts/aggregate-playback-benchmarks.js @@ -91,9 +91,7 @@ function numberOrNull(value) { } function maxOrNull(values) { - const numeric = values - .map(numberOrNull) - .filter((value) => value !== null); + const numeric = values.map(numberOrNull).filter((value) => value !== null); if (numeric.length === 0) { return null; } @@ -101,9 +99,7 @@ function maxOrNull(values) { } function avgOrNull(values) { - const numeric = values - .map(numberOrNull) - .filter((value) => value !== null); + const numeric = values.map(numberOrNull).filter((value) => value !== null); if (numeric.length === 0) { return null; } @@ -191,7 +187,8 @@ function buildMarkdown(rows) { md += `# Playback Benchmark Aggregate\n\n`; md += `Generated: ${new Date().toISOString()}\n\n`; md += `Total rows: ${sorted.length}, Passed: ${passed}, Failed: ${failed}\n\n`; - md += "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; + md += + "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; md += "|---|---|---|---|---|---|---|---:|---:|---:|---:|---:|\n"; for (const row of sorted) { md += `| ${row.runTime} | ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.status} | ${formatMetric(row.effectiveFpsMin)} | ${formatMetric(row.scrubP95Max)} | ${formatMetric(row.startupAvg)} | ${formatMetric(row.micDiffMax)} | ${formatMetric(row.sysDiffMax)} |\n`; diff --git a/scripts/analyze-playback-matrix-bottlenecks.js b/scripts/analyze-playback-matrix-bottlenecks.js index ac91d0c8ce..8bf7c4317a 100644 --- a/scripts/analyze-playback-matrix-bottlenecks.js +++ b/scripts/analyze-playback-matrix-bottlenecks.js @@ -150,7 +150,9 @@ function collectIssues(files, options) { const playback = 
Array.isArray(report.playback_results) ? report.playback_results : []; - const scrub = Array.isArray(report.scrub_results) ? report.scrub_results : []; + const scrub = Array.isArray(report.scrub_results) + ? report.scrub_results + : []; const fpsValues = playback .map((entry) => entry.effective_fps) @@ -190,7 +192,9 @@ function recommendation(issue, options) { recommendations.push("inspect decode/render path and frame wait behavior"); } if (issue.scrubP95 !== null && issue.scrubP95 > options.maxScrubP95Ms) { - recommendations.push("investigate seek dispatch pressure and decoder reposition cost"); + recommendations.push( + "investigate seek dispatch pressure and decoder reposition cost", + ); } if (issue.startupAvg !== null && issue.startupAvg > options.maxStartupMs) { recommendations.push("optimize startup warmup and first-frame path"); @@ -209,7 +213,8 @@ function buildMarkdown(issues, options) { return md; } - md += "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; + md += + "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; md += "|---:|---|---|---|---|---|---:|---:|---:|---:|---|\n"; issues.forEach((issue, index) => { md += `| ${index + 1} | ${issue.platform} | ${issue.gpu} | ${issue.scenario} | ${issue.recording} | ${issue.format} | ${formatValue(issue.fpsMin)} | ${formatValue(issue.startupAvg)} | ${formatValue(issue.scrubP95)} | ${formatValue(issue.score, 2)} | ${recommendation(issue, options)} |\n`; diff --git a/scripts/build-playback-matrix-report.js b/scripts/build-playback-matrix-report.js index a07b1876c5..e29c6b4621 100644 --- a/scripts/build-playback-matrix-report.js +++ b/scripts/build-playback-matrix-report.js @@ -125,7 +125,11 @@ function timestampOrEpoch(value) { function upsertLatestCell(cells, candidate) { const key = cellKey(candidate.platform, candidate.gpu, candidate.scenario); 
const existing = cells.get(key); - if (!existing || timestampOrEpoch(candidate.generatedAt) >= timestampOrEpoch(existing.generatedAt)) { + if ( + !existing || + timestampOrEpoch(candidate.generatedAt) >= + timestampOrEpoch(existing.generatedAt) + ) { cells.set(key, candidate); } } @@ -222,7 +226,8 @@ function buildReport(requiredCells, latestCells, formatCoverage) { markdown += "# Playback Matrix Status Report\n\n"; markdown += `Generated: ${new Date().toISOString()}\n\n`; markdown += `Rows: ${rows.length}, Missing rows: ${missingCount}, Rows with failures: ${failCount}\n\n`; - markdown += "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; + markdown += + "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; markdown += "|---|---|---|---|---|---|---|---|\n"; for (const row of rows) { markdown += `| ${row.platform} | ${row.gpu} | ${row.fullStatus} | ${row.scrubStatus} | ${row.mp4} | ${row.fragmented} | ${row.fullTime} | ${row.scrubTime} |\n`; diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index d49fec6024..626c19a7ba 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -163,9 +163,18 @@ function scenarioArgs(options, scenario) { function validateOptions(options) { if (!options.platform || !options.gpu || !options.outputDir) { - throw new Error("Missing required options: --platform, --gpu, --output-dir"); + throw new Error( + "Missing required options: --platform, --gpu, --output-dir", + ); } - const validScenarios = new Set(["full", "scrub", "decoder", "playback", "audio-sync", "camera-sync"]); + const validScenarios = new Set([ + "full", + "scrub", + "decoder", + "playback", + "audio-sync", + "camera-sync", + ]); for (const scenario of options.scenarios) { if (!validScenarios.has(scenario)) { throw new Error(`Unsupported scenario: ${scenario}`); @@ -188,7 +197,9 @@ 
function main() { validateOptions(options); - console.log(`Running matrix for platform=${options.platform} gpu=${options.gpu}`); + console.log( + `Running matrix for platform=${options.platform} gpu=${options.gpu}`, + ); for (const scenario of options.scenarios) { run("cargo", scenarioArgs(options, scenario)); } diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js index c919369c42..b06f01e006 100644 --- a/scripts/validate-playback-matrix.js +++ b/scripts/validate-playback-matrix.js @@ -218,7 +218,10 @@ function main() { }; if (options.outputJson) { - fs.writeFileSync(options.outputJson, JSON.stringify(validationResult, null, 2)); + fs.writeFileSync( + options.outputJson, + JSON.stringify(validationResult, null, 2), + ); console.log(`Validation JSON: ${options.outputJson}`); } From 45059cad8b8ef29a91cd40da5673ea2095979d4f Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sat, 14 Feb 2026 00:01:26 +0000 Subject: [PATCH 278/333] fmt --- scripts/aggregate-playback-benchmarks.js | 11 ++++------- scripts/analyze-playback-matrix-bottlenecks.js | 11 ++++++++--- scripts/build-playback-matrix-report.js | 9 +++++++-- scripts/run-playback-benchmark-matrix.js | 17 ++++++++++++++--- scripts/validate-playback-matrix.js | 5 ++++- 5 files changed, 37 insertions(+), 16 deletions(-) diff --git a/scripts/aggregate-playback-benchmarks.js b/scripts/aggregate-playback-benchmarks.js index 75275c944f..b1e89d0e10 100644 --- a/scripts/aggregate-playback-benchmarks.js +++ b/scripts/aggregate-playback-benchmarks.js @@ -91,9 +91,7 @@ function numberOrNull(value) { } function maxOrNull(values) { - const numeric = values - .map(numberOrNull) - .filter((value) => value !== null); + const numeric = values.map(numberOrNull).filter((value) => value !== null); if (numeric.length === 0) { return null; } @@ -101,9 +99,7 @@ function maxOrNull(values) { } function avgOrNull(values) { - const numeric = values - 
.map(numberOrNull) - .filter((value) => value !== null); + const numeric = values.map(numberOrNull).filter((value) => value !== null); if (numeric.length === 0) { return null; } @@ -191,7 +187,8 @@ function buildMarkdown(rows) { md += `# Playback Benchmark Aggregate\n\n`; md += `Generated: ${new Date().toISOString()}\n\n`; md += `Total rows: ${sorted.length}, Passed: ${passed}, Failed: ${failed}\n\n`; - md += "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; + md += + "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; md += "|---|---|---|---|---|---|---|---:|---:|---:|---:|---:|\n"; for (const row of sorted) { md += `| ${row.runTime} | ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.status} | ${formatMetric(row.effectiveFpsMin)} | ${formatMetric(row.scrubP95Max)} | ${formatMetric(row.startupAvg)} | ${formatMetric(row.micDiffMax)} | ${formatMetric(row.sysDiffMax)} |\n`; diff --git a/scripts/analyze-playback-matrix-bottlenecks.js b/scripts/analyze-playback-matrix-bottlenecks.js index ac91d0c8ce..8bf7c4317a 100644 --- a/scripts/analyze-playback-matrix-bottlenecks.js +++ b/scripts/analyze-playback-matrix-bottlenecks.js @@ -150,7 +150,9 @@ function collectIssues(files, options) { const playback = Array.isArray(report.playback_results) ? report.playback_results : []; - const scrub = Array.isArray(report.scrub_results) ? report.scrub_results : []; + const scrub = Array.isArray(report.scrub_results) + ? 
report.scrub_results + : []; const fpsValues = playback .map((entry) => entry.effective_fps) @@ -190,7 +192,9 @@ function recommendation(issue, options) { recommendations.push("inspect decode/render path and frame wait behavior"); } if (issue.scrubP95 !== null && issue.scrubP95 > options.maxScrubP95Ms) { - recommendations.push("investigate seek dispatch pressure and decoder reposition cost"); + recommendations.push( + "investigate seek dispatch pressure and decoder reposition cost", + ); } if (issue.startupAvg !== null && issue.startupAvg > options.maxStartupMs) { recommendations.push("optimize startup warmup and first-frame path"); @@ -209,7 +213,8 @@ function buildMarkdown(issues, options) { return md; } - md += "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; + md += + "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; md += "|---:|---|---|---|---|---|---:|---:|---:|---:|---|\n"; issues.forEach((issue, index) => { md += `| ${index + 1} | ${issue.platform} | ${issue.gpu} | ${issue.scenario} | ${issue.recording} | ${issue.format} | ${formatValue(issue.fpsMin)} | ${formatValue(issue.startupAvg)} | ${formatValue(issue.scrubP95)} | ${formatValue(issue.score, 2)} | ${recommendation(issue, options)} |\n`; diff --git a/scripts/build-playback-matrix-report.js b/scripts/build-playback-matrix-report.js index a07b1876c5..e29c6b4621 100644 --- a/scripts/build-playback-matrix-report.js +++ b/scripts/build-playback-matrix-report.js @@ -125,7 +125,11 @@ function timestampOrEpoch(value) { function upsertLatestCell(cells, candidate) { const key = cellKey(candidate.platform, candidate.gpu, candidate.scenario); const existing = cells.get(key); - if (!existing || timestampOrEpoch(candidate.generatedAt) >= timestampOrEpoch(existing.generatedAt)) { + if ( + !existing || + timestampOrEpoch(candidate.generatedAt) >= + 
timestampOrEpoch(existing.generatedAt) + ) { cells.set(key, candidate); } } @@ -222,7 +226,8 @@ function buildReport(requiredCells, latestCells, formatCoverage) { markdown += "# Playback Matrix Status Report\n\n"; markdown += `Generated: ${new Date().toISOString()}\n\n`; markdown += `Rows: ${rows.length}, Missing rows: ${missingCount}, Rows with failures: ${failCount}\n\n`; - markdown += "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; + markdown += + "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; markdown += "|---|---|---|---|---|---|---|---|\n"; for (const row of rows) { markdown += `| ${row.platform} | ${row.gpu} | ${row.fullStatus} | ${row.scrubStatus} | ${row.mp4} | ${row.fragmented} | ${row.fullTime} | ${row.scrubTime} |\n`; diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index d49fec6024..626c19a7ba 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -163,9 +163,18 @@ function scenarioArgs(options, scenario) { function validateOptions(options) { if (!options.platform || !options.gpu || !options.outputDir) { - throw new Error("Missing required options: --platform, --gpu, --output-dir"); + throw new Error( + "Missing required options: --platform, --gpu, --output-dir", + ); } - const validScenarios = new Set(["full", "scrub", "decoder", "playback", "audio-sync", "camera-sync"]); + const validScenarios = new Set([ + "full", + "scrub", + "decoder", + "playback", + "audio-sync", + "camera-sync", + ]); for (const scenario of options.scenarios) { if (!validScenarios.has(scenario)) { throw new Error(`Unsupported scenario: ${scenario}`); @@ -188,7 +197,9 @@ function main() { validateOptions(options); - console.log(`Running matrix for platform=${options.platform} gpu=${options.gpu}`); + console.log( + `Running matrix for platform=${options.platform} 
gpu=${options.gpu}`, + ); for (const scenario of options.scenarios) { run("cargo", scenarioArgs(options, scenario)); } diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js index c919369c42..b06f01e006 100644 --- a/scripts/validate-playback-matrix.js +++ b/scripts/validate-playback-matrix.js @@ -218,7 +218,10 @@ function main() { }; if (options.outputJson) { - fs.writeFileSync(options.outputJson, JSON.stringify(validationResult, null, 2)); + fs.writeFileSync( + options.outputJson, + JSON.stringify(validationResult, null, 2), + ); console.log(`Validation JSON: ${options.outputJson}`); } From 81ad88a8ad339b151b9c24e41d3ffea1b8149967 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sat, 14 Feb 2026 00:01:26 +0000 Subject: [PATCH 279/333] fmt --- scripts/aggregate-playback-benchmarks.js | 11 ++++------- scripts/analyze-playback-matrix-bottlenecks.js | 11 ++++++++--- scripts/build-playback-matrix-report.js | 9 +++++++-- scripts/run-playback-benchmark-matrix.js | 17 ++++++++++++++--- scripts/validate-playback-matrix.js | 5 ++++- 5 files changed, 37 insertions(+), 16 deletions(-) diff --git a/scripts/aggregate-playback-benchmarks.js b/scripts/aggregate-playback-benchmarks.js index 75275c944f..b1e89d0e10 100644 --- a/scripts/aggregate-playback-benchmarks.js +++ b/scripts/aggregate-playback-benchmarks.js @@ -91,9 +91,7 @@ function numberOrNull(value) { } function maxOrNull(values) { - const numeric = values - .map(numberOrNull) - .filter((value) => value !== null); + const numeric = values.map(numberOrNull).filter((value) => value !== null); if (numeric.length === 0) { return null; } @@ -101,9 +99,7 @@ function maxOrNull(values) { } function avgOrNull(values) { - const numeric = values - .map(numberOrNull) - .filter((value) => value !== null); + const numeric = values.map(numberOrNull).filter((value) => value !== null); if (numeric.length === 0) { return null; } @@ -191,7 +187,8 @@ 
function buildMarkdown(rows) { md += `# Playback Benchmark Aggregate\n\n`; md += `Generated: ${new Date().toISOString()}\n\n`; md += `Total rows: ${sorted.length}, Passed: ${passed}, Failed: ${failed}\n\n`; - md += "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; + md += + "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; md += "|---|---|---|---|---|---|---|---:|---:|---:|---:|---:|\n"; for (const row of sorted) { md += `| ${row.runTime} | ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.status} | ${formatMetric(row.effectiveFpsMin)} | ${formatMetric(row.scrubP95Max)} | ${formatMetric(row.startupAvg)} | ${formatMetric(row.micDiffMax)} | ${formatMetric(row.sysDiffMax)} |\n`; diff --git a/scripts/analyze-playback-matrix-bottlenecks.js b/scripts/analyze-playback-matrix-bottlenecks.js index ac91d0c8ce..8bf7c4317a 100644 --- a/scripts/analyze-playback-matrix-bottlenecks.js +++ b/scripts/analyze-playback-matrix-bottlenecks.js @@ -150,7 +150,9 @@ function collectIssues(files, options) { const playback = Array.isArray(report.playback_results) ? report.playback_results : []; - const scrub = Array.isArray(report.scrub_results) ? report.scrub_results : []; + const scrub = Array.isArray(report.scrub_results) + ? 
report.scrub_results + : []; const fpsValues = playback .map((entry) => entry.effective_fps) @@ -190,7 +192,9 @@ function recommendation(issue, options) { recommendations.push("inspect decode/render path and frame wait behavior"); } if (issue.scrubP95 !== null && issue.scrubP95 > options.maxScrubP95Ms) { - recommendations.push("investigate seek dispatch pressure and decoder reposition cost"); + recommendations.push( + "investigate seek dispatch pressure and decoder reposition cost", + ); } if (issue.startupAvg !== null && issue.startupAvg > options.maxStartupMs) { recommendations.push("optimize startup warmup and first-frame path"); @@ -209,7 +213,8 @@ function buildMarkdown(issues, options) { return md; } - md += "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; + md += + "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; md += "|---:|---|---|---|---|---|---:|---:|---:|---:|---|\n"; issues.forEach((issue, index) => { md += `| ${index + 1} | ${issue.platform} | ${issue.gpu} | ${issue.scenario} | ${issue.recording} | ${issue.format} | ${formatValue(issue.fpsMin)} | ${formatValue(issue.startupAvg)} | ${formatValue(issue.scrubP95)} | ${formatValue(issue.score, 2)} | ${recommendation(issue, options)} |\n`; diff --git a/scripts/build-playback-matrix-report.js b/scripts/build-playback-matrix-report.js index a07b1876c5..e29c6b4621 100644 --- a/scripts/build-playback-matrix-report.js +++ b/scripts/build-playback-matrix-report.js @@ -125,7 +125,11 @@ function timestampOrEpoch(value) { function upsertLatestCell(cells, candidate) { const key = cellKey(candidate.platform, candidate.gpu, candidate.scenario); const existing = cells.get(key); - if (!existing || timestampOrEpoch(candidate.generatedAt) >= timestampOrEpoch(existing.generatedAt)) { + if ( + !existing || + timestampOrEpoch(candidate.generatedAt) >= + 
timestampOrEpoch(existing.generatedAt) + ) { cells.set(key, candidate); } } @@ -222,7 +226,8 @@ function buildReport(requiredCells, latestCells, formatCoverage) { markdown += "# Playback Matrix Status Report\n\n"; markdown += `Generated: ${new Date().toISOString()}\n\n`; markdown += `Rows: ${rows.length}, Missing rows: ${missingCount}, Rows with failures: ${failCount}\n\n`; - markdown += "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; + markdown += + "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; markdown += "|---|---|---|---|---|---|---|---|\n"; for (const row of rows) { markdown += `| ${row.platform} | ${row.gpu} | ${row.fullStatus} | ${row.scrubStatus} | ${row.mp4} | ${row.fragmented} | ${row.fullTime} | ${row.scrubTime} |\n`; diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index d49fec6024..626c19a7ba 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -163,9 +163,18 @@ function scenarioArgs(options, scenario) { function validateOptions(options) { if (!options.platform || !options.gpu || !options.outputDir) { - throw new Error("Missing required options: --platform, --gpu, --output-dir"); + throw new Error( + "Missing required options: --platform, --gpu, --output-dir", + ); } - const validScenarios = new Set(["full", "scrub", "decoder", "playback", "audio-sync", "camera-sync"]); + const validScenarios = new Set([ + "full", + "scrub", + "decoder", + "playback", + "audio-sync", + "camera-sync", + ]); for (const scenario of options.scenarios) { if (!validScenarios.has(scenario)) { throw new Error(`Unsupported scenario: ${scenario}`); @@ -188,7 +197,9 @@ function main() { validateOptions(options); - console.log(`Running matrix for platform=${options.platform} gpu=${options.gpu}`); + console.log( + `Running matrix for platform=${options.platform} 
gpu=${options.gpu}`, + ); for (const scenario of options.scenarios) { run("cargo", scenarioArgs(options, scenario)); } diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js index c919369c42..b06f01e006 100644 --- a/scripts/validate-playback-matrix.js +++ b/scripts/validate-playback-matrix.js @@ -218,7 +218,10 @@ function main() { }; if (options.outputJson) { - fs.writeFileSync(options.outputJson, JSON.stringify(validationResult, null, 2)); + fs.writeFileSync( + options.outputJson, + JSON.stringify(validationResult, null, 2), + ); console.log(`Validation JSON: ${options.outputJson}`); } From baacc3981e596f9a6cb9558fad02ae77aad862fb Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 14 Feb 2026 00:02:53 +0000 Subject: [PATCH 280/333] improve: centralize change-aware frame request signaling Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 +++ crates/editor/src/playback.rs | 62 +++++++++--------------------- 2 files changed, 25 insertions(+), 43 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 7382aea874..9f45c85bf6 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -454,6 +454,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prefetch buffer insertion now uses a single `BTreeMap::entry` match to detect insertion and store new frames. - Eliminates the prior contains-check plus entry-insert double lookup in prefetch hot path. +64. **Centralized change-aware frame request signaling (2026-02-13)** + - Playback now uses a shared helper for change-aware frame-request watch updates across warmup/seek/skip paths. + - Reduces duplicated watch-update closure logic and keeps no-op request dedupe behavior consistent in all frame-request call sites. + --- ## Root Cause Analysis Archive @@ -613,6 +617,7 @@ Decoder Pipeline: 67. 
Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. +70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -651,6 +656,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. - `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. - `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. +- `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 5f61d26ea7..acdd144ee2 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -190,6 +190,17 @@ fn build_clip_offsets_lookup(project: &ProjectConfiguration) -> HashMap, value: u32) { + let _ = tx.send_if_modified(|current| { + if *current == value { + false + } else { + *current = value; + true + } + }); +} + impl Playback { pub async fn start( mut self, @@ -642,15 +653,8 @@ impl Playback { first_frame_time = None; warmup_start = Instant::now(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -685,15 +689,8 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -730,15 +727,8 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + 
send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -839,14 +829,7 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); + send_watch_u32_if_changed(&frame_request_tx, frame_number); let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; @@ -1039,14 +1022,7 @@ impl Playback { skip_events = skip_events.saturating_add(1); prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); + send_watch_u32_if_changed(&frame_request_tx, frame_number); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx From 45265dfb4651a67af10c87453aecfa4875388af9 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:02:53 +0000 Subject: [PATCH 281/333] improve: centralize change-aware frame request signaling Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 +++ crates/editor/src/playback.rs | 62 +++++++++--------------------- 2 files changed, 25 insertions(+), 43 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 7382aea874..9f45c85bf6 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -454,6 +454,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prefetch buffer insertion now uses a single `BTreeMap::entry` match to detect insertion and store new frames. - Eliminates the prior contains-check plus entry-insert double lookup in prefetch hot path. +64. 
**Centralized change-aware frame request signaling (2026-02-13)** + - Playback now uses a shared helper for change-aware frame-request watch updates across warmup/seek/skip paths. + - Reduces duplicated watch-update closure logic and keeps no-op request dedupe behavior consistent in all frame-request call sites. + --- ## Root Cause Analysis Archive @@ -613,6 +617,7 @@ Decoder Pipeline: 67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. +70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -651,6 +656,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. - `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. - `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. +- `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. 
- `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 5f61d26ea7..acdd144ee2 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -190,6 +190,17 @@ fn build_clip_offsets_lookup(project: &ProjectConfiguration) -> HashMap, value: u32) { + let _ = tx.send_if_modified(|current| { + if *current == value { + false + } else { + *current = value; + true + } + }); +} + impl Playback { pub async fn start( mut self, @@ -642,15 +653,8 @@ impl Playback { first_frame_time = None; warmup_start = Instant::now(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -685,15 +689,8 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -730,15 +727,8 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = 
frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -839,14 +829,7 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); + send_watch_u32_if_changed(&frame_request_tx, frame_number); let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; @@ -1039,14 +1022,7 @@ impl Playback { skip_events = skip_events.saturating_add(1); prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); + send_watch_u32_if_changed(&frame_request_tx, frame_number); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx From a8abcf57ff1dc3fee6367c5a522ce78e36650159 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:02:53 +0000 Subject: [PATCH 282/333] improve: centralize change-aware frame request signaling Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 +++ crates/editor/src/playback.rs | 62 +++++++++--------------------- 2 files changed, 25 insertions(+), 43 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 7382aea874..9f45c85bf6 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -454,6 +454,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prefetch buffer insertion now 
uses a single `BTreeMap::entry` match to detect insertion and store new frames. - Eliminates the prior contains-check plus entry-insert double lookup in prefetch hot path. +64. **Centralized change-aware frame request signaling (2026-02-13)** + - Playback now uses a shared helper for change-aware frame-request watch updates across warmup/seek/skip paths. + - Reduces duplicated watch-update closure logic and keeps no-op request dedupe behavior consistent in all frame-request call sites. + --- ## Root Cause Analysis Archive @@ -613,6 +617,7 @@ Decoder Pipeline: 67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. +70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -651,6 +656,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. - `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. - `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. +- `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. 
- `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 5f61d26ea7..acdd144ee2 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -190,6 +190,17 @@ fn build_clip_offsets_lookup(project: &ProjectConfiguration) -> HashMap, value: u32) { + let _ = tx.send_if_modified(|current| { + if *current == value { + false + } else { + *current = value; + true + } + }); +} + impl Playback { pub async fn start( mut self, @@ -642,15 +653,8 @@ impl Playback { first_frame_time = None; warmup_start = Instant::now(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -685,15 +689,8 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -730,15 
+727,8 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -839,14 +829,7 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); + send_watch_u32_if_changed(&frame_request_tx, frame_number); let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; @@ -1039,14 +1022,7 @@ impl Playback { skip_events = skip_events.saturating_add(1); prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); + send_watch_u32_if_changed(&frame_request_tx, frame_number); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx From 4a94a8d7b9deddf8f08841684af85d4964cbaeae Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 14 Feb 2026 00:03:59 +0000 Subject: [PATCH 283/333] improve: short-circuit frame waits when seeks are pending Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 9f45c85bf6..6196689569 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -458,6 +458,10 @@ 
cargo run -p cap-recording --example playback-test-runner -- full - Playback now uses a shared helper for change-aware frame-request watch updates across warmup/seek/skip paths. - Reduces duplicated watch-update closure logic and keeps no-op request dedupe behavior consistent in all frame-request call sites. +65. **Short-circuited frame waits when seek updates are pending (2026-02-13)** + - In-flight frame wait polling now exits early when a seek change is observed, instead of waiting through full wait budgets. + - Startup prefetch wait path now also bails out immediately when seek state changes during wait. + --- ## Root Cause Analysis Archive @@ -618,6 +622,7 @@ Decoder Pipeline: 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. +71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -657,6 +662,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. - `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. - `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. 
+- `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index acdd144ee2..ee44267f9c 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -796,6 +796,9 @@ impl Playback { } } _ = tokio::time::sleep(in_flight_poll_interval) => { + if seek_rx.has_changed().unwrap_or(false) { + break; + } let still_in_flight = playback_prefetch_in_flight .read() .map(|guard| guard.contains(&in_flight_key)) @@ -811,6 +814,10 @@ impl Playback { } } + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + if let Some(prefetched) = found_frame { Some(( Arc::new(prefetched.segment_frames), @@ -834,6 +841,10 @@ impl Playback { let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + if let Ok(Some(prefetched)) = wait_result { if prefetched.generation != seek_generation { frame_number = frame_number.saturating_add(1); From 366463667d7c5f6cc7bbb06636120e1552541608 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:03:59 +0000 Subject: [PATCH 284/333] improve: short-circuit frame waits when seeks are pending Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md 
b/crates/editor/PLAYBACK-FINDINGS.md index 9f45c85bf6..6196689569 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -458,6 +458,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Playback now uses a shared helper for change-aware frame-request watch updates across warmup/seek/skip paths. - Reduces duplicated watch-update closure logic and keeps no-op request dedupe behavior consistent in all frame-request call sites. +65. **Short-circuited frame waits when seek updates are pending (2026-02-13)** + - In-flight frame wait polling now exits early when a seek change is observed, instead of waiting through full wait budgets. + - Startup prefetch wait path now also bails out immediately when seek state changes during wait. + --- ## Root Cause Analysis Archive @@ -618,6 +622,7 @@ Decoder Pipeline: 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. +71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -657,6 +662,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. - `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. 
- `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. +- `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index acdd144ee2..ee44267f9c 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -796,6 +796,9 @@ impl Playback { } } _ = tokio::time::sleep(in_flight_poll_interval) => { + if seek_rx.has_changed().unwrap_or(false) { + break; + } let still_in_flight = playback_prefetch_in_flight .read() .map(|guard| guard.contains(&in_flight_key)) @@ -811,6 +814,10 @@ impl Playback { } } + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + if let Some(prefetched) = found_frame { Some(( Arc::new(prefetched.segment_frames), @@ -834,6 +841,10 @@ impl Playback { let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + if let Ok(Some(prefetched)) = wait_result { if prefetched.generation != seek_generation { frame_number = frame_number.saturating_add(1); From 12a4a2f799041a56d3dd7104136c6c89e436ab96 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:03:59 +0000 Subject: [PATCH 285/333] improve: short-circuit frame waits when seeks are pending Co-authored-by: Richie McIlroy --- 
crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 9f45c85bf6..6196689569 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -458,6 +458,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Playback now uses a shared helper for change-aware frame-request watch updates across warmup/seek/skip paths. - Reduces duplicated watch-update closure logic and keeps no-op request dedupe behavior consistent in all frame-request call sites. +65. **Short-circuited frame waits when seek updates are pending (2026-02-13)** + - In-flight frame wait polling now exits early when a seek change is observed, instead of waiting through full wait budgets. + - Startup prefetch wait path now also bails out immediately when seek state changes during wait. + --- ## Root Cause Analysis Archive @@ -618,6 +622,7 @@ Decoder Pipeline: 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. +71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -657,6 +662,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. 
- `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. - `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. +- `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index acdd144ee2..ee44267f9c 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -796,6 +796,9 @@ impl Playback { } } _ = tokio::time::sleep(in_flight_poll_interval) => { + if seek_rx.has_changed().unwrap_or(false) { + break; + } let still_in_flight = playback_prefetch_in_flight .read() .map(|guard| guard.contains(&in_flight_key)) @@ -811,6 +814,10 @@ impl Playback { } } + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + if let Some(prefetched) = found_frame { Some(( Arc::new(prefetched.segment_frames), @@ -834,6 +841,10 @@ impl Playback { let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + if let Ok(Some(prefetched)) = wait_result { if prefetched.generation != seek_generation { frame_number = frame_number.saturating_add(1); From a846a894897486effecbf7d068cb56329c605179 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: 
Sat, 14 Feb 2026 00:07:32 +0000 Subject: [PATCH 286/333] improve: skip stale waits and decode fallback on pending seeks Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 6196689569..34c85a4c20 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -462,6 +462,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - In-flight frame wait polling now exits early when a seek change is observed, instead of waiting through full wait budgets. - Startup prefetch wait path now also bails out immediately when seek state changes during wait. +66. **Added pre-wait seek guards before startup and direct decode waits (2026-02-13)** + - Startup prefetch wait path now checks for pending seeks before entering timeout waits and before skip fallback on timeout. + - Direct decode fallback path now checks for pending seek updates before scheduling synchronous decode work. + --- ## Root Cause Analysis Archive @@ -623,6 +627,7 @@ Decoder Pipeline: 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. 71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. +72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. 
@@ -663,6 +668,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. - `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. - `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. +- `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index ee44267f9c..b1b4466f1c 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -836,6 +836,10 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + send_watch_u32_if_changed(&frame_request_tx, frame_number); let wait_result = @@ -867,11 +871,18 @@ impl Playback { continue; } } else { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } frame_number = frame_number.saturating_add(1); total_frames_skipped += 1; continue; } } else { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + let Some((segment_time, segment)) = cached_project.get_segment_time(playback_time) else { From e8a0910a48bad230ba6e0d8ed4e1dcc69c16cc2f Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:07:32 +0000 Subject: [PATCH 287/333] improve: skip stale waits and decode fallback on pending seeks Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 6196689569..34c85a4c20 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -462,6 +462,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - In-flight frame wait polling now exits early when a seek change is observed, instead of waiting through full wait budgets. - Startup prefetch wait path now also bails out immediately when seek state changes during wait. +66. **Added pre-wait seek guards before startup and direct decode waits (2026-02-13)** + - Startup prefetch wait path now checks for pending seeks before entering timeout waits and before skip fallback on timeout. 
+ - Direct decode fallback path now checks for pending seek updates before scheduling synchronous decode work. + --- ## Root Cause Analysis Archive @@ -623,6 +627,7 @@ Decoder Pipeline: 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. 71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. +72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -663,6 +668,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. - `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. - `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. +- `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. 
- `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index ee44267f9c..b1b4466f1c 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -836,6 +836,10 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + send_watch_u32_if_changed(&frame_request_tx, frame_number); let wait_result = @@ -867,11 +871,18 @@ impl Playback { continue; } } else { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } frame_number = frame_number.saturating_add(1); total_frames_skipped += 1; continue; } } else { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + let Some((segment_time, segment)) = cached_project.get_segment_time(playback_time) else { From 8f2d0e5c7c6333f65ea47aeed64c820e5edb5f37 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:07:32 +0000 Subject: [PATCH 288/333] improve: skip stale waits and decode fallback on pending seeks Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 6196689569..34c85a4c20 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -462,6 +462,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - In-flight frame wait polling now exits early when a seek change is observed, instead of waiting through full wait budgets. - Startup prefetch wait path now also bails out immediately when seek state changes during wait. +66. 
**Added pre-wait seek guards before startup and direct decode waits (2026-02-13)** + - Startup prefetch wait path now checks for pending seeks before entering timeout waits and before skip fallback on timeout. + - Direct decode fallback path now checks for pending seek updates before scheduling synchronous decode work. + --- ## Root Cause Analysis Archive @@ -623,6 +627,7 @@ Decoder Pipeline: 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. 71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. +72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -663,6 +668,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. - `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. - `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. +- `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. 
- `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index ee44267f9c..b1b4466f1c 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -836,6 +836,10 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + send_watch_u32_if_changed(&frame_request_tx, frame_number); let wait_result = @@ -867,11 +871,18 @@ impl Playback { continue; } } else { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } frame_number = frame_number.saturating_add(1); total_frames_skipped += 1; continue; } } else { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + let Some((segment_time, segment)) = cached_project.get_segment_time(playback_time) else { From 55df9a520d9eee8742ff7f476364a2a37ab77425 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 14 Feb 2026 00:08:45 +0000 Subject: [PATCH 289/333] improve: batch prefetch buffer trims during queue drains Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 +++++ crates/editor/src/playback.rs | 39 +++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 34c85a4c20..128b67aff3 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -466,6 +466,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Startup prefetch wait path now checks for pending seeks before 
entering timeout waits and before skip fallback on timeout. - Direct decode fallback path now checks for pending seek updates before scheduling synchronous decode work. +67. **Batched keyed-buffer trims during queue-drain insertion (2026-02-13)** + - Warmup and playback queue-drain paths now insert prefetched frames without per-item trim checks and apply one bounded trim pass after the batch. + - Reduces repeated trim work when multiple prefetched frames are drained in the same loop iteration. + --- ## Root Cause Analysis Archive @@ -628,6 +632,7 @@ Decoder Pipeline: 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. 71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. +73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -669,6 +674,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. - `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. - `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. 
+- `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b1b4466f1c..b305d8c2f2 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -133,6 +133,16 @@ fn insert_prefetched_frame( buffer: &mut BTreeMap, prefetched: PrefetchedFrame, current_frame: u32, +) -> bool { + let inserted_new = insert_prefetched_frame_untrimmed(buffer, prefetched, current_frame); + let trimmed = trim_prefetch_buffer(buffer, current_frame); + inserted_new || trimmed +} + +fn insert_prefetched_frame_untrimmed( + buffer: &mut BTreeMap, + prefetched: PrefetchedFrame, + current_frame: u32, ) -> bool { if prefetched.frame_number < current_frame { return false; @@ -146,8 +156,7 @@ fn insert_prefetched_frame( } std::collections::btree_map::Entry::Occupied(_) => false, }; - let trimmed = trim_prefetch_buffer(buffer, current_frame); - inserted_new || trimmed + inserted_new } fn prune_prefetch_buffer_before_frame( @@ -619,6 +628,7 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { let mut next_prefetched = Some(prefetched); + let mut prefetched_batch_changed = false; loop { let Some(prefetched) = next_prefetched.take() else { @@ -626,18 +636,26 @@ impl Playback { }; if prefetched.generation == seek_generation - && insert_prefetched_frame( + && insert_prefetched_frame_untrimmed( &mut prefetch_buffer, prefetched, frame_number, ) { - warmup_buffer_changed = true; + prefetched_batch_changed = true; } next_prefetched = prefetch_rx.try_recv().ok(); } + if trim_prefetch_buffer(&mut prefetch_buffer, frame_number) { + prefetched_batch_changed = true; + } + + if prefetched_batch_changed { + warmup_buffer_changed = true; + } + if first_frame_time.is_none() && !prefetch_buffer.is_empty() { first_frame_time = Some(Instant::now()); } @@ -704,12 +722,21 @@ impl Playback { cached_project = self.project.borrow_and_update().clone(); playback_clip_offsets = build_clip_offsets_lookup(&cached_project); } + let mut drained_prefetch_changed = false; while let Ok(prefetched) = prefetch_rx.try_recv() { if prefetched.generation == seek_generation { - let _ = - insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); + if insert_prefetched_frame_untrimmed( + &mut prefetch_buffer, + prefetched, + frame_number, + ) { + drained_prefetch_changed = true; + } } } + if drained_prefetch_changed { + let _ = trim_prefetch_buffer(&mut prefetch_buffer, frame_number); + } prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); let frame_offset = frame_number.saturating_sub(playback_anchor_frame) as f64; From f87f62e590d70f10e91fa21dd88a45b0926653ea Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:08:45 +0000 Subject: [PATCH 290/333] improve: batch prefetch buffer trims during queue drains Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 +++++ crates/editor/src/playback.rs | 39 +++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 6 deletions(-) 
diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 34c85a4c20..128b67aff3 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -466,6 +466,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Startup prefetch wait path now checks for pending seeks before entering timeout waits and before skip fallback on timeout. - Direct decode fallback path now checks for pending seek updates before scheduling synchronous decode work. +67. **Batched keyed-buffer trims during queue-drain insertion (2026-02-13)** + - Warmup and playback queue-drain paths now insert prefetched frames without per-item trim checks and apply one bounded trim pass after the batch. + - Reduces repeated trim work when multiple prefetched frames are drained in the same loop iteration. + --- ## Root Cause Analysis Archive @@ -628,6 +632,7 @@ Decoder Pipeline: 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. 71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. +73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -669,6 +674,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. 
- `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. - `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. +- `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b1b4466f1c..b305d8c2f2 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -133,6 +133,16 @@ fn insert_prefetched_frame( buffer: &mut BTreeMap, prefetched: PrefetchedFrame, current_frame: u32, +) -> bool { + let inserted_new = insert_prefetched_frame_untrimmed(buffer, prefetched, current_frame); + let trimmed = trim_prefetch_buffer(buffer, current_frame); + inserted_new || trimmed +} + +fn insert_prefetched_frame_untrimmed( + buffer: &mut BTreeMap, + prefetched: PrefetchedFrame, + current_frame: u32, ) -> bool { if prefetched.frame_number < current_frame { return false; @@ -146,8 +156,7 @@ fn insert_prefetched_frame( } std::collections::btree_map::Entry::Occupied(_) => false, }; - let trimmed = trim_prefetch_buffer(buffer, current_frame); - inserted_new || trimmed + inserted_new } fn prune_prefetch_buffer_before_frame( @@ -619,6 +628,7 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { let mut next_prefetched = Some(prefetched); + let mut prefetched_batch_changed = false; loop { let Some(prefetched) = next_prefetched.take() else { @@ -626,18 +636,26 @@ impl Playback { }; if prefetched.generation == seek_generation - && insert_prefetched_frame( + && insert_prefetched_frame_untrimmed( &mut prefetch_buffer, prefetched, frame_number, ) { - warmup_buffer_changed = true; + prefetched_batch_changed = true; } next_prefetched = prefetch_rx.try_recv().ok(); } + if trim_prefetch_buffer(&mut prefetch_buffer, frame_number) { + prefetched_batch_changed = true; + } + + if prefetched_batch_changed { + warmup_buffer_changed = true; + } + if first_frame_time.is_none() && !prefetch_buffer.is_empty() { first_frame_time = Some(Instant::now()); } @@ -704,12 +722,21 @@ impl Playback { cached_project = self.project.borrow_and_update().clone(); playback_clip_offsets = build_clip_offsets_lookup(&cached_project); } + let mut drained_prefetch_changed = false; while let Ok(prefetched) = prefetch_rx.try_recv() { if prefetched.generation == seek_generation { - let _ = - insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); + if insert_prefetched_frame_untrimmed( + &mut prefetch_buffer, + prefetched, + frame_number, + ) { + drained_prefetch_changed = true; + } } } + if drained_prefetch_changed { + let _ = trim_prefetch_buffer(&mut prefetch_buffer, frame_number); + } prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); let frame_offset = frame_number.saturating_sub(playback_anchor_frame) as f64; From 0ddd232ed700b10ae21d4140f9d11885ee8830b1 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:08:45 +0000 Subject: [PATCH 291/333] improve: batch prefetch buffer trims during queue drains Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 +++++ crates/editor/src/playback.rs | 39 +++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 6 deletions(-) 
diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 34c85a4c20..128b67aff3 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -466,6 +466,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Startup prefetch wait path now checks for pending seeks before entering timeout waits and before skip fallback on timeout. - Direct decode fallback path now checks for pending seek updates before scheduling synchronous decode work. +67. **Batched keyed-buffer trims during queue-drain insertion (2026-02-13)** + - Warmup and playback queue-drain paths now insert prefetched frames without per-item trim checks and apply one bounded trim pass after the batch. + - Reduces repeated trim work when multiple prefetched frames are drained in the same loop iteration. + --- ## Root Cause Analysis Archive @@ -628,6 +632,7 @@ Decoder Pipeline: 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. 71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. +73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -669,6 +674,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. 
- `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. - `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. +- `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b1b4466f1c..b305d8c2f2 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -133,6 +133,16 @@ fn insert_prefetched_frame( buffer: &mut BTreeMap, prefetched: PrefetchedFrame, current_frame: u32, +) -> bool { + let inserted_new = insert_prefetched_frame_untrimmed(buffer, prefetched, current_frame); + let trimmed = trim_prefetch_buffer(buffer, current_frame); + inserted_new || trimmed +} + +fn insert_prefetched_frame_untrimmed( + buffer: &mut BTreeMap, + prefetched: PrefetchedFrame, + current_frame: u32, ) -> bool { if prefetched.frame_number < current_frame { return false; @@ -146,8 +156,7 @@ fn insert_prefetched_frame( } std::collections::btree_map::Entry::Occupied(_) => false, }; - let trimmed = trim_prefetch_buffer(buffer, current_frame); - inserted_new || trimmed + inserted_new } fn prune_prefetch_buffer_before_frame( @@ -619,6 +628,7 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { let mut next_prefetched = Some(prefetched); + let mut prefetched_batch_changed = false; loop { let Some(prefetched) = next_prefetched.take() else { @@ -626,18 +636,26 @@ impl Playback { }; if prefetched.generation == seek_generation - && insert_prefetched_frame( + && insert_prefetched_frame_untrimmed( &mut prefetch_buffer, prefetched, frame_number, ) { - warmup_buffer_changed = true; + prefetched_batch_changed = true; } next_prefetched = prefetch_rx.try_recv().ok(); } + if trim_prefetch_buffer(&mut prefetch_buffer, frame_number) { + prefetched_batch_changed = true; + } + + if prefetched_batch_changed { + warmup_buffer_changed = true; + } + if first_frame_time.is_none() && !prefetch_buffer.is_empty() { first_frame_time = Some(Instant::now()); } @@ -704,12 +722,21 @@ impl Playback { cached_project = self.project.borrow_and_update().clone(); playback_clip_offsets = build_clip_offsets_lookup(&cached_project); } + let mut drained_prefetch_changed = false; while let Ok(prefetched) = prefetch_rx.try_recv() { if prefetched.generation == seek_generation { - let _ = - insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); + if insert_prefetched_frame_untrimmed( + &mut prefetch_buffer, + prefetched, + frame_number, + ) { + drained_prefetch_changed = true; + } } } + if drained_prefetch_changed { + let _ = trim_prefetch_buffer(&mut prefetch_buffer, frame_number); + } prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); let frame_offset = frame_number.saturating_sub(playback_anchor_frame) as f64; From 4766b551e9b00ccfcbf1aa575ad74c592dddf8b6 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 14 Feb 2026 00:09:46 +0000 Subject: [PATCH 292/333] improve: preserve prefetch ramp on small forward rebases Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git 
a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 128b67aff3..674e990f1f 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -470,6 +470,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup and playback queue-drain paths now insert prefetched frames without per-item trim checks and apply one bounded trim pass after the batch. - Reduces repeated trim work when multiple prefetched frames are drained in the same loop iteration. +68. **Limited prefetch state resets to major/backward rebases (2026-02-13)** + - Frame-request rebases now only reset decoded-ramp and behind-prefetch tracking on backward seeks or large seek-distance jumps. + - Preserves prefetch ramp state on small forward rebases to reduce unnecessary throughput drops. + --- ## Root Cause Analysis Archive @@ -633,6 +637,7 @@ Decoder Pipeline: 71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. 73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. +74. Limited prefetch state resets to backward/major frame-request rebases so small forward rebases preserve decode-ramp and behind-prefetch tracking state. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -675,6 +680,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. 
- `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. +- `crates/editor/src/playback.rs`: frame-request rebases now only clear prefetch ramp/behind-tracking state on backward or large-distance jumps, preserving throughput state on small forward rebases. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b305d8c2f2..25cd6c18dc 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -350,11 +350,11 @@ impl Playback { }; next_prefetch_frame = requested; - frames_decoded = 0; - prefetched_behind.clear(); - prefetched_behind_order.clear(); if is_backward_seek || seek_distance > dynamic_prefetch_ahead / 2 { + frames_decoded = 0; + prefetched_behind.clear(); + prefetched_behind_order.clear(); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } From cc18f5a77cba92099d07011bdff1953ee142ddb0 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:09:46 +0000 Subject: [PATCH 293/333] improve: preserve prefetch ramp on small forward rebases Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 128b67aff3..674e990f1f 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -470,6 +470,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup and playback queue-drain paths now insert prefetched frames without per-item trim checks and apply one bounded trim pass after the batch. - Reduces repeated trim work when multiple prefetched frames are drained in the same loop iteration. +68. **Limited prefetch state resets to major/backward rebases (2026-02-13)** + - Frame-request rebases now only reset decoded-ramp and behind-prefetch tracking on backward seeks or large seek-distance jumps. + - Preserves prefetch ramp state on small forward rebases to reduce unnecessary throughput drops. + --- ## Root Cause Analysis Archive @@ -633,6 +637,7 @@ Decoder Pipeline: 71. 
Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. 73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. +74. Limited prefetch state resets to backward/major frame-request rebases so small forward rebases preserve decode-ramp and behind-prefetch tracking state. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -675,6 +680,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. - `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. +- `crates/editor/src/playback.rs`: frame-request rebases now only clear prefetch ramp/behind-tracking state on backward or large-distance jumps, preserving throughput state on small forward rebases. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b305d8c2f2..25cd6c18dc 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -350,11 +350,11 @@ impl Playback { }; next_prefetch_frame = requested; - frames_decoded = 0; - prefetched_behind.clear(); - prefetched_behind_order.clear(); if is_backward_seek || seek_distance > dynamic_prefetch_ahead / 2 { + frames_decoded = 0; + prefetched_behind.clear(); + prefetched_behind_order.clear(); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } From 6cd720ad044eab28661aa6937b69cc890f4b2ecd Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:09:46 +0000 Subject: [PATCH 294/333] improve: preserve prefetch ramp on small forward rebases Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 128b67aff3..674e990f1f 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -470,6 +470,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup and playback queue-drain paths now insert prefetched frames without per-item trim checks and apply one bounded trim pass after the batch. - Reduces repeated trim work when multiple prefetched frames are drained in the same loop iteration. +68. **Limited prefetch state resets to major/backward rebases (2026-02-13)** + - Frame-request rebases now only reset decoded-ramp and behind-prefetch tracking on backward seeks or large seek-distance jumps. + - Preserves prefetch ramp state on small forward rebases to reduce unnecessary throughput drops. + --- ## Root Cause Analysis Archive @@ -633,6 +637,7 @@ Decoder Pipeline: 71. 
Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. 73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. +74. Limited prefetch state resets to backward/major frame-request rebases so small forward rebases preserve decode-ramp and behind-prefetch tracking state. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -675,6 +680,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. - `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. +- `crates/editor/src/playback.rs`: frame-request rebases now only clear prefetch ramp/behind-tracking state on backward or large-distance jumps, preserving throughput state on small forward rebases. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b305d8c2f2..25cd6c18dc 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -350,11 +350,11 @@ impl Playback { }; next_prefetch_frame = requested; - frames_decoded = 0; - prefetched_behind.clear(); - prefetched_behind_order.clear(); if is_backward_seek || seek_distance > dynamic_prefetch_ahead / 2 { + frames_decoded = 0; + prefetched_behind.clear(); + prefetched_behind_order.clear(); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } From a8d52a53ed0e7ffe39593cb87d6dc4a480f994b4 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 14 Feb 2026 00:12:55 +0000 Subject: [PATCH 295/333] improve: gate behind-prefetch scans per playback frame Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 674e990f1f..f3207c403e 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -474,6 +474,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Frame-request rebases now only reset decoded-ramp and behind-prefetch tracking on backward seeks or large seek-distance jumps. - Preserves prefetch ramp state on small forward rebases to reduce unnecessary throughput drops. +69. **Gated behind-prefetch scans to one pass per playback frame (2026-02-13)** + - Behind-prefetch scheduling now scans at most once for each observed playback frame value. + - Avoids repeated behind-window scan work in tight scheduler loops when playback position has not advanced. + --- ## Root Cause Analysis Archive @@ -638,6 +642,7 @@ Decoder Pipeline: 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. 73. 
Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. 74. Limited prefetch state resets to backward/major frame-request rebases so small forward rebases preserve decode-ramp and behind-prefetch tracking state. +75. Gated behind-prefetch scheduling scans to one pass per playback frame to avoid repeated behind-window scan churn while playback position is unchanged. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -681,6 +686,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. - `crates/editor/src/playback.rs`: frame-request rebases now only clear prefetch ramp/behind-tracking state on backward or large-distance jumps, preserving throughput state on small forward rebases. +- `crates/editor/src/playback.rs`: behind-prefetch scheduling now scans at most once per playback frame value, reducing repeated behind-window scan overhead in tight scheduler loops. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 25cd6c18dc..777d451dc0 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -282,6 +282,7 @@ impl Playback { let mut prefetched_behind: HashSet = HashSet::new(); let mut prefetched_behind_order: VecDeque = VecDeque::new(); let mut scheduled_in_flight_frames: HashSet = HashSet::new(); + let mut last_behind_scan_frame: Option = None; const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90).min(PREFETCH_BUFFER_SIZE as u32); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); @@ -328,6 +329,7 @@ impl Playback { frames_decoded = 0; prefetched_behind.clear(); prefetched_behind_order.clear(); + last_behind_scan_frame = None; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); @@ -355,6 +357,7 @@ impl Playback { frames_decoded = 0; prefetched_behind.clear(); prefetched_behind_order.clear(); + last_behind_scan_frame = None; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } @@ -433,7 +436,10 @@ impl Playback { next_prefetch_frame += 1; } - if in_flight.len() < effective_parallel { + if in_flight.len() < effective_parallel + && last_behind_scan_frame != Some(current_playback_frame) + { + last_behind_scan_frame = Some(current_playback_frame); for behind_offset in 1..=dynamic_prefetch_behind { if in_flight.len() >= effective_parallel { break; From 004e28172729c2a4d18c22b594fa8332e1a9a117 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:12:55 +0000 Subject: [PATCH 296/333] improve: gate behind-prefetch scans per playback frame Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 674e990f1f..f3207c403e 100644 --- 
a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -474,6 +474,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Frame-request rebases now only reset decoded-ramp and behind-prefetch tracking on backward seeks or large seek-distance jumps. - Preserves prefetch ramp state on small forward rebases to reduce unnecessary throughput drops. +69. **Gated behind-prefetch scans to one pass per playback frame (2026-02-13)** + - Behind-prefetch scheduling now scans at most once for each observed playback frame value. + - Avoids repeated behind-window scan work in tight scheduler loops when playback position has not advanced. + --- ## Root Cause Analysis Archive @@ -638,6 +642,7 @@ Decoder Pipeline: 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. 73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. 74. Limited prefetch state resets to backward/major frame-request rebases so small forward rebases preserve decode-ramp and behind-prefetch tracking state. +75. Gated behind-prefetch scheduling scans to one pass per playback frame to avoid repeated behind-window scan churn while playback position is unchanged. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -681,6 +686,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. 
- `crates/editor/src/playback.rs`: frame-request rebases now only clear prefetch ramp/behind-tracking state on backward or large-distance jumps, preserving throughput state on small forward rebases. +- `crates/editor/src/playback.rs`: behind-prefetch scheduling now scans at most once per playback frame value, reducing repeated behind-window scan overhead in tight scheduler loops. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 25cd6c18dc..777d451dc0 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -282,6 +282,7 @@ impl Playback { let mut prefetched_behind: HashSet = HashSet::new(); let mut prefetched_behind_order: VecDeque = VecDeque::new(); let mut scheduled_in_flight_frames: HashSet = HashSet::new(); + let mut last_behind_scan_frame: Option = None; const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90).min(PREFETCH_BUFFER_SIZE as u32); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); @@ -328,6 +329,7 @@ impl Playback { frames_decoded = 0; prefetched_behind.clear(); prefetched_behind_order.clear(); + last_behind_scan_frame = None; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); @@ -355,6 +357,7 @@ impl Playback { frames_decoded = 0; prefetched_behind.clear(); prefetched_behind_order.clear(); + last_behind_scan_frame = None; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } @@ -433,7 +436,10 @@ impl Playback { next_prefetch_frame += 1; } - if in_flight.len() < 
effective_parallel { + if in_flight.len() < effective_parallel + && last_behind_scan_frame != Some(current_playback_frame) + { + last_behind_scan_frame = Some(current_playback_frame); for behind_offset in 1..=dynamic_prefetch_behind { if in_flight.len() >= effective_parallel { break; From 5ddf6045437ab8d67f77ee9b5d5eebe554a59615 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:12:55 +0000 Subject: [PATCH 297/333] improve: gate behind-prefetch scans per playback frame Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 674e990f1f..f3207c403e 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -474,6 +474,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Frame-request rebases now only reset decoded-ramp and behind-prefetch tracking on backward seeks or large seek-distance jumps. - Preserves prefetch ramp state on small forward rebases to reduce unnecessary throughput drops. +69. **Gated behind-prefetch scans to one pass per playback frame (2026-02-13)** + - Behind-prefetch scheduling now scans at most once for each observed playback frame value. + - Avoids repeated behind-window scan work in tight scheduler loops when playback position has not advanced. + --- ## Root Cause Analysis Archive @@ -638,6 +642,7 @@ Decoder Pipeline: 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. 73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. 74. Limited prefetch state resets to backward/major frame-request rebases so small forward rebases preserve decode-ramp and behind-prefetch tracking state. +75. 
Gated behind-prefetch scheduling scans to one pass per playback frame to avoid repeated behind-window scan churn while playback position is unchanged. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -681,6 +686,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. - `crates/editor/src/playback.rs`: frame-request rebases now only clear prefetch ramp/behind-tracking state on backward or large-distance jumps, preserving throughput state on small forward rebases. +- `crates/editor/src/playback.rs`: behind-prefetch scheduling now scans at most once per playback frame value, reducing repeated behind-window scan overhead in tight scheduler loops. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 25cd6c18dc..777d451dc0 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -282,6 +282,7 @@ impl Playback { let mut prefetched_behind: HashSet = HashSet::new(); let mut prefetched_behind_order: VecDeque = VecDeque::new(); let mut scheduled_in_flight_frames: HashSet = HashSet::new(); + let mut last_behind_scan_frame: Option = None; const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90).min(PREFETCH_BUFFER_SIZE as u32); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); @@ -328,6 +329,7 @@ impl Playback { frames_decoded = 0; prefetched_behind.clear(); prefetched_behind_order.clear(); + last_behind_scan_frame = None; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); @@ -355,6 +357,7 @@ impl Playback { frames_decoded = 0; prefetched_behind.clear(); prefetched_behind_order.clear(); + last_behind_scan_frame = None; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } @@ -433,7 +436,10 @@ impl Playback { next_prefetch_frame += 1; } - if in_flight.len() < effective_parallel { + if in_flight.len() < effective_parallel + && last_behind_scan_frame != Some(current_playback_frame) + { + last_behind_scan_frame = Some(current_playback_frame); for behind_offset in 1..=dynamic_prefetch_behind { if in_flight.len() >= effective_parallel { break; From 1a12a55bb444bad2456429cc48c34b472f3c91b5 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:55:38 +0000 Subject: [PATCH 298/333] improve: dedupe playback frame-request watch updates --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++ crates/editor/src/playback.rs | 117 ++++++++++------------------- 2 files changed, 46 insertions(+), 77 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 72759ab115..cb3065d9f4 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ 
b/crates/editor/PLAYBACK-FINDINGS.md @@ -446,6 +446,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prefetch and direct-decode paths now use cached clip-index-to-offset maps instead of repeated linear clip scans. - Reduces per-frame scheduling overhead in playback and prefetch loops when projects contain many clips. +62. **Deduplicated frame-request watch updates (2026-02-13)** + - Playback loop now uses change-aware frame-request signaling instead of unconditional watch broadcasts. + - Reduces redundant prefetch wakeups and channel churn when requested frame value does not change. + --- ## Root Cause Analysis Archive @@ -603,6 +607,7 @@ Decoder Pipeline: 65. Scaled prefetch idle polling with frame budget to reduce scheduler wakeup churn during empty in-flight periods. 66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. 67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. +68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -639,6 +644,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch scheduler idle polling now scales with frame budget (bounded) instead of fixed 1ms delay, reducing idle wakeup overhead. - `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. - `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. 
+- `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 580f3a3151..7f0c8ab47e 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -255,11 +255,8 @@ impl Playback { let mut in_flight: FuturesUnordered = FuturesUnordered::new(); let mut frames_decoded: u32 = 0; let mut prefetched_behind: HashSet = HashSet::new(); -<<<<<<< HEAD -======= let mut prefetched_behind_order: VecDeque = VecDeque::new(); let mut scheduled_in_flight_frames: HashSet = HashSet::new(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); @@ -271,28 +268,21 @@ impl Playback { PARALLEL_DECODE_TASKS }; let initial_parallel_tasks = dynamic_parallel_tasks.min(4); -<<<<<<< HEAD -======= let prefetch_idle_poll_interval = Duration::from_secs_f64(1.0 / fps_f64) .mul_f64(0.25) .max(Duration::from_millis(2)) .min(Duration::from_millis(8)); let prefetched_behind_capacity = (dynamic_prefetch_behind as usize).saturating_mul(8); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 let mut active_generation = *prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); let mut prefetch_clip_offsets = build_clip_offsets_lookup(&cached_project); info!( dynamic_prefetch_ahead, -<<<<<<< 
HEAD - dynamic_prefetch_behind, dynamic_parallel_tasks, "Prefetch window configuration" -======= dynamic_prefetch_behind, dynamic_parallel_tasks, prefetch_idle_poll_interval_ms = prefetch_idle_poll_interval.as_secs_f64() * 1000.0, "Prefetch window configuration" ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 ); loop { @@ -312,18 +302,12 @@ impl Playback { next_prefetch_frame = *frame_request_rx.borrow(); frames_decoded = 0; prefetched_behind.clear(); -<<<<<<< HEAD -======= prefetched_behind_order.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } -<<<<<<< HEAD -======= scheduled_in_flight_frames.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 in_flight = FuturesUnordered::new(); } @@ -349,10 +333,7 @@ impl Playback { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } -<<<<<<< HEAD -======= scheduled_in_flight_frames.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 in_flight = FuturesUnordered::new(); } } @@ -380,15 +361,7 @@ impl Playback { break; } -<<<<<<< HEAD - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&(active_generation, frame_num))) - .unwrap_or(false); - if already_in_flight { -======= if scheduled_in_flight_frames.contains(&frame_num) { ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 next_prefetch_frame += 1; continue; } @@ -450,15 +423,7 @@ impl Playback { continue; } -<<<<<<< HEAD - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&(active_generation, behind_frame))) - .unwrap_or(false); - if already_in_flight { -======= if scheduled_in_flight_frames.contains(&behind_frame) { ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 continue; } @@ -504,10 +469,7 @@ impl Playback { biased; Some((frame_num, segment_index, generation, result)) = in_flight.next() => { -<<<<<<< HEAD -======= 
scheduled_in_flight_frames.remove(&frame_num); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.remove(&(generation, frame_num)); } @@ -640,18 +602,6 @@ impl Playback { tokio::select! { Some(prefetched) = prefetch_rx.recv() => { -<<<<<<< HEAD - if prefetched.generation == seek_generation { - if insert_prefetched_frame( - &mut prefetch_buffer, - prefetched, - frame_number, - ) { - warmup_buffer_changed = true; - } - if first_frame_time.is_none() && !prefetch_buffer.is_empty() { - first_frame_time = Some(Instant::now()); -======= let mut next_prefetched = Some(prefetched); loop { @@ -667,7 +617,6 @@ impl Playback { ) { warmup_buffer_changed = true; ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 } next_prefetched = prefetch_rx.try_recv().ok(); @@ -688,28 +637,14 @@ impl Playback { first_frame_time = None; warmup_start = Instant::now(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); - let _ = playback_position_tx.send(frame_number); - if has_audio - && audio_playhead_tx - .send(frame_number as f64 / fps_f64) - .is_err() - { - break; - } - } - _ = seek_rx.changed() => { - let seek_frame = *seek_rx.borrow_and_update(); - seek_generation = seek_generation.saturating_add(1); - frame_number = seek_frame; - prefetch_buffer.clear(); - frame_cache.clear(); - warmup_contiguous_prefetched = 0; - warmup_buffer_changed = false; - first_frame_time = None; - warmup_start = Instant::now(); - let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -745,7 +680,14 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - 
let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -783,7 +725,14 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -885,7 +834,14 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; @@ -1078,7 +1034,14 @@ impl Playback { skip_events = skip_events.saturating_add(1); prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx From c8cccf065c3434db14e297c689c67e42139e27e4 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:54:53 +0000 Subject: [PATCH 299/333] fix: escape backslashes and pipes in markdown table cells for CI reporting --- scripts/compare-playback-benchmark-runs.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 
96b5e9432e..464d6a5159 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -414,6 +414,10 @@ function compareMetrics(baselineRows, candidateRows, options) { }; } +function escapeTableCell(value) { + return String(value).replace(/\\/g, "\\\\").replace(/\|/g, "\\|"); +} + function toMarkdown( comparisons, missingCandidateRows, @@ -446,10 +450,10 @@ function toMarkdown( md += "| Side | File | Error |\n"; md += "|---|---|---|\n"; for (const entry of baselineStats.parseErrors.slice(0, 20)) { - md += `| baseline | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + md += `| baseline | ${escapeTableCell(entry.file)} | ${escapeTableCell(entry.error)} |\n`; } for (const entry of candidateStats.parseErrors.slice(0, 20)) { - md += `| candidate | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + md += `| candidate | ${escapeTableCell(entry.file)} | ${escapeTableCell(entry.error)} |\n`; } md += "\n"; } From 09f2b57368e1d56bbb53f64c54719391cfc8ff4d Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:55:38 +0000 Subject: [PATCH 300/333] improve: dedupe playback frame-request watch updates --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++ crates/editor/src/playback.rs | 117 ++++++++++------------------- 2 files changed, 46 insertions(+), 77 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 72759ab115..cb3065d9f4 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -446,6 +446,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prefetch and direct-decode paths now use cached clip-index-to-offset maps instead of repeated linear clip scans. - Reduces per-frame scheduling overhead in playback and prefetch loops when projects contain many clips. +62. 
**Deduplicated frame-request watch updates (2026-02-13)** + - Playback loop now uses change-aware frame-request signaling instead of unconditional watch broadcasts. + - Reduces redundant prefetch wakeups and channel churn when requested frame value does not change. + --- ## Root Cause Analysis Archive @@ -603,6 +607,7 @@ Decoder Pipeline: 65. Scaled prefetch idle polling with frame budget to reduce scheduler wakeup churn during empty in-flight periods. 66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. 67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. +68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -639,6 +644,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch scheduler idle polling now scales with frame budget (bounded) instead of fixed 1ms delay, reducing idle wakeup overhead. - `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. - `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. +- `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. 
- `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 580f3a3151..7f0c8ab47e 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -255,11 +255,8 @@ impl Playback { let mut in_flight: FuturesUnordered = FuturesUnordered::new(); let mut frames_decoded: u32 = 0; let mut prefetched_behind: HashSet = HashSet::new(); -<<<<<<< HEAD -======= let mut prefetched_behind_order: VecDeque = VecDeque::new(); let mut scheduled_in_flight_frames: HashSet = HashSet::new(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); @@ -271,28 +268,21 @@ impl Playback { PARALLEL_DECODE_TASKS }; let initial_parallel_tasks = dynamic_parallel_tasks.min(4); -<<<<<<< HEAD -======= let prefetch_idle_poll_interval = Duration::from_secs_f64(1.0 / fps_f64) .mul_f64(0.25) .max(Duration::from_millis(2)) .min(Duration::from_millis(8)); let prefetched_behind_capacity = (dynamic_prefetch_behind as usize).saturating_mul(8); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 let mut active_generation = *prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); let mut prefetch_clip_offsets = build_clip_offsets_lookup(&cached_project); info!( dynamic_prefetch_ahead, -<<<<<<< HEAD - dynamic_prefetch_behind, dynamic_parallel_tasks, "Prefetch window configuration" -======= dynamic_prefetch_behind, dynamic_parallel_tasks, prefetch_idle_poll_interval_ms = prefetch_idle_poll_interval.as_secs_f64() * 1000.0, "Prefetch window configuration" ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 ); loop { @@ -312,18 +302,12 @@ impl Playback { next_prefetch_frame = *frame_request_rx.borrow(); frames_decoded = 0; prefetched_behind.clear(); -<<<<<<< HEAD 
-======= prefetched_behind_order.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } -<<<<<<< HEAD -======= scheduled_in_flight_frames.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 in_flight = FuturesUnordered::new(); } @@ -349,10 +333,7 @@ impl Playback { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } -<<<<<<< HEAD -======= scheduled_in_flight_frames.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 in_flight = FuturesUnordered::new(); } } @@ -380,15 +361,7 @@ impl Playback { break; } -<<<<<<< HEAD - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&(active_generation, frame_num))) - .unwrap_or(false); - if already_in_flight { -======= if scheduled_in_flight_frames.contains(&frame_num) { ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 next_prefetch_frame += 1; continue; } @@ -450,15 +423,7 @@ impl Playback { continue; } -<<<<<<< HEAD - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&(active_generation, behind_frame))) - .unwrap_or(false); - if already_in_flight { -======= if scheduled_in_flight_frames.contains(&behind_frame) { ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 continue; } @@ -504,10 +469,7 @@ impl Playback { biased; Some((frame_num, segment_index, generation, result)) = in_flight.next() => { -<<<<<<< HEAD -======= scheduled_in_flight_frames.remove(&frame_num); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.remove(&(generation, frame_num)); } @@ -640,18 +602,6 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { -<<<<<<< HEAD - if prefetched.generation == seek_generation { - if insert_prefetched_frame( - &mut prefetch_buffer, - prefetched, - frame_number, - ) { - warmup_buffer_changed = true; - } - if first_frame_time.is_none() && !prefetch_buffer.is_empty() { - first_frame_time = Some(Instant::now()); -======= let mut next_prefetched = Some(prefetched); loop { @@ -667,7 +617,6 @@ impl Playback { ) { warmup_buffer_changed = true; ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 } next_prefetched = prefetch_rx.try_recv().ok(); @@ -688,28 +637,14 @@ impl Playback { first_frame_time = None; warmup_start = Instant::now(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); - let _ = playback_position_tx.send(frame_number); - if has_audio - && audio_playhead_tx - .send(frame_number as f64 / fps_f64) - .is_err() - { - break; - } - } - _ = seek_rx.changed() => { - let seek_frame = *seek_rx.borrow_and_update(); - seek_generation = seek_generation.saturating_add(1); - frame_number = seek_frame; - prefetch_buffer.clear(); - frame_cache.clear(); - warmup_contiguous_prefetched = 0; - warmup_buffer_changed = false; - first_frame_time = None; - warmup_start = Instant::now(); - let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -745,7 +680,14 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if 
has_audio && audio_playhead_tx @@ -783,7 +725,14 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -885,7 +834,14 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; @@ -1078,7 +1034,14 @@ impl Playback { skip_events = skip_events.saturating_add(1); prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx From 6ce10a375ae8b0c2e5d028800de4f414c5b55c49 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:54:53 +0000 Subject: [PATCH 301/333] fix: escape backslashes and pipes in markdown table cells for CI reporting --- scripts/compare-playback-benchmark-runs.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 96b5e9432e..464d6a5159 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -414,6 +414,10 @@ function compareMetrics(baselineRows, candidateRows, options) { }; } +function escapeTableCell(value) { + 
return String(value).replace(/\\/g, "\\\\").replace(/\|/g, "\\|"); +} + function toMarkdown( comparisons, missingCandidateRows, @@ -446,10 +450,10 @@ function toMarkdown( md += "| Side | File | Error |\n"; md += "|---|---|---|\n"; for (const entry of baselineStats.parseErrors.slice(0, 20)) { - md += `| baseline | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + md += `| baseline | ${escapeTableCell(entry.file)} | ${escapeTableCell(entry.error)} |\n`; } for (const entry of candidateStats.parseErrors.slice(0, 20)) { - md += `| candidate | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + md += `| candidate | ${escapeTableCell(entry.file)} | ${escapeTableCell(entry.error)} |\n`; } md += "\n"; } From 21c57c91d3c1e6eba9ec7f1203ea42415f7e29d6 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:55:38 +0000 Subject: [PATCH 302/333] improve: dedupe playback frame-request watch updates --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++ crates/editor/src/playback.rs | 117 ++++++++++------------------- 2 files changed, 46 insertions(+), 77 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 72759ab115..cb3065d9f4 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -446,6 +446,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prefetch and direct-decode paths now use cached clip-index-to-offset maps instead of repeated linear clip scans. - Reduces per-frame scheduling overhead in playback and prefetch loops when projects contain many clips. +62. **Deduplicated frame-request watch updates (2026-02-13)** + - Playback loop now uses change-aware frame-request signaling instead of unconditional watch broadcasts. + - Reduces redundant prefetch wakeups and channel churn when requested frame value does not change. + --- ## Root Cause Analysis Archive @@ -603,6 +607,7 @@ Decoder Pipeline: 65. 
Scaled prefetch idle polling with frame budget to reduce scheduler wakeup churn during empty in-flight periods. 66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. 67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. +68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -639,6 +644,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch scheduler idle polling now scales with frame budget (bounded) instead of fixed 1ms delay, reducing idle wakeup overhead. - `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. - `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. +- `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 580f3a3151..7f0c8ab47e 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -255,11 +255,8 @@ impl Playback { let mut in_flight: FuturesUnordered = FuturesUnordered::new(); let mut frames_decoded: u32 = 0; let mut prefetched_behind: HashSet = HashSet::new(); -<<<<<<< HEAD -======= let mut prefetched_behind_order: VecDeque = VecDeque::new(); let mut scheduled_in_flight_frames: HashSet = HashSet::new(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); @@ -271,28 +268,21 @@ impl Playback { PARALLEL_DECODE_TASKS }; let initial_parallel_tasks = dynamic_parallel_tasks.min(4); -<<<<<<< HEAD -======= let prefetch_idle_poll_interval = Duration::from_secs_f64(1.0 / fps_f64) .mul_f64(0.25) .max(Duration::from_millis(2)) .min(Duration::from_millis(8)); let prefetched_behind_capacity = (dynamic_prefetch_behind as usize).saturating_mul(8); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 let mut active_generation = *prefetch_seek_generation.borrow(); let mut cached_project = prefetch_project.borrow().clone(); let mut prefetch_clip_offsets = build_clip_offsets_lookup(&cached_project); info!( dynamic_prefetch_ahead, -<<<<<<< HEAD - dynamic_prefetch_behind, dynamic_parallel_tasks, "Prefetch window configuration" -======= dynamic_prefetch_behind, dynamic_parallel_tasks, prefetch_idle_poll_interval_ms = prefetch_idle_poll_interval.as_secs_f64() * 1000.0, "Prefetch window configuration" ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 ); loop { @@ -312,18 +302,12 @@ impl Playback { next_prefetch_frame = *frame_request_rx.borrow(); frames_decoded = 0; prefetched_behind.clear(); -<<<<<<< HEAD -======= prefetched_behind_order.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 if let Ok(mut in_flight_guard) = 
prefetch_in_flight.write() { in_flight_guard.clear(); } -<<<<<<< HEAD -======= scheduled_in_flight_frames.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 in_flight = FuturesUnordered::new(); } @@ -349,10 +333,7 @@ impl Playback { if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } -<<<<<<< HEAD -======= scheduled_in_flight_frames.clear(); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 in_flight = FuturesUnordered::new(); } } @@ -380,15 +361,7 @@ impl Playback { break; } -<<<<<<< HEAD - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&(active_generation, frame_num))) - .unwrap_or(false); - if already_in_flight { -======= if scheduled_in_flight_frames.contains(&frame_num) { ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 next_prefetch_frame += 1; continue; } @@ -450,15 +423,7 @@ impl Playback { continue; } -<<<<<<< HEAD - let already_in_flight = prefetch_in_flight - .read() - .map(|guard| guard.contains(&(active_generation, behind_frame))) - .unwrap_or(false); - if already_in_flight { -======= if scheduled_in_flight_frames.contains(&behind_frame) { ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 continue; } @@ -504,10 +469,7 @@ impl Playback { biased; Some((frame_num, segment_index, generation, result)) = in_flight.next() => { -<<<<<<< HEAD -======= scheduled_in_flight_frames.remove(&frame_num); ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.remove(&(generation, frame_num)); } @@ -640,18 +602,6 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { -<<<<<<< HEAD - if prefetched.generation == seek_generation { - if insert_prefetched_frame( - &mut prefetch_buffer, - prefetched, - frame_number, - ) { - warmup_buffer_changed = true; - } - if first_frame_time.is_none() && !prefetch_buffer.is_empty() { - first_frame_time = Some(Instant::now()); -======= let mut next_prefetched = Some(prefetched); loop { @@ -667,7 +617,6 @@ impl Playback { ) { warmup_buffer_changed = true; ->>>>>>> c801652efd85d0cf6bde578ebe963bc9d7221587 } next_prefetched = prefetch_rx.try_recv().ok(); @@ -688,28 +637,14 @@ impl Playback { first_frame_time = None; warmup_start = Instant::now(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); - let _ = playback_position_tx.send(frame_number); - if has_audio - && audio_playhead_tx - .send(frame_number as f64 / fps_f64) - .is_err() - { - break; - } - } - _ = seek_rx.changed() => { - let seek_frame = *seek_rx.borrow_and_update(); - seek_generation = seek_generation.saturating_add(1); - frame_number = seek_frame; - prefetch_buffer.clear(); - frame_cache.clear(); - warmup_contiguous_prefetched = 0; - warmup_buffer_changed = false; - first_frame_time = None; - warmup_start = Instant::now(); - let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -745,7 +680,14 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if 
has_audio && audio_playhead_tx @@ -783,7 +725,14 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx @@ -885,7 +834,14 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; @@ -1078,7 +1034,14 @@ impl Playback { skip_events = skip_events.saturating_add(1); prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); - let _ = frame_request_tx.send(frame_number); + let _ = frame_request_tx.send_if_modified(|requested| { + if *requested == frame_number { + false + } else { + *requested = frame_number; + true + } + }); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx From 796e83911485d0470a8e3f6dd448000a7a4f799b Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:54:53 +0000 Subject: [PATCH 303/333] fix: escape backslashes and pipes in markdown table cells for CI reporting --- scripts/compare-playback-benchmark-runs.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 96b5e9432e..464d6a5159 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -414,6 +414,10 @@ function compareMetrics(baselineRows, candidateRows, options) { }; } +function escapeTableCell(value) { + 
return String(value).replace(/\\/g, "\\\\").replace(/\|/g, "\\|"); +} + function toMarkdown( comparisons, missingCandidateRows, @@ -446,10 +450,10 @@ function toMarkdown( md += "| Side | File | Error |\n"; md += "|---|---|---|\n"; for (const entry of baselineStats.parseErrors.slice(0, 20)) { - md += `| baseline | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + md += `| baseline | ${escapeTableCell(entry.file)} | ${escapeTableCell(entry.error)} |\n`; } for (const entry of candidateStats.parseErrors.slice(0, 20)) { - md += `| candidate | ${entry.file} | ${entry.error.replace(/\|/g, "\\|")} |\n`; + md += `| candidate | ${escapeTableCell(entry.file)} | ${escapeTableCell(entry.error)} |\n`; } md += "\n"; } From 96731524ac610e8a5c0e7a0a569b917222a89770 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:57:26 +0000 Subject: [PATCH 304/333] improve: avoid duplicate keyed lookups on prefetch insert --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 9 +++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index cb3065d9f4..7382aea874 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -450,6 +450,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Playback loop now uses change-aware frame-request signaling instead of unconditional watch broadcasts. - Reduces redundant prefetch wakeups and channel churn when requested frame value does not change. +63. **Removed duplicate keyed-buffer lookups during prefetch insert (2026-02-13)** + - Prefetch buffer insertion now uses a single `BTreeMap::entry` match to detect insertion and store new frames. + - Eliminates the prior contains-check plus entry-insert double lookup in prefetch hot path. + --- ## Root Cause Analysis Archive @@ -608,6 +612,7 @@ Decoder Pipeline: 66. 
Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. 67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. +69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -645,6 +650,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. - `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. - `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. +- `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 7f0c8ab47e..d241940d51 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -139,8 +139,13 @@ fn insert_prefetched_frame( } let frame_number = prefetched.frame_number; - let inserted_new = !buffer.contains_key(&frame_number); - buffer.entry(frame_number).or_insert(prefetched); + let inserted_new = match buffer.entry(frame_number) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(prefetched); + true + } + std::collections::btree_map::Entry::Occupied(_) => false, + }; let trimmed = trim_prefetch_buffer(buffer, current_frame); inserted_new || trimmed } From 74fe6c825dfceabc07850752651994ee66afefcd Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:55:09 +0000 Subject: [PATCH 305/333] fix: rename skipped_no_metrics to skipped_no_usable_metrics for consistency --- scripts/compare-playback-benchmark-runs.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 464d6a5159..e34c386eb6 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -439,8 +439,8 @@ function toMarkdown( md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? "fail_if_zero" : "allow"}\n\n`; md += `Skipped-file gate: skipped_files=${options.failOnSkippedFiles ? 
"fail" : "allow"}\n\n`; - md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; - md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_metrics=${candidateStats.skippedNoUsableMetrics}, parse_errors=${candidateStats.parseErrors.length}\n\n`; + md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_usable_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; + md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_usable_metrics=${candidateStats.skippedNoUsableMetrics}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; if ( baselineStats.parseErrors.length > 0 || From 358812af45c39b7f77bbe86b0b5f09244d6f4b4d Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:57:26 +0000 Subject: [PATCH 306/333] improve: avoid duplicate keyed lookups on prefetch insert --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ 
crates/editor/src/playback.rs | 9 +++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index cb3065d9f4..7382aea874 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -450,6 +450,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Playback loop now uses change-aware frame-request signaling instead of unconditional watch broadcasts. - Reduces redundant prefetch wakeups and channel churn when requested frame value does not change. +63. **Removed duplicate keyed-buffer lookups during prefetch insert (2026-02-13)** + - Prefetch buffer insertion now uses a single `BTreeMap::entry` match to detect insertion and store new frames. + - Eliminates the prior contains-check plus entry-insert double lookup in prefetch hot path. + --- ## Root Cause Analysis Archive @@ -608,6 +612,7 @@ Decoder Pipeline: 66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. 67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. +69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -645,6 +650,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. - `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. 
- `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. +- `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 7f0c8ab47e..d241940d51 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -139,8 +139,13 @@ fn insert_prefetched_frame( } let frame_number = prefetched.frame_number; - let inserted_new = !buffer.contains_key(&frame_number); - buffer.entry(frame_number).or_insert(prefetched); + let inserted_new = match buffer.entry(frame_number) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(prefetched); + true + } + std::collections::btree_map::Entry::Occupied(_) => false, + }; let trimmed = trim_prefetch_buffer(buffer, current_frame); inserted_new || trimmed } From 93311887f82fdf53cb49ea70722a13e72318cbaf Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:55:09 +0000 Subject: [PATCH 307/333] fix: rename skipped_no_metrics to skipped_no_usable_metrics for consistency --- scripts/compare-playback-benchmark-runs.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 464d6a5159..e34c386eb6 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ 
b/scripts/compare-playback-benchmark-runs.js @@ -439,8 +439,8 @@ function toMarkdown( md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? "fail_if_zero" : "allow"}\n\n`; md += `Skipped-file gate: skipped_files=${options.failOnSkippedFiles ? "fail" : "allow"}\n\n`; - md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; - md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_metrics=${candidateStats.skippedNoUsableMetrics}, parse_errors=${candidateStats.parseErrors.length}\n\n`; + md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_usable_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; + md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_usable_metrics=${candidateStats.skippedNoUsableMetrics}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: 
${insufficientSampleRows.length}\n\n`; if ( baselineStats.parseErrors.length > 0 || From 34b063d0f300bcc0a03d54eadc9d091a31d6c748 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:57:26 +0000 Subject: [PATCH 308/333] improve: avoid duplicate keyed lookups on prefetch insert --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 9 +++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index cb3065d9f4..7382aea874 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -450,6 +450,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Playback loop now uses change-aware frame-request signaling instead of unconditional watch broadcasts. - Reduces redundant prefetch wakeups and channel churn when requested frame value does not change. +63. **Removed duplicate keyed-buffer lookups during prefetch insert (2026-02-13)** + - Prefetch buffer insertion now uses a single `BTreeMap::entry` match to detect insertion and store new frames. + - Eliminates the prior contains-check plus entry-insert double lookup in prefetch hot path. + --- ## Root Cause Analysis Archive @@ -608,6 +612,7 @@ Decoder Pipeline: 66. Bounded behind-prefetch dedupe tracking window to avoid unbounded growth and preserve lookup efficiency over long sessions. 67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. +69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. 
@@ -645,6 +650,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: behind-prefetch dedupe tracking now uses a bounded eviction-ordered window to prevent unbounded set growth during long playback. - `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. - `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. +- `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 7f0c8ab47e..d241940d51 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -139,8 +139,13 @@ fn insert_prefetched_frame( } let frame_number = prefetched.frame_number; - let inserted_new = !buffer.contains_key(&frame_number); - buffer.entry(frame_number).or_insert(prefetched); + let inserted_new = match buffer.entry(frame_number) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(prefetched); + true + } + std::collections::btree_map::Entry::Occupied(_) => false, + }; let trimmed = trim_prefetch_buffer(buffer, current_frame); inserted_new || trimmed } From e52e799a747ee68105bb6db96695ba52159193ca Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Fri, 13 Feb 2026 23:55:09 +0000 Subject: [PATCH 309/333] fix: rename skipped_no_metrics to skipped_no_usable_metrics for consistency --- scripts/compare-playback-benchmark-runs.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/compare-playback-benchmark-runs.js b/scripts/compare-playback-benchmark-runs.js index 464d6a5159..e34c386eb6 100644 --- a/scripts/compare-playback-benchmark-runs.js +++ b/scripts/compare-playback-benchmark-runs.js @@ -439,8 +439,8 @@ function toMarkdown( md += `Parse gate: parse_errors=${options.failOnParseErrors ? "fail" : "allow"}\n\n`; md += `Zero-compare gate: compared_rows=${options.failOnZeroCompared ? "fail_if_zero" : "allow"}\n\n`; md += `Skipped-file gate: skipped_files=${options.failOnSkippedFiles ? 
"fail" : "allow"}\n\n`; - md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; - md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_metrics=${candidateStats.skippedNoUsableMetrics}, parse_errors=${candidateStats.parseErrors.length}\n\n`; + md += `Baseline files: total=${baselineStats.totalFiles}, parsed=${baselineStats.parsedFiles}, usable=${baselineStats.usableFiles}, skipped=${baselineStats.skippedFiles}, skipped_no_reports=${baselineStats.skippedNoReports}, skipped_no_usable_metrics=${baselineStats.skippedNoUsableMetrics}, parse_errors=${baselineStats.parseErrors.length}\n`; + md += `Candidate files: total=${candidateStats.totalFiles}, parsed=${candidateStats.parsedFiles}, usable=${candidateStats.usableFiles}, skipped=${candidateStats.skippedFiles}, skipped_no_reports=${candidateStats.skippedNoReports}, skipped_no_usable_metrics=${candidateStats.skippedNoUsableMetrics}, parse_errors=${candidateStats.parseErrors.length}\n\n`; md += `Compared rows: ${comparisons.length}, regressions: ${regressions.length}, missing candidate rows: ${missingCandidateRows.length}, candidate-only rows: ${candidateOnlyRows.length}, insufficient sample rows: ${insufficientSampleRows.length}\n\n`; if ( baselineStats.parseErrors.length > 0 || From ade9d8b86f740aca87d83a69048dbb3bd4801b97 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:01:26 +0000 Subject: [PATCH 310/333] fmt --- scripts/aggregate-playback-benchmarks.js | 11 ++++------- scripts/analyze-playback-matrix-bottlenecks.js | 11 ++++++++--- 
scripts/build-playback-matrix-report.js | 9 +++++++-- scripts/run-playback-benchmark-matrix.js | 17 ++++++++++++++--- scripts/validate-playback-matrix.js | 5 ++++- 5 files changed, 37 insertions(+), 16 deletions(-) diff --git a/scripts/aggregate-playback-benchmarks.js b/scripts/aggregate-playback-benchmarks.js index 75275c944f..b1e89d0e10 100644 --- a/scripts/aggregate-playback-benchmarks.js +++ b/scripts/aggregate-playback-benchmarks.js @@ -91,9 +91,7 @@ function numberOrNull(value) { } function maxOrNull(values) { - const numeric = values - .map(numberOrNull) - .filter((value) => value !== null); + const numeric = values.map(numberOrNull).filter((value) => value !== null); if (numeric.length === 0) { return null; } @@ -101,9 +99,7 @@ function maxOrNull(values) { } function avgOrNull(values) { - const numeric = values - .map(numberOrNull) - .filter((value) => value !== null); + const numeric = values.map(numberOrNull).filter((value) => value !== null); if (numeric.length === 0) { return null; } @@ -191,7 +187,8 @@ function buildMarkdown(rows) { md += `# Playback Benchmark Aggregate\n\n`; md += `Generated: ${new Date().toISOString()}\n\n`; md += `Total rows: ${sorted.length}, Passed: ${passed}, Failed: ${failed}\n\n`; - md += "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; + md += + "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; md += "|---|---|---|---|---|---|---|---:|---:|---:|---:|---:|\n"; for (const row of sorted) { md += `| ${row.runTime} | ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.status} | ${formatMetric(row.effectiveFpsMin)} | ${formatMetric(row.scrubP95Max)} | ${formatMetric(row.startupAvg)} | ${formatMetric(row.micDiffMax)} | ${formatMetric(row.sysDiffMax)} |\n`; diff --git 
a/scripts/analyze-playback-matrix-bottlenecks.js b/scripts/analyze-playback-matrix-bottlenecks.js index ac91d0c8ce..8bf7c4317a 100644 --- a/scripts/analyze-playback-matrix-bottlenecks.js +++ b/scripts/analyze-playback-matrix-bottlenecks.js @@ -150,7 +150,9 @@ function collectIssues(files, options) { const playback = Array.isArray(report.playback_results) ? report.playback_results : []; - const scrub = Array.isArray(report.scrub_results) ? report.scrub_results : []; + const scrub = Array.isArray(report.scrub_results) + ? report.scrub_results + : []; const fpsValues = playback .map((entry) => entry.effective_fps) @@ -190,7 +192,9 @@ function recommendation(issue, options) { recommendations.push("inspect decode/render path and frame wait behavior"); } if (issue.scrubP95 !== null && issue.scrubP95 > options.maxScrubP95Ms) { - recommendations.push("investigate seek dispatch pressure and decoder reposition cost"); + recommendations.push( + "investigate seek dispatch pressure and decoder reposition cost", + ); } if (issue.startupAvg !== null && issue.startupAvg > options.maxStartupMs) { recommendations.push("optimize startup warmup and first-frame path"); @@ -209,7 +213,8 @@ function buildMarkdown(issues, options) { return md; } - md += "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; + md += + "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; md += "|---:|---|---|---|---|---|---:|---:|---:|---:|---|\n"; issues.forEach((issue, index) => { md += `| ${index + 1} | ${issue.platform} | ${issue.gpu} | ${issue.scenario} | ${issue.recording} | ${issue.format} | ${formatValue(issue.fpsMin)} | ${formatValue(issue.startupAvg)} | ${formatValue(issue.scrubP95)} | ${formatValue(issue.score, 2)} | ${recommendation(issue, options)} |\n`; diff --git a/scripts/build-playback-matrix-report.js 
b/scripts/build-playback-matrix-report.js index a07b1876c5..e29c6b4621 100644 --- a/scripts/build-playback-matrix-report.js +++ b/scripts/build-playback-matrix-report.js @@ -125,7 +125,11 @@ function timestampOrEpoch(value) { function upsertLatestCell(cells, candidate) { const key = cellKey(candidate.platform, candidate.gpu, candidate.scenario); const existing = cells.get(key); - if (!existing || timestampOrEpoch(candidate.generatedAt) >= timestampOrEpoch(existing.generatedAt)) { + if ( + !existing || + timestampOrEpoch(candidate.generatedAt) >= + timestampOrEpoch(existing.generatedAt) + ) { cells.set(key, candidate); } } @@ -222,7 +226,8 @@ function buildReport(requiredCells, latestCells, formatCoverage) { markdown += "# Playback Matrix Status Report\n\n"; markdown += `Generated: ${new Date().toISOString()}\n\n`; markdown += `Rows: ${rows.length}, Missing rows: ${missingCount}, Rows with failures: ${failCount}\n\n`; - markdown += "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; + markdown += + "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; markdown += "|---|---|---|---|---|---|---|---|\n"; for (const row of rows) { markdown += `| ${row.platform} | ${row.gpu} | ${row.fullStatus} | ${row.scrubStatus} | ${row.mp4} | ${row.fragmented} | ${row.fullTime} | ${row.scrubTime} |\n`; diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index d49fec6024..626c19a7ba 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -163,9 +163,18 @@ function scenarioArgs(options, scenario) { function validateOptions(options) { if (!options.platform || !options.gpu || !options.outputDir) { - throw new Error("Missing required options: --platform, --gpu, --output-dir"); + throw new Error( + "Missing required options: --platform, --gpu, --output-dir", + ); } - const validScenarios = new 
Set(["full", "scrub", "decoder", "playback", "audio-sync", "camera-sync"]); + const validScenarios = new Set([ + "full", + "scrub", + "decoder", + "playback", + "audio-sync", + "camera-sync", + ]); for (const scenario of options.scenarios) { if (!validScenarios.has(scenario)) { throw new Error(`Unsupported scenario: ${scenario}`); @@ -188,7 +197,9 @@ function main() { validateOptions(options); - console.log(`Running matrix for platform=${options.platform} gpu=${options.gpu}`); + console.log( + `Running matrix for platform=${options.platform} gpu=${options.gpu}`, + ); for (const scenario of options.scenarios) { run("cargo", scenarioArgs(options, scenario)); } diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js index c919369c42..b06f01e006 100644 --- a/scripts/validate-playback-matrix.js +++ b/scripts/validate-playback-matrix.js @@ -218,7 +218,10 @@ function main() { }; if (options.outputJson) { - fs.writeFileSync(options.outputJson, JSON.stringify(validationResult, null, 2)); + fs.writeFileSync( + options.outputJson, + JSON.stringify(validationResult, null, 2), + ); console.log(`Validation JSON: ${options.outputJson}`); } From d797e3f5242786398be129a762ffd1b75eb5618f Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:01:26 +0000 Subject: [PATCH 311/333] fmt --- scripts/aggregate-playback-benchmarks.js | 11 ++++------- scripts/analyze-playback-matrix-bottlenecks.js | 11 ++++++++--- scripts/build-playback-matrix-report.js | 9 +++++++-- scripts/run-playback-benchmark-matrix.js | 17 ++++++++++++++--- scripts/validate-playback-matrix.js | 5 ++++- 5 files changed, 37 insertions(+), 16 deletions(-) diff --git a/scripts/aggregate-playback-benchmarks.js b/scripts/aggregate-playback-benchmarks.js index 75275c944f..b1e89d0e10 100644 --- a/scripts/aggregate-playback-benchmarks.js +++ b/scripts/aggregate-playback-benchmarks.js @@ -91,9 +91,7 @@ function numberOrNull(value) { } function maxOrNull(values) { - const numeric = 
values - .map(numberOrNull) - .filter((value) => value !== null); + const numeric = values.map(numberOrNull).filter((value) => value !== null); if (numeric.length === 0) { return null; } @@ -101,9 +99,7 @@ function maxOrNull(values) { } function avgOrNull(values) { - const numeric = values - .map(numberOrNull) - .filter((value) => value !== null); + const numeric = values.map(numberOrNull).filter((value) => value !== null); if (numeric.length === 0) { return null; } @@ -191,7 +187,8 @@ function buildMarkdown(rows) { md += `# Playback Benchmark Aggregate\n\n`; md += `Generated: ${new Date().toISOString()}\n\n`; md += `Total rows: ${sorted.length}, Passed: ${passed}, Failed: ${failed}\n\n`; - md += "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; + md += + "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; md += "|---|---|---|---|---|---|---|---:|---:|---:|---:|---:|\n"; for (const row of sorted) { md += `| ${row.runTime} | ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.status} | ${formatMetric(row.effectiveFpsMin)} | ${formatMetric(row.scrubP95Max)} | ${formatMetric(row.startupAvg)} | ${formatMetric(row.micDiffMax)} | ${formatMetric(row.sysDiffMax)} |\n`; diff --git a/scripts/analyze-playback-matrix-bottlenecks.js b/scripts/analyze-playback-matrix-bottlenecks.js index ac91d0c8ce..8bf7c4317a 100644 --- a/scripts/analyze-playback-matrix-bottlenecks.js +++ b/scripts/analyze-playback-matrix-bottlenecks.js @@ -150,7 +150,9 @@ function collectIssues(files, options) { const playback = Array.isArray(report.playback_results) ? report.playback_results : []; - const scrub = Array.isArray(report.scrub_results) ? report.scrub_results : []; + const scrub = Array.isArray(report.scrub_results) + ? 
report.scrub_results + : []; const fpsValues = playback .map((entry) => entry.effective_fps) @@ -190,7 +192,9 @@ function recommendation(issue, options) { recommendations.push("inspect decode/render path and frame wait behavior"); } if (issue.scrubP95 !== null && issue.scrubP95 > options.maxScrubP95Ms) { - recommendations.push("investigate seek dispatch pressure and decoder reposition cost"); + recommendations.push( + "investigate seek dispatch pressure and decoder reposition cost", + ); } if (issue.startupAvg !== null && issue.startupAvg > options.maxStartupMs) { recommendations.push("optimize startup warmup and first-frame path"); @@ -209,7 +213,8 @@ function buildMarkdown(issues, options) { return md; } - md += "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; + md += + "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; md += "|---:|---|---|---|---|---|---:|---:|---:|---:|---|\n"; issues.forEach((issue, index) => { md += `| ${index + 1} | ${issue.platform} | ${issue.gpu} | ${issue.scenario} | ${issue.recording} | ${issue.format} | ${formatValue(issue.fpsMin)} | ${formatValue(issue.startupAvg)} | ${formatValue(issue.scrubP95)} | ${formatValue(issue.score, 2)} | ${recommendation(issue, options)} |\n`; diff --git a/scripts/build-playback-matrix-report.js b/scripts/build-playback-matrix-report.js index a07b1876c5..e29c6b4621 100644 --- a/scripts/build-playback-matrix-report.js +++ b/scripts/build-playback-matrix-report.js @@ -125,7 +125,11 @@ function timestampOrEpoch(value) { function upsertLatestCell(cells, candidate) { const key = cellKey(candidate.platform, candidate.gpu, candidate.scenario); const existing = cells.get(key); - if (!existing || timestampOrEpoch(candidate.generatedAt) >= timestampOrEpoch(existing.generatedAt)) { + if ( + !existing || + timestampOrEpoch(candidate.generatedAt) >= + 
timestampOrEpoch(existing.generatedAt) + ) { cells.set(key, candidate); } } @@ -222,7 +226,8 @@ function buildReport(requiredCells, latestCells, formatCoverage) { markdown += "# Playback Matrix Status Report\n\n"; markdown += `Generated: ${new Date().toISOString()}\n\n`; markdown += `Rows: ${rows.length}, Missing rows: ${missingCount}, Rows with failures: ${failCount}\n\n`; - markdown += "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; + markdown += + "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; markdown += "|---|---|---|---|---|---|---|---|\n"; for (const row of rows) { markdown += `| ${row.platform} | ${row.gpu} | ${row.fullStatus} | ${row.scrubStatus} | ${row.mp4} | ${row.fragmented} | ${row.fullTime} | ${row.scrubTime} |\n`; diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index d49fec6024..626c19a7ba 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -163,9 +163,18 @@ function scenarioArgs(options, scenario) { function validateOptions(options) { if (!options.platform || !options.gpu || !options.outputDir) { - throw new Error("Missing required options: --platform, --gpu, --output-dir"); + throw new Error( + "Missing required options: --platform, --gpu, --output-dir", + ); } - const validScenarios = new Set(["full", "scrub", "decoder", "playback", "audio-sync", "camera-sync"]); + const validScenarios = new Set([ + "full", + "scrub", + "decoder", + "playback", + "audio-sync", + "camera-sync", + ]); for (const scenario of options.scenarios) { if (!validScenarios.has(scenario)) { throw new Error(`Unsupported scenario: ${scenario}`); @@ -188,7 +197,9 @@ function main() { validateOptions(options); - console.log(`Running matrix for platform=${options.platform} gpu=${options.gpu}`); + console.log( + `Running matrix for platform=${options.platform} 
gpu=${options.gpu}`, + ); for (const scenario of options.scenarios) { run("cargo", scenarioArgs(options, scenario)); } diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js index c919369c42..b06f01e006 100644 --- a/scripts/validate-playback-matrix.js +++ b/scripts/validate-playback-matrix.js @@ -218,7 +218,10 @@ function main() { }; if (options.outputJson) { - fs.writeFileSync(options.outputJson, JSON.stringify(validationResult, null, 2)); + fs.writeFileSync( + options.outputJson, + JSON.stringify(validationResult, null, 2), + ); console.log(`Validation JSON: ${options.outputJson}`); } From dc7d73e4928cd9b562a17c98bb7a16c53e6d0ff1 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:01:26 +0000 Subject: [PATCH 312/333] fmt --- scripts/aggregate-playback-benchmarks.js | 11 ++++------- scripts/analyze-playback-matrix-bottlenecks.js | 11 ++++++++--- scripts/build-playback-matrix-report.js | 9 +++++++-- scripts/run-playback-benchmark-matrix.js | 17 ++++++++++++++--- scripts/validate-playback-matrix.js | 5 ++++- 5 files changed, 37 insertions(+), 16 deletions(-) diff --git a/scripts/aggregate-playback-benchmarks.js b/scripts/aggregate-playback-benchmarks.js index 75275c944f..b1e89d0e10 100644 --- a/scripts/aggregate-playback-benchmarks.js +++ b/scripts/aggregate-playback-benchmarks.js @@ -91,9 +91,7 @@ function numberOrNull(value) { } function maxOrNull(values) { - const numeric = values - .map(numberOrNull) - .filter((value) => value !== null); + const numeric = values.map(numberOrNull).filter((value) => value !== null); if (numeric.length === 0) { return null; } @@ -101,9 +99,7 @@ function maxOrNull(values) { } function avgOrNull(values) { - const numeric = values - .map(numberOrNull) - .filter((value) => value !== null); + const numeric = values.map(numberOrNull).filter((value) => value !== null); if (numeric.length === 0) { return null; } @@ -191,7 +187,8 @@ function buildMarkdown(rows) { md += `# Playback 
Benchmark Aggregate\n\n`; md += `Generated: ${new Date().toISOString()}\n\n`; md += `Total rows: ${sorted.length}, Passed: ${passed}, Failed: ${failed}\n\n`; - md += "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; + md += + "| Run Time (UTC) | Platform | GPU | Scenario | Recording | Format | Status | FPS(min) | Scrub p95(ms) | Startup avg(ms) | Mic diff max(ms) | Sys diff max(ms) |\n"; md += "|---|---|---|---|---|---|---|---:|---:|---:|---:|---:|\n"; for (const row of sorted) { md += `| ${row.runTime} | ${row.platform} | ${row.gpu} | ${row.scenario} | ${row.recording} | ${row.format} | ${row.status} | ${formatMetric(row.effectiveFpsMin)} | ${formatMetric(row.scrubP95Max)} | ${formatMetric(row.startupAvg)} | ${formatMetric(row.micDiffMax)} | ${formatMetric(row.sysDiffMax)} |\n`; diff --git a/scripts/analyze-playback-matrix-bottlenecks.js b/scripts/analyze-playback-matrix-bottlenecks.js index ac91d0c8ce..8bf7c4317a 100644 --- a/scripts/analyze-playback-matrix-bottlenecks.js +++ b/scripts/analyze-playback-matrix-bottlenecks.js @@ -150,7 +150,9 @@ function collectIssues(files, options) { const playback = Array.isArray(report.playback_results) ? report.playback_results : []; - const scrub = Array.isArray(report.scrub_results) ? report.scrub_results : []; + const scrub = Array.isArray(report.scrub_results) + ? 
report.scrub_results + : []; const fpsValues = playback .map((entry) => entry.effective_fps) @@ -190,7 +192,9 @@ function recommendation(issue, options) { recommendations.push("inspect decode/render path and frame wait behavior"); } if (issue.scrubP95 !== null && issue.scrubP95 > options.maxScrubP95Ms) { - recommendations.push("investigate seek dispatch pressure and decoder reposition cost"); + recommendations.push( + "investigate seek dispatch pressure and decoder reposition cost", + ); } if (issue.startupAvg !== null && issue.startupAvg > options.maxStartupMs) { recommendations.push("optimize startup warmup and first-frame path"); @@ -209,7 +213,8 @@ function buildMarkdown(issues, options) { return md; } - md += "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; + md += + "| Rank | Platform | GPU | Scenario | Recording | Format | FPS(min) | Startup avg(ms) | Scrub p95(ms) | Score | Recommendation |\n"; md += "|---:|---|---|---|---|---|---:|---:|---:|---:|---|\n"; issues.forEach((issue, index) => { md += `| ${index + 1} | ${issue.platform} | ${issue.gpu} | ${issue.scenario} | ${issue.recording} | ${issue.format} | ${formatValue(issue.fpsMin)} | ${formatValue(issue.startupAvg)} | ${formatValue(issue.scrubP95)} | ${formatValue(issue.score, 2)} | ${recommendation(issue, options)} |\n`; diff --git a/scripts/build-playback-matrix-report.js b/scripts/build-playback-matrix-report.js index a07b1876c5..e29c6b4621 100644 --- a/scripts/build-playback-matrix-report.js +++ b/scripts/build-playback-matrix-report.js @@ -125,7 +125,11 @@ function timestampOrEpoch(value) { function upsertLatestCell(cells, candidate) { const key = cellKey(candidate.platform, candidate.gpu, candidate.scenario); const existing = cells.get(key); - if (!existing || timestampOrEpoch(candidate.generatedAt) >= timestampOrEpoch(existing.generatedAt)) { + if ( + !existing || + timestampOrEpoch(candidate.generatedAt) >= + 
timestampOrEpoch(existing.generatedAt) + ) { cells.set(key, candidate); } } @@ -222,7 +226,8 @@ function buildReport(requiredCells, latestCells, formatCoverage) { markdown += "# Playback Matrix Status Report\n\n"; markdown += `Generated: ${new Date().toISOString()}\n\n`; markdown += `Rows: ${rows.length}, Missing rows: ${missingCount}, Rows with failures: ${failCount}\n\n`; - markdown += "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; + markdown += + "| Platform | GPU | Full | Scrub | MP4 Seen | Fragmented Seen | Full Timestamp | Scrub Timestamp |\n"; markdown += "|---|---|---|---|---|---|---|---|\n"; for (const row of rows) { markdown += `| ${row.platform} | ${row.gpu} | ${row.fullStatus} | ${row.scrubStatus} | ${row.mp4} | ${row.fragmented} | ${row.fullTime} | ${row.scrubTime} |\n`; diff --git a/scripts/run-playback-benchmark-matrix.js b/scripts/run-playback-benchmark-matrix.js index d49fec6024..626c19a7ba 100644 --- a/scripts/run-playback-benchmark-matrix.js +++ b/scripts/run-playback-benchmark-matrix.js @@ -163,9 +163,18 @@ function scenarioArgs(options, scenario) { function validateOptions(options) { if (!options.platform || !options.gpu || !options.outputDir) { - throw new Error("Missing required options: --platform, --gpu, --output-dir"); + throw new Error( + "Missing required options: --platform, --gpu, --output-dir", + ); } - const validScenarios = new Set(["full", "scrub", "decoder", "playback", "audio-sync", "camera-sync"]); + const validScenarios = new Set([ + "full", + "scrub", + "decoder", + "playback", + "audio-sync", + "camera-sync", + ]); for (const scenario of options.scenarios) { if (!validScenarios.has(scenario)) { throw new Error(`Unsupported scenario: ${scenario}`); @@ -188,7 +197,9 @@ function main() { validateOptions(options); - console.log(`Running matrix for platform=${options.platform} gpu=${options.gpu}`); + console.log( + `Running matrix for platform=${options.platform} 
gpu=${options.gpu}`, + ); for (const scenario of options.scenarios) { run("cargo", scenarioArgs(options, scenario)); } diff --git a/scripts/validate-playback-matrix.js b/scripts/validate-playback-matrix.js index c919369c42..b06f01e006 100644 --- a/scripts/validate-playback-matrix.js +++ b/scripts/validate-playback-matrix.js @@ -218,7 +218,10 @@ function main() { }; if (options.outputJson) { - fs.writeFileSync(options.outputJson, JSON.stringify(validationResult, null, 2)); + fs.writeFileSync( + options.outputJson, + JSON.stringify(validationResult, null, 2), + ); console.log(`Validation JSON: ${options.outputJson}`); } From e808b1644b346dd74ada767d34b82400cc6f9d12 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:02:53 +0000 Subject: [PATCH 313/333] improve: centralize change-aware frame request signaling --- crates/editor/PLAYBACK-FINDINGS.md | 6 +++ crates/editor/src/playback.rs | 62 +++++++++--------------------- 2 files changed, 25 insertions(+), 43 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 7382aea874..9f45c85bf6 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -454,6 +454,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prefetch buffer insertion now uses a single `BTreeMap::entry` match to detect insertion and store new frames. - Eliminates the prior contains-check plus entry-insert double lookup in prefetch hot path. +64. **Centralized change-aware frame request signaling (2026-02-13)** + - Playback now uses a shared helper for change-aware frame-request watch updates across warmup/seek/skip paths. + - Reduces duplicated watch-update closure logic and keeps no-op request dedupe behavior consistent in all frame-request call sites. + --- ## Root Cause Analysis Archive @@ -613,6 +617,7 @@ Decoder Pipeline: 67. 
Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. +70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -651,6 +656,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. - `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. - `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. +- `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 5f61d26ea7..acdd144ee2 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -190,6 +190,17 @@ fn build_clip_offsets_lookup(project: &ProjectConfiguration) -> HashMap, value: u32) { + let _ = tx.send_if_modified(|current| { + if *current == value { + false + } else { + *current = value; + true + } + }); +} + impl Playback { pub async fn start( mut self, @@ -642,15 +653,8 @@ impl Playback { first_frame_time = None; warmup_start = Instant::now(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -685,15 +689,8 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -730,15 +727,8 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + 
send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -839,14 +829,7 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); + send_watch_u32_if_changed(&frame_request_tx, frame_number); let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; @@ -1039,14 +1022,7 @@ impl Playback { skip_events = skip_events.saturating_add(1); prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); + send_watch_u32_if_changed(&frame_request_tx, frame_number); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx From e34446bf4f886b2a2832ceaf256a05a55834857f Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:02:53 +0000 Subject: [PATCH 314/333] improve: centralize change-aware frame request signaling --- crates/editor/PLAYBACK-FINDINGS.md | 6 +++ crates/editor/src/playback.rs | 62 +++++++++--------------------- 2 files changed, 25 insertions(+), 43 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 7382aea874..9f45c85bf6 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -454,6 +454,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prefetch buffer insertion now uses a single `BTreeMap::entry` match to detect insertion and store new frames. - Eliminates the prior contains-check plus entry-insert double lookup in prefetch hot path. +64. 
**Centralized change-aware frame request signaling (2026-02-13)** + - Playback now uses a shared helper for change-aware frame-request watch updates across warmup/seek/skip paths. + - Reduces duplicated watch-update closure logic and keeps no-op request dedupe behavior consistent in all frame-request call sites. + --- ## Root Cause Analysis Archive @@ -613,6 +617,7 @@ Decoder Pipeline: 67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. +70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -651,6 +656,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. - `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. - `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. +- `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. 
- `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 5f61d26ea7..acdd144ee2 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -190,6 +190,17 @@ fn build_clip_offsets_lookup(project: &ProjectConfiguration) -> HashMap, value: u32) { + let _ = tx.send_if_modified(|current| { + if *current == value { + false + } else { + *current = value; + true + } + }); +} + impl Playback { pub async fn start( mut self, @@ -642,15 +653,8 @@ impl Playback { first_frame_time = None; warmup_start = Instant::now(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -685,15 +689,8 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -730,15 +727,8 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = 
frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -839,14 +829,7 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); + send_watch_u32_if_changed(&frame_request_tx, frame_number); let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; @@ -1039,14 +1022,7 @@ impl Playback { skip_events = skip_events.saturating_add(1); prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); + send_watch_u32_if_changed(&frame_request_tx, frame_number); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx From 27dd7999779094316ddf0d055e5af8bca549c444 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:02:53 +0000 Subject: [PATCH 315/333] improve: centralize change-aware frame request signaling --- crates/editor/PLAYBACK-FINDINGS.md | 6 +++ crates/editor/src/playback.rs | 62 +++++++++--------------------- 2 files changed, 25 insertions(+), 43 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 7382aea874..9f45c85bf6 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -454,6 +454,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Prefetch buffer insertion now uses a single 
`BTreeMap::entry` match to detect insertion and store new frames. - Eliminates the prior contains-check plus entry-insert double lookup in prefetch hot path. +64. **Centralized change-aware frame request signaling (2026-02-13)** + - Playback now uses a shared helper for change-aware frame-request watch updates across warmup/seek/skip paths. + - Reduces duplicated watch-update closure logic and keeps no-op request dedupe behavior consistent in all frame-request call sites. + --- ## Root Cause Analysis Archive @@ -613,6 +617,7 @@ Decoder Pipeline: 67. Cached clip-offset lookups for prefetch and direct-decode scheduling to replace repeated linear clip scans on decode hot paths. 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. +70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -651,6 +656,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch and playback direct-decode paths now use cached clip-offset maps rebuilt on project updates, avoiding repeated clip list linear searches. - `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. - `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. +- `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. 
- `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 5f61d26ea7..acdd144ee2 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -190,6 +190,17 @@ fn build_clip_offsets_lookup(project: &ProjectConfiguration) -> HashMap, value: u32) { + let _ = tx.send_if_modified(|current| { + if *current == value { + false + } else { + *current = value; + true + } + }); +} + impl Playback { pub async fn start( mut self, @@ -642,15 +653,8 @@ impl Playback { first_frame_time = None; warmup_start = Instant::now(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -685,15 +689,8 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -730,15 
+727,8 @@ impl Playback { prefetch_buffer.clear(); frame_cache.clear(); let _ = seek_generation_tx.send(seek_generation); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); - let _ = playback_position_tx.send(frame_number); + send_watch_u32_if_changed(&frame_request_tx, frame_number); + send_watch_u32_if_changed(&playback_position_tx, frame_number); if has_audio && audio_playhead_tx .send(frame_number as f64 / fps_f64) @@ -839,14 +829,7 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); + send_watch_u32_if_changed(&frame_request_tx, frame_number); let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; @@ -1039,14 +1022,7 @@ impl Playback { skip_events = skip_events.saturating_add(1); prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); - let _ = frame_request_tx.send_if_modified(|requested| { - if *requested == frame_number { - false - } else { - *requested = frame_number; - true - } - }); + send_watch_u32_if_changed(&frame_request_tx, frame_number); let _ = playback_position_tx.send(frame_number); if has_audio && audio_playhead_tx From d53082bc486aed15147359b27f15f7b6b80d2e03 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:03:59 +0000 Subject: [PATCH 316/333] improve: short-circuit frame waits when seeks are pending --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 9f45c85bf6..6196689569 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -458,6 +458,10 @@ cargo run -p cap-recording 
--example playback-test-runner -- full - Playback now uses a shared helper for change-aware frame-request watch updates across warmup/seek/skip paths. - Reduces duplicated watch-update closure logic and keeps no-op request dedupe behavior consistent in all frame-request call sites. +65. **Short-circuited frame waits when seek updates are pending (2026-02-13)** + - In-flight frame wait polling now exits early when a seek change is observed, instead of waiting through full wait budgets. + - Startup prefetch wait path now also bails out immediately when seek state changes during wait. + --- ## Root Cause Analysis Archive @@ -618,6 +622,7 @@ Decoder Pipeline: 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. +71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -657,6 +662,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. - `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. - `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. 
+- `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index acdd144ee2..ee44267f9c 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -796,6 +796,9 @@ impl Playback { } } _ = tokio::time::sleep(in_flight_poll_interval) => { + if seek_rx.has_changed().unwrap_or(false) { + break; + } let still_in_flight = playback_prefetch_in_flight .read() .map(|guard| guard.contains(&in_flight_key)) @@ -811,6 +814,10 @@ impl Playback { } } + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + if let Some(prefetched) = found_frame { Some(( Arc::new(prefetched.segment_frames), @@ -834,6 +841,10 @@ impl Playback { let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + if let Ok(Some(prefetched)) = wait_result { if prefetched.generation != seek_generation { frame_number = frame_number.saturating_add(1); From ee3ee2d3ad72c2d35a8aa32bc0d616b1a873e40b Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:03:59 +0000 Subject: [PATCH 317/333] improve: short-circuit frame waits when seeks are pending --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 
9f45c85bf6..6196689569 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -458,6 +458,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Playback now uses a shared helper for change-aware frame-request watch updates across warmup/seek/skip paths. - Reduces duplicated watch-update closure logic and keeps no-op request dedupe behavior consistent in all frame-request call sites. +65. **Short-circuited frame waits when seek updates are pending (2026-02-13)** + - In-flight frame wait polling now exits early when a seek change is observed, instead of waiting through full wait budgets. + - Startup prefetch wait path now also bails out immediately when seek state changes during wait. + --- ## Root Cause Analysis Archive @@ -618,6 +622,7 @@ Decoder Pipeline: 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. +71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -657,6 +662,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. - `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. 
- `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. +- `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index acdd144ee2..ee44267f9c 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -796,6 +796,9 @@ impl Playback { } } _ = tokio::time::sleep(in_flight_poll_interval) => { + if seek_rx.has_changed().unwrap_or(false) { + break; + } let still_in_flight = playback_prefetch_in_flight .read() .map(|guard| guard.contains(&in_flight_key)) @@ -811,6 +814,10 @@ impl Playback { } } + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + if let Some(prefetched) = found_frame { Some(( Arc::new(prefetched.segment_frames), @@ -834,6 +841,10 @@ impl Playback { let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + if let Ok(Some(prefetched)) = wait_result { if prefetched.generation != seek_generation { frame_number = frame_number.saturating_add(1); From 5e8ffd617bd058cad599ce559a859ec5406bfa5d Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:03:59 +0000 Subject: [PATCH 318/333] improve: short-circuit frame waits when seeks are pending --- 
crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 9f45c85bf6..6196689569 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -458,6 +458,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Playback now uses a shared helper for change-aware frame-request watch updates across warmup/seek/skip paths. - Reduces duplicated watch-update closure logic and keeps no-op request dedupe behavior consistent in all frame-request call sites. +65. **Short-circuited frame waits when seek updates are pending (2026-02-13)** + - In-flight frame wait polling now exits early when a seek change is observed, instead of waiting through full wait budgets. + - Startup prefetch wait path now also bails out immediately when seek state changes during wait. + --- ## Root Cause Analysis Archive @@ -618,6 +622,7 @@ Decoder Pipeline: 68. Deduplicated frame-request watch updates so unchanged frame requests no longer trigger redundant watch notifications. 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. +71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -657,6 +662,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: frame-request updates now use `watch::Sender::send_if_modified` across playback/warmup/skip paths to avoid redundant unchanged-frame notifications. 
- `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. - `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. +- `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index acdd144ee2..ee44267f9c 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -796,6 +796,9 @@ impl Playback { } } _ = tokio::time::sleep(in_flight_poll_interval) => { + if seek_rx.has_changed().unwrap_or(false) { + break; + } let still_in_flight = playback_prefetch_in_flight .read() .map(|guard| guard.contains(&in_flight_key)) @@ -811,6 +814,10 @@ impl Playback { } } + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + if let Some(prefetched) = found_frame { Some(( Arc::new(prefetched.segment_frames), @@ -834,6 +841,10 @@ impl Playback { let wait_result = tokio::time::timeout(frame_fetch_timeout, prefetch_rx.recv()).await; + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + if let Ok(Some(prefetched)) = wait_result { if prefetched.generation != seek_generation { frame_number = frame_number.saturating_add(1); From 51044cf37b3d95d4b3b24887ddf0d7bf2b9c5d36 Mon Sep 17 00:00:00 2001 From: Richie McIlroy 
Date: Sat, 14 Feb 2026 00:07:32 +0000 Subject: [PATCH 319/333] improve: skip stale waits and decode fallback on pending seeks --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 6196689569..34c85a4c20 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -462,6 +462,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - In-flight frame wait polling now exits early when a seek change is observed, instead of waiting through full wait budgets. - Startup prefetch wait path now also bails out immediately when seek state changes during wait. +66. **Added pre-wait seek guards before startup and direct decode waits (2026-02-13)** + - Startup prefetch wait path now checks for pending seeks before entering timeout waits and before skip fallback on timeout. + - Direct decode fallback path now checks for pending seek updates before scheduling synchronous decode work. + --- ## Root Cause Analysis Archive @@ -623,6 +627,7 @@ Decoder Pipeline: 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. 71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. +72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. 
@@ -663,6 +668,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. - `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. - `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. +- `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index ee44267f9c..b1b4466f1c 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -836,6 +836,10 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + send_watch_u32_if_changed(&frame_request_tx, frame_number); let wait_result = @@ -867,11 +871,18 @@ impl Playback { continue; } } else { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } frame_number = frame_number.saturating_add(1); total_frames_skipped += 1; continue; } } else { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + let Some((segment_time, segment)) = cached_project.get_segment_time(playback_time) else { From 7f796ff304abbb341adde373cf91423e5ad879fa Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:07:32 +0000 Subject: [PATCH 320/333] improve: skip stale waits and decode fallback on pending seeks --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 6196689569..34c85a4c20 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -462,6 +462,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - In-flight frame wait polling now exits early when a seek change is observed, instead of waiting through full wait budgets. - Startup prefetch wait path now also bails out immediately when seek state changes during wait. +66. **Added pre-wait seek guards before startup and direct decode waits (2026-02-13)** + - Startup prefetch wait path now checks for pending seeks before entering timeout waits and before skip fallback on timeout. + - Direct decode fallback path now checks for pending seek updates before scheduling synchronous decode work. 
+ --- ## Root Cause Analysis Archive @@ -623,6 +627,7 @@ Decoder Pipeline: 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. 71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. +72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -663,6 +668,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. - `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. - `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. +- `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index ee44267f9c..b1b4466f1c 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -836,6 +836,10 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + send_watch_u32_if_changed(&frame_request_tx, frame_number); let wait_result = @@ -867,11 +871,18 @@ impl Playback { continue; } } else { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } frame_number = frame_number.saturating_add(1); total_frames_skipped += 1; continue; } } else { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + let Some((segment_time, segment)) = cached_project.get_segment_time(playback_time) else { From 3c81ece7a3fc2c6163236afe5e34bfe9678a20ec Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:07:32 +0000 Subject: [PATCH 321/333] improve: skip stale waits and decode fallback on pending seeks --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 6196689569..34c85a4c20 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -462,6 +462,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - In-flight frame wait polling now exits early when a seek change is observed, instead of waiting through full wait budgets. - Startup prefetch wait path now also bails out immediately when seek state changes during wait. +66. **Added pre-wait seek guards before startup and direct decode waits (2026-02-13)** + - Startup prefetch wait path now checks for pending seeks before entering timeout waits and before skip fallback on timeout. + - Direct decode fallback path now checks for pending seek updates before scheduling synchronous decode work. 
+ --- ## Root Cause Analysis Archive @@ -623,6 +627,7 @@ Decoder Pipeline: 69. Replaced contains+entry prefetch insertion with single-entry map insertion to remove duplicate keyed-buffer lookups in frame-insert hot path. 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. 71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. +72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -663,6 +668,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: prefetch insertion now uses single `BTreeMap::entry` insertion path instead of separate contains-check + insert lookup. - `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. - `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. +- `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index ee44267f9c..b1b4466f1c 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -836,6 +836,10 @@ impl Playback { } } } else if prefetch_buffer.is_empty() && total_frames_rendered < 15 { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + send_watch_u32_if_changed(&frame_request_tx, frame_number); let wait_result = @@ -867,11 +871,18 @@ impl Playback { continue; } } else { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } frame_number = frame_number.saturating_add(1); total_frames_skipped += 1; continue; } } else { + if seek_rx.has_changed().unwrap_or(false) { + continue; + } + let Some((segment_time, segment)) = cached_project.get_segment_time(playback_time) else { From 75892b7792b6698baaab97419fabf333f6323220 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:08:45 +0000 Subject: [PATCH 322/333] improve: batch prefetch buffer trims during queue drains --- crates/editor/PLAYBACK-FINDINGS.md | 6 +++++ crates/editor/src/playback.rs | 39 +++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 34c85a4c20..128b67aff3 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -466,6 +466,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Startup prefetch wait path now checks for pending seeks before entering timeout waits and before skip fallback on timeout. - Direct decode fallback path now checks for pending seek updates before scheduling synchronous decode work. +67. **Batched keyed-buffer trims during queue-drain insertion (2026-02-13)** + - Warmup and playback queue-drain paths now insert prefetched frames without per-item trim checks and apply one bounded trim pass after the batch. 
+ - Reduces repeated trim work when multiple prefetched frames are drained in the same loop iteration. + --- ## Root Cause Analysis Archive @@ -628,6 +632,7 @@ Decoder Pipeline: 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. 71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. +73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -669,6 +674,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. - `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. - `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. +- `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. 
- `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b1b4466f1c..b305d8c2f2 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -133,6 +133,16 @@ fn insert_prefetched_frame( buffer: &mut BTreeMap, prefetched: PrefetchedFrame, current_frame: u32, +) -> bool { + let inserted_new = insert_prefetched_frame_untrimmed(buffer, prefetched, current_frame); + let trimmed = trim_prefetch_buffer(buffer, current_frame); + inserted_new || trimmed +} + +fn insert_prefetched_frame_untrimmed( + buffer: &mut BTreeMap, + prefetched: PrefetchedFrame, + current_frame: u32, ) -> bool { if prefetched.frame_number < current_frame { return false; @@ -146,8 +156,7 @@ fn insert_prefetched_frame( } std::collections::btree_map::Entry::Occupied(_) => false, }; - let trimmed = trim_prefetch_buffer(buffer, current_frame); - inserted_new || trimmed + inserted_new } fn prune_prefetch_buffer_before_frame( @@ -619,6 +628,7 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { let mut next_prefetched = Some(prefetched); + let mut prefetched_batch_changed = false; loop { let Some(prefetched) = next_prefetched.take() else { @@ -626,18 +636,26 @@ impl Playback { }; if prefetched.generation == seek_generation - && insert_prefetched_frame( + && insert_prefetched_frame_untrimmed( &mut prefetch_buffer, prefetched, frame_number, ) { - warmup_buffer_changed = true; + prefetched_batch_changed = true; } next_prefetched = prefetch_rx.try_recv().ok(); } + if trim_prefetch_buffer(&mut prefetch_buffer, frame_number) { + prefetched_batch_changed = true; + } + + if prefetched_batch_changed { + warmup_buffer_changed = true; + } + if first_frame_time.is_none() && !prefetch_buffer.is_empty() { first_frame_time = Some(Instant::now()); } @@ -704,12 +722,21 @@ impl Playback { cached_project = self.project.borrow_and_update().clone(); playback_clip_offsets = build_clip_offsets_lookup(&cached_project); } + let mut drained_prefetch_changed = false; while let Ok(prefetched) = prefetch_rx.try_recv() { if prefetched.generation == seek_generation { - let _ = - insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); + if insert_prefetched_frame_untrimmed( + &mut prefetch_buffer, + prefetched, + frame_number, + ) { + drained_prefetch_changed = true; + } } } + if drained_prefetch_changed { + let _ = trim_prefetch_buffer(&mut prefetch_buffer, frame_number); + } prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); let frame_offset = frame_number.saturating_sub(playback_anchor_frame) as f64; From 09e8307079b2c6e98652d4a63ab8434e3d43d8a2 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:08:45 +0000 Subject: [PATCH 323/333] improve: batch prefetch buffer trims during queue drains --- crates/editor/PLAYBACK-FINDINGS.md | 6 +++++ crates/editor/src/playback.rs | 39 +++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git 
a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 34c85a4c20..128b67aff3 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -466,6 +466,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Startup prefetch wait path now checks for pending seeks before entering timeout waits and before skip fallback on timeout. - Direct decode fallback path now checks for pending seek updates before scheduling synchronous decode work. +67. **Batched keyed-buffer trims during queue-drain insertion (2026-02-13)** + - Warmup and playback queue-drain paths now insert prefetched frames without per-item trim checks and apply one bounded trim pass after the batch. + - Reduces repeated trim work when multiple prefetched frames are drained in the same loop iteration. + --- ## Root Cause Analysis Archive @@ -628,6 +632,7 @@ Decoder Pipeline: 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. 71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. +73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -669,6 +674,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. 
- `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. - `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. +- `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b1b4466f1c..b305d8c2f2 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -133,6 +133,16 @@ fn insert_prefetched_frame( buffer: &mut BTreeMap, prefetched: PrefetchedFrame, current_frame: u32, +) -> bool { + let inserted_new = insert_prefetched_frame_untrimmed(buffer, prefetched, current_frame); + let trimmed = trim_prefetch_buffer(buffer, current_frame); + inserted_new || trimmed +} + +fn insert_prefetched_frame_untrimmed( + buffer: &mut BTreeMap, + prefetched: PrefetchedFrame, + current_frame: u32, ) -> bool { if prefetched.frame_number < current_frame { return false; @@ -146,8 +156,7 @@ fn insert_prefetched_frame( } std::collections::btree_map::Entry::Occupied(_) => false, }; - let trimmed = trim_prefetch_buffer(buffer, current_frame); - inserted_new || trimmed + inserted_new } fn prune_prefetch_buffer_before_frame( @@ -619,6 +628,7 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { let mut next_prefetched = Some(prefetched); + let mut prefetched_batch_changed = false; loop { let Some(prefetched) = next_prefetched.take() else { @@ -626,18 +636,26 @@ impl Playback { }; if prefetched.generation == seek_generation - && insert_prefetched_frame( + && insert_prefetched_frame_untrimmed( &mut prefetch_buffer, prefetched, frame_number, ) { - warmup_buffer_changed = true; + prefetched_batch_changed = true; } next_prefetched = prefetch_rx.try_recv().ok(); } + if trim_prefetch_buffer(&mut prefetch_buffer, frame_number) { + prefetched_batch_changed = true; + } + + if prefetched_batch_changed { + warmup_buffer_changed = true; + } + if first_frame_time.is_none() && !prefetch_buffer.is_empty() { first_frame_time = Some(Instant::now()); } @@ -704,12 +722,21 @@ impl Playback { cached_project = self.project.borrow_and_update().clone(); playback_clip_offsets = build_clip_offsets_lookup(&cached_project); } + let mut drained_prefetch_changed = false; while let Ok(prefetched) = prefetch_rx.try_recv() { if prefetched.generation == seek_generation { - let _ = - insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); + if insert_prefetched_frame_untrimmed( + &mut prefetch_buffer, + prefetched, + frame_number, + ) { + drained_prefetch_changed = true; + } } } + if drained_prefetch_changed { + let _ = trim_prefetch_buffer(&mut prefetch_buffer, frame_number); + } prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); let frame_offset = frame_number.saturating_sub(playback_anchor_frame) as f64; From dad0f7958b740fc8de0f0629890804d3a199d5e9 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:08:45 +0000 Subject: [PATCH 324/333] improve: batch prefetch buffer trims during queue drains --- crates/editor/PLAYBACK-FINDINGS.md | 6 +++++ crates/editor/src/playback.rs | 39 +++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git 
a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 34c85a4c20..128b67aff3 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -466,6 +466,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Startup prefetch wait path now checks for pending seeks before entering timeout waits and before skip fallback on timeout. - Direct decode fallback path now checks for pending seek updates before scheduling synchronous decode work. +67. **Batched keyed-buffer trims during queue-drain insertion (2026-02-13)** + - Warmup and playback queue-drain paths now insert prefetched frames without per-item trim checks and apply one bounded trim pass after the batch. + - Reduces repeated trim work when multiple prefetched frames are drained in the same loop iteration. + --- ## Root Cause Analysis Archive @@ -628,6 +632,7 @@ Decoder Pipeline: 70. Centralized change-aware frame-request watch updates via shared helper for consistent no-op dedupe behavior across warmup/seek/skip paths. 71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. +73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -669,6 +674,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: frame-request watch updates now route through shared helper to keep no-op dedupe behavior and call-site logic consistent across warmup/seek/skip paths. 
- `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. - `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. +- `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b1b4466f1c..b305d8c2f2 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -133,6 +133,16 @@ fn insert_prefetched_frame( buffer: &mut BTreeMap, prefetched: PrefetchedFrame, current_frame: u32, +) -> bool { + let inserted_new = insert_prefetched_frame_untrimmed(buffer, prefetched, current_frame); + let trimmed = trim_prefetch_buffer(buffer, current_frame); + inserted_new || trimmed +} + +fn insert_prefetched_frame_untrimmed( + buffer: &mut BTreeMap, + prefetched: PrefetchedFrame, + current_frame: u32, ) -> bool { if prefetched.frame_number < current_frame { return false; @@ -146,8 +156,7 @@ fn insert_prefetched_frame( } std::collections::btree_map::Entry::Occupied(_) => false, }; - let trimmed = trim_prefetch_buffer(buffer, current_frame); - inserted_new || trimmed + inserted_new } fn prune_prefetch_buffer_before_frame( @@ -619,6 +628,7 @@ impl Playback { tokio::select! 
{ Some(prefetched) = prefetch_rx.recv() => { let mut next_prefetched = Some(prefetched); + let mut prefetched_batch_changed = false; loop { let Some(prefetched) = next_prefetched.take() else { @@ -626,18 +636,26 @@ impl Playback { }; if prefetched.generation == seek_generation - && insert_prefetched_frame( + && insert_prefetched_frame_untrimmed( &mut prefetch_buffer, prefetched, frame_number, ) { - warmup_buffer_changed = true; + prefetched_batch_changed = true; } next_prefetched = prefetch_rx.try_recv().ok(); } + if trim_prefetch_buffer(&mut prefetch_buffer, frame_number) { + prefetched_batch_changed = true; + } + + if prefetched_batch_changed { + warmup_buffer_changed = true; + } + if first_frame_time.is_none() && !prefetch_buffer.is_empty() { first_frame_time = Some(Instant::now()); } @@ -704,12 +722,21 @@ impl Playback { cached_project = self.project.borrow_and_update().clone(); playback_clip_offsets = build_clip_offsets_lookup(&cached_project); } + let mut drained_prefetch_changed = false; while let Ok(prefetched) = prefetch_rx.try_recv() { if prefetched.generation == seek_generation { - let _ = - insert_prefetched_frame(&mut prefetch_buffer, prefetched, frame_number); + if insert_prefetched_frame_untrimmed( + &mut prefetch_buffer, + prefetched, + frame_number, + ) { + drained_prefetch_changed = true; + } } } + if drained_prefetch_changed { + let _ = trim_prefetch_buffer(&mut prefetch_buffer, frame_number); + } prune_prefetch_buffer_before_frame(&mut prefetch_buffer, frame_number); let frame_offset = frame_number.saturating_sub(playback_anchor_frame) as f64; From 94fdebe5d3bf03a9de22f6d617a54171782643c5 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:09:46 +0000 Subject: [PATCH 325/333] improve: preserve prefetch ramp on small forward rebases --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md 
b/crates/editor/PLAYBACK-FINDINGS.md index 128b67aff3..674e990f1f 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -470,6 +470,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup and playback queue-drain paths now insert prefetched frames without per-item trim checks and apply one bounded trim pass after the batch. - Reduces repeated trim work when multiple prefetched frames are drained in the same loop iteration. +68. **Limited prefetch state resets to major/backward rebases (2026-02-13)** + - Frame-request rebases now only reset decoded-ramp and behind-prefetch tracking on backward seeks or large seek-distance jumps. + - Preserves prefetch ramp state on small forward rebases to reduce unnecessary throughput drops. + --- ## Root Cause Analysis Archive @@ -633,6 +637,7 @@ Decoder Pipeline: 71. Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. 73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. +74. Limited prefetch state resets to backward/major frame-request rebases so small forward rebases preserve decode-ramp and behind-prefetch tracking state. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -675,6 +680,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. 
- `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. +- `crates/editor/src/playback.rs`: frame-request rebases now only clear prefetch ramp/behind-tracking state on backward or large-distance jumps, preserving throughput state on small forward rebases. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b305d8c2f2..25cd6c18dc 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -350,11 +350,11 @@ impl Playback { }; next_prefetch_frame = requested; - frames_decoded = 0; - prefetched_behind.clear(); - prefetched_behind_order.clear(); if is_backward_seek || seek_distance > dynamic_prefetch_ahead / 2 { + frames_decoded = 0; + prefetched_behind.clear(); + prefetched_behind_order.clear(); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } From 09bb2f03fae514b3259ed864275be9a49eef165a Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:09:46 +0000 Subject: [PATCH 326/333] improve: preserve prefetch ramp on small forward rebases --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 128b67aff3..674e990f1f 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -470,6 +470,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup and playback queue-drain paths now insert prefetched frames without per-item trim checks and apply one bounded trim pass after the batch. - Reduces repeated trim work when multiple prefetched frames are drained in the same loop iteration. +68. **Limited prefetch state resets to major/backward rebases (2026-02-13)** + - Frame-request rebases now only reset decoded-ramp and behind-prefetch tracking on backward seeks or large seek-distance jumps. + - Preserves prefetch ramp state on small forward rebases to reduce unnecessary throughput drops. + --- ## Root Cause Analysis Archive @@ -633,6 +637,7 @@ Decoder Pipeline: 71. 
Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. 73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. +74. Limited prefetch state resets to backward/major frame-request rebases so small forward rebases preserve decode-ramp and behind-prefetch tracking state. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -675,6 +680,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. - `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. +- `crates/editor/src/playback.rs`: frame-request rebases now only clear prefetch ramp/behind-tracking state on backward or large-distance jumps, preserving throughput state on small forward rebases. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b305d8c2f2..25cd6c18dc 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -350,11 +350,11 @@ impl Playback { }; next_prefetch_frame = requested; - frames_decoded = 0; - prefetched_behind.clear(); - prefetched_behind_order.clear(); if is_backward_seek || seek_distance > dynamic_prefetch_ahead / 2 { + frames_decoded = 0; + prefetched_behind.clear(); + prefetched_behind_order.clear(); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } From 6d0b27204fb7725eee7d1ef0696f1a48186c9214 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:09:46 +0000 Subject: [PATCH 327/333] improve: preserve prefetch ramp on small forward rebases --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 128b67aff3..674e990f1f 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -470,6 +470,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Warmup and playback queue-drain paths now insert prefetched frames without per-item trim checks and apply one bounded trim pass after the batch. - Reduces repeated trim work when multiple prefetched frames are drained in the same loop iteration. +68. **Limited prefetch state resets to major/backward rebases (2026-02-13)** + - Frame-request rebases now only reset decoded-ramp and behind-prefetch tracking on backward seeks or large seek-distance jumps. + - Preserves prefetch ramp state on small forward rebases to reduce unnecessary throughput drops. + --- ## Root Cause Analysis Archive @@ -633,6 +637,7 @@ Decoder Pipeline: 71. 
Added seek-aware short-circuiting in in-flight/startup frame wait paths to avoid waiting full frame-fetch budgets when seek updates arrive. 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. 73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. +74. Limited prefetch state resets to backward/major frame-request rebases so small forward rebases preserve decode-ramp and behind-prefetch tracking state. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -675,6 +680,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: in-flight and startup frame wait paths now short-circuit when seek updates are pending to improve scrub responsiveness under wait pressure. - `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. +- `crates/editor/src/playback.rs`: frame-request rebases now only clear prefetch ramp/behind-tracking state on backward or large-distance jumps, preserving throughput state on small forward rebases. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index b305d8c2f2..25cd6c18dc 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -350,11 +350,11 @@ impl Playback { }; next_prefetch_frame = requested; - frames_decoded = 0; - prefetched_behind.clear(); - prefetched_behind_order.clear(); if is_backward_seek || seek_distance > dynamic_prefetch_ahead / 2 { + frames_decoded = 0; + prefetched_behind.clear(); + prefetched_behind_order.clear(); if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } From 6b4f0c9823721eed47e10bf18e084dffdd1df255 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:12:55 +0000 Subject: [PATCH 328/333] improve: gate behind-prefetch scans per playback frame --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 674e990f1f..f3207c403e 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -474,6 +474,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Frame-request rebases now only reset decoded-ramp and behind-prefetch tracking on backward seeks or large seek-distance jumps. - Preserves prefetch ramp state on small forward rebases to reduce unnecessary throughput drops. +69. **Gated behind-prefetch scans to one pass per playback frame (2026-02-13)** + - Behind-prefetch scheduling now scans at most once for each observed playback frame value. + - Avoids repeated behind-window scan work in tight scheduler loops when playback position has not advanced. + --- ## Root Cause Analysis Archive @@ -638,6 +642,7 @@ Decoder Pipeline: 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. 73. 
Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. 74. Limited prefetch state resets to backward/major frame-request rebases so small forward rebases preserve decode-ramp and behind-prefetch tracking state. +75. Gated behind-prefetch scheduling scans to one pass per playback frame to avoid repeated behind-window scan churn while playback position is unchanged. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -681,6 +686,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. - `crates/editor/src/playback.rs`: frame-request rebases now only clear prefetch ramp/behind-tracking state on backward or large-distance jumps, preserving throughput state on small forward rebases. +- `crates/editor/src/playback.rs`: behind-prefetch scheduling now scans at most once per playback frame value, reducing repeated behind-window scan overhead in tight scheduler loops. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 25cd6c18dc..777d451dc0 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -282,6 +282,7 @@ impl Playback { let mut prefetched_behind: HashSet = HashSet::new(); let mut prefetched_behind_order: VecDeque = VecDeque::new(); let mut scheduled_in_flight_frames: HashSet = HashSet::new(); + let mut last_behind_scan_frame: Option = None; const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90).min(PREFETCH_BUFFER_SIZE as u32); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); @@ -328,6 +329,7 @@ impl Playback { frames_decoded = 0; prefetched_behind.clear(); prefetched_behind_order.clear(); + last_behind_scan_frame = None; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); @@ -355,6 +357,7 @@ impl Playback { frames_decoded = 0; prefetched_behind.clear(); prefetched_behind_order.clear(); + last_behind_scan_frame = None; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } @@ -433,7 +436,10 @@ impl Playback { next_prefetch_frame += 1; } - if in_flight.len() < effective_parallel { + if in_flight.len() < effective_parallel + && last_behind_scan_frame != Some(current_playback_frame) + { + last_behind_scan_frame = Some(current_playback_frame); for behind_offset in 1..=dynamic_prefetch_behind { if in_flight.len() >= effective_parallel { break; From 4beb9af4d6ebc3b965d1d6eec6e8a34d345bee3f Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:12:55 +0000 Subject: [PATCH 329/333] improve: gate behind-prefetch scans per playback frame --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 674e990f1f..f3207c403e 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ 
b/crates/editor/PLAYBACK-FINDINGS.md @@ -474,6 +474,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Frame-request rebases now only reset decoded-ramp and behind-prefetch tracking on backward seeks or large seek-distance jumps. - Preserves prefetch ramp state on small forward rebases to reduce unnecessary throughput drops. +69. **Gated behind-prefetch scans to one pass per playback frame (2026-02-13)** + - Behind-prefetch scheduling now scans at most once for each observed playback frame value. + - Avoids repeated behind-window scan work in tight scheduler loops when playback position has not advanced. + --- ## Root Cause Analysis Archive @@ -638,6 +642,7 @@ Decoder Pipeline: 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. 73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. 74. Limited prefetch state resets to backward/major frame-request rebases so small forward rebases preserve decode-ramp and behind-prefetch tracking state. +75. Gated behind-prefetch scheduling scans to one pass per playback frame to avoid repeated behind-window scan churn while playback position is unchanged. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -681,6 +686,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. 
- `crates/editor/src/playback.rs`: frame-request rebases now only clear prefetch ramp/behind-tracking state on backward or large-distance jumps, preserving throughput state on small forward rebases. +- `crates/editor/src/playback.rs`: behind-prefetch scheduling now scans at most once per playback frame value, reducing repeated behind-window scan overhead in tight scheduler loops. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 25cd6c18dc..777d451dc0 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -282,6 +282,7 @@ impl Playback { let mut prefetched_behind: HashSet = HashSet::new(); let mut prefetched_behind_order: VecDeque = VecDeque::new(); let mut scheduled_in_flight_frames: HashSet = HashSet::new(); + let mut last_behind_scan_frame: Option = None; const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90).min(PREFETCH_BUFFER_SIZE as u32); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); @@ -328,6 +329,7 @@ impl Playback { frames_decoded = 0; prefetched_behind.clear(); prefetched_behind_order.clear(); + last_behind_scan_frame = None; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); @@ -355,6 +357,7 @@ impl Playback { frames_decoded = 0; prefetched_behind.clear(); prefetched_behind_order.clear(); + last_behind_scan_frame = None; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } @@ -433,7 +436,10 @@ impl Playback { next_prefetch_frame += 1; } - if in_flight.len() < 
effective_parallel { + if in_flight.len() < effective_parallel + && last_behind_scan_frame != Some(current_playback_frame) + { + last_behind_scan_frame = Some(current_playback_frame); for behind_offset in 1..=dynamic_prefetch_behind { if in_flight.len() >= effective_parallel { break; From c6e1116b44aad190791ca53237b499a30e40cde4 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:12:55 +0000 Subject: [PATCH 330/333] improve: gate behind-prefetch scans per playback frame --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index 674e990f1f..f3207c403e 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -474,6 +474,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Frame-request rebases now only reset decoded-ramp and behind-prefetch tracking on backward seeks or large seek-distance jumps. - Preserves prefetch ramp state on small forward rebases to reduce unnecessary throughput drops. +69. **Gated behind-prefetch scans to one pass per playback frame (2026-02-13)** + - Behind-prefetch scheduling now scans at most once for each observed playback frame value. + - Avoids repeated behind-window scan work in tight scheduler loops when playback position has not advanced. + --- ## Root Cause Analysis Archive @@ -638,6 +642,7 @@ Decoder Pipeline: 72. Added pre-wait seek guards in startup/direct-decode fallback paths so pending seeks skip timeout waits and avoid stale synchronous decode work. 73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. 74. Limited prefetch state resets to backward/major frame-request rebases so small forward rebases preserve decode-ramp and behind-prefetch tracking state. +75. 
Gated behind-prefetch scheduling scans to one pass per playback frame to avoid repeated behind-window scan churn while playback position is unchanged. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -681,6 +686,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: startup prefetch wait and direct-decode fallback paths now pre-check seek updates before waiting/synchronous decode scheduling to skip stale work under active seeks. - `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. - `crates/editor/src/playback.rs`: frame-request rebases now only clear prefetch ramp/behind-tracking state on backward or large-distance jumps, preserving throughput state on small forward rebases. +- `crates/editor/src/playback.rs`: behind-prefetch scheduling now scans at most once per playback frame value, reducing repeated behind-window scan overhead in tight scheduler loops. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 25cd6c18dc..777d451dc0 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -282,6 +282,7 @@ impl Playback { let mut prefetched_behind: HashSet = HashSet::new(); let mut prefetched_behind_order: VecDeque = VecDeque::new(); let mut scheduled_in_flight_frames: HashSet = HashSet::new(); + let mut last_behind_scan_frame: Option = None; const RAMP_UP_AFTER_FRAMES: u32 = 5; let dynamic_prefetch_ahead = fps.clamp(30, 90).min(PREFETCH_BUFFER_SIZE as u32); let dynamic_prefetch_behind = (fps / 4).clamp(8, 24); @@ -328,6 +329,7 @@ impl Playback { frames_decoded = 0; prefetched_behind.clear(); prefetched_behind_order.clear(); + last_behind_scan_frame = None; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); @@ -355,6 +357,7 @@ impl Playback { frames_decoded = 0; prefetched_behind.clear(); prefetched_behind_order.clear(); + last_behind_scan_frame = None; if let Ok(mut in_flight_guard) = prefetch_in_flight.write() { in_flight_guard.clear(); } @@ -433,7 +436,10 @@ impl Playback { next_prefetch_frame += 1; } - if in_flight.len() < effective_parallel { + if in_flight.len() < effective_parallel + && last_behind_scan_frame != Some(current_playback_frame) + { + last_behind_scan_frame = Some(current_playback_frame); for behind_offset in 1..=dynamic_prefetch_behind { if in_flight.len() >= effective_parallel { break; From 6e055402168fb2d8a2bec4d4c0aefa203f0279d1 Mon Sep 17 00:00:00 2001 From: Richie McIlroy <33632126+richiemcilroy@users.noreply.github.com> Date: Sat, 14 Feb 2026 00:15:38 +0000 Subject: [PATCH 331/333] Merge branch 'cursor/playback-performance-and-sync-dec3' of https://github.com/CapSoftware/Cap into cursor/playback-performance-and-sync-dec3 From 977414e0eeed95ea6ce6656995f860f1feb04cc3 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 14 Feb 2026 00:15:38 +0000 Subject: [PATCH 332/333] improve: batch prefetch trims in 
in-flight wait buffering Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 11 +++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index f3207c403e..62ccbb4f18 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -478,6 +478,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Behind-prefetch scheduling now scans at most once for each observed playback frame value. - Avoids repeated behind-window scan work in tight scheduler loops when playback position has not advanced. +70. **Batched in-flight wait-path prefetch trims (2026-02-13)** + - In-flight wait buffering now inserts prefetched frames without per-frame trim checks while waiting. + - Applies one bounded trim pass after wait-loop buffering to reduce repeated trim overhead under burst receive windows. + --- ## Root Cause Analysis Archive @@ -643,6 +647,7 @@ Decoder Pipeline: 73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. 74. Limited prefetch state resets to backward/major frame-request rebases so small forward rebases preserve decode-ramp and behind-prefetch tracking state. 75. Gated behind-prefetch scheduling scans to one pass per playback frame to avoid repeated behind-window scan churn while playback position is unchanged. +76. Batched in-flight wait-path prefetch trimming so buffered wait inserts trim once per wait pass instead of per buffered frame. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -687,6 +692,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. 
- `crates/editor/src/playback.rs`: frame-request rebases now only clear prefetch ramp/behind-tracking state on backward or large-distance jumps, preserving throughput state on small forward rebases. - `crates/editor/src/playback.rs`: behind-prefetch scheduling now scans at most once per playback frame value, reducing repeated behind-window scan overhead in tight scheduler loops. +- `crates/editor/src/playback.rs`: in-flight wait buffering now uses untrimmed inserts plus a single post-wait trim pass, reducing repeated keyed-buffer trim operations during wait-path burst buffering. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 777d451dc0..4b63f7b7f3 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -809,6 +809,7 @@ impl Playback { let wait_start = Instant::now(); let max_wait = frame_fetch_timeout; let mut found_frame = None; + let mut buffered_wait_prefetch_changed = false; while wait_start.elapsed() < max_wait { tokio::select! 
{ @@ -821,11 +822,13 @@ impl Playback { found_frame = Some(prefetched); break; } else if prefetched.frame_number >= frame_number { - let _ = insert_prefetched_frame( + if insert_prefetched_frame_untrimmed( &mut prefetch_buffer, prefetched, frame_number, - ); + ) { + buffered_wait_prefetch_changed = true; + } } } _ = tokio::time::sleep(in_flight_poll_interval) => { @@ -847,6 +850,10 @@ impl Playback { } } + if buffered_wait_prefetch_changed { + let _ = trim_prefetch_buffer(&mut prefetch_buffer, frame_number); + } + if seek_rx.has_changed().unwrap_or(false) { continue; } From 1fea3469791d28eb647c0563ed5094cc32cd1bd7 Mon Sep 17 00:00:00 2001 From: Richie McIlroy Date: Sat, 14 Feb 2026 00:15:38 +0000 Subject: [PATCH 333/333] improve: batch prefetch trims in in-flight wait buffering Co-authored-by: Richie McIlroy --- crates/editor/PLAYBACK-FINDINGS.md | 6 ++++++ crates/editor/src/playback.rs | 11 +++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index f3207c403e..62ccbb4f18 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -478,6 +478,10 @@ cargo run -p cap-recording --example playback-test-runner -- full - Behind-prefetch scheduling now scans at most once for each observed playback frame value. - Avoids repeated behind-window scan work in tight scheduler loops when playback position has not advanced. +70. **Batched in-flight wait-path prefetch trims (2026-02-13)** + - In-flight wait buffering now inserts prefetched frames without per-frame trim checks while waiting. + - Applies one bounded trim pass after wait-loop buffering to reduce repeated trim overhead under burst receive windows. + --- ## Root Cause Analysis Archive @@ -643,6 +647,7 @@ Decoder Pipeline: 73. Batched keyed-buffer trimming during warmup/playback prefetch queue drains to avoid per-insert trim checks under burst frame arrivals. 74. 
Limited prefetch state resets to backward/major frame-request rebases so small forward rebases preserve decode-ramp and behind-prefetch tracking state. 75. Gated behind-prefetch scheduling scans to one pass per playback frame to avoid repeated behind-window scan churn while playback position is unchanged. +76. Batched in-flight wait-path prefetch trimming so buffered wait inserts trim once per wait pass instead of per buffered frame. **Changes Made**: - `crates/editor/src/playback.rs`: default low-latency audio mode, playback seek channel, seek-aware scheduling. @@ -687,6 +692,7 @@ Decoder Pipeline: - `crates/editor/src/playback.rs`: warmup and playback queue-drain insertion paths now perform untrimmed batch insertions and run a single keyed-buffer trim pass after each drain batch. - `crates/editor/src/playback.rs`: frame-request rebases now only clear prefetch ramp/behind-tracking state on backward or large-distance jumps, preserving throughput state on small forward rebases. - `crates/editor/src/playback.rs`: behind-prefetch scheduling now scans at most once per playback frame value, reducing repeated behind-window scan overhead in tight scheduler loops. +- `crates/editor/src/playback.rs`: in-flight wait buffering now uses untrimmed inserts plus a single post-wait trim pass, reducing repeated keyed-buffer trim operations during wait-path burst buffering. - `crates/editor/src/playback.rs`: split prefetch/direct decode in-flight tracking and combined both sets in wait-path in-flight checks. - `scripts/compare-playback-benchmark-runs.js`: comparison now reports baseline rows missing from candidate and fails by default on coverage gaps. - `scripts/finalize-playback-matrix.js`: compare stage now runs before publish stage in combined workflows and forwards allow-missing-candidate flag. 
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs index 777d451dc0..4b63f7b7f3 100644 --- a/crates/editor/src/playback.rs +++ b/crates/editor/src/playback.rs @@ -809,6 +809,7 @@ impl Playback { let wait_start = Instant::now(); let max_wait = frame_fetch_timeout; let mut found_frame = None; + let mut buffered_wait_prefetch_changed = false; while wait_start.elapsed() < max_wait { tokio::select! { @@ -821,11 +822,13 @@ impl Playback { found_frame = Some(prefetched); break; } else if prefetched.frame_number >= frame_number { - let _ = insert_prefetched_frame( + if insert_prefetched_frame_untrimmed( &mut prefetch_buffer, prefetched, frame_number, - ); + ) { + buffered_wait_prefetch_changed = true; + } } } _ = tokio::time::sleep(in_flight_poll_interval) => { @@ -847,6 +850,10 @@ impl Playback { } } + if buffered_wait_prefetch_changed { + let _ = trim_prefetch_buffer(&mut prefetch_buffer, frame_number); + } + if seek_rx.has_changed().unwrap_or(false) { continue; }