diff --git a/apps/desktop/src-tauri/src/camera.rs b/apps/desktop/src-tauri/src/camera.rs index be5c20113f..3c5ae5d4ee 100644 --- a/apps/desktop/src-tauri/src/camera.rs +++ b/apps/desktop/src-tauri/src/camera.rs @@ -151,50 +151,6 @@ impl CameraPreviewManager { } } - // Resumes a paused camera preview. Uses window.show() which is safe, unlike - // panel.order_front_regardless() which causes crashes after repeated use. - pub fn resume(&mut self, window: &WebviewWindow) { - if let Some(preview) = &mut self.preview - && preview.is_paused - { - preview.is_paused = false; - preview - .reconfigure - .send(ReconfigureEvent::Resume) - .map_err(|err| error!("Error sending camera preview resume event: {err}")) - .ok(); - window - .run_on_main_thread({ - let window = window.clone(); - move || { - let _ = window.show(); - } - }) - .ok(); - } - } - - pub fn is_paused(&self) -> bool { - self.preview.as_ref().is_some_and(|p| p.is_paused) - } - - pub fn begin_shutdown_for_session( - &mut self, - expected_session_id: u64, - ) -> Option> { - if let Some(preview) = &self.preview - && preview.session_id != expected_session_id - { - info!( - "Skipping camera preview close: session mismatch (expected {}, current {})", - expected_session_id, preview.session_id - ); - return None; - } - - self.begin_shutdown() - } - pub fn begin_shutdown(&mut self) -> Option> { let preview = self.preview.take()?; info!( @@ -304,14 +260,6 @@ impl CameraPreviewManager { Ok(()) } - - pub fn on_window_close_for_session(&mut self, expected_session_id: u64) { - let _ = self.begin_shutdown_for_session(expected_session_id); - } - - pub fn on_window_close(&mut self) { - let _ = self.begin_shutdown(); - } } // Internal events for the persistent camera renderer architecture. 
@@ -874,7 +822,7 @@ impl Renderer { let _ = self.device.poll(wgpu::PollType::Wait); drop(std::mem::take(&mut self.texture)); - drop(std::mem::take(&mut self.aspect_ratio)); + self.aspect_ratio = Cached::default(); let surface = self.surface.take(); let (drop_tx, drop_rx) = oneshot::channel(); diff --git a/apps/desktop/src-tauri/src/camera_legacy.rs b/apps/desktop/src-tauri/src/camera_legacy.rs index 012582207e..096635a56e 100644 --- a/apps/desktop/src-tauri/src/camera_legacy.rs +++ b/apps/desktop/src-tauri/src/camera_legacy.rs @@ -60,7 +60,7 @@ pub async fn create_camera_preview_ws() -> (Sender, u16, Cance frame_tx_clone .send(WSFrame { - data: frame.data(0).to_vec(), + data: std::sync::Arc::new(frame.data(0).to_vec()), width: frame.width(), height: frame.height(), stride: frame.stride(0) as u32, diff --git a/apps/desktop/src-tauri/src/export.rs b/apps/desktop/src-tauri/src/export.rs index cf76f61083..c2ef35807a 100644 --- a/apps/desktop/src-tauri/src/export.rs +++ b/apps/desktop/src-tauri/src/export.rs @@ -111,7 +111,6 @@ pub async fn export_video( let _guard = if let Some(ref ed) = *editor { ed.export_active.store(true, Ordering::Release); tracing::info!("Pausing editor preview during export"); - tokio::time::sleep(std::time::Duration::from_millis(200)).await; Some(ExportActiveGuard(&ed.export_active)) } else { None @@ -367,7 +366,7 @@ pub async fn generate_export_preview( ); let frame = frame_renderer - .render( + .render_immediate( segment_frames, uniforms, &render_segment.cursor, @@ -510,7 +509,7 @@ pub async fn generate_export_preview_fast( ); let frame = frame_renderer - .render(segment_frames, uniforms, &segment_media.cursor, &mut layers) + .render_immediate(segment_frames, uniforms, &segment_media.cursor, &mut layers) .await .map_err(|e| format!("Failed to render frame: {e}"))?; diff --git a/apps/desktop/src-tauri/src/frame_ws.rs b/apps/desktop/src-tauri/src/frame_ws.rs index 3d959e4a92..dc16a2bb1b 100644 --- a/apps/desktop/src-tauri/src/frame_ws.rs +++ 
b/apps/desktop/src-tauri/src/frame_ws.rs @@ -73,7 +73,7 @@ pub enum WSFrameFormat { #[derive(Clone)] pub struct WSFrame { - pub data: Vec, + pub data: std::sync::Arc>, pub width: u32, pub height: u32, pub stride: u32, @@ -138,11 +138,11 @@ pub async fn create_watch_frame_ws( let borrowed = camera_rx.borrow(); borrowed.as_deref().map(pack_ws_frame_ref) }; - if let Some(packed) = packed { - if let Err(e) = socket.send(Message::Binary(packed)).await { - tracing::error!("Failed to send initial frame to socket: {:?}", e); - return; - } + if let Some(packed) = packed + && let Err(e) = socket.send(Message::Binary(packed)).await + { + tracing::error!("Failed to send initial frame to socket: {:?}", e); + return; } } @@ -278,7 +278,7 @@ pub async fn create_frame_ws(frame_tx: broadcast::Sender) -> (u16, Canc match incoming_frame { Ok(frame) => { let packed = pack_frame_data( - frame.data, + std::sync::Arc::unwrap_or_clone(frame.data), frame.stride, frame.height, frame.width, diff --git a/apps/desktop/src-tauri/src/panel_manager.rs b/apps/desktop/src-tauri/src/panel_manager.rs index 78de6267ab..7d63e350c1 100644 --- a/apps/desktop/src-tauri/src/panel_manager.rs +++ b/apps/desktop/src-tauri/src/panel_manager.rs @@ -6,28 +6,19 @@ use tokio::sync::RwLock; use tracing::{debug, info, trace, warn}; #[cfg(target_os = "macos")] -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] pub enum PanelState { + #[default] None, Creating, Ready, Destroying, } -#[cfg(target_os = "macos")] -impl Default for PanelState { - fn default() -> Self { - Self::None - } -} - #[cfg(target_os = "macos")] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum PanelWindowType { Camera, - Main, - TargetSelectOverlay, - InProgressRecording, } #[cfg(target_os = "macos")] @@ -35,9 +26,6 @@ impl std::fmt::Display for PanelWindowType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Camera => write!(f, "Camera"), - 
Self::Main => write!(f, "Main"), - Self::TargetSelectOverlay => write!(f, "TargetSelectOverlay"), - Self::InProgressRecording => write!(f, "InProgressRecording"), } } } @@ -107,7 +95,6 @@ impl PanelManager { window_type, op_id ); Some(PanelOperationGuard { - window_type, operation_id: op_id, completed: false, }) @@ -136,87 +123,6 @@ impl PanelManager { } } - pub async fn try_begin_show( - &self, - window_type: PanelWindowType, - ) -> Option { - let mut panels = self.panels.write().await; - let entry = panels.entry(window_type).or_default(); - - match entry.state { - PanelState::Ready => { - let op_id = self - .operation_counter - .fetch_add(1, std::sync::atomic::Ordering::SeqCst); - debug!( - "Panel {}: beginning show operation (op_id={})", - window_type, op_id - ); - Some(PanelOperationGuard { - window_type, - operation_id: op_id, - completed: true, - }) - } - PanelState::None => { - debug!("Panel {}: show blocked - window doesn't exist", window_type); - None - } - PanelState::Creating => { - debug!( - "Panel {}: show blocked - currently creating (op_id={})", - window_type, entry.operation_id - ); - None - } - PanelState::Destroying => { - debug!( - "Panel {}: show blocked - currently destroying (op_id={})", - window_type, entry.operation_id - ); - None - } - } - } - - pub async fn try_begin_destroy( - &self, - window_type: PanelWindowType, - ) -> Option { - let mut panels = self.panels.write().await; - let entry = panels.entry(window_type).or_default(); - - match entry.state { - PanelState::Ready | PanelState::Creating => { - let op_id = self - .operation_counter - .fetch_add(1, std::sync::atomic::Ordering::SeqCst); - entry.state = PanelState::Destroying; - entry.operation_id = op_id; - debug!( - "Panel {}: beginning destroy operation (op_id={})", - window_type, op_id - ); - Some(PanelOperationGuard { - window_type, - operation_id: op_id, - completed: false, - }) - } - PanelState::None => { - debug!("Panel {}: destroy skipped - already destroyed", window_type); - 
None - } - PanelState::Destroying => { - debug!( - "Panel {}: destroy blocked - already destroying (op_id={})", - window_type, entry.operation_id - ); - None - } - } - } - pub async fn mark_ready(&self, window_type: PanelWindowType, operation_id: u64) { let mut panels = self.panels.write().await; if let Some(entry) = panels.get_mut(&window_type) { @@ -235,24 +141,6 @@ impl PanelManager { } } - pub async fn mark_destroyed(&self, window_type: PanelWindowType, operation_id: u64) { - let mut panels = self.panels.write().await; - if let Some(entry) = panels.get_mut(&window_type) { - if entry.operation_id == operation_id && entry.state == PanelState::Destroying { - entry.state = PanelState::None; - info!( - "Panel {}: marked destroyed (op_id={})", - window_type, operation_id - ); - } else { - warn!( - "Panel {}: mark_destroyed ignored - state mismatch (current state={:?}, current op={}, requested op={})", - window_type, entry.state, entry.operation_id, operation_id - ); - } - } - } - pub async fn force_reset(&self, window_type: PanelWindowType) { let mut panels = self.panels.write().await; if let Some(entry) = panels.get_mut(&window_type) { @@ -293,7 +181,6 @@ impl PanelManager { #[cfg(target_os = "macos")] pub struct PanelOperationGuard { - pub window_type: PanelWindowType, pub operation_id: u64, completed: bool, } @@ -303,10 +190,6 @@ impl PanelOperationGuard { pub fn mark_completed(&mut self) { self.completed = true; } - - pub fn is_completed(&self) -> bool { - self.completed - } } #[cfg(target_os = "macos")] diff --git a/apps/desktop/src-tauri/src/screenshot_editor.rs b/apps/desktop/src-tauri/src/screenshot_editor.rs index c95624c59b..dfaa09d7f7 100644 --- a/apps/desktop/src-tauri/src/screenshot_editor.rs +++ b/apps/desktop/src-tauri/src/screenshot_editor.rs @@ -371,7 +371,7 @@ impl ScreenshotEditorInstances { ); let rendered_frame = frame_renderer - .render( + .render_immediate( segment_frames, uniforms, &cap_project::CursorEvents::default(), diff --git 
a/apps/desktop/src-tauri/src/windows.rs b/apps/desktop/src-tauri/src/windows.rs index 6024d4d6cb..06558553f4 100644 --- a/apps/desktop/src-tauri/src/windows.rs +++ b/apps/desktop/src-tauri/src/windows.rs @@ -75,10 +75,9 @@ fn is_system_dark_mode() -> bool { let hkcu = RegKey::predef(HKEY_CURRENT_USER); if let Ok(key) = hkcu.open_subkey("Software\\Microsoft\\Windows\\CurrentVersion\\Themes\\Personalize") + && let Ok(value) = key.get_value::("AppsUseLightTheme") { - if let Ok(value) = key.get_value::("AppsUseLightTheme") { - return value == 0; - } + return value == 0; } false } @@ -858,17 +857,17 @@ impl ShowCapWindow { } #[cfg(not(target_os = "macos"))] - if let Self::InProgressRecording { .. } = self { - if let Some(window) = self.id(app).get(app) { - let width = 320.0; - let height = 150.0; - let recording_monitor = CursorMonitorInfo::get(); - let (pos_x, pos_y) = recording_monitor.bottom_center_position(width, height, 120.0); - let _ = window.set_position(tauri::LogicalPosition::new(pos_x, pos_y)); - window.show().ok(); - window.set_focus().ok(); - return Ok(window); - } + if let Self::InProgressRecording { .. } = self + && let Some(window) = self.id(app).get(app) + { + let width = 320.0; + let height = 150.0; + let recording_monitor = CursorMonitorInfo::get(); + let (pos_x, pos_y) = recording_monitor.bottom_center_position(width, height, 120.0); + let _ = window.set_position(tauri::LogicalPosition::new(pos_x, pos_y)); + window.show().ok(); + window.set_focus().ok(); + return Ok(window); } if !matches!(self, Self::Camera { .. } | Self::InProgressRecording { .. }) @@ -886,11 +885,6 @@ impl ShowCapWindow { None }; - match self { - Self::Main { .. 
} => {} - _ => {} - } - if let Self::Main { init_target_mode: Some(target_mode), } = self diff --git a/crates/cap-test/src/config/types.rs b/crates/cap-test/src/config/types.rs index 41bf81d100..2708ba4037 100644 --- a/crates/cap-test/src/config/types.rs +++ b/crates/cap-test/src/config/types.rs @@ -136,21 +136,12 @@ impl Default for CameraConfig { } } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct AudioConfig { pub microphones: MicrophoneConfig, pub system: SystemAudioConfig, } -impl Default for AudioConfig { - fn default() -> Self { - Self { - microphones: MicrophoneConfig::default(), - system: SystemAudioConfig::default(), - } - } -} - #[derive(Debug, Clone, Serialize, Deserialize)] pub struct MicrophoneConfig { #[serde(default = "default_sample_rates")] diff --git a/crates/cap-test/src/matrix/runner.rs b/crates/cap-test/src/matrix/runner.rs index 83152fb778..b90ab5f385 100644 --- a/crates/cap-test/src/matrix/runner.rs +++ b/crates/cap-test/src/matrix/runner.rs @@ -362,39 +362,39 @@ impl CompatMatrixRunner { let mut blocking_failures = Vec::new(); for result in &matrix_results.results { - if let Some(classification) = classify_test_failure(result) { - if classification.is_blocking() { - blocking_failures.push(BlockingFailure { - test_id: result.test_id.clone(), - test_name: result.name.clone(), - classification, - reason: result - .failure_reason - .clone() - .unwrap_or_else(|| "Unknown failure".to_string()), - reproduction_steps: build_reproduction_steps(result), - }); - } + if let Some(classification) = classify_test_failure(result) + && classification.is_blocking() + { + blocking_failures.push(BlockingFailure { + test_id: result.test_id.clone(), + test_name: result.name.clone(), + classification, + reason: result + .failure_reason + .clone() + .unwrap_or_else(|| "Unknown failure".to_string()), + reproduction_steps: build_reproduction_steps(result), + }); } } for scenario in 
&scenario_results { - if let Some(classification) = &scenario.failure_classification { - if classification.is_blocking() { - blocking_failures.push(BlockingFailure { - test_id: scenario.scenario_id.clone(), - test_name: scenario.scenario_name.clone(), - classification: *classification, - reason: scenario - .failure_reason - .clone() - .unwrap_or_else(|| "Unknown failure".to_string()), - reproduction_steps: vec![format!( - "Run: cap-test compat-matrix --interactive (scenario: {})", - scenario.scenario_name - )], - }); - } + if let Some(classification) = &scenario.failure_classification + && classification.is_blocking() + { + blocking_failures.push(BlockingFailure { + test_id: scenario.scenario_id.clone(), + test_name: scenario.scenario_name.clone(), + classification: *classification, + reason: scenario + .failure_reason + .clone() + .unwrap_or_else(|| "Unknown failure".to_string()), + reproduction_steps: vec![format!( + "Run: cap-test compat-matrix --interactive (scenario: {})", + scenario.scenario_name + )], + }); } } diff --git a/crates/cap-test/src/suites/scenarios.rs b/crates/cap-test/src/suites/scenarios.rs index 65f3db109c..940e160935 100644 --- a/crates/cap-test/src/suites/scenarios.rs +++ b/crates/cap-test/src/suites/scenarios.rs @@ -132,6 +132,7 @@ impl ScenarioRunner { } } + #[allow(clippy::too_many_arguments)] async fn run_recording_scenario( &self, description: &str, @@ -492,14 +493,14 @@ fn classify_scenario_result( ); } - if let Some(sync) = &validation.sync_info { - if !sync.in_sync { - return ( - TestStatus::Fail, - Some(format!("A/V drift too high: {:.1}ms", sync.drift_ms)), - Some(FailureClassification::PerformanceBelowThreshold), - ); - } + if let Some(sync) = &validation.sync_info + && !sync.in_sync + { + return ( + TestStatus::Fail, + Some(format!("A/V drift too high: {:.1}ms", sync.drift_ms)), + Some(FailureClassification::PerformanceBelowThreshold), + ); } (TestStatus::Pass, None, None) diff --git a/crates/cap-test/src/suites/validate.rs 
b/crates/cap-test/src/suites/validate.rs index 6eaddc8ec7..c07b0c7414 100644 --- a/crates/cap-test/src/suites/validate.rs +++ b/crates/cap-test/src/suites/validate.rs @@ -115,14 +115,14 @@ fn validate_dash_display_dirs( } let m3u8_path = dir.join("media_0.m3u8"); - if m3u8_path.exists() { - if let Ok(contents) = std::fs::read_to_string(&m3u8_path) { - for line in contents.lines() { - if let Some(duration_str) = line.strip_prefix("#EXTINF:") { - let dur_str = duration_str.split(',').next().unwrap_or(""); - if let Ok(dur) = dur_str.parse::() { - total_duration += dur; - } + if m3u8_path.exists() + && let Ok(contents) = std::fs::read_to_string(&m3u8_path) + { + for line in contents.lines() { + if let Some(duration_str) = line.strip_prefix("#EXTINF:") { + let dur_str = duration_str.split(',').next().unwrap_or(""); + if let Ok(dur) = dur_str.parse::() { + total_duration += dur; } } } diff --git a/crates/editor/PLAYBACK-FINDINGS.md b/crates/editor/PLAYBACK-FINDINGS.md index d30b940f21..8cda6a2297 100644 --- a/crates/editor/PLAYBACK-FINDINGS.md +++ b/crates/editor/PLAYBACK-FINDINGS.md @@ -324,6 +324,73 @@ The CPU RGBA→NV12 conversion was taking 15-25ms per frame for 3024x1964 resolu --- +### Session 2026-02-15 (Performance Check + AVAssetReader Fix) + +**Goal**: Run playback benchmarks, fix panics in decoder fallback path + +**What was done**: +1. Ran full playback validation on MP4 and fragmented recordings +2. Identified AVAssetReader panicking with `unwrap()` on directory paths (fragmented recordings) +3. 
Fixed by replacing `unwrap()` with proper error propagation + +**Changes Made**: +- `crates/video-decode/src/avassetreader.rs`: Replaced `ffmpeg::format::input(&path).unwrap()` and `.ok_or(...).unwrap()` with `map_err()?` and `ok_or_else()?` for clean error propagation instead of panics + +**Results** (MP4 Mode): +- ✅ Decoder: AVAssetReader (hardware), display init=114-123ms, camera init=25-33ms +- ✅ Playback: 637-640 fps effective, avg=1.6ms, p95=5.0ms, p99=6.3ms +- ✅ Camera sync: 0ms drift (perfect) +- ✅ Mic sync: 88-100ms (borderline on this run, normally 77-88ms) +- 🟡 System audio: 193-205ms (known issue, inherited from recording) + +**Results** (Fragmented Mode): +- ✅ Decoder: FFmpeg (hardware) with VideoToolbox, display init=100-110ms, camera init=7ms +- ✅ Playback: 153-173 fps effective, avg=5.8-6.5ms, p95=9.0-12.4ms +- ✅ Camera sync: 0ms drift (perfect) +- ✅ Mic sync: 10-23ms (excellent) +- ✅ AVAssetReader now cleanly falls back to FFmpeg without panicking +- 🟡 System audio: 85-116ms (borderline, known issue) + +**Stopping point**: All playback metrics healthy. AVAssetReader panic fixed. No further action needed. + +--- + +### Session 2026-02-15 (Playback Validation + System Audio Sync) + +**Goal**: Comprehensive playback benchmark validation, system audio start_time sync fix + +**What was done**: +1. Ran playback validation on fragmented and MP4 recordings +2. Verified AVAssetReader graceful fallback on directory paths (no panics) +3. Audited all decoder `unwrap()` calls for safety +4. Added system audio to recording start_time sync chain (studio_recording.rs) + +**Changes Made**: +- `crates/recording/src/studio_recording.rs`: System audio start_time now syncs to mic (or display) when drift >30ms, matching the existing camera/display sync pattern. Improves playback alignment. 
+ +**Results (MP4 Mode)**: +- ✅ Decoder: AVAssetReader (hardware), display init=162-174ms, camera init=21-32ms +- ✅ Playback: 283-641 fps effective (target ≥60fps) +- ✅ Latency: avg=1.6-3.5ms, p95=2.8-5.0ms (target p95 <50ms) +- ✅ Camera sync: 0ms drift (target <100ms) +- ✅ Mic sync: 93ms (target <100ms) +- 🟡 System audio: 178-195ms (inherent macOS capture latency, sync fix improves alignment) + +**Results (Fragmented Mode)**: +- ✅ Decoder: FFmpeg (hardware) with VideoToolbox, display init=100ms, camera init=7ms +- ✅ Playback: 156 fps effective (target ≥60fps) +- ✅ Latency: avg=6.4ms, p95=9.5ms (target p95 <50ms) +- ✅ Camera sync: 0ms drift (target <100ms) +- ✅ Mic sync: 8.5ms (target <100ms) +- ✅ System audio: 98ms (target <100ms) +- ✅ AVAssetReader cleanly falls back to FFmpeg with descriptive error message + +**Decoder audit**: All `unwrap()` in `avassetreader.rs` eliminated. Remaining `unwrap()` calls in ffmpeg.rs and avassetreader decoder loop are on guaranteed-non-empty BTreeMap caches (safe by construction). + +**Stopping point**: All playback metrics healthy. System audio sync metadata fix applied. 
+ +--- + ## References - `PLAYBACK-BENCHMARKS.md` - Raw performance test data (auto-updated by test runner) diff --git a/crates/editor/examples/playback-pipeline-benchmark.rs b/crates/editor/examples/playback-pipeline-benchmark.rs index 8e04f349d8..87d824e101 100644 --- a/crates/editor/examples/playback-pipeline-benchmark.rs +++ b/crates/editor/examples/playback-pipeline-benchmark.rs @@ -374,7 +374,7 @@ async fn run_full_pipeline_benchmark( let render_start = Instant::now(); match frame_renderer - .render(segment_frames, uniforms, &segment_media.cursor, &mut layers) + .render_immediate(segment_frames, uniforms, &segment_media.cursor, &mut layers) .await { Ok(_frame) => { @@ -536,7 +536,7 @@ async fn run_scrubbing_benchmark( let render_start = Instant::now(); match frame_renderer - .render(segment_frames, uniforms, &segment_media.cursor, &mut layers) + .render_immediate(segment_frames, uniforms, &segment_media.cursor, &mut layers) .await { Ok(_frame) => { diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index d3f5d911cd..f378538a08 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -148,7 +148,7 @@ impl Renderer { } } match frame_renderer - .render( + .render_immediate( current.segment_frames, current.uniforms, ¤t.cursor, diff --git a/crates/enc-ffmpeg/src/mux/mp4.rs b/crates/enc-ffmpeg/src/mux/mp4.rs index 023564b8e1..882c06d96e 100644 --- a/crates/enc-ffmpeg/src/mux/mp4.rs +++ b/crates/enc-ffmpeg/src/mux/mp4.rs @@ -96,6 +96,20 @@ impl MP4File { self.video.queue_frame(frame, timestamp, &mut self.output) } + pub fn queue_video_frame_reusable( + &mut self, + frame: &mut frame::Video, + converted_frame: &mut Option, + timestamp: Duration, + ) -> Result<(), h264::QueueFrameError> { + if self.is_finished { + return Ok(()); + } + + self.video + .queue_frame_reusable(frame, converted_frame, timestamp, &mut self.output) + } + pub fn queue_audio_frame(&mut self, frame: frame::Audio) { if self.is_finished { return; diff --git 
a/crates/enc-ffmpeg/src/video/h264.rs b/crates/enc-ffmpeg/src/video/h264.rs index eb793341b8..29efdb5890 100644 --- a/crates/enc-ffmpeg/src/video/h264.rs +++ b/crates/enc-ffmpeg/src/video/h264.rs @@ -27,6 +27,7 @@ pub struct H264EncoderBuilder { output_size: Option<(u32, u32)>, external_conversion: bool, encoder_priority_override: Option<&'static [&'static str]>, + is_export: bool, } #[derive(Clone, Copy, Debug, PartialEq)] @@ -60,6 +61,7 @@ impl H264EncoderBuilder { output_size: None, external_conversion: false, encoder_priority_override: None, + is_export: false, } } @@ -94,6 +96,11 @@ impl H264EncoderBuilder { self } + pub fn with_export_settings(mut self) -> Self { + self.is_export = true; + self + } + pub fn build( self, output: &mut format::context::Output, @@ -116,8 +123,12 @@ impl H264EncoderBuilder { ); } - let candidates = - get_codec_and_options(&input_config, self.preset, self.encoder_priority_override); + let candidates = get_codec_and_options( + &input_config, + self.preset, + self.encoder_priority_override, + self.is_export, + ); if candidates.is_empty() { return Err(H264EncoderError::CodecNotFound); } @@ -671,6 +682,7 @@ fn get_codec_and_options( config: &VideoInfo, preset: H264Preset, encoder_priority_override: Option<&'static [&'static str]>, + is_export: bool, ) -> Vec<(Codec, Dictionary<'static>)> { let keyframe_interval_secs = DEFAULT_KEYFRAME_INTERVAL_SECS; let denominator = config.frame_rate.denominator(); @@ -695,45 +707,87 @@ fn get_codec_and_options( match *encoder_name { "h264_videotoolbox" => { - options.set("realtime", "true"); - options.set("prio_speed", "true"); - options.set("profile", "baseline"); + if is_export { + options.set("realtime", "false"); + options.set("profile", "main"); + options.set("allow_sw", "0"); + } else { + options.set("realtime", "true"); + options.set("prio_speed", "true"); + options.set("profile", "baseline"); + } } "h264_nvenc" => { - options.set("preset", "p4"); - options.set("tune", "ll"); - 
options.set("rc", "vbr"); - options.set("spatial-aq", "1"); - options.set("temporal-aq", "1"); + if is_export { + options.set("preset", "p5"); + options.set("tune", "hq"); + options.set("rc", "vbr"); + options.set("spatial-aq", "1"); + options.set("temporal-aq", "1"); + options.set("b_ref_mode", "middle"); + } else { + options.set("preset", "p4"); + options.set("tune", "ll"); + options.set("rc", "vbr"); + options.set("spatial-aq", "1"); + options.set("temporal-aq", "1"); + } options.set("g", &keyframe_interval_str); } "h264_qsv" => { - options.set("preset", "faster"); - options.set("look_ahead", "1"); + if is_export { + options.set("preset", "medium"); + options.set("look_ahead", "1"); + options.set("look_ahead_depth", "20"); + } else { + options.set("preset", "faster"); + options.set("look_ahead", "1"); + } options.set("g", &keyframe_interval_str); } "h264_amf" => { - options.set("quality", "balanced"); - options.set("rc", "vbr_latency"); + if is_export { + options.set("quality", "quality"); + options.set("rc", "vbr_peak"); + } else { + options.set("quality", "balanced"); + options.set("rc", "vbr_latency"); + } options.set("g", &keyframe_interval_str); } "h264_mf" => { options.set("hw_encoding", "true"); - options.set("scenario", "4"); - options.set("quality", "1"); + if is_export { + options.set("scenario", "0"); + options.set("quality", "0"); + } else { + options.set("scenario", "4"); + options.set("quality", "1"); + } options.set("g", &keyframe_interval_str); } "libx264" => { - options.set( - "preset", - match preset { - H264Preset::Slow => "slow", - H264Preset::Medium => "medium", - H264Preset::Ultrafast | H264Preset::HighThroughput => "ultrafast", - }, - ); - if matches!(preset, H264Preset::Ultrafast | H264Preset::HighThroughput) { - options.set("tune", "zerolatency"); + if is_export { + options.set( + "preset", + match preset { + H264Preset::Slow => "slow", + H264Preset::Medium => "medium", + _ => "veryfast", + }, + ); + } else { + options.set( + "preset", + 
match preset { + H264Preset::Slow => "slow", + H264Preset::Medium => "medium", + H264Preset::Ultrafast | H264Preset::HighThroughput => "ultrafast", + }, + ); + if matches!(preset, H264Preset::Ultrafast | H264Preset::HighThroughput) { + options.set("tune", "zerolatency"); + } } options.set("vsync", "1"); options.set("g", &keyframe_interval_str); diff --git a/crates/export/src/mp4.rs b/crates/export/src/mp4.rs index acdc56d290..672eb1915c 100644 --- a/crates/export/src/mp4.rs +++ b/crates/export/src/mp4.rs @@ -3,12 +3,12 @@ use cap_editor::{AudioRenderer, get_audio_segments}; use cap_enc_ffmpeg::{AudioEncoder, aac::AACEncoder, h264::H264Encoder, mp4::*}; use cap_media_info::{RawVideoFormat, VideoInfo}; use cap_project::XY; -use cap_rendering::{ProjectUniforms, RenderSegment, RenderedFrame}; +use cap_rendering::{Nv12RenderedFrame, ProjectUniforms, RenderSegment}; use futures::FutureExt; use image::ImageBuffer; use serde::Deserialize; use specta::Type; -use std::{path::PathBuf, time::Duration}; +use std::{path::PathBuf, sync::Arc, time::Duration}; use tracing::{info, trace, warn}; #[derive(Deserialize, Type, Clone, Copy, Debug)] @@ -51,17 +51,11 @@ impl Mp4ExportSettings { pub async fn export( self, base: ExporterBase, - mut on_progress: impl FnMut(u32) -> bool + Send + 'static, + on_progress: impl FnMut(u32) -> bool + Send + 'static, ) -> Result { - let output_path = base.output_path.clone(); - let meta = &base.studio_meta; - info!("Exporting mp4 with settings: {:?}", &self); info!("Expected to render {} frames", base.total_frames(self.fps)); - let (tx_image_data, mut video_rx) = tokio::sync::mpsc::channel::<(RenderedFrame, u32)>(8); - let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::(8); - let fps = self.fps; let output_size = ProjectUniforms::get_output_size( @@ -70,8 +64,30 @@ impl Mp4ExportSettings { self.resolution_base, ); + info!( + width = output_size.0, + height = output_size.1, + "Exporting with NV12 pipeline (GPU when possible, CPU fallback 
otherwise)" + ); + self.export_nv12(base, output_size, fps, on_progress).await + } + + async fn export_nv12( + self, + base: ExporterBase, + output_size: (u32, u32), + fps: u32, + mut on_progress: impl FnMut(u32) -> bool + Send + 'static, + ) -> Result { + let output_path = base.output_path.clone(); + let meta = &base.studio_meta; + + let (tx_image_data, mut video_rx) = + tokio::sync::mpsc::channel::<(Nv12RenderedFrame, u32)>(32); + let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::(32); + let mut video_info = - VideoInfo::from_raw(RawVideoFormat::Rgba, output_size.0, output_size.1, fps); + VideoInfo::from_raw(RawVideoFormat::Nv12, output_size.0, output_size.1, fps); video_info.time_base = ffmpeg::Rational::new(1, fps as i32); let audio_segments = get_audio_segments(&base.segments); @@ -83,7 +99,7 @@ impl Mp4ExportSettings { let has_audio = audio_renderer.is_some(); let encoder_thread = tokio::task::spawn_blocking(move || { - trace!("Creating MP4File encoder"); + trace!("Creating MP4File encoder (NV12 path)"); let mut encoder = MP4File::init( "output", @@ -92,6 +108,8 @@ impl Mp4ExportSettings { H264Encoder::builder(video_info) .with_bpp(self.effective_bpp()) .with_export_priority() + .with_export_settings() + .with_external_conversion() .build(o) }, |o| { @@ -104,15 +122,41 @@ impl Mp4ExportSettings { ) .map_err(|v| v.to_string())?; - info!("Created MP4File encoder"); + info!("Created MP4File encoder (NV12, external conversion, export settings)"); + + let mut reusable_frame = ffmpeg::frame::Video::new( + ffmpeg::format::Pixel::NV12, + output_size.0, + output_size.1, + ); + let mut converted_frame: Option = None; + let mut encoded_frames = 0u32; + let encode_start = std::time::Instant::now(); - while let Ok(frame) = frame_rx.recv() { + while let Ok(input) = frame_rx.recv() { + fill_nv12_frame(&mut reusable_frame, &input); encoder - .queue_video_frame(frame.video, Duration::MAX) + .queue_video_frame_reusable( + &mut reusable_frame, + &mut converted_frame, + 
Duration::MAX, + ) .map_err(|err| err.to_string())?; - if let Some(audio) = frame.audio { + if let Some(audio) = input.audio { encoder.queue_audio_frame(audio); } + encoded_frames += 1; + } + + let encode_elapsed = encode_start.elapsed(); + if encoded_frames > 0 { + let encode_fps = encoded_frames as f64 / encode_elapsed.as_secs_f64().max(0.001); + info!( + encoded_frames = encoded_frames, + elapsed_secs = format!("{:.2}", encode_elapsed.as_secs_f64()), + encode_fps = format!("{:.1}", encode_fps), + "Encoder thread finished" + ); } let res = encoder @@ -135,11 +179,10 @@ impl Mp4ExportSettings { let project_path = base.project_path.clone(); async move { let mut frame_count = 0; - let mut first_frame = None; + let mut first_frame_data: Option = None; let sample_rate = u64::from(AudioRenderer::SAMPLE_RATE); let fps_u64 = u64::from(fps); let mut audio_sample_cursor = 0u64; - let mut consecutive_timeouts = 0u32; const MAX_CONSECUTIVE_TIMEOUTS: u32 = 3; @@ -192,8 +235,17 @@ impl Mp4ExportSettings { return Err("Export cancelled".to_string()); } + let frame_width = frame.width; + let frame_height = frame.height; + let nv12_data = ensure_nv12_data(frame); + if frame_count == 0 { - first_frame = Some(frame.clone()); + first_frame_data = Some(FirstFrameNv12 { + data: nv12_data.clone(), + width: frame_width, + height: frame_height, + y_stride: frame_width, + }); if let Some(audio) = &mut audio_renderer { audio.set_playhead(0.0, &project); } @@ -215,13 +267,13 @@ impl Mp4ExportSettings { }); if frame_tx - .send(MP4Input { + .send(Nv12ExportFrame { audio: audio_frame, - video: video_info.wrap_frame( - &frame.data, - frame_number as i64, - frame.padded_bytes_per_row as usize, - ), + nv12_data, + width: frame_width, + height: frame_height, + y_stride: frame_width, + pts: frame_number as i64, }) .is_err() { @@ -234,34 +286,16 @@ impl Mp4ExportSettings { drop(frame_tx); - if let Some(frame) = first_frame { + if let Some(first) = first_frame_data { let project_path = 
project_path.clone(); let screenshot_task = tokio::task::spawn_blocking(move || { - let rgb_img = ImageBuffer::, Vec>::from_raw( - frame.width, - frame.height, - frame - .data - .chunks(frame.padded_bytes_per_row as usize) - .flat_map(|row| { - row[0..(frame.width * 4) as usize] - .chunks(4) - .flat_map(|chunk| [chunk[0], chunk[1], chunk[2]]) - }) - .collect::>(), + save_screenshot_from_nv12( + &first.data, + first.width, + first.height, + first.y_stride, + &project_path, ); - - let Some(rgb_img) = rgb_img else { - return; - }; - - let screenshots_dir = project_path.join("screenshots"); - if std::fs::create_dir_all(&screenshots_dir).is_err() { - return; - } - - let screenshot_path = screenshots_dir.join("display.jpg"); - let _ = rgb_img.save(&screenshot_path); }); if let Err(e) = screenshot_task.await { @@ -279,7 +313,7 @@ impl Mp4ExportSettings { .and_then(|v| v.map_err(|e| e.to_string())) }); - let render_video_task = cap_rendering::render_video_to_channel( + let render_video_task = cap_rendering::render_video_to_channel_nv12( &base.render_constants, &base.project_config, tx_image_data, @@ -304,6 +338,192 @@ impl Mp4ExportSettings { } } +struct FirstFrameNv12 { + data: Arc>, + width: u32, + height: u32, + y_stride: u32, +} + +struct Nv12ExportFrame { + nv12_data: Arc>, + width: u32, + height: u32, + y_stride: u32, + pts: i64, + audio: Option, +} + +fn ensure_nv12_data(frame: Nv12RenderedFrame) -> Arc> { + use cap_rendering::GpuOutputFormat; + + if frame.format != GpuOutputFormat::Rgba { + return frame.data; + } + + tracing::warn!( + frame_number = frame.frame_number, + "GPU NV12 converter returned RGBA - converting to NV12 on CPU" + ); + + let width = frame.width; + let height = frame.height; + + let mut rgba_frame = ffmpeg::frame::Video::new(ffmpeg::format::Pixel::RGBA, width, height); + let stride = rgba_frame.stride(0); + let src_stride = frame.y_stride as usize; + for row in 0..height as usize { + let src_start = row * src_stride; + let dst_start = row * 
stride; + let copy_width = (width as usize * 4).min(stride).min(src_stride); + if src_start + copy_width <= frame.data.len() + && dst_start + copy_width <= rgba_frame.data_mut(0).len() + { + rgba_frame.data_mut(0)[dst_start..dst_start + copy_width] + .copy_from_slice(&frame.data[src_start..src_start + copy_width]); + } + } + + if let Ok(mut converter) = ffmpeg::software::scaling::Context::get( + ffmpeg::format::Pixel::RGBA, + width, + height, + ffmpeg::format::Pixel::NV12, + width, + height, + ffmpeg::software::scaling::flag::Flags::FAST_BILINEAR, + ) { + let mut nv12_frame = ffmpeg::frame::Video::new(ffmpeg::format::Pixel::NV12, width, height); + if converter.run(&rgba_frame, &mut nv12_frame).is_ok() { + let y_size = nv12_frame.stride(0) * height as usize; + let uv_size = nv12_frame.stride(1) * (height as usize / 2); + let y_data = &nv12_frame.data(0)[..y_size]; + let uv_data = &nv12_frame.data(1)[..uv_size]; + let mut result = Vec::with_capacity(width as usize * height as usize * 3 / 2); + + if nv12_frame.stride(0) == width as usize { + result.extend_from_slice(y_data); + } else { + for row in 0..height as usize { + let start = row * nv12_frame.stride(0); + result.extend_from_slice(&y_data[start..start + width as usize]); + } + } + + if nv12_frame.stride(1) == width as usize { + result.extend_from_slice(uv_data); + } else { + for row in 0..(height as usize / 2) { + let start = row * nv12_frame.stride(1); + result.extend_from_slice(&uv_data[start..start + width as usize]); + } + } + + return Arc::new(result); + } + } + + tracing::error!( + frame_number = frame.frame_number, + "swscale RGBA to NV12 conversion failed, using zeroed NV12" + ); + Arc::new(vec![0u8; width as usize * height as usize * 3 / 2]) +} + +fn fill_nv12_frame(frame: &mut ffmpeg::frame::Video, input: &Nv12ExportFrame) { + frame.set_pts(Some(input.pts)); + + let width = input.width as usize; + let height = input.height as usize; + let y_stride = input.y_stride as usize; + + let y_plane_size = 
y_stride * height; + let y_src = &input.nv12_data[..y_plane_size.min(input.nv12_data.len())]; + let uv_src = if y_plane_size < input.nv12_data.len() { + &input.nv12_data[y_plane_size..] + } else { + &[] + }; + + let dst_y_stride = frame.stride(0); + if dst_y_stride == y_stride { + let copy_len = y_src.len().min(frame.data_mut(0).len()); + frame.data_mut(0)[..copy_len].copy_from_slice(&y_src[..copy_len]); + } else { + for row in 0..height { + let src_start = row * y_stride; + let dst_start = row * dst_y_stride; + let copy_width = width.min(y_stride).min(dst_y_stride); + if src_start + copy_width <= y_src.len() + && dst_start + copy_width <= frame.data_mut(0).len() + { + frame.data_mut(0)[dst_start..dst_start + copy_width] + .copy_from_slice(&y_src[src_start..src_start + copy_width]); + } + } + } + + let uv_height = height / 2; + let dst_uv_stride = frame.stride(1); + if dst_uv_stride == width { + let copy_len = uv_src.len().min(frame.data_mut(1).len()); + frame.data_mut(1)[..copy_len].copy_from_slice(&uv_src[..copy_len]); + } else { + for row in 0..uv_height { + let src_start = row * width; + let dst_start = row * dst_uv_stride; + let copy_width = width.min(dst_uv_stride); + if src_start + copy_width <= uv_src.len() + && dst_start + copy_width <= frame.data_mut(1).len() + { + frame.data_mut(1)[dst_start..dst_start + copy_width] + .copy_from_slice(&uv_src[src_start..src_start + copy_width]); + } + } + } +} + +fn save_screenshot_from_nv12( + nv12_data: &[u8], + width: u32, + height: u32, + y_stride: u32, + project_path: &std::path::Path, +) { + let y_plane_size = (y_stride as usize) * (height as usize); + let y_data = &nv12_data[..y_plane_size.min(nv12_data.len())]; + let uv_data = if y_plane_size < nv12_data.len() { + &nv12_data[y_plane_size..] 
+ } else { + return; + }; + + let mut rgba = vec![0u8; (width * height * 4) as usize]; + cap_rendering::cpu_yuv::nv12_to_rgba_simd( + y_data, uv_data, width, height, y_stride, width, &mut rgba, + ); + + let rgb_img = ImageBuffer::, Vec>::from_raw( + width, + height, + rgba.chunks(4) + .flat_map(|chunk| [chunk[0], chunk[1], chunk[2]]) + .collect::>(), + ); + + let Some(rgb_img) = rgb_img else { + return; + }; + + let screenshots_dir = project_path.join("screenshots"); + if std::fs::create_dir_all(&screenshots_dir).is_err() { + return; + } + + let screenshot_path = screenshots_dir.join("display.jpg"); + let _ = rgb_img.save(&screenshot_path); +} + #[cfg(test)] mod tests { use super::*; @@ -328,4 +548,96 @@ mod tests { assert_eq!(sum_samples(sample_rate, fps, frames), expected); } } + + #[test] + fn fill_nv12_frame_preserves_data_layout() { + ffmpeg::init().unwrap(); + + let width = 8u32; + let height = 4u32; + let y_size = (width * height) as usize; + let uv_size = (width * height / 2) as usize; + + let mut nv12_data = vec![0u8; y_size + uv_size]; + for i in 0..y_size { + nv12_data[i] = (i % 256) as u8; + } + for i in 0..uv_size { + nv12_data[y_size + i] = (128 + i % 128) as u8; + } + + let input = Nv12ExportFrame { + nv12_data: Arc::new(nv12_data.clone()), + width, + height, + y_stride: width, + pts: 42, + audio: None, + }; + + let mut frame = ffmpeg::frame::Video::new(ffmpeg::format::Pixel::NV12, width, height); + fill_nv12_frame(&mut frame, &input); + + assert_eq!(frame.pts(), Some(42)); + + for row in 0..height as usize { + for col in 0..width as usize { + let src_val = nv12_data[row * width as usize + col]; + let dst_val = frame.data(0)[row * frame.stride(0) + col]; + assert_eq!(src_val, dst_val, "Y mismatch at ({col}, {row})"); + } + } + + for row in 0..(height / 2) as usize { + for col in 0..width as usize { + let src_val = nv12_data[y_size + row * width as usize + col]; + let dst_val = frame.data(1)[row * frame.stride(1) + col]; + assert_eq!(src_val, dst_val, 
"UV mismatch at ({col}, {row})"); + } + } + } + + #[test] + fn ensure_nv12_data_passthrough_for_nv12_format() { + use cap_rendering::{GpuOutputFormat, Nv12RenderedFrame}; + + let data = vec![1u8, 2, 3, 4, 5, 6]; + let frame = Nv12RenderedFrame { + data: std::sync::Arc::new(data.clone()), + width: 4, + height: 2, + y_stride: 4, + frame_number: 0, + target_time_ns: 0, + format: GpuOutputFormat::Nv12, + }; + + let result = ensure_nv12_data(frame); + assert_eq!(*result, data); + } + + #[test] + fn nv12_export_frame_dimensions_match() { + let width = 1920u32; + let height = 1080u32; + assert!( + width.is_multiple_of(4), + "1920 should be NV12-compatible (divisible by 4)" + ); + assert!( + height.is_multiple_of(2), + "1080 should be NV12-compatible (divisible by 2)" + ); + + let nv12_size = width as usize * height as usize * 3 / 2; + assert_eq!(nv12_size, 3_110_400); + let rgba_size = width as usize * height as usize * 4; + assert_eq!(rgba_size, 8_294_400); + + let savings_pct = (1.0 - nv12_size as f64 / rgba_size as f64) * 100.0; + assert!( + savings_pct > 62.0 && savings_pct < 63.0, + "NV12 should save ~62.5% vs RGBA, got {savings_pct:.1}%" + ); + } } diff --git a/crates/recording/FINDINGS.md b/crates/recording/FINDINGS.md index bcfd5586a7..b2d1e13755 100644 --- a/crates/recording/FINDINGS.md +++ b/crates/recording/FINDINGS.md @@ -415,6 +415,75 @@ System Audio ────┘ ├─► MP4 (macos.rs) ─ --- +### Session 2026-02-15 (Performance Check + AVAssetReader Fix) + +**Goal**: Run recording and playback benchmarks, fix any issues + +**What was done**: +1. Ran MP4 baseline benchmarks (cold + warm runs) +2. Ran fragmented baseline benchmark +3. Ran playback benchmark on resulting recordings +4. Fixed AVAssetReader panic on directory paths (fragmented recordings) + +**Changes Made**: +- `crates/video-decode/src/avassetreader.rs`: Replaced two `unwrap()` calls with proper error propagation via `?` and `map_err`. 
Previously panicked when given a directory path (fragmented recordings); now returns a clean error that triggers graceful FFmpeg fallback. + +**Results**: +- ✅ MP4: 29.2fps, 10.4-10.7ms jitter, 2.7% dropped, 0ms A/V sync, 81-94ms mic timing +- ✅ Fragmented: 29.5-29.6fps, 4.6-5.9ms jitter, 1.3% dropped, 0ms A/V sync, 1-4ms mic timing +- ✅ Playback MP4: 637fps effective, 1.6ms avg, 5.0ms p95, 0ms camera drift +- ✅ Playback Fragmented: 153fps effective, 6.5ms avg, 12.4ms p95, 0ms camera drift +- ✅ AVAssetReader no longer panics on directory paths +- 🟡 System audio timing: 120-246ms (known lower-priority issue) +- 🟡 MP4 dropped frames at 2.7% (single 160ms spike from encoder warmup, not actionable) + +**Stopping point**: All major metrics pass. AVAssetReader panic fixed. System audio timing remains a documented known issue. + +--- + +### Session 2026-02-15 (Fix Attempts + System Audio Sync) + +**Goal**: Fix known issues: MP4 encoder warmup dropped frames and system audio timing offset + +**What was done**: +1. Ran comprehensive benchmarks (MP4 cold, warm, thermal stress; fragmented) +2. Attempted encoder warmup patience fix (increasing retry budget from 50ms to 200ms during first 3 frames) +3. Reverted encoder warmup fix after it degraded performance (longer blocking caused pipeline backpressure) +4. Implemented system audio start_time sync to match mic/display sync chain +5. Verified all metrics stable after changes + +**Changes Made**: +- `crates/recording/src/studio_recording.rs`: Added system audio to the start_time sync chain. The system audio start_time is now replaced with the mic start time (preferred) or the display start time when it differs from that reference by more than 30ms, matching the existing sync pattern for camera and display. Improves playback alignment of system audio. + +**Encoder Warmup Investigation**: +- Root cause: VideoToolbox hardware encoder first-frame latency (~160ms) causes `NotReadyForMore` for frames 2-5 +- Current retry budget: 100 × 500μs = 50ms. 
Frames during warmup are dropped after 50ms of retrying +- Attempted fix: 400 × 500μs = 200ms patience for first 3 frames +- Result: WORSE (71 frames instead of 149). Longer blocking prevented the encoder thread from draining the channel, causing capture-side drops because the channel was full +- Conclusion: 50ms retry timeout is the correct safety valve. Dropping ~3% of frames during warmup is the optimal tradeoff. Pre-warming the hardware encoder would require architectural changes (dummy frame encoding before recording starts) + +**Results (MP4 - warm run, post system audio sync fix)**: +- ✅ Frame rate: 29.0-29.2fps (target 30±2fps) +- ✅ Jitter: 10.3-12.4ms (target <15ms) +- ✅ A/V sync: 0ms across all streams (target <50ms) +- ✅ Mic timing: 90-94ms (target <100ms) +- 🟡 Dropped frames: 2.7-3.3% (encoder warmup, not actionable without architectural changes) +- 🟡 System audio duration: 215-259ms shorter than video (inherent macOS capture latency, cannot be fixed with metadata sync) + +**Results (Fragmented)**: +- ✅ Frame rate: 29.5fps, jitter: 5.7ms, dropped: 1.3% +- ✅ Mic timing: 13.5ms +- 🟡 System audio duration: 111.5ms shorter than video + +**Key findings**: +- MP4 encoder warmup spike is NOT fixable by increasing retry patience (makes it worse) +- System audio file duration is inherently shorter due to macOS ScreenCaptureKit capture latency +- System audio start_time metadata sync improves playback alignment but not duration measurement + +**Stopping point**: System audio sync metadata fix applied. Encoder warmup spike documented as an architectural limitation. 
+ +--- + ## References - `BENCHMARKS.md` - Raw performance test data (auto-updated by test runner) diff --git a/crates/recording/src/sources/screen_capture/windows.rs b/crates/recording/src/sources/screen_capture/windows.rs index 6b93dfe988..5ff6077160 100644 --- a/crates/recording/src/sources/screen_capture/windows.rs +++ b/crates/recording/src/sources/screen_capture/windows.rs @@ -205,9 +205,10 @@ impl WindowsFrameScaler { let src_width = frame.width(); let src_height = frame.height(); - let needs_reinit = self.state.as_ref().map_or(true, |s| { - s.source_width != src_width || s.source_height != src_height - }); + let needs_reinit = self + .state + .as_ref() + .is_none_or(|s| s.source_width != src_width || s.source_height != src_height); if needs_reinit { let src_pixel = match self.pixel_format { @@ -432,33 +433,42 @@ enum VideoControl { const MAX_CAPTURE_RESTARTS: u32 = 3; const RESTART_DELAY: Duration = Duration::from_secs(1); -fn create_d3d_capturer( - display_id: &DisplayId, - settings: &scap_direct3d::Settings, - d3d_device: &ID3D11Device, - video_tx: &mpsc::Sender, - error_tx: &mpsc::Sender, - video_frame_counter: &Arc, - video_drop_counter: &Arc, +struct CreateCapturerParams<'a> { + display_id: &'a DisplayId, + settings: &'a scap_direct3d::Settings, + d3d_device: &'a ID3D11Device, + video_tx: &'a mpsc::Sender, + video_frame_counter: &'a Arc, + video_drop_counter: &'a Arc, expected_width: u32, expected_height: u32, frame_scaler: Arc>, scaling_logged: Arc, scaled_frame_count: Arc, +} + +fn create_d3d_capturer( + params: &CreateCapturerParams, + error_tx: &mpsc::Sender, ) -> anyhow::Result { - let capture_item = Display::from_id(display_id) - .ok_or_else(|| anyhow!("Display not found for ID: {:?}", display_id))? + let capture_item = Display::from_id(params.display_id) + .ok_or_else(|| anyhow!("Display not found for ID: {:?}", params.display_id))? 
.raw_handle() .try_as_capture_item() .map_err(|e| anyhow!("Failed to create GraphicsCaptureItem: {}", e))?; scap_direct3d::Capturer::new( capture_item, - settings.clone(), + params.settings.clone(), { - let video_frame_counter = video_frame_counter.clone(); - let video_drop_counter = video_drop_counter.clone(); - let mut tx = video_tx.clone(); + let video_frame_counter = params.video_frame_counter.clone(); + let video_drop_counter = params.video_drop_counter.clone(); + let mut tx = params.video_tx.clone(); + let expected_width = params.expected_width; + let expected_height = params.expected_height; + let frame_scaler = params.frame_scaler.clone(); + let scaling_logged = params.scaling_logged.clone(); + let scaled_frame_count = params.scaled_frame_count.clone(); move |frame| { let timestamp = frame.inner().SystemRelativeTime()?; let timestamp = Timestamp::PerformanceCounter(PerformanceCounterTimestamp::new( @@ -535,7 +545,7 @@ fn create_d3d_capturer( Ok(()) } }, - Some(d3d_device.clone()), + Some(params.d3d_device.clone()), ) .map_err(|e| anyhow!("{e}")) } @@ -590,20 +600,21 @@ impl output_pipeline::VideoSource for VideoSource { let cancel_token = CancellationToken::new(); let mut error_tx = error_tx; - let mut capturer = match create_d3d_capturer( - &display_id, - &settings, - &d3d_device, - &video_tx, - &error_tx, - &video_frame_counter, - &video_drop_counter, + let capturer_params = CreateCapturerParams { + display_id: &display_id, + settings: &settings, + d3d_device: &d3d_device, + video_tx: &video_tx, + video_frame_counter: &video_frame_counter, + video_drop_counter: &video_drop_counter, expected_width, expected_height, - frame_scaler.clone(), - scaling_logged.clone(), - scaled_frame_count.clone(), - ) { + frame_scaler: frame_scaler.clone(), + scaling_logged: scaling_logged.clone(), + scaled_frame_count: scaled_frame_count.clone(), + }; + + let mut capturer = match create_d3d_capturer(&capturer_params, &error_tx) { Ok(c) => { trace!("D3D capturer created 
successfully"); Some(c) @@ -701,20 +712,7 @@ impl output_pipeline::VideoSource for VideoSource { drop(old); } - match create_d3d_capturer( - &display_id, - &settings, - &d3d_device, - &video_tx, - &error_tx, - &video_frame_counter, - &video_drop_counter, - expected_width, - expected_height, - frame_scaler.clone(), - scaling_logged.clone(), - scaled_frame_count.clone(), - ) { + match create_d3d_capturer(&capturer_params, &error_tx) { Ok(mut new_cap) => match new_cap.start() { Ok(()) => { let count = restart_counter @@ -1223,12 +1221,11 @@ impl output_pipeline::AudioSource for SystemAudioSource { fn stop(&mut self) -> impl Future> { self.cancel_token.cancel(); - if let Ok(guard) = self.state.lock() { - if let Some(ref capturer) = guard.capturer { - if let Err(err) = capturer.pause() { - warn!("system audio capturer pause failed: {err}"); - } - } + if let Ok(guard) = self.state.lock() + && let Some(ref capturer) = guard.capturer + && let Err(err) = capturer.pause() + { + warn!("system audio capturer pause failed: {err}"); } async { Ok(()) } } diff --git a/crates/recording/src/studio_recording.rs b/crates/recording/src/studio_recording.rs index 6a84faa3ac..4d575f6782 100644 --- a/crates/recording/src/studio_recording.rs +++ b/crates/recording/src/studio_recording.rs @@ -762,10 +762,28 @@ async fn stop_recording( start_time: mic_start_time, device_id: s.mic_device_id.clone(), }), - system_audio: s.pipeline.system_audio.map(|audio| AudioMeta { - path: make_relative(&audio.path), - start_time: Some(to_start_time(audio.first_timestamp)), - device_id: None, + system_audio: s.pipeline.system_audio.map(|audio| { + let raw_sys_start = to_start_time(audio.first_timestamp); + let sys_start_time = if let Some(mic_start) = mic_start_time { + let sync_offset = raw_sys_start - mic_start; + if sync_offset.abs() > 0.030 { + mic_start + } else { + raw_sys_start + } + } else { + let sync_offset = raw_sys_start - display_start_time; + if sync_offset.abs() > 0.030 { + display_start_time + 
} else { + raw_sys_start + } + }; + AudioMeta { + path: make_relative(&audio.path), + start_time: Some(sys_start_time), + device_id: None, + } }), cursor: s .pipeline diff --git a/crates/rendering/src/frame_pipeline.rs b/crates/rendering/src/frame_pipeline.rs index 5b9e207ee2..f8dac07e69 100644 --- a/crates/rendering/src/frame_pipeline.rs +++ b/crates/rendering/src/frame_pipeline.rs @@ -17,6 +17,9 @@ pub struct RgbaToNv12Converter { pending: Option, cached_width: u32, cached_height: u32, + cached_bind_groups: Option<[wgpu::BindGroup; 2]>, + cached_texture_view: Option, + cached_texture_ptr: usize, } #[repr(C)] @@ -105,6 +108,9 @@ impl RgbaToNv12Converter { pending: None, cached_width: 0, cached_height: 0, + cached_bind_groups: None, + cached_texture_view: None, + cached_texture_ptr: 0, } } @@ -142,6 +148,9 @@ impl RgbaToNv12Converter { self.current_readback = 0; self.cached_width = width; self.cached_height = height; + self.cached_bind_groups = None; + self.cached_texture_view = None; + self.cached_texture_ptr = 0; } #[allow(clippy::too_many_arguments)] @@ -166,7 +175,8 @@ impl RgbaToNv12Converter { return false; }; - let readback_buffer = match self.readback_buffers[self.current_readback].as_ref() { + let readback_idx = self.current_readback; + let readback_buffer = match self.readback_buffers[readback_idx].as_ref() { Some(b) => b.clone(), None => return false, }; @@ -183,26 +193,43 @@ impl RgbaToNv12Converter { }; queue.write_buffer(&self.params_buffer, 0, bytemuck::cast_slice(&[params])); - let source_view = source_texture.create_view(&Default::default()); + let texture_ptr = source_texture as *const wgpu::Texture as usize; + let needs_rebind = + self.cached_texture_ptr != texture_ptr || self.cached_bind_groups.is_none(); + + if needs_rebind { + let source_view = source_texture.create_view(&Default::default()); + + let make_bind_group = |view: &wgpu::TextureView| { + device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("RGBA to NV12 Bind Group"), 
+ layout: &self.bind_group_layout, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::TextureView(view), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: nv12_buffer.as_entire_binding(), + }, + wgpu::BindGroupEntry { + binding: 2, + resource: self.params_buffer.as_entire_binding(), + }, + ], + }) + }; - let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { - label: Some("RGBA to NV12 Bind Group"), - layout: &self.bind_group_layout, - entries: &[ - wgpu::BindGroupEntry { - binding: 0, - resource: wgpu::BindingResource::TextureView(&source_view), - }, - wgpu::BindGroupEntry { - binding: 1, - resource: nv12_buffer.as_entire_binding(), - }, - wgpu::BindGroupEntry { - binding: 2, - resource: self.params_buffer.as_entire_binding(), - }, - ], - }); + let bg0 = make_bind_group(&source_view); + let bg1 = make_bind_group(&source_view); + + self.cached_texture_view = Some(source_view); + self.cached_bind_groups = Some([bg0, bg1]); + self.cached_texture_ptr = texture_ptr; + } + + let bind_groups = self.cached_bind_groups.as_ref().unwrap(); { let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { @@ -210,7 +237,7 @@ impl RgbaToNv12Converter { ..Default::default() }); pass.set_pipeline(&self.pipeline); - pass.set_bind_group(0, &bind_group, &[]); + pass.set_bind_group(0, &bind_groups[readback_idx], &[]); pass.dispatch_workgroups(width.div_ceil(4 * 8), height.div_ceil(2 * 8), 1); } @@ -300,7 +327,7 @@ impl PendingNv12Readback { let buffer_slice = self.buffer.slice(..); let data = buffer_slice.get_mapped_range(); - let nv12_data = data.to_vec(); + let nv12_data = Arc::new(data.to_vec()); drop(data); self.buffer.unmap(); @@ -327,7 +354,7 @@ pub enum GpuOutputFormat { } pub struct Nv12RenderedFrame { - pub data: Vec, + pub data: Arc>, pub width: u32, pub height: u32, pub y_stride: u32, @@ -336,6 +363,38 @@ pub struct Nv12RenderedFrame { pub format: GpuOutputFormat, } +impl Nv12RenderedFrame { + pub fn 
clone_metadata_with_data(&self) -> Self { + Self { + data: self.data.clone(), + width: self.width, + height: self.height, + y_stride: self.y_stride, + frame_number: self.frame_number, + target_time_ns: self.target_time_ns, + format: self.format, + } + } + + pub fn into_data(self) -> Vec { + Arc::unwrap_or_clone(self.data) + } + + pub fn y_plane(&self) -> &[u8] { + let y_size = (self.y_stride as usize) * (self.height as usize); + &self.data[..y_size.min(self.data.len())] + } + + pub fn uv_plane(&self) -> &[u8] { + let y_size = (self.y_stride as usize) * (self.height as usize); + if y_size < self.data.len() { + &self.data[y_size..] + } else { + &[] + } + } +} + pub struct PendingReadback { rx: oneshot::Receiver>, buffer: Arc, @@ -406,7 +465,7 @@ impl PendingReadback { (self.frame_number as u64 * 1_000_000_000) / self.frame_rate.max(1) as u64; Ok(RenderedFrame { - data: data_vec, + data: Arc::new(data_vec), padded_bytes_per_row: self.padded_bytes_per_row, width: self.width, height: self.height, @@ -723,7 +782,7 @@ impl RenderSession { #[derive(Clone)] pub struct RenderedFrame { - pub data: Vec, + pub data: Arc>, pub width: u32, pub height: u32, pub padded_bytes_per_row: u32, @@ -760,7 +819,7 @@ pub async fn finish_encoder( queue: &wgpu::Queue, uniforms: &ProjectUniforms, encoder: wgpu::CommandEncoder, -) -> Result { +) -> Result, RenderingError> { let previous_frame = if let Some(prev) = session.pipelined_readback.take_pending() { Some(prev.wait(device).await?) 
} else { @@ -779,16 +838,7 @@ pub async fn finish_encoder( .pipelined_readback .submit_readback(device, queue, texture, uniforms, encoder)?; - if let Some(prev_frame) = previous_frame { - return Ok(prev_frame); - } - - let pending = session - .pipelined_readback - .take_pending() - .expect("just submitted a readback"); - - pending.wait(device).await + Ok(previous_frame) } pub async fn finish_encoder_nv12( @@ -798,7 +848,7 @@ pub async fn finish_encoder_nv12( queue: &wgpu::Queue, uniforms: &ProjectUniforms, mut encoder: wgpu::CommandEncoder, -) -> Result { +) -> Result, RenderingError> { let width = uniforms.output_size.0; let height = uniforms.output_size.1; @@ -829,28 +879,21 @@ pub async fn finish_encoder_nv12( queue.submit(std::iter::once(encoder.finish())); nv12_converter.start_readback(); - if let Some(prev_frame) = previous_frame { - return Ok(prev_frame); - } - - let pending = nv12_converter - .take_pending() - .expect("just submitted a conversion"); - pending.wait(device).await + Ok(previous_frame) } else if let Some(prev_frame) = previous_frame { queue.submit(std::iter::once(encoder.finish())); - Ok(prev_frame) + Ok(Some(prev_frame)) } else { let rgba_frame = finish_encoder(session, device, queue, uniforms, encoder).await?; - Ok(Nv12RenderedFrame { - data: rgba_frame.data, - width: rgba_frame.width, - height: rgba_frame.height, - y_stride: rgba_frame.padded_bytes_per_row, - frame_number: rgba_frame.frame_number, - target_time_ns: rgba_frame.target_time_ns, + Ok(rgba_frame.map(|f| Nv12RenderedFrame { + data: f.data, + width: f.width, + height: f.height, + y_stride: f.padded_bytes_per_row, + frame_number: f.frame_number, + target_time_ns: f.target_time_ns, format: GpuOutputFormat::Rgba, - }) + })) } } diff --git a/crates/rendering/src/lib.rs b/crates/rendering/src/lib.rs index 0d693f35fd..00390ffde2 100644 --- a/crates/rendering/src/lib.rs +++ b/crates/rendering/src/lib.rs @@ -271,6 +271,8 @@ pub enum RenderingError { BufferMapFailed(#[from] 
wgpu::BufferAsyncError), #[error("Sending frame to channel failed")] ChannelSendFrameFailed(#[from] mpsc::error::SendError<(RenderedFrame, u32)>), + #[error("Sending NV12 frame to channel failed")] + ChannelSendNv12FrameFailed(#[from] mpsc::error::SendError<(Nv12RenderedFrame, u32)>), #[error("Failed to load image: {0}")] ImageLoadError(String), #[error("Error polling wgpu: {0}")] @@ -352,10 +354,13 @@ pub async fn render_video_to_channel( ); } + let needs_camera = !project.camera.hide; let mut last_successful_frame: Option = None; let mut consecutive_failures = 0u32; const MAX_CONSECUTIVE_FAILURES: u32 = 200; + let mut prefetched_decode: Option<(u32, f64, usize, Option)> = None; + loop { if frame_number >= total_frames { break; @@ -378,60 +383,40 @@ pub async fn render_video_to_channel( }; let render_segment = &render_segments[segment.recording_clip as usize]; - - let mut segment_frames = None; - let mut retry_count = 0; - const MAX_RETRIES: u32 = 5; let is_initial_frame = current_frame_number == 0 || last_successful_frame.is_none(); + let segment_clip_index = segment.recording_clip as usize; - while segment_frames.is_none() && retry_count < MAX_RETRIES { - if retry_count > 0 { - let delay = if is_initial_frame { - 500 * (retry_count as u64 + 1) + let segment_frames = + if let Some((pf_num, _pf_time, pf_clip, pf_result)) = prefetched_decode.take() { + if pf_num == current_frame_number && pf_clip == segment_clip_index { + pf_result } else { - 50 * retry_count as u64 - }; - tokio::time::sleep(std::time::Duration::from_millis(delay)).await; - } - - segment_frames = if is_initial_frame { - render_segment - .decoders - .get_frames_initial( - segment_time as f32, - !project.camera.hide, + decode_segment_frames_with_retry( + &render_segment.decoders, + segment_time, + needs_camera, clip_config.map(|v| v.offsets).unwrap_or_default(), + current_frame_number, + is_initial_frame, ) .await + } } else { - render_segment - .decoders - .get_frames( - segment_time as f32, - 
!project.camera.hide, - clip_config.map(|v| v.offsets).unwrap_or_default(), - ) - .await + decode_segment_frames_with_retry( + &render_segment.decoders, + segment_time, + needs_camera, + clip_config.map(|v| v.offsets).unwrap_or_default(), + current_frame_number, + is_initial_frame, + ) + .await }; - if segment_frames.is_none() { - retry_count += 1; - if retry_count < MAX_RETRIES { - tracing::warn!( - frame_number = current_frame_number, - segment_time = segment_time, - retry_count = retry_count, - is_initial = is_initial_frame, - "Frame decode failed, retrying..." - ); - } - } - } - - let frame = if let Some(segment_frames) = segment_frames { + if let Some(segment_frames) = segment_frames { consecutive_failures = 0; - let zoom_focus_interp = &zoom_focus_interpolators[segment.recording_clip as usize]; + let zoom_focus_interp = &zoom_focus_interpolators[segment_clip_index]; let uniforms = ProjectUniforms::new( constants, @@ -445,20 +430,70 @@ pub async fn render_video_to_channel( zoom_focus_interp, ); - match frame_renderer - .render( - segment_frames, - uniforms, - &render_segment.cursor, - &mut layers, - ) - .await - { - Ok(frame) if frame.width > 0 && frame.height > 0 => { + let next_frame_number = frame_number; + let mut next_prefetch_meta: Option<(f64, usize)> = None; + let prefetch_future = if next_frame_number < total_frames { + if let Some((next_seg_time, next_segment)) = + project.get_segment_time(next_frame_number as f64 / fps as f64) + { + let next_clip_index = next_segment.recording_clip as usize; + next_prefetch_meta = Some((next_seg_time, next_clip_index)); + let next_render_segment = &render_segments[next_clip_index]; + let next_clip_config = project + .clips + .iter() + .find(|v| v.index == next_segment.recording_clip); + let next_is_initial = last_successful_frame.is_none(); + + Some(decode_segment_frames_with_retry( + &next_render_segment.decoders, + next_seg_time, + needs_camera, + next_clip_config.map(|v| v.offsets).unwrap_or_default(), + 
next_frame_number, + next_is_initial, + )) + } else { + None + } + } else { + None + }; + + let render_result = if let Some(prefetch) = prefetch_future { + let (render, decoded) = tokio::join!( + frame_renderer.render( + segment_frames, + uniforms, + &render_segment.cursor, + &mut layers, + ), + prefetch + ); + + if let Some((next_seg_time, next_clip_index)) = next_prefetch_meta { + prefetched_decode = + Some((next_frame_number, next_seg_time, next_clip_index, decoded)); + } + + render + } else { + frame_renderer + .render( + segment_frames, + uniforms, + &render_segment.cursor, + &mut layers, + ) + .await + }; + + match render_result { + Ok(Some(frame)) if frame.width > 0 && frame.height > 0 => { last_successful_frame = Some(frame.clone()); - frame + sender.send((frame, current_frame_number)).await?; } - Ok(_) => { + Ok(Some(_)) => { tracing::warn!( frame_number = current_frame_number, "Rendered frame has zero dimensions" @@ -468,11 +503,10 @@ pub async fn render_video_to_channel( fallback.frame_number = current_frame_number; fallback.target_time_ns = (current_frame_number as u64 * 1_000_000_000) / fps as u64; - fallback - } else { - continue; + sender.send((fallback, current_frame_number)).await?; } } + Ok(None) => {} Err(e) => { tracing::error!( frame_number = current_frame_number, @@ -484,7 +518,7 @@ pub async fn render_video_to_channel( fallback.frame_number = current_frame_number; fallback.target_time_ns = (current_frame_number as u64 * 1_000_000_000) / fps as u64; - fallback + sender.send((fallback, current_frame_number)).await?; } else { return Err(e); } @@ -510,26 +544,24 @@ pub async fn render_video_to_channel( frame_number = current_frame_number, segment_time = segment_time, consecutive_failures = consecutive_failures, - max_retries = MAX_RETRIES, + max_retries = DECODE_MAX_RETRIES, "Frame decode failed after retries - using previous frame" ); let mut fallback = last_frame.clone(); fallback.frame_number = current_frame_number; fallback.target_time_ns = 
(current_frame_number as u64 * 1_000_000_000) / fps as u64; - fallback + sender.send((fallback, current_frame_number)).await?; } else { tracing::error!( frame_number = current_frame_number, segment_time = segment_time, - max_retries = MAX_RETRIES, + max_retries = DECODE_MAX_RETRIES, "First frame decode failed after retries - cannot continue" ); continue; } - }; - - sender.send((frame, current_frame_number)).await?; + } } if let Some(Ok(final_frame)) = frame_renderer.flush_pipeline().await @@ -551,6 +583,349 @@ pub async fn render_video_to_channel( Ok(()) } +#[allow(clippy::too_many_arguments)] +pub async fn render_video_to_channel_nv12( + constants: &RenderVideoConstants, + project: &ProjectConfiguration, + sender: mpsc::Sender<(Nv12RenderedFrame, u32)>, + recording_meta: &RecordingMeta, + meta: &StudioRecordingMeta, + render_segments: Vec, + fps: u32, + resolution_base: XY, + recordings: &ProjectRecordingsMeta, +) -> Result<(), RenderingError> { + ffmpeg::init().unwrap(); + + let start_time = Instant::now(); + + let duration = get_duration(recordings, recording_meta, meta, project); + + let total_frames = (fps as f64 * duration).ceil() as u32; + + let cursor_smoothing = + (!project.cursor.raw).then_some(spring_mass_damper::SpringMassDamperSimulationConfig { + tension: project.cursor.tension, + mass: project.cursor.mass, + friction: project.cursor.friction, + }); + + let zoom_focus_interpolators: Vec = render_segments + .iter() + .map(|segment| { + let mut interp = ZoomFocusInterpolator::new( + &segment.cursor, + cursor_smoothing, + project.screen_movement_spring, + duration, + ); + interp.precompute(); + interp + }) + .collect(); + + let mut frame_number = 0; + + let mut frame_renderer = FrameRenderer::new(constants); + + let mut layers = RendererLayers::new_with_options( + &constants.device, + &constants.queue, + constants.is_software_adapter, + ); + + if let Some(first_segment) = render_segments.first() { + let (screen_w, screen_h) = 
first_segment.decoders.screen_video_dimensions(); + let camera_dims = first_segment.decoders.camera_video_dimensions(); + layers.prepare_for_video_dimensions( + &constants.device, + screen_w, + screen_h, + camera_dims.map(|(w, _)| w), + camera_dims.map(|(_, h)| h), + ); + } + + let needs_camera = !project.camera.hide; + + let mut last_successful_frame: Option = None; + let mut consecutive_failures = 0u32; + const MAX_CONSECUTIVE_FAILURES: u32 = 200; + + let mut prefetched_decode: Option<(u32, f64, usize, Option)> = None; + + loop { + if frame_number >= total_frames { + break; + } + + let Some((segment_time, segment)) = + project.get_segment_time(frame_number as f64 / fps as f64) + else { + break; + }; + + let clip_config = project + .clips + .iter() + .find(|v| v.index == segment.recording_clip); + + let current_frame_number = { + let prev = frame_number; + std::mem::replace(&mut frame_number, prev + 1) + }; + + let render_segment = &render_segments[segment.recording_clip as usize]; + let is_initial_frame = current_frame_number == 0 || last_successful_frame.is_none(); + let segment_clip_index = segment.recording_clip as usize; + + let segment_frames = + if let Some((pf_num, _pf_time, pf_clip, pf_result)) = prefetched_decode.take() { + if pf_num == current_frame_number && pf_clip == segment_clip_index { + pf_result + } else { + decode_segment_frames_with_retry( + &render_segment.decoders, + segment_time, + needs_camera, + clip_config.map(|v| v.offsets).unwrap_or_default(), + current_frame_number, + is_initial_frame, + ) + .await + } + } else { + decode_segment_frames_with_retry( + &render_segment.decoders, + segment_time, + needs_camera, + clip_config.map(|v| v.offsets).unwrap_or_default(), + current_frame_number, + is_initial_frame, + ) + .await + }; + + if let Some(segment_frames) = segment_frames { + consecutive_failures = 0; + + let zoom_focus_interp = &zoom_focus_interpolators[segment_clip_index]; + + let uniforms = ProjectUniforms::new( + constants, + project, 
+ current_frame_number, + fps, + resolution_base, + &render_segment.cursor, + &segment_frames, + duration, + zoom_focus_interp, + ); + + let next_frame_number = frame_number; + let mut next_prefetch_meta: Option<(f64, usize)> = None; + let prefetch_future = if next_frame_number < total_frames { + if let Some((next_seg_time, next_segment)) = + project.get_segment_time(next_frame_number as f64 / fps as f64) + { + let next_clip_index = next_segment.recording_clip as usize; + next_prefetch_meta = Some((next_seg_time, next_clip_index)); + let next_render_segment = &render_segments[next_clip_index]; + let next_clip_config = project + .clips + .iter() + .find(|v| v.index == next_segment.recording_clip); + let next_is_initial = last_successful_frame.is_none(); + + Some(decode_segment_frames_with_retry( + &next_render_segment.decoders, + next_seg_time, + needs_camera, + next_clip_config.map(|v| v.offsets).unwrap_or_default(), + next_frame_number, + next_is_initial, + )) + } else { + None + } + } else { + None + }; + + let render_result = if let Some(prefetch) = prefetch_future { + let (render, decoded) = tokio::join!( + frame_renderer.render_nv12( + segment_frames, + uniforms, + &render_segment.cursor, + &mut layers, + ), + prefetch + ); + + if let Some((next_seg_time, next_clip_index)) = next_prefetch_meta { + prefetched_decode = + Some((next_frame_number, next_seg_time, next_clip_index, decoded)); + } + + render + } else { + frame_renderer + .render_nv12( + segment_frames, + uniforms, + &render_segment.cursor, + &mut layers, + ) + .await + }; + + match render_result { + Ok(Some(frame)) if frame.width > 0 && frame.height > 0 => { + last_successful_frame = Some(frame.clone_metadata_with_data()); + sender.send((frame, current_frame_number)).await?; + } + Ok(Some(_)) => { + tracing::warn!( + frame_number = current_frame_number, + "Rendered NV12 frame has zero dimensions" + ); + if let Some(ref last_frame) = last_successful_frame { + let mut fallback = 
last_frame.clone_metadata_with_data(); + fallback.frame_number = current_frame_number; + fallback.target_time_ns = + (current_frame_number as u64 * 1_000_000_000) / fps as u64; + sender.send((fallback, current_frame_number)).await?; + } + } + Ok(None) => {} + Err(e) => { + tracing::error!( + frame_number = current_frame_number, + error = %e, + "NV12 frame rendering failed" + ); + if let Some(ref last_frame) = last_successful_frame { + let mut fallback = last_frame.clone_metadata_with_data(); + fallback.frame_number = current_frame_number; + fallback.target_time_ns = + (current_frame_number as u64 * 1_000_000_000) / fps as u64; + sender.send((fallback, current_frame_number)).await?; + } else { + return Err(e); + } + } + } + } else { + consecutive_failures += 1; + + if consecutive_failures >= MAX_CONSECUTIVE_FAILURES { + tracing::error!( + frame_number = current_frame_number, + consecutive_failures = consecutive_failures, + "Too many consecutive frame failures - aborting export" + ); + return Err(RenderingError::FrameDecodeFailed { + frame_number: current_frame_number, + consecutive_failures, + }); + } + + if let Some(ref last_frame) = last_successful_frame { + tracing::warn!( + frame_number = current_frame_number, + segment_time = segment_time, + consecutive_failures = consecutive_failures, + max_retries = DECODE_MAX_RETRIES, + "Frame decode failed after retries - using previous NV12 frame" + ); + let mut fallback = last_frame.clone_metadata_with_data(); + fallback.frame_number = current_frame_number; + fallback.target_time_ns = + (current_frame_number as u64 * 1_000_000_000) / fps as u64; + sender.send((fallback, current_frame_number)).await?; + } else { + tracing::error!( + frame_number = current_frame_number, + segment_time = segment_time, + max_retries = DECODE_MAX_RETRIES, + "First frame decode failed after retries - cannot continue" + ); + continue; + } + } + } + + if let Some(Ok(final_frame)) = frame_renderer.flush_pipeline_nv12().await + && final_frame.width 
> 0 + && final_frame.height > 0 + { + sender + .send((final_frame, frame_number.saturating_sub(1))) + .await?; + } + + let total_time = start_time.elapsed(); + tracing::info!( + frames = frame_number, + elapsed_secs = format!("{:.2}", total_time.as_secs_f32()), + "NV12 render complete" + ); + + Ok(()) +} + +const DECODE_MAX_RETRIES: u32 = 5; + +async fn decode_segment_frames_with_retry( + decoders: &RecordingSegmentDecoders, + segment_time: f64, + needs_camera: bool, + offsets: cap_project::ClipOffsets, + current_frame_number: u32, + is_initial_frame: bool, +) -> Option { + let mut result = None; + let mut retry_count = 0u32; + + while result.is_none() && retry_count < DECODE_MAX_RETRIES { + if retry_count > 0 { + let delay = if is_initial_frame { + 500 * (retry_count as u64 + 1) + } else { + 50 * retry_count as u64 + }; + tokio::time::sleep(std::time::Duration::from_millis(delay)).await; + } + + result = if is_initial_frame { + decoders + .get_frames_initial(segment_time as f32, needs_camera, offsets) + .await + } else { + decoders + .get_frames(segment_time as f32, needs_camera, offsets) + .await + }; + + if result.is_none() { + retry_count += 1; + if retry_count < DECODE_MAX_RETRIES { + tracing::warn!( + frame_number = current_frame_number, + segment_time = segment_time, + retry_count = retry_count, + is_initial = is_initial_frame, + "Frame decode failed, retrying..." 
+ ); + } + } + } + + result +} + pub fn get_duration( recordings: &ProjectRecordingsMeta, recording_meta: &RecordingMeta, @@ -973,7 +1348,7 @@ impl MotionBlurDescriptor { } impl ProjectUniforms { - fn get_crop(options: &RenderOptions, project: &ProjectConfiguration) -> Crop { + pub fn get_crop(options: &RenderOptions, project: &ProjectConfiguration) -> Crop { project.background.crop.as_ref().cloned().unwrap_or(Crop { position: XY { x: 0, y: 0 }, size: XY { @@ -1824,7 +2199,7 @@ impl<'a> FrameRenderer<'a> { uniforms: ProjectUniforms, cursor: &CursorEvents, layers: &mut RendererLayers, - ) -> Result { + ) -> Result, RenderingError> { let mut last_error = None; for attempt in 0..Self::MAX_RENDER_RETRIES { @@ -1863,7 +2238,7 @@ impl<'a> FrameRenderer<'a> { ) .await { - Ok(frame) => return Ok(frame), + Ok(opt_frame) => return Ok(opt_frame), Err(RenderingError::BufferMapWaitingFailed) => { tracing::warn!( frame_number = uniforms.frame_number, @@ -1888,6 +2263,24 @@ impl<'a> FrameRenderer<'a> { Err(last_error.unwrap_or(RenderingError::BufferMapWaitingFailed)) } + pub async fn render_immediate( + &mut self, + segment_frames: DecodedSegmentFrames, + uniforms: ProjectUniforms, + cursor: &CursorEvents, + layers: &mut RendererLayers, + ) -> Result { + if let Some(frame) = self + .render(segment_frames, uniforms, cursor, layers) + .await? 
+ { + return Ok(frame); + } + self.flush_pipeline() + .await + .unwrap_or(Err(RenderingError::BufferMapWaitingFailed)) + } + pub async fn flush_pipeline(&mut self) -> Option> { if let Some(session) = &mut self.session { flush_pending_readback(session, &self.constants.device).await @@ -1896,13 +2289,21 @@ impl<'a> FrameRenderer<'a> { } } + pub async fn flush_pipeline_nv12( + &mut self, + ) -> Option> { + let nv12_converter = self.nv12_converter.as_mut()?; + let pending = nv12_converter.take_pending()?; + Some(pending.wait(&self.constants.device).await) + } + pub async fn render_nv12( &mut self, segment_frames: DecodedSegmentFrames, uniforms: ProjectUniforms, cursor: &CursorEvents, layers: &mut RendererLayers, - ) -> Result { + ) -> Result, RenderingError> { let mut last_error = None; for attempt in 0..Self::MAX_RENDER_RETRIES { @@ -1974,7 +2375,7 @@ impl<'a> FrameRenderer<'a> { ) .await { - Ok(frame) => return Ok(frame), + Ok(opt_frame) => return Ok(opt_frame), Err(RenderingError::BufferMapWaitingFailed) => { last_error = Some(RenderingError::BufferMapWaitingFailed); } @@ -2304,7 +2705,7 @@ async fn produce_frame( cursor: &CursorEvents, layers: &mut RendererLayers, session: &mut RenderSession, -) -> Result { +) -> Result, RenderingError> { let mut encoder = constants.device.create_command_encoder( &(wgpu::CommandEncoderDescriptor { label: Some("Render Encoder"), diff --git a/crates/rendering/src/main.rs b/crates/rendering/src/main.rs index 3fa4d028f6..efbbc39ddf 100644 --- a/crates/rendering/src/main.rs +++ b/crates/rendering/src/main.rs @@ -250,6 +250,6 @@ fn save_as_jpeg(frame: &RenderedFrame, output_path: &PathBuf) -> Result<()> { fn save_as_raw(frame: &RenderedFrame, output_path: &PathBuf) -> Result<()> { // Save raw RGBA data - std::fs::write(output_path, &frame.data).context("Failed to save raw frame data")?; + std::fs::write(output_path, &*frame.data).context("Failed to save raw frame data")?; Ok(()) } diff --git a/crates/video-decode/src/avassetreader.rs 
b/crates/video-decode/src/avassetreader.rs index 1a021151ed..a8db3b606f 100644 --- a/crates/video-decode/src/avassetreader.rs +++ b/crates/video-decode/src/avassetreader.rs @@ -237,13 +237,13 @@ impl AVAssetReaderDecoder { keyframe_index: Option>, ) -> Result { let (pixel_format, width, height) = { - let input = ffmpeg::format::input(&path).unwrap(); + let input = ffmpeg::format::input(&path) + .map_err(|e| format!("Failed to open video input '{}': {e}", path.display()))?; let input_stream = input .streams() .best(ffmpeg::media::Type::Video) - .ok_or("Could not find a video stream") - .unwrap(); + .ok_or_else(|| format!("No video stream in '{}'", path.display()))?; let decoder = avcodec::Context::from_parameters(input_stream.parameters()) .map_err(|e| format!("decoder context / {e}"))? @@ -338,7 +338,11 @@ impl AVAssetReaderDecoder { height: u32, ) -> Result<(R, R), String> { let asset = av::UrlAsset::with_url( - &ns::Url::with_fs_path_str(path.to_str().unwrap(), false), + &ns::Url::with_fs_path_str( + path.to_str() + .ok_or_else(|| format!("Invalid UTF-8 in path: {path:?}"))?, + false, + ), None, ) .ok_or_else(|| format!("UrlAsset::with_url{{{path:?}}}"))?; diff --git a/crates/video-decode/src/ffmpeg.rs b/crates/video-decode/src/ffmpeg.rs index 53d5f9d483..8311f57dff 100644 --- a/crates/video-decode/src/ffmpeg.rs +++ b/crates/video-decode/src/ffmpeg.rs @@ -144,30 +144,34 @@ pub fn get_hw_decoder_capabilities() -> &'static HwDecoderCapabilities { fn configure_software_threading(decoder: &mut avcodec::decoder::Video, width: u32, height: u32) { let pixel_count = (width as u64) * (height as u64); + let cpu_count = num_cpus::get(); let thread_count = if pixel_count > 8294400 { 0 } else if pixel_count > 2073600 { - (num_cpus::get() / 2).max(2) as i32 + cpu_count.clamp(2, 8) as i32 } else { - 2 + cpu_count.clamp(2, 6) as i32 }; + let thread_type = ffmpeg::sys::FF_THREAD_FRAME | ffmpeg::sys::FF_THREAD_SLICE; + unsafe { let codec_ctx = decoder.as_mut_ptr(); if 
!codec_ctx.is_null() { (*codec_ctx).thread_count = thread_count; - (*codec_ctx).thread_type = ffmpeg::sys::FF_THREAD_FRAME; + (*codec_ctx).thread_type = thread_type; } } info!( - "Software decode configured: {width}x{height}, thread_count={}, thread_type=frame", + "Software decode configured: {width}x{height}, thread_count={}, thread_type=frame+slice, cpus={}", if thread_count == 0 { "auto".to_string() } else { thread_count.to_string() - } + }, + cpu_count ); }