diff --git a/Cargo.lock b/Cargo.lock
index c4f8665e90..2107d7eae8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1315,6 +1315,7 @@ dependencies = [
  "tokio",
  "tokio-util",
  "tracing",
+ "tracing-subscriber",
  "workspace-hack",
 ]
diff --git a/apps/desktop/src-tauri/src/camera_legacy.rs b/apps/desktop/src-tauri/src/camera_legacy.rs
index ac980a7d1e..012582207e 100644
--- a/apps/desktop/src-tauri/src/camera_legacy.rs
+++ b/apps/desktop/src-tauri/src/camera_legacy.rs
@@ -66,6 +66,7 @@ pub async fn create_camera_preview_ws() -> (Sender, u16, Cance
                 stride: frame.stride(0) as u32,
                 frame_number: 0,
                 target_time_ns: 0,
+                format: crate::frame_ws::WSFrameFormat::Rgba,
                 created_at: Instant::now(),
             })
             .ok();
diff --git a/apps/desktop/src-tauri/src/editor_window.rs b/apps/desktop/src-tauri/src/editor_window.rs
index 87c68a916e..33a2d56f8c 100644
--- a/apps/desktop/src-tauri/src/editor_window.rs
+++ b/apps/desktop/src-tauri/src/editor_window.rs
@@ -5,7 +5,7 @@ use tokio_util::sync::CancellationToken;
 use crate::{
     create_editor_instance_impl,
-    frame_ws::{WSFrame, create_watch_frame_ws},
+    frame_ws::{WSFrame, WSFrameFormat, create_watch_frame_ws},
 };
 
 pub struct EditorInstance {
@@ -27,16 +27,30 @@ async fn do_prewarm(app: AppHandle, path: PathBuf) -> PendingResult {
     let inner = create_editor_instance_impl(
         &app,
         path,
-        Box::new(move |frame| {
-            let _ = frame_tx.send(Some(WSFrame {
-                data: frame.data,
-                width: frame.width,
-                height: frame.height,
-                stride: frame.padded_bytes_per_row,
-                frame_number: frame.frame_number,
-                target_time_ns: frame.target_time_ns,
-                created_at: Instant::now(),
-            }));
+        Box::new(move |output| {
+            let ws_frame = match output {
+                cap_editor::EditorFrameOutput::Nv12(frame) => WSFrame {
+                    data: frame.data,
+                    width: frame.width,
+                    height: frame.height,
+                    stride: frame.y_stride,
+                    frame_number: frame.frame_number,
+                    target_time_ns: frame.target_time_ns,
+                    format: WSFrameFormat::Nv12,
+                    created_at: Instant::now(),
+                },
+                cap_editor::EditorFrameOutput::Rgba(frame) => WSFrame {
+                    data: frame.data,
+                    width: frame.width,
+                    height: frame.height,
+                    stride: frame.padded_bytes_per_row,
+                    frame_number: frame.frame_number,
+                    target_time_ns: frame.target_time_ns,
+                    format: WSFrameFormat::Rgba,
+                    created_at: Instant::now(),
+                },
+            };
+            let _ = frame_tx.send(Some(std::sync::Arc::new(ws_frame)));
         }),
     )
     .await?;
@@ -218,16 +232,30 @@ impl EditorInstances {
         let inner = create_editor_instance_impl(
             window.app_handle(),
             path,
-            Box::new(move |frame| {
-                let _ = frame_tx.send(Some(WSFrame {
-                    data: frame.data,
-                    width: frame.width,
-                    height: frame.height,
-                    stride: frame.padded_bytes_per_row,
-                    frame_number: frame.frame_number,
-                    target_time_ns: frame.target_time_ns,
-                    created_at: Instant::now(),
-                }));
+            Box::new(move |output| {
+                let ws_frame = match output {
+                    cap_editor::EditorFrameOutput::Nv12(frame) => WSFrame {
+                        data: frame.data,
+                        width: frame.width,
+                        height: frame.height,
+                        stride: frame.y_stride,
+                        frame_number: frame.frame_number,
+                        target_time_ns: frame.target_time_ns,
+                        format: WSFrameFormat::Nv12,
+                        created_at: Instant::now(),
+                    },
+                    cap_editor::EditorFrameOutput::Rgba(frame) => WSFrame {
+                        data: frame.data,
+                        width: frame.width,
+                        height: frame.height,
+                        stride: frame.padded_bytes_per_row,
+                        frame_number: frame.frame_number,
+                        target_time_ns: frame.target_time_ns,
+                        format: WSFrameFormat::Rgba,
+                        created_at: Instant::now(),
+                    },
+                };
+                let _ = frame_tx.send(Some(std::sync::Arc::new(ws_frame)));
            }),
        )
        .await?;
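A note on the shape change above: the editor callback now receives an `EditorFrameOutput` (NV12 or RGBA), and frames are published through the watch channel as `Option<Arc<WSFrame>>` rather than `Option<WSFrame>`. A minimal sketch of why the `Arc` matters for watch-channel fan-out — types simplified, names illustrative, not the real `WSFrame`:

```rust
use std::sync::Arc;
use tokio::sync::watch;

// Stand-in for WSFrame; the real struct carries pixel data plus metadata.
struct Frame {
    data: Vec<u8>,
}

#[tokio::main]
async fn main() {
    let (tx, rx) = watch::channel::<Option<Arc<Frame>>>(None);
    let mut subscriber = rx.clone();

    // The producer allocates the frame once; each websocket task that clones
    // the channel value copies only the Arc (a refcount bump), not the
    // multi-megabyte pixel buffer.
    let frame = Arc::new(Frame { data: vec![0u8; 1920 * 1080 * 4] });
    let _ = tx.send(Some(frame));

    subscriber.changed().await.unwrap();
    let latest = subscriber.borrow_and_update().clone(); // cheap clone of Option<Arc<Frame>>
    assert_eq!(latest.unwrap().data.len(), 1920 * 1080 * 4);
}
```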
diff --git a/apps/desktop/src-tauri/src/frame_ws.rs b/apps/desktop/src-tauri/src/frame_ws.rs
index 66c9afe20c..3d959e4a92 100644
--- a/apps/desktop/src-tauri/src/frame_ws.rs
+++ b/apps/desktop/src-tauri/src/frame_ws.rs
@@ -9,82 +9,25 @@ static LAST_LOG_TIME: AtomicU64 = AtomicU64::new(0);
 
 const NV12_FORMAT_MAGIC: u32 = 0x4e563132;
 
-fn convert_to_nv12(data: &[u8], width: u32, height: u32, stride: u32) -> Vec<u8> {
-    let width = width & !1;
-    let height = height & !1;
-
-    if width == 0 || height == 0 {
-        return Vec::new();
-    }
-
-    let y_stride = width;
-    let uv_stride = width;
-    let y_size = (y_stride * height) as usize;
-    let uv_size = (uv_stride * (height / 2)) as usize;
-    let total_size = y_size + uv_size;
-
-    let stride_bytes = stride as usize;
-
-    let mut output = vec![0u8; total_size];
-    let (y_plane, uv_plane) = output.split_at_mut(y_size);
-
-    for y in 0..height as usize {
-        let src_row = y * stride_bytes;
-
-        if src_row >= data.len() {
-            continue;
-        }
-
-        let y_row_start = y * y_stride as usize;
-        let is_uv_row = y % 2 == 0;
-        let uv_row_start = if is_uv_row {
-            (y / 2) * uv_stride as usize
-        } else {
-            0
-        };
-
-        for x in 0..width as usize {
-            let px = src_row + x * 4;
-
-            if px + 2 < data.len() {
-                let r = data[px] as i32;
-                let g = data[px + 1] as i32;
-                let b = data[px + 2] as i32;
-
-                let y_val = ((66 * r + 129 * g + 25 * b + 128) >> 8) + 16;
-                y_plane[y_row_start + x] = y_val.clamp(0, 255) as u8;
-
-                if is_uv_row && x % 2 == 0 && x + 1 < width as usize {
-                    let px1 = src_row + (x + 1) * 4;
-
-                    let (r1, g1, b1) = if px1 + 2 < data.len() {
-                        (data[px1] as i32, data[px1 + 1] as i32, data[px1 + 2] as i32)
-                    } else {
-                        (r, g, b)
-                    };
-
-                    let avg_r = (r + r1) / 2;
-                    let avg_g = (g + g1) / 2;
-                    let avg_b = (b + b1) / 2;
-
-                    let u = ((-38 * avg_r - 74 * avg_g + 112 * avg_b + 128) >> 8) + 128;
-                    let v = ((112 * avg_r - 94 * avg_g - 18 * avg_b + 128) >> 8) + 128;
-
-                    let uv_idx = uv_row_start + x;
-                    if uv_idx + 1 < uv_plane.len() {
-                        uv_plane[uv_idx] = u.clamp(0, 255) as u8;
-                        uv_plane[uv_idx + 1] = v.clamp(0, 255) as u8;
-                    }
-                }
-            }
-        }
-    }
-
-    output
+fn pack_frame_data(
+    mut data: Vec<u8>,
+    stride: u32,
+    height: u32,
+    width: u32,
+    frame_number: u32,
+    target_time_ns: u64,
+) -> Vec<u8> {
+    data.reserve_exact(24);
+    data.extend_from_slice(&stride.to_le_bytes());
+    data.extend_from_slice(&height.to_le_bytes());
+    data.extend_from_slice(&width.to_le_bytes());
+    data.extend_from_slice(&frame_number.to_le_bytes());
+    data.extend_from_slice(&target_time_ns.to_le_bytes());
+    data
 }
 
-fn pack_nv12_frame(
-    data: Vec<u8>,
+fn pack_nv12_frame_ref(
+    data: &[u8],
     width: u32,
     height: u32,
     frame_number: u32,
@@ -93,32 +36,39 @@
     let y_stride = width;
     let metadata_size = 28;
     let mut output = Vec::with_capacity(data.len() + metadata_size);
-    output.extend_from_slice(&data);
+    output.extend_from_slice(data);
     output.extend_from_slice(&y_stride.to_le_bytes());
     output.extend_from_slice(&height.to_le_bytes());
     output.extend_from_slice(&width.to_le_bytes());
     output.extend_from_slice(&frame_number.to_le_bytes());
     output.extend_from_slice(&target_time_ns.to_le_bytes());
     output.extend_from_slice(&NV12_FORMAT_MAGIC.to_le_bytes());
-
     output
 }
 
-fn pack_frame_data(
-    mut data: Vec<u8>,
+fn pack_frame_data_ref(
+    data: &[u8],
     stride: u32,
     height: u32,
     width: u32,
     frame_number: u32,
     target_time_ns: u64,
-) -> Vec<u8> {
-    data.reserve_exact(24);
-    data.extend_from_slice(&stride.to_le_bytes());
-    data.extend_from_slice(&height.to_le_bytes());
-    data.extend_from_slice(&width.to_le_bytes());
-    data.extend_from_slice(&frame_number.to_le_bytes());
-    data.extend_from_slice(&target_time_ns.to_le_bytes());
-    data
+) -> Vec<u8> {
+    let metadata_size = 24;
+    let mut output = Vec::with_capacity(data.len() + metadata_size);
+    output.extend_from_slice(data);
+    output.extend_from_slice(&stride.to_le_bytes());
+    output.extend_from_slice(&height.to_le_bytes());
+    output.extend_from_slice(&width.to_le_bytes());
+    output.extend_from_slice(&frame_number.to_le_bytes());
+    output.extend_from_slice(&target_time_ns.to_le_bytes());
+    output
+}
+
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum WSFrameFormat {
+    Rgba,
+    Nv12,
 }
 
 #[derive(Clone)]
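For reference, both packers put the raw pixel bytes first and a little-endian metadata trailer last: 24 bytes for RGBA (stride, height, width, frame_number as u32s, then target_time_ns as u64) and 28 bytes for NV12 (y_stride, height, width, frame_number, target_time_ns, then `NV12_FORMAT_MAGIC`). A hypothetical receiver-side decoder of that trailer — the real client is the desktop frontend, so this Rust version is illustrative only, and note that in principle the top four bytes of an RGBA target_time_ns could alias the magic:

```rust
const NV12_FORMAT_MAGIC: u32 = 0x4e563132; // the bytes "NV12" read as a big-endian u32

struct TrailerMeta {
    stride: u32,
    height: u32,
    width: u32,
    frame_number: u32,
    target_time_ns: u64,
    is_nv12: bool,
}

fn parse_trailer(msg: &[u8]) -> Option<TrailerMeta> {
    if msg.len() < 28 {
        return None;
    }
    let u32le = |i: usize| u32::from_le_bytes(msg[i..i + 4].try_into().unwrap());
    let u64le = |i: usize| u64::from_le_bytes(msg[i..i + 8].try_into().unwrap());

    // NV12 frames end with the magic; RGBA frames end with target_time_ns.
    let is_nv12 = u32le(msg.len() - 4) == NV12_FORMAT_MAGIC;
    let base = msg.len() - if is_nv12 { 28 } else { 24 };

    Some(TrailerMeta {
        stride: u32le(base),
        height: u32le(base + 4),
        width: u32le(base + 8),
        frame_number: u32le(base + 12),
        target_time_ns: u64le(base + 16),
        is_nv12,
    })
}

fn main() {
    // 4-byte dummy payload followed by a 24-byte RGBA trailer.
    let mut msg = vec![0u8; 4];
    for v in [16u32, 2, 2, 7] {
        msg.extend_from_slice(&v.to_le_bytes());
    }
    msg.extend_from_slice(&123_456u64.to_le_bytes());

    let meta = parse_trailer(&msg).unwrap();
    assert!(!meta.is_nv12);
    assert_eq!((meta.stride, meta.height, meta.width, meta.frame_number), (16, 2, 2, 7));
    assert_eq!(meta.target_time_ns, 123_456);
}
```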
@@ -129,12 +79,33 @@ pub struct WSFrame {
     pub stride: u32,
     pub frame_number: u32,
     pub target_time_ns: u64,
+    pub format: WSFrameFormat,
     #[allow(dead_code)]
     pub created_at: Instant,
 }
 
+fn pack_ws_frame_ref(frame: &WSFrame) -> Vec<u8> {
+    match frame.format {
+        WSFrameFormat::Nv12 => pack_nv12_frame_ref(
+            &frame.data,
+            frame.width,
+            frame.height,
+            frame.frame_number,
+            frame.target_time_ns,
+        ),
+        WSFrameFormat::Rgba => pack_frame_data_ref(
+            &frame.data,
+            frame.stride,
+            frame.height,
+            frame.width,
+            frame.frame_number,
+            frame.target_time_ns,
+        ),
+    }
+}
+
 pub async fn create_watch_frame_ws(
-    frame_rx: watch::Receiver<Option<WSFrame>>,
+    frame_rx: watch::Receiver<Option<Arc<WSFrame>>>,
 ) -> (u16, CancellationToken) {
     use axum::{
         extract::{
@@ -145,7 +116,7 @@ pub async fn create_watch_frame_ws(
         routing::get,
     };
 
-    type RouterState = watch::Receiver<Option<WSFrame>>;
+    type RouterState = watch::Receiver<Option<Arc<WSFrame>>>;
 
     #[axum::debug_handler]
     async fn ws_handler(
@@ -155,22 +126,19 @@ pub async fn create_watch_frame_ws(
         ws.on_upgrade(move |socket| handle_socket(socket, state))
     }
 
-    async fn handle_socket(mut socket: WebSocket, mut camera_rx: watch::Receiver<Option<WSFrame>>) {
+    async fn handle_socket(
+        mut socket: WebSocket,
+        mut camera_rx: watch::Receiver<Option<Arc<WSFrame>>>,
+    ) {
         tracing::info!("Socket connection established");
         let now = std::time::Instant::now();
 
         {
-            let frame_opt = camera_rx.borrow().clone();
-            if let Some(frame) = frame_opt {
-                let packed = pack_frame_data(
-                    frame.data,
-                    frame.stride,
-                    frame.height,
-                    frame.width,
-                    frame.frame_number,
-                    frame.target_time_ns,
-                );
-
+            let packed = {
+                let borrowed = camera_rx.borrow();
+                borrowed.as_deref().map(pack_ws_frame_ref)
+            };
+            if let Some(packed) = packed {
                 if let Err(e) = socket.send(Message::Binary(packed)).await {
                     tracing::error!("Failed to send initial frame to socket: {:?}", e);
                     return;
@@ -194,20 +162,16 @@ pub async fn create_watch_frame_ws(
                 }
             },
             _ = camera_rx.changed() => {
-                let frame_opt = camera_rx.borrow_and_update().clone();
-                if let Some(frame) = frame_opt {
+                let frame_arc = camera_rx.borrow_and_update().clone();
+                if let Some(ref frame) = frame_arc {
                     let width = frame.width;
                     let height = frame.height;
+                    let format_label = match frame.format {
+                        WSFrameFormat::Nv12 => "NV12",
+                        WSFrameFormat::Rgba => "RGBA",
+                    };
 
-                    let packed = pack_frame_data(
-                        frame.data,
-                        frame.stride,
-                        frame.height,
-                        frame.width,
-                        frame.frame_number,
-                        frame.target_time_ns,
-                    );
-
+                    let packed = pack_ws_frame_ref(frame);
                     let packed_len = packed.len();
 
                     match socket.send(Message::Binary(packed)).await {
@@ -226,7 +190,7 @@ pub async fn create_watch_frame_ws(
                                 mb_per_sec = format!("{:.1}", mb_per_sec),
                                 avg_kb = format!("{:.1}", (total_bytes as f64 / total_frames.max(1) as f64) / 1024.0),
                                 dims = format!("{}x{}", width, height),
-                                format = "RGBA",
+                                format = format_label,
                                 "WS frame stats"
                             );
                         }
@@ -260,7 +224,7 @@ pub async fn create_watch_frame_ws(
         tokio::select! {
             _ = server => {},
             _ = cancel_token.cancelled() => {
-                println!("WebSocket server shutting down");
+                tracing::info!("WebSocket server shutting down");
             }
         }
     });
@@ -290,7 +254,6 @@ pub async fn create_frame_ws(frame_tx: broadcast::Sender<WSFrame>) -> (u16, Canc
 
     async fn handle_socket(mut socket: WebSocket, mut camera_rx: broadcast::Receiver<WSFrame>) {
-        println!("socket connection established");
         tracing::info!("Socket connection established");
 
         let now = std::time::Instant::now();
@@ -343,7 +306,6 @@ pub async fn create_frame_ws(frame_tx: broadcast::Sender<WSFrame>) -> (u16, Canc
         }
 
         let elapsed = now.elapsed();
-        println!("Websocket closing after {elapsed:.2?}");
         tracing::info!("Websocket closing after {elapsed:.2?}");
     }
 
@@ -362,7 +324,7 @@ pub async fn create_frame_ws(frame_tx: broadcast::Sender<WSFrame>) -> (u16, Canc
     tokio::select! {
         _ = server => {},
         _ = cancel_token.cancelled() => {
-            println!("WebSocket server shutting down");
+            tracing::info!("WebSocket server shutting down");
         }
     }
     });
diff --git a/apps/desktop/src-tauri/src/lib.rs b/apps/desktop/src-tauri/src/lib.rs
index 00dc6903b1..23cdbed7f7 100644
--- a/apps/desktop/src-tauri/src/lib.rs
+++ b/apps/desktop/src-tauri/src/lib.rs
@@ -54,7 +54,7 @@ use cap_recording::{
     },
     sources::screen_capture::ScreenCaptureTarget,
 };
-use cap_rendering::{ProjectRecordingsMeta, RenderedFrame};
+use cap_rendering::ProjectRecordingsMeta;
 use clipboard_rs::common::RustImage;
 use clipboard_rs::{Clipboard, ClipboardContext};
 use cpal::StreamError;
@@ -3853,7 +3853,7 @@ async fn resume_uploads(app: AppHandle) -> Result<(), String> {
 async fn create_editor_instance_impl(
     app: &AppHandle,
     path: PathBuf,
-    frame_cb: Box<dyn FnMut(RenderedFrame) + Send>,
+    frame_cb: Box<dyn FnMut(cap_editor::EditorFrameOutput) + Send>,
 ) -> Result<Arc<EditorInstance>, String> {
     let app = app.clone();
diff --git a/apps/desktop/src-tauri/src/screenshot_editor.rs b/apps/desktop/src-tauri/src/screenshot_editor.rs
index 042dda22f4..c95624c59b 100644
--- a/apps/desktop/src-tauri/src/screenshot_editor.rs
+++ b/apps/desktop/src-tauri/src/screenshot_editor.rs
@@ -381,15 +381,16 @@ impl ScreenshotEditorInstances {
                 match rendered_frame {
                     Ok(frame) => {
-                        let _ = frame_tx.send(Some(WSFrame {
+                        let _ = frame_tx.send(Some(std::sync::Arc::new(WSFrame {
                             data: frame.data,
                             width: frame.width,
                             height: frame.height,
                             stride: frame.padded_bytes_per_row,
                             frame_number: frame.frame_number,
                             target_time_ns: frame.target_time_ns,
+                            format: crate::frame_ws::WSFrameFormat::Rgba,
                             created_at: Instant::now(),
-                        }));
+                        })));
                     }
                     Err(e) => {
                         tracing::error!("Failed to render screenshot frame: {e}");
diff --git a/crates/editor/Cargo.toml b/crates/editor/Cargo.toml
index c612d1e33f..1d52c776f8 100644
--- a/crates/editor/Cargo.toml
+++ b/crates/editor/Cargo.toml
@@ -10,6 +10,10 @@ workspace = true
 name = "decode-benchmark"
 path = "examples/decode-benchmark.rs"
 
+[[example]]
+name = "playback-pipeline-benchmark"
+path = "examples/playback-pipeline-benchmark.rs"
+
 [dependencies]
 cap-media = { path = "../media" }
 cap-project = { path = "../project" }
@@ -30,4 +34,5 @@ flume.workspace = true
 tokio-util = "0.7.15"
 ringbuf = "0.4.8"
 lru = "0.12"
+tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 workspace-hack = { version = "0.1", path = "../workspace-hack" }
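With the `[[example]]` entry registered above, the new benchmark file that follows should be runnable with something like `cargo run --release --example playback-pipeline-benchmark -- --recording-path <path> [--fps 60] [--frames 300]` — the exact package flag depends on the workspace setup, so this invocation is an assumption rather than a documented command.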
diff --git a/crates/editor/examples/playback-pipeline-benchmark.rs b/crates/editor/examples/playback-pipeline-benchmark.rs
new file mode 100644
index 0000000000..8e04f349d8
--- /dev/null
+++ b/crates/editor/examples/playback-pipeline-benchmark.rs
@@ -0,0 +1,673 @@
+use cap_project::{
+    ProjectConfiguration, RecordingMeta, RecordingMetaInner, StudioRecordingMeta,
+    TimelineConfiguration, TimelineSegment, XY,
+};
+use cap_rendering::{
+    FrameRenderer, ProjectRecordingsMeta, ProjectUniforms, RenderVideoConstants, RendererLayers,
+    ZoomFocusInterpolator, decoder::spawn_decoder,
+    spring_mass_damper::SpringMassDamperSimulationConfig,
+};
+use std::{
+    path::{Path, PathBuf},
+    sync::Arc,
+    time::Instant,
+};
+
+fn percentile(data: &[f64], p: f64) -> f64 {
+    if data.is_empty() {
+        return 0.0;
+    }
+    let mut sorted: Vec<f64> = data.iter().copied().filter(|x| !x.is_nan()).collect();
+    if sorted.is_empty() {
+        return 0.0;
+    }
+    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
+    let idx = ((p / 100.0) * (sorted.len() - 1) as f64).round() as usize;
+    sorted[idx.min(sorted.len() - 1)]
+}
+
+fn print_stats(label: &str, times_ms: &[f64]) {
+    if times_ms.is_empty() {
+        println!("  {label}: no data");
+        return;
+    }
+    let avg = times_ms.iter().sum::<f64>() / times_ms.len() as f64;
+    let min = times_ms.iter().copied().fold(f64::INFINITY, f64::min);
+    let max = times_ms.iter().copied().fold(f64::NEG_INFINITY, f64::max);
+    let p50 = percentile(times_ms, 50.0);
+    let p95 = percentile(times_ms, 95.0);
+    let p99 = percentile(times_ms, 99.0);
+
+    println!("  {label}:");
+    println!("    avg={avg:.2}ms min={min:.2}ms max={max:.2}ms");
+    println!("    p50={p50:.2}ms p95={p95:.2}ms p99={p99:.2}ms");
+}
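Worth noting: `percentile` uses nearest-rank selection (round the fractional index into the sorted samples), so on small sample counts the reported p50 can land one element above the true median. A quick standalone check of that behavior, restating the helper exactly as written above:

```rust
fn percentile(data: &[f64], p: f64) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let mut sorted: Vec<f64> = data.iter().copied().filter(|x| !x.is_nan()).collect();
    if sorted.is_empty() {
        return 0.0;
    }
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let idx = ((p / 100.0) * (sorted.len() - 1) as f64).round() as usize;
    sorted[idx.min(sorted.len() - 1)]
}

fn main() {
    let times: Vec<f64> = (1..=10).map(f64::from).collect();
    // idx = round(0.50 * 9) = 5 (0-based), i.e. the 6th sample, not the median:
    assert_eq!(percentile(&times, 50.0), 6.0);
    // idx = round(0.95 * 9) = 9, the maximum:
    assert_eq!(percentile(&times, 95.0), 10.0);
}
```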
+#[derive(Default)]
+struct PipelineTimings {
+    decode_ms: Vec<f64>,
+    render_ms: Vec<f64>,
+    total_ms: Vec<f64>,
+    decode_failures: usize,
+    render_failures: usize,
+    frames_rendered: usize,
+}
+
+impl PipelineTimings {
+    fn print_report(&self, label: &str) {
+        println!("\n{}", "=".repeat(60));
+        println!("  {label}");
+        println!("{}", "=".repeat(60));
+
+        println!("  Frames rendered: {}", self.frames_rendered);
+        if self.decode_failures > 0 {
+            println!("  Decode failures: {}", self.decode_failures);
+        }
+        if self.render_failures > 0 {
+            println!("  Render failures: {}", self.render_failures);
+        }
+
+        if !self.total_ms.is_empty() {
+            let total_time: f64 = self.total_ms.iter().sum();
+            let effective_fps = self.frames_rendered as f64 / (total_time / 1000.0);
+            println!("  Effective FPS: {effective_fps:.1}");
+            println!(
+                "  Total time: {total_time:.0}ms for {} frames",
+                self.frames_rendered
+            );
+        }
+
+        println!();
+        print_stats("Decode", &self.decode_ms);
+        print_stats("GPU Render+Readback", &self.render_ms);
+        print_stats("Total (decode+render)", &self.total_ms);
+    }
+}
+
+async fn load_recording(
+    recording_path: &Path,
+) -> Result<
+    (
+        RecordingMeta,
+        Box<StudioRecordingMeta>,
+        ProjectConfiguration,
+        Arc<ProjectRecordingsMeta>,
+    ),
+    String,
+> {
+    let recording_meta = RecordingMeta::load_for_project(recording_path)
+        .map_err(|e| format!("Failed to load recording meta: {e}"))?;
+
+    let RecordingMetaInner::Studio(meta) = &recording_meta.inner else {
+        return Err("Not a studio recording".to_string());
+    };
+    let meta = meta.clone();
+
+    let mut project = recording_meta.project_config();
+
+    if project.timeline.is_none() {
+        let timeline_segments = match meta.as_ref() {
+            StudioRecordingMeta::SingleSegment { segment } => {
+                let display_path = recording_meta.path(&segment.display.path);
+                let duration = match cap_rendering::Video::new(&display_path, 0.0) {
+                    Ok(v) => v.duration,
+                    Err(_) => 5.0,
+                };
+                vec![TimelineSegment {
+                    recording_clip: 0,
+                    start: 0.0,
+                    end: duration,
+                    timescale: 1.0,
+                }]
+            }
+            StudioRecordingMeta::MultipleSegments { inner } => inner
+                .segments
+                .iter()
+                .enumerate()
+                .filter_map(|(i, segment)| {
+                    let display_path = recording_meta.path(&segment.display.path);
+                    let duration = match cap_rendering::Video::new(&display_path, 0.0) {
+                        Ok(v) => v.duration,
+                        Err(_) => 5.0,
+                    };
+                    if duration <= 0.0 {
+                        return None;
+                    }
+                    Some(TimelineSegment {
+                        recording_clip: i as u32,
+                        start: 0.0,
+                        end: duration,
+                        timescale: 1.0,
+                    })
+                })
+                .collect(),
+        };
+
+        if !timeline_segments.is_empty() {
+            project.timeline = Some(TimelineConfiguration {
+                segments: timeline_segments,
+                zoom_segments: Vec::new(),
+                scene_segments: Vec::new(),
+                mask_segments: Vec::new(),
+                text_segments: Vec::new(),
+            });
+        }
+    }
+
+    let recordings = Arc::new(
+        ProjectRecordingsMeta::new(&recording_meta.project_path, meta.as_ref())
+            .map_err(|e| format!("Failed to create recordings meta: {e}"))?,
+    );
+
+    Ok((recording_meta, meta, project, recordings))
+}
+
+async fn run_decode_only_benchmark(
+    recording_meta: &RecordingMeta,
+    meta: &StudioRecordingMeta,
+    project: &ProjectConfiguration,
+    fps: u32,
+    frame_count: usize,
+) -> PipelineTimings {
+    let mut timings = PipelineTimings::default();
+
+    let display_path = match meta {
+        StudioRecordingMeta::SingleSegment { segment } => {
+            recording_meta.path(&segment.display.path)
+        }
+        StudioRecordingMeta::MultipleSegments { inner } => {
+            recording_meta.path(&inner.segments[0].display.path)
+        }
+    };
+
+    let display_fps = match meta {
+        StudioRecordingMeta::SingleSegment { segment } => segment.display.fps,
+        StudioRecordingMeta::MultipleSegments { inner } => inner.segments[0].display.fps,
+    };
+
+    let decoder =
+        match spawn_decoder("benchmark-screen", display_path, display_fps, 0.0, false).await {
+            Ok(d) => d,
+            Err(e) => {
+                eprintln!("Failed to create decoder: {e}");
+                return timings;
+            }
+        };
+
+    println!("  Decoder type: {}", decoder.decoder_type());
+    println!(
+        "  Hardware accelerated: {}",
+        decoder.is_hardware_accelerated()
+    );
+    let (w, h) = decoder.video_dimensions();
+    println!("  Video dimensions: {w}x{h}");
+
+    let duration = project
+        .timeline
+        .as_ref()
+        .map(|t| t.duration())
+        .unwrap_or(10.0);
+    let max_frames = ((duration * fps as f64).ceil() as usize).min(frame_count);
+
+    println!("  Decoding {max_frames} frames at {fps}fps...");
+
+    for i in 0..max_frames {
+        let time = i as f32 / fps as f32;
+        let start = Instant::now();
+        match decoder.get_frame(time).await {
+            Some(_frame) => {
+                let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0;
+                timings.decode_ms.push(elapsed_ms);
+                timings.total_ms.push(elapsed_ms);
+                timings.frames_rendered += 1;
+            }
+            None => {
+                timings.decode_failures += 1;
+            }
+        }
+    }
+
+    timings
+}
+async fn run_full_pipeline_benchmark(
+    recording_meta: &RecordingMeta,
+    meta: &StudioRecordingMeta,
+    project: &ProjectConfiguration,
+    recordings: &ProjectRecordingsMeta,
+    fps: u32,
+    frame_count: usize,
+    resolution_base: XY<u32>,
+) -> PipelineTimings {
+    let mut timings = PipelineTimings::default();
+
+    let render_constants = match RenderVideoConstants::new(
+        &recordings.segments,
+        recording_meta.clone(),
+        (*meta).clone(),
+    )
+    .await
+    {
+        Ok(rc) => Arc::new(rc),
+        Err(e) => {
+            eprintln!("Failed to create render constants: {e}");
+            return timings;
+        }
+    };
+
+    println!(
+        "  GPU adapter: {} (software={})",
+        render_constants._adapter.get_info().name,
+        render_constants.is_software_adapter
+    );
+
+    let segments = match cap_editor::create_segments(recording_meta, meta, false).await {
+        Ok(s) => s,
+        Err(e) => {
+            eprintln!("Failed to create segments: {e}");
+            return timings;
+        }
+    };
+
+    if segments.is_empty() {
+        eprintln!("No segments found");
+        return timings;
+    }
+
+    let mut frame_renderer = FrameRenderer::new(&render_constants);
+    let mut layers = RendererLayers::new_with_options(
+        &render_constants.device,
+        &render_constants.queue,
+        render_constants.is_software_adapter,
+    );
+
+    let first_segment = &segments[0];
+    let (screen_w, screen_h) = first_segment.decoders.screen_video_dimensions();
+    let camera_dims = first_segment.decoders.camera_video_dimensions();
+    layers.prepare_for_video_dimensions(
+        &render_constants.device,
+        screen_w,
+        screen_h,
+        camera_dims.map(|(w, _)| w),
+        camera_dims.map(|(_, h)| h),
+    );
+
+    let duration = project
+        .timeline
+        .as_ref()
+        .map(|t| t.duration())
+        .unwrap_or(10.0);
+    let max_frames = ((duration * fps as f64).ceil() as usize).min(frame_count);
+
+    println!(
+        "  Rendering {max_frames} frames at {fps}fps, resolution base: {}x{}...",
+        resolution_base.x, resolution_base.y
+    );
+
+    let cursor_smoothing = (!project.cursor.raw).then_some(SpringMassDamperSimulationConfig {
+        tension: project.cursor.tension,
+        mass: project.cursor.mass,
+        friction: project.cursor.friction,
+    });
+
+    for i in 0..max_frames {
+        let frame_time = i as f64 / fps as f64;
+
+        let Some((segment_time, segment)) = project.get_segment_time(frame_time) else {
+            break;
+        };
+
+        let segment_media = match segments.get(segment.recording_clip as usize) {
+            Some(sm) => sm,
+            None => {
+                timings.decode_failures += 1;
+                continue;
+            }
+        };
+
+        let clip_offsets = project
+            .clips
+            .iter()
+            .find(|v| v.index == segment.recording_clip)
+            .map(|v| v.offsets)
+            .unwrap_or_default();
+
+        let decode_start = Instant::now();
+        let segment_frames_opt = if i == 0 {
+            segment_media
+                .decoders
+                .get_frames_initial(segment_time as f32, !project.camera.hide, clip_offsets)
+                .await
+        } else {
+            segment_media
+                .decoders
+                .get_frames(segment_time as f32, !project.camera.hide, clip_offsets)
+                .await
+        };
+        let decode_elapsed_ms = decode_start.elapsed().as_secs_f64() * 1000.0;
+
+        let Some(segment_frames) = segment_frames_opt else {
+            timings.decode_failures += 1;
+            continue;
+        };
+
+        timings.decode_ms.push(decode_elapsed_ms);
+
+        let zoom_focus_interpolator = ZoomFocusInterpolator::new(
+            &segment_media.cursor,
+            cursor_smoothing,
+            project.screen_movement_spring,
+            duration,
+        );
+
+        let uniforms = ProjectUniforms::new(
+            &render_constants,
+            project,
+            i as u32,
+            fps,
+            resolution_base,
+            &segment_media.cursor,
+            &segment_frames,
+            duration,
+            &zoom_focus_interpolator,
+        );
+
+        let render_start = Instant::now();
+        match frame_renderer
+            .render(segment_frames, uniforms, &segment_media.cursor, &mut layers)
+            .await
+        {
+            Ok(_frame) => {
+                let render_elapsed_ms = render_start.elapsed().as_secs_f64() * 1000.0;
+                timings.render_ms.push(render_elapsed_ms);
+                timings.total_ms.push(decode_elapsed_ms + render_elapsed_ms);
+                timings.frames_rendered += 1;
+            }
+            Err(e) => {
+                timings.render_failures += 1;
+                if timings.render_failures <= 3 {
+                    eprintln!("  Render failed at frame {i}: {e}");
+                }
+            }
+        }
+
+        if (i + 1) % 100 == 0 {
+            println!("  Progress: {}/{max_frames} frames", i + 1);
+        }
+    }
+
+    timings
+}
render constants: {e}"); + return timings; + } + }; + + let segments = match cap_editor::create_segments(recording_meta, meta, false).await { + Ok(s) => s, + Err(e) => { + eprintln!("Failed to create segments: {e}"); + return timings; + } + }; + + if segments.is_empty() { + eprintln!("No segments found"); + return timings; + } + + let mut frame_renderer = FrameRenderer::new(&render_constants); + let mut layers = RendererLayers::new_with_options( + &render_constants.device, + &render_constants.queue, + render_constants.is_software_adapter, + ); + + let first_segment = &segments[0]; + let (screen_w, screen_h) = first_segment.decoders.screen_video_dimensions(); + let camera_dims = first_segment.decoders.camera_video_dimensions(); + layers.prepare_for_video_dimensions( + &render_constants.device, + screen_w, + screen_h, + camera_dims.map(|(w, _)| w), + camera_dims.map(|(_, h)| h), + ); + + let duration = project + .timeline + .as_ref() + .map(|t| t.duration()) + .unwrap_or(10.0); + + let cursor_smoothing = (!project.cursor.raw).then_some(SpringMassDamperSimulationConfig { + tension: project.cursor.tension, + mass: project.cursor.mass, + friction: project.cursor.friction, + }); + + let scrub_positions: Vec = { + let golden_ratio = 1.618_034; + let mut positions = Vec::with_capacity(50); + let mut pos = 0.0; + for _ in 0..50 { + pos = (pos + golden_ratio * duration) % duration; + positions.push(pos); + } + positions + }; + + println!( + " Scrubbing to {} random positions...", + scrub_positions.len() + ); + + for (i, &scrub_time) in scrub_positions.iter().enumerate() { + let Some((segment_time, segment)) = project.get_segment_time(scrub_time) else { + continue; + }; + + let segment_media = match segments.get(segment.recording_clip as usize) { + Some(sm) => sm, + None => continue, + }; + + let clip_offsets = project + .clips + .iter() + .find(|v| v.index == segment.recording_clip) + .map(|v| v.offsets) + .unwrap_or_default(); + + let decode_start = Instant::now(); + let segment_frames_opt = segment_media + .decoders + .get_frames_initial(segment_time as f32, !project.camera.hide, clip_offsets) + .await; + let decode_elapsed_ms = decode_start.elapsed().as_secs_f64() * 1000.0; + + let Some(segment_frames) = segment_frames_opt else { + timings.decode_failures += 1; + continue; + }; + + timings.decode_ms.push(decode_elapsed_ms); + + let frame_number = (scrub_time * fps as f64).round() as u32; + + let zoom_focus_interpolator = ZoomFocusInterpolator::new( + &segment_media.cursor, + cursor_smoothing, + project.screen_movement_spring, + duration, + ); + + let uniforms = ProjectUniforms::new( + &render_constants, + project, + frame_number, + fps, + resolution_base, + &segment_media.cursor, + &segment_frames, + duration, + &zoom_focus_interpolator, + ); + + let render_start = Instant::now(); + match frame_renderer + .render(segment_frames, uniforms, &segment_media.cursor, &mut layers) + .await + { + Ok(_frame) => { + let render_elapsed_ms = render_start.elapsed().as_secs_f64() * 1000.0; + timings.render_ms.push(render_elapsed_ms); + timings.total_ms.push(decode_elapsed_ms + render_elapsed_ms); + timings.frames_rendered += 1; + } + Err(e) => { + timings.render_failures += 1; + if timings.render_failures <= 3 { + eprintln!(" Render failed at scrub {i}: {e}"); + } + } + } + } + + timings +} + +#[tokio::main] +async fn main() { + tracing_subscriber::fmt() + .with_env_filter( + tracing_subscriber::EnvFilter::from_default_env() + .add_directive(tracing::Level::WARN.into()), + ) + .init(); + + 
+#[tokio::main]
+async fn main() {
+    tracing_subscriber::fmt()
+        .with_env_filter(
+            tracing_subscriber::EnvFilter::from_default_env()
+                .add_directive(tracing::Level::WARN.into()),
+        )
+        .init();
+
+    ffmpeg::init().expect("Failed to initialize FFmpeg");
+
+    let args: Vec<String> = std::env::args().collect();
+
+    let recording_path = args
+        .iter()
+        .position(|a| a == "--recording-path")
+        .and_then(|i| args.get(i + 1))
+        .map(PathBuf::from)
+        .expect("Usage: playback-pipeline-benchmark --recording-path <path> [--fps <fps>] [--frames <frames>]");
+
+    let fps = args
+        .iter()
+        .position(|a| a == "--fps")
+        .and_then(|i| args.get(i + 1))
+        .and_then(|s| s.parse().ok())
+        .unwrap_or(60);
+
+    let frame_count = args
+        .iter()
+        .position(|a| a == "--frames")
+        .and_then(|i| args.get(i + 1))
+        .and_then(|s| s.parse().ok())
+        .unwrap_or(300);
+
+    println!("{}", "=".repeat(60));
+    println!("  CAP PLAYBACK PIPELINE BENCHMARK");
+    println!("{}", "=".repeat(60));
+    println!();
+    println!("Recording: {}", recording_path.display());
+    println!("Target FPS: {fps}");
+    println!("Max frames: {frame_count}");
+    println!();
+
+    let (recording_meta, meta, project, recordings) = match load_recording(&recording_path).await {
+        Ok(r) => r,
+        Err(e) => {
+            eprintln!("Failed to load recording: {e}");
+            std::process::exit(1);
+        }
+    };
+
+    let duration = project
+        .timeline
+        .as_ref()
+        .map(|t| t.duration())
+        .unwrap_or(0.0);
+    println!("Recording duration: {duration:.2}s");
+
+    let resolutions = [
+        (XY::new(1920, 1080), "Full (1920x1080)"),
+        (XY::new(1248, 702), "Half (1248x702)"),
+        (XY::new(480, 270), "Quarter (480x270)"),
+    ];
+
+    println!("\n--- DECODE-ONLY BENCHMARK ---");
+    let decode_timings =
+        run_decode_only_benchmark(&recording_meta, meta.as_ref(), &project, fps, frame_count).await;
+    decode_timings.print_report("DECODE-ONLY");
+
+    for (resolution_base, label) in &resolutions {
+        println!("\n--- FULL PIPELINE: {label} ---");
+        let pipeline_timings = run_full_pipeline_benchmark(
+            &recording_meta,
+            meta.as_ref(),
+            &project,
+            &recordings,
+            fps,
+            frame_count,
+            *resolution_base,
+        )
+        .await;
+        pipeline_timings.print_report(&format!("FULL PIPELINE - {label}"));
+    }
+
+    println!("\n--- SCRUBBING BENCHMARK (Half resolution) ---");
+    let scrub_timings = run_scrubbing_benchmark(
+        &recording_meta,
+        meta.as_ref(),
+        &project,
+        &recordings,
+        fps,
+        XY::new(1248, 702),
+    )
+    .await;
+    scrub_timings.print_report("SCRUBBING (Half resolution)");
+
+    println!("\n{}", "=".repeat(60));
+    println!("  BENCHMARK COMPLETE");
+    println!("{}", "=".repeat(60));
+
+    let target_frame_time_ms = 1000.0 / fps as f64;
+    println!("\nTarget frame time at {fps}fps: {target_frame_time_ms:.2}ms");
+
+    if !decode_timings.decode_ms.is_empty() {
+        let decode_p95 = percentile(&decode_timings.decode_ms, 95.0);
+        let decode_budget_pct = decode_p95 / target_frame_time_ms * 100.0;
+        println!("Decode p95 ({decode_p95:.2}ms) uses {decode_budget_pct:.0}% of frame budget");
+    }
+
+    for (_resolution_base, label) in &resolutions {
+        println!("\n{label}:");
+        println!("  Frame budget: {target_frame_time_ms:.2}ms");
+    }
+}
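The closing summary expresses decode p95 as a share of the per-frame budget; the arithmetic, pulled out for clarity under the benchmark's default of 60 fps:

```rust
// Assuming the frame-budget arithmetic used in the report above.
fn budget_pct(p95_ms: f64, fps: u32) -> f64 {
    let frame_budget_ms = 1000.0 / fps as f64;
    p95_ms / frame_budget_ms * 100.0
}

fn main() {
    // e.g. an 8 ms decode p95 consumes ~48% of the 16.67 ms budget at 60 fps.
    assert!((budget_pct(8.0, 60) - 48.0).abs() < 0.1);
}
```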
diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs
index 012869016a..d3f5d911cd 100644
--- a/crates/editor/src/editor.rs
+++ b/crates/editor/src/editor.rs
@@ -3,7 +3,7 @@ use std::time::Instant;
 
 use cap_project::{CursorEvents, RecordingMeta, StudioRecordingMeta};
 use cap_rendering::{
-    DecodedSegmentFrames, FrameRenderer, ProjectRecordingsMeta, ProjectUniforms,
+    DecodedSegmentFrames, FrameRenderer, Nv12RenderedFrame, ProjectRecordingsMeta, ProjectUniforms,
     RenderVideoConstants, RenderedFrame, RendererLayers,
 };
 use tokio::sync::{mpsc, oneshot};
@@ -21,9 +21,14 @@ pub enum RendererMessage {
     },
 }
 
+pub enum EditorFrameOutput {
+    Rgba(RenderedFrame),
+    Nv12(Nv12RenderedFrame),
+}
+
 pub struct Renderer {
     rx: mpsc::Receiver<RendererMessage>,
-    frame_cb: Box<dyn FnMut(RenderedFrame) + Send>,
+    frame_cb: Box<dyn FnMut(EditorFrameOutput) + Send>,
     render_constants: Arc<RenderVideoConstants>,
     #[allow(unused)]
     total_frames: u32,
@@ -36,7 +41,7 @@ pub struct RendererHandle {
 impl Renderer {
     pub fn spawn(
         render_constants: Arc<RenderVideoConstants>,
-        frame_cb: Box<dyn FnMut(RenderedFrame) + Send>,
+        frame_cb: Box<dyn FnMut(EditorFrameOutput) + Send>,
         recording_meta: &RecordingMeta,
         meta: &StudioRecordingMeta,
     ) -> Result {
@@ -152,7 +157,7 @@ impl Renderer {
                 .await
             {
                 Ok(frame) => {
-                    (self.frame_cb)(frame);
+                    (self.frame_cb)(EditorFrameOutput::Rgba(frame));
                 }
                 Err(e) => {
                     tracing::error!(error = %e, "Failed to render frame in editor");
diff --git a/crates/editor/src/editor_instance.rs b/crates/editor/src/editor_instance.rs
index 16b05e33e8..4879fadc26 100644
--- a/crates/editor/src/editor_instance.rs
+++ b/crates/editor/src/editor_instance.rs
@@ -8,7 +8,7 @@ use cap_project::{
 };
 use cap_rendering::{
     ProjectRecordingsMeta, ProjectUniforms, RecordingSegmentDecoders, RenderVideoConstants,
-    RenderedFrame, SegmentVideoPaths, Video, ZoomFocusInterpolator, get_duration,
+    SegmentVideoPaths, Video, ZoomFocusInterpolator, get_duration,
     spring_mass_damper::SpringMassDamperSimulationConfig,
 };
 use std::{
@@ -94,7 +94,7 @@ impl EditorInstance {
     pub async fn new(
         project_path: PathBuf,
         on_state_change: impl Fn(&EditorState) + Send + Sync + 'static,
-        frame_cb: Box<dyn FnMut(RenderedFrame) + Send>,
+        frame_cb: Box<dyn FnMut(EditorFrameOutput) + Send>,
     ) -> Result<Arc<Self>, String> {
         if !project_path.exists() {
             return Err(format!("Video path {} not found!", project_path.display()));
@@ -513,8 +513,8 @@ impl EditorInstance {
             },
         );
 
-        let zoom_focus_interpolator = ZoomFocusInterpolator::new(
-            &segment_medias.cursor,
+        let zoom_focus_interpolator = ZoomFocusInterpolator::new_arc(
+            segment_medias.cursor.clone(),
             cursor_smoothing,
             project.screen_movement_spring,
             total_duration,
diff --git a/crates/editor/src/lib.rs b/crates/editor/src/lib.rs
index 89af32ea2a..0d37d6e87d 100644
--- a/crates/editor/src/lib.rs
+++ b/crates/editor/src/lib.rs
@@ -5,5 +5,6 @@ mod playback;
 mod segments;
 
 pub use audio::AudioRenderer;
+pub use editor::EditorFrameOutput;
 pub use editor_instance::{EditorInstance, EditorState, SegmentMedia, create_segments};
 pub use segments::get_audio_segments;
diff --git a/crates/editor/src/playback.rs b/crates/editor/src/playback.rs
index 000f209c6b..87a1776a3c 100644
--- a/crates/editor/src/playback.rs
+++ b/crates/editor/src/playback.rs
@@ -34,11 +34,13 @@ use crate::{
     audio::AudioSegment, editor, editor_instance::SegmentMedia, segments::get_audio_segments,
 };
 
-const PREFETCH_BUFFER_SIZE: usize = 60;
-const PARALLEL_DECODE_TASKS: usize = 4;
-const MAX_PREFETCH_AHEAD: u32 = 60;
-const PREFETCH_BEHIND: u32 = 15;
-const FRAME_CACHE_SIZE: usize = 60;
+const PREFETCH_BUFFER_SIZE: usize = 90;
+const PARALLEL_DECODE_TASKS: usize = 6;
+const INITIAL_PARALLEL_DECODE_TASKS: usize = 8;
+const MAX_PREFETCH_AHEAD: u32 = 90;
+const PREFETCH_BEHIND: u32 = 10;
+const FRAME_CACHE_SIZE: usize = 90;
+const RAMP_UP_FRAME_COUNT: u32 = 15;
 
 #[derive(Debug)]
 pub enum PlaybackStartError {
@@ -98,6 +100,24 @@ impl FrameCache {
         self.cache
             .put(frame_number, (segment_frames, segment_index));
     }
+
+    fn evict_far_from(&mut self, current_frame: u32, max_distance: u32) {
+        let keys_to_remove: Vec<u32> = self
+            .cache
+            .iter()
+            .filter_map(|(k, _)| {
+                if (*k).abs_diff(current_frame) > max_distance {
+                    Some(*k)
+                } else {
+                    None
+                }
+            })
+            .collect();
+
+        for key in keys_to_remove {
+            self.cache.pop(&key);
+        }
+    }
 }
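`evict_far_from` adds distance-based pruning on top of the cache's LRU policy: after a seek or skip, anything outside ±`MAX_PREFETCH_AHEAD` of the playhead is dropped eagerly instead of waiting for LRU pressure. A minimal standalone illustration of the windowing rule, with a plain `HashMap` standing in for the LRU cache:

```rust
use std::collections::HashMap;

const MAX_PREFETCH_AHEAD: u32 = 90;

// abs_diff keeps a symmetric window around the playhead.
fn evict_far_from(cache: &mut HashMap<u32, Vec<u8>>, current_frame: u32, max_distance: u32) {
    cache.retain(|&k, _| k.abs_diff(current_frame) <= max_distance);
}

fn main() {
    let mut cache: HashMap<u32, Vec<u8>> = (0..300).map(|k| (k, Vec::new())).collect();
    evict_far_from(&mut cache, 150, MAX_PREFETCH_AHEAD);
    assert!(cache.keys().all(|&k| (60..=240).contains(&k)));
    assert_eq!(cache.len(), 181); // frames 150 ± 90, inclusive
}
```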
 
 impl Playback {
@@ -161,8 +181,6 @@ impl Playback {
             let mut in_flight: FuturesUnordered = FuturesUnordered::new();
             let mut frames_decoded: u32 = 0;
             let mut prefetched_behind: HashSet<u32> = HashSet::new();
-            const INITIAL_PARALLEL_TASKS: usize = 4;
-            const RAMP_UP_AFTER_FRAMES: u32 = 5;
 
             let mut cached_project = prefetch_project.borrow().clone();
 
@@ -203,8 +221,8 @@ impl Playback {
                 let current_playback_frame = *playback_position_rx.borrow();
                 let max_prefetch_frame = current_playback_frame + MAX_PREFETCH_AHEAD;
 
-                let effective_parallel = if frames_decoded < RAMP_UP_AFTER_FRAMES {
-                    INITIAL_PARALLEL_TASKS
+                let effective_parallel = if frames_decoded < RAMP_UP_FRAME_COUNT {
+                    INITIAL_PARALLEL_DECODE_TASKS
                 } else {
                     PARALLEL_DECODE_TASKS
                 };
@@ -382,13 +400,18 @@ impl Playback {
         let mut prefetch_buffer: VecDeque = VecDeque::with_capacity(PREFETCH_BUFFER_SIZE);
         let mut frame_cache = FrameCache::new(FRAME_CACHE_SIZE);
-        let aggressive_skip_threshold = 10u32;
+        let aggressive_skip_threshold = 6u32;
 
         let mut total_frames_rendered = 0u64;
-        let mut _total_frames_skipped = 0u64;
-
-        let warmup_target_frames = 20usize;
-        let warmup_after_first_timeout = Duration::from_millis(1000);
+        let mut total_frames_skipped = 0u64;
+        let mut cache_hits = 0u64;
+        let mut prefetch_hits = 0u64;
+        let mut sync_decodes = 0u64;
+        let mut last_stats_time = Instant::now();
+        let stats_interval = Duration::from_secs(2);
+
+        let warmup_target_frames = 10usize;
+        let warmup_after_first_timeout = Duration::from_millis(500);
         let warmup_no_frames_timeout = Duration::from_secs(5);
         let warmup_start = Instant::now();
         let mut first_frame_time: Option<Instant> = None;
@@ -486,6 +509,7 @@ impl Playback {
                 let segment_frames_opt = if let Some(cached) = frame_cache.get(frame_number) {
                     was_cached = true;
+                    cache_hits += 1;
                     Some(cached)
                 } else {
                     let prefetched_idx = prefetch_buffer
@@ -494,6 +518,7 @@ impl Playback {
 
                     if let Some(idx) = prefetched_idx {
                         let prefetched = prefetch_buffer.remove(idx).unwrap();
+                        prefetch_hits += 1;
                         Some((
                             Arc::new(prefetched.segment_frames),
                             prefetched.segment_index,
@@ -506,7 +531,7 @@ impl Playback {
 
                         if is_in_flight {
                             let wait_start = Instant::now();
-                            let max_wait = Duration::from_millis(200);
+                            let max_wait = Duration::from_millis(100);
                             let mut found_frame = None;
 
                             while wait_start.elapsed() < max_wait {
@@ -549,7 +574,7 @@ impl Playback {
                                 ))
                             } else {
                                 frame_number = frame_number.saturating_add(1);
-                                _total_frames_skipped += 1;
+                                total_frames_skipped += 1;
                                 continue;
                             }
                         }
@@ -557,7 +582,7 @@ impl Playback {
                             let _ = frame_request_tx.send(frame_number);
 
                             let wait_result = tokio::time::timeout(
-                                Duration::from_millis(200),
+                                Duration::from_millis(100),
                                 prefetch_rx.recv(),
                             )
                             .await;
@@ -571,12 +596,12 @@ impl Playback {
                                 } else {
                                     prefetch_buffer.push_back(prefetched);
                                     frame_number = frame_number.saturating_add(1);
-                                    _total_frames_skipped += 1;
+                                    total_frames_skipped += 1;
                                     continue;
                                 }
                             } else {
                                 frame_number = frame_number.saturating_add(1);
-                                _total_frames_skipped += 1;
+                                total_frames_skipped += 1;
                                 continue;
                             }
                         } else {
@@ -604,7 +629,7 @@ impl Playback {
                                 guard.insert(frame_number);
                             }
 
-                            let max_wait = Duration::from_millis(200);
+                            let max_wait = Duration::from_millis(100);
                             let data = tokio::select! {
                                _ = stop_rx.changed() => {
                                    if let Ok(mut guard) = main_in_flight.write() {
                                        guard.remove(&frame_number);
                                    }
@@ -617,7 +642,7 @@ impl Playback {
                                        guard.remove(&frame_number);
                                    }
                                    frame_number = frame_number.saturating_add(1);
-                                    _total_frames_skipped += 1;
+                                    total_frames_skipped += 1;
                                    continue;
                                },
                                data = segment_media
@@ -630,6 +655,7 @@ impl Playback {
                                    => data,
                            },
                            };
 
+                            sync_decodes += 1;
                            data.map(|frames| (Arc::new(frames), segment.recording_clip))
                        }
                    }
@@ -657,8 +683,8 @@ impl Playback {
                     friction: cached_project.cursor.friction,
                 });
 
-                let zoom_focus_interpolator = ZoomFocusInterpolator::new(
-                    &segment_media.cursor,
+                let zoom_focus_interpolator = ZoomFocusInterpolator::new_arc(
+                    segment_media.cursor.clone(),
                     cursor_smoothing,
                     cached_project.screen_movement_spring,
                     duration,
@@ -687,6 +713,23 @@ impl Playback {
                     total_frames_rendered += 1;
                 }
 
+                if last_stats_time.elapsed() >= stats_interval {
+                    let effective_fps =
+                        total_frames_rendered as f64 / start.elapsed().as_secs_f64().max(0.001);
+                    let buffer_len = prefetch_buffer.len();
+                    info!(
+                        effective_fps = format!("{:.1}", effective_fps),
+                        total_rendered = total_frames_rendered,
+                        total_skipped = total_frames_skipped,
+                        cache_hits = cache_hits,
+                        prefetch_hits = prefetch_hits,
+                        sync_decodes = sync_decodes,
+                        prefetch_buffer = buffer_len,
+                        "Playback stats"
+                    );
+                    last_stats_time = Instant::now();
+                }
+
                 event_tx.send(PlaybackEvent::Frame(frame_number)).ok();
 
                 frame_number = frame_number.saturating_add(1);
@@ -712,9 +755,10 @@ impl Playback {
                 let skipped = frames_behind.saturating_sub(1);
                 if skipped > 0 {
                     frame_number += skipped;
-                    _total_frames_skipped += skipped as u64;
+                    total_frames_skipped += skipped as u64;
 
                     prefetch_buffer.retain(|p| p.frame_number >= frame_number);
+                    frame_cache.evict_far_from(frame_number, MAX_PREFETCH_AHEAD);
                     let _ = frame_request_tx.send(frame_number);
                     let _ = playback_position_tx.send(frame_number);
                     if has_audio
@@ -1040,14 +1084,14 @@ impl AudioPlayback {
         #[cfg(target_os = "windows")]
         const FIXED_LATENCY_SECS: f64 = 0.08;
         #[cfg(target_os = "windows")]
-        const SYNC_THRESHOLD_SECS: f64 = 0.20;
+        const SYNC_THRESHOLD_SECS: f64 = 0.10;
         #[cfg(target_os = "windows")]
-        const HARD_SEEK_THRESHOLD_SECS: f64 = 0.5;
+        const HARD_SEEK_THRESHOLD_SECS: f64 = 0.3;
         #[cfg(target_os = "windows")]
-        const MIN_SYNC_INTERVAL_CALLBACKS: u32 = 50;
+        const MIN_SYNC_INTERVAL_CALLBACKS: u32 = 30;
 
         #[cfg(not(target_os = "windows"))]
-        const SYNC_THRESHOLD_SECS: f64 = 0.12;
+        const SYNC_THRESHOLD_SECS: f64 = 0.08;
 
         #[cfg(target_os = "windows")]
         let mut callbacks_since_last_sync: u32 = MIN_SYNC_INTERVAL_CALLBACKS;
@@ -1205,7 +1249,7 @@ impl AudioPlayback {
                     let video_playhead = *playhead_rx_for_stream.borrow_and_update();
                     let jump = (video_playhead - last_video_playhead).abs();
 
-                    if jump > 0.1 {
+                    if jump > 0.05 {
                         audio_buffer.set_playhead(video_playhead);
                     }
diff --git a/crates/recording/src/output_pipeline/core.rs b/crates/recording/src/output_pipeline/core.rs
index 737cb368a3..b184cc0bea 100644
--- a/crates/recording/src/output_pipeline/core.rs
+++ b/crates/recording/src/output_pipeline/core.rs
@@ -419,7 +419,7 @@ impl TimestampAnomalyTracker {
         jump_secs: f64,
         now: Instant,
     ) -> Result {
-        let wall_clock_confirmed = self.last_valid_wall_clock.map_or(false, |last_wc| {
+        let wall_clock_confirmed = self.last_valid_wall_clock.is_some_and(|last_wc| {
             let wall_clock_gap_secs = now.duration_since(last_wc).as_secs_f64();
             wall_clock_gap_secs >= jump_secs * 0.5
         });
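The `map_or` rewrites here and in `camera.rs` below are the clippy-preferred spellings; behavior is identical. A quick equivalence check:

```rust
fn main() {
    let x: Option<u32> = Some(4);
    // is_some_and (stable since Rust 1.70) replaces map_or(false, ...):
    assert_eq!(x.map_or(false, |v| v > 3), x.is_some_and(|v| v > 3));
    // is_none_or (stable since Rust 1.82) replaces map_or(true, ...):
    assert_eq!(x.map_or(true, |v| v > 3), x.is_none_or(|v| v > 3));
}
```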
diff --git a/crates/recording/src/output_pipeline/macos.rs b/crates/recording/src/output_pipeline/macos.rs
index 5a190257ac..dc03b7a51b 100644
--- a/crates/recording/src/output_pipeline/macos.rs
+++ b/crates/recording/src/output_pipeline/macos.rs
@@ -34,7 +34,7 @@ fn get_available_disk_space_mb(path: &std::path::Path) -> Option<u64> {
     if result != 0 {
         return None;
     }
-    Some((stat.f_bavail as u64).saturating_mul(stat.f_frsize as u64) / (1024 * 1024))
+    Some((stat.f_bavail as u64).saturating_mul(stat.f_frsize) / (1024 * 1024))
 }
 
 fn get_mp4_muxer_buffer_size(instant_mode: bool) -> usize {
diff --git a/crates/recording/src/sources/camera.rs b/crates/recording/src/sources/camera.rs
index db0fa42b9b..aede5e52d5 100644
--- a/crates/recording/src/sources/camera.rs
+++ b/crates/recording/src/sources/camera.rs
@@ -133,7 +133,7 @@ impl VideoSource for Camera {
             if frame_width != original_width || frame_height != original_height {
                 let needs_new_scaler = scaler
                     .as_ref()
-                    .map_or(true, |s| !s.matches_source(frame_width, frame_height));
+                    .is_none_or(|s| !s.matches_source(frame_width, frame_height));
 
                 if needs_new_scaler {
                     let frame_format = frame.inner.format();
diff --git a/crates/recording/src/sources/microphone.rs b/crates/recording/src/sources/microphone.rs
index 3e8b6247f3..4bdd47522a 100644
--- a/crates/recording/src/sources/microphone.rs
+++ b/crates/recording/src/sources/microphone.rs
@@ -299,12 +299,10 @@ impl AudioSource for Microphone {
 
                     silence_counter.fetch_add(1, Ordering::Relaxed);
 
-                    match tokio::time::timeout(send_timeout, audio_tx.send(audio_frame))
-                        .await
-                    {
-                        Ok(Ok(())) => {}
-                        _ => {}
-                    }
+                    if let Ok(Ok(())) =
+                        tokio::time::timeout(send_timeout, audio_tx.send(audio_frame))
+                            .await
+                    {}
                 }
             }
         }
diff --git a/crates/rendering/src/decoder/mod.rs b/crates/rendering/src/decoder/mod.rs
index 77c9b540fc..1782b5bd86 100644
--- a/crates/rendering/src/decoder/mod.rs
+++ b/crates/rendering/src/decoder/mod.rs
@@ -456,8 +456,8 @@ pub struct AsyncVideoDecoderHandle {
 }
 
 impl AsyncVideoDecoderHandle {
-    const NORMAL_TIMEOUT_MS: u64 = 5000;
-    const INITIAL_SEEK_TIMEOUT_MS: u64 = 20000;
+    const NORMAL_TIMEOUT_MS: u64 = 2000;
+    const INITIAL_SEEK_TIMEOUT_MS: u64 = 10000;
 
     pub async fn get_frame(&self, time: f32) -> Option<DecodedFrame> {
         self.get_frame_with_timeout(time, Self::NORMAL_TIMEOUT_MS)
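Context for the converter that follows: NV12 stores a full-resolution luma (Y) plane plus a half-height interleaved chroma (UV) plane, i.e. 12 bits per pixel, which is why the staging buffers below are sized at width × height × 3/2 bytes. Restating the size helper with a quick check of the arithmetic, assuming the even dimensions the shader requires:

```rust
fn nv12_size(width: u32, height: u32) -> u64 {
    let y_size = (width as u64) * (height as u64);          // 1 byte per pixel of luma
    let uv_size = (width as u64) * (height as u64 / 2);     // interleaved U+V, half height
    y_size + uv_size
}

fn main() {
    assert_eq!(nv12_size(1920, 1080), 1920u64 * 1080 * 3 / 2); // 3_110_400 bytes
    assert_eq!(nv12_size(2, 2), 6); // 4 luma + 2 chroma bytes
}
```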
diff --git a/crates/rendering/src/frame_pipeline.rs b/crates/rendering/src/frame_pipeline.rs
index 2517b6a678..5b9e207ee2 100644
--- a/crates/rendering/src/frame_pipeline.rs
+++ b/crates/rendering/src/frame_pipeline.rs
@@ -5,7 +5,336 @@ use wgpu::COPY_BYTES_PER_ROW_ALIGNMENT;
 
 use crate::{ProjectUniforms, RenderingError};
 
-const GPU_BUFFER_WAIT_TIMEOUT_SECS: u64 = 30;
+const GPU_BUFFER_WAIT_TIMEOUT_SECS: u64 = 10;
+
+pub struct RgbaToNv12Converter {
+    pipeline: wgpu::ComputePipeline,
+    bind_group_layout: wgpu::BindGroupLayout,
+    params_buffer: wgpu::Buffer,
+    nv12_buffer: Option<wgpu::Buffer>,
+    readback_buffers: [Option<Arc<wgpu::Buffer>>; 2],
+    current_readback: usize,
+    pending: Option<PendingNv12Readback>,
+    cached_width: u32,
+    cached_height: u32,
+}
+
+#[repr(C)]
+#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)]
+struct Nv12Params {
+    width: u32,
+    height: u32,
+    y_stride: u32,
+    uv_stride: u32,
+}
+
+impl RgbaToNv12Converter {
+    pub fn new(device: &wgpu::Device) -> Self {
+        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
+            label: Some("RGBA to NV12 Converter"),
+            source: wgpu::ShaderSource::Wgsl(std::borrow::Cow::Borrowed(include_str!(
+                "shaders/rgba_to_nv12.wgsl"
+            ))),
+        });
+
+        let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+            label: Some("RGBA to NV12 Bind Group Layout"),
+            entries: &[
+                wgpu::BindGroupLayoutEntry {
+                    binding: 0,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Texture {
+                        sample_type: wgpu::TextureSampleType::Float { filterable: false },
+                        view_dimension: wgpu::TextureViewDimension::D2,
+                        multisampled: false,
+                    },
+                    count: None,
+                },
+                wgpu::BindGroupLayoutEntry {
+                    binding: 1,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Buffer {
+                        ty: wgpu::BufferBindingType::Storage { read_only: false },
+                        has_dynamic_offset: false,
+                        min_binding_size: None,
+                    },
+                    count: None,
+                },
+                wgpu::BindGroupLayoutEntry {
+                    binding: 2,
+                    visibility: wgpu::ShaderStages::COMPUTE,
+                    ty: wgpu::BindingType::Buffer {
+                        ty: wgpu::BufferBindingType::Uniform,
+                        has_dynamic_offset: false,
+                        min_binding_size: None,
+                    },
+                    count: None,
+                },
+            ],
+        });
+
+        let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+            label: Some("RGBA to NV12 Pipeline Layout"),
+            bind_group_layouts: &[&bind_group_layout],
+            push_constant_ranges: &[],
+        });
+
+        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
+            label: Some("RGBA to NV12 Pipeline"),
+            layout: Some(&pipeline_layout),
+            module: &shader,
+            entry_point: Some("main"),
+            compilation_options: Default::default(),
+            cache: None,
+        });
+
+        let params_buffer = device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("NV12 Params Buffer"),
+            size: std::mem::size_of::<Nv12Params>() as u64,
+            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
+            mapped_at_creation: false,
+        });
+
+        Self {
+            pipeline,
+            bind_group_layout,
+            params_buffer,
+            nv12_buffer: None,
+            readback_buffers: [None, None],
+            current_readback: 0,
+            pending: None,
+            cached_width: 0,
+            cached_height: 0,
+        }
+    }
+
+    fn nv12_size(width: u32, height: u32) -> u64 {
+        let y_size = (width as u64) * (height as u64);
+        let uv_size = (width as u64) * (height as u64 / 2);
+        y_size + uv_size
+    }
+
+    fn ensure_buffers(&mut self, device: &wgpu::Device, width: u32, height: u32) {
+        if self.cached_width == width && self.cached_height == height {
+            return;
+        }
+
+        let nv12_size = Self::nv12_size(width, height);
+        let aligned_size = nv12_size.div_ceil(4) * 4;
+
+        self.nv12_buffer = Some(device.create_buffer(&wgpu::BufferDescriptor {
+            label: Some("NV12 Storage Buffer"),
+            size: aligned_size,
+            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
+            mapped_at_creation: false,
+        }));
+
+        let make_readback = || {
+            Arc::new(device.create_buffer(&wgpu::BufferDescriptor {
+                label: Some("NV12 Readback Buffer"),
+                size: nv12_size,
+                usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
+                mapped_at_creation: false,
+            }))
+        };
+
+        self.readback_buffers = [Some(make_readback()), Some(make_readback())];
+        self.current_readback = 0;
+        self.cached_width = width;
+        self.cached_height = height;
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    pub fn submit_conversion(
+        &mut self,
+        device: &wgpu::Device,
+        queue: &wgpu::Queue,
+        encoder: &mut wgpu::CommandEncoder,
+        source_texture: &wgpu::Texture,
+        width: u32,
+        height: u32,
+        frame_number: u32,
+        frame_rate: u32,
+    ) -> bool {
+        if width == 0 || height == 0 || !width.is_multiple_of(4) || !height.is_multiple_of(2) {
+            return false;
+        }
+
+        self.ensure_buffers(device, width, height);
+
+        let Some(nv12_buffer) = self.nv12_buffer.as_ref() else {
+            return false;
+        };
+
+        let readback_buffer = match self.readback_buffers[self.current_readback].as_ref() {
+            Some(b) => b.clone(),
+            None => return false,
+        };
+        self.current_readback = 1 - self.current_readback;
+        let y_stride = width;
+        let uv_stride = width;
+
+        let params = Nv12Params {
+            width,
+            height,
+            y_stride,
+            uv_stride,
+        };
+        queue.write_buffer(&self.params_buffer, 0, bytemuck::cast_slice(&[params]));
+
+        let source_view = source_texture.create_view(&Default::default());
+
+        let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
+            label: Some("RGBA to NV12 Bind Group"),
+            layout: &self.bind_group_layout,
+            entries: &[
+                wgpu::BindGroupEntry {
+                    binding: 0,
+                    resource: wgpu::BindingResource::TextureView(&source_view),
+                },
+                wgpu::BindGroupEntry {
+                    binding: 1,
+                    resource: nv12_buffer.as_entire_binding(),
+                },
+                wgpu::BindGroupEntry {
+                    binding: 2,
+                    resource: self.params_buffer.as_entire_binding(),
+                },
+            ],
+        });
+
+        {
+            let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+                label: Some("RGBA to NV12 Conversion"),
+                ..Default::default()
+            });
+            pass.set_pipeline(&self.pipeline);
+            pass.set_bind_group(0, &bind_group, &[]);
+            pass.dispatch_workgroups(width.div_ceil(4 * 8), height.div_ceil(2 * 8), 1);
+        }
+
+        let nv12_size = Self::nv12_size(width, height);
+        encoder.copy_buffer_to_buffer(nv12_buffer, 0, &readback_buffer, 0, nv12_size);
+
+        self.pending = Some(PendingNv12Readback {
+            rx: None,
+            buffer: readback_buffer,
+            width,
+            height,
+            y_stride,
+            frame_number,
+            frame_rate,
+        });
+
+        true
+    }
+
+    pub fn start_readback(&mut self) {
+        if let Some(ref mut pending) = self.pending {
+            let (tx, rx) = oneshot::channel();
+            pending
+                .buffer
+                .slice(..)
+                .map_async(wgpu::MapMode::Read, move |result| {
+                    let _ = tx.send(result);
+                });
+            pending.rx = Some(rx);
+        }
+    }
+
+    pub fn take_pending(&mut self) -> Option<PendingNv12Readback> {
+        self.pending.take()
+    }
+}
+
+pub struct PendingNv12Readback {
+    rx: Option<oneshot::Receiver<Result<(), wgpu::BufferAsyncError>>>,
+    buffer: Arc<wgpu::Buffer>,
+    pub width: u32,
+    pub height: u32,
+    pub y_stride: u32,
+    pub frame_number: u32,
+    pub frame_rate: u32,
+}
+
+impl PendingNv12Readback {
+    pub async fn wait(
+        mut self,
+        device: &wgpu::Device,
+    ) -> Result<Nv12RenderedFrame, RenderingError> {
+        let Some(mut rx) = self.rx.take() else {
+            return Err(RenderingError::BufferMapWaitingFailed);
+        };
+
+        let mut poll_count = 0u32;
+        let start_time = Instant::now();
+        let timeout_duration = std::time::Duration::from_secs(GPU_BUFFER_WAIT_TIMEOUT_SECS);
+
+        loop {
+            if start_time.elapsed() > timeout_duration {
+                return Err(RenderingError::BufferMapWaitingFailed);
+            }
+
+            match rx.try_recv() {
+                Ok(result) => {
+                    result?;
+                    break;
+                }
+                Err(oneshot::error::TryRecvError::Empty) => {
+                    device.poll(wgpu::PollType::Poll)?;
+                    poll_count += 1;
+                    if poll_count < 10 {
+                        tokio::task::yield_now().await;
+                    } else if poll_count < 100 {
+                        tokio::time::sleep(std::time::Duration::from_micros(100)).await;
+                    } else {
+                        tokio::time::sleep(std::time::Duration::from_millis(1)).await;
+                    }
+                }
+                Err(oneshot::error::TryRecvError::Closed) => {
+                    return Err(RenderingError::BufferMapWaitingFailed);
+                }
+            }
+        }
+
+        let buffer_slice = self.buffer.slice(..);
+        let data = buffer_slice.get_mapped_range();
+        let nv12_data = data.to_vec();
+
+        drop(data);
+        self.buffer.unmap();
+
+        let target_time_ns =
+            (self.frame_number as u64 * 1_000_000_000) / self.frame_rate.max(1) as u64;
+
+        Ok(Nv12RenderedFrame {
+            data: nv12_data,
+            width: self.width,
+            height: self.height,
+            y_stride: self.y_stride,
+            frame_number: self.frame_number,
+            target_time_ns,
+            format: GpuOutputFormat::Nv12,
+        })
+    }
+}
+
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum GpuOutputFormat {
+    Nv12,
+    Rgba,
+}
+
+pub struct Nv12RenderedFrame {
+    pub data: Vec<u8>,
+    pub width: u32,
+    pub height: u32,
+    pub y_stride: u32,
+    pub frame_number: u32,
+    pub target_time_ns: u64,
+    pub format: GpuOutputFormat,
+}
 
 pub struct PendingReadback {
     rx: oneshot::Receiver<Result<(), wgpu::BufferAsyncError>>,
@@ -43,8 +372,12 @@ impl PendingReadback {
                 Err(oneshot::error::TryRecvError::Empty) => {
                     device.poll(wgpu::PollType::Poll)?;
                     poll_count += 1;
-                    if poll_count.is_multiple_of(3) {
+                    if poll_count < 10 {
                         tokio::task::yield_now().await;
+                    } else if poll_count < 100 {
+                        tokio::time::sleep(std::time::Duration::from_micros(100)).await;
+                    } else {
+                        tokio::time::sleep(std::time::Duration::from_millis(1)).await;
                     }
 
                     if poll_count.is_multiple_of(10000) {
                         tracing::warn!(
@@ -428,9 +761,11 @@ pub async fn finish_encoder(
     uniforms: &ProjectUniforms,
     encoder: wgpu::CommandEncoder,
 ) -> Result<RenderedFrame, RenderingError> {
-    if let Some(prev) = session.pipelined_readback.take_pending() {
-        let _ = prev.wait(device).await;
-    }
+    let previous_frame = if let Some(prev) = session.pipelined_readback.take_pending() {
+        Some(prev.wait(device).await?)
+    } else {
+        None
+    };
 
     session.pipelined_readback.perform_resize_if_needed(device);
 
@@ -444,6 +779,10 @@ pub async fn finish_encoder(
         .pipelined_readback
         .submit_readback(device, queue, texture, uniforms, encoder)?;
 
+    if let Some(prev_frame) = previous_frame {
+        return Ok(prev_frame);
+    }
+
     let pending = session
         .pipelined_readback
         .take_pending()
@@ -451,3 +790,77 @@ pub async fn finish_encoder(
 
     pending.wait(device).await
 }
+
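Two related changes in this hunk: `finish_encoder` now propagates the previously pipelined frame instead of discarding its result (`let _ = prev.wait(...)`), and both readback wait loops share a three-stage poll backoff — busy-yield for the first ~10 polls, then 100 µs sleeps, then 1 ms sleeps — bounding wake-up latency early and CPU spin late. The standalone shape of that loop, with `work_done()` standing in for polling the wgpu device and checking the map-async channel (a hypothetical helper, not the real API):

```rust
use std::time::Duration;

// Three-stage poll backoff, mirroring the readback wait loops above.
async fn wait_with_backoff(mut work_done: impl FnMut() -> bool) {
    let mut poll_count = 0u32;
    while !work_done() {
        poll_count += 1;
        if poll_count < 10 {
            tokio::task::yield_now().await; // near-zero latency when the GPU is almost done
        } else if poll_count < 100 {
            tokio::time::sleep(Duration::from_micros(100)).await;
        } else {
            tokio::time::sleep(Duration::from_millis(1)).await; // cap the CPU spin
        }
    }
}

#[tokio::main]
async fn main() {
    let mut polls = 0;
    wait_with_backoff(|| {
        polls += 1;
        polls > 42
    })
    .await;
    assert_eq!(polls, 43);
}
```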
+pub async fn finish_encoder_nv12(
+    session: &mut RenderSession,
+    nv12_converter: &mut RgbaToNv12Converter,
+    device: &wgpu::Device,
+    queue: &wgpu::Queue,
+    uniforms: &ProjectUniforms,
+    mut encoder: wgpu::CommandEncoder,
+) -> Result<Nv12RenderedFrame, RenderingError> {
+    let width = uniforms.output_size.0;
+    let height = uniforms.output_size.1;
+
+    let previous_frame = if let Some(prev) = nv12_converter.take_pending() {
+        Some(prev.wait(device).await?)
+    } else {
+        None
+    };
+
+    let texture = if session.current_is_left {
+        &session.textures.0
+    } else {
+        &session.textures.1
+    };
+
+    let submitted = nv12_converter.submit_conversion(
+        device,
+        queue,
+        &mut encoder,
+        texture,
+        width,
+        height,
+        uniforms.frame_number,
+        uniforms.frame_rate,
+    );
+
+    if submitted {
+        queue.submit(std::iter::once(encoder.finish()));
+        nv12_converter.start_readback();
+
+        if let Some(prev_frame) = previous_frame {
+            return Ok(prev_frame);
+        }
+
+        let pending = nv12_converter
+            .take_pending()
+            .expect("just submitted a conversion");
+        pending.wait(device).await
+    } else if let Some(prev_frame) = previous_frame {
+        queue.submit(std::iter::once(encoder.finish()));
+        Ok(prev_frame)
+    } else {
+        let rgba_frame = finish_encoder(session, device, queue, uniforms, encoder).await?;
+        Ok(Nv12RenderedFrame {
+            data: rgba_frame.data,
+            width: rgba_frame.width,
+            height: rgba_frame.height,
+            y_stride: rgba_frame.padded_bytes_per_row,
+            frame_number: rgba_frame.frame_number,
+            target_time_ns: rgba_frame.target_time_ns,
+            format: GpuOutputFormat::Rgba,
+        })
+    }
+}
+
+pub async fn flush_pending_readback(
+    session: &mut RenderSession,
+    device: &wgpu::Device,
+) -> Option<Result<RenderedFrame, RenderingError>> {
+    if let Some(pending) = session.pipelined_readback.take_pending() {
+        Some(pending.wait(device).await)
+    } else {
+        None
+    }
+}
diff --git a/crates/rendering/src/layers/camera.rs b/crates/rendering/src/layers/camera.rs
index 700bf52c41..01c130d5f0 100644
--- a/crates/rendering/src/layers/camera.rs
+++ b/crates/rendering/src/layers/camera.rs
@@ -289,6 +289,180 @@ impl CameraLayer {
         }
     }
 
+    fn copy_from_yuv_output_to_encoder(
+        &self,
+        encoder: &mut wgpu::CommandEncoder,
+        next_texture: usize,
+        frame_size: XY<u32>,
+    ) {
+        if let Some(output_texture) = self.yuv_converter.output_texture() {
+            encoder.copy_texture_to_texture(
+                wgpu::TexelCopyTextureInfo {
+                    texture: output_texture,
+                    mip_level: 0,
+                    origin: wgpu::Origin3d::ZERO,
+                    aspect: wgpu::TextureAspect::All,
+                },
+                wgpu::TexelCopyTextureInfo {
+                    texture: &self.frame_textures[next_texture],
+                    mip_level: 0,
+                    origin: wgpu::Origin3d::ZERO,
+                    aspect: wgpu::TextureAspect::All,
+                },
+                wgpu::Extent3d {
+                    width: frame_size.x,
+                    height: frame_size.y,
+                    depth_or_array_layers: 1,
+                },
+            );
+        }
+    }
+
+    pub fn prepare_with_encoder(
+        &mut self,
+        device: &wgpu::Device,
+        queue: &wgpu::Queue,
+        uniforms: Option<CompositeVideoFrameUniforms>,
+        frame_data: Option<(XY<u32>, &DecodedFrame, f32)>,
+        encoder: &mut wgpu::CommandEncoder,
+    ) {
+        let Some(uniforms) = uniforms else {
+            self.hidden = true;
+            return;
+        };
+
+        let has_previous_frame = self.last_recording_time.is_some();
+        self.hidden = frame_data.is_none() && !has_previous_frame;
+
+        queue.write_buffer(&self.uniforms_buffer, 0, bytemuck::cast_slice(&[uniforms]));
+
+        let Some((frame_size, camera_frame, recording_time)) = frame_data else {
+            return;
+        };
+
+        let format = camera_frame.format();
+
+        let is_same_frame = self
+            .last_recording_time
+            .is_some_and(|last| (last - recording_time).abs() < 0.001);
+
+        if !is_same_frame {
+            let next_texture = 1 - self.current_texture;
+
+            if self.frame_textures[next_texture].width() != frame_size.x
+                || self.frame_textures[next_texture].height() != frame_size.y
+            {
+                self.frame_textures[next_texture] =
+                    CompositeVideoFramePipeline::create_frame_texture(
+                        device,
+                        frame_size.x,
+                        frame_size.y,
+                    );
+                self.frame_texture_views[next_texture] =
+                    self.frame_textures[next_texture].create_view(&Default::default());
+
+                self.bind_groups[next_texture] = Some(self.pipeline.bind_group(
+                    device,
diff --git a/crates/rendering/src/layers/display.rs b/crates/rendering/src/layers/display.rs
index f2f3df3a40..05d545ce0a 100644
--- a/crates/rendering/src/layers/display.rs
+++ b/crates/rendering/src/layers/display.rs
@@ -401,6 +401,224 @@ impl DisplayLayer {
         (skipped, actual_width, actual_height)
     }
 
+    pub fn prepare_with_encoder(
+        &mut self,
+        device: &wgpu::Device,
+        queue: &wgpu::Queue,
+        segment_frames: &DecodedSegmentFrames,
+        frame_size: XY<u32>,
+        uniforms: CompositeVideoFrameUniforms,
+        encoder: &mut wgpu::CommandEncoder,
+    ) -> (bool, u32, u32) {
+        self.pending_copy = None;
+
+        let actual_width = segment_frames.screen_frame.width();
+        let actual_height = segment_frames.screen_frame.height();
+        let format = segment_frames.screen_frame.format();
+        let current_recording_time = segment_frames.recording_time;
+
+        let skipped = self
+            .last_recording_time
+            .is_some_and(|last| (last - current_recording_time).abs() < 0.001);
+
+        if !skipped {
+            let next_texture = 1 - self.current_texture;
+
+            if self.frame_textures[next_texture].width() != frame_size.x
+                || self.frame_textures[next_texture].height() != frame_size.y
+            {
+                self.frame_textures[next_texture] =
+                    CompositeVideoFramePipeline::create_frame_texture(
+                        device,
+                        frame_size.x,
+                        frame_size.y,
+                    );
+                self.frame_texture_views[next_texture] =
+                    self.frame_textures[next_texture].create_view(&Default::default());
+
+                self.bind_groups[next_texture] = Some(self.pipeline.bind_group(
+                    device,
+                    &self.uniforms_buffer,
+                    &self.frame_texture_views[next_texture],
+                ));
+            }
+
+            let frame_uploaded = match format {
+                PixelFormat::Rgba => {
+                    let frame_data = segment_frames.screen_frame.data();
+                    let src_bytes_per_row = frame_size.x * 4;
+
+                    queue.write_texture(
+                        wgpu::TexelCopyTextureInfo {
+                            texture: &self.frame_textures[next_texture],
+                            mip_level: 0,
+                            origin: wgpu::Origin3d::ZERO,
+                            aspect: wgpu::TextureAspect::All,
+                        },
+                        frame_data,
+                        wgpu::TexelCopyBufferLayout {
+                            offset: 0,
+                            bytes_per_row: Some(src_bytes_per_row),
+                            rows_per_image: Some(frame_size.y),
+                        },
+                        wgpu::Extent3d {
+                            width: frame_size.x,
+                            height: frame_size.y,
+                            depth_or_array_layers: 1,
+                        },
+                    );
+                    true
+                }
+                PixelFormat::Nv12 => {
+                    let screen_frame = &segment_frames.screen_frame;
+
+                    if !self.prefer_cpu_conversion {
+                        if let (Some(y_data), Some(uv_data)) =
+                            (screen_frame.y_plane(), screen_frame.uv_plane())
+                        {
+                            let y_stride = screen_frame.y_stride();
+                            let uv_stride = screen_frame.uv_stride();
+
+                            #[cfg(target_os = "windows")]
+                            let convert_width = actual_width;
+                            #[cfg(target_os = "windows")]
+                            let convert_height = actual_height;
+                            #[cfg(not(target_os = "windows"))]
+                            let convert_width = frame_size.x;
+                            #[cfg(not(target_os = "windows"))]
+                            let convert_height = frame_size.y;
+
+                            let convert_result = self.yuv_converter.convert_nv12_to_encoder(
+                                device,
+                                queue,
+                                encoder,
+                                y_data,
+                                uv_data,
+                                convert_width,
+                                convert_height,
+                                y_stride,
+                                uv_stride,
+                            );
+
+                            match convert_result {
+                                Ok(_) => {
+                                    if self.yuv_converter.output_texture().is_some() {
+                                        self.pending_copy = Some(PendingTextureCopy {
+                                            width: convert_width,
+                                            height: convert_height,
+                                            dst_texture_index: next_texture,
+                                        });
+                                        true
+                                    } else {
+                                        false
+                                    }
+                                }
+                                Err(_) => false,
+                            }
+                        } else {
+                            false
+                        }
+                    } else if let (Some(y_data), Some(uv_data)) =
+                        (screen_frame.y_plane(), screen_frame.uv_plane())
+                    {
+                        let y_stride = screen_frame.y_stride();
+                        let uv_stride = screen_frame.uv_stride();
+                        let convert_result = self.yuv_converter.convert_nv12_cpu(
+                            device,
+                            queue,
+                            y_data,
+                            uv_data,
+                            frame_size.x,
+                            frame_size.y,
+                            y_stride,
+                            uv_stride,
+                        );
+
+                        match convert_result {
+                            Ok(_) => {
+                                if self.yuv_converter.output_texture().is_some() {
+                                    self.pending_copy = Some(PendingTextureCopy {
+                                        width: frame_size.x,
+                                        height: frame_size.y,
+                                        dst_texture_index: next_texture,
+                                    });
+                                    true
+                                } else {
+                                    false
+                                }
+                            }
+                            Err(_) => false,
+                        }
+                    } else {
+                        false
+                    }
+                }
+                PixelFormat::Yuv420p => {
+                    let screen_frame = &segment_frames.screen_frame;
+                    let y_plane = screen_frame.y_plane();
+                    let u_plane = screen_frame.u_plane();
+                    let v_plane = screen_frame.v_plane();
+
+                    if let (Some(y_data), Some(u_data), Some(v_data)) = (y_plane, u_plane, v_plane)
+                    {
+                        let convert_result = if self.prefer_cpu_conversion {
+                            self.yuv_converter.convert_yuv420p_cpu(
+                                device,
+                                queue,
+                                y_data,
+                                u_data,
+                                v_data,
+                                frame_size.x,
+                                frame_size.y,
+                                screen_frame.y_stride(),
+                                screen_frame.uv_stride(),
+                            )
+                        } else {
+                            self.yuv_converter.convert_yuv420p_to_encoder(
+                                device,
+                                queue,
+                                encoder,
+                                y_data,
+                                u_data,
+                                v_data,
+                                frame_size.x,
+                                frame_size.y,
+                                screen_frame.y_stride(),
+                                screen_frame.uv_stride(),
+                            )
+                        };
+
+                        match convert_result {
+                            Ok(_) => {
+                                if self.yuv_converter.output_texture().is_some() {
+                                    self.pending_copy = Some(PendingTextureCopy {
+                                        width: frame_size.x,
+                                        height: frame_size.y,
+                                        dst_texture_index: next_texture,
+                                    });
+                                    true
+                                } else {
+                                    false
+                                }
+                            }
+                            Err(_) => false,
+                        }
+                    } else {
+                        false
+                    }
+                }
+            };
+
+            if frame_uploaded {
+                self.last_recording_time = Some(current_recording_time);
+                self.current_texture = next_texture;
+            }
+        }
+
+        uniforms.write_to_buffer(queue, &self.uniforms_buffer);
+        (skipped, actual_width, actual_height)
+    }
+
     pub fn copy_to_texture(&mut self, encoder: &mut wgpu::CommandEncoder) {
         let Some(pending) = self.pending_copy.take() else {
             return;
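DisplayLayer::prepare_with_encoder cannot copy the converter's output immediately: at that point the YUV compute pass has only been recorded into the frame's encoder, not executed. So it stores a PendingTextureCopy and lets copy_to_texture record the actual copy into the same encoder afterwards, which orders the copy after the compute pass inside a single submission. The two-phase shape, stripped of wgpu plumbing (the elided calls are marked in comments):

// Mirrors PendingTextureCopy from the diff; dst_texture_index selects the
// ping-pong texture the converted frame should land in.
struct PendingTextureCopy {
    width: u32,
    height: u32,
    dst_texture_index: usize,
}

struct Layer {
    pending_copy: Option<PendingTextureCopy>,
}

impl Layer {
    // Phase 1: record the compute conversion, only *note* the copy for later.
    fn prepare_with_encoder(&mut self, width: u32, height: u32, next_texture: usize) {
        // ... yuv_converter.convert_*_to_encoder(..., encoder, ...) records the pass ...
        self.pending_copy = Some(PendingTextureCopy {
            width,
            height,
            dst_texture_index: next_texture,
        });
    }

    // Phase 2: same encoder, recorded strictly after the compute pass.
    fn copy_to_texture(&mut self) {
        let Some(pending) = self.pending_copy.take() else {
            return;
        };
        // ... encoder.copy_texture_to_texture(converter_output, frame_textures[i], extent) ...
        let _ = (pending.width, pending.height, pending.dst_texture_index);
    }
}

fn main() {
    let mut layer = Layer { pending_copy: None };
    layer.prepare_with_encoder(1920, 1080, 1);
    layer.copy_to_texture(); // executes exactly once per prepared frame
    layer.copy_to_texture(); // no-op: pending_copy was already taken
}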
diff --git a/crates/rendering/src/lib.rs b/crates/rendering/src/lib.rs
index 9848f21859..0d693f35fd 100644
--- a/crates/rendering/src/lib.rs
+++ b/crates/rendering/src/lib.rs
@@ -7,7 +7,7 @@ use composite_frame::CompositeVideoFrameUniforms;
 use core::f64;
 use cursor_interpolation::{InterpolatedCursorPosition, interpolate_cursor};
 use decoder::{AsyncVideoDecoderHandle, spawn_decoder};
-use frame_pipeline::{RenderSession, finish_encoder};
+use frame_pipeline::{RenderSession, finish_encoder, finish_encoder_nv12, flush_pending_readback};
 use futures::FutureExt;
 use futures::future::OptionFuture;
 use layers::{
@@ -42,7 +42,7 @@ pub mod zoom_focus_interpolation;
 
 pub use coord::*;
 pub use decoder::{DecodedFrame, DecoderStatus, DecoderType, PixelFormat};
-pub use frame_pipeline::RenderedFrame;
+pub use frame_pipeline::{GpuOutputFormat, Nv12RenderedFrame, RenderedFrame};
 pub use project_recordings::{ProjectRecordingsMeta, SegmentRecordings, Video};
 
 use mask::interpolate_masks;
@@ -532,10 +532,20 @@ pub async fn render_video_to_channel(
         sender.send((frame, current_frame_number)).await?;
     }
 
+    if let Some(Ok(final_frame)) = frame_renderer.flush_pipeline().await
+        && final_frame.width > 0
+        && final_frame.height > 0
+    {
+        sender
+            .send((final_frame, frame_number.saturating_sub(1)))
+            .await?;
+    }
+
     let total_time = start_time.elapsed();
-    println!(
-        "Render complete. Processed {frame_number} frames in {:?} seconds",
-        total_time.as_secs_f32()
+    tracing::info!(
+        frames = frame_number,
+        elapsed_secs = format!("{:.2}", total_time.as_secs_f32()),
+        "Render complete"
     );
 
     Ok(())
@@ -1790,6 +1800,7 @@ pub struct DecodedSegmentFrames {
 
 pub struct FrameRenderer<'a> {
     constants: &'a RenderVideoConstants,
     session: Option<RenderSession>,
+    nv12_converter: Option<frame_pipeline::RgbaToNv12Converter>,
 }
 
 impl<'a> FrameRenderer<'a> {
@@ -1799,6 +1810,7 @@ impl<'a> FrameRenderer<'a> {
         Self {
             constants,
             session: None,
+            nv12_converter: None,
         }
     }
 
@@ -1875,6 +1887,106 @@
 
         Err(last_error.unwrap_or(RenderingError::BufferMapWaitingFailed))
     }
+
+    pub async fn flush_pipeline(&mut self) -> Option<Result<RenderedFrame, RenderingError>> {
+        if let Some(session) = &mut self.session {
+            flush_pending_readback(session, &self.constants.device).await
+        } else {
+            None
+        }
+    }
+
+    pub async fn render_nv12(
+        &mut self,
+        segment_frames: DecodedSegmentFrames,
+        uniforms: ProjectUniforms,
+        cursor: &CursorEvents,
+        layers: &mut RendererLayers,
+    ) -> Result<Nv12RenderedFrame, RenderingError> {
+        let mut last_error = None;
+
+        for attempt in 0..Self::MAX_RENDER_RETRIES {
+            if attempt > 0 {
+                tracing::warn!(
+                    frame_number = uniforms.frame_number,
+                    attempt = attempt + 1,
+                    "Retrying NV12 frame render after GPU error"
+                );
+                self.reset_session();
+                self.nv12_converter = None;
+                tokio::time::sleep(std::time::Duration::from_millis(100 * (attempt as u64 + 1)))
+                    .await;
+            }
+
+            let session = self.session.get_or_insert_with(|| {
+                RenderSession::new(
+                    &self.constants.device,
+                    uniforms.output_size.0,
+                    uniforms.output_size.1,
+                )
+            });
+
+            session.update_texture_size(
+                &self.constants.device,
+                uniforms.output_size.0,
+                uniforms.output_size.1,
+            );
+
+            let nv12_converter = self.nv12_converter.get_or_insert_with(|| {
+                frame_pipeline::RgbaToNv12Converter::new(&self.constants.device)
+            });
+
+            let mut encoder = self.constants.device.create_command_encoder(
+                &(wgpu::CommandEncoderDescriptor {
+                    label: Some("Render Encoder (NV12)"),
+                }),
+            );
+
+            if let Err(e) = layers
+                .prepare_with_encoder(
+                    self.constants,
+                    &uniforms,
+                    &segment_frames,
+                    cursor,
+                    &mut encoder,
+                )
+                .await
+            {
+                last_error = Some(e);
+                continue;
+            }
+
+            layers.render(
+                &self.constants.device,
+                &self.constants.queue,
+                &mut encoder,
+                session,
+                &uniforms,
+            );
+
+            match finish_encoder_nv12(
+                session,
+                nv12_converter,
+                &self.constants.device,
+                &self.constants.queue,
+                &uniforms,
+                encoder,
+            )
+            .await
+            {
+                Ok(frame) => return Ok(frame),
+                Err(RenderingError::BufferMapWaitingFailed) => {
+                    last_error = Some(RenderingError::BufferMapWaitingFailed);
+                }
+                Err(RenderingError::BufferMapFailed(e)) => {
+                    last_error = Some(RenderingError::BufferMapFailed(e));
+                }
+                Err(e) => return Err(e),
+            }
+        }
+
+        Err(last_error.unwrap_or(RenderingError::BufferMapWaitingFailed))
+    }
 }
 
 pub struct RendererLayers {
@@ -2013,6 +2125,87 @@ impl RendererLayers {
         Ok(())
     }
 
+    pub async fn prepare_with_encoder(
+        &mut self,
+        constants: &RenderVideoConstants,
+        uniforms: &ProjectUniforms,
+        segment_frames: &DecodedSegmentFrames,
+        cursor: &CursorEvents,
+        encoder: &mut wgpu::CommandEncoder,
+    ) -> Result<(), RenderingError> {
+        self.background
+            .prepare(
+                constants,
+                uniforms,
+                Background::from(uniforms.project.background.source.clone()),
+            )
+            .await?;
+
+        if uniforms.project.background.blur > 0.0 {
+            self.background_blur.prepare(&constants.queue, uniforms);
+        }
+
+        self.display.prepare_with_encoder(
+            &constants.device,
+            &constants.queue,
+            segment_frames,
+            constants.options.screen_size,
+            uniforms.display,
+            encoder,
+        );
+
+        self.cursor.prepare(
+            segment_frames,
+            uniforms.resolution_base,
+            cursor,
+            &uniforms.zoom,
+            uniforms,
+            constants,
+        );
+
+        self.camera.prepare_with_encoder(
+            &constants.device,
+            &constants.queue,
+            uniforms.camera,
+            constants.options.camera_size.and_then(|size| {
+                segment_frames
+                    .camera_frame
+                    .as_ref()
+                    .map(|frame| (size, frame, segment_frames.recording_time))
+            }),
+            encoder,
+        );
+
+        self.camera_only.prepare_with_encoder(
+            &constants.device,
+            &constants.queue,
+            uniforms.camera_only,
+            constants.options.camera_size.and_then(|size| {
+                segment_frames
+                    .camera_frame
+                    .as_ref()
+                    .map(|frame| (size, frame, segment_frames.recording_time))
+            }),
+            encoder,
+        );
+
+        self.text.prepare(
+            &constants.device,
+            &constants.queue,
+            uniforms.output_size,
+            &uniforms.texts,
+        );
+
+        self.captions.prepare(
+            uniforms,
+            segment_frames,
+            XY::new(uniforms.output_size.0, uniforms.output_size.1),
+            constants,
+        );
+
+        Ok(())
+    }
+
     pub fn render(
         &mut self,
         device: &wgpu::Device,
@@ -2112,16 +2305,16 @@ async fn produce_frame(
     layers: &mut RendererLayers,
     session: &mut RenderSession,
 ) -> Result<RenderedFrame, RenderingError> {
-    layers
-        .prepare(constants, &uniforms, &segment_frames, cursor)
-        .await?;
-
     let mut encoder = constants.device.create_command_encoder(
         &(wgpu::CommandEncoderDescriptor {
             label: Some("Render Encoder"),
        }),
     );
 
+    layers
+        .prepare_with_encoder(constants, &uniforms, &segment_frames, cursor, &mut encoder)
+        .await?;
+
     layers.render(
         &constants.device,
         &constants.queue,
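Since the renderer is now one frame deep, render_video_to_channel gains a drain step: after the decode loop, flush_pipeline returns the frame still in flight, and it is emitted with frame_number.saturating_sub(1) because it was submitted on the previous iteration. A toy model of why the drain is necessary (Renderer here is a stand-in, not the real FrameRenderer):

// One-frame-deep pipeline in miniature: render() returns frame N - 1 while
// frame N is "in flight"; without flush(), the last frame is silently lost.
struct Renderer {
    pending: Option<u32>,
}

impl Renderer {
    fn render(&mut self, frame: u32) -> Option<u32> {
        self.pending.replace(frame)
    }

    fn flush(&mut self) -> Option<u32> {
        self.pending.take()
    }
}

fn main() {
    let mut renderer = Renderer { pending: None };
    let mut out = Vec::new();
    for n in 0..3 {
        if let Some(done) = renderer.render(n) {
            out.push(done);
        }
    }
    // The drain step added in render_video_to_channel above:
    if let Some(last) = renderer.flush() {
        out.push(last);
    }
    assert_eq!(out, vec![0, 1, 2]);
}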
diff --git a/crates/rendering/src/shaders/rgba_to_nv12.wgsl b/crates/rendering/src/shaders/rgba_to_nv12.wgsl
new file mode 100644
index 0000000000..444805da2f
--- /dev/null
+++ b/crates/rendering/src/shaders/rgba_to_nv12.wgsl
@@ -0,0 +1,92 @@
+struct Params {
+    width: u32,
+    height: u32,
+    y_stride: u32,
+    uv_stride: u32,
+}
+
+@group(0) @binding(0) var input: texture_2d<f32>;
+@group(0) @binding(1) var<storage, read_write> nv12_output: array<u32>;
+@group(0) @binding(2) var<uniform> params: Params;
+
+fn rgb_to_y(r: f32, g: f32, b: f32) -> u32 {
+    return u32(clamp(16.0 + 65.481 * r + 128.553 * g + 24.966 * b, 0.0, 255.0));
+}
+
+fn rgb_to_u(r: f32, g: f32, b: f32) -> u32 {
+    return u32(clamp(128.0 - 37.797 * r - 74.203 * g + 112.0 * b, 0.0, 255.0));
+}
+
+fn rgb_to_v(r: f32, g: f32, b: f32) -> u32 {
+    return u32(clamp(128.0 + 112.0 * r - 93.786 * g - 18.214 * b, 0.0, 255.0));
+}
+
+fn safe_load(coord: vec2<u32>, dims: vec2<u32>) -> vec4<f32> {
+    let c = min(coord, dims - vec2<u32>(1u, 1u));
+    return textureLoad(input, c, 0);
+}
+
+@compute @workgroup_size(8, 8)
+fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
+    let width = params.width;
+    let height = params.height;
+    let y_stride = params.y_stride;
+    let dims = vec2<u32>(width, height);
+
+    let px = global_id.x * 4u;
+    let py = global_id.y * 2u;
+
+    if (px >= width || py >= height) {
+        return;
+    }
+
+    let p0 = safe_load(vec2<u32>(px, py), dims);
+    let p1 = safe_load(vec2<u32>(px + 1u, py), dims);
+    let p2 = safe_load(vec2<u32>(px + 2u, py), dims);
+    let p3 = safe_load(vec2<u32>(px + 3u, py), dims);
+
+    let p4 = safe_load(vec2<u32>(px, py + 1u), dims);
+    let p5 = safe_load(vec2<u32>(px + 1u, py + 1u), dims);
+    let p6 = safe_load(vec2<u32>(px + 2u, py + 1u), dims);
+    let p7 = safe_load(vec2<u32>(px + 3u, py + 1u), dims);
+
+    let y0 = rgb_to_y(p0.r, p0.g, p0.b);
+    let y1 = rgb_to_y(p1.r, p1.g, p1.b);
+    let y2 = rgb_to_y(p2.r, p2.g, p2.b);
+    let y3 = rgb_to_y(p3.r, p3.g, p3.b);
+    let y4 = rgb_to_y(p4.r, p4.g, p4.b);
+    let y5 = rgb_to_y(p5.r, p5.g, p5.b);
+    let y6 = rgb_to_y(p6.r, p6.g, p6.b);
+    let y7 = rgb_to_y(p7.r, p7.g, p7.b);
+
+    let y_row0_word = y0 | (y1 << 8u) | (y2 << 16u) | (y3 << 24u);
+    let y_row0_idx = (py * y_stride + px) / 4u;
+    nv12_output[y_row0_idx] = y_row0_word;
+
+    if (py + 1u < height) {
+        let y_row1_word = y4 | (y5 << 8u) | (y6 << 16u) | (y7 << 24u);
+        let y_row1_idx = ((py + 1u) * y_stride + px) / 4u;
+        nv12_output[y_row1_idx] = y_row1_word;
+    }
+
+    let y_plane_size = y_stride * height;
+
+    let avg_r_left = (p0.r + p1.r + p4.r + p5.r) * 0.25;
+    let avg_g_left = (p0.g + p1.g + p4.g + p5.g) * 0.25;
+    let avg_b_left = (p0.b + p1.b + p4.b + p5.b) * 0.25;
+
+    let avg_r_right = (p2.r + p3.r + p6.r + p7.r) * 0.25;
+    let avg_g_right = (p2.g + p3.g + p6.g + p7.g) * 0.25;
+    let avg_b_right = (p2.b + p3.b + p6.b + p7.b) * 0.25;
+
+    let u_left = rgb_to_u(avg_r_left, avg_g_left, avg_b_left);
+    let v_left = rgb_to_v(avg_r_left, avg_g_left, avg_b_left);
+    let u_right = rgb_to_u(avg_r_right, avg_g_right, avg_b_right);
+    let v_right = rgb_to_v(avg_r_right, avg_g_right, avg_b_right);
+
+    let uv_word = u_left | (v_left << 8u) | (u_right << 16u) | (v_right << 24u);
+    let uv_row = global_id.y;
+    let uv_offset = y_plane_size + uv_row * params.uv_stride + px;
+    let uv_idx = uv_offset / 4u;
+    nv12_output[uv_idx] = uv_word;
+}
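The shader's constants are the BT.601 studio-range matrix applied to normalized RGB, so luma spans [16, 235] and chroma centers on 128. Each invocation handles a 4x2 pixel block because storage buffers are written in whole u32 words: eight Y samples pack into two words, and the two 2x2 chroma averages pack into one interleaved UVUV word, avoiding read-modify-write on neighboring bytes. A CPU reference for the same math, handy for spot-checking GPU output (plain Rust; truncating casts to match WGSL's u32()):

// BT.601 studio-range RGB -> YUV, matching rgb_to_y / rgb_to_u / rgb_to_v in
// the shader. Inputs are normalized to [0, 1]; casts truncate like WGSL's u32().
fn rgb_to_y(r: f32, g: f32, b: f32) -> u8 {
    (16.0 + 65.481 * r + 128.553 * g + 24.966 * b).clamp(0.0, 255.0) as u8
}
fn rgb_to_u(r: f32, g: f32, b: f32) -> u8 {
    (128.0 - 37.797 * r - 74.203 * g + 112.0 * b).clamp(0.0, 255.0) as u8
}
fn rgb_to_v(r: f32, g: f32, b: f32) -> u8 {
    (128.0 + 112.0 * r - 93.786 * g - 18.214 * b).clamp(0.0, 255.0) as u8
}

fn main() {
    // White should land near (235, 128, 128) and black at (16, 128, 128) in
    // studio range. Allow one code value of slack on the white point: f32
    // rounding plus truncation can land one step below the nominal value.
    let (y, u, v) = (
        rgb_to_y(1.0, 1.0, 1.0),
        rgb_to_u(1.0, 1.0, 1.0),
        rgb_to_v(1.0, 1.0, 1.0),
    );
    assert!((y as i32 - 235).abs() <= 1);
    assert!((u as i32 - 128).abs() <= 1 && (v as i32 - 128).abs() <= 1);
    assert_eq!(rgb_to_y(0.0, 0.0, 0.0), 16);
}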
diff --git a/crates/rendering/src/yuv_converter.rs b/crates/rendering/src/yuv_converter.rs
index e93c29f30a..30b107ce48 100644
--- a/crates/rendering/src/yuv_converter.rs
+++ b/crates/rendering/src/yuv_converter.rs
@@ -753,6 +753,81 @@ impl YuvToRgbaConverter {
         Ok(self.current_output_view())
     }
 
+    #[allow(clippy::too_many_arguments)]
+    pub fn convert_nv12_to_encoder(
+        &mut self,
+        device: &wgpu::Device,
+        queue: &wgpu::Queue,
+        encoder: &mut wgpu::CommandEncoder,
+        y_data: &[u8],
+        uv_data: &[u8],
+        width: u32,
+        height: u32,
+        y_stride: u32,
+        uv_stride: u32,
+    ) -> Result<&wgpu::TextureView, YuvConversionError> {
+        let (effective_width, effective_height, _downscaled) =
+            validate_dimensions(width, height, self.gpu_max_texture_size)?;
+        self.ensure_texture_size(device, effective_width, effective_height);
+        self.swap_output_buffer();
+
+        upload_plane_with_stride(queue, &self.y_texture, y_data, width, height, y_stride, "Y")?;
+
+        let half_height = height / 2;
+        let expected_uv_size = (uv_stride * half_height) as usize;
+        if uv_data.len() < expected_uv_size {
+            return Err(YuvConversionError::PlaneSizeMismatch {
+                plane: "UV",
+                expected: expected_uv_size,
+                actual: uv_data.len(),
+            });
+        }
+
+        queue.write_texture(
+            wgpu::TexelCopyTextureInfo {
+                texture: &self.uv_texture,
+                mip_level: 0,
+                origin: wgpu::Origin3d::ZERO,
+                aspect: wgpu::TextureAspect::All,
+            },
+            uv_data,
+            wgpu::TexelCopyBufferLayout {
+                offset: 0,
+                bytes_per_row: Some(uv_stride),
+                rows_per_image: Some(half_height),
+            },
+            wgpu::Extent3d {
+                width: width / 2,
+                height: half_height,
+                depth_or_array_layers: 1,
+            },
+        );
+
+        let output_index = self.current_output;
+        let bind_group = self.bind_group_cache.get_or_create_nv12(
+            device,
+            &self.pipelines.nv12_bind_group_layout,
+            &self.y_view,
+            &self.uv_view,
+            &self.output_views[output_index],
+            output_index,
+            self.allocated_width,
+            self.allocated_height,
+        );
+
+        {
+            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+                label: Some("NV12 Conversion Pass (Batched)"),
+                ..Default::default()
+            });
+            compute_pass.set_pipeline(&self.pipelines.nv12_pipeline);
+            compute_pass.set_bind_group(0, bind_group, &[]);
+            compute_pass.dispatch_workgroups(width.div_ceil(8), height.div_ceil(8), 1);
+        }
+
+        Ok(self.current_output_view())
+    }
+
     #[cfg(target_os = "macos")]
     pub fn convert_nv12_from_iosurface(
         &mut self,
@@ -913,6 +988,75 @@ impl YuvToRgbaConverter {
         Ok(self.current_output_view())
     }
 
+    #[allow(clippy::too_many_arguments)]
+    pub fn convert_yuv420p_to_encoder(
+        &mut self,
+        device: &wgpu::Device,
+        queue: &wgpu::Queue,
+        encoder: &mut wgpu::CommandEncoder,
+        y_data: &[u8],
+        u_data: &[u8],
+        v_data: &[u8],
+        width: u32,
+        height: u32,
+        y_stride: u32,
+        uv_stride: u32,
+    ) -> Result<&wgpu::TextureView, YuvConversionError> {
+        let (effective_width, effective_height, _downscaled) =
+            validate_dimensions(width, height, self.gpu_max_texture_size)?;
+        self.ensure_texture_size(device, effective_width, effective_height);
+        self.swap_output_buffer();
+
+        upload_plane_with_stride(queue, &self.y_texture, y_data, width, height, y_stride, "Y")?;
+
+        let half_width = width / 2;
+        let half_height = height / 2;
+
+        upload_plane_with_stride(
+            queue,
+            &self.u_texture,
+            u_data,
+            half_width,
+            half_height,
+            uv_stride,
+            "U",
+        )?;
+        upload_plane_with_stride(
+            queue,
+            &self.v_texture,
+            v_data,
+            half_width,
+            half_height,
+            uv_stride,
+            "V",
+        )?;
+
+        let output_index = self.current_output;
+        let bind_group = self.bind_group_cache.get_or_create_yuv420p(
+            device,
+            &self.pipelines.yuv420p_bind_group_layout,
+            &self.y_view,
+            &self.u_view,
+            &self.v_view,
+            &self.output_views[output_index],
+            output_index,
+            self.allocated_width,
+            self.allocated_height,
+        );
+
+        {
+            let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+                label: Some("YUV420P Conversion Pass (Batched)"),
+                ..Default::default()
+            });
+            compute_pass.set_pipeline(&self.pipelines.yuv420p_pipeline);
+            compute_pass.set_bind_group(0, bind_group, &[]);
+            compute_pass.dispatch_workgroups(width.div_ceil(8), height.div_ceil(8), 1);
+        }
+
+        Ok(self.current_output_view())
+    }
+
     #[cfg(target_os = "windows")]
     #[allow(clippy::too_many_arguments)]
     pub fn convert_nv12_from_d3d11_texture(
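Both *_to_encoder variants record their compute pass into the caller's encoder rather than creating and submitting one of their own, so plane upload, conversion, the deferred texture copy, and compositing can all share a single queue.submit. The inner { } around begin_compute_pass is doing real work: the pass mutably borrows the encoder, and dropping it at the end of the block releases the borrow so recording can continue. A reduced sketch of that recording pattern (setup elided; the wgpu calls are the same ones the diff uses):

// Record a conversion pass, then keep using the encoder afterwards.
fn record_conversion(
    encoder: &mut wgpu::CommandEncoder,
    pipeline: &wgpu::ComputePipeline,
    bind_group: &wgpu::BindGroup,
    width: u32,
    height: u32,
) {
    {
        let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor::default());
        pass.set_pipeline(pipeline);
        pass.set_bind_group(0, bind_group, &[]);
        // One invocation per pixel, 8x8 workgroups, rounding up at the edges.
        pass.dispatch_workgroups(width.div_ceil(8), height.div_ceil(8), 1);
    } // pass dropped here: the encoder's mutable borrow ends

    // The encoder is usable again, e.g. for the deferred copy_to_texture copy
    // or the final render pass, all landing in one queue.submit.
}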
diff --git a/crates/rendering/src/zoom_focus_interpolation.rs b/crates/rendering/src/zoom_focus_interpolation.rs
index a622b9c7d0..c698026254 100644
--- a/crates/rendering/src/zoom_focus_interpolation.rs
+++ b/crates/rendering/src/zoom_focus_interpolation.rs
@@ -16,7 +16,7 @@ struct SmoothedFocusEvent {
 
 pub struct ZoomFocusInterpolator {
     events: Option<Vec<SmoothedFocusEvent>>,
-    cursor_events: CursorEvents,
+    cursor_events: std::sync::Arc<CursorEvents>,
     cursor_smoothing: Option,
     screen_spring: ScreenMovementSpring,
     duration_secs: f64,
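The field change above pairs with the new_arc constructor in the next hunk: callers that hand the same cursor history to several interpolators can share one allocation instead of deep-cloning the event list per interpolator. The cost difference in miniature (this CursorEvents is a stand-in type, not cap_project's):

use std::sync::Arc;

// Stand-in for the real CursorEvents: cloning it copies the whole Vec.
#[derive(Clone)]
struct CursorEvents {
    moves: Vec<(f64, f64)>,
}

fn main() {
    let events = Arc::new(CursorEvents {
        moves: vec![(0.0, 0.0); 100_000],
    });

    // Arc::clone is a reference-count bump; no cursor data is copied.
    let for_interpolator_a = Arc::clone(&events);
    let for_interpolator_b = Arc::clone(&events);

    assert_eq!(Arc::strong_count(&events), 3);
    assert_eq!(for_interpolator_a.moves.len(), for_interpolator_b.moves.len());
}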
@@ -31,7 +31,22 @@ impl ZoomFocusInterpolator {
     ) -> Self {
         Self {
             events: None,
-            cursor_events: cursor_events.clone(),
+            cursor_events: std::sync::Arc::new(cursor_events.clone()),
             cursor_smoothing,
             screen_spring,
             duration_secs,
         }
     }
+
+    pub fn new_arc(
+        cursor_events: std::sync::Arc<CursorEvents>,
+        cursor_smoothing: Option,
+        screen_spring: ScreenMovementSpring,
+        duration_secs: f64,
+    ) -> Self {
+        Self {
+            events: None,
+            cursor_events,
+            cursor_smoothing,
+            screen_spring,
+            duration_secs,