Skip to content

Commit 6a6da93

Browse files
committed
perf(rendering): NV12 stride alignment and shared pipelines
1 parent d740ddb commit 6a6da93

4 files changed

Lines changed: 154 additions & 20 deletions

File tree

crates/rendering/src/frame_pipeline.rs

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ pub struct RgbaToNv12Converter {
1717
pending: Option<PendingNv12Readback>,
1818
cached_width: u32,
1919
cached_height: u32,
20+
cached_stride: u32,
2021
cached_bind_groups: Option<[wgpu::BindGroup; 2]>,
2122
cached_texture_view: Option<wgpu::TextureView>,
2223
cached_texture_ptr: usize,
@@ -108,20 +109,31 @@ impl RgbaToNv12Converter {
108109
pending: None,
109110
cached_width: 0,
110111
cached_height: 0,
112+
cached_stride: 0,
111113
cached_bind_groups: None,
112114
cached_texture_view: None,
113115
cached_texture_ptr: 0,
114116
}
115117
}
116118

119+
/// Rounds `width` up to the nearest multiple of 4.
///
/// The result is used as the row stride of both the Y and the UV plane and
/// as the row length when sizing the NV12 buffer, so every caller must go
/// through this helper to agree on the padded layout.
///
/// Overflow behaves like ordinary `u32` arithmetic: it panics when overflow
/// checks are enabled and wraps otherwise. This can only happen for widths
/// above `u32::MAX - 3`.
fn aligned_stride(width: u32) -> u32 {
    width.next_multiple_of(4)
}
122+
117123
fn nv12_size(width: u32, height: u32) -> u64 {
118-
let y_size = (width as u64) * (height as u64);
119-
let uv_size = (width as u64) * (height as u64 / 2);
124+
let stride = Self::aligned_stride(width) as u64;
125+
let aligned_height = ((height + 1) & !1) as u64;
126+
let y_size = stride * aligned_height;
127+
let uv_size = stride * (aligned_height / 2);
120128
y_size + uv_size
121129
}
122130

123131
fn ensure_buffers(&mut self, device: &wgpu::Device, width: u32, height: u32) {
124-
if self.cached_width == width && self.cached_height == height {
132+
let stride = Self::aligned_stride(width);
133+
if self.cached_width == width
134+
&& self.cached_height == height
135+
&& self.cached_stride == stride
136+
{
125137
return;
126138
}
127139

@@ -148,6 +160,7 @@ impl RgbaToNv12Converter {
148160
self.current_readback = 0;
149161
self.cached_width = width;
150162
self.cached_height = height;
163+
self.cached_stride = stride;
151164
self.cached_bind_groups = None;
152165
self.cached_texture_view = None;
153166
self.cached_texture_ptr = 0;
@@ -165,7 +178,7 @@ impl RgbaToNv12Converter {
165178
frame_number: u32,
166179
frame_rate: u32,
167180
) -> bool {
168-
if width == 0 || height == 0 || !width.is_multiple_of(4) || !height.is_multiple_of(2) {
181+
if width == 0 || height == 0 {
169182
return false;
170183
}
171184

@@ -182,8 +195,8 @@ impl RgbaToNv12Converter {
182195
};
183196
self.current_readback = 1 - self.current_readback;
184197

185-
let y_stride = width;
186-
let uv_stride = width;
198+
let y_stride = Self::aligned_stride(width);
199+
let uv_stride = Self::aligned_stride(width);
187200

188201
let params = Nv12Params {
189202
width,

crates/rendering/src/layers/camera.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ pub struct CameraLayer {
2121
}
2222

2323
impl CameraLayer {
24+
#[allow(dead_code)]
2425
pub fn new(device: &wgpu::Device) -> Self {
2526
Self::new_with_all_shared_pipelines(
2627
device,

crates/rendering/src/layers/display.rs

Lines changed: 95 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,11 @@
11
use cap_project::XY;
22

3+
use std::sync::Arc;
4+
35
use crate::{
46
DecodedSegmentFrames, PixelFormat,
57
composite_frame::{CompositeVideoFramePipeline, CompositeVideoFrameUniforms},
6-
yuv_converter::YuvToRgbaConverter,
8+
yuv_converter::{YuvConverterPipelines, YuvToRgbaConverter},
79
};
810

911
struct PendingTextureCopy {
@@ -17,7 +19,7 @@ pub struct DisplayLayer {
1719
frame_texture_views: [wgpu::TextureView; 2],
1820
current_texture: usize,
1921
uniforms_buffer: wgpu::Buffer,
20-
pipeline: CompositeVideoFramePipeline,
22+
pipeline: std::sync::Arc<CompositeVideoFramePipeline>,
2123
bind_groups: [Option<wgpu::BindGroup>; 2],
2224
last_recording_time: Option<f32>,
2325
yuv_converter: YuvToRgbaConverter,
@@ -32,19 +34,32 @@ impl DisplayLayer {
3234
}
3335

3436
pub fn new_with_options(device: &wgpu::Device, prefer_cpu_conversion: bool) -> Self {
37+
Self::new_with_all_shared_pipelines(
38+
device,
39+
Arc::new(YuvConverterPipelines::new(device)),
40+
Arc::new(CompositeVideoFramePipeline::new(device)),
41+
prefer_cpu_conversion,
42+
)
43+
}
44+
45+
pub fn new_with_all_shared_pipelines(
46+
device: &wgpu::Device,
47+
yuv_pipelines: Arc<YuvConverterPipelines>,
48+
composite_pipeline: Arc<CompositeVideoFramePipeline>,
49+
prefer_cpu_conversion: bool,
50+
) -> Self {
3551
let frame_texture_0 = CompositeVideoFramePipeline::create_frame_texture(device, 1920, 1080);
3652
let frame_texture_1 = CompositeVideoFramePipeline::create_frame_texture(device, 1920, 1080);
3753
let frame_texture_view_0 = frame_texture_0.create_view(&Default::default());
3854
let frame_texture_view_1 = frame_texture_1.create_view(&Default::default());
3955

4056
let uniforms_buffer = CompositeVideoFrameUniforms::default().to_buffer(device);
41-
let pipeline = CompositeVideoFramePipeline::new(device);
4257
let bind_group_0 =
43-
Some(pipeline.bind_group(device, &uniforms_buffer, &frame_texture_view_0));
58+
Some(composite_pipeline.bind_group(device, &uniforms_buffer, &frame_texture_view_0));
4459
let bind_group_1 =
45-
Some(pipeline.bind_group(device, &uniforms_buffer, &frame_texture_view_1));
60+
Some(composite_pipeline.bind_group(device, &uniforms_buffer, &frame_texture_view_1));
4661

47-
let yuv_converter = YuvToRgbaConverter::new(device);
62+
let yuv_converter = YuvToRgbaConverter::new_with_shared_pipelines(device, yuv_pipelines);
4863

4964
if prefer_cpu_conversion {
5065
tracing::info!("DisplayLayer initialized with CPU YUV conversion preference");
@@ -55,7 +70,7 @@ impl DisplayLayer {
5570
frame_texture_views: [frame_texture_view_0, frame_texture_view_1],
5671
current_texture: 0,
5772
uniforms_buffer,
58-
pipeline,
73+
pipeline: composite_pipeline,
5974
bind_groups: [bind_group_0, bind_group_1],
6075
last_recording_time: None,
6176
yuv_converter,
@@ -64,6 +79,55 @@ impl DisplayLayer {
6479
}
6580
}
6681

82+
/// Windows-only fallback that converts a D3D11-backed NV12 frame through
/// `self.yuv_converter.convert_nv12_with_fallback`.
///
/// Returns `false` and leaves `self.pending_copy` untouched when:
/// - `screen_frame` has no D3D11 texture backing,
/// - the texture's device or that device's immediate context cannot be
///   obtained,
/// - the conversion returns an error, or
/// - the converter has no output texture after converting.
///
/// Otherwise records a `PendingTextureCopy` of `actual_width` x
/// `actual_height` targeting `next_texture` and returns `true`.
#[cfg(target_os = "windows")]
fn try_d3d11_staging_fallback(
    &mut self,
    device: &wgpu::Device,
    queue: &wgpu::Queue,
    screen_frame: &crate::DecodedFrame,
    actual_width: u32,
    actual_height: u32,
    next_texture: usize,
) -> bool {
    let Some(nv12_texture) = screen_frame.d3d11_texture_backing() else {
        return false;
    };

    // SAFETY: NOTE(review): assumes `nv12_texture` is a live D3D11 resource,
    // so asking it for its owning device is sound — confirm against the decoder.
    let Ok(d3d11_device) = (unsafe { nv12_texture.GetDevice() }) else {
        return false;
    };

    // SAFETY: NOTE(review): assumes `d3d11_device` obtained above is a valid
    // device interface — confirm.
    let Ok(d3d11_context) = (unsafe { d3d11_device.GetImmediateContext() }) else {
        return false;
    };

    let converted = self
        .yuv_converter
        .convert_nv12_with_fallback(
            device,
            queue,
            &d3d11_device,
            &d3d11_context,
            nv12_texture,
            screen_frame.d3d11_y_handle(),
            screen_frame.d3d11_uv_handle(),
            actual_width,
            actual_height,
        )
        .is_ok();

    // The output texture is only queried after a successful conversion.
    if !converted || self.yuv_converter.output_texture().is_none() {
        return false;
    }

    self.pending_copy = Some(PendingTextureCopy {
        width: actual_width,
        height: actual_height,
        dst_texture_index: next_texture,
    });
    true
}
130+
67131
pub fn prepare(
68132
&mut self,
69133
device: &wgpu::Device,
@@ -282,7 +346,14 @@ impl DisplayLayer {
282346
false
283347
}
284348
} else {
285-
false
349+
self.try_d3d11_staging_fallback(
350+
device,
351+
queue,
352+
screen_frame,
353+
actual_width,
354+
actual_height,
355+
next_texture,
356+
)
286357
}
287358
}
288359

@@ -541,7 +612,14 @@ impl DisplayLayer {
541612
Err(_) => false,
542613
}
543614
} else {
544-
false
615+
self.try_d3d11_staging_fallback(
616+
device,
617+
queue,
618+
screen_frame,
619+
actual_width,
620+
actual_height,
621+
next_texture,
622+
)
545623
}
546624
} else if let (Some(y_data), Some(uv_data)) =
547625
(screen_frame.y_plane(), screen_frame.uv_plane())
@@ -575,7 +653,14 @@ impl DisplayLayer {
575653
Err(_) => false,
576654
}
577655
} else {
578-
false
656+
self.try_d3d11_staging_fallback(
657+
device,
658+
queue,
659+
screen_frame,
660+
actual_width,
661+
actual_height,
662+
next_texture,
663+
)
579664
}
580665

581666
#[cfg(not(target_os = "windows"))]

crates/rendering/src/lib.rs

Lines changed: 39 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1455,7 +1455,7 @@ impl ProjectUniforms {
14551455
let height_scale = resolution_base.y as f32 / base_height as f32;
14561456
let scale = width_scale.min(height_scale);
14571457

1458-
let scaled_width = ((base_width as f32 * scale) as u32 + 1) & !1;
1458+
let scaled_width = ((base_width as f32 * scale) as u32 + 3) & !3;
14591459
let scaled_height = ((base_height as f32 * scale) as u32 + 1) & !1;
14601460
(scaled_width, scaled_height)
14611461
}
@@ -2320,6 +2320,24 @@ impl<'a> FrameRenderer<'a> {
23202320
}
23212321
}
23222322

2323+
pub async fn render_immediate_nv12(
2324+
&mut self,
2325+
segment_frames: DecodedSegmentFrames,
2326+
uniforms: ProjectUniforms,
2327+
cursor: &CursorEvents,
2328+
layers: &mut RendererLayers,
2329+
) -> Result<frame_pipeline::Nv12RenderedFrame, RenderingError> {
2330+
if let Some(frame) = self
2331+
.render_nv12(segment_frames, uniforms, cursor, layers)
2332+
.await?
2333+
{
2334+
return Ok(frame);
2335+
}
2336+
self.flush_pipeline_nv12()
2337+
.await
2338+
.unwrap_or(Err(RenderingError::BufferMapWaitingFailed))
2339+
}
2340+
23232341
pub async fn flush_pipeline_nv12(
23242342
&mut self,
23252343
) -> Option<Result<frame_pipeline::Nv12RenderedFrame, RenderingError>> {
@@ -2443,13 +2461,30 @@ impl RendererLayers {
24432461
queue: &wgpu::Queue,
24442462
prefer_cpu_conversion: bool,
24452463
) -> Self {
2464+
let shared_yuv_pipelines = Arc::new(yuv_converter::YuvConverterPipelines::new(device));
2465+
let shared_composite_pipeline =
2466+
Arc::new(composite_frame::CompositeVideoFramePipeline::new(device));
2467+
24462468
Self {
24472469
background: BackgroundLayer::new(device),
24482470
background_blur: BlurLayer::new(device),
2449-
display: DisplayLayer::new_with_options(device, prefer_cpu_conversion),
2471+
display: DisplayLayer::new_with_all_shared_pipelines(
2472+
device,
2473+
shared_yuv_pipelines.clone(),
2474+
shared_composite_pipeline.clone(),
2475+
prefer_cpu_conversion,
2476+
),
24502477
cursor: CursorLayer::new(device),
2451-
camera: CameraLayer::new(device),
2452-
camera_only: CameraLayer::new(device),
2478+
camera: CameraLayer::new_with_all_shared_pipelines(
2479+
device,
2480+
shared_yuv_pipelines.clone(),
2481+
shared_composite_pipeline.clone(),
2482+
),
2483+
camera_only: CameraLayer::new_with_all_shared_pipelines(
2484+
device,
2485+
shared_yuv_pipelines,
2486+
shared_composite_pipeline,
2487+
),
24532488
mask: MaskLayer::new(device),
24542489
text: TextLayer::new(device, queue),
24552490
captions: CaptionsLayer::new(device, queue),

0 commit comments

Comments
 (0)