Skip to content

Commit cfaa971

Browse files
committed
3dgs-offscreen: RenderToImage/RenderToDepthImage with GPU depth merge
gaussian_depth_merge.comp: - New compute shader: merges GS composite depth (R32F) and Filament scene depth (GL_DEPTH_COMPONENT32F, reversed-Z) into a normalised R16UI texture - Per-pixel nearest-depth merge: takes the max of the two reversed/inverse-depth values (larger means nearer; no linearisation is performed); outputs [0,65535] uint16 with 65535 = near and 0 = far - Scene depth sampler bound at composite stage binding 14 (reuses radix UBO slot which is idle) FilamentRenderToBuffer::Render(): - Mirror FilamentRenderer interactive pipeline for offscreen captures: BeginFrame -> flushAndWait -> RenderGeometryStage -> render() -> flushAndWait -> RenderCompositeStage - Color readback: two parallel readPixels (Filament RGBA+UBYTE base, GS RGBA+FLOAT overlay) then CPU BlendPremultipliedSplatOverRgb8() - Depth readback: ReadMergedDepthToUint16Cpu() reads the R16UI merged texture via glGetTexImage; converts uint16 -> float [0,1] for the RenderToDepthImage callback; if the merged texture is unavailable, fall back first to the GS-only composite depth (R32F) and finally to Filament-only depth - Metal: readPixels always uses RGBA+UBYTE (no native RGB Metal format); alpha stripped when n_channels_==3 - EnableViewCaching(true) in SetDimensions when GS geometry present (required for render-target-based readPixels)
1 parent 6dce267 commit cfaa971

2 files changed

Lines changed: 322 additions & 13 deletions

File tree

cpp/open3d/visualization/rendering/filament/FilamentRenderToBuffer.cpp

Lines changed: 250 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,57 @@
3232
#pragma warning(pop)
3333
#endif // _MSC_VER
3434

35+
#include <algorithm>
36+
#include <cstdint>
37+
#include <vector>
38+
3539
#include "open3d/utility/Logging.h"
3640
#include "open3d/visualization/rendering/filament/FilamentEngine.h"
3741
#include "open3d/visualization/rendering/filament/FilamentRenderer.h"
42+
#include "open3d/visualization/rendering/filament/FilamentResourceManager.h"
3843
#include "open3d/visualization/rendering/filament/FilamentScene.h"
3944
#include "open3d/visualization/rendering/filament/FilamentView.h"
45+
#include "open3d/visualization/rendering/gaussian_splat/GaussianSplatRenderer.h"
4046

4147
namespace open3d {
4248
namespace visualization {
4349
namespace rendering {
4450

51+
namespace {
52+
53+
/// Blends a premultiplied-alpha float RGBA overlay over an opaque 8-bit base
/// image, in place. The composite shader stores premultiplied RGB in
/// \p gs_rgba, so the blend is ImGui-style \c One / \c OneMinusSrcAlpha:
/// out = overlay_rgb + base_rgb * (1 - overlay_alpha).
///
/// \param base_rgb   In/out buffer of \p w * \p h pixels, \p n_channels bytes
///                   per pixel; channels 0..2 are read and overwritten.
/// \param n_channels Bytes per base pixel. When exactly 4, the fourth byte of
///                   every pixel is set to 255 (opaque result).
/// \param gs_rgba    Overlay of \p w * \p h pixels, 4 floats (RGBA) per pixel.
/// \param w          Image width in pixels.
/// \param h          Image height in pixels.
///
/// The call is a no-op when either buffer is null, when \p n_channels < 3, or
/// when either extent is not positive.
void BlendPremultipliedSplatOverRgb8(
        uint8_t* base_rgb, int n_channels, const float* gs_rgba, int w, int h) {
    // Reject inputs that cannot be indexed safely. Checking each extent on
    // its own matters: two negative extents multiply to a positive count.
    if (base_rgb == nullptr || gs_rgba == nullptr || n_channels < 3 ||
        w <= 0 || h <= 0) {
        return;
    }

    constexpr float kByteToUnit = 1.f / 255.f;

    // Clamp to [0, 1] and quantise with round-to-nearest. The comparisons are
    // ordered so that NaN fails the first test and maps to 0; converting a
    // NaN float to an integer type would be undefined behaviour.
    const auto to_byte = [](float v) -> uint8_t {
        const float unit = (v > 0.f) ? ((v < 1.f) ? v : 1.f) : 0.f;
        return static_cast<uint8_t>(unit * 255.f + 0.5f);
    };

    // Index in size_t so large images cannot overflow int arithmetic.
    const std::size_t pixel_count =
            static_cast<std::size_t>(w) * static_cast<std::size_t>(h);
    const std::size_t base_stride = static_cast<std::size_t>(n_channels);

    for (std::size_t i = 0; i < pixel_count; ++i) {
        const float* overlay = gs_rgba + i * 4;
        uint8_t* base = base_rgb + i * base_stride;
        const float base_weight = 1.f - overlay[3];
        for (int c = 0; c < 3; ++c) {
            const float base_unit = static_cast<float>(base[c]) * kByteToUnit;
            base[c] = to_byte(overlay[c] + base_unit * base_weight);
        }
        if (n_channels == 4) {
            base[3] = 255;
        }
    }
}
83+
84+
} // namespace
85+
4586
FilamentRenderToBuffer::FilamentRenderToBuffer(filament::Engine& engine)
4687
: engine_(engine) {
4788
renderer_ = engine_.createRenderer();
@@ -121,6 +162,12 @@ void FilamentRenderToBuffer::SetDimensions(const std::uint32_t width,
121162
width_ = width;
122163
height_ = height;
123164

165+
// Allocate cached Filament color/depth attachments for Gaussian splat
166+
// zero-copy and for readPixels of the Filament base pass.
167+
if (scene_ && scene_->HasGaussianSplatGeometry()) {
168+
view_->EnableViewCaching(true);
169+
}
170+
124171
if (depth_image_) {
125172
buffer_size_ = width * height * sizeof(std::float_t);
126173
} else {
@@ -173,31 +220,221 @@ void FilamentRenderToBuffer::ReadPixelsCallback(void*, size_t, void* user) {
173220
delete params;
174221
}
175222

223+
// Ordering mirrors FilamentRenderer::{BeginFrame,Draw,EndFrame}.
224+
// Stage 1 (Geometry) runs before Filament's beginFrame.
225+
// Stage 2 (Composite) runs after render() on non-Apple, after endFrame() on
226+
// Apple.
176227
void FilamentRenderToBuffer::Render() {
177228
frame_done_ = false;
178229
scene_->HideRefractedMaterials();
230+
231+
const bool has_gaussian =
232+
gaussian_splat_renderer_ && scene_->HasGaussianSplatGeometry();
233+
const bool run_gs_pipeline = has_gaussian;
234+
235+
if (run_gs_pipeline) {
236+
gaussian_splat_renderer_->RequestRedrawForView(*view_);
237+
if (depth_image_) {
238+
// Signal that a depth readback is needed so the composite pass
239+
// allocates and populates the merged_depth_u16_tex scratch texture.
240+
gaussian_splat_renderer_->RequestDepthReadbackForView(*view_, true);
241+
}
242+
gaussian_splat_renderer_->BeginFrame();
243+
#if !defined(__APPLE__)
244+
// Drain Filament work before Gaussian compute dispatches (shared
245+
// GL/Vulkan queue on non-Apple backends).
246+
engine_.flushAndWait();
247+
#endif
248+
gaussian_splat_renderer_->RenderGeometryStage(*view_, *scene_);
249+
}
250+
179251
if (renderer_->beginFrame(swapchain_)) {
180252
renderer_->render(view_->GetNativeView());
181253

254+
#if !defined(__APPLE__)
255+
if (run_gs_pipeline) {
256+
engine_.flushAndWait();
257+
gaussian_splat_renderer_->RenderCompositeStage(*view_);
258+
}
259+
#endif
260+
182261
using namespace filament;
183262
using namespace backend;
184263

185-
auto format = (n_channels_ == 3 ? PixelDataFormat::RGB
186-
: PixelDataFormat::RGBA);
187-
auto type = PixelDataType::UBYTE;
188-
if (depth_image_) {
189-
format = PixelDataFormat::DEPTH_COMPONENT;
190-
type = PixelDataType::FLOAT;
191-
}
192-
auto user_param = new PBDParams(this, callback_);
193-
PixelBufferDescriptor pd(buffer_, buffer_size_, format, type,
194-
ReadPixelsCallback, user_param);
195264
auto vp = view_->GetNativeView()->getViewport();
196265

197-
renderer_->readPixels(vp.left, vp.bottom, vp.width, vp.height,
198-
std::move(pd));
199-
200266
renderer_->endFrame();
267+
268+
#if defined(__APPLE__)
269+
if (run_gs_pipeline) {
270+
gaussian_splat_renderer_->RenderCompositeStage(*view_);
271+
}
272+
#endif
273+
274+
engine_.flushAndWait();
275+
276+
auto* resource_mgr = &EngineInstance::GetResourceManager();
277+
278+
RenderTargetHandle view_rt_h = view_->GetRenderTargetHandle();
279+
filament::RenderTarget* native_view_rt = nullptr;
280+
if (view_rt_h) {
281+
auto weak_vrt = resource_mgr->GetRenderTarget(view_rt_h);
282+
if (auto vrt = weak_vrt.lock()) {
283+
native_view_rt = vrt.get();
284+
}
285+
}
286+
287+
RenderTargetHandle gs_rt =
288+
run_gs_pipeline
289+
? gaussian_splat_renderer_->GetColorReadbackRT(*view_)
290+
: RenderTargetHandle();
291+
filament::RenderTarget* native_gs_rt = nullptr;
292+
if (gs_rt) {
293+
auto weak_rt = resource_mgr->GetRenderTarget(gs_rt);
294+
if (auto rt_sptr = weak_rt.lock()) {
295+
native_gs_rt = rt_sptr.get();
296+
}
297+
}
298+
299+
if (!depth_image_ && run_gs_pipeline && native_view_rt) {
300+
// Issue both readPixels (base + GS overlay) together, then do one
301+
// more flushAndWait to collect both callbacks synchronously.
302+
//
303+
// Metal readPixels from a render target only supports RGBA+UBYTE,
304+
// not RGB+UBYTE (Metal has no native RGB texture format). Always
305+
// read RGBA8 for the base and strip alpha when n_channels_==3.
306+
// On GL, RGBA also works fine — use one path for both backends.
307+
308+
const size_t n_pixels = static_cast<size_t>(width_) * height_;
309+
310+
// Scratch buffers for the two parallel readPixels callbacks.
311+
std::vector<uint8_t> base_rgba(n_pixels * 4);
312+
std::vector<float> gs_f32;
313+
314+
PixelBufferDescriptor base_pd(
315+
base_rgba.data(), base_rgba.size(), PixelDataFormat::RGBA,
316+
PixelDataType::UBYTE, [](void*, size_t, void*) {}, nullptr);
317+
renderer_->readPixels(native_view_rt, vp.left, vp.bottom, vp.width,
318+
vp.height, std::move(base_pd));
319+
320+
if (native_gs_rt) {
321+
gs_f32.resize(n_pixels * 4);
322+
PixelBufferDescriptor gs_pd(
323+
gs_f32.data(), gs_f32.size() * sizeof(float),
324+
PixelDataFormat::RGBA, PixelDataType::FLOAT,
325+
[](void*, size_t, void*) {}, nullptr);
326+
renderer_->readPixels(native_gs_rt, vp.left, vp.bottom,
327+
vp.width, vp.height, std::move(gs_pd));
328+
}
329+
330+
// One more flush ensures both callbacks complete before we proceed.
331+
engine_.flushAndWait();
332+
333+
// Unpack RGBA8 base → output buffer (strip alpha for RGB).
334+
const uint8_t* src = base_rgba.data();
335+
uint8_t* dst = buffer_;
336+
const int nc = static_cast<int>(n_channels_);
337+
const int np = static_cast<int>(n_pixels);
338+
for (int i = 0; i < np; ++i) {
339+
dst[i * nc + 0] = src[i * 4 + 0];
340+
dst[i * nc + 1] = src[i * 4 + 1];
341+
dst[i * nc + 2] = src[i * 4 + 2];
342+
if (nc == 4) dst[i * nc + 3] = src[i * 4 + 3];
343+
}
344+
if (native_gs_rt && !gs_f32.empty()) {
345+
BlendPremultipliedSplatOverRgb8(buffer_, nc, gs_f32.data(),
346+
int(width_), int(height_));
347+
}
348+
349+
// Deliver result now; the BeginFrame flushAndWait is a no-op since
350+
// all GPU work has already been collected above.
351+
if (callback_) {
352+
callback_({static_cast<std::size_t>(width_),
353+
static_cast<std::size_t>(height_),
354+
static_cast<std::size_t>(n_channels_), buffer_,
355+
buffer_size_});
356+
callback_ = nullptr;
357+
}
358+
frame_done_ = true;
359+
} else if (depth_image_ && run_gs_pipeline &&
360+
gaussian_splat_renderer_) {
361+
// GPU-merged depth path: the composite pass has already merged
362+
// GS and Filament depth into a normalised R16UI texture.
363+
// Read it back directly — no CPU merge required.
364+
std::vector<std::uint16_t> merged_u16;
365+
const bool got_merged =
366+
gaussian_splat_renderer_->ReadMergedDepthToUint16Cpu(
367+
*view_, merged_u16,
368+
static_cast<std::uint32_t>(width_),
369+
static_cast<std::uint32_t>(height_)) &&
370+
merged_u16.size() == width_ * height_;
371+
if (got_merged) {
372+
// Convert normalised uint16 [0,65535] -> Filament inverse
373+
// depth [0,1]. Renderer::RenderToDepthImage applies the final
374+
// user-facing conversion for z_in_view_space/normalized modes.
375+
float* dst = reinterpret_cast<float*>(buffer_);
376+
for (size_t i = 0; i < merged_u16.size(); ++i) {
377+
dst[i] = merged_u16[i] / 65535.f;
378+
}
379+
if (callback_) {
380+
callback_({static_cast<std::size_t>(width_),
381+
static_cast<std::size_t>(height_), 1u, buffer_,
382+
buffer_size_});
383+
callback_ = nullptr;
384+
}
385+
frame_done_ = true;
386+
} else {
387+
// Try GS-only composite depth (R32F) when no scene depth
388+
// was available for merging.
389+
std::vector<float> gs_depth;
390+
const bool got_gs_depth =
391+
gaussian_splat_renderer_->ReadCompositeDepthToFloatCpu(
392+
*view_, gs_depth,
393+
static_cast<std::uint32_t>(width_),
394+
static_cast<std::uint32_t>(height_)) &&
395+
gs_depth.size() == width_ * height_;
396+
if (got_gs_depth) {
397+
float* dst = reinterpret_cast<float*>(buffer_);
398+
std::copy(gs_depth.begin(), gs_depth.end(), dst);
399+
if (callback_) {
400+
callback_({static_cast<std::size_t>(width_),
401+
static_cast<std::size_t>(height_), 1u,
402+
buffer_, buffer_size_});
403+
callback_ = nullptr;
404+
}
405+
frame_done_ = true;
406+
} else {
407+
// Final fallback: Filament depth only via readPixels
408+
// (backend unsupported or no GS depth available).
409+
auto* user_param = new PBDParams(this, callback_);
410+
PixelBufferDescriptor pd(buffer_, buffer_size_,
411+
PixelDataFormat::DEPTH_COMPONENT,
412+
PixelDataType::FLOAT,
413+
ReadPixelsCallback, user_param);
414+
renderer_->readPixels(vp.left, vp.bottom, vp.width,
415+
vp.height, std::move(pd));
416+
}
417+
}
418+
} else {
419+
if (!depth_image_ && run_gs_pipeline && !native_view_rt) {
420+
utility::LogWarning(
421+
"Gaussian splat offscreen: FilamentView has no render "
422+
"target; expected EnableViewCaching. Reading the "
423+
"swapchain — splat composite may be missing.");
424+
}
425+
auto format = (n_channels_ == 3 ? PixelDataFormat::RGB
426+
: PixelDataFormat::RGBA);
427+
auto type = PixelDataType::UBYTE;
428+
if (depth_image_) {
429+
format = PixelDataFormat::DEPTH_COMPONENT;
430+
type = PixelDataType::FLOAT;
431+
}
432+
auto* user_param = new PBDParams(this, callback_);
433+
PixelBufferDescriptor pd(buffer_, buffer_size_, format, type,
434+
ReadPixelsCallback, user_param);
435+
renderer_->readPixels(vp.left, vp.bottom, vp.width, vp.height,
436+
std::move(pd));
437+
}
201438
}
202439
scene_->HideRefractedMaterials(false);
203440

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// ----------------------------------------------------------------------------
2+
// - Open3D: www.open3d.org -
3+
// ----------------------------------------------------------------------------
4+
// Copyright (c) 2018-2024 www.open3d.org
5+
// SPDX-License-Identifier: MIT
6+
// ----------------------------------------------------------------------------
7+
8+
// Gaussian splat depth-merge pass.
9+
//
10+
// Merges the GS composite inverse depth (R32F, gs.composite_depth) with
11+
// Filament's reversed-Z scene depth to produce a single normalised R16UI depth
12+
// map suitable for CPU readback via glGetTexImage / GL_RED_INTEGER /
13+
// GL_UNSIGNED_SHORT.
14+
//
15+
// Output: nearest depth from either source, normalised to [0, 65535] where
16+
// 65535 → near-plane depth, 0 → far-plane depth (Filament convention).
17+
//
18+
// The pass is optional and is only dispatched when merged_depth_u16_tex is
19+
// allocated (i.e. an offscreen RenderToDepthImage is in progress and scene
20+
// depth is available). When no scene depth is present the GS composite depth
21+
// is read back directly and this shader is skipped.
22+
//
23+
#version 460 core
24+
25+
layout(local_size_x = 16, local_size_y = 16) in;
26+
27+
// View-params UBO: same layout as every other GS compute shader.
28+
// Provides near/far planes and viewport size. Within this shader, main()
// reads only viewport_origin_and_size.zw; the other members are declared
// but not read here.
29+
layout(std140, binding = 0) uniform GaussianViewParams {
30+
mat4 world_from_model;
31+
mat4 view_from_world;
32+
mat4 clip_from_view;
33+
vec4 camera_position_and_near;
34+
// .zw is converted to ivec2 in main() and used as the dispatch bounds.
vec4 viewport_origin_and_size;
35+
uvec4 scene;
36+
uvec4 tiles;
37+
uvec4 limits;
38+
// depth_range_and_flags: x=near, y=far, z=reserved, w=reserved
39+
vec4 depth_range_and_flags;
40+
}
41+
view_params;
42+
43+
// GS composite depth (Filament inverse convention: near=1, far=0) written by
44+
// gaussian_composite.comp via Linear01ToInverse(). Sampled via texelFetch.
45+
// Binding 15: must stay in 0..15 for Metal (texture/sampler index limit); UBO
46+
// remains at 0 and does not share the sampler binding space.
47+
layout(binding = 15) uniform sampler2D gs_depth;
48+
49+
// Output: merged nearest depth, normalised uint16 [0, 65535].
50+
layout(binding = 1, r16ui) uniform writeonly uimage2D merged_depth;
51+
52+
// Filament reversed-Z scene depth. Binding 14 matches gaussian_composite.comp
53+
// so the same descriptor set works for both shaders.
54+
layout(binding = 14) uniform sampler2D scene_depth;
55+
56+
void main() {
    // One invocation per output texel. Invocations that land outside the
    // viewport size (viewport_origin_and_size.zw) write nothing.
    ivec2 coord = ivec2(gl_GlobalInvocationID.xy);
    ivec2 extent = ivec2(view_params.viewport_origin_and_size.zw);
    if (any(greaterThanEqual(coord, extent))) {
        return;
    }

    // Both inputs use the reversed/inverse depth convention, where a larger
    // value is nearer, so the nearest surface is the larger of the two.
    float nearest = max(texelFetch(gs_depth, coord, 0).r,
                        texelFetch(scene_depth, coord, 0).r);

    // Quantise to uint16 with round-to-nearest, keeping the same orientation:
    // near -> 65535, far -> 0.
    uint quantised = uint(clamp(nearest, 0.0, 1.0) * 65535.0 + 0.5);
    imageStore(merged_depth, coord, uvec4(quantised, 0u, 0u, 0u));
}

0 commit comments

Comments
 (0)