Skip to content

Commit 65d4471

Browse files
refactored Command Buffer logic (#506)
* refactored Command Buffer logic
* introduced render thread
* fixed clang format
* bounded to 3x buffering
* improved render thread communication
* removed mailbox cond_variable and mutex
* improved swapchain resizing logic
* fixed resizing issue on macOS
* fixed resizing issue on macOS
* fixed resizing issue on macOS
* switched from combined image to sampled image and sampler
* switched from combined image to sampled image and sampler
* fixed GPU crash on Intel driver
* fixed fence hanging
* fixed fence hanging
* fixed code formatting
* fixed async upload with transfer support
* added support for clearing async jobs
* fixed Windows resizing crash
1 parent ca1c7a1 commit 65d4471

40 files changed

+2769
-1503
lines changed

CMakePresets.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
"BUILD_SHARED_LIBS": "OFF",
1717
"FETCHCONTENT_QUIET": "OFF",
1818

19+
"FMT_MODULE": "OFF",
1920
"VULKAN_HEADERS_ENABLE_INSTALL": "ON",
2021
"ENTT_INCLUDE_HEADERS": "ON",
2122

Obelisk/EntryPoint.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <ZEngine/Applications/GameApplication.h>
44
#include <ZEngine/Core/Memory/MemoryManager.h>
55
#include <ZEngine/EngineConfiguration.h>
6+
#include <ZEngine/Helpers/ThreadPool.h>
67
#include <ZEngine/Logging/Logger.h>
78

89
#ifdef ZENGINE_PLATFORM
@@ -22,6 +23,8 @@ int applicationEntryPoint(int argc, char* argv[])
2223
LoggerConfiguration logger_cfg = {};
2324
Logger::Initialize(arena, logger_cfg);
2425

26+
Helpers::ThreadPoolHelper::Initialize();
27+
2528
GameApplicationPtr app = nullptr;
2629

2730
CLI::App cli{"ObeliskCLI"};

Resources/Shaders/fragment_common.glsl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ layout(std140, set = 0, binding = 5) readonly buffer MatSB
6161
}
6262
MaterialDataBuffer;
6363

64-
layout(set = 1, binding = 0) uniform sampler2D TextureArray[];
64+
layout(set = 1, binding = 0) uniform texture2D TextureArray[];
65+
layout(set = 1, binding = 1) uniform sampler LinearWrapSampler;
6566

6667
MaterialData FetchMaterial(uint dataIndex)
6768
{

Resources/Shaders/g_buffer.frag

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,18 @@ void main()
2424
if (material.AlbedoMap < INVALID_MAP_HANDLE)
2525
{
2626
uint texId = uint(material.AlbedoMap);
27-
OutAlbedo = texture(TextureArray[nonuniformEXT(texId)], TexCoord);
27+
OutAlbedo = texture(sampler2D(TextureArray[nonuniformEXT(texId)], LinearWrapSampler), TexCoord);
2828
}
2929

3030
if (material.SpecularMap < INVALID_MAP_HANDLE)
3131
{
3232
uint texId = uint(material.SpecularMap);
33-
OutSpecular = texture(TextureArray[nonuniformEXT(texId)], TexCoord);
33+
OutSpecular = texture(sampler2D(TextureArray[nonuniformEXT(texId)], LinearWrapSampler), TexCoord);
3434
}
3535

3636
if (material.NormalMap < INVALID_MAP_HANDLE)
3737
{
3838
uint texId = uint(material.NormalMap);
39-
OutNormal = texture(TextureArray[nonuniformEXT(texId)], TexCoord).rgb;
39+
OutNormal = texture(sampler2D(TextureArray[nonuniformEXT(texId)], LinearWrapSampler), TexCoord).rgb;
4040
}
41-
}
41+
}

Resources/Shaders/imgui.frag

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
#version 460 core
22
#extension GL_EXT_nonuniform_qualifier : require
3-
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
43

54
layout(location = 0) out vec4 fColor;
6-
layout(set = 0, binding = 0) uniform sampler2D _unused;
7-
layout(set = 1, binding = 0) uniform sampler2D TextureArray[];
5+
layout(set = 0, binding = 0) uniform sampler _unused;
6+
layout(set = 1, binding = 0) uniform texture2D TextureArray[];
7+
layout(set = 1, binding = 1) uniform sampler LinearWrapSampler;
88

99
layout(location = 0) in struct
1010
{
@@ -14,10 +14,7 @@ layout(location = 0) in struct
1414

1515
void main()
1616
{
17-
uint texId = uint(In.TexData.z);
18-
if (texId < 0xFFFFFFFFu)
19-
{
20-
vec4 texVal = texture(TextureArray[nonuniformEXT(texId)], In.TexData.xy);
21-
fColor = In.Color * texVal;
22-
}
23-
}
17+
uint texId = uint(floor(In.TexData.z + 0.5));
18+
vec4 texVal = texture(sampler2D(TextureArray[nonuniformEXT(texId)], LinearWrapSampler), In.TexData.xy);
19+
fColor = In.Color * texVal;
20+
}

Resources/Shaders/imgui.vert

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ layout(push_constant) uniform uPushConstant
88
vec2 uScale;
99
vec2 uTranslate;
1010
uint index;
11+
uint _padding;
1112
}
1213
pc;
1314

Resources/Shaders/skybox.frag

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22
layout(location = 0) in vec3 dir;
33
layout(location = 0) out vec4 outColor;
44

5-
layout(set = 0, binding = 1) uniform samplerCube EnvMap;
5+
layout(set = 0, binding = 1) uniform textureCube EnvMap;
6+
layout(set = 1, binding = 1) uniform sampler LinearWrapSampler;
67

78
void main()
89
{
9-
outColor = texture(EnvMap, dir);
10+
outColor = texture(samplerCube(EnvMap, LinearWrapSampler), dir);
1011
}

ZEngine/ZEngine/Applications/AppRenderPipeline.cpp

Lines changed: 127 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,30 @@
11
#include <AppRenderPipeline.h>
22
#include <Core/Containers/Array.h>
3+
#include <Rendering/Specifications/FormatSpecification.h>
34

45
using namespace ZEngine::Core::Containers;
56

67
namespace ZEngine::Applications
78
{
89
void AppRenderPipeline::Initialize(Hardwares::VulkanDevicePtr device)
910
{
10-
Device = device;
11+
Device = device;
12+
RenderWorkerThreadCount = Device->CommandBufferMgr->TotalThreadCount - 1u;
13+
UICommandBufferIndex = RenderMainThreadIndex + 1u;
14+
Device->Arena->CreateSubArena(ZMega(30), &LocalArena);
15+
1116
SceneRenderer = ZPushStructCtor(Device->Arena, Rendering::Renderers::GraphicRenderer);
1217
ImguiRenderer = ZPushStructCtor(Device->Arena, Rendering::Renderers::ImGUIRenderer);
13-
Device->Arena->CreateSubArena(ZMega(30), &LocalArena);
1418

1519
SceneRenderer->Initialize(Device);
1620
ImguiRenderer->Initialize(Device);
21+
22+
for (size_t i = 0; i < MaxMailBoxBufferCount; ++i)
23+
{
24+
RenderPayloads[i].UIOverlay.IndexedCmds.resize(100);
25+
RenderPayloads[i].UIOverlay.ScissorCmds.resize(100);
26+
RenderPayloads[i].UIOverlay.TextureIds.resize(100);
27+
}
1728
}
1829

1930
void AppRenderPipeline::Shutdown()
@@ -33,19 +44,45 @@ namespace ZEngine::Applications
3344

3445
void AppRenderPipeline::BeginFrame()
3546
{
36-
Device->NewFrame();
37-
CurrentCmdBuf = Device->GetCommandBuffer();
47+
auto swapchain = Device->SwapchainPtr;
48+
49+
swapchain->AcquireNextImage(CurrentMailBoxBufferHead);
50+
51+
for (uint8_t thread_idx = 0; thread_idx < Device->CommandBufferMgr->TotalThreadCount; ++thread_idx)
52+
{
53+
Device->CommandBufferMgr->ResetPool(swapchain->CurrentFrame->Index, thread_idx);
54+
Device->AsyncResLoader->ResetCommandBuffers(swapchain->CurrentFrame->Index, thread_idx);
55+
}
56+
57+
Device->AsyncResLoader->CompleteDeferrals();
58+
59+
// uint8_t render_worker_thread_idx = RenderThreadIndex + 1;
60+
// for (uint8_t worker_thread_idx = 0; worker_thread_idx < RenderWorkerThreadCount; ++worker_thread_idx)
61+
// {
62+
// auto thread_idx = render_worker_thread_idx + worker_thread_idx;
63+
// }
64+
CurrentCmdBuf = Device->CommandBufferMgr->GetCommandBuffer(Rendering::QueueType::GRAPHIC_QUEUE, swapchain->CurrentFrame->Index, RenderMainThreadIndex, 0, false);
65+
vkResetCommandBuffer(CurrentCmdBuf->GetHandle(), 0);
66+
CurrentCmdBuf->ResetState();
67+
CurrentCmdBuf->Begin();
3868
}
3969

4070
void AppRenderPipeline::EndFrame()
4171
{
42-
Device->EnqueueCommandBuffer(CurrentCmdBuf);
43-
Device->Present();
72+
Device->AsyncResLoader->SubmitAsyncJobs();
73+
Device->CommandBufferMgr->EnqueueBuffer(CurrentCmdBuf);
74+
Device->CommandBufferMgr->EndEnqueuedBuffers();
75+
76+
Device->SwapchainPtr->Present();
4477
}
4578

4679
void AppRenderPipeline::RenderScene(Rendering::Cameras::CameraPtr camera, Rendering::Scenes::RenderScenePtr scene)
4780
{
48-
if (scene->TransformBufferDirty[Device->CurrentFrameIndex].load(std::memory_order_acquire) || scene->MeshAllocationDirty[Device->CurrentFrameIndex].load(std::memory_order_acquire))
81+
auto swpachain = Device->SwapchainPtr;
82+
auto frame_index = swpachain->CurrentFrame->Index;
83+
auto thread_index = RenderMainThreadIndex;
84+
85+
if (scene->TransformBufferDirty[Device->SwapchainPtr->CurrentFrame->Index].load(std::memory_order_acquire) || scene->MeshAllocationDirty[Device->SwapchainPtr->CurrentFrame->Index].load(std::memory_order_acquire))
4986
{
5087
auto gpu_scene_data = SceneRenderer->RenderSceneData;
5188

@@ -56,21 +93,21 @@ namespace ZEngine::Applications
5693

5794
auto indirect_buffer_set = Device->IndirectBufferSetManager.Access(gpu_scene_data->IndirectBufferHandle);
5895

59-
auto vtx_buffer = vtx_buffer_set->At(Device->CurrentFrameIndex);
60-
auto idx_buffer = idx_buffer_set->At(Device->CurrentFrameIndex);
61-
auto transform_buffer = transform_buffer_set->At(Device->CurrentFrameIndex);
62-
auto rd_buffer = rd_buffer_set->At(Device->CurrentFrameIndex);
63-
auto indirect_buffer = indirect_buffer_set->At(Device->CurrentFrameIndex);
96+
auto vtx_buffer = vtx_buffer_set->At(Device->SwapchainPtr->CurrentFrame->Index);
97+
auto idx_buffer = idx_buffer_set->At(Device->SwapchainPtr->CurrentFrame->Index);
98+
auto transform_buffer = transform_buffer_set->At(Device->SwapchainPtr->CurrentFrame->Index);
99+
auto rd_buffer = rd_buffer_set->At(Device->SwapchainPtr->CurrentFrame->Index);
100+
auto indirect_buffer = indirect_buffer_set->At(Device->SwapchainPtr->CurrentFrame->Index);
64101

65102
auto& suballocs = scene->NodeSubMeshesAllocations;
66103

67-
if (scene->TransformBufferDirty[Device->CurrentFrameIndex].exchange(false, std::memory_order_acquire))
104+
if (scene->TransformBufferDirty[Device->SwapchainPtr->CurrentFrame->Index].exchange(false, std::memory_order_acquire))
68105
{
69106
auto transform_data_view = ArrayView{scene->GlobalTransforms};
70-
transform_buffer->Write(transform_data_view);
107+
transform_buffer->Write(frame_index, thread_index, transform_data_view);
71108
}
72109

73-
if (scene->MeshAllocationDirty[Device->CurrentFrameIndex].exchange(false, std::memory_order_acquire))
110+
if (scene->MeshAllocationDirty[Device->SwapchainPtr->CurrentFrame->Index].exchange(false, std::memory_order_acquire))
74111
{
75112
auto scratch = ZGetScratch(&LocalArena);
76113

@@ -100,29 +137,98 @@ namespace ZEngine::Applications
100137
auto sub_mesh_alloc_view = ArrayView{SubMeshAllocations};
101138
auto indirect_commands_view = ArrayView{DrawIndirectCommands};
102139

103-
vtx_buffer->Write(vertex_data_view);
104-
idx_buffer->Write(index_data_view);
140+
vtx_buffer->Write(frame_index, thread_index, vertex_data_view);
141+
idx_buffer->Write(frame_index, thread_index, index_data_view);
105142

106-
rd_buffer->Write(sub_mesh_alloc_view);
143+
rd_buffer->Write(frame_index, thread_index, sub_mesh_alloc_view);
107144

108-
indirect_buffer->Write(indirect_commands_view);
145+
indirect_buffer->Write(frame_index, thread_index, indirect_commands_view);
109146

110147
ZReleaseScratch(scratch);
111148
}
112149
}
113150

114151
// Todo (Kernel) : When we'll start considering multithreaded support
115152
// we might want to renderer->EnqueueAsync({command_buffer, {camera, frame_data} })
116-
SceneRenderer->DrawScene(CurrentCmdBuf, camera);
153+
SceneRenderer->DrawScene(frame_index, thread_index, CurrentCmdBuf, camera);
117154
}
118155

119156
void AppRenderPipeline::BeginOverlayFrame()
120157
{
121158
ImguiRenderer->NewFrame();
122159
}
123160

161+
void AppRenderPipeline::FillOverlayPayload(Rendering::Renderers::RenderOverlayPayload& payload)
162+
{
163+
ImguiRenderer->PreparePayload(payload);
164+
}
165+
166+
void AppRenderPipeline::RenderOverlay(const Rendering::Renderers::RenderOverlayPayload& payload)
167+
{
168+
if (payload.VertexCount == 0 && payload.IndexCount == 0)
169+
{
170+
return;
171+
}
172+
173+
auto swpachain = Device->SwapchainPtr;
174+
auto frame_index = swpachain->CurrentFrame->Index;
175+
auto thread_index = RenderMainThreadIndex;
176+
177+
auto current_framebuffer = Device->SwapchainPtr->SwapchainFramebuffers[Device->SwapchainPtr->CurrentFrame->ImageIndex];
178+
179+
CurrentCmdBuf->BeginRenderPass(ImguiRenderer->UIPass, current_framebuffer, true);
180+
{
181+
auto vtx_data_view = ArrayView{payload.VertexData.data(), payload.VertexData.size()};
182+
auto idx_data_view = ArrayView{payload.IndexData.data(), payload.IndexData.size()};
183+
184+
auto vertex_buffer_set = Device->VertexBufferSetManager.Access(payload.VBHandle);
185+
auto index_buffer_set = Device->IndexBufferSetManager.Access(payload.IdxBHandle);
186+
187+
auto vertex_buffer = vertex_buffer_set->At(Device->SwapchainPtr->CurrentFrame->Index);
188+
auto index_buffer = index_buffer_set->At(Device->SwapchainPtr->CurrentFrame->Index);
189+
190+
vertex_buffer->Write(frame_index, thread_index, vtx_data_view);
191+
index_buffer->Write(frame_index, thread_index, idx_data_view);
192+
193+
auto ui_second_cb = Device->CommandBufferMgr->GetCommandBuffer(Rendering::QueueType::GRAPHIC_QUEUE, Device->SwapchainPtr->CurrentFrame->Index, RenderMainThreadIndex, UICommandBufferIndex, false);
194+
ui_second_cb->ResetState();
195+
ui_second_cb->BeginSecondary(ImguiRenderer->UIPass, current_framebuffer);
196+
ui_second_cb->SetViewport(ImguiRenderer->UIPass->GetRenderAreaWidth(), ImguiRenderer->UIPass->GetRenderAreaHeight());
197+
198+
ui_second_cb->BindPipeline(Rendering::Specifications::PipelineBindPoint::GRAPHIC, ImguiRenderer->UIPass->Pipeline);
199+
200+
ui_second_cb->BindVertexBuffer(*vertex_buffer);
201+
ui_second_cb->BindIndexBuffer(*index_buffer, payload.IsIndexBufferUint16 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32);
202+
203+
Rendering::Renderers::PushConstantData pc_data = {};
204+
pc_data.Scale[0] = payload.Pc[0];
205+
pc_data.Scale[1] = payload.Pc[1];
206+
207+
pc_data.Translate[0] = payload.Pc[2];
208+
pc_data.Translate[1] = payload.Pc[3];
209+
210+
for (uint32_t i = 0; i < payload.DrawDataIndex; ++i)
211+
{
212+
const auto& scissor_cmd = payload.ScissorCmds[i];
213+
const auto& indexed_cmd = payload.IndexedCmds[i];
214+
215+
ui_second_cb->SetScissor(scissor_cmd.w, scissor_cmd.h, scissor_cmd.x, scissor_cmd.y);
216+
pc_data.TextureId = payload.TextureIds[i];
217+
ui_second_cb->PushConstants(VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(Rendering::Renderers::PushConstantData), &pc_data);
218+
ui_second_cb->BindDescriptorSets(Device->SwapchainPtr->CurrentFrame->Index);
219+
ui_second_cb->DrawIndexed(indexed_cmd.IdxCount, indexed_cmd.InstanceCount, indexed_cmd.FirstIndex, indexed_cmd.VertexOffset, indexed_cmd.FirstInstance);
220+
}
221+
222+
ui_second_cb->End();
223+
224+
CurrentCmdBuf->ExecuteSecondaryCommandBuffers(ArrayView<Hardwares::CommandBuffer>{ui_second_cb, 1});
225+
}
226+
227+
CurrentCmdBuf->EndRenderPass();
228+
}
229+
124230
void AppRenderPipeline::EndOverlayFrame()
125231
{
126-
ImguiRenderer->DrawFrame(CurrentCmdBuf);
232+
ImguiRenderer->EndFrame();
127233
}
128234
} // namespace ZEngine::Applications

ZEngine/ZEngine/Applications/AppRenderPipeline.h

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,32 @@
55

66
namespace ZEngine::Applications
77
{
8-
struct AppRenderPipeline
8+
struct RenderPayload
99
{
10-
Hardwares::VulkanDevicePtr Device = nullptr;
11-
Rendering::Renderers::GraphicRendererPtr SceneRenderer = nullptr;
12-
Rendering::Renderers::ImGUIRendererPtr ImguiRenderer = nullptr;
13-
Hardwares::CommandBufferPtr CurrentCmdBuf = nullptr;
10+
uint32_t RenderTargetW = 0;
11+
uint32_t RenderTargetH = 0;
12+
PaddedAtomic<bool> RenderUIOverlay = {.value = false};
13+
PaddedAtomic<bool> ResizeRenderTarget = {.value = false};
14+
Rendering::Cameras::CameraPtr Camera = nullptr;
15+
Rendering::Scenes::RenderScenePtr Scene = nullptr;
16+
Rendering::Renderers::RenderOverlayPayload UIOverlay = {};
17+
};
1418

15-
ZEngine::Core::Memory::ArenaAllocator LocalArena = {};
19+
struct AppRenderPipeline
20+
{
21+
const uint8_t MaxMailBoxBufferCount = 3;
22+
const uint8_t RenderMainThreadIndex = 0;
23+
uint8_t RenderWorkerThreadCount = 0;
24+
uint8_t UICommandBufferIndex = 0xff;
25+
uint32_t CurrentMailBoxBufferHead = 0;
26+
PaddedAtomic<int> MailBoxBufferHead = {.value = 0};
27+
PaddedAtomic<int> MailBoxBufferTail = {.value = 0};
28+
RenderPayload RenderPayloads[3] = {};
29+
ZEngine::Core::Memory::ArenaAllocator LocalArena = {};
30+
Hardwares::VulkanDevicePtr Device = nullptr;
31+
Rendering::Renderers::GraphicRendererPtr SceneRenderer = nullptr;
32+
Rendering::Renderers::ImGUIRendererPtr ImguiRenderer = nullptr;
33+
Hardwares::CommandBufferPtr CurrentCmdBuf = nullptr;
1634

1735
void Initialize(Hardwares::VulkanDevicePtr device);
1836
void Shutdown();
@@ -26,6 +44,8 @@ namespace ZEngine::Applications
2644

2745
void BeginOverlayFrame();
2846
void EndOverlayFrame();
47+
void FillOverlayPayload(Rendering::Renderers::RenderOverlayPayload& payload);
48+
void RenderOverlay(const Rendering::Renderers::RenderOverlayPayload& payload);
2949
};
3050
ZDEFINE_PTR(AppRenderPipeline);
3151

0 commit comments

Comments
 (0)